From 84c3b6260bf448f67ee850374cf54a706bdf9281 Mon Sep 17 00:00:00 2001 From: Peter Nose Date: Sat, 18 Feb 2023 13:52:00 +0100 Subject: [PATCH 1/7] go/keymanager/api: Add publish master secret transaction --- .../cometbft/apps/keymanager/keymanager.go | 10 + .../cometbft/apps/keymanager/query.go | 5 + .../cometbft/apps/keymanager/state/state.go | 27 +- .../apps/keymanager/state/state_test.go | 50 +++- .../cometbft/apps/keymanager/transactions.go | 273 +++++++++++++----- .../cometbft/keymanager/keymanager.go | 42 ++- go/keymanager/api/api.go | 80 ++++- go/keymanager/api/api_test.go | 16 + go/keymanager/api/grpc.go | 104 +++++++ go/keymanager/api/policy_sgx.go | 4 + keymanager/src/policy/cached.rs | 2 + runtime/src/consensus/keymanager.rs | 2 + runtime/src/consensus/state/beacon.rs | 2 +- runtime/src/consensus/state/keymanager.rs | 38 ++- runtime/src/consensus/state/registry.rs | 2 +- runtime/src/consensus/state/staking.rs | 2 +- 16 files changed, 564 insertions(+), 95 deletions(-) diff --git a/go/consensus/cometbft/apps/keymanager/keymanager.go b/go/consensus/cometbft/apps/keymanager/keymanager.go index f1c165433da..184b4fec23b 100644 --- a/go/consensus/cometbft/apps/keymanager/keymanager.go +++ b/go/consensus/cometbft/apps/keymanager/keymanager.go @@ -79,6 +79,12 @@ func (app *keymanagerApplication) ExecuteTx(ctx *tmapi.Context, tx *transaction. 
return api.ErrInvalidArgument } return app.updatePolicy(ctx, state, &sigPol) + case api.MethodPublishMasterSecret: + var sigSec api.SignedEncryptedMasterSecret + if err := cbor.Unmarshal(tx.Body, &sigSec); err != nil { + return api.ErrInvalidArgument + } + return app.publishMasterSecret(ctx, state, &sigSec) case api.MethodPublishEphemeralSecret: var sigSec api.SignedEncryptedEphemeralSecret if err := cbor.Unmarshal(tx.Body, &sigSec); err != nil { @@ -179,6 +185,8 @@ func (app *keymanagerApplication) onEpochChange(ctx *tmapi.Context, epoch beacon "id", newStatus.ID, "is_initialized", newStatus.IsInitialized, "is_secure", newStatus.IsSecure, + "generation", newStatus.Generation, + "rotation_epoch", newStatus.RotationEpoch, "checksum", hex.EncodeToString(newStatus.Checksum), "rsk", newStatus.RSK, "nodes", newStatus.Nodes, @@ -226,6 +234,8 @@ func (app *keymanagerApplication) generateStatus( ID: kmrt.ID, IsInitialized: oldStatus.IsInitialized, IsSecure: oldStatus.IsSecure, + Generation: oldStatus.Generation, + RotationEpoch: oldStatus.RotationEpoch, Checksum: oldStatus.Checksum, Policy: oldStatus.Policy, } diff --git a/go/consensus/cometbft/apps/keymanager/query.go b/go/consensus/cometbft/apps/keymanager/query.go index 316e4aa9406..2dad99e81c0 100644 --- a/go/consensus/cometbft/apps/keymanager/query.go +++ b/go/consensus/cometbft/apps/keymanager/query.go @@ -14,6 +14,7 @@ import ( type Query interface { Status(context.Context, common.Namespace) (*keymanager.Status, error) Statuses(context.Context) ([]*keymanager.Status, error) + MasterSecret(context.Context, common.Namespace) (*keymanager.SignedEncryptedMasterSecret, error) EphemeralSecret(context.Context, common.Namespace, beacon.EpochTime) (*keymanager.SignedEncryptedEphemeralSecret, error) Genesis(context.Context) (*keymanager.Genesis, error) } @@ -44,6 +45,10 @@ func (kq *keymanagerQuerier) Statuses(ctx context.Context) ([]*keymanager.Status return kq.state.Statuses(ctx) } +func (kq *keymanagerQuerier) 
MasterSecret(ctx context.Context, id common.Namespace) (*keymanager.SignedEncryptedMasterSecret, error) { + return kq.state.MasterSecret(ctx, id) +} + func (kq *keymanagerQuerier) EphemeralSecret(ctx context.Context, id common.Namespace, epoch beacon.EpochTime) (*keymanager.SignedEncryptedEphemeralSecret, error) { return kq.state.EphemeralSecret(ctx, id, epoch) } diff --git a/go/consensus/cometbft/apps/keymanager/state/state.go b/go/consensus/cometbft/apps/keymanager/state/state.go index a7cef439ccd..562e09cd92a 100644 --- a/go/consensus/cometbft/apps/keymanager/state/state.go +++ b/go/consensus/cometbft/apps/keymanager/state/state.go @@ -23,10 +23,14 @@ var ( // // Value is CBOR-serialized keymanager.ConsensusParameters. parametersKeyFmt = keyformat.New(0x71) + // masterSecretKeyFmt is the key manager master secret key format. + // + // Value is CBOR-serialized key manager signed encrypted master secret. + masterSecretKeyFmt = keyformat.New(0x72, keyformat.H(&common.Namespace{})) // ephemeralSecretKeyFmt is the key manager ephemeral secret key format. // // Value is CBOR-serialized key manager signed encrypted ephemeral secret. - ephemeralSecretKeyFmt = keyformat.New(0x72, keyformat.H(&common.Namespace{}), uint64(0)) + ephemeralSecretKeyFmt = keyformat.New(0x73, keyformat.H(&common.Namespace{}), uint64(0)) ) // ImmutableState is the immutable key manager state wrapper. 
@@ -102,6 +106,22 @@ func (st *ImmutableState) Status(ctx context.Context, id common.Namespace) (*api return &status, nil } +func (st *ImmutableState) MasterSecret(ctx context.Context, id common.Namespace) (*api.SignedEncryptedMasterSecret, error) { + data, err := st.is.Get(ctx, masterSecretKeyFmt.Encode(&id)) + if err != nil { + return nil, abciAPI.UnavailableStateError(err) + } + if data == nil { + return nil, api.ErrNoSuchMasterSecret + } + + var secret api.SignedEncryptedMasterSecret + if err := cbor.Unmarshal(data, &secret); err != nil { + return nil, abciAPI.UnavailableStateError(err) + } + return &secret, nil +} + func (st *ImmutableState) EphemeralSecret(ctx context.Context, id common.Namespace, epoch beacon.EpochTime) (*api.SignedEncryptedEphemeralSecret, error) { data, err := st.is.Get(ctx, ephemeralSecretKeyFmt.Encode(&id, uint64(epoch))) if err != nil { @@ -149,6 +169,11 @@ func (st *MutableState) SetStatus(ctx context.Context, status *api.Status) error return abciAPI.UnavailableStateError(err) } +func (st *MutableState) SetMasterSecret(ctx context.Context, secret *api.SignedEncryptedMasterSecret) error { + err := st.ms.Insert(ctx, masterSecretKeyFmt.Encode(&secret.Secret.ID), cbor.Marshal(secret)) + return abciAPI.UnavailableStateError(err) +} + func (st *MutableState) SetEphemeralSecret(ctx context.Context, secret *api.SignedEncryptedEphemeralSecret) error { err := st.ms.Insert(ctx, ephemeralSecretKeyFmt.Encode(&secret.Secret.ID, uint64(secret.Secret.Epoch)), cbor.Marshal(secret)) return abciAPI.UnavailableStateError(err) diff --git a/go/consensus/cometbft/apps/keymanager/state/state_test.go b/go/consensus/cometbft/apps/keymanager/state/state_test.go index b4617413447..9b503a1da8e 100644 --- a/go/consensus/cometbft/apps/keymanager/state/state_test.go +++ b/go/consensus/cometbft/apps/keymanager/state/state_test.go @@ -5,15 +5,54 @@ import ( "github.com/stretchr/testify/require" - "github.com/oasisprotocol/curve25519-voi/primitives/x25519" - beacon 
"github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common" abciAPI "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" "github.com/oasisprotocol/oasis-core/go/keymanager/api" ) -func TestEphemeralSecrets(t *testing.T) { +func TestMasterSecret(t *testing.T) { + require := require.New(t) + + appState := abciAPI.NewMockApplicationState(&abciAPI.MockApplicationStateConfig{}) + ctx := appState.NewContext(abciAPI.ContextBeginBlock) + defer ctx.Close() + + s := NewMutableState(ctx.State()) + + // Prepare data. + runtimes := []common.Namespace{ + common.NewTestNamespaceFromSeed([]byte("runtime 1"), common.NamespaceKeyManager), + common.NewTestNamespaceFromSeed([]byte("runtime 2"), common.NamespaceKeyManager), + } + secrets := make([]*api.SignedEncryptedMasterSecret, 0, 10) + for i := 0; i < cap(secrets); i++ { + secret := api.SignedEncryptedMasterSecret{ + Secret: api.EncryptedMasterSecret{ + ID: runtimes[i%2], + Generation: uint64(i), + }, + } + secrets = append(secrets, &secret) + } + + // Test adding secrets. + for _, secret := range secrets { + err := s.SetMasterSecret(ctx, secret) + require.NoError(err, "SetMasterSecret()") + } + + // Test querying secrets. 
+ for i, runtime := range runtimes { + secret, err := s.MasterSecret(ctx, runtime) + require.NoError(err, "MasterSecret()") + require.Equal(secrets[8+i], secret, "last master secret should be kept") + } + _, err := s.MasterSecret(ctx, common.Namespace{1, 2, 3}) + require.EqualError(err, api.ErrNoSuchMasterSecret.Error(), "MasterSecret should error for non-existing secrets") +} + +func TestEphemeralSecret(t *testing.T) { require := require.New(t) appState := abciAPI.NewMockApplicationState(&abciAPI.MockApplicationStateConfig{}) @@ -33,11 +72,6 @@ func TestEphemeralSecrets(t *testing.T) { Secret: api.EncryptedEphemeralSecret{ ID: runtimes[(i/5)%2], Epoch: beacon.EpochTime(i), - Secret: api.EncryptedSecret{ - Checksum: []byte{}, - PubKey: x25519.PublicKey{}, - Ciphertexts: map[x25519.PublicKey][]byte{}, - }, }, } secrets = append(secrets, &secret) diff --git a/go/consensus/cometbft/apps/keymanager/transactions.go b/go/consensus/cometbft/apps/keymanager/transactions.go index 1f3c84e63d8..65a58e33820 100644 --- a/go/consensus/cometbft/apps/keymanager/transactions.go +++ b/go/consensus/cometbft/apps/keymanager/transactions.go @@ -7,6 +7,7 @@ import ( "github.com/oasisprotocol/curve25519-voi/primitives/x25519" + "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" "github.com/oasisprotocol/oasis-core/go/common/node" tmapi "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" @@ -23,27 +24,24 @@ func (app *keymanagerApplication) updatePolicy( ) error { // Ensure that the runtime exists and is a key manager. regState := registryState.NewMutableState(ctx.State()) - rt, err := regState.Runtime(ctx, sigPol.Policy.ID) + kmRt, err := keyManagerRuntime(ctx, regState, sigPol.Policy.ID) if err != nil { return err } - if rt.Kind != registry.KindKeyManager { - return fmt.Errorf("keymanager: runtime is not a key manager: %s", sigPol.Policy.ID) - } // Ensure that the tx signer is the key manager owner. 
- if !rt.EntityID.Equal(ctx.TxSigner()) { + if !kmRt.EntityID.Equal(ctx.TxSigner()) { return fmt.Errorf("keymanager: invalid update signer: %s", sigPol.Policy.ID) } // Get the existing policy document, if one exists. - oldStatus, err := state.Status(ctx, rt.ID) + oldStatus, err := state.Status(ctx, kmRt.ID) switch err { case nil: case api.ErrNoSuchStatus: // This must be a new key manager runtime. oldStatus = &api.Status{ - ID: rt.ID, + ID: kmRt.ID, } default: return err @@ -96,7 +94,7 @@ func (app *keymanagerApplication) updatePolicy( nodes, _ := regState.Nodes(ctx) registry.SortNodeList(nodes) oldStatus.Policy = sigPol - newStatus := app.generateStatus(ctx, rt, oldStatus, nodes, regParams, epoch) + newStatus := app.generateStatus(ctx, kmRt, oldStatus, nodes, regParams, epoch) if err := state.SetStatus(ctx, newStatus); err != nil { ctx.Logger().Error("keymanager: failed to set key manager status", "err", err, @@ -111,6 +109,125 @@ func (app *keymanagerApplication) updatePolicy( return nil } +// publishMasterSecret stores a new proposal for the master secret, which may overwrite +// the previous one. +// +// Key managers have the ability to rotate the master secret at predetermined intervals. +// Each rotation introduces a new generation, or version, of the master secret that is +// sequentially numbered, starting from zero. These rotations occur during key manager +// status updates, which typically happen during epoch transitions. To perform a rotation, +// one of the key manager enclaves must publish a proposal for the next generation of +// the master secret, which must then be replicated by the majority of enclaves. +// If the replication process is not completed by the end of the epoch, the proposal can +// be replaced with a new one. +// +// Since key managers have to store all generations of the master secret, rotations should +// not take place too frequently. 
The frequency of rotations does not affect runtimes, +// as they can skip generations when performing state re-encryptions. +// +// It's worth noting that the process of generating, publishing, and replicating master +// secrets differs from that of ephemeral secrets. For more information, please refer +// to the description of the publishEphemeralSecret function. +func (app *keymanagerApplication) publishMasterSecret( + ctx *tmapi.Context, + state *keymanagerState.MutableState, + secret *api.SignedEncryptedMasterSecret, +) error { + // Ensure that the runtime exists and is a key manager. + regState := registryState.NewMutableState(ctx.State()) + kmRt, err := keyManagerRuntime(ctx, regState, secret.Secret.ID) + if err != nil { + return err + } + + // Reject if the signer is not in the key manager committee. + kmStatus, err := state.Status(ctx, kmRt.ID) + if err != nil { + return err + } + if !slices.Contains(kmStatus.Nodes, ctx.TxSigner()) { + return fmt.Errorf("keymanager: master secret can be published only by the key manager committee") + } + + // Reject if the master secret has been proposed in this epoch. + lastSecret, err := state.MasterSecret(ctx, secret.Secret.ID) + if err != nil && err != api.ErrNoSuchMasterSecret { + return err + } + if lastSecret != nil && secret.Secret.Epoch == lastSecret.Secret.Epoch { + return fmt.Errorf("keymanager: master secret can be proposed once per epoch") + } + + // Reject if rotation is not allowed. + if err = kmStatus.VerifyRotationEpoch(secret.Secret.Epoch); err != nil { + return fmt.Errorf("keymanager: master secret rotation not allowed: %w", err) + } + + // Verify the secret. Master secrets can be published for the next epoch and for + // the next generation only. 
+ nextGen := kmStatus.NextGeneration() + epoch, err := app.state.GetCurrentEpoch(ctx) + if err != nil { + return err + } + nextEpoch := epoch + 1 + rak, err := runtimeAttestationKey(ctx, regState, kmRt) + if err != nil { + return err + } + reks := runtimeEncryptionKeys(ctx, regState, kmRt, kmStatus) + + if err = secret.Verify(nextGen, nextEpoch, reks, rak); err != nil { + return err + } + + // Return early if this is a CheckTx context. + if ctx.IsCheckOnly() { + return nil + } + + // Charge gas for this operation. + kmParams, err := state.ConsensusParameters(ctx) + if err != nil { + return err + } + if err = ctx.Gas().UseGas(1, api.GasOpPublishMasterSecret, kmParams.GasCosts); err != nil { + return err + } + + // Return early if simulating since this is just estimating gas. + if ctx.IsSimulation() { + return nil + } + + // Ok, as far as we can tell the secret is valid, save it. + if err := state.SetMasterSecret(ctx, secret); err != nil { + ctx.Logger().Error("keymanager: failed to set key manager master secret", + "err", err, + ) + return fmt.Errorf("keymanager: failed to set key manager master secret: %w", err) + } + + ctx.EmitEvent(tmapi.NewEventBuilder(app.Name()).TypedAttribute(&api.MasterSecretPublishedEvent{ + Secret: secret, + })) + + return nil +} + +// publishEphemeralSecret stores the ephemeral secret for the given epoch. +// +// Key managers support forward-secret ephemeral secrets which are never encrypted with +// SGX sealing key nor stored in the enclave's cold storage. These secrets are generated +// by the enclaves themselves for the next epoch only and published encrypted in the consensus. +// Only one secret can be published for an epoch, others are discarded. Overwrites are not +// allowed as with master secrets. So if all enclaves restart at the same time, no one +// will be able to decrypt ephemeral secrets for the past. 
The number of generated secrets +// does not effect the performance, as key managers store in memory only the last few secrets, +// as defined in the policy. +// +// Note that ephemeral secrets differ from master secrets. For more information, see +// the description of the publishMasterSecret function. func (app *keymanagerApplication) publishEphemeralSecret( ctx *tmapi.Context, state *keymanagerState.MutableState, @@ -118,12 +235,18 @@ func (app *keymanagerApplication) publishEphemeralSecret( ) error { // Ensure that the runtime exists and is a key manager. regState := registryState.NewMutableState(ctx.State()) - kmRt, err := regState.Runtime(ctx, secret.Secret.ID) + kmRt, err := keyManagerRuntime(ctx, regState, secret.Secret.ID) + if err != nil { + return err + } + + // Reject if the signer is not in the key manager committee. + kmStatus, err := state.Status(ctx, kmRt.ID) if err != nil { return err } - if kmRt.Kind != registry.KindKeyManager { - return fmt.Errorf("keymanager: runtime is not a key manager: %s", secret.Secret.ID) + if !slices.Contains(kmStatus.Nodes, ctx.TxSigner()) { + return fmt.Errorf("keymanager: ephemeral secret can be published only by the key manager committee") } // Reject if the secret has been published. @@ -137,28 +260,81 @@ func (app *keymanagerApplication) publishEphemeralSecret( return err } - // Reject if the signer is not in the key manager committee. - signer := ctx.TxSigner() - kmStatus, err := state.Status(ctx, kmRt.ID) + // Verify the secret. Ephemeral secrets can be published for the next epoch only. 
+ epoch, err := app.state.GetCurrentEpoch(ctx) if err != nil { return err } - if !slices.Contains(kmStatus.Nodes, signer) { - return fmt.Errorf("keymanager: ephemeral secret can be published only by the key manager committee") + nextEpoch := epoch + 1 + rak, err := runtimeAttestationKey(ctx, regState, kmRt) + if err != nil { + return err } + reks := runtimeEncryptionKeys(ctx, regState, kmRt, kmStatus) - // Ensure that the signer is a key manager. - n, err := regState.Node(ctx, signer) + if err = secret.Verify(nextEpoch, reks, rak); err != nil { + return err + } + + // Return early if this is a CheckTx context. + if ctx.IsCheckOnly() { + return nil + } + + // Charge gas for this operation. + kmParams, err := state.ConsensusParameters(ctx) if err != nil { return err } + if err = ctx.Gas().UseGas(1, api.GasOpPublishEphemeralSecret, kmParams.GasCosts); err != nil { + return err + } + + // Return early if simulating since this is just estimating gas. + if ctx.IsSimulation() { + return nil + } + + // Ok, as far as we can tell the secret is valid, save it. + if err := state.SetEphemeralSecret(ctx, secret); err != nil { + ctx.Logger().Error("keymanager: failed to set key manager ephemeral secret", + "err", err, + ) + return fmt.Errorf("keymanager: failed to set key manager ephemeral secret: %w", err) + } + + ctx.EmitEvent(tmapi.NewEventBuilder(app.Name()).TypedAttribute(&api.EphemeralSecretPublishedEvent{ + Secret: secret, + })) + + return nil +} + +func keyManagerRuntime(ctx *tmapi.Context, regState *registryState.MutableState, id common.Namespace) (*registry.Runtime, error) { + // Ensure that the runtime exists and is a key manager. 
+ rt, err := regState.Runtime(ctx, id) + if err != nil { + return nil, err + } + if rt.Kind != registry.KindKeyManager { + return nil, fmt.Errorf("keymanager: runtime is not a key manager: %s", id) + } + return rt, nil +} + +func runtimeAttestationKey(ctx *tmapi.Context, regState *registryState.MutableState, kmRt *registry.Runtime) (*signature.PublicKey, error) { + // Ensure that the signer is a key manager. + n, err := regState.Node(ctx, ctx.TxSigner()) + if err != nil { + return nil, err + } idx := slices.IndexFunc(n.Runtimes, func(rt *node.Runtime) bool { // Skipping version check as key managers are running exactly one // version of the runtime. return rt.ID == kmRt.ID }) if idx == -1 { - return fmt.Errorf("keymanager: node is not a key manager") + return nil, fmt.Errorf("keymanager: node is not a key manager") } nRt := n.Runtimes[idx] @@ -170,23 +346,23 @@ func (app *keymanagerApplication) publishEphemeralSecret( rak = &api.InsecureRAK case node.TEEHardwareIntelSGX: if nRt.Capabilities.TEE == nil { - return fmt.Errorf("keymanager: node doesn't have TEE capability") + return nil, fmt.Errorf("keymanager: node doesn't have TEE capability") } rak = &nRt.Capabilities.TEE.RAK default: - return fmt.Errorf("keymanager: TEE hardware mismatch") + return nil, fmt.Errorf("keymanager: TEE hardware mismatch") } + return rak, nil +} + +func runtimeEncryptionKeys(ctx *tmapi.Context, regState *registryState.MutableState, kmRt *registry.Runtime, kmStatus *api.Status) map[x25519.PublicKey]struct{} { // Fetch REKs of the key manager committee. 
reks := make(map[x25519.PublicKey]struct{}) for _, id := range kmStatus.Nodes { - n, err = regState.Node(ctx, id) - switch err { - case nil: - case registry.ErrNoSuchNode: + n, err := regState.Node(ctx, id) + if err != nil { continue - default: - return err } idx := slices.IndexFunc(n.Runtimes, func(rt *node.Runtime) bool { @@ -209,50 +385,11 @@ func (app *keymanagerApplication) publishEphemeralSecret( } rek = *nRt.Capabilities.TEE.REK default: - // Dead code (handled above). + continue } reks[rek] = struct{}{} } - // Verify the secret. Ephemeral secrets can be published for the next epoch only. - epoch, err := app.state.GetCurrentEpoch(ctx) - if err != nil { - return err - } - if err = secret.Verify(epoch+1, reks, rak); err != nil { - return err - } - - if ctx.IsCheckOnly() { - return nil - } - - // Charge gas for this operation. - kmParams, err := state.ConsensusParameters(ctx) - if err != nil { - return err - } - if err = ctx.Gas().UseGas(1, api.GasOpPublishEphemeralSecret, kmParams.GasCosts); err != nil { - return err - } - - // Return early if simulating since this is just estimating gas. - if ctx.IsSimulation() { - return nil - } - - // Ok, as far as we can tell the secret is valid, save it. 
- if err := state.SetEphemeralSecret(ctx, secret); err != nil { - ctx.Logger().Error("keymanager: failed to set key manager ephemeral secret", - "err", err, - ) - return fmt.Errorf("keymanager: failed to set key manager ephemeral secret: %w", err) - } - - ctx.EmitEvent(tmapi.NewEventBuilder(app.Name()).TypedAttribute(&api.EphemeralSecretPublishedEvent{ - Secret: secret, - })) - - return nil + return reks } diff --git a/go/consensus/cometbft/keymanager/keymanager.go b/go/consensus/cometbft/keymanager/keymanager.go index 6fab9a6a106..471bb05edfc 100644 --- a/go/consensus/cometbft/keymanager/keymanager.go +++ b/go/consensus/cometbft/keymanager/keymanager.go @@ -32,9 +32,10 @@ type serviceClient struct { logger *logging.Logger - querier *app.QueryFactory - statusNotifier *pubsub.Broker - secretNotifier *pubsub.Broker + querier *app.QueryFactory + statusNotifier *pubsub.Broker + mstSecretNotifier *pubsub.Broker + ephSecretNotifier *pubsub.Broker } func (sc *serviceClient) GetStatus(ctx context.Context, query *registry.NamespaceQuery) (*api.Status, error) { @@ -72,6 +73,15 @@ func (sc *serviceClient) StateToGenesis(ctx context.Context, height int64) (*api return q.Genesis(ctx) } +func (sc *serviceClient) GetMasterSecret(ctx context.Context, query *registry.NamespaceQuery) (*api.SignedEncryptedMasterSecret, error) { + q, err := sc.querier.QueryAt(ctx, query.Height) + if err != nil { + return nil, err + } + + return q.MasterSecret(ctx, query.ID) +} + func (sc *serviceClient) GetEphemeralSecret(ctx context.Context, query *registry.NamespaceEpochQuery) (*api.SignedEncryptedEphemeralSecret, error) { q, err := sc.querier.QueryAt(ctx, query.Height) if err != nil { @@ -81,8 +91,16 @@ func (sc *serviceClient) GetEphemeralSecret(ctx context.Context, query *registry return q.EphemeralSecret(ctx, query.ID, query.Epoch) } +func (sc *serviceClient) WatchMasterSecrets() (<-chan *api.SignedEncryptedMasterSecret, *pubsub.Subscription) { + sub := sc.mstSecretNotifier.Subscribe() + ch := 
make(chan *api.SignedEncryptedMasterSecret) + sub.Unwrap(ch) + + return ch, sub +} + func (sc *serviceClient) WatchEphemeralSecrets() (<-chan *api.SignedEncryptedEphemeralSecret, *pubsub.Subscription) { - sub := sc.secretNotifier.Subscribe() + sub := sc.ephSecretNotifier.Subscribe() ch := make(chan *api.SignedEncryptedEphemeralSecret) sub.Unwrap(ch) @@ -110,6 +128,17 @@ func (sc *serviceClient) DeliverEvent(ctx context.Context, height int64, tx cmtt sc.statusNotifier.Broadcast(status) } } + if events.IsAttributeKind(pair.GetKey(), &api.MasterSecretPublishedEvent{}) { + var event api.MasterSecretPublishedEvent + if err := events.DecodeValue(pair.GetValue(), &event); err != nil { + sc.logger.Error("worker: failed to get master secret from tag", + "err", err, + ) + continue + } + + sc.mstSecretNotifier.Broadcast(event.Secret) + } if events.IsAttributeKind(pair.GetKey(), &api.EphemeralSecretPublishedEvent{}) { var event api.EphemeralSecretPublishedEvent if err := events.DecodeValue(pair.GetValue(), &event); err != nil { @@ -119,7 +148,7 @@ func (sc *serviceClient) DeliverEvent(ctx context.Context, height int64, tx cmtt continue } - sc.secretNotifier.Broadcast(event.Secret) + sc.ephSecretNotifier.Broadcast(event.Secret) } } return nil @@ -151,7 +180,8 @@ func New(ctx context.Context, backend tmapi.Backend) (ServiceClient, error) { wr <- v } }) - sc.secretNotifier = pubsub.NewBroker(false) + sc.mstSecretNotifier = pubsub.NewBroker(false) + sc.ephSecretNotifier = pubsub.NewBroker(false) return sc, nil } diff --git a/go/keymanager/api/api.go b/go/keymanager/api/api.go index 3e1b539c3aa..aab8bdb5279 100644 --- a/go/keymanager/api/api.go +++ b/go/keymanager/api/api.go @@ -41,15 +41,21 @@ var ( // exist. ErrNoSuchStatus = errors.New(ModuleName, 2, "keymanager: no such status") + // ErrNoSuchMasterSecret is the error returned when a key manager master secret does not exist. 
+ ErrNoSuchMasterSecret = errors.New(ModuleName, 3, "keymanager: no such master secret") + // ErrNoSuchEphemeralSecret is the error returned when a key manager ephemeral secret // for the given epoch does not exist. - ErrNoSuchEphemeralSecret = errors.New(ModuleName, 3, "keymanager: no such ephemeral secret") + ErrNoSuchEphemeralSecret = errors.New(ModuleName, 4, "keymanager: no such ephemeral secret") // MethodUpdatePolicy is the method name for policy updates. MethodUpdatePolicy = transaction.NewMethodName(ModuleName, "UpdatePolicy", SignedPolicySGX{}) + // MethodPublishMasterSecret is the method name for publishing master secret. + MethodPublishMasterSecret = transaction.NewMethodName(ModuleName, "PublishMasterSecret", SignedEncryptedMasterSecret{}) + // MethodPublishEphemeralSecret is the method name for publishing ephemeral secret. - MethodPublishEphemeralSecret = transaction.NewMethodName(ModuleName, "PublishEphemeralSecret", EncryptedEphemeralSecret{}) + MethodPublishEphemeralSecret = transaction.NewMethodName(ModuleName, "PublishEphemeralSecret", SignedEncryptedEphemeralSecret{}) // InsecureRAK is the insecure hardcoded key manager public key, used // in insecure builds when a RAK is unavailable. @@ -66,6 +72,7 @@ var ( // Methods is the list of all methods supported by the key manager backend. Methods = []transaction.MethodName{ MethodUpdatePolicy, + MethodPublishMasterSecret, MethodPublishEphemeralSecret, } @@ -92,6 +99,9 @@ const ( // GasOpUpdatePolicy is the gas operation identifier for policy updates // costs. GasOpUpdatePolicy transaction.Op = "update_policy" + // GasOpPublishMasterSecret is the gas operation identifier for publishing + // key manager master secret. + GasOpPublishMasterSecret transaction.Op = "publish_master_secret" // GasOpPublishEphemeralSecret is the gas operation identifier for publishing // key manager ephemeral secret. 
GasOpPublishEphemeralSecret transaction.Op = "publish_ephemeral_secret" @@ -102,6 +112,7 @@ const ( // DefaultGasCosts are the "default" gas costs for operations. var DefaultGasCosts = transaction.Costs{ GasOpUpdatePolicy: 1000, + GasOpPublishMasterSecret: 1000, GasOpPublishEphemeralSecret: 1000, } @@ -119,6 +130,12 @@ type Status struct { // IsSecure is true iff the key manager is secure. IsSecure bool `json:"is_secure"` + // Generation is the generation of the latest master secret. + Generation uint64 `json:"generation,omitempty"` + + // RotationEpoch is the epoch of the last master secret rotation. + RotationEpoch beacon.EpochTime `json:"rotation_epoch,omitempty"` + // Checksum is the key manager master secret verification checksum. Checksum []byte `json:"checksum"` @@ -132,6 +149,40 @@ type Status struct { RSK *signature.PublicKey `json:"rsk,omitempty"` } +// NextGeneration returns the generation of the next master secret. +func (s *Status) NextGeneration() uint64 { + if len(s.Checksum) == 0 { + return 0 + } + return s.Generation + 1 +} + +// VerifyRotationEpoch verifies if rotation can be performed in the given epoch. +func (s *Status) VerifyRotationEpoch(epoch beacon.EpochTime) error { + if nextGen := s.NextGeneration(); nextGen == 0 { + return nil + } + + // By default, rotation is disabled unless specified in the policy. + var rotationInterval beacon.EpochTime + if s.Policy != nil { + rotationInterval = s.Policy.Policy.MasterSecretRotationInterval + } + + // Reject if rotation is disabled. + if rotationInterval == 0 { + return fmt.Errorf("master secret rotation disabled") + } + + // Reject if the rotation period has not expired. + rotationEpoch := s.RotationEpoch + rotationInterval + if epoch < rotationEpoch { + return fmt.Errorf("master secret rotation interval has not yet expired") + } + + return nil +} + // Backend is a key manager management implementation. type Backend interface { // GetStatus returns a key manager status by key manager ID. 
@@ -149,6 +200,12 @@ type Backend interface { // StateToGenesis returns the genesis state at specified block height. StateToGenesis(context.Context, int64) (*Genesis, error) + // GetMasterSecret returns the key manager master secret. + GetMasterSecret(context.Context, *registry.NamespaceQuery) (*SignedEncryptedMasterSecret, error) + + // WatchMasterSecrets returns a channel that produces a stream of master secrets. + WatchMasterSecrets() (<-chan *SignedEncryptedMasterSecret, *pubsub.Subscription) + // GetEphemeralSecret returns the key manager ephemeral secret. GetEphemeralSecret(context.Context, *registry.NamespaceEpochQuery) (*SignedEncryptedEphemeralSecret, error) @@ -161,9 +218,14 @@ func NewUpdatePolicyTx(nonce uint64, fee *transaction.Fee, sigPol *SignedPolicyS return transaction.NewTransaction(nonce, fee, MethodUpdatePolicy, sigPol) } +// NewPublishMasterSecretTx creates a new publish master secret transaction. +func NewPublishMasterSecretTx(nonce uint64, fee *transaction.Fee, sigSec *SignedEncryptedMasterSecret) *transaction.Transaction { + return transaction.NewTransaction(nonce, fee, MethodPublishMasterSecret, sigSec) +} + // NewPublishEphemeralSecretTx creates a new publish ephemeral secret transaction. -func NewPublishEphemeralSecretTx(nonce uint64, fee *transaction.Fee, sigEnt *SignedEncryptedEphemeralSecret) *transaction.Transaction { - return transaction.NewTransaction(nonce, fee, MethodPublishEphemeralSecret, sigEnt) +func NewPublishEphemeralSecretTx(nonce uint64, fee *transaction.Fee, sigSec *SignedEncryptedEphemeralSecret) *transaction.Transaction { + return transaction.NewTransaction(nonce, fee, MethodPublishEphemeralSecret, sigSec) } // InitRequest is the initialization RPC request, sent to the key manager @@ -326,6 +388,16 @@ func (ev *StatusUpdateEvent) EventKind() string { return "status" } +// MasterSecretPublishedEvent is the key manager master secret published event. 
+type MasterSecretPublishedEvent struct { + Secret *SignedEncryptedMasterSecret +} + +// EventKind returns a string representation of this event's kind. +func (ev *MasterSecretPublishedEvent) EventKind() string { + return "master_secret" +} + // EphemeralSecretPublishedEvent is the key manager ephemeral secret published event. type EphemeralSecretPublishedEvent struct { Secret *SignedEncryptedEphemeralSecret diff --git a/go/keymanager/api/api_test.go b/go/keymanager/api/api_test.go index 36ef6c14ad5..965bbc751d1 100644 --- a/go/keymanager/api/api_test.go +++ b/go/keymanager/api/api_test.go @@ -29,3 +29,19 @@ func TestSignVerify(t *testing.T) { err = sigInitResponse.Verify(signer2.Public()) require.Error(err, "verification with different public key should fail") } + +func TestStatus(t *testing.T) { + require := require.New(t) + + // Uninitialized key manager. + var s Status + require.Equal(uint64(0), s.NextGeneration()) + + // Key manager with one master secret generation. + s.Checksum = []byte{1, 2, 3} + require.Equal(uint64(1), s.NextGeneration()) + + // Key manager with ten master secret generations. + s.Generation = 9 + require.Equal(uint64(10), s.NextGeneration()) +} diff --git a/go/keymanager/api/grpc.go b/go/keymanager/api/grpc.go index 91e05fada0b..2b0e09fa4ac 100644 --- a/go/keymanager/api/grpc.go +++ b/go/keymanager/api/grpc.go @@ -18,11 +18,15 @@ var ( methodGetStatus = serviceName.NewMethod("GetStatus", registry.NamespaceQuery{}) // methodGetStatuses is the GetStatuses method. methodGetStatuses = serviceName.NewMethod("GetStatuses", int64(0)) + // methodGetMasterSecret is the GetMasterSecret method. + methodGetMasterSecret = serviceName.NewMethod("GetMasterSecret", registry.NamespaceQuery{}) // methodGetEphemeralSecret is the GetEphemeralSecret method. methodGetEphemeralSecret = serviceName.NewMethod("GetEphemeralSecret", registry.NamespaceEpochQuery{}) // methodWatchStatuses is the WatchStatuses method. 
methodWatchStatuses = serviceName.NewMethod("WatchStatuses", nil) + // methodWatchMasterSecrets is the WatchMasterSecrets method. + methodWatchMasterSecrets = serviceName.NewMethod("WatchMasterSecrets", nil) // methodWatchEphemeralSecrets is the WatchEphemeralSecrets method. methodWatchEphemeralSecrets = serviceName.NewMethod("WatchEphemeralSecrets", nil) @@ -39,6 +43,10 @@ var ( MethodName: methodGetStatuses.ShortName(), Handler: handlerGetStatuses, }, + { + MethodName: methodGetMasterSecret.ShortName(), + Handler: handlerGetMasterSecret, + }, { MethodName: methodGetEphemeralSecret.ShortName(), Handler: handlerGetEphemeralSecret, @@ -50,6 +58,11 @@ var ( Handler: handlerWatchStatuses, ServerStreams: true, }, + { + StreamName: methodWatchMasterSecrets.ShortName(), + Handler: handlerWatchMasterSecrets, + ServerStreams: true, + }, { StreamName: methodWatchEphemeralSecrets.ShortName(), Handler: handlerWatchEphemeralSecrets, @@ -105,6 +118,29 @@ func handlerGetStatuses( return interceptor(ctx, height, info, handler) } +func handlerGetMasterSecret( + srv interface{}, + ctx context.Context, + dec func(interface{}) error, + interceptor grpc.UnaryServerInterceptor, +) (interface{}, error) { + var query registry.NamespaceQuery + if err := dec(&query); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(Backend).GetMasterSecret(ctx, &query) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: methodGetMasterSecret.FullName(), + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(Backend).GetMasterSecret(ctx, req.(*registry.NamespaceQuery)) + } + return interceptor(ctx, &query, info, handler) +} + func handlerGetEphemeralSecret( srv interface{}, ctx context.Context, @@ -153,6 +189,31 @@ func handlerWatchStatuses(srv interface{}, stream grpc.ServerStream) error { } } +func handlerWatchMasterSecrets(srv interface{}, stream grpc.ServerStream) error { + if err := stream.RecvMsg(nil); err != nil { + 
return err + } + + ctx := stream.Context() + ch, sub := srv.(Backend).WatchMasterSecrets() + defer sub.Close() + + for { + select { + case sec, ok := <-ch: + if !ok { + return nil + } + + if err := stream.SendMsg(sec); err != nil { + return err + } + case <-ctx.Done(): + return ctx.Err() + } + } +} + func handlerWatchEphemeralSecrets(srv interface{}, stream grpc.ServerStream) error { if err := stream.RecvMsg(nil); err != nil { return err @@ -204,6 +265,14 @@ func (c *KeymanagerClient) GetStatuses(ctx context.Context, height int64) ([]*St return resp, nil } +func (c *KeymanagerClient) GetMasterSecret(ctx context.Context, query *registry.NamespaceQuery) (*SignedEncryptedMasterSecret, error) { + var resp *SignedEncryptedMasterSecret + if err := c.conn.Invoke(ctx, methodGetMasterSecret.FullName(), query, &resp); err != nil { + return nil, err + } + return resp, nil +} + func (c *KeymanagerClient) GetEphemeralSecret(ctx context.Context, query *registry.NamespaceEpochQuery) (*SignedEncryptedEphemeralSecret, error) { var resp *SignedEncryptedEphemeralSecret if err := c.conn.Invoke(ctx, methodGetEphemeralSecret.FullName(), query, &resp); err != nil { @@ -247,6 +316,41 @@ func (c *KeymanagerClient) WatchStatuses(ctx context.Context) (<-chan *Status, p return ch, sub, nil } +func (c *KeymanagerClient) WatchMasterSecrets(ctx context.Context) (<-chan *SignedEncryptedMasterSecret, pubsub.ClosableSubscription, error) { + ctx, sub := pubsub.NewContextSubscription(ctx) + + stream, err := c.conn.NewStream(ctx, &serviceDesc.Streams[0], methodWatchMasterSecrets.FullName()) + if err != nil { + return nil, nil, err + } + if err = stream.SendMsg(nil); err != nil { + return nil, nil, err + } + if err = stream.CloseSend(); err != nil { + return nil, nil, err + } + + ch := make(chan *SignedEncryptedMasterSecret) + go func() { + defer close(ch) + + for { + var sec SignedEncryptedMasterSecret + if serr := stream.RecvMsg(&sec); serr != nil { + return + } + + select { + case ch <- &sec: + case 
<-ctx.Done(): + return + } + } + }() + + return ch, sub, nil +} + func (c *KeymanagerClient) WatchEphemeralSecrets(ctx context.Context) (<-chan *SignedEncryptedEphemeralSecret, pubsub.ClosableSubscription, error) { ctx, sub := pubsub.NewContextSubscription(ctx) diff --git a/go/keymanager/api/policy_sgx.go b/go/keymanager/api/policy_sgx.go index 0ad70690acd..7d9a9d55ed9 100644 --- a/go/keymanager/api/policy_sgx.go +++ b/go/keymanager/api/policy_sgx.go @@ -25,6 +25,10 @@ type PolicySGX struct { // Enclaves is the per-key manager enclave ID access control policy. Enclaves map[sgx.EnclaveIdentity]*EnclavePolicySGX `json:"enclaves"` + // MasterSecretRotationInterval is the time interval in epochs between master secret rotations. + // Zero disables rotations. + MasterSecretRotationInterval beacon.EpochTime `json:"master_secret_rotation_interval,omitempty"` + // MaxEphemeralSecretAge is the maximum age of an ephemeral secret in the number of epochs. MaxEphemeralSecretAge beacon.EpochTime `json:"max_ephemeral_secret_age,omitempty"` } diff --git a/keymanager/src/policy/cached.rs b/keymanager/src/policy/cached.rs index 961592227f1..1f05054653c 100644 --- a/keymanager/src/policy/cached.rs +++ b/keymanager/src/policy/cached.rs @@ -196,6 +196,7 @@ struct CachedPolicy { pub may_query: HashMap>, pub may_replicate: HashSet, pub may_replicate_from: HashSet, + pub master_secret_rotation_interval: EpochTime, pub max_ephemeral_secret_age: EpochTime, } @@ -242,6 +243,7 @@ impl CachedPolicy { } } + cached_policy.master_secret_rotation_interval = policy.master_secret_rotation_interval; cached_policy.max_ephemeral_secret_age = policy.max_ephemeral_secret_age; Ok(cached_policy) diff --git a/runtime/src/consensus/keymanager.rs b/runtime/src/consensus/keymanager.rs index 82ab3034c09..ce522b49edc 100644 --- a/runtime/src/consensus/keymanager.rs +++ b/runtime/src/consensus/keymanager.rs @@ -39,6 +39,8 @@ pub struct PolicySGX { pub id: Namespace, pub enclaves: HashMap, #[cbor(optional)] + pub 
master_secret_rotation_interval: EpochTime, + #[cbor(optional)] pub max_ephemeral_secret_age: EpochTime, } diff --git a/runtime/src/consensus/state/beacon.rs b/runtime/src/consensus/state/beacon.rs index dcdcba7c533..6acbe5eb16b 100644 --- a/runtime/src/consensus/state/beacon.rs +++ b/runtime/src/consensus/state/beacon.rs @@ -154,7 +154,7 @@ mod test { let mock_consensus_root = Root { version: 1, root_type: RootType::State, - hash: Hash::from("770079d8f120597eb8c4d3d3dfd9cf9eb6c4b3adbe80fc579cbb312667aa8443"), + hash: Hash::from("b5ee772727869caf8d0d333a7a9d65562ca34d8d6f3cf496af9e90f1705f10ec"), ..Default::default() }; let mkvs = Tree::builder() diff --git a/runtime/src/consensus/state/keymanager.rs b/runtime/src/consensus/state/keymanager.rs index 632698534f6..16edba9320e 100644 --- a/runtime/src/consensus/state/keymanager.rs +++ b/runtime/src/consensus/state/keymanager.rs @@ -9,7 +9,9 @@ use crate::{ }, consensus::{ beacon::EpochTime, - keymanager::{SignedEncryptedEphemeralSecret, SignedPolicySGX}, + keymanager::{ + SignedEncryptedEphemeralSecret, SignedEncryptedMasterSecret, SignedPolicySGX, + }, state::StateError, }, key_format, @@ -29,7 +31,8 @@ impl<'a, T: ImmutableMKVS> ImmutableState<'a, T> { } key_format!(StatusKeyFmt, 0x70, Hash); -key_format!(EphemeralSecretKeyFmt, 0x72, (Hash, EpochTime)); +key_format!(MasterSecretKeyFmt, 0x72, Hash); +key_format!(EphemeralSecretKeyFmt, 0x73, (Hash, EpochTime)); /// Current key manager status. #[derive(Clone, Debug, Default, PartialEq, Eq, cbor::Decode, cbor::Encode)] @@ -42,6 +45,8 @@ pub struct Status { pub is_secure: bool, /// Generation of the latest master secret. pub generation: u64, + /// Epoch of the last master secret rotation. + pub rotation_epoch: EpochTime, /// Key manager master secret verification checksum. pub checksum: Vec, /// List of currently active key manager node IDs. 
@@ -80,6 +85,19 @@ impl<'a, T: ImmutableMKVS> ImmutableState<'a, T> { Ok(result) } + /// Looks up a specific key manager master secret by its namespace identifier. + pub fn master_secret( + &self, + id: Namespace, + ) -> Result, StateError> { + let h = Hash::digest_bytes(id.as_ref()); + match self.mkvs.get(&MasterSecretKeyFmt(h).encode()) { + Ok(Some(b)) => Ok(Some(self.decode_master_secret(&b)?)), + Ok(None) => Ok(None), + Err(err) => Err(StateError::Unavailable(anyhow!(err))), + } + } + /// Looks up a specific key manager ephemeral secret by its namespace identifier and epoch. pub fn ephemeral_secret( &self, @@ -88,7 +106,7 @@ impl<'a, T: ImmutableMKVS> ImmutableState<'a, T> { ) -> Result, StateError> { let h = Hash::digest_bytes(id.as_ref()); match self.mkvs.get(&EphemeralSecretKeyFmt((h, epoch)).encode()) { - Ok(Some(b)) => Ok(Some(self.decode_secret(&b)?)), + Ok(Some(b)) => Ok(Some(self.decode_ephemeral_secret(&b)?)), Ok(None) => Ok(None), Err(err) => Err(StateError::Unavailable(anyhow!(err))), } @@ -98,7 +116,14 @@ impl<'a, T: ImmutableMKVS> ImmutableState<'a, T> { cbor::from_slice(data).map_err(|err| StateError::Unavailable(anyhow!(err))) } - fn decode_secret(&self, data: &[u8]) -> Result { + fn decode_master_secret(&self, data: &[u8]) -> Result { + cbor::from_slice(data).map_err(|err| StateError::Unavailable(anyhow!(err))) + } + + fn decode_ephemeral_secret( + &self, + data: &[u8], + ) -> Result { cbor::from_slice(data).map_err(|err| StateError::Unavailable(anyhow!(err))) } } @@ -142,7 +167,7 @@ mod test { let mock_consensus_root = Root { version: 1, root_type: RootType::State, - hash: Hash::from("770079d8f120597eb8c4d3d3dfd9cf9eb6c4b3adbe80fc579cbb312667aa8443"), + hash: Hash::from("b5ee772727869caf8d0d333a7a9d65562ca34d8d6f3cf496af9e90f1705f10ec"), ..Default::default() }; let mkvs = Tree::builder() @@ -198,6 +223,7 @@ mod test { is_initialized: false, is_secure: false, generation: 0, + rotation_epoch: 0, checksum: vec![], nodes: vec![], policy: None, @@ 
-208,6 +234,7 @@ mod test { is_initialized: true, is_secure: true, generation: 0, + rotation_epoch: 0, checksum: checksum, nodes: vec![signer1, signer2], policy: Some(SignedPolicySGX { @@ -221,6 +248,7 @@ mod test { may_replicate: vec![keymanager_enclave2], }, )]), + master_secret_rotation_interval: 0, max_ephemeral_secret_age: 10, }, signatures: vec![ diff --git a/runtime/src/consensus/state/registry.rs b/runtime/src/consensus/state/registry.rs index e4b3bd2ba22..1474b3dc45e 100644 --- a/runtime/src/consensus/state/registry.rs +++ b/runtime/src/consensus/state/registry.rs @@ -131,7 +131,7 @@ mod test { let mock_consensus_root = Root { version: 1, root_type: RootType::State, - hash: Hash::from("770079d8f120597eb8c4d3d3dfd9cf9eb6c4b3adbe80fc579cbb312667aa8443"), + hash: Hash::from("b5ee772727869caf8d0d333a7a9d65562ca34d8d6f3cf496af9e90f1705f10ec"), ..Default::default() }; let mkvs = Tree::builder() diff --git a/runtime/src/consensus/state/staking.rs b/runtime/src/consensus/state/staking.rs index 7aae5097f84..26665786256 100644 --- a/runtime/src/consensus/state/staking.rs +++ b/runtime/src/consensus/state/staking.rs @@ -221,7 +221,7 @@ mod test { let mock_consensus_root = Root { version: 1, root_type: RootType::State, - hash: Hash::from("770079d8f120597eb8c4d3d3dfd9cf9eb6c4b3adbe80fc579cbb312667aa8443"), + hash: Hash::from("b5ee772727869caf8d0d333a7a9d65562ca34d8d6f3cf496af9e90f1705f10ec"), ..Default::default() }; let mkvs = Tree::builder() From 28efadd1437c584c981058e7a4eed78212919070 Mon Sep 17 00:00:00 2001 From: Peter Nose Date: Thu, 9 Mar 2023 03:08:23 +0100 Subject: [PATCH 2/7] oasis-test-runner: Update fixtures with master secret rotation interval --- go/oasis-net-runner/fixtures/default.go | 2 +- go/oasis-node/cmd/keymanager/keymanager.go | 33 +++++++++++-------- go/oasis-test-runner/oasis/cli/keymanager.go | 5 ++- go/oasis-test-runner/oasis/fixture.go | 10 +++--- go/oasis-test-runner/oasis/keymanager.go | 21 +++++++----- .../e2e/runtime/keymanager_upgrade.go 
| 2 +- .../scenario/e2e/runtime/runtime.go | 2 +- .../scenario/e2e/runtime/runtime_dynamic.go | 2 +- .../scenario/e2e/runtime/runtime_upgrade.go | 2 +- .../scenario/e2e/runtime/trust_root.go | 2 +- 10 files changed, 49 insertions(+), 32 deletions(-) diff --git a/go/oasis-net-runner/fixtures/default.go b/go/oasis-net-runner/fixtures/default.go index 0f8eaace92b..e4bf26d1d32 100644 --- a/go/oasis-net-runner/fixtures/default.go +++ b/go/oasis-net-runner/fixtures/default.go @@ -156,7 +156,7 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { }, }) fixture.KeymanagerPolicies = []oasis.KeymanagerPolicyFixture{ - {Runtime: 0, Serial: 1}, + {Runtime: 0, Serial: 1, MasterSecretRotationInterval: 0}, } fixture.Keymanagers = []oasis.KeymanagerFixture{ {Runtime: 0, Entity: 1, RuntimeProvisioner: runtimeProvisioner}, diff --git a/go/oasis-node/cmd/keymanager/keymanager.go b/go/oasis-node/cmd/keymanager/keymanager.go index 2e043a921e5..26c65dfb874 100644 --- a/go/oasis-node/cmd/keymanager/keymanager.go +++ b/go/oasis-node/cmd/keymanager/keymanager.go @@ -13,6 +13,7 @@ import ( flag "github.com/spf13/pflag" "github.com/spf13/viper" + "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/cbor" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" @@ -27,16 +28,17 @@ import ( ) const ( - CfgPolicySerial = "keymanager.policy.serial" - CfgPolicyID = "keymanager.policy.id" - CfgPolicyFile = "keymanager.policy.file" - CfgPolicyEnclaveID = "keymanager.policy.enclave.id" - CfgPolicyMayQuery = "keymanager.policy.may.query" - CfgPolicyMayReplicate = "keymanager.policy.may.replicate" - CfgPolicyKeyFile = "keymanager.policy.key.file" - CfgPolicyTestKey = "keymanager.policy.testkey" - CfgPolicySigFile = "keymanager.policy.signature.file" - CfgPolicyIgnoreSig = "keymanager.policy.ignore.signature" + CfgPolicySerial = "keymanager.policy.serial" + CfgPolicyID = "keymanager.policy.id" + 
CfgPolicyFile = "keymanager.policy.file" + CfgPolicyEnclaveID = "keymanager.policy.enclave.id" + CfgPolicyMayQuery = "keymanager.policy.may.query" + CfgPolicyMayReplicate = "keymanager.policy.may.replicate" + CfgPolicyKeyFile = "keymanager.policy.key.file" + CfgPolicyTestKey = "keymanager.policy.testkey" + CfgPolicySigFile = "keymanager.policy.signature.file" + CfgPolicyIgnoreSig = "keymanager.policy.ignore.signature" + CfgPolicyMasterSecretRotationInterval = "keymanager.policy.master_secret_rotation_interval" CfgStatusFile = "keymanager.status.file" CfgStatusID = "keymanager.status.id" @@ -201,10 +203,13 @@ func policyFromFlags() (*kmApi.PolicySGX, error) { } } + rotationInterval := api.EpochTime(viper.GetUint64(CfgPolicyMasterSecretRotationInterval)) + return &kmApi.PolicySGX{ - Serial: serial, - ID: id, - Enclaves: enclaves, + Serial: serial, + ID: id, + Enclaves: enclaves, + MasterSecretRotationInterval: rotationInterval, }, nil } @@ -548,6 +553,7 @@ func registerKMInitPolicyFlags(cmd *cobra.Command) { cmd.Flags().String(CfgPolicyEnclaveID, "", "512-bit Key Manager Enclave ID in hex (concatenated MRENCLAVE and MRSIGNER). Multiple Enclave IDs with corresponding permissions can be provided respectively.") cmd.Flags().StringSlice(CfgPolicyMayReplicate, []string{}, "enclave_id1,enclave_id2... list of new enclaves which are allowed to access the master secret. Requires "+CfgPolicyEnclaveID) cmd.Flags().StringToString(CfgPolicyMayQuery, map[string]string{}, "runtime_id=enclave_id1,enclave_id2... sets enclave query permission for runtime_id. 
Requires "+CfgPolicyEnclaveID) + cmd.Flags().Uint64(CfgPolicyMasterSecretRotationInterval, 0, "master secret rotation interval") } cmd.Flags().AddFlagSet(policyFileFlag) @@ -565,6 +571,7 @@ func registerKMInitPolicyFlags(cmd *cobra.Command) { CfgPolicyEnclaveID, CfgPolicyMayReplicate, CfgPolicyMayQuery, + CfgPolicyMasterSecretRotationInterval, } { _ = viper.BindPFlag(v, cmd.Flags().Lookup(v)) } diff --git a/go/oasis-test-runner/oasis/cli/keymanager.go b/go/oasis-test-runner/oasis/cli/keymanager.go index 419b58585a3..35b42178508 100644 --- a/go/oasis-test-runner/oasis/cli/keymanager.go +++ b/go/oasis-test-runner/oasis/cli/keymanager.go @@ -5,6 +5,7 @@ import ( "strconv" "strings" + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/sgx" keymanager "github.com/oasisprotocol/oasis-core/go/keymanager/api" @@ -20,10 +21,11 @@ type KeymanagerHelpers struct { } // InitPolicy generates the KM policy file. 
-func (k *KeymanagerHelpers) InitPolicy(runtimeID common.Namespace, serial uint32, policies map[sgx.EnclaveIdentity]*keymanager.EnclavePolicySGX, polPath string) error { +func (k *KeymanagerHelpers) InitPolicy(runtimeID common.Namespace, serial uint32, rotationInterval beacon.EpochTime, policies map[sgx.EnclaveIdentity]*keymanager.EnclavePolicySGX, polPath string) error { k.logger.Info("initing KM policy", "policy_path", polPath, "serial", serial, + "rotation_interval", rotationInterval, "num_policies", len(policies), ) @@ -32,6 +34,7 @@ func (k *KeymanagerHelpers) InitPolicy(runtimeID common.Namespace, serial uint32 "--" + cmdKM.CfgPolicyFile, polPath, "--" + cmdKM.CfgPolicyID, runtimeID.String(), "--" + cmdKM.CfgPolicySerial, strconv.FormatUint(uint64(serial), 10), + "--" + cmdKM.CfgPolicyMasterSecretRotationInterval, strconv.FormatUint(uint64(rotationInterval), 10), } for enclave, policy := range policies { args = append(args, "--"+cmdKM.CfgPolicyEnclaveID) diff --git a/go/oasis-test-runner/oasis/fixture.go b/go/oasis-test-runner/oasis/fixture.go index 7a938d2de2a..8c341636a5e 100644 --- a/go/oasis-test-runner/oasis/fixture.go +++ b/go/oasis-test-runner/oasis/fixture.go @@ -284,8 +284,9 @@ func (f *RuntimeFixture) Create(netFixture *NetworkFixture, net *Network) (*Runt // KeymanagerPolicyFixture is a key manager policy fixture. type KeymanagerPolicyFixture struct { - Runtime int `json:"runtime"` - Serial int `json:"serial"` + Runtime int `json:"runtime"` + Serial int `json:"serial"` + MasterSecretRotationInterval beacon.EpochTime `json:"master_secret_rotation_interval,omitempty"` } // Create instantiates the key manager policy described in the fixture. 
@@ -296,8 +297,9 @@ func (f *KeymanagerPolicyFixture) Create(net *Network) (*KeymanagerPolicy, error } return net.NewKeymanagerPolicy(&KeymanagerPolicyCfg{ - Runtime: runtime, - Serial: f.Serial, + Runtime: runtime, + Serial: f.Serial, + MasterSecretRotationInterval: f.MasterSecretRotationInterval, }) } diff --git a/go/oasis-test-runner/oasis/keymanager.go b/go/oasis-test-runner/oasis/keymanager.go index a5ddd8e7884..c5fafc9ad6c 100644 --- a/go/oasis-test-runner/oasis/keymanager.go +++ b/go/oasis-test-runner/oasis/keymanager.go @@ -7,6 +7,7 @@ import ( "path/filepath" "strconv" + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" "github.com/oasisprotocol/oasis-core/go/common/node" "github.com/oasisprotocol/oasis-core/go/config" @@ -33,14 +34,16 @@ type KeymanagerPolicy struct { statusArgs []string - runtime *Runtime - serial int + runtime *Runtime + serial int + rotationInterval beacon.EpochTime } // KeymanagerPolicyCfg is an Oasis key manager policy document configuration. 
type KeymanagerPolicyCfg struct { - Runtime *Runtime - Serial int + Runtime *Runtime + Serial int + MasterSecretRotationInterval beacon.EpochTime } func (pol *KeymanagerPolicy) provisionStatusArgs() []string { @@ -56,6 +59,7 @@ func (pol *KeymanagerPolicy) provision() error { "--" + kmCmd.CfgPolicyFile, policyPath, "--" + kmCmd.CfgPolicyID, pol.runtime.ID().String(), "--" + kmCmd.CfgPolicySerial, strconv.Itoa(pol.serial), + "--" + kmCmd.CfgPolicyMasterSecretRotationInterval, strconv.FormatUint(uint64(pol.rotationInterval), 10), } if pol.runtime.teeHardware == node.TEEHardwareIntelSGX { policyArgs = append(policyArgs, []string{ @@ -134,10 +138,11 @@ func (net *Network) NewKeymanagerPolicy(cfg *KeymanagerPolicyCfg) (*KeymanagerPo } newPol := &KeymanagerPolicy{ - net: net, - dir: policyDir, - runtime: cfg.Runtime, - serial: cfg.Serial, + net: net, + dir: policyDir, + runtime: cfg.Runtime, + serial: cfg.Serial, + rotationInterval: cfg.MasterSecretRotationInterval, } net.keymanagerPolicies = append(net.keymanagerPolicies, newPol) diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_upgrade.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_upgrade.go index 874c13877b0..f58483ebf50 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_upgrade.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_upgrade.go @@ -149,7 +149,7 @@ func (sc *KmUpgradeImpl) applyUpgradePolicy(childEnv *env.Env) error { } sc.Logger.Info("initing updated KM policy") - if err := cli.Keymanager.InitPolicy(oldKMRuntime.ID(), 2, enclavePolicies, kmPolicyPath); err != nil { + if err := cli.Keymanager.InitPolicy(oldKMRuntime.ID(), 2, 0, enclavePolicies, kmPolicyPath); err != nil { return err } sc.Logger.Info("signing updated KM policy") diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go index 4c37ed02a4f..34cb4829619 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go +++ 
b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go @@ -263,7 +263,7 @@ func (sc *Scenario) Fixture() (*oasis.NetworkFixture, error) { {Entity: 1, Consensus: oasis.ConsensusFixture{}}, }, KeymanagerPolicies: []oasis.KeymanagerPolicyFixture{ - {Runtime: 0, Serial: 1}, + {Runtime: 0, Serial: 1, MasterSecretRotationInterval: 0}, }, Keymanagers: []oasis.KeymanagerFixture{ { diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime_dynamic.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime_dynamic.go index 4069379d24f..8db93f4a4eb 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime_dynamic.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime_dynamic.go @@ -165,7 +165,7 @@ func (sc *runtimeDynamicImpl) Run(ctx context.Context, childEnv *env.Env) error } } sc.Logger.Info("initing KM policy") - if err = cli.Keymanager.InitPolicy(kmRt.ID(), 1, enclavePolicies, kmPolicyPath); err != nil { + if err = cli.Keymanager.InitPolicy(kmRt.ID(), 1, 0, enclavePolicies, kmPolicyPath); err != nil { return err } sc.Logger.Info("signing KM policy") diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime_upgrade.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime_upgrade.go index 2195b9d043e..f9f3e60a142 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime_upgrade.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime_upgrade.go @@ -153,7 +153,7 @@ func (sc *runtimeUpgradeImpl) applyUpgradePolicy(childEnv *env.Env) error { } sc.Logger.Info("initing updated KM policy") - if err := cli.Keymanager.InitPolicy(kmRuntime.ID(), 2, enclavePolicies, kmPolicyPath); err != nil { + if err := cli.Keymanager.InitPolicy(kmRuntime.ID(), 2, 0, enclavePolicies, kmPolicyPath); err != nil { return err } sc.Logger.Info("signing updated KM policy") diff --git a/go/oasis-test-runner/scenario/e2e/runtime/trust_root.go b/go/oasis-test-runner/scenario/e2e/runtime/trust_root.go index 9803ac45046..20dadeec1d7 100644 --- 
a/go/oasis-test-runner/scenario/e2e/runtime/trust_root.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/trust_root.go @@ -187,7 +187,7 @@ func (sc *TrustRootImpl) updateKeyManagerPolicy(ctx context.Context, childEnv *e } } sc.Logger.Info("initing KM policy") - if err := cli.Keymanager.InitPolicy(kmRt.ID(), 1, enclavePolicies, kmPolicyPath); err != nil { + if err := cli.Keymanager.InitPolicy(kmRt.ID(), 1, 0, enclavePolicies, kmPolicyPath); err != nil { return err } sc.Logger.Info("signing KM policy") From bff627f84f4ed703c0a4499162491b05cc8c6a35 Mon Sep 17 00:00:00 2001 From: Peter Nose Date: Tue, 28 Feb 2023 11:18:11 +0100 Subject: [PATCH 3/7] keymanager/src/runtime: Support master secret rotations Key managers now have the ability to rotate the master secret at predetermined intervals. Each rotation introduces a new generation, or version, of the master secret that is sequentially numbered, starting from zero. --- .buildkite/code.pipeline.yml | 2 +- .changelog/5196.feature.md | 11 + .../cometbft/apps/keymanager/keymanager.go | 69 +- .../apps/keymanager/keymanager_test.go | 106 +- .../cometbft/apps/keymanager/transactions.go | 2 +- go/keymanager/api/api.go | 44 +- .../scenario/e2e/runtime/keymanager_client.go | 190 ++++ .../e2e/runtime/keymanager_dump_restore.go | 132 +++ .../e2e/runtime/keymanager_ephemeral_keys.go | 148 +-- .../e2e/runtime/keymanager_master_secrets.go | 314 ++++++ .../e2e/runtime/keymanager_replicate.go | 146 +-- .../e2e/runtime/keymanager_restart.go | 40 +- .../scenario/e2e/runtime/keymanager_util.go | 345 +++++++ .../scenario/e2e/runtime/runtime.go | 47 +- .../scenario/e2e/runtime/runtime_client_kv.go | 11 + .../scenario/e2e/runtime/trust_root.go | 21 - go/worker/keymanager/api/api.go | 22 +- go/worker/keymanager/init.go | 8 +- go/worker/keymanager/status.go | 10 +- go/worker/keymanager/worker.go | 536 +++++++--- keymanager/src/api/errors.rs | 8 +- keymanager/src/api/methods.rs | 4 + keymanager/src/api/requests.rs | 41 +- 
keymanager/src/client/interface.rs | 12 +- keymanager/src/client/mock.rs | 7 +- keymanager/src/client/remote.rs | 12 +- keymanager/src/crypto/kdf.rs | 975 +++++++++++++----- keymanager/src/crypto/packing.rs | 30 - keymanager/src/crypto/types.rs | 9 + keymanager/src/runtime/init.rs | 23 +- keymanager/src/runtime/methods.rs | 270 +++-- 31 files changed, 2776 insertions(+), 819 deletions(-) create mode 100644 .changelog/5196.feature.md create mode 100644 go/oasis-test-runner/scenario/e2e/runtime/keymanager_client.go create mode 100644 go/oasis-test-runner/scenario/e2e/runtime/keymanager_dump_restore.go create mode 100644 go/oasis-test-runner/scenario/e2e/runtime/keymanager_master_secrets.go create mode 100644 go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go diff --git a/.buildkite/code.pipeline.yml b/.buildkite/code.pipeline.yml index e503d83d59f..c1c7d21cba9 100644 --- a/.buildkite/code.pipeline.yml +++ b/.buildkite/code.pipeline.yml @@ -227,7 +227,7 @@ steps: - export CFLAGS_x86_64_fortanix_unknown_sgx="-isystem/usr/include/x86_64-linux-gnu -mlvi-hardening -mllvm -x86-experimental-lvi-inline-asm-hardening" - export CC_x86_64_fortanix_unknown_sgx=clang-11 # Only run runtime scenarios as others do not use SGX. - - .buildkite/scripts/test_e2e.sh --scenario e2e/runtime/runtime-encryption --scenario e2e/runtime/trust-root/.+ --scenario e2e/runtime/keymanager-ephemeral-keys + - .buildkite/scripts/test_e2e.sh --scenario e2e/runtime/runtime-encryption --scenario e2e/runtime/trust-root/.+ --scenario e2e/runtime/keymanager-.+ artifact_paths: - coverage-merged-e2e-*.txt - /tmp/e2e/**/*.log diff --git a/.changelog/5196.feature.md b/.changelog/5196.feature.md new file mode 100644 index 00000000000..bd1756c5e8f --- /dev/null +++ b/.changelog/5196.feature.md @@ -0,0 +1,11 @@ +keymanager/src/runtime: Support master secret rotations + +Key managers now have the ability to rotate the master secret +at predetermined intervals. 
Each rotation introduces a new generation, +or version, of the master secret that is sequentially numbered, starting +from zero. These rotations occur during key manager status updates, which +typically happen during epoch transitions. To perform a rotation, +one of the key manager enclaves must publish a proposal for the next +generation of the master secret, which must then be replicated by +the majority of enclaves. If the replication process is not completed +by the end of the epoch, the proposal can be replaced with a new one. diff --git a/go/consensus/cometbft/apps/keymanager/keymanager.go b/go/consensus/cometbft/apps/keymanager/keymanager.go index 184b4fec23b..e4fa7922d3a 100644 --- a/go/consensus/cometbft/apps/keymanager/keymanager.go +++ b/go/consensus/cometbft/apps/keymanager/keymanager.go @@ -10,6 +10,7 @@ import ( beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common/cbor" + "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" "github.com/oasisprotocol/oasis-core/go/common/node" "github.com/oasisprotocol/oasis-core/go/consensus/api/transaction" tmapi "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" @@ -24,6 +25,10 @@ import ( // maxEphemeralSecretAge is the maximum age of an ephemeral secret in the number of epochs. const maxEphemeralSecretAge = 20 +// minProposalReplicationPercent is the minimum percentage of enclaves in the key manager committee +// that must replicate the proposal for the next master secret before it is accepted. 
+const minProposalReplicationPercent = 66 + var emptyHashSha3 = sha3.Sum256(nil) type keymanagerApplication struct { @@ -179,7 +184,16 @@ func (app *keymanagerApplication) onEpochChange(ctx *tmapi.Context, epoch beacon return fmt.Errorf("failed to query key manager status: %w", err) } - newStatus := app.generateStatus(ctx, rt, oldStatus, nodes, params, epoch) + secret, err := state.MasterSecret(ctx, rt.ID) + if err != nil && err != api.ErrNoSuchMasterSecret { + ctx.Logger().Error("failed to query key manager master secret", + "id", rt.ID, + "err", err, + ) + return fmt.Errorf("failed to query key manager master secret: %w", err) + } + + newStatus := app.generateStatus(ctx, rt, oldStatus, secret, nodes, params, epoch) if forceEmit || !bytes.Equal(cbor.Marshal(oldStatus), cbor.Marshal(newStatus)) { ctx.Logger().Debug("status updated", "id", newStatus.ID, @@ -222,10 +236,11 @@ func (app *keymanagerApplication) onEpochChange(ctx *tmapi.Context, epoch beacon return nil } -func (app *keymanagerApplication) generateStatus( +func (app *keymanagerApplication) generateStatus( // nolint: gocyclo ctx *tmapi.Context, kmrt *registry.Runtime, oldStatus *api.Status, + secret *api.SignedEncryptedMasterSecret, nodes []*node.Node, params *registry.ConsensusParameters, epoch beacon.EpochTime, @@ -240,6 +255,19 @@ func (app *keymanagerApplication) generateStatus( Policy: oldStatus.Policy, } + // Data need to count the nodes that have replicated the proposal for the next master secret. + var ( + nextGeneration uint64 + nextChecksum []byte + nextRSK *signature.PublicKey + updatedNodes []signature.PublicKey + ) + nextGeneration = status.NextGeneration() + if secret != nil && secret.Secret.Generation == nextGeneration && secret.Secret.Epoch == epoch { + nextChecksum = secret.Secret.Secret.Checksum + } + + // Compute the policy hash to reject nodes that are not up-to-date. 
var rawPolicy []byte if status.Policy != nil { rawPolicy = cbor.Marshal(status.Policy) @@ -261,10 +289,11 @@ nextNode: continue } + secretReplicated := true isInitialized := status.IsInitialized isSecure := status.IsSecure - checksum := status.Checksum RSK := status.RSK + nRSK := nextRSK var numVersions int for _, nodeRt := range n.Runtimes { @@ -316,7 +345,6 @@ nextNode: // The first version gets to be the source of truth. isInitialized = true isSecure = initResponse.IsSecure - checksum = initResponse.Checksum } // Skip nodes with mismatched status fields. @@ -324,7 +352,7 @@ nextNode: ctx.Logger().Error("Security status mismatch for runtime", vars...) continue nextNode } - if !bytes.Equal(initResponse.Checksum, checksum) { + if !bytes.Equal(initResponse.Checksum, status.Checksum) { ctx.Logger().Error("Checksum mismatch for runtime", vars...) continue nextNode } @@ -342,6 +370,18 @@ nextNode: continue nextNode } + // Check if all versions have replicated the last master secret, + // derived the same RSK and are ready to move to the next generation. + if !bytes.Equal(initResponse.NextChecksum, nextChecksum) { + secretReplicated = false + } + if nRSK == nil { + nRSK = initResponse.NextRSK + } + if initResponse.NextRSK != nil && !initResponse.NextRSK.Equal(*nRSK) { + secretReplicated = false + } + numVersions++ } @@ -351,19 +391,34 @@ nextNode: if !isInitialized { panic("the key manager must be initialized") } + if secretReplicated { + nextRSK = nRSK + updatedNodes = append(updatedNodes, n.ID) + } // If the key manager is not initialized, the first verified node gets to be the source // of truth, every other node will sync off it. if !status.IsInitialized { status.IsInitialized = true status.IsSecure = isSecure - status.Checksum = checksum } status.RSK = RSK - status.Nodes = append(status.Nodes, n.ID) } + // Accept the proposal if the majority of the nodes have replicated + // the proposal for the next master secret. 
+ if numNodes := len(status.Nodes); numNodes > 0 && nextChecksum != nil { + percent := len(updatedNodes) * 100 / numNodes + if percent > minProposalReplicationPercent { + status.Generation = nextGeneration + status.RotationEpoch = epoch + status.Checksum = nextChecksum + status.RSK = nextRSK + status.Nodes = updatedNodes + } + } + return status } diff --git a/go/consensus/cometbft/apps/keymanager/keymanager_test.go b/go/consensus/cometbft/apps/keymanager/keymanager_test.go index d34df5f28b5..dba2ff30ec2 100644 --- a/go/consensus/cometbft/apps/keymanager/keymanager_test.go +++ b/go/consensus/cometbft/apps/keymanager/keymanager_test.go @@ -50,6 +50,10 @@ func TestGenerateStatus(t *testing.T) { sigInitResponse, err := api.SignInitResponse(rakSigner, &initResponse) require.NoError(t, err, "SignInitResponse") + initResponse.Checksum = nil + sigInitResponseSecure, err := api.SignInitResponse(rakSigner, &initResponse) + require.NoError(t, err, "SignInitResponse") + initResponse.IsSecure = false sigInitResponseInsecure, err := api.SignInitResponse(rakSigner, &initResponse) require.NoError(t, err, "SignInitResponse") @@ -81,11 +85,11 @@ func TestGenerateStatus(t *testing.T) { Version: version.Version{Major: 1, Minor: 0, Patch: 0}, ExtraInfo: cbor.Marshal(sigInitResponseInsecure), }, - // Key manager 1, version 2.0.0 + // Key manager 1, version 2.0.0 (secure enclave) { ID: runtimeIDs[0], Version: version.Version{Major: 2, Minor: 0, Patch: 0}, - ExtraInfo: cbor.Marshal(sigInitResponse), + ExtraInfo: cbor.Marshal(sigInitResponseSecure), }, // Key manager 1, version 3.0.0 { @@ -93,6 +97,12 @@ func TestGenerateStatus(t *testing.T) { Version: version.Version{Major: 3, Minor: 0, Patch: 0}, ExtraInfo: cbor.Marshal(sigInitResponse), }, + // Key manager 1, version 4.0.0 + { + ID: runtimeIDs[0], + Version: version.Version{Major: 4, Minor: 0, Patch: 0}, + ExtraInfo: cbor.Marshal(sigInitResponse), + }, // Key manager 2, version 1.0.0 { ID: runtimeIDs[1], @@ -152,21 +162,21 @@ func 
TestGenerateStatus(t *testing.T) { ID: memorySigner.NewTestSigner("node 3").Public(), Expiration: uint64(epoch), Roles: node.RoleKeyManager, - Runtimes: nodeRuntimes[5:6], + Runtimes: nodeRuntimes[6:7], }, // The second key manager. { ID: memorySigner.NewTestSigner("node 4").Public(), Expiration: uint64(epoch), Roles: node.RoleKeyManager, - Runtimes: nodeRuntimes[3:5], + Runtimes: nodeRuntimes[4:6], }, // One key manager, incompatible versions. { ID: memorySigner.NewTestSigner("node 5").Public(), Expiration: uint64(epoch), Roles: node.RoleKeyManager, - Runtimes: nodeRuntimes[0:3], + Runtimes: nodeRuntimes[0:4], }, // One key manager, one version (secure = false). { @@ -175,29 +185,36 @@ func TestGenerateStatus(t *testing.T) { Roles: node.RoleKeyManager, Runtimes: nodeRuntimes[0:1], }, - // One key managers, two versions (secure = true). + // One key manager, two versions (secure = true). { ID: memorySigner.NewTestSigner("node 7").Public(), Expiration: uint64(epoch), Roles: node.RoleKeyManager, - Runtimes: nodeRuntimes[1:3], + Runtimes: nodeRuntimes[1:2], }, - // Two key managers, two versions. + // One key manager, two versions (secure = true). { ID: memorySigner.NewTestSigner("node 8").Public(), Expiration: uint64(epoch), Roles: node.RoleKeyManager, - Runtimes: nodeRuntimes[1:5], + Runtimes: nodeRuntimes[2:4], + }, + // Two key managers, two versions. 
+ { + ID: memorySigner.NewTestSigner("node 9").Public(), + Expiration: uint64(epoch), + Roles: node.RoleKeyManager, + Runtimes: nodeRuntimes[2:6], }, } t.Run("No nodes", func(t *testing.T) { require := require.New(t) - newStatus := app.generateStatus(ctx, runtimes[0], uninitializedStatus, nodes[0:6], params, epoch) + newStatus := app.generateStatus(ctx, runtimes[0], uninitializedStatus, nil, nodes[0:6], params, epoch) require.Equal(uninitializedStatus, newStatus, "key manager committee should be empty") - newStatus = app.generateStatus(ctx, runtimes[0], initializedStatus, nodes[0:6], params, epoch) + newStatus = app.generateStatus(ctx, runtimes[0], initializedStatus, nil, nodes[0:6], params, epoch) require.Equal(initializedStatus, newStatus, "key manager committee should be empty") }) @@ -209,70 +226,63 @@ func TestGenerateStatus(t *testing.T) { ID: runtimeIDs[0], IsInitialized: true, IsSecure: false, - Checksum: checksum, Policy: &policy, Nodes: []signature.PublicKey{nodes[6].ID}, } - newStatus := app.generateStatus(ctx, runtimes[0], uninitializedStatus, nodes[6:7], params, epoch) + newStatus := app.generateStatus(ctx, runtimes[0], uninitializedStatus, nil, nodes[6:7], params, epoch) require.Equal(expStatus, newStatus, "node 6 should form the committee if key manager not initialized") - newStatus = app.generateStatus(ctx, runtimes[0], expStatus, nodes[6:7], params, epoch) + newStatus = app.generateStatus(ctx, runtimes[0], expStatus, nil, nodes[6:7], params, epoch) require.Equal(expStatus, newStatus, "node 6 should form the committee if key manager is not secure") expStatus.IsSecure = true + expStatus.Checksum = checksum expStatus.Nodes = nil - newStatus = app.generateStatus(ctx, runtimes[0], initializedStatus, nodes[6:7], params, epoch) - require.Equal(expStatus, newStatus, "node 6 should not be added to the committee if key manager is secure") + newStatus = app.generateStatus(ctx, runtimes[0], initializedStatus, nil, nodes[6:7], params, epoch) + 
require.Equal(expStatus, newStatus, "node 6 should not be added to the committee if key manager is secure or checksum differs") }) t.Run("Multiple nodes", func(t *testing.T) { require := require.New(t) - // The first node is the source of truth when constructing a committee. If the node 6 is - // processed before nodes 7 and 8, the latter won't be accepted as they are secure. + // The first node is the source of truth when constructing a committee. + // If the node 6 is processed before node 7, the latter won't be accepted as it is secure. + // Nodes 8 and 9 cannot be a part of the committee as their checksum differs. expStatus := &api.Status{ ID: runtimeIDs[0], IsInitialized: true, IsSecure: false, - Checksum: checksum, Policy: &policy, Nodes: []signature.PublicKey{nodes[6].ID}, } - newStatus := app.generateStatus(ctx, runtimes[0], uninitializedStatus, nodes, params, epoch) - require.Equal(expStatus, newStatus, "node 6 should form the committee if node 6 is the source of truth") + newStatus := app.generateStatus(ctx, runtimes[0], uninitializedStatus, nil, nodes, params, epoch) + require.Equal(expStatus, newStatus, "node 6 should be the source of truth and form the committee") // If the order is reversed, it should be the other way around. expStatus.IsSecure = true - expStatus.Nodes = []signature.PublicKey{nodes[8].ID, nodes[7].ID} - newStatus = app.generateStatus(ctx, runtimes[0], uninitializedStatus, reverse(nodes), params, epoch) - require.Equal(expStatus, newStatus, "node 7 and 8 should form the committee if node 8 is the source of truth") + expStatus.Nodes = []signature.PublicKey{nodes[7].ID} + newStatus = app.generateStatus(ctx, runtimes[0], uninitializedStatus, nil, reverse(nodes), params, epoch) + require.Equal(expStatus, newStatus, "node 7 should be the source of truth and form the committee") - // If the key manager is not secure, then nodes 7 and 8 are ignored. 
-	initializedStatus.IsSecure = false
-	expStatus.IsSecure = false
-	expStatus.Nodes = []signature.PublicKey{nodes[6].ID}
-	newStatus = app.generateStatus(ctx, runtimes[0], initializedStatus, reverse(nodes), params, epoch)
-	require.Equal(expStatus, newStatus, "node 6 should form the committee if key manager is not secure")
-
-	// If the key manager is secure, then node 6 is ignored.
-	initializedStatus.IsSecure = true
-	expStatus.IsSecure = true
-	expStatus.Nodes = []signature.PublicKey{nodes[7].ID, nodes[8].ID}
-	newStatus = app.generateStatus(ctx, runtimes[0], initializedStatus, nodes, params, epoch)
-	require.Equal(expStatus, newStatus, "node 7 and 8 should form the committee if key manager is secure")
+	// If the key manager is already initialized as secure with a checksum, then all nodes
+	// except 8 and 9 are ignored.
+	expStatus.Checksum = checksum
+	expStatus.Nodes = []signature.PublicKey{nodes[8].ID, nodes[9].ID}
+	newStatus = app.generateStatus(ctx, runtimes[0], initializedStatus, nil, nodes, params, epoch)
+	require.Equal(expStatus, newStatus, "node 8 and 9 should form the committee if key manager is initialized as secure")

 	// The second key manager.
- expStatus.ID = runtimes[1].ID - expStatus.Nodes = []signature.PublicKey{nodes[4].ID, nodes[8].ID} - newStatus = app.generateStatus(ctx, runtimes[1], uninitializedStatus, nodes, params, epoch) - require.Equal(expStatus, newStatus, "node 4 and 8 should form the committee") - - newStatus = app.generateStatus(ctx, runtimes[1], initializedStatus, nodes, params, epoch) - require.Equal(expStatus, newStatus, "node 4 and 8 should form the committee") - - expStatus.Nodes = []signature.PublicKey{nodes[8].ID, nodes[4].ID} - newStatus = app.generateStatus(ctx, runtimes[1], initializedStatus, reverse(nodes), params, epoch) - require.Equal(expStatus, newStatus, "node 4 and 8 should form the committee") + expStatus = &api.Status{ + ID: runtimeIDs[1], + IsInitialized: true, + IsSecure: true, + Policy: &policy, + Checksum: checksum, + Nodes: []signature.PublicKey{nodes[4].ID, nodes[9].ID}, + } + initializedStatus.ID = runtimeIDs[1] + newStatus = app.generateStatus(ctx, runtimes[1], initializedStatus, nil, nodes, params, epoch) + require.Equal(expStatus, newStatus, "node 4 and 9 should form the committee") }) } diff --git a/go/consensus/cometbft/apps/keymanager/transactions.go b/go/consensus/cometbft/apps/keymanager/transactions.go index 65a58e33820..2c2f94ca79b 100644 --- a/go/consensus/cometbft/apps/keymanager/transactions.go +++ b/go/consensus/cometbft/apps/keymanager/transactions.go @@ -94,7 +94,7 @@ func (app *keymanagerApplication) updatePolicy( nodes, _ := regState.Nodes(ctx) registry.SortNodeList(nodes) oldStatus.Policy = sigPol - newStatus := app.generateStatus(ctx, kmRt, oldStatus, nodes, regParams, epoch) + newStatus := app.generateStatus(ctx, kmRt, oldStatus, nil, nodes, regParams, epoch) if err := state.SetStatus(ctx, newStatus); err != nil { ctx.Logger().Error("keymanager: failed to set key manager status", "err", err, diff --git a/go/keymanager/api/api.go b/go/keymanager/api/api.go index aab8bdb5279..56e8ea965f3 100644 --- a/go/keymanager/api/api.go +++ 
b/go/keymanager/api/api.go @@ -85,9 +85,15 @@ var ( // RPCMethodGetPublicEphemeralKey is the name of the `get_public_ephemeral_key` method. RPCMethodGetPublicEphemeralKey = "get_public_ephemeral_key" + // RPCMethodGenerateMasterSecret is the name of the `generate_master_secret` RPC method. + RPCMethodGenerateMasterSecret = "generate_master_secret" + // RPCMethodGenerateEphemeralSecret is the name of the `generate_ephemeral_secret` RPC method. RPCMethodGenerateEphemeralSecret = "generate_ephemeral_secret" + // RPCMethodLoadMasterSecret is the name of the `load_master_secret` RPC method. + RPCMethodLoadMasterSecret = "load_master_secret" + // RPCMethodLoadEphemeralSecret is the name of the `load_ephemeral_secret` RPC method. RPCMethodLoadEphemeralSecret = "load_ephemeral_secret" @@ -231,10 +237,10 @@ func NewPublishEphemeralSecretTx(nonce uint64, fee *transaction.Fee, sigSec *Sig // InitRequest is the initialization RPC request, sent to the key manager // enclave. type InitRequest struct { - Status *Status `json:"status,omitempty"` // TODO: Change in PR-5205. - Checksum []byte `json:"checksum,omitempty"` // TODO: Remove in PR-5205. - Policy []byte `json:"policy,omitempty"` // TODO: Remove in PR-5205. - MayGenerate bool `json:"may_generate"` + Status *Status `json:"status,omitempty"` // TODO: Change in PR-5205. + Checksum []byte `json:"checksum,omitempty"` // TODO: Remove in PR-5205. + Policy []byte `json:"policy,omitempty"` // TODO: Remove in PR-5205. + MayGenerate bool `json:"may_generate,omitempty"` // TODO: Remove in PR-5205. 
} // InitResponse is the initialization RPC response, returned as part of a @@ -242,8 +248,10 @@ type InitRequest struct { type InitResponse struct { IsSecure bool `json:"is_secure"` Checksum []byte `json:"checksum"` + NextChecksum []byte `json:"next_checksum,omitempty"` PolicyChecksum []byte `json:"policy_checksum"` RSK *signature.PublicKey `json:"rsk,omitempty"` + NextRSK *signature.PublicKey `json:"next_rsk,omitempty"` } // SignedInitResponse is the signed initialization RPC response, returned @@ -274,6 +282,15 @@ func SignInitResponse(signer signature.Signer, response *InitResponse) (*SignedI }, nil } +// LongTermKeyRequest is the long-term key RPC request, sent to the key manager +// enclave. +type LongTermKeyRequest struct { + Height *uint64 `json:"height"` + ID common.Namespace `json:"runtime_id"` + KeyPairID KeyPairID `json:"key_pair_id"` + Generation uint64 `json:"generation"` +} + // EphemeralKeyRequest is the ephemeral key RPC request, sent to the key manager // enclave. type EphemeralKeyRequest struct { @@ -292,6 +309,19 @@ type SignedPublicKey struct { Expiration *beacon.EpochTime `json:"expiration,omitempty"` } +// GenerateMasterSecretRequest is the generate master secret RPC request, +// sent to the key manager enclave. +type GenerateMasterSecretRequest struct { + Generation uint64 `json:"generation"` + Epoch beacon.EpochTime `json:"epoch"` +} + +// GenerateMasterSecretResponse is the RPC response, returned as part of +// a GenerateMasterSecretRequest from the key manager enclave. +type GenerateMasterSecretResponse struct { + SignedSecret SignedEncryptedMasterSecret `json:"signed_secret"` +} + // GenerateEphemeralSecretRequest is the generate ephemeral secret RPC request, // sent to the key manager enclave. 
type GenerateEphemeralSecretRequest struct { @@ -304,6 +334,12 @@ type GenerateEphemeralSecretResponse struct { SignedSecret SignedEncryptedEphemeralSecret `json:"signed_secret"` } +// LoadMasterSecretRequest is the load master secret RPC request, +// sent to the key manager enclave. +type LoadMasterSecretRequest struct { + SignedSecret SignedEncryptedMasterSecret `json:"signed_secret"` +} + // LoadEphemeralSecretRequest is the load ephemeral secret RPC request, // sent to the key manager enclave. type LoadEphemeralSecretRequest struct { diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_client.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_client.go new file mode 100644 index 00000000000..9587d881f9e --- /dev/null +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_client.go @@ -0,0 +1,190 @@ +package runtime + +import ( + "context" + "crypto/rand" + "fmt" + "time" + + "github.com/cenkalti/backoff/v4" + "github.com/libp2p/go-libp2p" + "github.com/libp2p/go-libp2p/core/host" + "github.com/libp2p/go-libp2p/core/peer" + "github.com/multiformats/go-multiaddr" + + "github.com/oasisprotocol/curve25519-voi/primitives/x25519" + + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" + "github.com/oasisprotocol/oasis-core/go/common/cbor" + "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" + "github.com/oasisprotocol/oasis-core/go/common/crypto/signature/signers/memory" + keymanager "github.com/oasisprotocol/oasis-core/go/keymanager/api" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis" + p2p "github.com/oasisprotocol/oasis-core/go/p2p/api" + "github.com/oasisprotocol/oasis-core/go/p2p/protocol" + "github.com/oasisprotocol/oasis-core/go/p2p/rpc" + enclaverpc "github.com/oasisprotocol/oasis-core/go/runtime/enclaverpc/api" + kmp2p "github.com/oasisprotocol/oasis-core/go/worker/keymanager/p2p" +) + +type keyManagerRPCClient struct { + host host.Host + client rpc.Client +} + +func 
newKeyManagerRPCClient(chainContext string) (*keyManagerRPCClient, error) { + signer, err := memory.NewFactory().Generate(signature.SignerP2P, rand.Reader) + if err != nil { + return nil, err + } + + listenAddr, err := multiaddr.NewMultiaddr("/ip4/0.0.0.0/tcp/0") + if err != nil { + return nil, err + } + + host, err := libp2p.New( + libp2p.ListenAddrs(listenAddr), + libp2p.Identity(p2p.SignerToPrivKey(signer)), + ) + if err != nil { + return nil, err + } + + pid := protocol.NewRuntimeProtocolID(chainContext, keymanagerID, kmp2p.KeyManagerProtocolID, kmp2p.KeyManagerProtocolVersion) + client := rpc.NewClient(host, pid) + + return &keyManagerRPCClient{ + host: host, + client: client, + }, nil +} + +func (c *keyManagerRPCClient) addKeyManagerAddrToHost(km *oasis.Keymanager) (peer.ID, error) { + identity, err := km.LoadIdentity() + if err != nil { + return "", err + } + + peerID, err := p2p.PublicKeyToPeerID(identity.P2PSigner.Public()) + if err != nil { + return "", err + } + + listenAddr, err := multiaddr.NewMultiaddr(fmt.Sprintf("/ip4/0.0.0.0/tcp/%d", km.P2PPort())) + if err != nil { + return "", err + } + + c.host.Peerstore().AddAddr(peerID, listenAddr, time.Hour) + + return peerID, nil +} + +func (c *keyManagerRPCClient) fetchPublicKey(ctx context.Context, generation uint64, peerID peer.ID) (*x25519.PublicKey, error) { + args := keymanager.LongTermKeyRequest{ + Height: nil, + ID: keymanagerID, + KeyPairID: keymanager.KeyPairID{1, 2, 3}, + Generation: generation, + } + + req := enclaverpc.Request{ + Method: keymanager.RPCMethodGetPublicKey, + Args: args, + } + + p2pReq := kmp2p.CallEnclaveRequest{ + Kind: enclaverpc.KindInsecureQuery, + Data: cbor.Marshal(req), + } + + var p2pRsp kmp2p.CallEnclaveResponse + _, err := c.client.Call(ctx, peerID, kmp2p.MethodCallEnclave, p2pReq, &p2pRsp) + if err != nil { + return nil, err + } + + var rsp enclaverpc.Response + if err = cbor.Unmarshal(p2pRsp.Data, &rsp); err != nil { + return nil, err + } + + if rsp.Body.Error != nil { 
+ msg := *rsp.Body.Error + if msg == fmt.Sprintf("master secret generation %d not found", generation) { + return nil, nil + } + return nil, fmt.Errorf(msg) + } + + var key keymanager.SignedPublicKey + if err = cbor.Unmarshal(rsp.Body.Success, &key); err != nil { + return nil, err + } + + return &key.Key, nil +} + +func (c *keyManagerRPCClient) fetchEphemeralPublicKey(ctx context.Context, epoch beacon.EpochTime, peerID peer.ID) (*x25519.PublicKey, error) { + args := keymanager.EphemeralKeyRequest{ + Height: nil, + ID: keymanagerID, + KeyPairID: keymanager.KeyPairID{1, 2, 3}, + Epoch: epoch, + } + + req := enclaverpc.Request{ + Method: keymanager.RPCMethodGetPublicEphemeralKey, + Args: args, + } + + p2pReq := kmp2p.CallEnclaveRequest{ + Kind: enclaverpc.KindInsecureQuery, + Data: cbor.Marshal(req), + } + + var p2pRsp kmp2p.CallEnclaveResponse + _, err := c.client.Call(ctx, peerID, kmp2p.MethodCallEnclave, p2pReq, &p2pRsp) + if err != nil { + return nil, err + } + + var rsp enclaverpc.Response + if err = cbor.Unmarshal(p2pRsp.Data, &rsp); err != nil { + return nil, err + } + + if rsp.Body.Error != nil { + msg := *rsp.Body.Error + if msg == fmt.Sprintf("ephemeral secret for epoch %d not found", epoch) { + return nil, nil + } + return nil, fmt.Errorf(msg) + } + + var key keymanager.SignedPublicKey + if err = cbor.Unmarshal(rsp.Body.Success, &key); err != nil { + return nil, err + } + + return &key.Key, nil +} + +func (c *keyManagerRPCClient) fetchEphemeralPublicKeyWithRetry(ctx context.Context, epoch beacon.EpochTime, peerID peer.ID) (*x25519.PublicKey, error) { + var ( + err error + key *x25519.PublicKey + ) + + retry := backoff.WithContext(backoff.WithMaxRetries(backoff.NewConstantBackOff(time.Second), 5), ctx) + err = backoff.Retry(func() error { + key, err = c.fetchEphemeralPublicKey(ctx, epoch, peerID) + return err + }, retry) + if err != nil { + return nil, err + } + + return key, err +} diff --git 
a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_dump_restore.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_dump_restore.go new file mode 100644 index 00000000000..15bfd59aa84 --- /dev/null +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_dump_restore.go @@ -0,0 +1,132 @@ +package runtime + +import ( + "context" + "fmt" + + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/scenario" +) + +// KeymanagerDumpRestore is the keymanager dump restore rotation scenario. +// +// In this scenario we test if the deployment of the master secret rotation +// feature is backwards compatible. The old key managers which are already +// initialized with the first master secret should be able to rotate secrets +// once enabled via the policy. +var KeymanagerDumpRestore scenario.Scenario = newKmDumpRestoreImpl() + +type kmDumpRestoreImpl struct { + Scenario + + nonce uint64 +} + +func newKmDumpRestoreImpl() scenario.Scenario { + return &kmDumpRestoreImpl{ + Scenario: *NewScenario( + "keymanager-dump-restore", + NewKVTestClient().WithScenario(InsertRemoveKeyValueEncScenario), + ), + } +} + +func (sc *kmDumpRestoreImpl) Fixture() (*oasis.NetworkFixture, error) { + f, err := sc.Scenario.Fixture() + if err != nil { + return nil, err + } + + // Speed up the test. + f.Network.Beacon.VRFParameters = &beacon.VRFParameters{ + Interval: 10, + ProofSubmissionDelay: 2, + } + + // Compute workers are not needed. + f.ComputeWorkers = []oasis.ComputeWorkerFixture{} + + // Test requires multiple key managers. 
+ f.Keymanagers = []oasis.KeymanagerFixture{ + {Runtime: 0, Entity: 1}, + {Runtime: 0, Entity: 1}, + } + + return f, nil +} + +func (sc *kmDumpRestoreImpl) Clone() scenario.Scenario { + return &kmDumpRestoreImpl{ + Scenario: *sc.Scenario.Clone().(*Scenario), + } +} + +func (sc *kmDumpRestoreImpl) Run(ctx context.Context, childEnv *env.Env) (err error) { // nolint: gocyclo + // Start the network. + if err = sc.StartNetworkAndWaitForClientSync(ctx); err != nil { + return err + } + + // Wait until the first master secret is generated. + if _, err = sc.waitMasterSecret(ctx, 0); err != nil { + return err + } + + // Dump/restore should erase the last master secret and leave the key manager initialized. + fixture, err := sc.Fixture() + if err != nil { + return err + } + for i := range fixture.Keymanagers { + fixture.Keymanagers[i].NoAutoStart = true + } + if err = sc.DumpRestoreNetwork(childEnv, fixture, false, nil, nil); err != nil { + return err + } + + // Start the network. + if err = sc.StartNetworkAndWaitForClientSync(ctx); err != nil { + return err + } + + // Make sure the last secret was not preserved. + secret, err := sc.keymanagerMasterSecret(ctx) + if err != nil { + return err + } + if secret != nil { + return fmt.Errorf("dump/restore should not preserve the master secret proposal") + } + + // Make sure the manager is initialized. + status, err := sc.keymanagerStatus(ctx) + if err != nil { + return err + } + if !status.IsInitialized || len(status.Checksum) == 0 || status.Generation != 0 { + return fmt.Errorf("key manager should be initialized") + } + + // Start both key manager nodes. + if err = sc.startAndWaitKeymanagers(ctx, []int{0, 1}); err != nil { + return err + } + + // Test master secret rotations. To enable them, update the rotation interval in the policy. 
+ if err = sc.updateRotationInterval(ctx, sc.nonce, childEnv, 1); err != nil { + return err + } + sc.nonce++ + if _, err = sc.waitMasterSecret(ctx, 3); err != nil { + return err + } + + // Test if all key managers can derive keys from all master secrets. + if err = sc.compareLongtermPublicKeys(ctx, []int{0, 1}); err != nil { + return err + } + + return nil +} diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_ephemeral_keys.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_ephemeral_keys.go index 83ac0c15e2d..35c263ff35a 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_ephemeral_keys.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_ephemeral_keys.go @@ -2,36 +2,24 @@ package runtime import ( "context" - "crypto/rand" "fmt" "reflect" "time" - "github.com/cenkalti/backoff/v4" - "github.com/libp2p/go-libp2p" - "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/peer" - "github.com/multiformats/go-multiaddr" "github.com/oasisprotocol/curve25519-voi/primitives/x25519" beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/cbor" - "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" - "github.com/oasisprotocol/oasis-core/go/common/crypto/signature/signers/memory" "github.com/oasisprotocol/oasis-core/go/common/node" consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" keymanager "github.com/oasisprotocol/oasis-core/go/keymanager/api" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/scenario" - p2p "github.com/oasisprotocol/oasis-core/go/p2p/api" - "github.com/oasisprotocol/oasis-core/go/p2p/protocol" - "github.com/oasisprotocol/oasis-core/go/p2p/rpc" registry "github.com/oasisprotocol/oasis-core/go/registry/api" - enclaverpc 
"github.com/oasisprotocol/oasis-core/go/runtime/enclaverpc/api" - kmp2p "github.com/oasisprotocol/oasis-core/go/worker/keymanager/p2p" ) // KeymanagerEphemeralKeys is the keymanager ephemeral secret and ephemeral @@ -113,19 +101,19 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error if err != nil { return err } - rpcClient, rpcHost, err := sc.keyManagerRPCClient(chainContext) + rpcClient, err := newKeyManagerRPCClient(chainContext) if err != nil { return err } - firstKmPeerID, err := sc.addKeyManagerAddrToHost(firstKm, rpcHost) + firstKmPeerID, err := rpcClient.addKeyManagerAddrToHost(firstKm) if err != nil { return err } - secondKmPeerID, err := sc.addKeyManagerAddrToHost(secondKm, rpcHost) + secondKmPeerID, err := rpcClient.addKeyManagerAddrToHost(secondKm) if err != nil { return err } - thirdKmPeerID, err := sc.addKeyManagerAddrToHost(thirdKm, rpcHost) + thirdKmPeerID, err := rpcClient.addKeyManagerAddrToHost(thirdKm) if err != nil { return err } @@ -166,7 +154,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error "epoch", sigSecret.Secret.Epoch-1, ) - key, err := sc.fetchEphemeralPublicKey(ctx, sigSecret.Secret.Epoch-1, firstKmPeerID, rpcClient) + key, err := rpcClient.fetchEphemeralPublicKey(ctx, sigSecret.Secret.Epoch-1, firstKmPeerID) if err != nil { return err } @@ -181,7 +169,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error "epoch", sigSecret.Secret.Epoch, ) - key, err = sc.fetchEphemeralPublicKeyWithRetry(ctx, sigSecret.Secret.Epoch, firstKmPeerID, rpcClient) + key, err = rpcClient.fetchEphemeralPublicKeyWithRetry(ctx, sigSecret.Secret.Epoch, firstKmPeerID) if err != nil { return err } @@ -204,7 +192,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error sc.Logger.Info("testing ephemeral keys - restart", "epoch", sigSecret.Secret.Epoch, ) - key, err = sc.fetchEphemeralPublicKeyWithRetry(ctx, sigSecret.Secret.Epoch, 
firstKmPeerID, rpcClient) + key, err = rpcClient.fetchEphemeralPublicKeyWithRetry(ctx, sigSecret.Secret.Epoch, firstKmPeerID) if err != nil { return err } @@ -244,7 +232,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error } // Fetch public key which will be used to test replication. - key, err = sc.fetchEphemeralPublicKeyWithRetry(ctx, sigSecret.Secret.Epoch, firstKmPeerID, rpcClient) + key, err = rpcClient.fetchEphemeralPublicKeyWithRetry(ctx, sigSecret.Secret.Epoch, firstKmPeerID) if err != nil { return err } @@ -290,7 +278,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error sc.Logger.Info("testing ephemeral keys - replication", "epoch", sigSecret.Secret.Epoch, ) - keyCopy, err := sc.fetchEphemeralPublicKey(ctx, sigSecret.Secret.Epoch, secondKmPeerID, rpcClient) + keyCopy, err := rpcClient.fetchEphemeralPublicKey(ctx, sigSecret.Secret.Epoch, secondKmPeerID) if err != nil { return err } @@ -304,7 +292,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error sc.Logger.Info("testing ephemeral keys - replication", "epoch", sigSecret.Secret.Epoch, ) - keyCopy, err = sc.fetchEphemeralPublicKey(ctx, sigSecret.Secret.Epoch, thirdKmPeerID, rpcClient) + keyCopy, err = rpcClient.fetchEphemeralPublicKey(ctx, sigSecret.Secret.Epoch, thirdKmPeerID) if err != nil { return err } @@ -363,7 +351,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error ) for _, peerID := range []peer.ID{firstKmPeerID, secondKmPeerID, thirdKmPeerID} { - key, err = sc.fetchEphemeralPublicKeyWithRetry(ctx, epoch, peerID, rpcClient) + key, err = rpcClient.fetchEphemeralPublicKeyWithRetry(ctx, epoch, peerID) if err != nil { return fmt.Errorf("fetching ephemeral key should succeed") } @@ -605,117 +593,3 @@ func (sc *kmEphemeralKeysImpl) checkNumberOfKeyManagers(ctx context.Context, n i return nil } - -func (sc *kmEphemeralKeysImpl) keyManagerRPCClient(chainContext string) 
(rpc.Client, host.Host, error) { - signer, err := memory.NewFactory().Generate(signature.SignerP2P, rand.Reader) - if err != nil { - return nil, nil, err - } - - listenAddr, err := multiaddr.NewMultiaddr("/ip4/0.0.0.0/tcp/0") - if err != nil { - return nil, nil, err - } - - host, err := libp2p.New( - libp2p.ListenAddrs(listenAddr), - libp2p.Identity(p2p.SignerToPrivKey(signer)), - ) - if err != nil { - return nil, nil, err - } - - pid := protocol.NewRuntimeProtocolID(chainContext, keymanagerID, kmp2p.KeyManagerProtocolID, kmp2p.KeyManagerProtocolVersion) - rc := rpc.NewClient(host, pid) - - return rc, host, nil -} - -func (sc *kmEphemeralKeysImpl) addKeyManagerAddrToHost(km *oasis.Keymanager, host host.Host) (peer.ID, error) { - identity, err := km.LoadIdentity() - if err != nil { - return "", err - } - - peerID, err := p2p.PublicKeyToPeerID(identity.P2PSigner.Public()) - if err != nil { - return "", err - } - - listenAddr, err := multiaddr.NewMultiaddr(fmt.Sprintf("/ip4/0.0.0.0/tcp/%d", km.P2PPort())) - if err != nil { - return "", err - } - - host.Peerstore().AddAddr(peerID, listenAddr, time.Hour) - - return peerID, nil -} - -func (sc *kmEphemeralKeysImpl) fetchEphemeralPublicKey(ctx context.Context, epoch beacon.EpochTime, peerID peer.ID, rc rpc.Client) (*x25519.PublicKey, error) { - args := keymanager.EphemeralKeyRequest{ - Height: nil, - ID: keymanagerID, - KeyPairID: keymanager.KeyPairID{1, 2, 3}, - Epoch: epoch, - } - - req := enclaverpc.Request{ - Method: keymanager.RPCMethodGetPublicEphemeralKey, - Args: args, - } - - p2pReq := kmp2p.CallEnclaveRequest{ - Kind: enclaverpc.KindInsecureQuery, - Data: cbor.Marshal(req), - } - - var p2pRsp kmp2p.CallEnclaveResponse - _, err := rc.Call(ctx, peerID, kmp2p.MethodCallEnclave, p2pReq, &p2pRsp) - if err != nil { - return nil, err - } - - var rsp enclaverpc.Response - if err = cbor.Unmarshal(p2pRsp.Data, &rsp); err != nil { - return nil, err - } - - if rsp.Body.Error != nil { - msg := *rsp.Body.Error - if msg == 
fmt.Sprintf("ephemeral secret for epoch %d not found", epoch) { - return nil, nil - } - return nil, fmt.Errorf(msg) - } - - var key keymanager.SignedPublicKey - if err = cbor.Unmarshal(rsp.Body.Success, &key); err != nil { - return nil, err - } - - return &key.Key, nil -} - -func (sc *kmEphemeralKeysImpl) fetchEphemeralPublicKeyWithRetry(ctx context.Context, epoch beacon.EpochTime, peerID peer.ID, rc rpc.Client) (*x25519.PublicKey, error) { - var ( - err error - key *x25519.PublicKey - ) - - retry := backoff.WithContext(backoff.WithMaxRetries(backoff.NewConstantBackOff(time.Second), 5), ctx) - err = backoff.Retry(func() error { - key, err = sc.fetchEphemeralPublicKey(ctx, epoch, peerID, rc) - if err != nil { - sc.Logger.Warn("failed to fetch ephemeral public key", - "err", err, - ) - } - return err - }, retry) - if err != nil { - return nil, err - } - - return key, err -} diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_master_secrets.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_master_secrets.go new file mode 100644 index 00000000000..439be62ef4b --- /dev/null +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_master_secrets.go @@ -0,0 +1,314 @@ +package runtime + +import ( + "context" + "encoding/base64" + "fmt" + + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" + keymanager "github.com/oasisprotocol/oasis-core/go/keymanager/api" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/scenario" +) + +// KeymanagerMasterSecrets is the keymanager master secret rotation scenario. 
+var KeymanagerMasterSecrets scenario.Scenario = newKmMasterSecretsImpl() + +type kmMasterSecretsImpl struct { + Scenario + + nonce uint64 +} + +func newKmMasterSecretsImpl() scenario.Scenario { + return &kmMasterSecretsImpl{ + Scenario: *NewScenario( + "keymanager-master-secrets", + NewKVTestClient().WithScenario(InsertRemoveKeyValueEncScenario), + ), + } +} + +func (sc *kmMasterSecretsImpl) Fixture() (*oasis.NetworkFixture, error) { + f, err := sc.Scenario.Fixture() + if err != nil { + return nil, err + } + + // Speed up the test. + f.Network.Beacon.VRFParameters = &beacon.VRFParameters{ + Interval: 10, + ProofSubmissionDelay: 2, + } + + // Test requires multiple key managers. + f.Keymanagers = []oasis.KeymanagerFixture{ + {Runtime: 0, Entity: 1}, + {Runtime: 0, Entity: 1}, + {Runtime: 0, Entity: 1}, + } + + return f, nil +} + +func (sc *kmMasterSecretsImpl) Clone() scenario.Scenario { + return &kmMasterSecretsImpl{ + Scenario: *sc.Scenario.Clone().(*Scenario), + } +} + +func (sc *kmMasterSecretsImpl) Run(ctx context.Context, childEnv *env.Env) (err error) { // nolint: gocyclo + // Start the network. + if err = sc.Net.Start(); err != nil { + return err + } + + // Verify in the background that all published master secrets are unique. + stop, err := sc.monitorMasterSecrets(ctx) + if err != nil { + return err + } + defer func() { + if err2 := stop(); err == nil { + err = err2 + } + }() + + // Test that only one master secret is generated if rotations are disabled. 
+ if _, err = sc.waitMasterSecret(ctx, 0); err != nil { + return fmt.Errorf("master secret not generated: %w", err) + } + if err = sc.waitEpochs(ctx, 5); err != nil { + return err + } + + sc.Logger.Info("verifying that exactly one master secret has been generated") + + status, err := sc.keymanagerStatus(ctx) + if err != nil { + return err + } + if !status.IsInitialized || len(status.Checksum) == 0 || status.Generation != 0 { + return fmt.Errorf("exactly one master secret should be generated if rotation is disabled %+v", status) + } + secret, err := sc.keymanagerMasterSecret(ctx) + if err != nil { + return err + } + if secret.Secret.Generation != 0 { + return fmt.Errorf("the last master secret should have generation zero") + } + + // Enable master secret rotations. + if err = sc.updateRotationInterval(ctx, sc.nonce, childEnv, 1); err != nil { + return err + } + sc.nonce++ + if _, err = sc.waitMasterSecret(ctx, 3); err != nil { + return err + } + + // Test if all key managers can derive keys from all master secrets. + if err = sc.compareLongtermPublicKeys(ctx, []int{0, 1, 2}); err != nil { + return err + } + + // Test master secrets if only two/one manager is running. + if err = sc.stopKeymanagers(ctx, []int{2}); err != nil { + return err + } + if _, err = sc.waitMasterSecret(ctx, 4); err != nil { + return err + } + if err = sc.stopKeymanagers(ctx, []int{1}); err != nil { + return err + } + if _, err = sc.waitMasterSecret(ctx, 6); err != nil { + return err + } + + // Check how frequently secrets are rotated. 
+ interval := beacon.EpochTime(3) + if err = sc.updateRotationInterval(ctx, sc.nonce, childEnv, interval); err != nil { + return err + } + sc.nonce++ + prev, err := sc.waitMasterSecret(ctx, 7) + if err != nil { + return err + } + next, err := sc.waitMasterSecret(ctx, 8) + if err != nil { + return err + } + if diff := next.RotationEpoch - prev.RotationEpoch; diff != interval { + return fmt.Errorf("rotation interval is not correct: expected %d got %d", interval, diff) + } + + // Disable master secret rotations. + if err = sc.updateRotationInterval(ctx, sc.nonce, childEnv, 0); err != nil { + return err + } + sc.nonce++ + if err = sc.waitEpochs(ctx, 3); err != nil { + return err + } + + // No more secrets should be generated. + status, err = sc.keymanagerStatus(ctx) + if err != nil { + return err + } + if status.Generation != next.Generation { + return fmt.Errorf("master secret rotations should be disabled: got %d, expected %d", status.Generation, next.Generation) + } + + return nil +} + +func (sc *kmMasterSecretsImpl) monitorMasterSecrets(ctx context.Context) (func() error, error) { + sc.Logger.Info("started watching master secrets to see if they are unique and ordered") + + total := 0 + secretsOk := true + statusesOk := true + checksums := make(map[string]struct{}) + stopCh := make(chan struct{}) + + cancel := func() error { + stopCh <- struct{}{} + stopCh <- struct{}{} + unique := len(checksums) + + sc.Logger.Info("stopped watching master secrets to see if they are unique and ordered", + "unique", unique, + "total", total, + ) + + switch { + case total == 0: + return fmt.Errorf("no master secrets published") + case unique != total: + return fmt.Errorf("master secrets not unique: unique %d, total %d,", unique, total) + case !secretsOk: + return fmt.Errorf("invalid master secrets") + case !statusesOk: + return fmt.Errorf("invalid key manager statuses") + default: + return nil + } + } + + // Monitor proposed secrets. 
+ go func() { + mstCh, mstSub, err := sc.Net.ClientController().Keymanager.WatchMasterSecrets(ctx) + if err != nil { + return + } + defer mstSub.Close() + + var prev, next *keymanager.SignedEncryptedMasterSecret + for { + select { + case <-stopCh: + return + case next = <-mstCh: + } + + if next.Secret.ID != keymanagerID { + continue + } + + sc.Logger.Info("master secret published", + "generation", next.Secret.Generation, + "epoch", next.Secret.Epoch, + "ciphertexts", len(next.Secret.Secret.Ciphertexts), + ) + + total++ + checksums[base64.StdEncoding.EncodeToString(next.Secret.Secret.Checksum)] = struct{}{} + + switch prev { + case nil: + if next.Secret.Generation != 0 { + sc.Logger.Error("master secrets should start with zero generation", + "generation", next.Secret.Generation, + ) + secretsOk = false + } + default: + if prev.Secret.Generation != next.Secret.Generation && prev.Secret.Generation != next.Secret.Generation-1 { + sc.Logger.Error("master secret generations should be ordered", + "prev", prev.Secret.Generation, + "next", next.Secret.Generation, + ) + secretsOk = false + } + if prev.Secret.Epoch >= next.Secret.Epoch { + sc.Logger.Error("master secret epochs should be ordered", + "prev", prev.Secret.Epoch, + "next", next.Secret.Epoch, + ) + secretsOk = false + } + } + prev = next + } + }() + + // Monitor accepted secrets. 
+ go func() { + stCh, stSub, err := sc.Net.ClientController().Keymanager.WatchStatuses(ctx) + if err != nil { + return + } + defer stSub.Close() + + var prev, next *keymanager.Status + for { + select { + case <-stopCh: + return + case next = <-stCh: + } + + if next.ID != keymanagerID { + continue + } + + sc.Logger.Info("key manager status updated", + "generation", next.Generation, + "rotation_epoch", next.RotationEpoch, + ) + + switch prev { + case nil: + if next.Generation != 0 { + sc.Logger.Error("status should start with zero generation", + "generation", next.Generation, + ) + statusesOk = false + } + default: + if prev.Generation != next.Generation && prev.Generation != next.Generation-1 { + sc.Logger.Error("status should have ordered master secrets", + "prev", prev.Generation, + "next", next.Generation, + ) + statusesOk = false + } + if prev.Generation != next.Generation && prev.RotationEpoch >= next.RotationEpoch { + sc.Logger.Error("status should have ordered rotation epochs", + "prev", prev.RotationEpoch, + "next", next.RotationEpoch, + ) + statusesOk = false + } + } + prev = next + } + }() + + return cancel, nil +} diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_replicate.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_replicate.go index 24b99f121c4..e1d3598db2d 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_replicate.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_replicate.go @@ -5,13 +5,12 @@ import ( "context" "fmt" - "github.com/oasisprotocol/oasis-core/go/common/cbor" - "github.com/oasisprotocol/oasis-core/go/common/version" - keymanager "github.com/oasisprotocol/oasis-core/go/keymanager/api" + "golang.org/x/exp/slices" + + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/scenario" - registry 
"github.com/oasisprotocol/oasis-core/go/registry/api" ) // KeymanagerReplicate is the keymanager replication scenario. @@ -42,96 +41,115 @@ func (sc *kmReplicateImpl) Fixture() (*oasis.NetworkFixture, error) { return nil, err } + // Speed up the test. + f.Network.Beacon.VRFParameters = &beacon.VRFParameters{ + Interval: 10, + ProofSubmissionDelay: 2, + } + + // We don't need compute workers. + f.ComputeWorkers = []oasis.ComputeWorkerFixture{} + // This requires multiple keymanagers. f.Keymanagers = []oasis.KeymanagerFixture{ - {Runtime: 0, Entity: 1}, - {Runtime: 0, Entity: 1}, + {Runtime: 0, Entity: 1, Policy: 0}, + {Runtime: 0, Entity: 1, Policy: 0}, + {Runtime: 0, Entity: 1, Policy: 0, NodeFixture: oasis.NodeFixture{NoAutoStart: true}}, + {Runtime: 0, Entity: 1, Policy: 0, NodeFixture: oasis.NodeFixture{NoAutoStart: true}}, } + // Enable master secret rotation. + f.KeymanagerPolicies[0].MasterSecretRotationInterval = 1 + return f, nil } func (sc *kmReplicateImpl) Run(ctx context.Context, childEnv *env.Env) error { - if err := sc.StartNetworkAndTestClient(ctx, childEnv); err != nil { + // Start the first two key managers. + if err := sc.Net.Start(); err != nil { return err } - // Wait for the client to exit. - if err := sc.WaitTestClientOnly(); err != nil { - return err + // Wait until 3 master secrets are generated. + if _, err := sc.waitMasterSecret(ctx, 2); err != nil { + return fmt.Errorf("master secret not generated: %w", err) } - // Open a control connection to the replica. - if kmLen := len(sc.Net.Keymanagers()); kmLen < 2 { - return fmt.Errorf("expected more than 1 keymanager, have: %v", kmLen) + // Make sure exactly two key managers were generating secrets. 
+ status, err := sc.keymanagerStatus(ctx) + if err != nil { + return err + } + if len(status.Nodes) != 2 { + return fmt.Errorf("key manager committee should consist of two nodes") } - replica := sc.Net.Keymanagers()[1] - ctrl, err := oasis.NewController(replica.SocketPath()) - if err != nil { + // Stop the second manager. + // Upon restarting, its master secrets will be partially synchronized (3 out of 6). + if err = sc.Net.Keymanagers()[1].Stop(); err != nil { return err } - // Extract the replica's ExtraInfo. - node, err := ctrl.Registry.GetNode( - ctx, - ®istry.IDQuery{ - ID: replica.NodeID, - }, - ) + // Generate another 3 master secrets. + if _, err = sc.waitMasterSecret(ctx, 5); err != nil { + return fmt.Errorf("master secret not generated: %w", err) + } + + // Make sure the first key manager was generating secrets. + status, err = sc.keymanagerStatus(ctx) if err != nil { return err } - rt := node.GetRuntime(keymanagerID, version.Version{}) - if rt == nil { - return fmt.Errorf("replica is missing keymanager runtime from descriptor") - } - var signedInitResponse keymanager.SignedInitResponse - if err = cbor.Unmarshal(rt.ExtraInfo, &signedInitResponse); err != nil { - return fmt.Errorf("failed to unmarshal replica extrainfo") + if len(status.Nodes) != 1 { + return fmt.Errorf("key manager committee should consist of one node") } - // Grab a state dump and cross check the checksum with that of - // the replica. 
- doc, err := ctrl.Consensus.StateToGenesis(ctx, 0) - if err != nil { - return fmt.Errorf("failed to obtain consensus state: %w", err) - } - if err = func() error { - for _, status := range doc.KeyManager.Statuses { - if !status.ID.Equal(&keymanagerID) { - continue - } - if !status.IsInitialized { - return fmt.Errorf("key manager failed to initialize") - } - if !bytes.Equal(status.Checksum, signedInitResponse.InitResponse.Checksum) { - return fmt.Errorf("key manager failed to replicate, checksum mismatch") - } - return nil - } - return fmt.Errorf("consensus state missing km status") - }(); err != nil { + // Start key managers that are not running and wait until they replicate + // master secrets from the first one. + if err = sc.startAndWaitKeymanagers(ctx, []int{1, 2, 3}); err != nil { return err } - // Since the replica has published an ExtraInfo that shows that it has - // the correct master secret checksum, the replication process has - // succeeded from the enclave's point of view. + // If the replication was successful, the next key manager committee should + // consist of all nodes. + if status, err = sc.waitKeymanagerStatuses(ctx, 2); err != nil { + return err + } + if !status.IsInitialized { + return fmt.Errorf("key manager failed to initialize") + } + if len(status.Nodes) != len(sc.Net.Keymanagers()) { + return fmt.Errorf("key manager committee should consist of all nodes") + } + for _, km := range sc.Net.Keymanagers() { + if !slices.Contains(status.Nodes, km.NodeID) { + return fmt.Errorf("node missing from key manager status") + } + } - // Query the node's keymanager consensus endpoint. - status, err := ctrl.Keymanager.GetStatus(ctx, ®istry.NamespaceQuery{ - ID: keymanagerID, - }) - if err != nil { + // Wait few blocks so that the key managers transition to the new secret and register + // with the latest checksum. The latter can take some time. 
+ if _, err = sc.waitBlocks(ctx, 8); err != nil { return err } - for _, v := range status.Nodes { - // And ensure that the node is present. - if v.Equal(replica.NodeID) { - return nil + + // Check if checksums match. + for idx := range sc.Net.Keymanagers() { + initRsp, err := sc.keymanagerInitResponse(ctx, idx) + if err != nil { + return err + } + if !bytes.Equal(initRsp.Checksum, status.Checksum) { + return fmt.Errorf("key manager checksum mismatch") } } - return fmt.Errorf("node missing from km status") + // If we came this far than all key managers should have the same state. + // Let's test if they replicated the same secrets by fetching long-term + // public keys for all generations. + if err := sc.compareLongtermPublicKeys(ctx, []int{0, 1, 2, 3}); err != nil { + return err + } + + return nil } diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_restart.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_restart.go index 371c261b928..a43ce437cb1 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_restart.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_restart.go @@ -2,7 +2,9 @@ package runtime import ( "context" + "fmt" + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/scenario" @@ -30,9 +32,25 @@ func (sc *kmRestartImpl) Fixture() (*oasis.NetworkFixture, error) { return nil, err } + // Speed up the test. + f.Network.Beacon.VRFParameters = &beacon.VRFParameters{ + Interval: 10, + ProofSubmissionDelay: 2, + } + + // This requires multiple keymanagers. + f.Keymanagers = []oasis.KeymanagerFixture{ + {Runtime: 0, Entity: 1, Policy: 0}, + {Runtime: 0, Entity: 1, Policy: 0}, + {Runtime: 0, Entity: 1, Policy: 0}, + } + // The round is allowed to fail until the keymanager becomes available after restart. 
f.Network.DefaultLogWatcherHandlerFactories = nil + // Enable master secret rotation. + f.KeymanagerPolicies[0].MasterSecretRotationInterval = 1 + return f, nil } @@ -52,22 +70,18 @@ func (sc *kmRestartImpl) Run(ctx context.Context, childEnv *env.Env) error { return err } - // XXX: currently assumes single keymanager. - km := sc.Net.Keymanagers()[0] - - // Restart the key manager. - sc.Logger.Info("restarting the key manager") - if err := km.Restart(ctx); err != nil { - return err + // Wait until 3 master secrets are generated. + if _, err := sc.waitMasterSecret(ctx, 2); err != nil { + return fmt.Errorf("master secret not generated: %w", err) } - // Wait for the key manager to be ready. - sc.Logger.Info("waiting for the key manager to become ready") - kmCtrl, err := oasis.NewController(km.SocketPath()) - if err != nil { + // Restart the key managers. + if err := sc.restartAndWaitKeymanagers(ctx, []int{0, 1, 2}); err != nil { return err } - if err = kmCtrl.WaitReady(ctx); err != nil { + + // Test if rotations still work. + if _, err := sc.waitMasterSecret(ctx, 5); err != nil { return err } @@ -75,7 +89,7 @@ func (sc *kmRestartImpl) Run(ctx context.Context, childEnv *env.Env) error { // a second trip to the keymanager. 
sc.Logger.Info("starting a second client to check if key manager works") sc.Scenario.testClient = NewKVTestClient().WithSeed("seed2").WithScenario(InsertRemoveKeyValueEncScenarioV2) - if err = sc.startTestClientOnly(ctx, childEnv); err != nil { + if err := sc.startTestClientOnly(ctx, childEnv); err != nil { return err } return sc.waitTestClient() diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go new file mode 100644 index 00000000000..d90d890f6aa --- /dev/null +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go @@ -0,0 +1,345 @@ +package runtime + +import ( + "bytes" + "context" + "fmt" + "os" + "path/filepath" + + "github.com/oasisprotocol/curve25519-voi/primitives/x25519" + + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" + "github.com/oasisprotocol/oasis-core/go/common/cbor" + "github.com/oasisprotocol/oasis-core/go/common/sgx" + "github.com/oasisprotocol/oasis-core/go/common/version" + consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" + keymanager "github.com/oasisprotocol/oasis-core/go/keymanager/api" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis/cli" + registry "github.com/oasisprotocol/oasis-core/go/registry/api" +) + +func (sc *Scenario) waitKeymanagers(ctx context.Context, idxs []int) error { + sc.Logger.Info("waiting for the key managers to become ready", "ids", fmt.Sprintf("%+v", idxs)) + + kms := sc.Net.Keymanagers() + for _, idx := range idxs { + kmCtrl, err := oasis.NewController(kms[idx].SocketPath()) + if err != nil { + return err + } + if err = kmCtrl.WaitReady(ctx); err != nil { + return err + } + } + return nil +} + +func (sc *Scenario) startKeymanagers(ctx context.Context, idxs []int) error { + sc.Logger.Info("starting the key managers", "ids", fmt.Sprintf("%+v", idxs)) 
+ + kms := sc.Net.Keymanagers() + for _, idx := range idxs { + if err := kms[idx].Start(); err != nil { + return err + } + } + return nil +} + +func (sc *Scenario) stopKeymanagers(ctx context.Context, idxs []int) error { + sc.Logger.Info("stopping the key managers", "ids", fmt.Sprintf("%+v", idxs)) + + kms := sc.Net.Keymanagers() + for _, idx := range idxs { + if err := kms[idx].Stop(); err != nil { + return err + } + } + return nil +} + +func (sc *Scenario) restartKeymanagers(ctx context.Context, idxs []int) error { + sc.Logger.Info("restarting the key managers", "ids", fmt.Sprintf("%+v", idxs)) + + kms := sc.Net.Keymanagers() + for _, idx := range idxs { + if err := kms[idx].Restart(ctx); err != nil { + return err + } + } + return nil +} + +func (sc *Scenario) startAndWaitKeymanagers(ctx context.Context, idxs []int) error { + if err := sc.startKeymanagers(ctx, idxs); err != nil { + return err + } + return sc.waitKeymanagers(ctx, idxs) +} + +func (sc *Scenario) restartAndWaitKeymanagers(ctx context.Context, idxs []int) error { + if err := sc.restartKeymanagers(ctx, idxs); err != nil { + return err + } + return sc.waitKeymanagers(ctx, idxs) +} + +func (sc *Scenario) keymanagerStatus(ctx context.Context) (*keymanager.Status, error) { + return sc.Net.ClientController().Keymanager.GetStatus(ctx, ®istry.NamespaceQuery{ + Height: consensus.HeightLatest, + ID: keymanagerID, + }) +} + +func (sc *Scenario) keymanagerMasterSecret(ctx context.Context) (*keymanager.SignedEncryptedMasterSecret, error) { + secret, err := sc.Net.ClientController().Keymanager.GetMasterSecret(ctx, ®istry.NamespaceQuery{ + Height: consensus.HeightLatest, + ID: keymanagerID, + }) + if err == keymanager.ErrNoSuchMasterSecret { + return nil, nil + } + return secret, err +} + +func (sc *Scenario) keymanagerInitResponse(ctx context.Context, idx int) (*keymanager.InitResponse, error) { + kms := sc.Net.Keymanagers() + if kmLen := len(kms); kmLen <= idx { + return nil, fmt.Errorf("expected more than %d 
keymanager, have: %v", idx, kmLen) + } + km := kms[idx] + + ctrl, err := oasis.NewController(km.SocketPath()) + if err != nil { + return nil, err + } + + // Extract ExtraInfo. + node, err := ctrl.Registry.GetNode( + ctx, + ®istry.IDQuery{ + ID: km.NodeID, + }, + ) + if err != nil { + return nil, err + } + rt := node.GetRuntime(keymanagerID, version.Version{}) + if rt == nil { + return nil, fmt.Errorf("key manager is missing keymanager runtime from descriptor") + } + var signedInitResponse keymanager.SignedInitResponse + if err = cbor.Unmarshal(rt.ExtraInfo, &signedInitResponse); err != nil { + return nil, fmt.Errorf("failed to unmarshal extrainfo") + } + + return &signedInitResponse.InitResponse, nil +} + +func (sc *kmReplicateImpl) waitKeymanagerStatuses(ctx context.Context, n int) (*keymanager.Status, error) { + sc.Logger.Info("waiting for key manager status", "n", n) + + stCh, stSub, err := sc.Net.Controller().Keymanager.WatchStatuses(ctx) + if err != nil { + return nil, err + } + defer stSub.Close() + + for { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case status := <-stCh: + if !status.ID.Equal(&keymanagerID) { + continue + } + n-- + if n <= 0 { + return status, nil + } + } + } +} + +func (sc *Scenario) waitMasterSecret(ctx context.Context, generation uint64) (*keymanager.Status, error) { + sc.Logger.Info("waiting for master secret", "generation", generation) + + mstCh, mstSub, err := sc.Net.Controller().Keymanager.WatchMasterSecrets(ctx) + if err != nil { + return nil, err + } + defer mstSub.Close() + + stCh, stSub, err := sc.Net.Controller().Keymanager.WatchStatuses(ctx) + if err != nil { + return nil, err + } + defer stSub.Close() + + var last *keymanager.Status + for { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case secret := <-mstCh: + if !secret.Secret.ID.Equal(&keymanagerID) { + continue + } + + sc.Logger.Info("master secret proposed", + "generation", secret.Secret.Generation, + "epoch", secret.Secret.Epoch, + ) + case 
status := <-stCh: + if !status.ID.Equal(&keymanagerID) { + continue + } + if status.NextGeneration() == 0 { + continue + } + if last != nil && status.Generation == last.Generation { + last = status + continue + } + + sc.Logger.Info("master secret rotation", + "generation", status.Generation, + "rotation_epoch", status.RotationEpoch, + ) + + if status.Generation >= generation { + return status, nil + } + last = status + } + } +} + +func (sc *Scenario) updateRotationInterval(ctx context.Context, nonce uint64, childEnv *env.Env, rotationInterval beacon.EpochTime) error { + sc.Logger.Info("updating master secret rotation interval in the key manager policy", + "interval", rotationInterval, + ) + + status, err := sc.keymanagerStatus(ctx) + if err != nil { + return err + } + + // Update the policy, or create a new one if it doesn't already exist. + var policy keymanager.PolicySGX + if status != nil && status.Policy != nil { + policy = status.Policy.Policy + policy.Serial++ + } else { + policy.Serial = 1 + policy.ID = keymanagerID + policy.Enclaves = make(map[sgx.EnclaveIdentity]*keymanager.EnclavePolicySGX) + } + policy.MasterSecretRotationInterval = rotationInterval + + // Sign and publish the new policy. 
+ kmPolicyPath := filepath.Join(childEnv.Dir(), "km_policy.cbor") + kmPolicySig1Path := filepath.Join(childEnv.Dir(), "km_policy_sig1.pem") + kmPolicySig2Path := filepath.Join(childEnv.Dir(), "km_policy_sig2.pem") + kmPolicySig3Path := filepath.Join(childEnv.Dir(), "km_policy_sig3.pem") + kmUpdateTxPath := filepath.Join(childEnv.Dir(), "km_gen_update.json") + + sc.Logger.Info("saving key manager policy") + raw := cbor.Marshal(policy) + if err = os.WriteFile(kmPolicyPath, raw, 0o644); err != nil { // nolint: gosec + return err + } + + sc.Logger.Info("signing key manager policy") + cli := cli.New(childEnv, sc.Net, sc.Logger) + if err := cli.Keymanager.SignPolicy("1", kmPolicyPath, kmPolicySig1Path); err != nil { + return err + } + if err := cli.Keymanager.SignPolicy("2", kmPolicyPath, kmPolicySig2Path); err != nil { + return err + } + if err := cli.Keymanager.SignPolicy("3", kmPolicyPath, kmPolicySig3Path); err != nil { + return err + } + + sc.Logger.Info("updating key manager policy") + if err := cli.Keymanager.GenUpdate(nonce, kmPolicyPath, []string{kmPolicySig1Path, kmPolicySig2Path, kmPolicySig3Path}, kmUpdateTxPath); err != nil { + return err + } + if err := cli.Consensus.SubmitTx(kmUpdateTxPath); err != nil { + return fmt.Errorf("failed to update key manager policy: %w", err) + } + + return nil +} + +func (sc *Scenario) compareLongtermPublicKeys(ctx context.Context, idxs []int) error { + chainContext, err := sc.Net.Controller().Consensus.GetChainContext(ctx) + if err != nil { + return err + } + + status, err := sc.keymanagerStatus(ctx) + if err != nil { + return err + } + + var generation uint64 + if status.Generation > 0 { + // Avoid verification problems when the consensus verifier is one block behind. 
+ generation = status.Generation - 1 + } + + sc.Logger.Info("comparing the key managers for master secrets", + "ids", idxs, + "generation", generation, + ) + + keys := make(map[uint64]*x25519.PublicKey) + kms := sc.Net.Keymanagers() + for _, idx := range idxs { + km := kms[idx] + + // Prepare an RPC client which will be used to query key manager nodes + // for public ephemeral keys. + rpcClient, err := newKeyManagerRPCClient(chainContext) + if err != nil { + return err + } + peerID, err := rpcClient.addKeyManagerAddrToHost(km) + if err != nil { + return err + } + + for gen := uint64(0); gen <= generation; gen++ { + sc.Logger.Info("fetching public key", "generation", gen, "node", km.Name) + + var key *x25519.PublicKey + key, err = rpcClient.fetchPublicKey(ctx, gen, peerID) + switch { + case err != nil: + return err + case key == nil: + return fmt.Errorf("master secret generation %d not found", gen) + } + + if expected, ok := keys[gen]; ok && !bytes.Equal(expected[:], key[:]) { + return fmt.Errorf("derived keys don't match: expected %+X, given %+X", expected, key) + } + keys[gen] = key + + sc.Logger.Info("public key fetched", "key", fmt.Sprintf("%+X", key)) + } + if err != nil { + return err + } + } + if expected, size := int(generation)+1, len(keys); expected != size { + return fmt.Errorf("the number of derived keys doesn't match: expected %d, found %d", expected, size) + } + + return nil +} diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go index 34cb4829619..439f1e9a0c7 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go @@ -13,6 +13,7 @@ import ( memorySigner "github.com/oasisprotocol/oasis-core/go/common/crypto/signature/signers/memory" "github.com/oasisprotocol/oasis-core/go/common/node" "github.com/oasisprotocol/oasis-core/go/common/sgx" + consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" 
"github.com/oasisprotocol/oasis-core/go/consensus/api/transaction" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/cmd" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env" @@ -623,6 +624,43 @@ func (sc *Scenario) waitNodesSynced(ctx context.Context) error { return nil } +func (sc *Scenario) waitBlocks(ctx context.Context, n int) (*consensus.Block, error) { + sc.Logger.Info("waiting for blocks", "n", n) + + blockCh, blockSub, err := sc.Net.Controller().Consensus.WatchBlocks(ctx) + if err != nil { + return nil, err + } + defer blockSub.Close() + + var blk *consensus.Block + for i := 0; i < n; i++ { + select { + case blk = <-blockCh: + sc.Logger.Info("new block", + "height", blk.Height, + ) + case <-ctx.Done(): + return nil, fmt.Errorf("timed out waiting for blocks") + } + } + + return blk, nil +} + +func (sc *Scenario) waitEpochs(ctx context.Context, n beacon.EpochTime) error { + sc.Logger.Info("waiting few epochs", "n", n) + + epoch, err := sc.Net.ClientController().Beacon.GetEpoch(ctx, consensus.HeightLatest) + if err != nil { + return err + } + if err := sc.Net.ClientController().Beacon.WaitEpoch(ctx, epoch+n); err != nil { + return err + } + return nil +} + func (sc *Scenario) initialEpochTransitions(ctx context.Context, fixture *oasis.NetworkFixture) (beacon.EpochTime, error) { return sc.initialEpochTransitionsWith(ctx, fixture, 0) } @@ -779,12 +817,13 @@ func RegisterScenarios() error { StorageEarlyStateSync, // Sentry test. Sentry, - // Keymanager ephemeral keys test. + // Keymanager tests. + KeymanagerMasterSecrets, KeymanagerEphemeralKeys, - // Keymanager restart test. + KeymanagerDumpRestore, KeymanagerRestart, - // Keymanager replicate test. KeymanagerReplicate, + KeymanagerUpgrade, // Dump/restore test. DumpRestore, DumpRestoreRuntimeRoundAdvance, @@ -811,8 +850,6 @@ func RegisterScenarios() error { TxSourceMultiShort, // Late start test. LateStart, - // KeymanagerUpgrade test. - KeymanagerUpgrade, // RuntimeUpgrade test. 
RuntimeUpgrade, // HistoryReindex test. diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime_client_kv.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime_client_kv.go index 2ffb662dfc7..86483a0cb38 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime_client_kv.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime_client_kv.go @@ -103,6 +103,17 @@ func (cli *KVTestClient) workload(ctx context.Context) error { return err } + cli.sc.Logger.Info("waiting for key managers to generate the first master secret") + + if _, err = cli.sc.waitMasterSecret(ctx, 0); err != nil { + return fmt.Errorf("first master secret not generated: %w", err) + } + // The CometBFT verifier is one block behind, so wait for an additional + // two blocks to ensure that the first secret has been loaded. + if _, err = cli.sc.waitBlocks(ctx, 2); err != nil { + return fmt.Errorf("failed to wait two blocks: %w", err) + } + cli.sc.Logger.Info("starting k/v runtime test client") if err := cli.scenario(func(req interface{}) error { diff --git a/go/oasis-test-runner/scenario/e2e/runtime/trust_root.go b/go/oasis-test-runner/scenario/e2e/runtime/trust_root.go index 20dadeec1d7..4ed230d5f48 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/trust_root.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/trust_root.go @@ -214,27 +214,6 @@ func (sc *TrustRootImpl) updateKeyManagerPolicy(ctx context.Context, childEnv *e return nil } -func (sc *TrustRootImpl) waitBlocks(ctx context.Context, n int) (*consensus.Block, error) { - sc.Logger.Info("waiting for a block") - - blockCh, blockSub, err := sc.Net.Controller().Consensus.WatchBlocks(ctx) - if err != nil { - return nil, err - } - defer blockSub.Close() - - var blk *consensus.Block - for i := 0; i < n; i++ { - select { - case blk = <-blockCh: - case <-ctx.Done(): - return nil, fmt.Errorf("timed out waiting for blocks") - } - } - - return blk, nil -} - func (sc *TrustRootImpl) chainContext(ctx context.Context) (string, error) { 
sc.Logger.Info("fetching consensus chain context") diff --git a/go/worker/keymanager/api/api.go b/go/worker/keymanager/api/api.go index c82fbc2a7ab..9eb2c288f7f 100644 --- a/go/worker/keymanager/api/api.go +++ b/go/worker/keymanager/api/api.go @@ -114,18 +114,32 @@ type WorkerStatus struct { // PolicyChecksum is the checksum of the key manager policy. PolicyChecksum []byte `json:"policy_checksum"` + // MasterSecrets are the master secret generation and replication stats. + MasterSecrets MasterSecretStats `json:"master_secrets"` // EphemeralSecrets are the ephemeral secret generation and replication stats. EphemeralSecrets EphemeralSecretStats `json:"ephemeral_secrets"` } +// MasterSecretStats are the master secret generation and replication stats. +type MasterSecretStats struct { + // NumLoaded is the number of loaded secrets. + NumLoaded int `json:"num_loaded"` + // LastLoaded is the generation of the last loaded secret. + LastLoaded uint64 `json:"last_loaded_generation"` + // NumGenerated is the number of generated secrets. + NumGenerated int `json:"num_generated"` + // LastGenerated is the generation of the last generated secret. + LastGenerated uint64 `json:"last_generated_generation"` +} + // EphemeralSecretStats are the ephemeral secret generation and replication stats. type EphemeralSecretStats struct { - // NumLoaded is the number of loaded ephemeral secrets. + // NumLoaded is the number of loaded secrets. NumLoaded int `json:"num_loaded"` - // LastLoaded is the epoch of the last loaded ephemeral secret. + // LastLoaded is the epoch of the last loaded secret. LastLoaded beacon.EpochTime `json:"last_loaded_epoch"` - // NumGenerated is the number of generated ephemeral secrets. + // NumGenerated is the number of generated secrets. NumGenerated int `json:"num_generated"` - // LastGenerated is the epoch of the last generated ephemeral secret. + // LastGenerated is the epoch of the last generated secret. 
LastGenerated beacon.EpochTime `json:"last_generated_epoch"` } diff --git a/go/worker/keymanager/init.go b/go/worker/keymanager/init.go index 443e8fa786a..2464082a7eb 100644 --- a/go/worker/keymanager/init.go +++ b/go/worker/keymanager/init.go @@ -57,10 +57,10 @@ func New( enabled: enabled, mayGenerate: config.GlobalConfig.Keymanager.MayGenerate, initEnclaveDoneCh: make(chan *api.SignedInitResponse, 1), - loadSecretCh: make(chan struct{}, 1), - genSecretCh: make(chan struct{}, 1), - genSecretDoneCh: make(chan bool, 1), - genSecretHeight: int64(math.MaxInt64), + genMstSecDoneCh: make(chan bool, 1), + genMstSecEpoch: math.MaxUint64, + genEphSecDoneCh: make(chan bool, 1), + genSecHeight: int64(math.MaxInt64), } if !w.enabled { diff --git a/go/worker/keymanager/status.go b/go/worker/keymanager/status.go index 7a53a2427d1..668ebcfa4fc 100644 --- a/go/worker/keymanager/status.go +++ b/go/worker/keymanager/status.go @@ -62,13 +62,6 @@ func (w *Worker) GetStatus(ctx context.Context) (*api.Status, error) { pc = w.enclaveStatus.InitResponse.PolicyChecksum } - es := api.EphemeralSecretStats{ - NumLoaded: w.numLoadedSecrets, - LastLoaded: w.lastLoadedSecret, - NumGenerated: w.numGeneratedSecrets, - LastGenerated: w.lastGeneratedSecret, - } - gs := w.globalStatus ws := api.WorkerStatus{ Status: ss, @@ -78,7 +71,8 @@ func (w *Worker) GetStatus(ctx context.Context) (*api.Status, error) { PrivatePeers: ps, Policy: w.policy, PolicyChecksum: pc, - EphemeralSecrets: es, + MasterSecrets: w.masterSecretStats, + EphemeralSecrets: w.ephemeralSecretStats, } return &api.Status{ diff --git a/go/worker/keymanager/worker.go b/go/worker/keymanager/worker.go index 6a24fe9e385..b2b9f6d4e95 100644 --- a/go/worker/keymanager/worker.go +++ b/go/worker/keymanager/worker.go @@ -37,15 +37,16 @@ import ( runtimeRegistry "github.com/oasisprotocol/oasis-core/go/runtime/registry" scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" workerCommon 
"github.com/oasisprotocol/oasis-core/go/worker/common" + workerKeymanager "github.com/oasisprotocol/oasis-core/go/worker/keymanager/api" "github.com/oasisprotocol/oasis-core/go/worker/registration" ) const ( rpcCallTimeout = 2 * time.Second - loadEphemeralSecretMaxRetries = 5 - generateEphemeralSecretMaxRetries = 5 - ephemeralSecretCacheSize = 20 + generateSecretMaxRetries = 5 + loadSecretMaxRetries = 5 + ephemeralSecretCacheSize = 20 ) var ( @@ -93,10 +94,8 @@ type Worker struct { // nolint: maligned enclaveStatus *api.SignedInitResponse policy *api.SignedPolicySGX - numLoadedSecrets int - lastLoadedSecret beacon.EpochTime - numGeneratedSecrets int - lastGeneratedSecret beacon.EpochTime + masterSecretStats workerKeymanager.MasterSecretStats + ephemeralSecretStats workerKeymanager.EphemeralSecretStats enabled bool mayGenerate bool @@ -110,15 +109,22 @@ type Worker struct { // nolint: maligned initEnclaveRetryCh <-chan time.Time initEnclaveRetryTicker *backoff.Ticker - secrets []*api.SignedEncryptedEphemeralSecret + mstSecret *api.SignedEncryptedMasterSecret - loadSecretCh chan struct{} - loadSecretRetry int - genSecretCh chan struct{} - genSecretDoneCh chan bool - genSecretHeight int64 - genSecretInProgress bool - genSecretRetry int + loadMstSecRetry int + genMstSecDoneCh chan bool + genMstSecEpoch beacon.EpochTime + genMstSecInProgress bool + genMstSecRetry int + + ephSecrets []*api.SignedEncryptedEphemeralSecret + + loadEphSecRetry int + genEphSecDoneCh chan bool + genEphSecInProgress bool + genEphSecRetry int + + genSecHeight int64 } func (w *Worker) Name() string { @@ -170,6 +176,12 @@ func (w *Worker) Initialized() <-chan struct{} { } func (w *Worker) CallEnclave(ctx context.Context, data []byte, kind enclaverpc.Kind) ([]byte, error) { + select { + case <-w.initCh: + default: + return nil, fmt.Errorf("not initialized") + } + switch kind { case enclaverpc.KindNoiseSession: // Handle access control as only peers on the access list can call this method. 
@@ -212,13 +224,6 @@ func (w *Worker) CallEnclave(ctx context.Context, data []byte, kind enclaverpc.K ctx, cancel := context.WithTimeout(ctx, rpcCallTimeout) defer cancel() - // Wait for initialization to complete. - select { - case <-w.initCh: - case <-ctx.Done(): - return nil, ctx.Err() - } - req := &protocol.Body{ RuntimeRPCCallRequest: &protocol.RuntimeRPCCallRequest{ Request: data, @@ -226,7 +231,7 @@ func (w *Worker) CallEnclave(ctx context.Context, data []byte, kind enclaverpc.K }, } - // NOTE: Hosted runtime should not be nil as we wait for initialization above. + // Hosted runtime should not be nil as we are initialized. rt := w.GetHostedRuntime() response, err := rt.Call(ctx, req) if err != nil { @@ -262,6 +267,7 @@ func (w *Worker) localCallEnclave(method string, args interface{}, rsp interface response, err := rt.Call(w.ctx, body) if err != nil { w.logger.Error("failed to dispatch local RPC call to runtime", + "method", method, "err", err, ) return err @@ -270,6 +276,7 @@ func (w *Worker) localCallEnclave(method string, args interface{}, rsp interface resp := response.RuntimeLocalRPCCallResponse if resp == nil { w.logger.Error("malformed response from runtime", + "method", method, "response", response, ) return errMalformedResponse @@ -322,8 +329,7 @@ func (w *Worker) initEnclave(kmStatus *api.Status, rtStatus *runtimeStatus) (*ap var args api.InitRequest if rtInfo.Features != nil && rtInfo.Features.KeyManagerMasterSecretRotation { args = api.InitRequest{ - Status: kmStatus, - MayGenerate: w.mayGenerate, + Status: kmStatus, } } else { var policy []byte @@ -371,8 +377,10 @@ func (w *Worker) initEnclave(kmStatus *api.Status, rtStatus *runtimeStatus) (*ap w.logger.Info("key manager enclave initialized", "is_secure", signedInitResp.InitResponse.IsSecure, "checksum", hex.EncodeToString(signedInitResp.InitResponse.Checksum), + "next_checksum", hex.EncodeToString(signedInitResp.InitResponse.NextChecksum), "policy_checksum", 
hex.EncodeToString(signedInitResp.InitResponse.PolicyChecksum), "rsk", signedInitResp.InitResponse.RSK, + "next_rsk", signedInitResp.InitResponse.NextRSK, ) // Cache the key manager enclave status and the currently active policy. @@ -390,11 +398,12 @@ func (w *Worker) initEnclave(kmStatus *api.Status, rtStatus *runtimeStatus) (*ap } func (w *Worker) registerNode(rsp *api.SignedInitResponse) { - w.logger.Info("registering key manager using the latest init response", + w.logger.Info("registering key manager", "is_secure", rsp.InitResponse.IsSecure, "checksum", hex.EncodeToString(rsp.InitResponse.Checksum), "policy_checksum", hex.EncodeToString(rsp.InitResponse.PolicyChecksum), "rsk", rsp.InitResponse.RSK, + "next_rsk", rsp.InitResponse.NextRSK, ) // Register as we are now ready to handle requests. @@ -428,20 +437,36 @@ func (w *Worker) setStatus(status *api.Status) { w.globalStatus = status } -func (w *Worker) setLastGeneratedSecretEpoch(epoch beacon.EpochTime) { +func (w *Worker) setLastGeneratedMasterSecretGeneration(generation uint64) { w.Lock() defer w.Unlock() - w.numGeneratedSecrets++ - w.lastGeneratedSecret = epoch + w.masterSecretStats.NumGenerated++ + w.masterSecretStats.LastGenerated = generation } -func (w *Worker) setLastLoadedSecretEpoch(epoch beacon.EpochTime) { +func (w *Worker) setLastLoadedMasterSecretGeneration(generation uint64) { w.Lock() defer w.Unlock() - w.numLoadedSecrets++ - w.lastLoadedSecret = epoch + w.masterSecretStats.NumLoaded++ + w.masterSecretStats.LastLoaded = generation +} + +func (w *Worker) setLastGeneratedEphemeralSecretEpoch(epoch beacon.EpochTime) { + w.Lock() + defer w.Unlock() + + w.ephemeralSecretStats.NumGenerated++ + w.ephemeralSecretStats.LastGenerated = epoch +} + +func (w *Worker) setLastLoadedEphemeralSecretEpoch(epoch beacon.EpochTime) { + w.Lock() + defer w.Unlock() + + w.ephemeralSecretStats.NumLoaded++ + w.ephemeralSecretStats.LastLoaded = epoch } func (w *Worker) addClientRuntimeWatcher(n common.Namespace, crw 
*clientRuntimeWatcher) { @@ -601,13 +626,90 @@ func (w *Worker) setAccessList(runtimeID common.Namespace, nodes []*node.Node) { ) } -func (w *Worker) generateEphemeralSecret(runtimeID common.Namespace, epoch beacon.EpochTime, kmStatus *api.Status, runtimeStatus *runtimeStatus) error { +func (w *Worker) generateMasterSecret(runtimeID common.Namespace, generation uint64, epoch beacon.EpochTime, kmStatus *api.Status, rtStatus *runtimeStatus) error { + w.logger.Info("generating master secret", + "generation", generation, + "epoch", epoch, + ) + // Check if the master secret has been proposed in this epoch. + // Note that despite this check, the nodes can still publish master secrets at the same time. + lastSecret, err := w.commonWorker.Consensus.KeyManager().GetMasterSecret(w.ctx, ®istry.NamespaceQuery{ + Height: consensus.HeightLatest, + ID: runtimeID, + }) + if err != nil && err != api.ErrNoSuchMasterSecret { + return err + } + if lastSecret != nil && epoch == lastSecret.Secret.Epoch { + return fmt.Errorf("master secret can be proposed once per epoch") + } + + // Check if rotation is allowed. + if err = kmStatus.VerifyRotationEpoch(epoch); err != nil { + return err + } + + // Skip generation if the node is not in the key manager committee. + id := w.commonWorker.Identity.NodeSigner.Public() + if !slices.Contains(kmStatus.Nodes, id) { + w.logger.Info("skipping master secret generation, node not in the key manager committee") + return fmt.Errorf("node not in the key manager committee") + } + + // Generate master secret. + args := api.GenerateMasterSecretRequest{ + Generation: generation, + Epoch: epoch, + } + + var rsp api.GenerateMasterSecretResponse + if err = w.localCallEnclave(api.RPCMethodGenerateMasterSecret, args, &rsp); err != nil { + w.logger.Error("failed to generate master secret", + "err", err, + ) + return fmt.Errorf("failed to generate master secret: %w", err) + } + + // Fetch key manager runtime details. 
+ kmRt, err := w.commonWorker.Consensus.Registry().GetRuntime(w.ctx, ®istry.GetRuntimeQuery{ + Height: consensus.HeightLatest, + ID: kmStatus.ID, + }) + if err != nil { + return err + } + + rak, err := w.runtimeAttestationKey(rtStatus, kmRt) + if err != nil { + return err + } + + reks, err := w.runtimeEncryptionKeys(kmStatus, kmRt) + if err != nil { + return err + } + + // Verify the response. + if err = rsp.SignedSecret.Verify(generation, epoch, reks, rak); err != nil { + return fmt.Errorf("failed to validate master secret signature: %w", err) + } + + // Publish transaction. + tx := api.NewPublishMasterSecretTx(0, nil, &rsp.SignedSecret) + if err = consensus.SignAndSubmitTx(w.ctx, w.commonWorker.Consensus, w.commonWorker.Identity.NodeSigner, tx); err != nil { + return err + } + + return err +} + +func (w *Worker) generateEphemeralSecret(runtimeID common.Namespace, epoch beacon.EpochTime, kmStatus *api.Status, rtStatus *runtimeStatus) error { w.logger.Info("generating ephemeral secret", "epoch", epoch, ) - // Check if secret has been published. Note that despite this check, the nodes can still publish - // ephemeral secrets at the same time. + // Check if the ephemeral secret has been published in this epoch. + // Note that despite this check, the nodes can still publish ephemeral secrets at the same time. _, err := w.commonWorker.Consensus.KeyManager().GetEphemeralSecret(w.ctx, ®istry.NamespaceEpochQuery{ Height: consensus.HeightLatest, ID: runtimeID, @@ -655,25 +757,52 @@ func (w *Worker) generateEphemeralSecret(runtimeID common.Namespace, epoch beaco return err } - // Fetch RAK. + rak, err := w.runtimeAttestationKey(rtStatus, kmRt) + if err != nil { + return err + } + + reks, err := w.runtimeEncryptionKeys(kmStatus, kmRt) + if err != nil { + return err + } + + // Verify the response. + if err = rsp.SignedSecret.Verify(epoch, reks, rak); err != nil { + return fmt.Errorf("failed to validate ephemeral secret signature: %w", err) + } + + // Publish transaction. 
+ tx := api.NewPublishEphemeralSecretTx(0, nil, &rsp.SignedSecret) + if err = consensus.SignAndSubmitTx(w.ctx, w.commonWorker.Consensus, w.commonWorker.Identity.NodeSigner, tx); err != nil { + return err + } + + return err +} + +func (w *Worker) runtimeAttestationKey(rtStatus *runtimeStatus, kmRt *registry.Runtime) (*signature.PublicKey, error) { var rak *signature.PublicKey switch kmRt.TEEHardware { case node.TEEHardwareInvalid: rak = &api.InsecureRAK case node.TEEHardwareIntelSGX: - if runtimeStatus.capabilityTEE == nil { - return fmt.Errorf("node doesn't have TEE capability") + if rtStatus.capabilityTEE == nil { + return nil, fmt.Errorf("node doesn't have TEE capability") } - rak = &runtimeStatus.capabilityTEE.RAK + rak = &rtStatus.capabilityTEE.RAK default: - return fmt.Errorf("TEE hardware mismatch") + return nil, fmt.Errorf("TEE hardware mismatch") } - // Fetch REKs of the key manager committee. + return rak, nil +} + +func (w *Worker) runtimeEncryptionKeys(kmStatus *api.Status, kmRt *registry.Runtime) (map[x25519.PublicKey]struct{}, error) { reks := make(map[x25519.PublicKey]struct{}) for _, id := range kmStatus.Nodes { var n *node.Node - n, err = w.commonWorker.Consensus.Registry().GetNode(w.ctx, ®istry.IDQuery{ + n, err := w.commonWorker.Consensus.Registry().GetNode(w.ctx, ®istry.IDQuery{ Height: consensus.HeightLatest, ID: id, }) @@ -682,7 +811,7 @@ func (w *Worker) generateEphemeralSecret(runtimeID common.Namespace, epoch beaco case registry.ErrNoSuchNode: continue default: - return err + return nil, err } idx := slices.IndexFunc(n.Runtimes, func(rt *node.Runtime) bool { @@ -711,18 +840,28 @@ func (w *Worker) generateEphemeralSecret(runtimeID common.Namespace, epoch beaco reks[rek] = struct{}{} } - // Verify the response. 
- if err = rsp.SignedSecret.Verify(epoch, reks, rak); err != nil { - return fmt.Errorf("failed to validate generate ephemeral secret response signature: %w", err) + return reks, nil +} + +func (w *Worker) loadMasterSecret(sigSecret *api.SignedEncryptedMasterSecret) error { + w.logger.Info("loading master secret", + "generation", sigSecret.Secret.Generation, + "epoch", sigSecret.Secret.Epoch, + ) + + args := api.LoadMasterSecretRequest{ + SignedSecret: *sigSecret, } - // Publish transaction. - tx := api.NewPublishEphemeralSecretTx(0, nil, &rsp.SignedSecret) - if err = consensus.SignAndSubmitTx(w.ctx, w.commonWorker.Consensus, w.commonWorker.Identity.NodeSigner, tx); err != nil { - return err + var rsp protocol.Empty + if err := w.localCallEnclave(api.RPCMethodLoadMasterSecret, args, &rsp); err != nil { + w.logger.Error("failed to load master secret", + "err", err, + ) + return fmt.Errorf("failed to load master secret: %w", err) } - return err + return nil } func (w *Worker) loadEphemeralSecret(sigSecret *api.SignedEncryptedEphemeralSecret) error { @@ -809,11 +948,50 @@ func (w *Worker) randomBlockHeight(epoch beacon.EpochTime, percentile int64) (in return height, nil } +func (w *Worker) updateGenerateMasterSecretEpoch() { + var nextEpoch beacon.EpochTime + + // If at least one master secret has been generated, respect the rotation interval. + nextGen := w.kmStatus.NextGeneration() + if nextGen != 0 { + // Disable rotation if the policy is not set. + var rotationInterval beacon.EpochTime + if w.kmStatus.Policy != nil { + rotationInterval = w.kmStatus.Policy.Policy.MasterSecretRotationInterval + } + + // Secrets are allowed to be generated at most one epoch before the rotation. + nextEpoch = w.kmStatus.RotationEpoch + rotationInterval - 1 + + // Rotation not allowed. + if rotationInterval == 0 { + nextEpoch = math.MaxUint64 + } + } + + // If a master secret has been proposed, wait for the next epoch. 
+ if w.mstSecret != nil && nextEpoch < w.mstSecret.Secret.Epoch { + nextEpoch = w.mstSecret.Secret.Epoch + } + + w.genMstSecEpoch = nextEpoch + + w.logger.Debug("epoch for generating master secret updated", + "epoch", w.genMstSecEpoch, + ) +} + func (w *Worker) handleStatusUpdate(kmStatus *api.Status) { if kmStatus == nil || !kmStatus.ID.Equal(&w.runtimeID) { return } + w.logger.Debug("key manager status updated", + "generation", kmStatus.Generation, + "rotation_epoch", kmStatus.RotationEpoch, + "checksum", hex.EncodeToString(kmStatus.Checksum), + ) + // Cache the latest status. w.setStatus(kmStatus) w.kmStatus = kmStatus @@ -827,8 +1005,10 @@ func (w *Worker) handleStatusUpdate(kmStatus *api.Status) { w.logger.Error("failed rechecking runtimes", "err", err, ) - return } + + // The epoch for generating the next master secret may change with the policy update. + w.updateGenerateMasterSecretEpoch() } func (w *Worker) handleInitEnclave() { @@ -907,19 +1087,16 @@ func (w *Worker) handleRuntimeHostEvent(ev *host.Event) { return } - // Fetch last few ephemeral secrets and send a signal to load them. + // Fetch last few ephemeral secrets and load them. var err error - w.secrets, err = w.fetchLastEphemeralSecrets(w.runtimeID) + w.ephSecrets, err = w.fetchLastEphemeralSecrets(w.runtimeID) if err != nil { w.logger.Error("failed to fetch last ephemeral secrets", "err", err, ) } - w.loadSecretRetry = 0 - select { - case w.loadSecretCh <- struct{}{}: - default: - } + w.loadEphSecRetry = 0 + w.handleLoadEphemeralSecret() if w.kmStatus == nil { return @@ -969,46 +1146,140 @@ func (w *Worker) handleNewEpoch(epoch beacon.EpochTime) { crw.epochTransition() } - // Choose a random height for ephemeral secret generation. Avoid blocks at the end - // of the epoch as secret generation, publication and replication takes some time. 
- var err error - if w.genSecretHeight, err = w.randomBlockHeight(epoch, 90); err != nil { - // If randomization fails, the height will be set to zero meaning that the ephemeral - // secret will be generated immediately without a delay. - w.logger.Error("failed to select ephemeral secret block height", + // Choose a random height for generating master/ephemeral secrets. + // Avoid blocks at the end of the epoch as secret generation, + // publication and replication takes some time. + height, err := w.randomBlockHeight(epoch, 50) + if err != nil { + // If randomization fails, the height will be set to zero meaning that + // the secrets will be generated immediately without a delay. + w.logger.Error("failed to select a random block height", "err", err, ) } - w.genSecretRetry = 0 - w.logger.Debug("block height for ephemeral secret generation selected", - "height", w.genSecretHeight, + w.logger.Debug("block height for generating secrets selected", + "height", height, "epoch", epoch, ) + + // Reset retries. + w.genSecHeight = height + w.genMstSecRetry = 0 + w.genEphSecRetry = 0 } -func (w *Worker) handleNewBlock(blk *consensus.Block) { +func (w *Worker) handleNewBlock(blk *consensus.Block, epoch beacon.EpochTime) { if blk == nil { w.logger.Error("watch blocks channel closed unexpectedly") return } - // (Re)Generate ephemeral secret once we reach the chosen height. - if blk.Height >= w.genSecretHeight { - select { - case w.genSecretCh <- struct{}{}: - default: - } - } + // (Re)Generate master/ephemeral secrets once we reach the chosen height and epoch. + w.handleGenerateMasterSecret(blk.Height, epoch) + w.handleGenerateEphemeralSecret(blk.Height, epoch) - // (Re)Load ephemeral secrets. When using CometBFT as a backend service the first load + // (Re)Load master/ephemeral secrets. + // When using CometBFT as a backend service the first load // will probably fail as the verifier is one block behind. 
- if len(w.secrets) > 0 { - select { - case w.loadSecretCh <- struct{}{}: - default: + w.handleLoadMasterSecret() + w.handleLoadEphemeralSecret() +} + +func (w *Worker) handleNewMasterSecret(secret *api.SignedEncryptedMasterSecret) { + if !secret.Secret.ID.Equal(&w.runtimeID) { + return + } + + w.logger.Debug("master secret published", + "generation", secret.Secret.Generation, + "epoch", secret.Secret.Epoch, + "checksum", hex.EncodeToString(secret.Secret.Secret.Checksum), + ) + + w.mstSecret = secret + w.loadMstSecRetry = 0 + + w.updateGenerateMasterSecretEpoch() + w.handleLoadMasterSecret() +} + +func (w *Worker) handleGenerateMasterSecret(height int64, epoch beacon.EpochTime) { + if w.kmStatus == nil || w.rtStatus == nil { + return + } + if w.genMstSecInProgress || w.genMstSecRetry > generateSecretMaxRetries { + return + } + if w.genSecHeight > height || w.genMstSecEpoch > epoch { + return + } + + // Lock. Allow only one active master secret generation. + w.genMstSecInProgress = true + + // Master secrets are generated for the next generation and for the next epoch. + nextGen := w.kmStatus.NextGeneration() + nextEpoch := epoch + 1 + retry := w.genMstSecRetry + + // Retry only few times per epoch. + w.genMstSecRetry++ + + // Submitting transaction can take time, so don't block the loop. + generateMasterSecret := func(kmStatus *api.Status, rtStatus *runtimeStatus) { + if err := w.generateMasterSecret(w.runtimeID, nextGen, nextEpoch, kmStatus, rtStatus); err != nil { + w.logger.Error("failed to generate master secret", + "err", err, + "retry", retry, + ) + w.genMstSecDoneCh <- false + return } + + w.setLastGeneratedMasterSecretGeneration(nextGen) + w.genMstSecDoneCh <- true } + + go generateMasterSecret(w.kmStatus, w.rtStatus) +} + +func (w *Worker) handleGenerateMasterSecretDone(ok bool) { + // Unlock. + w.genMstSecInProgress = false + + // Disarm master secret generation if we are still in the same epoch. 
+ if ok && w.genMstSecRetry > 0 { + w.genMstSecRetry = math.MaxInt64 + } +} + +func (w *Worker) handleLoadMasterSecret() { + if w.kmStatus == nil || w.rtStatus == nil || w.mstSecret == nil { + return + } + if w.loadMstSecRetry > loadSecretMaxRetries { + return + } + + // Retry only few times per epoch. + w.loadMstSecRetry++ + + if err := w.loadMasterSecret(w.mstSecret); err != nil { + w.logger.Error("failed to load master secret", + "err", err, + "retry", w.loadMstSecRetry-1, + ) + return + } + + // Disarm master secret loading. + w.loadMstSecRetry = math.MaxInt64 + w.setLastLoadedMasterSecretGeneration(w.mstSecret.Secret.Generation) + + // Announce that the enclave has replicated the proposal for the next master + // secret and is ready for rotation. + w.handleInitEnclave() } func (w *Worker) handleNewEphemeralSecret(secret *api.SignedEncryptedEphemeralSecret, epoch beacon.EpochTime) { @@ -1022,76 +1293,88 @@ func (w *Worker) handleNewEphemeralSecret(secret *api.SignedEncryptedEphemeralSe if secret.Secret.Epoch == epoch+1 { // Disarm ephemeral secret generation. - w.genSecretHeight = math.MaxInt64 + w.genEphSecRetry = math.MaxInt64 } // Add secret to the list and send a signal to load it. - w.secrets = append(w.secrets, secret) - w.loadSecretRetry = 0 - select { - case w.loadSecretCh <- struct{}{}: - default: - } + w.ephSecrets = append(w.ephSecrets, secret) + w.loadEphSecRetry = 0 + + w.handleLoadEphemeralSecret() } -func (w *Worker) handleGenerateEphemeralSecret(epoch beacon.EpochTime) { - if w.kmStatus == nil || w.rtStatus == nil || w.genSecretInProgress || w.genSecretHeight == math.MaxInt64 { +func (w *Worker) handleGenerateEphemeralSecret(height int64, epoch beacon.EpochTime) { + if w.kmStatus == nil || w.rtStatus == nil { return } - - w.genSecretRetry++ - if w.genSecretRetry > generateEphemeralSecretMaxRetries { - // Disarm ephemeral secret generation. 
- w.genSecretHeight = math.MaxInt64 + if w.genEphSecInProgress || w.genEphSecRetry > generateSecretMaxRetries { + return + } + if w.genSecHeight > height { + return } - w.genSecretInProgress = true + // Lock. Allow only one active ephemeral secret generation. + w.genEphSecInProgress = true + + // Ephemeral secrets are generated for the next epoch. + nextEpoch := epoch + 1 + retry := w.genEphSecRetry + + // Retry only few times per epoch. + w.genEphSecRetry++ // Submitting transaction can take time, so don't block the loop. - generateEphemeralSecret := func(epoch beacon.EpochTime, kmStatus *api.Status, rtStatus *runtimeStatus, retry int) { - err := w.generateEphemeralSecret(w.runtimeID, epoch, kmStatus, rtStatus) - if err != nil { + generateEphemeralSecret := func(kmStatus *api.Status, rtStatus *runtimeStatus) { + if err := w.generateEphemeralSecret(w.runtimeID, nextEpoch, kmStatus, rtStatus); err != nil { w.logger.Error("failed to generate ephemeral secret", "err", err, "retry", retry, ) - w.genSecretDoneCh <- false + w.genEphSecDoneCh <- false return } - w.genSecretDoneCh <- true - w.setLastGeneratedSecretEpoch(epoch) + + w.setLastGeneratedEphemeralSecretEpoch(nextEpoch) + w.genEphSecDoneCh <- true } - go generateEphemeralSecret(epoch+1, w.kmStatus, w.rtStatus, w.genSecretRetry-1) + go generateEphemeralSecret(w.kmStatus, w.rtStatus) } func (w *Worker) handleGenerateEphemeralSecretDone(ok bool) { - // Disarm ephemeral secret generation unless a new height was chosen. - if ok && w.genSecretRetry > 0 { - w.genSecretHeight = math.MaxInt64 + // Unlock. + w.genEphSecInProgress = false + + // Disarm ephemeral secret generation if we are still in the same epoch. 
+ if ok && w.genEphSecRetry > 0 { + w.genEphSecRetry = math.MaxInt64 } - w.genSecretInProgress = false } func (w *Worker) handleLoadEphemeralSecret() { + if w.kmStatus == nil || w.rtStatus == nil { + return + } + var failed []*api.SignedEncryptedEphemeralSecret - for _, secret := range w.secrets { + for _, secret := range w.ephSecrets { if err := w.loadEphemeralSecret(secret); err != nil { w.logger.Error("failed to load ephemeral secret", "err", err, - "retry", w.loadSecretRetry, + "retry", w.loadEphSecRetry, ) failed = append(failed, secret) continue } - w.setLastLoadedSecretEpoch(secret.Secret.Epoch) + w.setLastLoadedEphemeralSecretEpoch(secret.Secret.Epoch) } - w.secrets = failed + w.ephSecrets = failed - w.loadSecretRetry++ - if w.loadSecretRetry > loadEphemeralSecretMaxRetries { + w.loadEphSecRetry++ + if w.loadEphSecRetry > loadSecretMaxRetries { // Disarm ephemeral secret loading. - w.secrets = nil + w.ephSecrets = nil } } @@ -1102,8 +1385,11 @@ func (w *Worker) handleStop() { if w.initEnclaveInProgress { <-w.initEnclaveDoneCh } - if w.genSecretInProgress { - <-w.genSecretDoneCh + if w.genMstSecInProgress { + <-w.genMstSecDoneCh + } + if w.genEphSecInProgress { + <-w.genEphSecDoneCh } } @@ -1177,6 +1463,10 @@ func (w *Worker) worker() { statusCh, statusSub := w.backend.WatchStatuses() defer statusSub.Close() + // Subscribe to key manager master secret publications. + mstCh, mstSub := w.backend.WatchMasterSecrets() + defer mstSub.Close() + // Subscribe to key manager ephemeral secret publications. 
ephCh, ephSub := w.backend.WatchEphemeralSecrets() defer ephSub.Close() @@ -1237,14 +1527,14 @@ func (w *Worker) worker() { case epoch = <-epoCh: w.handleNewEpoch(epoch) case blk := <-blkCh: - w.handleNewBlock(blk) + w.handleNewBlock(blk, epoch) + case secret := <-mstCh: + w.handleNewMasterSecret(secret) + case ok := <-w.genMstSecDoneCh: + w.handleGenerateMasterSecretDone(ok) case secret := <-ephCh: w.handleNewEphemeralSecret(secret, epoch) - case <-w.loadSecretCh: - w.handleLoadEphemeralSecret() - case <-w.genSecretCh: - w.handleGenerateEphemeralSecret(epoch) - case ok := <-w.genSecretDoneCh: + case ok := <-w.genEphSecDoneCh: w.handleGenerateEphemeralSecretDone(ok) case <-w.stopCh: w.handleStop() diff --git a/keymanager/src/api/errors.rs b/keymanager/src/api/errors.rs index bf1234e90df..09f2f5f3f37 100644 --- a/keymanager/src/api/errors.rs +++ b/keymanager/src/api/errors.rs @@ -21,8 +21,8 @@ pub enum KeyManagerError { NotInitialized, #[error("key manager state corrupted")] StateCorrupted, - #[error("key manager replication required")] - ReplicationRequired, + #[error("key manager storage corrupted")] + StorageCorrupted, #[error("policy required")] PolicyRequired, #[error("policy rollback")] @@ -41,10 +41,14 @@ pub enum KeyManagerError { REKNotPublished, #[error("signature verification failed: {0}")] InvalidSignature(#[source] anyhow::Error), + #[error("master secret checksum mismatch")] + MasterSecretChecksumMismatch, #[error("master secret generation {0} not found")] MasterSecretNotFound(u64), #[error("master secret generation {0} not replicated")] MasterSecretNotReplicated(u64), + #[error("master secret not published")] + MasterSecretNotPublished, #[error("ephemeral secret for epoch {0} not found")] EphemeralSecretNotFound(u64), #[error("ephemeral secret for epoch {0} not replicated")] diff --git a/keymanager/src/api/methods.rs b/keymanager/src/api/methods.rs index 92b862df5e9..61c2db99747 100644 --- a/keymanager/src/api/methods.rs +++ 
b/keymanager/src/api/methods.rs @@ -13,7 +13,11 @@ pub const METHOD_REPLICATE_EPHEMERAL_SECRET: &str = "replicate_ephemeral_secret" /// Name of the `init` local method. pub const LOCAL_METHOD_INIT: &str = "init"; +/// Name of the `generate_master_secret` local method. +pub const LOCAL_METHOD_GENERATE_MASTER_SECRET: &str = "generate_master_secret"; /// Name of the `generate_ephemeral_secret` local method. pub const LOCAL_METHOD_GENERATE_EPHEMERAL_SECRET: &str = "generate_ephemeral_secret"; +/// Name of the `load_master_secret` local method. +pub const LOCAL_METHOD_LOAD_MASTER_SECRET: &str = "load_master_secret"; /// Name of the `load_ephemeral_secret` local method. pub const LOCAL_METHOD_LOAD_EPHEMERAL_SECRET: &str = "load_ephemeral_secret"; diff --git a/keymanager/src/api/requests.rs b/keymanager/src/api/requests.rs index 2739d6cae38..51314d39c35 100644 --- a/keymanager/src/api/requests.rs +++ b/keymanager/src/api/requests.rs @@ -8,7 +8,9 @@ use oasis_core_runtime::{ namespace::Namespace, }, consensus::{ - beacon::EpochTime, keymanager::SignedEncryptedEphemeralSecret, state::keymanager::Status, + beacon::EpochTime, + keymanager::{SignedEncryptedEphemeralSecret, SignedEncryptedMasterSecret}, + state::keymanager::Status, }, }; @@ -22,8 +24,6 @@ const INIT_RESPONSE_CONTEXT: &[u8] = b"oasis-core/keymanager: init response"; pub struct InitRequest { /// Key manager status. pub status: Status, - /// True iff the enclave may generate a new master secret. - pub may_generate: bool, } /// Key manager initialization response. @@ -33,10 +33,17 @@ pub struct InitResponse { pub is_secure: bool, /// Checksum for validating replication. pub checksum: Vec, + /// Checksum for validating the next replication. + #[cbor(optional)] + pub next_checksum: Vec, /// Checksum for identifying policy. pub policy_checksum: Vec, /// Runtime signing key. - pub rsk: signature::PublicKey, + #[cbor(optional)] + pub rsk: Option, + /// Runtime signing key of the next replication. 
+ #[cbor(optional)] + pub next_rsk: Option, } /// Signed InitResponse. @@ -79,6 +86,9 @@ pub struct ReplicateMasterSecretRequest { pub struct ReplicateMasterSecretResponse { /// Master secret. pub master_secret: Secret, + /// Checksum of the preceding master secret. + #[cbor(optional)] + pub checksum: Vec, } /// Key manager ephemeral secret replication request. @@ -97,6 +107,22 @@ pub struct ReplicateEphemeralSecretResponse { pub ephemeral_secret: Secret, } +/// Generate master secret request. +#[derive(Clone, Default, cbor::Encode, cbor::Decode)] +pub struct GenerateMasterSecretRequest { + /// Generation. + pub generation: u64, + /// Epoch time. + pub epoch: EpochTime, +} + +/// Generate master secret response. +#[derive(Clone, Default, cbor::Encode, cbor::Decode)] +pub struct GenerateMasterSecretResponse { + /// Signed encrypted master secret. + pub signed_secret: SignedEncryptedMasterSecret, +} + /// Generate ephemeral secret request. #[derive(Clone, Default, cbor::Encode, cbor::Decode)] pub struct GenerateEphemeralSecretRequest { @@ -111,6 +137,13 @@ pub struct GenerateEphemeralSecretResponse { pub signed_secret: SignedEncryptedEphemeralSecret, } +/// Load master secret request. +#[derive(Clone, Default, cbor::Encode, cbor::Decode)] +pub struct LoadMasterSecretRequest { + /// Signed encrypted master secret. + pub signed_secret: SignedEncryptedMasterSecret, +} + /// Load ephemeral secret request. #[derive(Clone, Default, cbor::Encode, cbor::Decode)] pub struct LoadEphemeralSecretRequest { diff --git a/keymanager/src/client/interface.rs b/keymanager/src/client/interface.rs index c0314fed8b2..a0414c36585 100644 --- a/keymanager/src/client/interface.rs +++ b/keymanager/src/client/interface.rs @@ -7,7 +7,7 @@ use oasis_core_runtime::consensus::beacon::EpochTime; use crate::{ api::KeyManagerError, - crypto::{KeyPair, KeyPairId, Secret, SignedPublicKey}, + crypto::{KeyPair, KeyPairId, Secret, SignedPublicKey, VerifiableSecret}, }; /// Key manager client interface. 
@@ -55,7 +55,10 @@ pub trait KeyManagerClient: Send + Sync { ) -> Result; /// Get a copy of the master secret for replication. - async fn replicate_master_secret(&self, generation: u64) -> Result; + async fn replicate_master_secret( + &self, + generation: u64, + ) -> Result; /// Get a copy of the ephemeral secret for replication. async fn replicate_ephemeral_secret(&self, epoch: EpochTime) @@ -100,7 +103,10 @@ impl KeyManagerClient for Arc { KeyManagerClient::get_public_ephemeral_key(&**self, key_pair_id, epoch).await } - async fn replicate_master_secret(&self, generation: u64) -> Result { + async fn replicate_master_secret( + &self, + generation: u64, + ) -> Result { KeyManagerClient::replicate_master_secret(&**self, generation).await } diff --git a/keymanager/src/client/mock.rs b/keymanager/src/client/mock.rs index 03030b7ecb0..6edd5e7589e 100644 --- a/keymanager/src/client/mock.rs +++ b/keymanager/src/client/mock.rs @@ -7,7 +7,7 @@ use oasis_core_runtime::{common::crypto::signature::Signature, consensus::beacon use crate::{ api::KeyManagerError, - crypto::{KeyPair, KeyPairId, Secret, SignedPublicKey}, + crypto::{KeyPair, KeyPairId, Secret, SignedPublicKey, VerifiableSecret}, }; use super::KeyManagerClient; @@ -91,7 +91,10 @@ impl KeyManagerClient for MockClient { }) } - async fn replicate_master_secret(&self, _generation: u64) -> Result { + async fn replicate_master_secret( + &self, + _generation: u64, + ) -> Result { unimplemented!(); } diff --git a/keymanager/src/client/remote.rs b/keymanager/src/client/remote.rs index 419366c6a2d..7b61d4ac58f 100644 --- a/keymanager/src/client/remote.rs +++ b/keymanager/src/client/remote.rs @@ -34,7 +34,7 @@ use crate::{ METHOD_GET_OR_CREATE_KEYS, METHOD_GET_PUBLIC_EPHEMERAL_KEY, METHOD_GET_PUBLIC_KEY, METHOD_REPLICATE_EPHEMERAL_SECRET, METHOD_REPLICATE_MASTER_SECRET, }, - crypto::{KeyPair, KeyPairId, Secret, SignedPublicKey}, + crypto::{KeyPair, KeyPairId, Secret, SignedPublicKey, VerifiableSecret}, 
policy::{set_trusted_policy_signers, verify_policy_and_trusted_signers, TrustedPolicySigners}, }; @@ -456,7 +456,10 @@ impl KeyManagerClient for RemoteClient { Ok(key) } - async fn replicate_master_secret(&self, generation: u64) -> Result { + async fn replicate_master_secret( + &self, + generation: u64, + ) -> Result { let height = self .inner .consensus_verifier @@ -475,7 +478,10 @@ impl KeyManagerClient for RemoteClient { ) .await .map_err(|err| KeyManagerError::Other(err.into())) - .map(|rsp: ReplicateMasterSecretResponse| rsp.master_secret) + .map(|rsp: ReplicateMasterSecretResponse| VerifiableSecret { + secret: rsp.master_secret, + checksum: rsp.checksum, + }) } async fn replicate_ephemeral_secret( diff --git a/keymanager/src/crypto/kdf.rs b/keymanager/src/crypto/kdf.rs index 2c48197ea71..d0609e2aa7a 100644 --- a/keymanager/src/crypto/kdf.rs +++ b/keymanager/src/crypto/kdf.rs @@ -17,8 +17,7 @@ use oasis_core_runtime::{ common::{ crypto::{ mrae::{deoxysii::DeoxysII, nonce::Nonce}, - signature::{self, PublicKey}, - x25519, + signature, x25519, }, namespace::Namespace, sgx::egetkey::egetkey, @@ -31,8 +30,8 @@ use oasis_core_runtime::{ use crate::{ api::KeyManagerError, crypto::{ - pack_runtime_id_generation, unpack_encrypted_generation_nonce, - unpack_encrypted_secret_nonce, KeyPair, KeyPairId, Secret, SignedPublicKey, StateKey, + pack_runtime_id_generation, unpack_encrypted_secret_nonce, KeyPair, KeyPairId, Secret, + SignedPublicKey, StateKey, VerifiableSecret, }, }; @@ -97,13 +96,35 @@ lazy_static! 
{ }; } -const LATEST_GENERATION_STORAGE_KEY: &[u8] = b"keymanager_master_secret_generation"; const MASTER_SECRET_STORAGE_KEY_PREFIX: &[u8] = b"keymanager_master_secret"; +const MASTER_SECRET_CHECKSUM_STORAGE_KEY_PREFIX: &[u8] = b"keymanager_master_secret_checksum"; +const MASTER_SECRET_PROPOSAL_STORAGE_KEY: &[u8] = b"keymanager_master_secret_proposal"; const MASTER_SECRET_SEAL_CONTEXT: &[u8] = b"Ekiden Keymanager Seal master secret v0"; const MASTER_SECRET_CACHE_SIZE: usize = 20; const EPHEMERAL_SECRET_CACHE_SIZE: usize = 20; +/// KDF state. +#[derive(Default, Debug, PartialEq, Eq)] +pub struct State { + /// Checksum of the master secret. + /// + /// Empty if KDF is not initialized. + pub checksum: Vec, + /// Checksum of the next master secret. + /// + /// Empty if the proposal for the next master secret is not set. + pub next_checksum: Vec, + /// Key manager committee public runtime signing key. + /// + /// None if KDF is not initialized. + pub signing_key: Option, + /// Next key manager committee public runtime signing key. + /// + /// Empty if the proposal for the next master secret is not set. + pub next_signing_key: Option, +} + /// Kdf, which derives key manager keys from a master secret. pub struct Kdf { inner: RwLock, @@ -116,8 +137,10 @@ struct Inner { master_secrets: LruCache, // Ephemeral secrets used to derive ephemeral runtime keys. ephemeral_secrets: HashMap, - /// Checksum of the master secret and the key manager runtime ID. + /// Checksum of the master secret. checksum: Option>, + /// Checksum of the proposal for the next master secret. + next_checksum: Option>, /// Key manager runtime ID. runtime_id: Option, /// Key manager committee runtime signer derived from @@ -130,6 +153,9 @@ struct Inner { /// /// Used to verify derived long-term and ephemeral public runtime keys. signing_key: Option, + /// Key manager committee public runtime signing key derived from + /// the proposal for the next master secret. 
+ next_signing_key: Option, /// Local cache for the long-term private keys. longterm_keys: LruCache<(Vec, u64), KeyPair>, /// Local cache for the ephemeral private keys. @@ -184,7 +210,7 @@ impl Inner { None => return Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()), }; - Self::derive_secret(secret, kdf_custom, seed) + Ok(Self::derive_secret(secret, kdf_custom, seed)) } /// Derive long-term secret from the key manager's master secret. @@ -199,10 +225,10 @@ impl Inner { None => return Err(KeyManagerError::MasterSecretNotFound(generation).into()), }; - Self::derive_secret(secret, kdf_custom, seed) + Ok(Self::derive_secret(secret, kdf_custom, seed)) } - fn derive_secret(secret: &Secret, kdf_custom: &[u8], seed: &[u8]) -> Result { + fn derive_secret(secret: &Secret, kdf_custom: &[u8], seed: &[u8]) -> Secret { let mut k = Secret::default(); // KMAC256(secret, seed, 32, kdf_custom) @@ -210,7 +236,7 @@ impl Inner { f.update(seed); f.finalize(&mut k.0); - Ok(k) + k } fn get_checksum(&self) -> Result> { @@ -227,17 +253,18 @@ impl Inner { } } - fn get_signing_key(&self) -> Result { - match self.signing_key { - Some(signing_key) => Ok(signing_key), - None => Err(KeyManagerError::NotInitialized.into()), - } - } - fn get_next_generation(&self) -> u64 { self.generation.map(|g| g + 1).unwrap_or_default() } + fn verify_next_generation(&self, generation: u64) -> Result<()> { + let next_generation = self.get_next_generation(); + if next_generation != generation { + return Err(KeyManagerError::InvalidGeneration(next_generation, generation).into()); + } + Ok(()) + } + fn get_runtime_id(&self) -> Result { match self.runtime_id { Some(runtime_id) => Ok(runtime_id), @@ -274,9 +301,11 @@ impl Kdf { master_secrets: LruCache::new(NonZeroUsize::new(MASTER_SECRET_CACHE_SIZE).unwrap()), ephemeral_secrets: HashMap::new(), checksum: None, + next_checksum: None, runtime_id: None, signer: None, signing_key: None, + next_signing_key: None, longterm_keys: 
LruCache::new(NonZeroUsize::new(1024).unwrap()), ephemeral_keys: LruCache::new(NonZeroUsize::new(128).unwrap()), }), @@ -288,60 +317,175 @@ impl Kdf { &KDF } - /// Set the runtime ID if it is not already set. + /// Initialize the KDF to ensure that its internal state is up-to-date. /// - /// If the runtime ID changes, the internal state is reset and an error is returned. - pub fn set_runtime_id(&self, runtime_id: Namespace) -> Result<()> { - let mut inner = self.inner.write().unwrap(); - inner.set_runtime_id(runtime_id).map_err(|_| { - // Whowa, the caller's idea of our runtime ID has changed, - // something really screwed up is going on. - inner.reset(); - KeyManagerError::StateCorrupted.into() - }) - } - - /// Key manager runtime ID. - pub fn runtime_id(&self) -> Option { - let inner = self.inner.read().unwrap(); - inner.runtime_id - } - - /// Next generation of the key manager master secret. - pub fn next_generation(&self) -> u64 { - let inner = self.inner.read().unwrap(); - inner.get_next_generation() - } - - /// Status of the internal state, i.e. checksum and runtime signing key. + /// The state is considered up-to-date if all generations of the master secret are encrypted + /// and stored locally, and the checksum of the last generation matches the given checksum. + /// If this condition is not met, the internal state is reset and the KDF needs to be + /// initialized again. /// - /// If given checksum and generation don't match internal state, - /// the state is reset and an error is returned. - pub fn status( + /// WARNINGS: + /// - Once master secrets have been persisted to disk, it is intended that manual + /// intervention by the operator is required to remove/alter them. + /// - The first initialization can take a very long time, especially if all generations + /// of the master secret must be replicated from other enclaves. 
+ pub fn init( &self, - runtime_id: &Namespace, - checksum: Vec, + storage: &dyn KeyValue, + runtime_id: Namespace, generation: u64, - ) -> Result<(Vec, PublicKey)> { - let mut inner = self.inner.write().unwrap(); - inner.verify_runtime_id(runtime_id)?; + checksum: Vec, + master_secret_fetcher: M, + ) -> Result + where + M: Fn(u64) -> Result, + { + // If the key manager has no secrets, nothing needs to be replicated. + if checksum.is_empty() { + let mut inner = self.inner.write().unwrap(); + inner.set_runtime_id(runtime_id)?; - let local_checksum = inner.get_checksum()?; - if !checksum.is_empty() && local_checksum != checksum { - // The caller provided a checksum and there was a mismatch. + if inner.checksum.is_some() { + inner.reset(); + return Err(KeyManagerError::StateCorrupted.into()); + } + + return Ok(State { + checksum, + next_checksum: inner.next_checksum.clone().unwrap_or_default(), + signing_key: inner.signing_key, + next_signing_key: inner.next_signing_key, + }); + } + + // Fetch internal state. + let (mut next_generation, mut curr_checksum) = { + let mut inner = self.inner.write().unwrap(); + inner.set_runtime_id(runtime_id)?; + + let next_generation = inner.get_next_generation(); + let curr_checksum = inner.checksum.clone().unwrap_or(runtime_id.0.to_vec()); + (next_generation, curr_checksum) + }; + + // On startup load all master secrets. + if next_generation == 0 { + loop { + let secret = match Self::load_master_secret(storage, &runtime_id, next_generation) { + Some(secret) => secret, + None => break, + }; + + let prev_checksum = Self::load_checksum(storage, next_generation); + if prev_checksum != curr_checksum { + let mut inner = self.inner.write().unwrap(); + inner.reset(); + return Err(KeyManagerError::StorageCorrupted.into()); + } + + curr_checksum = Self::checksum_master_secret(&secret, &curr_checksum); + next_generation += 1; + } + } + + // If only one master secret is missing, try using stored proposal. 
+ if next_generation == generation { + if let Some(secret) = Self::load_master_secret_proposal(storage) { + // Proposed secret is untrusted and needs to be verified. + let next_checksum = Self::checksum_master_secret(&secret, &curr_checksum); + + if next_checksum == checksum { + Self::store_master_secret(storage, &runtime_id, &secret, generation); + Self::store_checksum(storage, curr_checksum, generation); + + curr_checksum = next_checksum; + next_generation += 1; + } + } + } + + // Load and replicate the missing master secrets in reverse order so that every secret + // is verified against the consensus checksum before being saved. + let mut last_checksum = checksum.clone(); + for generation in (next_generation..=generation).rev() { + // Check the local storage first. + let secret = Self::load_master_secret(storage, &runtime_id, generation); + if let Some(secret) = secret { + // Previous checksum is untrusted and needs to be verified. + let prev_checksum = Self::load_checksum(storage, generation); + let next_checksum = Self::checksum_master_secret(&secret, &prev_checksum); + + if next_checksum != last_checksum { + let mut inner = self.inner.write().unwrap(); + inner.reset(); + return Err(KeyManagerError::StorageCorrupted.into()); + } + + last_checksum = prev_checksum; + continue; + } + + // Master secret wasn't found and needs to be fetched from another enclave. + // Fetched values are untrusted and need to be verified. 
+ let vs = master_secret_fetcher(generation)?; + let (secret, prev_checksum) = match vs.checksum.is_empty() { + true => (vs.secret, runtime_id.0.to_vec()), + false => (vs.secret, vs.checksum), + }; + let next_checksum = Self::checksum_master_secret(&secret, &prev_checksum); + + if next_checksum != last_checksum { + return Err(KeyManagerError::MasterSecretChecksumMismatch.into()); + } + + Self::store_master_secret(storage, &runtime_id, &secret, generation); + Self::store_checksum(storage, prev_checksum.clone(), generation); + + last_checksum = prev_checksum; + } + + // Replication finished, verify the final state. + if next_generation > generation + 1 || curr_checksum != last_checksum { + // The caller provided a checksum and a generation and replication produced a mismatch. // The global key manager state disagrees with the enclave state. + let mut inner = self.inner.write().unwrap(); inner.reset(); return Err(KeyManagerError::StateCorrupted.into()); } - let last_generation = inner.get_generation()?; - if generation != last_generation { - return Err(KeyManagerError::InvalidGeneration(last_generation, generation).into()); + // Update internal state. + let mut inner = self.inner.write().unwrap(); + inner.set_runtime_id(runtime_id)?; + + if inner.generation != Some(generation) { + // Derive signing key from the latest secret. 
+ let secret = Self::load_master_secret(storage, &runtime_id, generation) + .ok_or(anyhow::anyhow!(KeyManagerError::StateCorrupted))?; + + let sk = Self::derive_signing_key(&runtime_id, &secret); + let pk = sk.public_key(); + + inner.generation = Some(generation); + inner.checksum = Some(checksum); + inner.signing_key = Some(pk); + inner.signer = Some(Arc::new(sk)); + inner.next_checksum = None; + inner.next_signing_key = None; + inner.master_secrets.push(generation, secret); } - let rsk = inner.get_signing_key()?; + Ok(State { + checksum: inner.checksum.clone().unwrap_or_default(), + next_checksum: inner.next_checksum.clone().unwrap_or_default(), + signing_key: inner.signing_key, + next_signing_key: inner.next_signing_key, + }) + } - Ok((local_checksum, rsk)) + /// Key manager runtime ID. + pub fn runtime_id(&self) -> Result { + let inner = self.inner.read().unwrap(); + inner.get_runtime_id() } /// Get or create long-term keys. @@ -357,15 +501,23 @@ impl Kdf { let mut seed = runtime_id.as_ref().to_vec(); seed.extend_from_slice(key_pair_id.as_ref()); + let mut inner = self.inner.write().unwrap(); + + // Return only generations we know. + let last_generation = inner.get_generation()?; + if generation > last_generation { + return Err(KeyManagerError::GenerationFromFuture(last_generation, generation).into()); + } + // Check to see if the cached value exists. let id = (seed, generation); - let mut inner = self.inner.write().unwrap(); if let Some(keys) = inner.longterm_keys.get(&id) { return Ok(keys.clone()); }; // Make sure the secret is loaded. if !inner.master_secrets.contains(&generation) { + let runtime_id = inner.get_runtime_id()?; let secret = match Self::load_master_secret(storage, &runtime_id, generation) { Some(secret) => secret, None => { @@ -400,8 +552,9 @@ impl Kdf { seed.extend_from_slice(key_pair_id.as_ref()); seed.extend_from_slice(epoch.to_be_bytes().as_ref()); // TODO: Remove once we transition to ephemeral secrets (how?) 
- // Check to see if the cached value exists. let mut inner = self.inner.write().unwrap(); + + // Check to see if the cached value exists. let id = (seed, epoch); if let Some(keys) = inner.ephemeral_keys.get(&id) { return Ok(keys.clone()); @@ -479,7 +632,7 @@ impl Kdf { } // Then try to load it from the storage. - // Don't update the cache as someone could be replicating old secrets. + // Don't update the cache as the caller could be replicating old secrets. let runtime_id = inner.get_runtime_id()?; let secret = match Self::load_master_secret(storage, &runtime_id, generation) { Some(secret) => secret, @@ -504,65 +657,52 @@ impl Kdf { Ok(secret) } - /// Save master secret to the local cache. - fn save_master_secret( + /// Verify the proposal for the next master secret and store it encrypted in untrusted + /// local storage. + pub fn add_master_secret_proposal( &self, + storage: &dyn KeyValue, runtime_id: &Namespace, secret: Secret, generation: u64, + checksum: &Vec, ) -> Result<()> { let mut inner = self.inner.write().unwrap(); inner.verify_runtime_id(runtime_id)?; + inner.verify_next_generation(generation)?; - // Master secrets need to be added in sequential order. - let next_generation = inner.get_next_generation(); - if generation != next_generation { - return Err(KeyManagerError::InvalidGeneration(next_generation, generation).into()); + let last_checksum = inner.get_checksum().unwrap_or(runtime_id.0.to_vec()); + let next_checksum = Self::checksum_master_secret(&secret, &last_checksum); + if &next_checksum != checksum { + return Err(KeyManagerError::MasterSecretChecksumMismatch.into()); } - // Compute next checksum. - let last_checksum = inner.get_checksum().unwrap_or(runtime_id.as_ref().to_vec()); - let checksum = Self::checksum_master_secret(&secret, &last_checksum); + Self::store_master_secret_proposal(storage, &secret); + inner.next_checksum = Some(next_checksum); - // Derive signing key from the latest secret. 
- let rsk_secret = - Inner::derive_secret(&secret, &RUNTIME_SIGNING_KEY_CUSTOM, runtime_id.as_ref())?; - let sk = signature::PrivateKey::from_bytes(rsk_secret.0.to_vec()); - let pk = sk.public_key(); - - // Update state. - inner.generation = Some(generation); - inner.checksum = Some(checksum); - inner.signing_key = Some(pk); - inner.signer = Some(Arc::new(sk)); - inner.master_secrets.push(generation, secret); + let next_signing_key = Self::derive_signing_key(runtime_id, &secret).public_key(); + inner.next_signing_key = Some(next_signing_key); Ok(()) } - /// Add master secret to the local cache and store it encrypted to untrusted local storage. - pub fn add_master_secret( + /// Add ephemeral secret to the local cache. + pub fn add_ephemeral_secret( &self, - storage: &dyn KeyValue, runtime_id: &Namespace, secret: Secret, - generation: u64, + epoch: EpochTime, + checksum: &Vec, ) -> Result<()> { - // Add to the cache before storing locally to make sure that secrets are added - // in sequential order. - self.save_master_secret(runtime_id, secret.clone(), generation)?; - - // Update the last generation after the secret is stored to avoid problems - // if we panic in between. - Self::store_master_secret(storage, runtime_id, &secret, generation); - Self::store_last_generation(storage, runtime_id, generation); - - Ok(()) - } + let expected_checksum = Self::checksum_ephemeral_secret(runtime_id, &secret, epoch); + if &expected_checksum != checksum { + return Err(KeyManagerError::EphemeralSecretChecksumMismatch.into()); + } - /// Add ephemeral secret to the local cache. - pub fn add_ephemeral_secret(&self, secret: Secret, epoch: EpochTime) { let mut inner = self.inner.write().unwrap(); + inner.verify_runtime_id(runtime_id)?; + + // Add to the cache. inner.ephemeral_secrets.insert(epoch, Secret(secret.0)); // Drop the oldest secret, if we exceed the capacity. 
@@ -574,42 +714,6 @@ impl Kdf { .expect("map should not be empty"); inner.ephemeral_secrets.remove(&min); } - } - - /// Load master secrets from untrusted local storage, if not loaded already. - pub fn load_master_secrets( - &self, - storage: &dyn KeyValue, - runtime_id: &Namespace, - ) -> Result<()> { - if self.next_generation() != 0 { - return Ok(()); - } - - // Fetch the last generation number. - let last_generation = match Self::load_last_generation(storage, runtime_id) { - Some(generation) => generation, - None => { - // Empty storage, nothing to load. - return Ok(()); - } - }; - - // Fetch secrets and add them to the cache. - for generation in 0..=last_generation { - let secret = match Kdf::load_master_secret(storage, runtime_id, generation) { - Some(secret) => secret, - None => { - // We could stop here and let the caller replicate other secrets, - // but we won't as this looks like a state corruption. - let mut inner = self.inner.write().unwrap(); - inner.reset(); - return Err(KeyManagerError::StateCorrupted.into()); - } - }; - - self.save_master_secret(runtime_id, secret, generation)?; - } Ok(()) } @@ -621,7 +725,7 @@ impl Kdf { /// and generation, while the consensus layer guarantees uniqueness, i.e. only one generation /// of the master secret can be published per key manager runtime. fn load_master_secret( - untrusted_local: &dyn KeyValue, + storage: &dyn KeyValue, runtime_id: &Namespace, generation: u64, ) -> Option { @@ -629,7 +733,7 @@ impl Kdf { let mut key = MASTER_SECRET_STORAGE_KEY_PREFIX.to_vec(); key.extend(generation.to_le_bytes()); - let ciphertext = untrusted_local.get(key).unwrap(); + let ciphertext = storage.get(key).unwrap(); if ciphertext.is_empty() { return None; } @@ -649,8 +753,7 @@ impl Kdf { /// Encrypt and store the master secret to untrusted local storage. /// - /// WARNING: To ensure uniqueness always verify that the master secret has been published - /// in the consensus layer!!! 
+ /// WARNING: Always verify that the master secret has been published in the consensus layer!!! fn store_master_secret( storage: &dyn KeyValue, runtime_id: &Namespace, @@ -665,7 +768,7 @@ impl Kdf { let nonce = Nonce::generate(); let additional_data = pack_runtime_id_generation(runtime_id, generation); let d2 = Self::new_d2(); - let mut ciphertext = d2.seal(&nonce, secret.as_ref(), additional_data); + let mut ciphertext = d2.seal(&nonce, secret, additional_data); ciphertext.extend_from_slice(&nonce.to_vec()); // Persist the encrypted master secret. @@ -674,46 +777,92 @@ impl Kdf { .expect("failed to persist master secret"); } - /// Load the generation of the last stored master secret from untrusted local storage. - fn load_last_generation(untrusted_local: &dyn KeyValue, runtime_id: &Namespace) -> Option { - // Fetch the encrypted generation if it exists. - let key = LATEST_GENERATION_STORAGE_KEY.to_vec(); - let ciphertext = untrusted_local.get(key).unwrap(); + /// Load the proposal for the next master secret from untrusted local storage. + /// + /// Since master secret proposals can be overwritten if not accepted by the end of the rotation + /// period, it is impossible to know whether the loaded proposal is the latest one. Therefore, + /// it is crucial to ALWAYS verify that the checksum of the proposal matches the one published + /// in the consensus before accepting it. + fn load_master_secret_proposal(storage: &dyn KeyValue) -> Option { + // Fetch the encrypted master secret proposal if it exists. + let key = MASTER_SECRET_PROPOSAL_STORAGE_KEY.to_vec(); + + let ciphertext = storage.get(key).unwrap(); if ciphertext.is_empty() { return None; } - let (ciphertext, nonce) = unpack_encrypted_generation_nonce(&ciphertext) + let (ciphertext, nonce) = unpack_encrypted_secret_nonce(&ciphertext) .expect("persisted state is corrupted, invalid size"); - // Decrypt the persisted generation. + // Decrypt the persisted master secret proposal. 
let d2 = Self::new_d2(); - let plaintext = d2 - .open(&nonce, ciphertext.to_vec(), runtime_id) - .expect("persisted state is corrupted"); + let plaintext = match d2.open(&nonce, ciphertext.to_vec(), vec![]) { + Ok(plaintext) => plaintext, + Err(_) => return None, + }; - Some(u64::from_le_bytes(plaintext.try_into().unwrap())) + Some(Secret(plaintext.try_into().unwrap())) } - /// Store the generation of the last master secret to untrusted local storage. - fn store_last_generation( - untrusted_local: &dyn KeyValue, - runtime_id: &Namespace, - generation: u64, - ) { - // We only store the latest generation. - let key = LATEST_GENERATION_STORAGE_KEY.to_vec(); + /// Encrypt and store the next master secret proposal in untrusted local storage. + /// + /// If a proposal already exists, it will be overwritten. + fn store_master_secret_proposal(storage: &dyn KeyValue, secret: &Secret) { + // Using the same key for all proposals will override the previous one. + let key = MASTER_SECRET_PROPOSAL_STORAGE_KEY.to_vec(); - // Encrypt the generation. + // Encrypt the master secret. + // Additional data has to be different from the one used when storing verified master + // secrets so that the attacker cannot replace secrets with rejected proposals. + // Since proposals are always verified before being accepted, confidentiality will suffice. let nonce = Nonce::generate(); let d2 = Self::new_d2(); - let mut ciphertext = d2.seal(&nonce, generation.to_le_bytes(), runtime_id); + let mut ciphertext = d2.seal(&nonce, secret, vec![]); ciphertext.extend_from_slice(&nonce.to_vec()); - // Persist the encrypted generation. - untrusted_local + // Persist the encrypted master secret. + storage .insert(key, ciphertext) - .expect("failed to persist master secret generation"); + .expect("failed to persist master secret proposal"); + } + + /// Load the master secret checksum from untrusted local storage. 
+ pub fn load_checksum(storage: &dyn KeyValue, generation: u64) -> Vec {
+ // Fetch the checksum if it exists.
+ let mut key = MASTER_SECRET_CHECKSUM_STORAGE_KEY_PREFIX.to_vec();
+ key.extend(generation.to_le_bytes());
+
+ storage.get(key).expect("failed to fetch checksum")
+ }
+
+ /// Store the previous master secret checksum to untrusted local storage.
+ fn store_checksum(storage: &dyn KeyValue, checksum: Vec, generation: u64) {
+ // Every checksum is stored under its own key.
+ let mut key = MASTER_SECRET_CHECKSUM_STORAGE_KEY_PREFIX.to_vec();
+ key.extend(generation.to_le_bytes());
+
+ // Persist the checksum.
+ storage
+ .insert(key, checksum)
+ .expect("failed to persist checksum");
+ }
+
+ /// Compute the checksum of the master secret that should follow the last known generation.
+ pub fn checksum_master_secret_proposal(
+ &self,
+ runtime_id: Namespace,
+ secret: &Secret,
+ generation: u64,
+ ) -> Result> {
+ let inner = self.inner.read().unwrap();
+ inner.verify_runtime_id(&runtime_id)?;
+ inner.verify_next_generation(generation)?;
+
+ let last_checksum = inner.get_checksum().unwrap_or(runtime_id.0.to_vec());
+ let next_checksum = Self::checksum_master_secret(secret, &last_checksum);
+
+ Ok(next_checksum)
+ }

 /// Compute the checksum of the master secret.
@@ -722,11 +871,11 @@ impl Kdf {
 /// to the key manager's runtime ID, using master secret generations as the KMAC keys
 /// at each step. The checksum calculation for the n-th generation can be expressed by
 /// the formula: KMAC(gen_n, ... KMAC(gen_2, KMAC(gen_1, KMAC(gen_0, runtime_id)))).
- fn checksum_master_secret(master_secret: &Secret, last_checksum: &Vec) -> Vec { + fn checksum_master_secret(secret: &Secret, last_checksum: &Vec) -> Vec { let mut k = [0u8; 32]; // KMAC256(master_secret, last_checksum, 32, "ekiden-checksum-master-secret") - let mut f = KMac::new_kmac256(master_secret.as_ref(), &CHECKSUM_MASTER_SECRET_CUSTOM); + let mut f = KMac::new_kmac256(secret.as_ref(), &CHECKSUM_MASTER_SECRET_CUSTOM); f.update(last_checksum); f.finalize(&mut k); @@ -739,14 +888,14 @@ impl Kdf { /// concatenation of the key manager's runtime ID and the epoch, using ephemeral secret /// as the KMAC key. pub fn checksum_ephemeral_secret( - ephemeral_secret: &Secret, runtime_id: &Namespace, + secret: &Secret, epoch: EpochTime, ) -> Vec { let mut k = [0u8; 32]; // KMAC256(ephemeral_secret, kmRuntimeID, epoch, 32, "ekiden-checksum-ephemeral-secret") - let mut f = KMac::new_kmac256(ephemeral_secret.as_ref(), &CHECKSUM_EPHEMERAL_SECRET_CUSTOM); + let mut f = KMac::new_kmac256(secret.as_ref(), &CHECKSUM_EPHEMERAL_SECRET_CUSTOM); f.update(runtime_id.as_ref()); f.update(epoch.to_le_bytes().as_ref()); f.finalize(&mut k); @@ -754,6 +903,11 @@ impl Kdf { k.to_vec() } + fn derive_signing_key(runtime_id: &Namespace, secret: &Secret) -> signature::PrivateKey { + let sec = Inner::derive_secret(secret, &RUNTIME_SIGNING_KEY_CUSTOM, runtime_id.as_ref()); + signature::PrivateKey::from_bytes(sec.0.to_vec()) + } + fn new_d2() -> DeoxysII { let mut seal_key = egetkey(Keypolicy::MRENCLAVE, MASTER_SECRET_SEAL_CONTEXT); let d2 = DeoxysII::new(&seal_key); @@ -772,8 +926,10 @@ mod tests { num::NonZeroUsize, panic, sync::{Arc, Mutex, RwLock}, + vec, }; + use anyhow::Result; use lru::LruCache; use rustc_hex::{FromHex, ToHex}; @@ -787,12 +943,17 @@ mod tests { types::Error, }; - use crate::crypto::{ - kdf::{ - CHECKSUM_CUSTOM, CHECKSUM_EPHEMERAL_SECRET_CUSTOM, CHECKSUM_MASTER_SECRET_CUSTOM, - EPHEMERAL_SECRET_CACHE_SIZE, RUNTIME_SIGNING_KEY_CUSTOM, + use crate::{ + api::KeyManagerError, + 
crypto::{ + kdf::{ + State, CHECKSUM_CUSTOM, CHECKSUM_EPHEMERAL_SECRET_CUSTOM, + CHECKSUM_MASTER_SECRET_CUSTOM, EPHEMERAL_SECRET_CACHE_SIZE, + MASTER_SECRET_CHECKSUM_STORAGE_KEY_PREFIX, MASTER_SECRET_STORAGE_KEY_PREFIX, + RUNTIME_SIGNING_KEY_CUSTOM, + }, + KeyPairId, Secret, VerifiableSecret, SECRET_SIZE, }, - KeyPairId, Secret, SECRET_SIZE, }; use super::{ @@ -825,9 +986,11 @@ mod tests { master_secrets, ephemeral_secrets, checksum: Some(vec![2u8; 32]), + next_checksum: None, runtime_id: Some(Namespace([3u8; 32])), signer: Some(Arc::new(PrivateKey::from_bytes(vec![4u8; 32]))), signing_key: Some(PrivateKey::from_bytes(vec![4u8; 32]).public_key()), + next_signing_key: None, longterm_keys: LruCache::new(NonZeroUsize::new(1).unwrap()), ephemeral_keys: LruCache::new(NonZeroUsize::new(1).unwrap()), }), @@ -864,6 +1027,361 @@ mod tests { } } + /// Master secret and checksum provider. + pub struct MasterSecretProvider { + runtime_id: Namespace, + } + + impl MasterSecretProvider { + fn new(runtime_id: Namespace) -> Self { + return Self { runtime_id }; + } + + fn fetch(&self, generation: u64) -> Result { + let mut secret = Default::default(); + let mut prev_checksum = Default::default(); + let mut next_checksum = self.runtime_id.0.to_vec(); + + for generation in 0..=generation { + secret = Secret([generation as u8; SECRET_SIZE]); + + prev_checksum = next_checksum; + next_checksum = Kdf::checksum_master_secret(&secret, &prev_checksum); + } + + Ok(VerifiableSecret { + secret, + checksum: prev_checksum, + }) + } + + fn checksum(&self, generation: u64) -> Vec { + self.fetch(generation + 1).unwrap().checksum + } + } + + #[test] + fn init_replication() { + let kdf = Kdf::new(); + let storage = InMemoryKeyValue::new(); + let runtime_id = Namespace::from(vec![1u8; 32]); + let provider = MasterSecretProvider::new(runtime_id); + let master_secret_fetcher = |generation| provider.fetch(generation); + + // No secrets. 
+ let result = kdf.init(&storage, runtime_id, 0, vec![], master_secret_fetcher); + assert!(result.is_ok()); + + let state = result.unwrap(); + assert_eq!(state, State::default()); + + // Secrets replicated from other enclaves. + for generation in [0, 0, 1, 1, 2, 2, 5, 5] { + let checksum = provider.checksum(generation); + + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ); + assert!(result.is_ok()); + + let state = result.unwrap(); + assert_eq!(state.checksum, checksum); + assert!(state.signing_key.is_some()); + assert!(state.next_checksum.is_empty()); + assert!(state.next_signing_key.is_none()); + } + + // Secrets loaded from local storage or replicated from other enclaves. + for generation in [5, 5, 6, 6, 10, 10] { + let kdf = Kdf::new(); + let checksum = provider.checksum(generation); + + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ); + assert!(result.is_ok()); + + let state = result.unwrap(); + assert_eq!(state.checksum, checksum); + assert!(state.signing_key.is_some()); + assert!(state.next_checksum.is_empty()); + assert!(state.next_signing_key.is_none()); + } + } + + #[test] + fn init_rotation() { + let kdf = Kdf::new(); + let storage = InMemoryKeyValue::new(); + let runtime_id = Namespace::from(vec![1u8; 32]); + let provider = MasterSecretProvider::new(runtime_id); + let master_secret_fetcher = + |generation| Err(KeyManagerError::MasterSecretNotFound(generation).into()); + + // KDF needs to be initialized. + let result = kdf.init(&storage, runtime_id, 0, vec![], master_secret_fetcher); + assert!(result.is_ok()); + + // Rotate master secrets. 
+ for generation in 0..5 { + let secret = provider.fetch(generation).unwrap().secret; + let checksum = provider.checksum(generation); + let result = kdf.add_master_secret_proposal( + &storage, + &runtime_id, + secret, + generation, + &checksum, + ); + assert!(result.is_ok()); + + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ); + assert!(result.is_ok()); + + let state = result.unwrap(); + assert_eq!(state.checksum, checksum); + } + + // Invalid proposal. + let generation = 5; + let secret = Secret([0; SECRET_SIZE]); + let checksum = kdf + .checksum_master_secret_proposal(runtime_id, &secret, generation) + .unwrap(); + let result = + kdf.add_master_secret_proposal(&storage, &runtime_id, secret, generation, &checksum); + assert!(result.is_ok()); + + let checksum = provider.checksum(generation); + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum, + master_secret_fetcher, + ); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + KeyManagerError::MasterSecretNotFound(generation).to_string() + ); + + // Valid proposal. + let secret = provider.fetch(generation).unwrap().secret; + let checksum = provider.checksum(generation); + let result = + kdf.add_master_secret_proposal(&storage, &runtime_id, secret, generation, &checksum); + assert!(result.is_ok()); + + // Rotate master secret after restart. + let kdf = Kdf::new(); + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ); + assert!(result.is_ok()); + + let state = result.unwrap(); + assert_eq!(state.checksum, checksum); + } + + #[test] + fn init_corrupted_checksum() { + let kdf = Kdf::new(); + let storage = InMemoryKeyValue::new(); + let runtime_id = Namespace::from(vec![1u8; 32]); + let provider = MasterSecretProvider::new(runtime_id); + let master_secret_fetcher = |generation| provider.fetch(generation); + + // Init. 
+ let generation = 5; + let checksum = provider.checksum(generation); + + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ); + assert!(result.is_ok()); + + // Corrupt checksum. + let mut key = MASTER_SECRET_CHECKSUM_STORAGE_KEY_PREFIX.to_vec(); + key.extend(generation.to_le_bytes()); + + storage + .insert(key, vec![1, 2, 3]) + .expect("checksum should be inserted"); + + // Init. + let kdf = Kdf::new(); + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + KeyManagerError::StorageCorrupted.to_string() + ); + } + + #[test] + fn init_corrupted_secret() { + let kdf = Kdf::new(); + let storage = InMemoryKeyValue::new(); + let runtime_id = Namespace::from(vec![1u8; 32]); + let provider = MasterSecretProvider::new(runtime_id); + let master_secret_fetcher = |generation| provider.fetch(generation); + + // Init. + let generation = 5; + let checksum = provider.checksum(generation); + + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ); + assert!(result.is_ok()); + + // Corrupt master secret. + let mut key = MASTER_SECRET_STORAGE_KEY_PREFIX.to_vec(); + key.extend(generation.to_le_bytes()); + + storage + .insert(key, vec![1, 2, 3]) + .expect("secret should be inserted"); + + // Init. + let kdf = Kdf::new(); + let result = panic::catch_unwind(|| { + kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ) + }); + assert!(result.is_err()); + } + + #[test] + fn init_invalid_generation() { + let kdf = Kdf::new(); + let storage = InMemoryKeyValue::new(); + let runtime_id = Namespace::from(vec![1u8; 32]); + let provider = MasterSecretProvider::new(runtime_id); + let master_secret_fetcher = |generation| provider.fetch(generation); + + // Init. 
+ let generation = 10; + let checksum = provider.checksum(generation); + + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ); + assert!(result.is_ok()); + + // Init with outdated generation. + let generation = 5; + let checksum = provider.checksum(generation); + + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + KeyManagerError::StateCorrupted.to_string() + ); + } + + #[test] + fn init_invalid_runtime_id() { + let kdf = Kdf::new(); + let storage = InMemoryKeyValue::new(); + let runtime_id = Namespace::from(vec![1u8; 32]); + let invalid_runtime_id = Namespace::from(vec![2u8; 32]); + let provider = MasterSecretProvider::new(runtime_id); + let master_secret_fetcher = |generation| provider.fetch(generation); + + // No secrets. + let result = kdf.init(&storage, runtime_id, 0, vec![], master_secret_fetcher); + assert!(result.is_ok()); + + let result = kdf.init( + &storage, + invalid_runtime_id, + 0, + vec![], + master_secret_fetcher, + ); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + KeyManagerError::RuntimeMismatch.to_string() + ); + + // Few secrets. + let generation = 5; + let checksum = provider.checksum(generation); + + let result = kdf.init( + &storage, + runtime_id, + generation, + checksum.clone(), + master_secret_fetcher, + ); + assert!(result.is_ok()); + + let result = kdf.init( + &storage, + invalid_runtime_id, + generation, + checksum, + master_secret_fetcher, + ); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + KeyManagerError::RuntimeMismatch.to_string() + ); + } + #[test] fn key_generation_is_deterministic() { let kdf = Kdf::default(); @@ -1028,7 +1546,7 @@ mod tests { ); // Secret loaded from the storage. 
- let generation = kdf.next_generation(); + let generation = 2; let new_secret = Secret([3u8; SECRET_SIZE]); { let mut inner = kdf.inner.write().unwrap(); @@ -1073,6 +1591,7 @@ mod tests { #[test] fn ephemeral_secret_can_be_loaded() { let kdf = Kdf::default(); + let runtime_id = kdf.runtime_id().expect("runtime id should be set"); // Secret for epoch 1 should exist. kdf.inner @@ -1101,8 +1620,11 @@ mod tests { .map(|_| panic!("ephemeral secret for epoch 3 should not exist")); // Insert enough secrets so that the oldest one is removed. - for epoch in 1..EPHEMERAL_SECRET_CACHE_SIZE + 2 { - kdf.add_ephemeral_secret(Secret([100; SECRET_SIZE]), epoch.try_into().unwrap()); + for epoch in 1..(EPHEMERAL_SECRET_CACHE_SIZE + 2) as EpochTime { + let secret = Secret([100; SECRET_SIZE]); + let checksum = Kdf::checksum_ephemeral_secret(&runtime_id, &secret, epoch); + let result = kdf.add_ephemeral_secret(&runtime_id, secret, epoch, &checksum); + assert!(result.is_ok()) } // Secret for epoch 1 should be removed. @@ -1291,11 +1813,13 @@ mod tests { master_secrets, ephemeral_secrets, checksum: Some(checksum.from_hex().unwrap()), + next_checksum: None, runtime_id: Some(Namespace::from(runtime_id)), signer: Some(Arc::new(PrivateKey::from_bytes(signer.from_hex().unwrap()))), signing_key: Some( PrivateKey::from_bytes(signer.from_hex().unwrap()).public_key(), ), + next_signing_key: None, longterm_keys: LruCache::new(NonZeroUsize::new(1).unwrap()), ephemeral_keys: LruCache::new(NonZeroUsize::new(1).unwrap()), }), @@ -1319,29 +1843,6 @@ mod tests { } } - #[test] - fn generation_save_load() { - let storage = InMemoryKeyValue::new(); - let generation = 1; - let runtime_id = Namespace([2; NAMESPACE_SIZE]); - - // Empty storage. - let result = Kdf::load_last_generation(&storage, &runtime_id); - assert!(result.is_none()); - - // Happy path. 
- Kdf::store_last_generation(&storage, &runtime_id, generation); - let loaded = - Kdf::load_last_generation(&storage, &runtime_id).expect("generation should be loaded"); - assert_eq!(generation, loaded); - - // Decryption panics (invalid runtime ID). - let invalid_runtime_id = Namespace([3; NAMESPACE_SIZE]); - let result = - panic::catch_unwind(|| Kdf::load_last_generation(&storage, &invalid_runtime_id)); - assert!(result.is_err()); - } - #[test] fn master_secret_save_load() { let storage = InMemoryKeyValue::new(); @@ -1368,59 +1869,41 @@ mod tests { } #[test] - fn load_master_secrets() { - let runtime_id = Namespace([1; NAMESPACE_SIZE]); - let master_secrets = vec![ - Secret([0; SECRET_SIZE]), - Secret([1; SECRET_SIZE]), - Secret([2; SECRET_SIZE]), - ]; - - let empty_storage = InMemoryKeyValue::new(); - let full_storage = InMemoryKeyValue::new(); + fn checksum_save_load() { + let storage = InMemoryKeyValue::new(); + let generation = 0; + let checksum = vec![1, 2, 3]; - Kdf::store_last_generation(&full_storage, &runtime_id, 2); - for (generation, secret) in master_secrets.iter().enumerate() { - Kdf::store_master_secret(&full_storage, &runtime_id, secret, generation as u64); - } + // Empty storage. + let result = Kdf::load_checksum(&storage, generation); + assert!(result.is_empty()); // Happy path. 
- let kdf = Kdf::new(); - kdf.set_runtime_id(runtime_id) - .expect("runtime id should not be set"); - - let result = kdf.load_master_secrets(&full_storage, &runtime_id); - assert!(result.is_ok()); - - let mut inner = kdf.inner.write().unwrap(); - assert_eq!(inner.generation.unwrap(), 2); - assert_eq!( - inner.checksum.clone().unwrap().to_hex::(), - "ca9b0c294056ef674c4266e267e8972df8c6b8b0b5a3a86e081ed24daf306abf" - ); - assert_eq!(inner.master_secrets.len(), 3); - for (generation, secret) in master_secrets.iter().enumerate() { - let generation = generation as u64; - let loaded = inner.master_secrets.get(&generation).cloned(); - assert_eq!(loaded.unwrap().0, secret.0); - } + Kdf::store_checksum(&storage, checksum.clone(), generation); + let loaded = Kdf::load_checksum(&storage, generation); + assert_eq!(checksum, loaded); + } - // One master secret is missing. - Kdf::store_last_generation(&full_storage, &runtime_id, 3); + #[test] + fn master_secret_proposal_save_load() { + let storage = InMemoryKeyValue::new(); + let secret = Secret([0; SECRET_SIZE]); + let new_secret = Secret([1; SECRET_SIZE]); - let kdf = Kdf::new(); - kdf.set_runtime_id(runtime_id) - .expect("runtime id should not be set"); + // Empty storage. + let result = Kdf::load_master_secret_proposal(&storage); + assert!(result.is_none()); - let result = kdf.load_master_secrets(&full_storage, &runtime_id); - assert_eq!( - result.unwrap_err().to_string(), - "key manager state corrupted" - ); + // Happy path. + Kdf::store_master_secret_proposal(&storage, &secret); + let loaded = + Kdf::load_master_secret_proposal(&storage).expect("master secret should be loaded"); + assert_eq!(secret.0, loaded.0); - // Empty store. - let kdf = Kdf::new(); - let result = kdf.load_master_secrets(&empty_storage, &runtime_id); - assert!(result.is_ok()); + // Overwrite the proposal and check if the last secret is kept. 
+ Kdf::store_master_secret_proposal(&storage, &new_secret); + let loaded = + Kdf::load_master_secret_proposal(&storage).expect("master secret should be loaded"); + assert_eq!(new_secret.0, loaded.0); } } diff --git a/keymanager/src/crypto/packing.rs b/keymanager/src/crypto/packing.rs index 5cb5e72fecd..c17f771cbf4 100644 --- a/keymanager/src/crypto/packing.rs +++ b/keymanager/src/crypto/packing.rs @@ -20,8 +20,6 @@ const EPOCH_SIZE: usize = 8; const GENERATION_SIZE: usize = 8; /// The size of an encrypted secret. const SECRET_STORAGE_SIZE: usize = SECRET_SIZE + TAG_SIZE + NONCE_SIZE; -/// The size of an encrypted generation. -const GENERATION_STORAGE_SIZE: usize = GENERATION_SIZE + TAG_SIZE + NONCE_SIZE; /// Concatenate runtime ID and epoch (runtime_id || epoch) /// into a byte vector using little-endian byte order. @@ -152,26 +150,6 @@ pub fn unpack_encrypted_secret_nonce(data: &Vec) -> Option<(Vec, [u8; NO Some((ciphertext, nonce)) } -/// Unpack the concatenation of encrypted generation and nonce (ciphertext || nonce). -pub fn unpack_encrypted_generation_nonce(data: &Vec) -> Option<(Vec, [u8; NONCE_SIZE])> { - if data.len() != GENERATION_STORAGE_SIZE { - return None; - } - - let ciphertext = data - .get(..GENERATION_STORAGE_SIZE - NONCE_SIZE) - .unwrap() - .to_vec(); - - let nonce: [u8; NONCE_SIZE] = data - .get(GENERATION_STORAGE_SIZE - NONCE_SIZE..) 
- .unwrap() - .try_into() - .expect("slice with incorrect length"); - - Some((ciphertext, nonce)) -} - #[cfg(test)] mod test { use oasis_core_runtime::{ @@ -198,7 +176,6 @@ mod test { let d2 = DeoxysII::new(&key); let encrypted_secret = d2.seal(&nonce, secret, vec![]); - let encrypted_generation = d2.seal(&nonce, generation.to_le_bytes(), vec![]); let data = crypto::pack_runtime_id_epoch(&runtime_id, epoch); let res = crypto::unpack_runtime_id_epoch(data).expect("data should unpack"); @@ -227,12 +204,5 @@ mod test { let res = crypto::unpack_encrypted_secret_nonce(&vec![1, 2, 3]); assert_eq!(None, res); - - let data = crypto::pack_ciphertext_nonce(&encrypted_generation, &Nonce::new(nonce)); - let res = crypto::unpack_encrypted_generation_nonce(&data).expect("data should unpack"); - assert_eq!((encrypted_generation, nonce), res); - - let res = crypto::unpack_encrypted_secret_nonce(&vec![1, 2, 3]); - assert_eq!(None, res); } } diff --git a/keymanager/src/crypto/types.rs b/keymanager/src/crypto/types.rs index 5631732b20c..9f0e64db388 100644 --- a/keymanager/src/crypto/types.rs +++ b/keymanager/src/crypto/types.rs @@ -65,6 +65,15 @@ impl AsRef<[u8]> for Secret { } } +/// A secret with a checksum of the preceding secret. +#[derive(Clone, Default, cbor::Encode, cbor::Decode)] +pub struct VerifiableSecret { + /// Secret. + pub secret: Secret, + /// Checksum of the preceding secret. + pub checksum: Vec, +} + /// A key pair managed by the key manager. 
#[derive(Clone, Default, cbor::Encode, cbor::Decode)] pub struct KeyPair { diff --git a/keymanager/src/runtime/init.rs b/keymanager/src/runtime/init.rs index 9bbc4577386..720f3a3c093 100644 --- a/keymanager/src/runtime/init.rs +++ b/keymanager/src/runtime/init.rs @@ -9,10 +9,11 @@ use oasis_core_runtime::{ use crate::{ api::{ - LOCAL_METHOD_GENERATE_EPHEMERAL_SECRET, LOCAL_METHOD_INIT, - LOCAL_METHOD_LOAD_EPHEMERAL_SECRET, METHOD_GET_OR_CREATE_EPHEMERAL_KEYS, - METHOD_GET_OR_CREATE_KEYS, METHOD_GET_PUBLIC_EPHEMERAL_KEY, METHOD_GET_PUBLIC_KEY, - METHOD_REPLICATE_EPHEMERAL_SECRET, METHOD_REPLICATE_MASTER_SECRET, + LOCAL_METHOD_GENERATE_EPHEMERAL_SECRET, LOCAL_METHOD_GENERATE_MASTER_SECRET, + LOCAL_METHOD_INIT, LOCAL_METHOD_LOAD_EPHEMERAL_SECRET, LOCAL_METHOD_LOAD_MASTER_SECRET, + METHOD_GET_OR_CREATE_EPHEMERAL_KEYS, METHOD_GET_OR_CREATE_KEYS, + METHOD_GET_PUBLIC_EPHEMERAL_KEY, METHOD_GET_PUBLIC_KEY, METHOD_REPLICATE_EPHEMERAL_SECRET, + METHOD_REPLICATE_MASTER_SECRET, }, policy::{set_trusted_policy_signers, TrustedPolicySigners}, }; @@ -78,6 +79,13 @@ pub fn new_keymanager(signers: TrustedPolicySigners) -> Box { }, methods::init_kdf, )); + state.rpc_dispatcher.add_method(RpcMethod::new( + RpcMethodDescriptor { + name: LOCAL_METHOD_GENERATE_MASTER_SECRET.to_string(), + kind: RpcKind::LocalQuery, + }, + methods::generate_master_secret, + )); state.rpc_dispatcher.add_method(RpcMethod::new( RpcMethodDescriptor { name: LOCAL_METHOD_GENERATE_EPHEMERAL_SECRET.to_string(), @@ -85,6 +93,13 @@ pub fn new_keymanager(signers: TrustedPolicySigners) -> Box { }, methods::generate_ephemeral_secret, )); + state.rpc_dispatcher.add_method(RpcMethod::new( + RpcMethodDescriptor { + name: LOCAL_METHOD_LOAD_MASTER_SECRET.to_string(), + kind: RpcKind::LocalQuery, + }, + methods::load_master_secret, + )); state.rpc_dispatcher.add_method(RpcMethod::new( RpcMethodDescriptor { name: LOCAL_METHOD_LOAD_EPHEMERAL_SECRET.to_string(), diff --git a/keymanager/src/runtime/methods.rs 
b/keymanager/src/runtime/methods.rs index dcac88d6363..63c2f1d1438 100644 --- a/keymanager/src/runtime/methods.rs +++ b/keymanager/src/runtime/methods.rs @@ -22,7 +22,10 @@ use oasis_core_runtime::{ }, consensus::{ beacon::EpochTime, - keymanager::{EncryptedEphemeralSecret, EncryptedSecret, SignedEncryptedEphemeralSecret}, + keymanager::{ + EncryptedEphemeralSecret, EncryptedMasterSecret, EncryptedSecret, + SignedEncryptedEphemeralSecret, SignedEncryptedMasterSecret, + }, state::{ beacon::ImmutableState as BeaconState, keymanager::{ImmutableState as KeyManagerState, Status}, @@ -38,14 +41,16 @@ use oasis_core_runtime::{ use crate::{ api::{ EphemeralKeyRequest, GenerateEphemeralSecretRequest, GenerateEphemeralSecretResponse, - InitRequest, InitResponse, KeyManagerError, LoadEphemeralSecretRequest, LongTermKeyRequest, + GenerateMasterSecretRequest, GenerateMasterSecretResponse, InitRequest, InitResponse, + KeyManagerError, LoadEphemeralSecretRequest, LoadMasterSecretRequest, LongTermKeyRequest, ReplicateEphemeralSecretRequest, ReplicateEphemeralSecretResponse, ReplicateMasterSecretRequest, ReplicateMasterSecretResponse, SignedInitResponse, }, client::{KeyManagerClient, RemoteClient}, crypto::{ - kdf::Kdf, pack_runtime_id_epoch, unpack_encrypted_secret_nonce, KeyPair, Secret, - SignedPublicKey, SECRET_SIZE, + kdf::{Kdf, State}, + pack_runtime_id_epoch, pack_runtime_id_generation_epoch, unpack_encrypted_secret_nonce, + KeyPair, Secret, SignedPublicKey, VerifiableSecret, SECRET_SIZE, }, policy::Policy, runtime::context::Context as KmContext, @@ -74,44 +79,24 @@ pub fn init_kdf(ctx: &mut RpcContext, req: &InitRequest) -> Result Result { + let kdf = Kdf::global(); + let runtime_id = kdf.runtime_id()?; + + // Allow generating a secret for the next epoch only. + let epoch = consensus_epoch(ctx)? + 1; + if epoch != req.epoch { + return Err(KeyManagerError::InvalidEpoch(epoch, req.epoch).into()); + } + + // Generate a secret and encrypt it. 
+ // Note that the checksum can be computed for the next generation only. + let generation = req.generation; + let secret = Secret::generate(); + let checksum = kdf.checksum_master_secret_proposal(runtime_id, &secret, generation)?; + let additional_data = pack_runtime_id_generation_epoch(&runtime_id, generation, epoch); + let secret = encrypt_secret(ctx, secret, checksum, additional_data, runtime_id)?; + + // Sign the secret. + let signer: Arc = ctx.identity.clone(); + let secret = EncryptedMasterSecret { + runtime_id, + generation, + epoch, + secret, + }; + let signed_secret = SignedEncryptedMasterSecret::new(secret, &signer)?; + + Ok(GenerateMasterSecretResponse { signed_secret }) +} + +/// Generate an ephemeral secret and encrypt it using the key manager's REK keys. pub fn generate_ephemeral_secret( ctx: &mut RpcContext, req: &GenerateEphemeralSecretRequest, ) -> Result { - // Allow to generate secret for the next epoch only. + let kdf = Kdf::global(); + let runtime_id = kdf.runtime_id()?; + + // Allow generating a secret for the next epoch only. let epoch = consensus_epoch(ctx)? + 1; if epoch != req.epoch { return Err(KeyManagerError::InvalidEpoch(epoch, req.epoch).into()); } + // Generate a secret and encrypt it. + let secret = Secret::generate(); + let checksum = Kdf::checksum_ephemeral_secret(&runtime_id, &secret, epoch); + let additional_data = pack_runtime_id_epoch(&runtime_id, epoch); + let secret = encrypt_secret(ctx, secret, checksum, additional_data, runtime_id)?; + + // Sign the secret. + let signer: Arc = ctx.identity.clone(); + let secret = EncryptedEphemeralSecret { + runtime_id, + epoch, + secret, + }; + let signed_secret = SignedEncryptedEphemeralSecret::new(secret, &signer)?; + + Ok(GenerateEphemeralSecretResponse { signed_secret }) +} + +/// Encrypt a secret using the Deoxys-II MRAE algorithm and the key manager's REK keys. 
+fn encrypt_secret( + ctx: &mut RpcContext, + secret: Secret, + checksum: Vec, + additional_data: Vec, + runtime_id: Namespace, +) -> Result { // Fetch REK keys of the key manager committee members. - let runtime_id = Kdf::global() - .runtime_id() - .ok_or(KeyManagerError::NotInitialized)?; let rek_keys = key_manager_rek_keys(ctx, runtime_id)?; let rek_keys: HashSet<_> = rek_keys.values().collect(); - // Abort if our REK hasn't been published. if rek_keys.get(&ctx.identity.public_rek()).is_none() { return Err(KeyManagerError::REKNotPublished.into()); } - - // Generate a random encryption key, a random secret and encrypt the latter with REK keys. + // Encrypt the secret. let priv_key = x25519::PrivateKey::generate(); let pub_key = x25519::PublicKey::from(&priv_key); let mut nonce = Nonce::generate(); - let secret = Secret::generate(); let plaintext = secret.0.to_vec(); - let additional_data = pack_runtime_id_epoch(&runtime_id, epoch); let mut ciphertexts = HashMap::new(); - let checksum = Kdf::checksum_ephemeral_secret(&secret, &runtime_id, epoch); - for &rek in rek_keys.iter() { nonce.increment()?; @@ -252,20 +297,29 @@ pub fn generate_ephemeral_secret( ciphertexts.insert(*rek, ciphertext); } - // Sign the secret. - let signer: Arc = ctx.identity.clone(); - let secret = EncryptedEphemeralSecret { - runtime_id, - epoch, - secret: EncryptedSecret { - checksum, - pub_key, - ciphertexts, - }, + Ok(EncryptedSecret { + checksum, + pub_key, + ciphertexts, + }) +} + +/// Decrypt and store a proposal for the next master secret. +pub fn load_master_secret(ctx: &mut RpcContext, req: &LoadMasterSecretRequest) -> Result<()> { + let signed_secret = validate_signed_master_secret(ctx, &req.signed_secret)?; + + let secret = match decrypt_master_secret(ctx, &signed_secret)? 
{ + Some(secret) => secret, + None => return Ok(()), }; - let signed_secret = SignedEncryptedEphemeralSecret::new(secret, &signer)?; - Ok(GenerateEphemeralSecretResponse { signed_secret }) + Kdf::global().add_master_secret_proposal( + ctx.untrusted_local_storage, + &signed_secret.runtime_id, + secret, + signed_secret.generation, + &signed_secret.secret.checksum, + ) } /// Decrypt and store an ephemeral secret. If decryption fails, try to replicate the secret @@ -277,19 +331,51 @@ pub fn load_ephemeral_secret(ctx: &mut RpcContext, req: &LoadEphemeralSecretRequ Some(secret) => secret, None => { let nodes = nodes_with_ephemeral_secret(ctx, &signed_secret)?; - fetch_ephemeral_secret(ctx, signed_secret.epoch, nodes)? + let client = key_manager_client_for_replication(ctx); + fetch_ephemeral_secret(signed_secret.epoch, &nodes, &client)? } }; - let checksum = - Kdf::checksum_ephemeral_secret(&secret, &signed_secret.runtime_id, signed_secret.epoch); - if checksum != signed_secret.secret.checksum { - return Err(KeyManagerError::EphemeralSecretChecksumMismatch.into()); + Kdf::global().add_ephemeral_secret( + &signed_secret.runtime_id, + secret, + signed_secret.epoch, + &signed_secret.secret.checksum, + ) +} + +/// Decrypt master secret with local REK key. 
+fn decrypt_master_secret( + ctx: &mut RpcContext, + secret: &EncryptedMasterSecret, +) -> Result> { + let generation = secret.generation; + let epoch = secret.epoch; + let runtime_id = secret.runtime_id; + let rek = ctx.identity.public_rek(); + + let ciphertext = match secret.secret.ciphertexts.get(&rek) { + Some(ciphertext) => ciphertext, + None => return Ok(None), + }; + + let (ciphertext, nonce) = + unpack_encrypted_secret_nonce(ciphertext).ok_or(KeyManagerError::InvalidCiphertext)?; + let additional_data = pack_runtime_id_generation_epoch(&runtime_id, generation, epoch); + let plaintext = ctx.identity.box_open( + &nonce, + ciphertext, + additional_data, + &secret.secret.pub_key.0, + )?; + + if plaintext.len() != SECRET_SIZE { + return Err(KeyManagerError::InvalidCiphertext.into()); } - Kdf::global().add_ephemeral_secret(secret, signed_secret.epoch); + let secret = Secret(plaintext.try_into().expect("slice with incorrect length")); - Ok(()) + Ok(Some(secret)) } /// Decrypt ephemeral secret with local REK key. @@ -327,14 +413,13 @@ fn decrypt_ephemeral_secret( /// Fetch master secret from another key manager enclave. fn fetch_master_secret( - ctx: &mut RpcContext, generation: u64, nodes: &Vec, -) -> Result { - let km_client = key_manager_client_for_replication(ctx); + client: &RemoteClient, +) -> Result { for node in nodes.iter() { - km_client.set_nodes(vec![*node]); - let result = block_on(km_client.replicate_master_secret(generation)); + client.set_nodes(vec![*node]); + let result = block_on(client.replicate_master_secret(generation)); if let Ok(secret) = result { return Ok(secret); } @@ -345,14 +430,13 @@ fn fetch_master_secret( /// Fetch ephemeral secret from another key manager enclave. 
fn fetch_ephemeral_secret( - ctx: &mut RpcContext, epoch: EpochTime, - nodes: Vec, + nodes: &Vec, + client: &RemoteClient, ) -> Result { - let km_client = key_manager_client_for_replication(ctx); for node in nodes.iter() { - km_client.set_nodes(vec![*node]); - let result = block_on(km_client.replicate_ephemeral_secret(epoch)); + client.set_nodes(vec![*node]); + let result = block_on(client.replicate_ephemeral_secret(epoch)); if let Ok(secret) = result { return Ok(secret); } @@ -383,16 +467,17 @@ fn key_manager_client_for_replication(ctx: &mut RpcContext) -> RemoteClient { /// Create init response and sign it with RAK. fn sign_init_response( ctx: &mut RpcContext, - checksum: Vec, + state: State, policy_checksum: Vec, - rsk: signature::PublicKey, ) -> Result { let is_secure = BUILD_INFO.is_secure && !Policy::unsafe_skip(); let init_response = InitResponse { is_secure, - checksum, + checksum: state.checksum, + next_checksum: state.next_checksum, policy_checksum, - rsk, + rsk: state.signing_key, + next_rsk: state.next_signing_key, }; let signer: Arc = ctx.identity.clone(); SignedInitResponse::new(init_response, &signer) @@ -473,6 +558,21 @@ fn validate_height_freshness(ctx: &RpcContext, height: Option) -> Result<() Ok(()) } +/// Verify that the master secret has been published in the consensus layer. +fn validate_signed_master_secret( + ctx: &RpcContext, + signed_secret: &SignedEncryptedMasterSecret, +) -> Result { + let consensus_state = block_on(ctx.consensus_verifier.latest_state())?; + let km_state = KeyManagerState::new(&consensus_state); + let published_signed_secret = km_state + .master_secret(signed_secret.secret.runtime_id)? + .filter(|published_signed_secret| published_signed_secret == signed_secret) + .ok_or(KeyManagerError::MasterSecretNotPublished)?; + + Ok(published_signed_secret.secret) +} + /// Validate that the ephemeral secret has been published in the consensus layer. 
fn validate_signed_ephemeral_secret( ctx: &RpcContext, From 6b50bf98475486bf2aded83be5cf9aecfdc10968 Mon Sep 17 00:00:00 2001 From: Peter Nose Date: Fri, 24 Mar 2023 12:20:44 +0100 Subject: [PATCH 4/7] keymanager: Simplify ephemeral secrets Store only the last ephemeral secret in the key manager state. --- .buildkite/code.pipeline.yml | 2 +- .../cometbft/apps/keymanager/keymanager.go | 12 -- .../cometbft/apps/keymanager/query.go | 7 +- .../apps/keymanager/state/interop/interop.go | 52 +++-- .../cometbft/apps/keymanager/state/state.go | 40 +--- .../apps/keymanager/state/state_test.go | 78 +------- .../cometbft/apps/keymanager/transactions.go | 14 +- .../apps/keymanager/transactions_test.go | 2 +- .../cometbft/keymanager/keymanager.go | 4 +- go/keymanager/api/api.go | 4 +- go/keymanager/api/grpc.go | 8 +- ...eys.go => keymanager_ephemeral_secrets.go} | 186 ++++++------------ .../scenario/e2e/runtime/keymanager_util.go | 23 +++ .../scenario/e2e/runtime/runtime.go | 2 +- go/registry/api/api.go | 7 - go/worker/keymanager/worker.go | 109 +++------- keymanager/src/crypto/kdf.rs | 125 ++++++++++-- keymanager/src/runtime/methods.rs | 6 +- runtime/src/consensus/state/beacon.rs | 2 +- runtime/src/consensus/state/keymanager.rs | 19 +- runtime/src/consensus/state/registry.rs | 2 +- runtime/src/consensus/state/staking.rs | 2 +- 22 files changed, 280 insertions(+), 426 deletions(-) rename go/oasis-test-runner/scenario/e2e/runtime/{keymanager_ephemeral_keys.go => keymanager_ephemeral_secrets.go} (74%) diff --git a/.buildkite/code.pipeline.yml b/.buildkite/code.pipeline.yml index c1c7d21cba9..9f0a5bdab7d 100644 --- a/.buildkite/code.pipeline.yml +++ b/.buildkite/code.pipeline.yml @@ -218,7 +218,7 @@ steps: - "build-rust-runtime-loader" - "build-rust-runtimes" branches: "!master !stable/*" - parallelism: 3 + parallelism: 4 timeout_in_minutes: 20 command: - .buildkite/scripts/download_e2e_test_artifacts.sh diff --git a/go/consensus/cometbft/apps/keymanager/keymanager.go 
b/go/consensus/cometbft/apps/keymanager/keymanager.go index e4fa7922d3a..50ac5d2b5fd 100644 --- a/go/consensus/cometbft/apps/keymanager/keymanager.go +++ b/go/consensus/cometbft/apps/keymanager/keymanager.go @@ -22,9 +22,6 @@ import ( registry "github.com/oasisprotocol/oasis-core/go/registry/api" ) -// maxEphemeralSecretAge is the maximum age of an ephemeral secret in the number of epochs. -const maxEphemeralSecretAge = 20 - // minProposalReplicationPercent is the minimum percentage of enclaves in the key manager committee // that must replicate the proposal for the next master secret before it is accepted. const minProposalReplicationPercent = 66 @@ -212,15 +209,6 @@ func (app *keymanagerApplication) onEpochChange(ctx *tmapi.Context, epoch beacon } toEmit = append(toEmit, newStatus) } - - // Clean ephemeral secrets. - // TODO: use max ephemeral secret age from the key manager policy - if epoch > maxEphemeralSecretAge { - expiryEpoch := epoch - maxEphemeralSecretAge - if err = state.CleanEphemeralSecrets(ctx, rt.ID, expiryEpoch); err != nil { - return fmt.Errorf("failed to clean ephemeral secrets: %w", err) - } - } } // Note: It may be a good idea to sweep statuses that don't have runtimes, diff --git a/go/consensus/cometbft/apps/keymanager/query.go b/go/consensus/cometbft/apps/keymanager/query.go index 2dad99e81c0..6134d7ea597 100644 --- a/go/consensus/cometbft/apps/keymanager/query.go +++ b/go/consensus/cometbft/apps/keymanager/query.go @@ -3,7 +3,6 @@ package keymanager import ( "context" - beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common" abciAPI "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" keymanagerState "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/apps/keymanager/state" @@ -15,7 +14,7 @@ type Query interface { Status(context.Context, common.Namespace) (*keymanager.Status, error) Statuses(context.Context) ([]*keymanager.Status, error) MasterSecret(context.Context, 
common.Namespace) (*keymanager.SignedEncryptedMasterSecret, error) - EphemeralSecret(context.Context, common.Namespace, beacon.EpochTime) (*keymanager.SignedEncryptedEphemeralSecret, error) + EphemeralSecret(context.Context, common.Namespace) (*keymanager.SignedEncryptedEphemeralSecret, error) Genesis(context.Context) (*keymanager.Genesis, error) } @@ -49,8 +48,8 @@ func (kq *keymanagerQuerier) MasterSecret(ctx context.Context, id common.Namespa return kq.state.MasterSecret(ctx, id) } -func (kq *keymanagerQuerier) EphemeralSecret(ctx context.Context, id common.Namespace, epoch beacon.EpochTime) (*keymanager.SignedEncryptedEphemeralSecret, error) { - return kq.state.EphemeralSecret(ctx, id, epoch) +func (kq *keymanagerQuerier) EphemeralSecret(ctx context.Context, id common.Namespace) (*keymanager.SignedEncryptedEphemeralSecret, error) { + return kq.state.EphemeralSecret(ctx, id) } func (app *keymanagerApplication) QueryFactory() interface{} { diff --git a/go/consensus/cometbft/apps/keymanager/state/interop/interop.go b/go/consensus/cometbft/apps/keymanager/state/interop/interop.go index d7239b9482a..9f609257337 100644 --- a/go/consensus/cometbft/apps/keymanager/state/interop/interop.go +++ b/go/consensus/cometbft/apps/keymanager/state/interop/interop.go @@ -124,40 +124,38 @@ func InitializeTestKeyManagerState(ctx context.Context, mkvs mkvs.Tree) error { RSK: nil, }, } { - if err := state.SetStatus(ctx, status); err != nil { + if err = state.SetStatus(ctx, status); err != nil { return fmt.Errorf("setting key manager status: %w", err) } } - // Add two ephemeral secrets. + // Add an ephemeral secret. 
rek1 := x25519.PrivateKey(sha512.Sum512_256([]byte("first rek"))) rek2 := x25519.PrivateKey(sha512.Sum512_256([]byte("second rek"))) - - for epoch := 1; epoch <= 2; epoch++ { - secret := kmApi.EncryptedEphemeralSecret{ - ID: keymanager1, - Epoch: beacon.EpochTime(epoch), - Secret: kmApi.EncryptedSecret{ - Checksum: []byte{1, 2, 3, 4, 5}, - PubKey: *rek1.Public(), - Ciphertexts: map[x25519.PublicKey][]byte{ - *rek1.Public(): {1, 2, 3}, - *rek2.Public(): {4, 5, 6}, - }, + epoch := 1 + secret := kmApi.EncryptedEphemeralSecret{ + ID: keymanager1, + Epoch: beacon.EpochTime(epoch), + Secret: kmApi.EncryptedSecret{ + Checksum: []byte{1, 2, 3, 4, 5}, + PubKey: *rek1.Public(), + Ciphertexts: map[x25519.PublicKey][]byte{ + *rek1.Public(): {1, 2, 3}, + *rek2.Public(): {4, 5, 6}, }, - } - sig, err := signature.Sign(signers[0], kmApi.EncryptedEphemeralSecretSignatureContext, cbor.Marshal(secret)) - if err != nil { - return fmt.Errorf("failed to sign ephemeral secret: %w", err) - } - sigSecret := kmApi.SignedEncryptedEphemeralSecret{ - Secret: secret, - Signature: sig.Signature, - } - err = state.SetEphemeralSecret(ctx, &sigSecret) - if err != nil { - return fmt.Errorf("failed to set ephemeral secret: %w", err) - } + }, + } + sig, err := signature.Sign(signers[0], kmApi.EncryptedEphemeralSecretSignatureContext, cbor.Marshal(secret)) + if err != nil { + return fmt.Errorf("failed to sign ephemeral secret: %w", err) + } + sigSecret := kmApi.SignedEncryptedEphemeralSecret{ + Secret: secret, + Signature: sig.Signature, + } + err = state.SetEphemeralSecret(ctx, &sigSecret) + if err != nil { + return fmt.Errorf("failed to set ephemeral secret: %w", err) } return nil diff --git a/go/consensus/cometbft/apps/keymanager/state/state.go b/go/consensus/cometbft/apps/keymanager/state/state.go index 562e09cd92a..7f4909cad1b 100644 --- a/go/consensus/cometbft/apps/keymanager/state/state.go +++ b/go/consensus/cometbft/apps/keymanager/state/state.go @@ -4,14 +4,12 @@ import ( "context" "fmt" - 
beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/cbor" "github.com/oasisprotocol/oasis-core/go/common/keyformat" abciAPI "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" "github.com/oasisprotocol/oasis-core/go/keymanager/api" "github.com/oasisprotocol/oasis-core/go/storage/mkvs" - "github.com/oasisprotocol/oasis-core/go/storage/mkvs/node" ) var ( @@ -30,7 +28,7 @@ var ( // ephemeralSecretKeyFmt is the key manager ephemeral secret key format. // // Value is CBOR-serialized key manager signed encrypted ephemeral secret. - ephemeralSecretKeyFmt = keyformat.New(0x73, keyformat.H(&common.Namespace{}), uint64(0)) + ephemeralSecretKeyFmt = keyformat.New(0x73, keyformat.H(&common.Namespace{})) ) // ImmutableState is the immutable key manager state wrapper. @@ -122,8 +120,8 @@ func (st *ImmutableState) MasterSecret(ctx context.Context, id common.Namespace) return &secret, nil } -func (st *ImmutableState) EphemeralSecret(ctx context.Context, id common.Namespace, epoch beacon.EpochTime) (*api.SignedEncryptedEphemeralSecret, error) { - data, err := st.is.Get(ctx, ephemeralSecretKeyFmt.Encode(&id, uint64(epoch))) +func (st *ImmutableState) EphemeralSecret(ctx context.Context, id common.Namespace) (*api.SignedEncryptedEphemeralSecret, error) { + data, err := st.is.Get(ctx, ephemeralSecretKeyFmt.Encode(&id)) if err != nil { return nil, abciAPI.UnavailableStateError(err) } @@ -175,40 +173,10 @@ func (st *MutableState) SetMasterSecret(ctx context.Context, secret *api.SignedE } func (st *MutableState) SetEphemeralSecret(ctx context.Context, secret *api.SignedEncryptedEphemeralSecret) error { - err := st.ms.Insert(ctx, ephemeralSecretKeyFmt.Encode(&secret.Secret.ID, uint64(secret.Secret.Epoch)), cbor.Marshal(secret)) + err := st.ms.Insert(ctx, ephemeralSecretKeyFmt.Encode(&secret.Secret.ID), cbor.Marshal(secret)) return abciAPI.UnavailableStateError(err) } -// 
CleanEphemeralSecrets removes all ephemeral secrets before the given epoch. -func (st *MutableState) CleanEphemeralSecrets(ctx context.Context, id common.Namespace, epoch beacon.EpochTime) error { - it := st.is.NewIterator(ctx) - defer it.Close() - - hID := keyformat.PreHashed(id.Hash()) - - var toDelete []node.Key - for it.Seek(ephemeralSecretKeyFmt.Encode(&id)); it.Valid(); it.Next() { - var esID keyformat.PreHashed - var esEpoch beacon.EpochTime - if !ephemeralSecretKeyFmt.Decode(it.Key(), &esID, (*uint64)(&esEpoch)) { - break - } - if hID != esID || esEpoch >= epoch { - break - } - toDelete = append(toDelete, it.Key()) - } - if it.Err() != nil { - return abciAPI.UnavailableStateError(it.Err()) - } - for _, key := range toDelete { - if err := st.ms.Remove(ctx, key); err != nil { - return abciAPI.UnavailableStateError(err) - } - } - return nil -} - // NewMutableState creates a new mutable key manager state wrapper. func NewMutableState(tree mkvs.KeyValueTree) *MutableState { return &MutableState{ diff --git a/go/consensus/cometbft/apps/keymanager/state/state_test.go b/go/consensus/cometbft/apps/keymanager/state/state_test.go index 9b503a1da8e..96ec010f36b 100644 --- a/go/consensus/cometbft/apps/keymanager/state/state_test.go +++ b/go/consensus/cometbft/apps/keymanager/state/state_test.go @@ -66,11 +66,11 @@ func TestEphemeralSecret(t *testing.T) { common.NewTestNamespaceFromSeed([]byte("runtime 1"), common.NamespaceKeyManager), common.NewTestNamespaceFromSeed([]byte("runtime 2"), common.NamespaceKeyManager), } - secrets := make([]*api.SignedEncryptedEphemeralSecret, 0, 20) + secrets := make([]*api.SignedEncryptedEphemeralSecret, 0, 10) for i := 0; i < cap(secrets); i++ { secret := api.SignedEncryptedEphemeralSecret{ Secret: api.EncryptedEphemeralSecret{ - ID: runtimes[(i/5)%2], + ID: runtimes[i%2], Epoch: beacon.EpochTime(i), }, } @@ -84,75 +84,11 @@ func TestEphemeralSecret(t *testing.T) { } // Test querying secrets. 
- for i := range secrets { - secret, err := s.EphemeralSecret(ctx, secrets[i].Secret.ID, secrets[i].Secret.Epoch) + for i, runtime := range runtimes { + secret, err := s.EphemeralSecret(ctx, runtime) require.NoError(err, "EphemeralSecret()") - require.Equal(secrets[i], secret, "ephemeral secret should match") - } - for i := range secrets { - _, err := s.EphemeralSecret(ctx, secrets[i].Secret.ID, secrets[i].Secret.Epoch+5) - require.EqualError(err, api.ErrNoSuchEphemeralSecret.Error(), "EphemeralSecret should error for non-existing secrets") - } - - // Test partial/complete secret removal. - testCases := []struct { - runtime common.Namespace - epoch beacon.EpochTime - removed int - kept int - }{ - // Remove all secrets for the first runtime. - { - runtimes[0], - 100, - 10, - 10, - }, - // Remove 6 secrets (epochs 0-4, 10) for the first runtime. - { - runtimes[0], - 11, - 6, - 14, - }, - // Remove all secrets for the second runtime. - { - runtimes[1], - 100, - 10, - 10, - }, - // Remove 8 secrets (epochs 5-9, 15-17) for the second runtime. 
- { - runtimes[1], - 18, - 8, - 12, - }, - } - for _, tc := range testCases { - for _, secret := range secrets { - err := s.SetEphemeralSecret(ctx, secret) - require.NoError(err, "SetEphemeralSecret()") - } - - err := s.CleanEphemeralSecrets(ctx, tc.runtime, tc.epoch) - require.NoError(err, "CleanEphemeralSecrets()") - - var removed, kept int - for i := range secrets { - secret, err := s.EphemeralSecret(ctx, secrets[i].Secret.ID, secrets[i].Secret.Epoch) - switch { - case secrets[i].Secret.ID == tc.runtime && secrets[i].Secret.Epoch < tc.epoch: - require.EqualError(err, api.ErrNoSuchEphemeralSecret.Error(), "EphemeralSecret should error for non-existing secrets") - removed++ - default: - require.NoError(err, "EphemeralSecret()") - require.Equal(secrets[i], secret, "ephemeral secret should match") - kept++ - } - } - require.Equal(tc.removed, removed, "the number of removed ephemeral secrets is incorrect") - require.Equal(tc.kept, kept, "the number of kept ephemeral secrets is incorrect") + require.Equal(secrets[8+i], secret, "last ephemeral secret should be kept") } + _, err := s.EphemeralSecret(ctx, common.Namespace{1, 2, 3}) + require.EqualError(err, api.ErrNoSuchEphemeralSecret.Error(), "EphemeralSecret should error for non-existing secrets") } diff --git a/go/consensus/cometbft/apps/keymanager/transactions.go b/go/consensus/cometbft/apps/keymanager/transactions.go index 2c2f94ca79b..fafb37e2f24 100644 --- a/go/consensus/cometbft/apps/keymanager/transactions.go +++ b/go/consensus/cometbft/apps/keymanager/transactions.go @@ -249,16 +249,14 @@ func (app *keymanagerApplication) publishEphemeralSecret( return fmt.Errorf("keymanager: ephemeral secret can be published only by the key manager committee") } - // Reject if the secret has been published. 
- _, err = state.EphemeralSecret(ctx, secret.Secret.ID, secret.Secret.Epoch) - switch err { - case nil: - return fmt.Errorf("keymanager: ephemeral secret for epoch %d already published", secret.Secret.Epoch) - case api.ErrNoSuchEphemeralSecret: - // Secret hasn't been published. - default: + // Reject if the ephemeral secret has been published in this epoch. + lastSecret, err := state.EphemeralSecret(ctx, secret.Secret.ID) + if err != nil && err != api.ErrNoSuchEphemeralSecret { return err } + if lastSecret != nil && secret.Secret.Epoch == lastSecret.Secret.Epoch { + return fmt.Errorf("keymanager: ephemeral secret can be proposed once per epoch") + } // Verify the secret. Ephemeral secrets can be published for the next epoch only. epoch, err := app.state.GetCurrentEpoch(ctx) diff --git a/go/consensus/cometbft/apps/keymanager/transactions_test.go b/go/consensus/cometbft/apps/keymanager/transactions_test.go index 80932365b99..881fee282ac 100644 --- a/go/consensus/cometbft/apps/keymanager/transactions_test.go +++ b/go/consensus/cometbft/apps/keymanager/transactions_test.go @@ -254,6 +254,6 @@ func TestPublishEphemeralSecret(t *testing.T) { t.Run("ephemeral secret already published", func(t *testing.T) { sigSecret := newSignedSecret() err := app.publishEphemeralSecret(txCtx, kmState, sigSecret) - require.EqualError(t, err, "keymanager: ephemeral secret for epoch 1 already published") + require.EqualError(t, err, "keymanager: ephemeral secret can be proposed once per epoch") }) } diff --git a/go/consensus/cometbft/keymanager/keymanager.go b/go/consensus/cometbft/keymanager/keymanager.go index 471bb05edfc..eef4620caed 100644 --- a/go/consensus/cometbft/keymanager/keymanager.go +++ b/go/consensus/cometbft/keymanager/keymanager.go @@ -82,13 +82,13 @@ func (sc *serviceClient) GetMasterSecret(ctx context.Context, query *registry.Na return q.MasterSecret(ctx, query.ID) } -func (sc *serviceClient) GetEphemeralSecret(ctx context.Context, query *registry.NamespaceEpochQuery) 
(*api.SignedEncryptedEphemeralSecret, error) { +func (sc *serviceClient) GetEphemeralSecret(ctx context.Context, query *registry.NamespaceQuery) (*api.SignedEncryptedEphemeralSecret, error) { q, err := sc.querier.QueryAt(ctx, query.Height) if err != nil { return nil, err } - return q.EphemeralSecret(ctx, query.ID, query.Epoch) + return q.EphemeralSecret(ctx, query.ID) } func (sc *serviceClient) WatchMasterSecrets() (<-chan *api.SignedEncryptedMasterSecret, *pubsub.Subscription) { diff --git a/go/keymanager/api/api.go b/go/keymanager/api/api.go index 56e8ea965f3..9926415bbcc 100644 --- a/go/keymanager/api/api.go +++ b/go/keymanager/api/api.go @@ -45,7 +45,7 @@ var ( ErrNoSuchMasterSecret = errors.New(ModuleName, 3, "keymanager: no such master secret") // ErrNoSuchEphemeralSecret is the error returned when a key manager ephemeral secret - // for the given epoch does not exist. + // does not exist. ErrNoSuchEphemeralSecret = errors.New(ModuleName, 4, "keymanager: no such ephemeral secret") // MethodUpdatePolicy is the method name for policy updates. @@ -213,7 +213,7 @@ type Backend interface { WatchMasterSecrets() (<-chan *SignedEncryptedMasterSecret, *pubsub.Subscription) // GetEphemeralSecret returns the key manager ephemeral secret. - GetEphemeralSecret(context.Context, *registry.NamespaceEpochQuery) (*SignedEncryptedEphemeralSecret, error) + GetEphemeralSecret(context.Context, *registry.NamespaceQuery) (*SignedEncryptedEphemeralSecret, error) // WatchEphemeralSecrets returns a channel that produces a stream of ephemeral secrets. WatchEphemeralSecrets() (<-chan *SignedEncryptedEphemeralSecret, *pubsub.Subscription) diff --git a/go/keymanager/api/grpc.go b/go/keymanager/api/grpc.go index 2b0e09fa4ac..666847e28ad 100644 --- a/go/keymanager/api/grpc.go +++ b/go/keymanager/api/grpc.go @@ -21,7 +21,7 @@ var ( // methodGetMasterSecret is the GetMasterSecret method. 
methodGetMasterSecret = serviceName.NewMethod("GetMasterSecret", registry.NamespaceQuery{}) // methodGetEphemeralSecret is the GetEphemeralSecret method. - methodGetEphemeralSecret = serviceName.NewMethod("GetEphemeralSecret", registry.NamespaceEpochQuery{}) + methodGetEphemeralSecret = serviceName.NewMethod("GetEphemeralSecret", registry.NamespaceQuery{}) // methodWatchStatuses is the WatchStatuses method. methodWatchStatuses = serviceName.NewMethod("WatchStatuses", nil) @@ -147,7 +147,7 @@ func handlerGetEphemeralSecret( dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor, ) (interface{}, error) { - var query registry.NamespaceEpochQuery + var query registry.NamespaceQuery if err := dec(&query); err != nil { return nil, err } @@ -159,7 +159,7 @@ func handlerGetEphemeralSecret( FullMethod: methodGetEphemeralSecret.FullName(), } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(Backend).GetEphemeralSecret(ctx, req.(*registry.NamespaceEpochQuery)) + return srv.(Backend).GetEphemeralSecret(ctx, req.(*registry.NamespaceQuery)) } return interceptor(ctx, &query, info, handler) } @@ -273,7 +273,7 @@ func (c *KeymanagerClient) GetMasterSecret(ctx context.Context, query *registry. 
return resp, nil } -func (c *KeymanagerClient) GetEphemeralSecret(ctx context.Context, query *registry.NamespaceEpochQuery) (*SignedEncryptedEphemeralSecret, error) { +func (c *KeymanagerClient) GetEphemeralSecret(ctx context.Context, query *registry.NamespaceQuery) (*SignedEncryptedEphemeralSecret, error) { var resp *SignedEncryptedEphemeralSecret if err := c.conn.Invoke(ctx, methodGetEphemeralSecret.FullName(), query, &resp); err != nil { return nil, err diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_ephemeral_keys.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_ephemeral_secrets.go similarity index 74% rename from go/oasis-test-runner/scenario/e2e/runtime/keymanager_ephemeral_keys.go rename to go/oasis-test-runner/scenario/e2e/runtime/keymanager_ephemeral_secrets.go index 35c263ff35a..eb9f56ccf44 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_ephemeral_keys.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_ephemeral_secrets.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "reflect" - "time" "github.com/libp2p/go-libp2p/core/peer" @@ -15,14 +14,13 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/cbor" "github.com/oasisprotocol/oasis-core/go/common/node" consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" - keymanager "github.com/oasisprotocol/oasis-core/go/keymanager/api" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis" "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/scenario" registry "github.com/oasisprotocol/oasis-core/go/registry/api" ) -// KeymanagerEphemeralKeys is the keymanager ephemeral secret and ephemeral +// KeymanagerEphemeralSecrets is the keymanager ephemeral secret and ephemeral // key generation scenario. // // It uses encryption and decryption transactions provided by the @@ -36,22 +34,22 @@ import ( // - Start all managers and test that ephemeral secrets can be replicated. 
// - Run managers for few epochs and test that everything works. // - Publish transactions that use ephemeral keys to encrypt/decrypt messages. -var KeymanagerEphemeralKeys scenario.Scenario = newKmEphemeralKeysImpl() +var KeymanagerEphemeralSecrets scenario.Scenario = newKmEphemeralSecretsImpl() -type kmEphemeralKeysImpl struct { +type kmEphemeralSecretsImpl struct { Scenario } -func newKmEphemeralKeysImpl() scenario.Scenario { - return &kmEphemeralKeysImpl{ +func newKmEphemeralSecretsImpl() scenario.Scenario { + return &kmEphemeralSecretsImpl{ Scenario: *NewScenario( - "keymanager-ephemeral-keys", + "keymanager-ephemeral-secrets", NewKVTestClient().WithScenario(InsertRemoveKeyValueEncScenario), ), } } -func (sc *kmEphemeralKeysImpl) Fixture() (*oasis.NetworkFixture, error) { +func (sc *kmEphemeralSecretsImpl) Fixture() (*oasis.NetworkFixture, error) { f, err := sc.Scenario.Fixture() if err != nil { return nil, err @@ -67,13 +65,13 @@ func (sc *kmEphemeralKeysImpl) Fixture() (*oasis.NetworkFixture, error) { return f, nil } -func (sc *kmEphemeralKeysImpl) Clone() scenario.Scenario { - return &kmEphemeralKeysImpl{ +func (sc *kmEphemeralSecretsImpl) Clone() scenario.Scenario { + return &kmEphemeralSecretsImpl{ Scenario: *sc.Scenario.Clone().(*Scenario), } } -func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error { // nolint: gocyclo +func (sc *kmEphemeralSecretsImpl) Run(ctx context.Context, childEnv *env.Env) error { // nolint: gocyclo // Start the network, but no need to start the client. Just ensure it // is synced. if err := sc.Scenario.StartNetworkAndWaitForClientSync(ctx); err != nil { @@ -89,12 +87,6 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error return err } - // Prepare key managers. - kms := sc.Net.Keymanagers() - firstKm := kms[0] - secondKm := kms[1] - thirdKm := kms[2] - // Prepare an RPC client which will be used to query key manager nodes // for public ephemeral keys. 
chainContext, err := sc.Net.Controller().Consensus.GetChainContext(ctx) @@ -105,34 +97,29 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error if err != nil { return err } - firstKmPeerID, err := rpcClient.addKeyManagerAddrToHost(firstKm) + kms := sc.Net.Keymanagers() + firstKmPeerID, err := rpcClient.addKeyManagerAddrToHost(kms[0]) if err != nil { return err } - secondKmPeerID, err := rpcClient.addKeyManagerAddrToHost(secondKm) + secondKmPeerID, err := rpcClient.addKeyManagerAddrToHost(kms[1]) if err != nil { return err } - thirdKmPeerID, err := rpcClient.addKeyManagerAddrToHost(thirdKm) + thirdKmPeerID, err := rpcClient.addKeyManagerAddrToHost(kms[2]) if err != nil { return err } // Wait until the first key manager is ready. - sc.Logger.Info("ensuring the first key manager is ready") - - firstKmCtrl, err := oasis.NewController(firstKm.SocketPath()) - if err != nil { - return err - } - if err = firstKmCtrl.WaitReady(ctx); err != nil { + if err = sc.waitKeymanagers(ctx, []int{0}); err != nil { return err } // Wait until the first ephemeral secret is published. sc.Logger.Info("waiting for the first ephemeral secret") - sigSecret, err := sc.waitForNextEphemeralSecret(ctx) + sigSecret, err := sc.waitEphemeralSecrets(ctx, 1) if err != nil { return err } @@ -178,13 +165,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error } // Restart the first key manager. 
- sc.Logger.Info("restarting the first key manager") - if err = firstKm.Restart(ctx); err != nil { - return fmt.Errorf("failed to restart the first key manager: %w", err) - } - - sc.Logger.Info("ensuring the first key manager is ready") - if err = firstKmCtrl.WaitReady(ctx); err != nil { + if err = sc.restartAndWaitKeymanagers(ctx, []int{0}); err != nil { return err } @@ -196,25 +177,14 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error if err != nil { return err } - switch rt.TEEHardware { - case node.TEEHardwareIntelSGX: - // REK changes on restarts and therefore the key managers shouldn't be able to decrypt - // previous ciphertexts. - if key != nil { - return fmt.Errorf("ephemeral key for epoch %d should not be available", sigSecret.Secret.Epoch) - } - default: - // Insecure REK doesn't change on restarts so the key manager should be able to decrypt - // all previous ciphertexts. - if key == nil { - return fmt.Errorf("ephemeral key for epoch %d should be available", sigSecret.Secret.Epoch) - } + if key != nil { + return fmt.Errorf("ephemeral key for epoch %d should not be available", sigSecret.Secret.Epoch) } // Wait until the next ephemeral secret is published. sc.Logger.Info("waiting for the first ephemeral secret") - sigSecret, err = sc.waitForNextEphemeralSecret(ctx) + sigSecret, err = sc.waitEphemeralSecrets(ctx, 1) if err != nil { return err } @@ -247,30 +217,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error } // Start other key managers. 
- sc.Logger.Info("starting all key managers") - - if err = secondKm.Start(); err != nil { - return fmt.Errorf("failed to start the second key manager: %w", err) - } - if err = thirdKm.Start(); err != nil { - return fmt.Errorf("failed to start the third key manager: %w", err) - } - - sc.Logger.Info("ensuring all key manager are ready") - - secondKmCtrl, err := oasis.NewController(secondKm.SocketPath()) - if err != nil { - return err - } - if err = secondKmCtrl.WaitReady(ctx); err != nil { - return err - } - - thirdKmCtrl, err := oasis.NewController(thirdKm.SocketPath()) - if err != nil { - return err - } - if err = thirdKmCtrl.WaitReady(ctx); err != nil { + if err = sc.startAndWaitKeymanagers(ctx, []int{1, 2}); err != nil { return err } @@ -303,9 +250,8 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error return fmt.Errorf("ephemeral keys should be the same") } - // Test that all key managers produce the same keys and that ephemeral secrets - // are published in the consensus layer. - sc.Logger.Info("testing if ephemeral keys are the same and ephemeral secrets published") + // Test that all key managers derive the same keys. 
+ sc.Logger.Info("testing if ephemeral keys are the same") epoCh, epoSub, err := sc.Net.Controller().Beacon.WatchEpochs(ctx) if err != nil { @@ -314,38 +260,9 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error defer epoSub.Close() set := make(map[x25519.PublicKey]struct{}) - for i := 0; i < 5; i++ { + for i := 1; i <= 3; i++ { epoch := <-epoCh - sc.Logger.Info("checking if ephemeral secret was published", - "epoch", epoch, - ) - - sigSecret, err = sc.Net.Controller().Keymanager.GetEphemeralSecret(ctx, ®istry.NamespaceEpochQuery{ - Height: consensus.HeightLatest, - ID: keymanagerID, - Epoch: epoch, - }) - if err != nil { - return err - } - - var numCiphertexts int - switch rt.TEEHardware { - case node.TEEHardwareIntelSGX: - numCiphertexts = 3 - default: - numCiphertexts = 1 - } - - // Skip first two secrets as we cannot be sure how many key manager nodes were registered - // when the secret was generated. - if i > 1 { - if n := len(sigSecret.Secret.Secret.Ciphertexts); n != numCiphertexts { - return fmt.Errorf("ephemeral secret should be encrypted to %d enclaves, not %d", numCiphertexts, n) - } - } - sc.Logger.Info("fetching ephemeral keys from all key managers", "epoch", epoch, ) @@ -353,7 +270,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error for _, peerID := range []peer.ID{firstKmPeerID, secondKmPeerID, thirdKmPeerID} { key, err = rpcClient.fetchEphemeralPublicKeyWithRetry(ctx, epoch, peerID) if err != nil { - return fmt.Errorf("fetching ephemeral key should succeed") + return fmt.Errorf("fetching ephemeral key should succeed, %w", err) } if key == nil { return fmt.Errorf("ephemeral key for epoch %d should be available", epoch) @@ -361,11 +278,38 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error set[*key] = struct{}{} } - if len(set) != i+1 { + if len(set) != i { return fmt.Errorf("ephemeral keys should match") } } + // Test that published secrets are encrypted 
to all enclaves. + ephCh, ephSub, err := sc.Net.Controller().Keymanager.WatchEphemeralSecrets(ctx) + if err != nil { + return err + } + defer ephSub.Close() + + for i := 1; i <= 3; i++ { + sigSecret := <-ephCh + + sc.Logger.Info("checking if published ephemeral secret contains enough ciphertexts", + "epoch", sigSecret.Secret.Epoch, + ) + + var numCiphertexts int + switch rt.TEEHardware { + case node.TEEHardwareIntelSGX: + numCiphertexts = 3 + default: + numCiphertexts = 1 + } + + if n := len(sigSecret.Secret.Secret.Ciphertexts); n != numCiphertexts { + return fmt.Errorf("ephemeral secret should be encrypted to %d enclaves, not %d", numCiphertexts, n) + } + } + // Confirm that all key managers were registered. err = sc.checkNumberOfKeyManagers(ctx, 3) if err != nil { @@ -503,7 +447,7 @@ func (sc *kmEphemeralKeysImpl) Run(ctx context.Context, childEnv *env.Env) error return nil } -func (sc *kmEphemeralKeysImpl) submitKeyValueRuntimeEncryptTx( +func (sc *kmEphemeralSecretsImpl) submitKeyValueRuntimeEncryptTx( ctx context.Context, id common.Namespace, nonce uint64, @@ -532,7 +476,7 @@ func (sc *kmEphemeralKeysImpl) submitKeyValueRuntimeEncryptTx( return rsp, nil } -func (sc *kmEphemeralKeysImpl) submitKeyValueRuntimeDecryptTx( +func (sc *kmEphemeralSecretsImpl) submitKeyValueRuntimeDecryptTx( ctx context.Context, id common.Namespace, nonce uint64, @@ -561,25 +505,7 @@ func (sc *kmEphemeralKeysImpl) submitKeyValueRuntimeDecryptTx( return rsp, nil } -func (sc *kmEphemeralKeysImpl) waitForNextEphemeralSecret(ctx context.Context) (*keymanager.SignedEncryptedEphemeralSecret, error) { - ch, sub, err := sc.Net.Controller().Keymanager.WatchEphemeralSecrets(ctx) - if err != nil { - return nil, err - } - defer sub.Close() - - select { - case secret, ok := <-ch: - if !ok { - return nil, fmt.Errorf("channel for ephemeral secrets closed") - } - return secret, nil - case <-time.After(time.Minute): - return nil, fmt.Errorf("timed out waiting for the next ephemeral secret") - } -} - 
-func (sc *kmEphemeralKeysImpl) checkNumberOfKeyManagers(ctx context.Context, n int) error { +func (sc *kmEphemeralSecretsImpl) checkNumberOfKeyManagers(ctx context.Context, n int) error { status, err := sc.Net.Controller().Keymanager.GetStatus(ctx, ®istry.NamespaceQuery{ Height: consensus.HeightLatest, ID: keymanagerID, diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go index d90d890f6aa..a18ef0a67e1 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go @@ -218,6 +218,29 @@ func (sc *Scenario) waitMasterSecret(ctx context.Context, generation uint64) (*k } } +func (sc *Scenario) waitEphemeralSecrets(ctx context.Context, n int) (*keymanager.SignedEncryptedEphemeralSecret, error) { + sc.Logger.Info("waiting ephemeral secrets", "n", n) + + ephCh, ephSub, err := sc.Net.Controller().Keymanager.WatchEphemeralSecrets(ctx) + if err != nil { + return nil, err + } + defer ephSub.Close() + + var secret *keymanager.SignedEncryptedEphemeralSecret + for i := 0; i < n; i++ { + select { + case secret = <-ephCh: + sc.Logger.Info("ephemeral secret published", + "epoch", secret.Secret.Epoch, + ) + case <-ctx.Done(): + return nil, fmt.Errorf("timed out waiting for ephemeral secrets") + } + } + return secret, nil +} + func (sc *Scenario) updateRotationInterval(ctx context.Context, nonce uint64, childEnv *env.Env, rotationInterval beacon.EpochTime) error { sc.Logger.Info("updating master secret rotation interval in the key manager policy", "interval", rotationInterval, diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go index 439f1e9a0c7..78803c99e1e 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go @@ -819,7 +819,7 @@ func RegisterScenarios() error { Sentry, // Keymanager 
tests. KeymanagerMasterSecrets, - KeymanagerEphemeralKeys, + KeymanagerEphemeralSecrets, KeymanagerDumpRestore, KeymanagerRestart, KeymanagerReplicate, diff --git a/go/registry/api/api.go b/go/registry/api/api.go index 34702a9a0f8..de3fb1158ed 100644 --- a/go/registry/api/api.go +++ b/go/registry/api/api.go @@ -241,13 +241,6 @@ type NamespaceQuery struct { ID common.Namespace `json:"id"` } -// NamespaceEpochQuery is a registry query by namespace (Runtime ID) and epoch. -type NamespaceEpochQuery struct { - Height int64 `json:"height"` - ID common.Namespace `json:"id"` - Epoch beacon.EpochTime `json:"epoch"` -} - // GetRuntimeQuery is a registry query by namespace (Runtime ID). type GetRuntimeQuery struct { Height int64 `json:"height"` diff --git a/go/worker/keymanager/worker.go b/go/worker/keymanager/worker.go index b2b9f6d4e95..5bf187fcd79 100644 --- a/go/worker/keymanager/worker.go +++ b/go/worker/keymanager/worker.go @@ -117,7 +117,7 @@ type Worker struct { // nolint: maligned genMstSecInProgress bool genMstSecRetry int - ephSecrets []*api.SignedEncryptedEphemeralSecret + ephSecret *api.SignedEncryptedEphemeralSecret loadEphSecRetry int genEphSecDoneCh chan bool @@ -710,22 +710,15 @@ func (w *Worker) generateEphemeralSecret(runtimeID common.Namespace, epoch beaco // Check if the ephemeral secret has been published in this epoch. // Note that despite this check, the nodes can still publish ephemeral secrets at the same time. - _, err := w.commonWorker.Consensus.KeyManager().GetEphemeralSecret(w.ctx, ®istry.NamespaceEpochQuery{ + lastSecret, err := w.commonWorker.Consensus.KeyManager().GetEphemeralSecret(w.ctx, ®istry.NamespaceQuery{ Height: consensus.HeightLatest, ID: runtimeID, - Epoch: epoch, }) - switch err { - case nil: - w.logger.Info("skipping secret generation, ephemeral secret already published") - return nil - case api.ErrNoSuchEphemeralSecret: - // Secret hasn't been published. 
- default: - w.logger.Error("failed to fetch ephemeral secret", - "err", err, - ) - return fmt.Errorf("failed to fetch ephemeral secret: %w", err) + if err != nil && err != api.ErrNoSuchEphemeralSecret { + return err + } + if lastSecret != nil && epoch == lastSecret.Secret.Epoch { + return fmt.Errorf("ephemeral secret can be proposed once per epoch") } // Skip generation if the node is not in the key manager committee. @@ -884,45 +877,6 @@ func (w *Worker) loadEphemeralSecret(sigSecret *api.SignedEncryptedEphemeralSecr return nil } -func (w *Worker) fetchLastEphemeralSecrets(runtimeID common.Namespace) ([]*api.SignedEncryptedEphemeralSecret, error) { - w.logger.Info("fetching last ephemeral secrets") - - // Get next epoch. - epoch, err := w.commonWorker.Consensus.Beacon().GetEpoch(w.ctx, consensus.HeightLatest) - if err != nil { - w.logger.Error("failed to fetch epoch", - "err", err, - ) - return nil, fmt.Errorf("failed to fetch epoch: %w", err) - } - epoch++ - - // Fetch last few ephemeral secrets. - N := ephemeralSecretCacheSize - secrets := make([]*api.SignedEncryptedEphemeralSecret, 0, N) - for i := 0; i < N && epoch > 0; i, epoch = i+1, epoch-1 { - secret, err := w.commonWorker.Consensus.KeyManager().GetEphemeralSecret(w.ctx, ®istry.NamespaceEpochQuery{ - Height: consensus.HeightLatest, - ID: runtimeID, - Epoch: epoch, - }) - - switch err { - case nil: - secrets = append(secrets, secret) - case api.ErrNoSuchEphemeralSecret: - // Secret hasn't been published. - default: - w.logger.Error("failed to fetch ephemeral secret", - "err", err, - ) - return nil, fmt.Errorf("failed to fetch ephemeral secret: %w", err) - } - } - - return secrets, nil -} - // randomBlockHeight returns the height of a random block in the k-th percentile of the given epoch. func (w *Worker) randomBlockHeight(epoch beacon.EpochTime, percentile int64) (int64, error) { // Get height of the first block. 
@@ -1087,17 +1041,6 @@ func (w *Worker) handleRuntimeHostEvent(ev *host.Event) { return } - // Fetch last few ephemeral secrets and load them. - var err error - w.ephSecrets, err = w.fetchLastEphemeralSecrets(w.runtimeID) - if err != nil { - w.logger.Error("failed to fetch last ephemeral secrets", - "err", err, - ) - } - w.loadEphSecRetry = 0 - w.handleLoadEphemeralSecret() - if w.kmStatus == nil { return } @@ -1291,15 +1234,14 @@ func (w *Worker) handleNewEphemeralSecret(secret *api.SignedEncryptedEphemeralSe "epoch", secret.Secret.Epoch, ) + w.ephSecret = secret + w.loadEphSecRetry = 0 + if secret.Secret.Epoch == epoch+1 { // Disarm ephemeral secret generation. w.genEphSecRetry = math.MaxInt64 } - // Add secret to the list and send a signal to load it. - w.ephSecrets = append(w.ephSecrets, secret) - w.loadEphSecRetry = 0 - w.handleLoadEphemeralSecret() } @@ -1353,29 +1295,26 @@ func (w *Worker) handleGenerateEphemeralSecretDone(ok bool) { } func (w *Worker) handleLoadEphemeralSecret() { - if w.kmStatus == nil || w.rtStatus == nil { + if w.kmStatus == nil || w.rtStatus == nil || w.ephSecret == nil { return } - - var failed []*api.SignedEncryptedEphemeralSecret - for _, secret := range w.ephSecrets { - if err := w.loadEphemeralSecret(secret); err != nil { - w.logger.Error("failed to load ephemeral secret", - "err", err, - "retry", w.loadEphSecRetry, - ) - failed = append(failed, secret) - continue - } - w.setLastLoadedEphemeralSecretEpoch(secret.Secret.Epoch) + if w.loadEphSecRetry > loadSecretMaxRetries { + return } - w.ephSecrets = failed + // Retry only few times per epoch. w.loadEphSecRetry++ - if w.loadEphSecRetry > loadSecretMaxRetries { - // Disarm ephemeral secret loading. - w.ephSecrets = nil + + if err := w.loadEphemeralSecret(w.ephSecret); err != nil { + w.logger.Error("failed to load ephemeral secret", + "err", err, + ) + return } + + // Disarm ephemeral secret loading. 
+ w.loadEphSecRetry = math.MaxInt64 + w.setLastLoadedEphemeralSecretEpoch(w.ephSecret.Secret.Epoch) } func (w *Worker) handleStop() { diff --git a/keymanager/src/crypto/kdf.rs b/keymanager/src/crypto/kdf.rs index d0609e2aa7a..f44b832dee3 100644 --- a/keymanager/src/crypto/kdf.rs +++ b/keymanager/src/crypto/kdf.rs @@ -239,6 +239,20 @@ impl Inner { k } + fn add_ephemeral_secret(&mut self, secret: Secret, epoch: EpochTime) { + self.ephemeral_secrets.insert(epoch, secret); + + // Drop the oldest secret, if we exceed the capacity. + if self.ephemeral_secrets.len() > EPHEMERAL_SECRET_CACHE_SIZE { + let min = *self + .ephemeral_secrets + .keys() + .min() + .expect("map should not be empty"); + self.ephemeral_secrets.remove(&min); + } + } + fn get_checksum(&self) -> Result> { match self.checksum.as_ref() { Some(checksum) => Ok(checksum.clone()), @@ -329,16 +343,20 @@ impl Kdf { /// intervention by the operator is required to remove/alter them. /// - The first initialization can take a very long time, especially if all generations /// of the master secret must be replicated from other enclaves. - pub fn init( + #[allow(clippy::too_many_arguments)] + pub fn init( &self, storage: &dyn KeyValue, runtime_id: Namespace, generation: u64, checksum: Vec, + epoch: EpochTime, master_secret_fetcher: M, + ephemeral_secret_fetcher: E, ) -> Result where M: Fn(u64) -> Result, + E: Fn(EpochTime) -> Result, { // If the key manager has no secrets, nothing needs to be replicated. if checksum.is_empty() { @@ -368,6 +386,22 @@ impl Kdf { (next_generation, curr_checksum) }; + // On startup replicate ephemeral secrets. + if next_generation == 0 { + let last = epoch + 1; + for epoch in (0..=last).rev() { + if let Ok(secret) = ephemeral_secret_fetcher(epoch) { + let mut inner = self.inner.write().unwrap(); + inner.verify_runtime_id(&runtime_id)?; + inner.add_ephemeral_secret(secret, epoch); + continue; + } + if epoch != last { + break; + } + } + } + // On startup load all master secrets. 
if next_generation == 0 { loop { @@ -703,17 +737,7 @@ impl Kdf { inner.verify_runtime_id(runtime_id)?; // Add to the cache. - inner.ephemeral_secrets.insert(epoch, Secret(secret.0)); - - // Drop the oldest secret, if we exceed the capacity. - if inner.ephemeral_secrets.len() > EPHEMERAL_SECRET_CACHE_SIZE { - let min = *inner - .ephemeral_secrets - .keys() - .min() - .expect("map should not be empty"); - inner.ephemeral_secrets.remove(&min); - } + inner.add_ephemeral_secret(secret, epoch); Ok(()) } @@ -1065,11 +1089,22 @@ mod tests { let kdf = Kdf::new(); let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); + let epoch = 0; let provider = MasterSecretProvider::new(runtime_id); let master_secret_fetcher = |generation| provider.fetch(generation); + let ephemeral_secret_fetcher = + |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); // No secrets. - let result = kdf.init(&storage, runtime_id, 0, vec![], master_secret_fetcher); + let result = kdf.init( + &storage, + runtime_id, + 0, + vec![], + epoch, + master_secret_fetcher, + ephemeral_secret_fetcher, + ); assert!(result.is_ok()); let state = result.unwrap(); @@ -1084,7 +1119,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_ok()); @@ -1105,7 +1142,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_ok()); @@ -1122,12 +1161,23 @@ mod tests { let kdf = Kdf::new(); let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); + let epoch = 0; let provider = MasterSecretProvider::new(runtime_id); let master_secret_fetcher = |generation| Err(KeyManagerError::MasterSecretNotFound(generation).into()); + let ephemeral_secret_fetcher = + |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); // KDF needs to be initialized. 
- let result = kdf.init(&storage, runtime_id, 0, vec![], master_secret_fetcher); + let result = kdf.init( + &storage, + runtime_id, + 0, + vec![], + epoch, + master_secret_fetcher, + ephemeral_secret_fetcher, + ); assert!(result.is_ok()); // Rotate master secrets. @@ -1148,7 +1198,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_ok()); @@ -1172,7 +1224,9 @@ mod tests { runtime_id, generation, checksum, + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_err()); assert_eq!( @@ -1194,7 +1248,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_ok()); @@ -1207,8 +1263,11 @@ mod tests { let kdf = Kdf::new(); let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); + let epoch = 0; let provider = MasterSecretProvider::new(runtime_id); let master_secret_fetcher = |generation| provider.fetch(generation); + let ephemeral_secret_fetcher = + |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); // Init. let generation = 5; @@ -1219,7 +1278,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_ok()); @@ -1238,7 +1299,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_err()); assert_eq!( @@ -1252,8 +1315,11 @@ mod tests { let kdf = Kdf::new(); let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); + let epoch = 0; let provider = MasterSecretProvider::new(runtime_id); let master_secret_fetcher = |generation| provider.fetch(generation); + let ephemeral_secret_fetcher = + |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); // Init. 
let generation = 5; @@ -1264,7 +1330,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_ok()); @@ -1284,7 +1352,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ) }); assert!(result.is_err()); @@ -1295,8 +1365,11 @@ mod tests { let kdf = Kdf::new(); let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); + let epoch = 0; let provider = MasterSecretProvider::new(runtime_id); let master_secret_fetcher = |generation| provider.fetch(generation); + let ephemeral_secret_fetcher = + |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); // Init. let generation = 10; @@ -1307,7 +1380,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_ok()); @@ -1320,7 +1395,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_err()); assert_eq!( @@ -1334,12 +1411,23 @@ mod tests { let kdf = Kdf::new(); let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); + let epoch = 0; let invalid_runtime_id = Namespace::from(vec![2u8; 32]); let provider = MasterSecretProvider::new(runtime_id); let master_secret_fetcher = |generation| provider.fetch(generation); + let ephemeral_secret_fetcher = + |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); // No secrets. 
- let result = kdf.init(&storage, runtime_id, 0, vec![], master_secret_fetcher); + let result = kdf.init( + &storage, + runtime_id, + 0, + vec![], + epoch, + master_secret_fetcher, + ephemeral_secret_fetcher, + ); assert!(result.is_ok()); let result = kdf.init( @@ -1347,7 +1435,9 @@ mod tests { invalid_runtime_id, 0, vec![], + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_err()); assert_eq!( @@ -1364,7 +1454,9 @@ mod tests { runtime_id, generation, checksum.clone(), + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_ok()); @@ -1373,7 +1465,9 @@ mod tests { invalid_runtime_id, generation, checksum, + epoch, master_secret_fetcher, + ephemeral_secret_fetcher, ); assert!(result.is_err()); assert_eq!( @@ -1381,7 +1475,6 @@ mod tests { KeyManagerError::RuntimeMismatch.to_string() ); } - #[test] fn key_generation_is_deterministic() { let kdf = Kdf::default(); diff --git a/keymanager/src/runtime/methods.rs b/keymanager/src/runtime/methods.rs index 63c2f1d1438..e1ab8470973 100644 --- a/keymanager/src/runtime/methods.rs +++ b/keymanager/src/runtime/methods.rs @@ -83,8 +83,10 @@ pub fn init_kdf(ctx: &mut RpcContext, req: &InitRequest) -> Result Result ImmutableState<'a, T> { key_format!(StatusKeyFmt, 0x70, Hash); key_format!(MasterSecretKeyFmt, 0x72, Hash); -key_format!(EphemeralSecretKeyFmt, 0x73, (Hash, EpochTime)); +key_format!(EphemeralSecretKeyFmt, 0x73, Hash); /// Current key manager status. 
#[derive(Clone, Debug, Default, PartialEq, Eq, cbor::Decode, cbor::Encode)] @@ -102,10 +102,9 @@ impl<'a, T: ImmutableMKVS> ImmutableState<'a, T> { pub fn ephemeral_secret( &self, id: Namespace, - epoch: EpochTime, ) -> Result, StateError> { let h = Hash::digest_bytes(id.as_ref()); - match self.mkvs.get(&EphemeralSecretKeyFmt((h, epoch)).encode()) { + match self.mkvs.get(&EphemeralSecretKeyFmt(h).encode()) { Ok(Some(b)) => Ok(Some(self.decode_ephemeral_secret(&b)?)), Ok(None) => Ok(None), Err(err) => Err(StateError::Unavailable(anyhow!(err))), @@ -167,7 +166,7 @@ mod test { let mock_consensus_root = Root { version: 1, root_type: RootType::State, - hash: Hash::from("b5ee772727869caf8d0d333a7a9d65562ca34d8d6f3cf496af9e90f1705f10ec"), + hash: Hash::from("a40448052f74a1c0c2d47c2b01a433ad7f3782ea47dfe5575170fec2587569c9"), ..Default::default() }; let mkvs = Tree::builder() @@ -308,19 +307,9 @@ mod test { // Test ephemeral secret (happy path, invalid epoch, invalid runtime). let secret = keymanager_state - .ephemeral_secret(keymanager1, 1) + .ephemeral_secret(keymanager1) .expect("ephemeral secret query should work") .expect("ephemeral secret query should return a result"); assert_eq!(secret, expected_secret, "invalid ephemeral secret"); - - let secret = keymanager_state - .ephemeral_secret(keymanager1, 2) - .expect("ephemeral secret query should work"); - assert_ne!(secret, None, "invalid ephemeral secret"); - - let secret = keymanager_state - .ephemeral_secret(keymanager2, 1) - .expect("ephemeral secret query should work"); - assert_eq!(secret, None, "invalid ephemeral secret"); } } diff --git a/runtime/src/consensus/state/registry.rs b/runtime/src/consensus/state/registry.rs index 1474b3dc45e..ae50d4a4f8a 100644 --- a/runtime/src/consensus/state/registry.rs +++ b/runtime/src/consensus/state/registry.rs @@ -131,7 +131,7 @@ mod test { let mock_consensus_root = Root { version: 1, root_type: RootType::State, - hash: 
Hash::from("b5ee772727869caf8d0d333a7a9d65562ca34d8d6f3cf496af9e90f1705f10ec"), + hash: Hash::from("a40448052f74a1c0c2d47c2b01a433ad7f3782ea47dfe5575170fec2587569c9"), ..Default::default() }; let mkvs = Tree::builder() diff --git a/runtime/src/consensus/state/staking.rs b/runtime/src/consensus/state/staking.rs index 26665786256..332d591979d 100644 --- a/runtime/src/consensus/state/staking.rs +++ b/runtime/src/consensus/state/staking.rs @@ -221,7 +221,7 @@ mod test { let mock_consensus_root = Root { version: 1, root_type: RootType::State, - hash: Hash::from("b5ee772727869caf8d0d333a7a9d65562ca34d8d6f3cf496af9e90f1705f10ec"), + hash: Hash::from("a40448052f74a1c0c2d47c2b01a433ad7f3782ea47dfe5575170fec2587569c9"), ..Default::default() }; let mkvs = Tree::builder() From 188f5b1852b14bdc1a62cee5adbe6735e950fbde Mon Sep 17 00:00:00 2001 From: Peter Nose Date: Wed, 12 Apr 2023 09:04:48 +0200 Subject: [PATCH 5/7] go/keymanager/api: Fix publish master secret transaction - Fixed the grammar issue in the publish ephemeral secret method. - Moved the initialization of secret notifiers to the constructor for better code organization. --- .../cometbft/apps/keymanager/transactions.go | 8 ++++---- go/consensus/cometbft/keymanager/keymanager.go | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/go/consensus/cometbft/apps/keymanager/transactions.go b/go/consensus/cometbft/apps/keymanager/transactions.go index fafb37e2f24..4d636836a53 100644 --- a/go/consensus/cometbft/apps/keymanager/transactions.go +++ b/go/consensus/cometbft/apps/keymanager/transactions.go @@ -217,13 +217,13 @@ func (app *keymanagerApplication) publishMasterSecret( // publishEphemeralSecret stores the ephemeral secret for the given epoch. // -// Key managers support forward-secret ephemeral secrets which are never encrypted with -// SGX sealing key nor stored in the enclave's cold storage. 
These secrets are generated -// by the enclaves themselves for the next epoch only and published encrypted in the consensus. +// Key managers support forward-secret ephemeral secrets which are never encrypted with SGX sealing +// key nor stored in the enclave's cold storage. These secrets are generated by the enclaves +// themselves for the next epoch only and published encrypted in the consensus layer. // Only one secret can be published for an epoch, others are discarded. Overwrites are not // allowed as with master secrets. So if all enclaves restart at the same time, no one // will be able to decrypt ephemeral secrets for the past. The number of generated secrets -// does not effect the performance, as key managers store in memory only the last few secrets, +// does not affect the performance, as key managers store in memory only the last few secrets, // as defined in the policy. // // Note that ephemeral secrets differ from master secrets. For more information, see diff --git a/go/consensus/cometbft/keymanager/keymanager.go b/go/consensus/cometbft/keymanager/keymanager.go index eef4620caed..20c0d0f6bdd 100644 --- a/go/consensus/cometbft/keymanager/keymanager.go +++ b/go/consensus/cometbft/keymanager/keymanager.go @@ -162,9 +162,11 @@ func New(ctx context.Context, backend tmapi.Backend) (ServiceClient, error) { return nil, fmt.Errorf("cometbft/keymanager: failed to register app: %w", err) } - sc := &serviceClient{ - logger: logging.GetLogger("cometbft/keymanager"), - querier: a.QueryFactory().(*app.QueryFactory), + sc := serviceClient{ + logger: logging.GetLogger("cometbft/keymanager"), + querier: a.QueryFactory().(*app.QueryFactory), + mstSecretNotifier: pubsub.NewBroker(false), + ephSecretNotifier: pubsub.NewBroker(false), } sc.statusNotifier = pubsub.NewBrokerEx(func(ch channels.Channel) { statuses, err := sc.GetStatuses(ctx, consensus.HeightLatest) @@ -180,8 +182,6 @@ func New(ctx context.Context, backend tmapi.Backend) (ServiceClient, error) { wr <- v } }) 
- sc.mstSecretNotifier = pubsub.NewBroker(false) - sc.ephSecretNotifier = pubsub.NewBroker(false) - return sc, nil + return &sc, nil } From 97f265501b0479cbe3236f2278ca42994491b5c4 Mon Sep 17 00:00:00 2001 From: Peter Nose Date: Wed, 12 Apr 2023 09:05:39 +0200 Subject: [PATCH 6/7] keymanager/src/runtime: Fix master secret rotations - Simplified method for computing master secret generation epoch. - Created scenarios for replicating multiple secrets and for rotation failures. - Added a comment to clarify that a node must register with an empty checksum until the first secret is generated. - Renamed the context for master secret sealing and added a separate context for master secret proposals to enhance the security. - Limited the number of replicated ephemeral secrets. - Replaced fetch functions with secret provider which does not break enclave initialization if the checksum of the replicated secret is invalid. - Fix off-by-one logic when rejecting a master secret proposal. - Rename and export Deoxys-II SGX constructor. 
--- .../cometbft/apps/keymanager/keymanager.go | 10 +- .../e2e/runtime/keymanager_replicate_many.go | 141 ++++++++ .../runtime/keymanager_rotation_failure.go | 235 ++++++++++++ .../scenario/e2e/runtime/keymanager_util.go | 1 + .../scenario/e2e/runtime/runtime.go | 2 + go/worker/keymanager/worker.go | 12 +- keymanager/src/crypto/kdf.rs | 335 +++++++----------- keymanager/src/lib.rs | 1 + keymanager/src/policy/cached.rs | 2 +- keymanager/src/runtime/methods.rs | 67 +--- keymanager/src/secrets/interface.rs | 17 + keymanager/src/secrets/mock.rs | 78 ++++ keymanager/src/secrets/mod.rs | 9 + keymanager/src/secrets/provider.rs | 90 +++++ runtime/src/common/sgx/seal.rs | 10 +- 15 files changed, 743 insertions(+), 267 deletions(-) create mode 100644 go/oasis-test-runner/scenario/e2e/runtime/keymanager_replicate_many.go create mode 100644 go/oasis-test-runner/scenario/e2e/runtime/keymanager_rotation_failure.go create mode 100644 keymanager/src/secrets/interface.rs create mode 100644 keymanager/src/secrets/mock.rs create mode 100644 keymanager/src/secrets/mod.rs create mode 100644 keymanager/src/secrets/provider.rs diff --git a/go/consensus/cometbft/apps/keymanager/keymanager.go b/go/consensus/cometbft/apps/keymanager/keymanager.go index 50ac5d2b5fd..24a5d5d89a5 100644 --- a/go/consensus/cometbft/apps/keymanager/keymanager.go +++ b/go/consensus/cometbft/apps/keymanager/keymanager.go @@ -243,7 +243,7 @@ func (app *keymanagerApplication) generateStatus( // nolint: gocyclo Policy: oldStatus.Policy, } - // Data need to count the nodes that have replicated the proposal for the next master secret. + // Data needed to count the nodes that have replicated the proposal for the next master secret. var ( nextGeneration uint64 nextChecksum []byte @@ -340,6 +340,12 @@ nextNode: ctx.Logger().Error("Security status mismatch for runtime", vars...) continue nextNode } + + // Skip nodes with mismatched checksum. 
+ // Note that a node needs to register with an empty checksum if no master secrets + // have been generated so far. Otherwise, if secrets have been generated, the node + // needs to register with a checksum computed over all the secrets generated so far + // since the key manager's checksum is updated after every master secret rotation. if !bytes.Equal(initResponse.Checksum, status.Checksum) { ctx.Logger().Error("Checksum mismatch for runtime", vars...) continue nextNode @@ -398,7 +404,7 @@ nextNode: // the proposal for the next master secret. if numNodes := len(status.Nodes); numNodes > 0 && nextChecksum != nil { percent := len(updatedNodes) * 100 / numNodes - if percent > minProposalReplicationPercent { + if percent >= minProposalReplicationPercent { status.Generation = nextGeneration status.RotationEpoch = epoch status.Checksum = nextChecksum diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_replicate_many.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_replicate_many.go new file mode 100644 index 00000000000..7ecc460b785 --- /dev/null +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_replicate_many.go @@ -0,0 +1,141 @@ +package runtime + +import ( + "context" + "fmt" + "time" + + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/scenario" +) + +const ( + // cfgNumMasterSecrets is the number of master secrets to replicate. + cfgNumMasterSecrets = "num_master_secrets" // #nosec G101 + + // cfgRotationInterval is the master secret rotation interval. + cfgRotationInterval = "rotation_interval" +) + +// KeymanagerReplicateMany is a scenario where a large number of master secrets are generated +// and replicated. Its purpose is to benchmark how long replication takes on a local SGX machine. 
+// +// Scenario: +// - Start the first two key managers. +// - Generate N master secrets. +// - Start the last two key managers. +// - Start a timer. +// - Wait until the master secrets are replicated. +// - Stop the timer. +// - Verify that all key managers possess the same secrets. +// - Verify that master secret generation still works. +var KeymanagerReplicateMany scenario.Scenario = newKmReplicateManyImpl() + +type kmReplicateManyImpl struct { + Scenario +} + +func newKmReplicateManyImpl() scenario.Scenario { + sc := kmReplicateManyImpl{ + Scenario: *NewScenario("keymanager-replication-many", nil), + } + sc.Flags.Uint64(cfgNumMasterSecrets, 5, "number of master secrets to replicate") + sc.Flags.Uint64(cfgRotationInterval, 1, "master secret rotation interval") + + return &sc +} + +func (sc *kmReplicateManyImpl) Clone() scenario.Scenario { + return &kmReplicateManyImpl{ + Scenario: *sc.Scenario.Clone().(*Scenario), + } +} + +func (sc *kmReplicateManyImpl) Fixture() (*oasis.NetworkFixture, error) { + f, err := sc.Scenario.Fixture() + if err != nil { + return nil, err + } + + // Speed up the test. + f.Network.Beacon.VRFParameters = &beacon.VRFParameters{ + Interval: 10, + ProofSubmissionDelay: 2, + } + + // We don't need compute workers. + f.ComputeWorkers = []oasis.ComputeWorkerFixture{} + + // This requires multiple keymanagers. + f.Keymanagers = []oasis.KeymanagerFixture{ + {Runtime: 0, Entity: 1, Policy: 0}, + {Runtime: 0, Entity: 1, Policy: 0}, + {Runtime: 0, Entity: 1, Policy: 0, NodeFixture: oasis.NodeFixture{NoAutoStart: true}}, + {Runtime: 0, Entity: 1, Policy: 0, NodeFixture: oasis.NodeFixture{NoAutoStart: true}}, + } + + // Enable master secret rotation. + interval, _ := sc.Flags.GetUint64(cfgRotationInterval) + f.KeymanagerPolicies[0].MasterSecretRotationInterval = beacon.EpochTime(interval) + + return f, nil +} + +func (sc *kmReplicateManyImpl) Run(ctx context.Context, childEnv *env.Env) error { + // Fetch the number of secrets to replicate. 
+ n, _ := sc.Flags.GetUint64(cfgNumMasterSecrets) + if n == 0 { + return fmt.Errorf("the number of master secrets must be a positive value") + } + generation := n - 1 + + // Start the first two key managers. + if err := sc.Net.Start(); err != nil { + return err + } + + // Wait until master secrets are generated. + if _, err := sc.waitMasterSecret(ctx, generation); err != nil { + return err + } + + // Start the last two key managers. + if err := sc.startKeymanagers(ctx, []int{2, 3}); err != nil { + return err + } + + // Wait until all secrets are replicated. + start := time.Now() + + if err := sc.waitKeymanagers(ctx, []int{2, 3}); err != nil { + return err + } + + sc.Logger.Info("replication finished", + "duration", time.Since(start), + ) + + // Compare public keys. + if err := sc.compareLongtermPublicKeys(ctx, []int{0, 1, 2, 3}); err != nil { + return err + } + + // Verify that secret can be generated after replication. + status, err := sc.keymanagerStatus(ctx) + if err != nil { + return err + } + status, err = sc.waitMasterSecret(ctx, status.Generation+2) + if err != nil { + return err + } + + // Verify that all nodes formed the committee when the last secret was generated. 
+ if size := len(status.Nodes); size != 4 { + return fmt.Errorf("key manager committee's size is not correct: expected 4, got %d", size) + } + + return nil +} diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_rotation_failure.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_rotation_failure.go new file mode 100644 index 00000000000..c915b6459f5 --- /dev/null +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_rotation_failure.go @@ -0,0 +1,235 @@ +package runtime + +import ( + "context" + "fmt" + + beacon "github.com/oasisprotocol/oasis-core/go/beacon/api" + "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" + "github.com/oasisprotocol/oasis-core/go/common/node" + consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" + "github.com/oasisprotocol/oasis-core/go/consensus/api/transaction" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis" + "github.com/oasisprotocol/oasis-core/go/oasis-test-runner/scenario" + "github.com/oasisprotocol/oasis-core/go/registry/api" + registry "github.com/oasisprotocol/oasis-core/go/registry/api" + staking "github.com/oasisprotocol/oasis-core/go/staking/api" +) + +// KeymanagerRotationFailure is a scenario where the first master secret proposal is rejected +// because not enough nodes have replicated the secret. The second proposal is accepted, +// ensuring that nodes can properly handle potential reverts. +// +// Scenario: +// - Start all key managers. +// - Verify that master secret generation works. +// - Stop the third key manager. +// - Verify that the next proposal is not accepted. +// - Repeat these steps N times. 
+var KeymanagerRotationFailure scenario.Scenario = newKmRotationFailureImpl() + +type kmRotationFailureImpl struct { + Scenario +} + +func newKmRotationFailureImpl() scenario.Scenario { + return &kmRotationFailureImpl{ + Scenario: *NewScenario("keymanager-rotation-failure", nil), + } +} + +func (sc *kmRotationFailureImpl) Clone() scenario.Scenario { + return &kmRotationFailureImpl{ + Scenario: *sc.Scenario.Clone().(*Scenario), + } +} + +func (sc *kmRotationFailureImpl) Fixture() (*oasis.NetworkFixture, error) { + f, err := sc.Scenario.Fixture() + if err != nil { + return nil, err + } + + // Speed up the test. + f.Network.Beacon.VRFParameters = &beacon.VRFParameters{ + Interval: 10, + ProofSubmissionDelay: 2, + } + + // We don't need compute workers. + f.ComputeWorkers = []oasis.ComputeWorkerFixture{} + + // This requires multiple keymanagers. + f.Keymanagers = []oasis.KeymanagerFixture{ + {Runtime: 0, Entity: 1, Policy: 0}, + {Runtime: 0, Entity: 1, Policy: 0, NodeFixture: oasis.NodeFixture{NoAutoStart: true}}, + {Runtime: 0, Entity: 1, Policy: 0, NodeFixture: oasis.NodeFixture{NoAutoStart: true}}, + } + + // Enable master secret rotation. + // The rotation interval should be set to at least 2 so that key manager nodes can be shut down + // after they have accepted the last generation but before a new master secret is proposed. + f.KeymanagerPolicies[0].MasterSecretRotationInterval = 2 + + return f, nil +} + +func (sc *kmRotationFailureImpl) Run(ctx context.Context, childEnv *env.Env) error { + // Start the first two key managers. + if err := sc.Net.Start(); err != nil { + return err + } + + for i := 0; i < 3; i++ { + // Start the third key manager. + if err := sc.startKeymanagers(ctx, []int{1, 2}); err != nil { + return err + } + + // Verify that master secret generation works. 
+ generation := uint64(2*i + 1) + status, err := sc.waitMasterSecret(ctx, generation) + if err != nil { + return fmt.Errorf("master secret was not generated: %w", err) + } + if status.Generation != generation { + return fmt.Errorf("master secret generation number is not correct: expected %d, got %d", generation, status.Generation) + } + if size := len(status.Nodes); size != 3 { + return fmt.Errorf("key manager committee's size is not correct: expected 3, got %d", size) + } + + // Give key managers enough time to apply the last proposal and register with the latest + // checksum. This process can take several blocks. + if _, err := sc.waitBlocks(ctx, 5); err != nil { + return err + } + + // Stop two key managers, leaving only 33% of the committee members to be active. + if err := sc.stopKeymanagers(ctx, []int{1, 2}); err != nil { + return err + } + + // Extend registrations to ensure that stopped key managers remain on the committee. + if err := sc.extendKeymanagerRegistrations(ctx, []int{1, 2}); err != nil { + return err + } + + // Verify that the next few master secret proposals are rejected. + // Note that the proposals will be rejected only until the registrations + // of the stopped key managers expire. + if err := sc.verifyMasterSecretRejections(ctx, 3); err != nil { + return err + } + } + + // Verify that master secret generation works after the third key manager is deregistered. 
+ status, err := sc.waitMasterSecret(ctx, 6) + if err != nil { + return err + } + if status.Generation != 6 { + return fmt.Errorf("master secret generation number is not correct: expected 6, got %d", status.Generation) + } + if size := len(status.Nodes); size != 1 { + return fmt.Errorf("key manager committee's size is not correct: expected 1, got %d", size) + } + + return nil +} + +func (sc *kmRotationFailureImpl) extendKeymanagerRegistrations(ctx context.Context, idxs []int) error { + sc.Logger.Info("extending registrations of the key managers", "ids", fmt.Sprintf("%+v", idxs)) + + // Compute the maximum expiration epoch. + epoch, err := sc.Net.ClientController().Beacon.GetEpoch(ctx, consensus.HeightLatest) + if err != nil { + return err + } + params, err := sc.Net.ClientController().Consensus.Registry().ConsensusParameters(ctx, consensus.HeightLatest) + if err != nil { + return err + } + expiration := uint64(epoch) + params.MaxNodeExpiration + + for _, idx := range idxs { + km := sc.Net.Keymanagers()[idx] + + // Update expiration. + nodeDesc, err := sc.Net.ClientController().Registry.GetNode(ctx, &api.IDQuery{ + Height: consensus.HeightLatest, + ID: km.NodeID, + }) + if err != nil { + return err + } + nodeDesc.Expiration = expiration + + // Prepare, sign and submit the register node transaction. 
+ identity, err := km.LoadIdentity() + if err != nil { + return err + } + nodeSigners := []signature.Signer{ + identity.NodeSigner, + identity.P2PSigner, + identity.ConsensusSigner, + identity.VRFSigner, + identity.GetTLSSigner(), + } + sigNode, err := node.MultiSignNode(nodeSigners, registry.RegisterNodeSignatureContext, nodeDesc) + if err != nil { + return err + } + nonce, err := sc.Net.Controller().Consensus.GetSignerNonce(ctx, &consensus.GetSignerNonceRequest{ + AccountAddress: staking.NewAddress(identity.NodeSigner.Public()), + Height: consensus.HeightLatest, + }) + if err != nil { + return err + } + tx := registry.NewRegisterNodeTx(nonce, &transaction.Fee{Gas: 10000}, sigNode) + sigTx, err := transaction.Sign(identity.NodeSigner, tx) + if err != nil { + return err + } + err = sc.Net.Controller().Consensus.SubmitTx(ctx, sigTx) + if err != nil { + return err + } + } + + return nil +} + +func (sc *kmRotationFailureImpl) verifyMasterSecretRejections(ctx context.Context, n int) error { + mstCh, mstSub, err := sc.Net.Controller().Keymanager.WatchMasterSecrets(ctx) + if err != nil { + return err + } + defer mstSub.Close() + + generations := make(map[uint64]struct{}) + + for j := 0; j < n; j++ { + select { + case secret := <-mstCh: + sc.Logger.Info("master secret proposed", + "generation", secret.Secret.Generation, + "epoch", secret.Secret.Epoch, + "num_ciphertexts", len(secret.Secret.Secret.Ciphertexts), + ) + + generations[secret.Secret.Generation] = struct{}{} + + if len(generations) != 1 { + return fmt.Errorf("master secret proposal was not rejected") + } + case <-ctx.Done(): + return ctx.Err() + } + } + + return nil +} diff --git a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go index a18ef0a67e1..4d04db97105 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/keymanager_util.go @@ -192,6 +192,7 @@ func (sc *Scenario) 
waitMasterSecret(ctx context.Context, generation uint64) (*k sc.Logger.Info("master secret proposed", "generation", secret.Secret.Generation, "epoch", secret.Secret.Epoch, + "num_ciphertexts", len(secret.Secret.Secret.Ciphertexts), ) case status := <-stCh: if !status.ID.Equal(&keymanagerID) { diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go index 78803c99e1e..204983533c5 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go @@ -823,6 +823,8 @@ func RegisterScenarios() error { KeymanagerDumpRestore, KeymanagerRestart, KeymanagerReplicate, + KeymanagerReplicateMany, + KeymanagerRotationFailure, KeymanagerUpgrade, // Dump/restore test. DumpRestore, diff --git a/go/worker/keymanager/worker.go b/go/worker/keymanager/worker.go index 5bf187fcd79..2b2be92ea7a 100644 --- a/go/worker/keymanager/worker.go +++ b/go/worker/keymanager/worker.go @@ -908,18 +908,18 @@ func (w *Worker) updateGenerateMasterSecretEpoch() { // If at least one master secret has been generated, respect the rotation interval. nextGen := w.kmStatus.NextGeneration() if nextGen != 0 { - // Disable rotation if the policy is not set. var rotationInterval beacon.EpochTime if w.kmStatus.Policy != nil { rotationInterval = w.kmStatus.Policy.Policy.MasterSecretRotationInterval } - // Secrets are allowed to be generated at most one epoch before the rotation. - nextEpoch = w.kmStatus.RotationEpoch + rotationInterval - 1 - - // Rotation not allowed. - if rotationInterval == 0 { + switch rotationInterval { + case 0: + // Rotation not allowed. nextEpoch = math.MaxUint64 + default: + // Secrets are allowed to be generated at most one epoch before the rotation. 
+ nextEpoch = w.kmStatus.RotationEpoch + rotationInterval - 1 } } diff --git a/keymanager/src/crypto/kdf.rs b/keymanager/src/crypto/kdf.rs index f44b832dee3..7fcdffc79e8 100644 --- a/keymanager/src/crypto/kdf.rs +++ b/keymanager/src/crypto/kdf.rs @@ -15,12 +15,9 @@ use zeroize::Zeroize; use oasis_core_runtime::{ common::{ - crypto::{ - mrae::{deoxysii::DeoxysII, nonce::Nonce}, - signature, x25519, - }, + crypto::{mrae::nonce::Nonce, signature, x25519}, namespace::Namespace, - sgx::egetkey::egetkey, + sgx::seal::new_deoxysii, }, consensus::beacon::EpochTime, storage::KeyValue, @@ -31,8 +28,9 @@ use crate::{ api::KeyManagerError, crypto::{ pack_runtime_id_generation, unpack_encrypted_secret_nonce, KeyPair, KeyPairId, Secret, - SignedPublicKey, StateKey, VerifiableSecret, + SignedPublicKey, StateKey, }, + secrets::SecretProvider, }; lazy_static! { @@ -99,7 +97,10 @@ lazy_static! { const MASTER_SECRET_STORAGE_KEY_PREFIX: &[u8] = b"keymanager_master_secret"; const MASTER_SECRET_CHECKSUM_STORAGE_KEY_PREFIX: &[u8] = b"keymanager_master_secret_checksum"; const MASTER_SECRET_PROPOSAL_STORAGE_KEY: &[u8] = b"keymanager_master_secret_proposal"; -const MASTER_SECRET_SEAL_CONTEXT: &[u8] = b"Ekiden Keymanager Seal master secret v0"; + +const MASTER_SECRET_SEAL_CONTEXT: &[u8] = b"oasis-core/keymanager: master secret seal"; +const MASTER_SECRET_PROPOSAL_SEAL_CONTEXT: &[u8] = + b"oasis-core/keymanager: master secret proposal seal"; const MASTER_SECRET_CACHE_SIZE: usize = 20; const EPHEMERAL_SECRET_CACHE_SIZE: usize = 20; @@ -344,20 +345,15 @@ impl Kdf { /// - The first initialization can take a very long time, especially if all generations /// of the master secret must be replicated from other enclaves. 
#[allow(clippy::too_many_arguments)] - pub fn init( + pub fn init( &self, storage: &dyn KeyValue, runtime_id: Namespace, generation: u64, checksum: Vec, epoch: EpochTime, - master_secret_fetcher: M, - ephemeral_secret_fetcher: E, - ) -> Result - where - M: Fn(u64) -> Result, - E: Fn(EpochTime) -> Result, - { + provider: &dyn SecretProvider, + ) -> Result { // If the key manager has no secrets, nothing needs to be replicated. if checksum.is_empty() { let mut inner = self.inner.write().unwrap(); @@ -388,17 +384,22 @@ impl Kdf { // On startup replicate ephemeral secrets. if next_generation == 0 { - let last = epoch + 1; - for epoch in (0..=last).rev() { - if let Ok(secret) = ephemeral_secret_fetcher(epoch) { - let mut inner = self.inner.write().unwrap(); - inner.verify_runtime_id(&runtime_id)?; - inner.add_ephemeral_secret(secret, epoch); - continue; - } - if epoch != last { - break; - } + let to = epoch + 1; + let from = to.saturating_sub(EPHEMERAL_SECRET_CACHE_SIZE as u64); + + for epoch in (from..=to).rev() { + let secret = match provider.ephemeral_secret_iter(epoch).next() { + Some(secret) => secret, + _ => { + if epoch == to { + continue; + } + break; + } + }; + let mut inner = self.inner.write().unwrap(); + inner.verify_runtime_id(&runtime_id)?; + inner.add_ephemeral_secret(secret, epoch); } } @@ -424,7 +425,9 @@ impl Kdf { // If only one master secret is missing, try using stored proposal. if next_generation == generation { - if let Some(secret) = Self::load_master_secret_proposal(storage) { + if let Some(secret) = + Self::load_master_secret_proposal(storage, &runtime_id, generation) + { // Proposed secret is untrusted and needs to be verified. let next_checksum = Self::checksum_master_secret(&secret, &curr_checksum); @@ -461,16 +464,23 @@ impl Kdf { // Master secret wasn't found and needs to be fetched from another enclave. // Fetched values are untrusted and need to be verified. 
- let vs = master_secret_fetcher(generation)?; - let (secret, prev_checksum) = match vs.checksum.is_empty() { - true => (vs.secret, runtime_id.0.to_vec()), - false => (vs.secret, vs.checksum), - }; - let next_checksum = Self::checksum_master_secret(&secret, &prev_checksum); - - if next_checksum != last_checksum { - return Err(KeyManagerError::MasterSecretChecksumMismatch.into()); - } + let (secret, prev_checksum) = provider + .master_secret_iter(generation) + .find_map(|vs| { + let prev_checksum = if vs.checksum.is_empty() { + runtime_id.0.to_vec() + } else { + vs.checksum + }; + + let next_checksum = Self::checksum_master_secret(&vs.secret, &prev_checksum); + if next_checksum != last_checksum { + return None; + } + + Some((vs.secret, prev_checksum)) + }) + .ok_or(KeyManagerError::MasterSecretNotReplicated(generation))?; Self::store_master_secret(storage, &runtime_id, &secret, generation); Self::store_checksum(storage, prev_checksum.clone(), generation); @@ -494,7 +504,7 @@ impl Kdf { if inner.generation != Some(generation) { // Derive signing key from the latest secret. let secret = Self::load_master_secret(storage, &runtime_id, generation) - .ok_or(anyhow::anyhow!(KeyManagerError::StateCorrupted))?; + .ok_or(KeyManagerError::StateCorrupted)?; let sk = Self::derive_signing_key(&runtime_id, &secret); let pk = sk.public_key(); @@ -711,7 +721,7 @@ impl Kdf { return Err(KeyManagerError::MasterSecretChecksumMismatch.into()); } - Self::store_master_secret_proposal(storage, &secret); + Self::store_master_secret_proposal(storage, runtime_id, &secret, generation); inner.next_checksum = Some(next_checksum); let next_signing_key = Self::derive_signing_key(runtime_id, &secret).public_key(); @@ -767,7 +777,7 @@ impl Kdf { let additional_data = pack_runtime_id_generation(runtime_id, generation); // Decrypt the persisted master secret. 
- let d2 = Self::new_d2(); + let d2 = new_deoxysii(Keypolicy::MRENCLAVE, MASTER_SECRET_SEAL_CONTEXT); let plaintext = d2 .open(&nonce, ciphertext.to_vec(), additional_data) .expect("persisted state is corrupted"); @@ -791,7 +801,7 @@ impl Kdf { // Encrypt the master secret. let nonce = Nonce::generate(); let additional_data = pack_runtime_id_generation(runtime_id, generation); - let d2 = Self::new_d2(); + let d2 = new_deoxysii(Keypolicy::MRENCLAVE, MASTER_SECRET_SEAL_CONTEXT); let mut ciphertext = d2.seal(&nonce, secret, additional_data); ciphertext.extend_from_slice(&nonce.to_vec()); @@ -807,7 +817,11 @@ impl Kdf { /// period, it is impossible to know whether the loaded proposal is the latest one. Therefore, /// it is crucial to ALWAYS verify that the checksum of the proposal matches the one published /// in the consensus before accepting it. - fn load_master_secret_proposal(storage: &dyn KeyValue) -> Option { + fn load_master_secret_proposal( + storage: &dyn KeyValue, + runtime_id: &Namespace, + generation: u64, + ) -> Option { // Fetch the encrypted master secret proposal if it exists. let key = MASTER_SECRET_PROPOSAL_STORAGE_KEY.to_vec(); @@ -818,10 +832,11 @@ impl Kdf { let (ciphertext, nonce) = unpack_encrypted_secret_nonce(&ciphertext) .expect("persisted state is corrupted, invalid size"); + let additional_data = pack_runtime_id_generation(runtime_id, generation); // Decrypt the persisted master secret proposal. - let d2 = Self::new_d2(); - let plaintext = match d2.open(&nonce, ciphertext.to_vec(), vec![]) { + let d2 = new_deoxysii(Keypolicy::MRENCLAVE, MASTER_SECRET_PROPOSAL_SEAL_CONTEXT); + let plaintext = match d2.open(&nonce, ciphertext.to_vec(), additional_data) { Ok(plaintext) => plaintext, Err(_) => return None, }; @@ -829,20 +844,25 @@ impl Kdf { Some(Secret(plaintext.try_into().unwrap())) } - /// Encrypt and store the next master secret proposal in untrusted local storage. 
+ /// Encrypt and store the master secret proposal in untrusted local storage. /// /// If a proposal already exists, it will be overwritten. - fn store_master_secret_proposal(storage: &dyn KeyValue, secret: &Secret) { + fn store_master_secret_proposal( + storage: &dyn KeyValue, + runtime_id: &Namespace, + secret: &Secret, + generation: u64, + ) { // Using the same key for all proposals will override the previous one. let key = MASTER_SECRET_PROPOSAL_STORAGE_KEY.to_vec(); // Encrypt the master secret. - // Additional data has to be different from the one used when storing verified master + // Seal context has to be different from the one used when storing verified master // secrets so that the attacker cannot replace secrets with rejected proposals. - // Since proposals are always verified before being accepted, confidentiality will suffice. let nonce = Nonce::generate(); - let d2 = Self::new_d2(); - let mut ciphertext = d2.seal(&nonce, secret, vec![]); + let additional_data = pack_runtime_id_generation(runtime_id, generation); + let d2 = new_deoxysii(Keypolicy::MRENCLAVE, MASTER_SECRET_PROPOSAL_SEAL_CONTEXT); + let mut ciphertext = d2.seal(&nonce, secret, additional_data); ciphertext.extend_from_slice(&nonce.to_vec()); // Persist the encrypted master secret. @@ -895,7 +915,7 @@ impl Kdf { /// to the key manager's runtime ID, using master secret generations as the KMAC keys /// at each step. The checksum calculation for the n-th generation can be expressed by /// the formula: KMAC(gen_n, ... KMAC(gen_2, KMAC(gen_1, KMAC(gen_0, runtime_id)))). 
- fn checksum_master_secret(secret: &Secret, last_checksum: &Vec) -> Vec { + pub fn checksum_master_secret(secret: &Secret, last_checksum: &Vec) -> Vec { let mut k = [0u8; 32]; // KMAC256(master_secret, last_checksum, 32, "ekiden-checksum-master-secret") @@ -931,14 +951,6 @@ impl Kdf { let sec = Inner::derive_secret(secret, &RUNTIME_SIGNING_KEY_CUSTOM, runtime_id.as_ref()); signature::PrivateKey::from_bytes(sec.0.to_vec()) } - - fn new_d2() -> DeoxysII { - let mut seal_key = egetkey(Keypolicy::MRENCLAVE, MASTER_SECRET_SEAL_CONTEXT); - let d2 = DeoxysII::new(&seal_key); - seal_key.zeroize(); - - d2 - } } #[cfg(test)] @@ -976,8 +988,9 @@ mod tests { MASTER_SECRET_CHECKSUM_STORAGE_KEY_PREFIX, MASTER_SECRET_STORAGE_KEY_PREFIX, RUNTIME_SIGNING_KEY_CUSTOM, }, - KeyPairId, Secret, VerifiableSecret, SECRET_SIZE, + KeyPairId, Secret, SECRET_SIZE, }, + secrets::MockSecretProvider, }; use super::{ @@ -1051,60 +1064,16 @@ mod tests { } } - /// Master secret and checksum provider. - pub struct MasterSecretProvider { - runtime_id: Namespace, - } - - impl MasterSecretProvider { - fn new(runtime_id: Namespace) -> Self { - return Self { runtime_id }; - } - - fn fetch(&self, generation: u64) -> Result { - let mut secret = Default::default(); - let mut prev_checksum = Default::default(); - let mut next_checksum = self.runtime_id.0.to_vec(); - - for generation in 0..=generation { - secret = Secret([generation as u8; SECRET_SIZE]); - - prev_checksum = next_checksum; - next_checksum = Kdf::checksum_master_secret(&secret, &prev_checksum); - } - - Ok(VerifiableSecret { - secret, - checksum: prev_checksum, - }) - } - - fn checksum(&self, generation: u64) -> Vec { - self.fetch(generation + 1).unwrap().checksum - } - } - #[test] fn init_replication() { let kdf = Kdf::new(); let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); let epoch = 0; - let provider = MasterSecretProvider::new(runtime_id); - let master_secret_fetcher = |generation| 
provider.fetch(generation); - let ephemeral_secret_fetcher = - |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); + let provider = MockSecretProvider::new(runtime_id, false); // No secrets. - let result = kdf.init( - &storage, - runtime_id, - 0, - vec![], - epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, - ); + let result = kdf.init(&storage, runtime_id, 0, vec![], epoch, &provider); assert!(result.is_ok()); let state = result.unwrap(); @@ -1112,7 +1081,7 @@ mod tests { // Secrets replicated from other enclaves. for generation in [0, 0, 1, 1, 2, 2, 5, 5] { - let checksum = provider.checksum(generation); + let checksum = provider.checksum_master_secret(generation); let result = kdf.init( &storage, @@ -1120,8 +1089,7 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_ok()); @@ -1135,7 +1103,7 @@ mod tests { // Secrets loaded from local storage or replicated from other enclaves. for generation in [5, 5, 6, 6, 10, 10] { let kdf = Kdf::new(); - let checksum = provider.checksum(generation); + let checksum = provider.checksum_master_secret(generation); let result = kdf.init( &storage, @@ -1143,8 +1111,7 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_ok()); @@ -1162,28 +1129,16 @@ mod tests { let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); let epoch = 0; - let provider = MasterSecretProvider::new(runtime_id); - let master_secret_fetcher = - |generation| Err(KeyManagerError::MasterSecretNotFound(generation).into()); - let ephemeral_secret_fetcher = - |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); + let provider = MockSecretProvider::new(runtime_id, true); // KDF needs to be initialized. 
- let result = kdf.init( - &storage, - runtime_id, - 0, - vec![], - epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, - ); + let result = kdf.init(&storage, runtime_id, 0, vec![], epoch, &provider); assert!(result.is_ok()); // Rotate master secrets. for generation in 0..5 { - let secret = provider.fetch(generation).unwrap().secret; - let checksum = provider.checksum(generation); + let secret = provider.master_secret(generation); + let checksum = provider.checksum_master_secret(generation); let result = kdf.add_master_secret_proposal( &storage, &runtime_id, @@ -1199,8 +1154,7 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_ok()); @@ -1218,25 +1172,17 @@ mod tests { kdf.add_master_secret_proposal(&storage, &runtime_id, secret, generation, &checksum); assert!(result.is_ok()); - let checksum = provider.checksum(generation); - let result = kdf.init( - &storage, - runtime_id, - generation, - checksum, - epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, - ); + let checksum = provider.checksum_master_secret(generation); + let result = kdf.init(&storage, runtime_id, generation, checksum, epoch, &provider); assert!(result.is_err()); assert_eq!( result.unwrap_err().to_string(), - KeyManagerError::MasterSecretNotFound(generation).to_string() + KeyManagerError::MasterSecretNotReplicated(generation).to_string() ); // Valid proposal. 
- let secret = provider.fetch(generation).unwrap().secret; - let checksum = provider.checksum(generation); + let secret = provider.master_secret(generation); + let checksum = provider.checksum_master_secret(generation); let result = kdf.add_master_secret_proposal(&storage, &runtime_id, secret, generation, &checksum); assert!(result.is_ok()); @@ -1249,8 +1195,7 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_ok()); @@ -1264,14 +1209,11 @@ mod tests { let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); let epoch = 0; - let provider = MasterSecretProvider::new(runtime_id); - let master_secret_fetcher = |generation| provider.fetch(generation); - let ephemeral_secret_fetcher = - |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); + let provider = MockSecretProvider::new(runtime_id, false); // Init. let generation = 5; - let checksum = provider.checksum(generation); + let checksum = provider.checksum_master_secret(generation); let result = kdf.init( &storage, @@ -1279,8 +1221,7 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_ok()); @@ -1300,8 +1241,7 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_err()); assert_eq!( @@ -1316,14 +1256,11 @@ mod tests { let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); let epoch = 0; - let provider = MasterSecretProvider::new(runtime_id); - let master_secret_fetcher = |generation| provider.fetch(generation); - let ephemeral_secret_fetcher = - |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); + let provider = MockSecretProvider::new(runtime_id, false); // Init. 
let generation = 5; - let checksum = provider.checksum(generation); + let checksum = provider.checksum_master_secret(generation); let result = kdf.init( &storage, @@ -1331,8 +1268,7 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_ok()); @@ -1353,8 +1289,7 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ) }); assert!(result.is_err()); @@ -1366,14 +1301,11 @@ mod tests { let storage = InMemoryKeyValue::new(); let runtime_id = Namespace::from(vec![1u8; 32]); let epoch = 0; - let provider = MasterSecretProvider::new(runtime_id); - let master_secret_fetcher = |generation| provider.fetch(generation); - let ephemeral_secret_fetcher = - |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); + let provider = MockSecretProvider::new(runtime_id, false); // Init. let generation = 10; - let checksum = provider.checksum(generation); + let checksum = provider.checksum_master_secret(generation); let result = kdf.init( &storage, @@ -1381,14 +1313,13 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_ok()); // Init with outdated generation. 
let generation = 5; - let checksum = provider.checksum(generation); + let checksum = provider.checksum_master_secret(generation); let result = kdf.init( &storage, @@ -1396,8 +1327,7 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_err()); assert_eq!( @@ -1413,32 +1343,13 @@ mod tests { let runtime_id = Namespace::from(vec![1u8; 32]); let epoch = 0; let invalid_runtime_id = Namespace::from(vec![2u8; 32]); - let provider = MasterSecretProvider::new(runtime_id); - let master_secret_fetcher = |generation| provider.fetch(generation); - let ephemeral_secret_fetcher = - |epoch| Err(KeyManagerError::EphemeralSecretNotFound(epoch).into()); + let provider = MockSecretProvider::new(runtime_id, false); // No secrets. - let result = kdf.init( - &storage, - runtime_id, - 0, - vec![], - epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, - ); + let result = kdf.init(&storage, runtime_id, 0, vec![], epoch, &provider); assert!(result.is_ok()); - let result = kdf.init( - &storage, - invalid_runtime_id, - 0, - vec![], - epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, - ); + let result = kdf.init(&storage, invalid_runtime_id, 0, vec![], epoch, &provider); assert!(result.is_err()); assert_eq!( result.unwrap_err().to_string(), @@ -1447,7 +1358,7 @@ mod tests { // Few secrets. 
let generation = 5; - let checksum = provider.checksum(generation); + let checksum = provider.checksum_master_secret(generation); let result = kdf.init( &storage, @@ -1455,8 +1366,7 @@ mod tests { generation, checksum.clone(), epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_ok()); @@ -1466,8 +1376,7 @@ mod tests { generation, checksum, epoch, - master_secret_fetcher, - ephemeral_secret_fetcher, + &provider, ); assert!(result.is_err()); assert_eq!( @@ -1475,6 +1384,7 @@ mod tests { KeyManagerError::RuntimeMismatch.to_string() ); } + #[test] fn key_generation_is_deterministic() { let kdf = Kdf::default(); @@ -1982,21 +1892,32 @@ mod tests { let storage = InMemoryKeyValue::new(); let secret = Secret([0; SECRET_SIZE]); let new_secret = Secret([1; SECRET_SIZE]); + let runtime_id = Namespace([2; NAMESPACE_SIZE]); + let generation = 3; // Empty storage. - let result = Kdf::load_master_secret_proposal(&storage); + let result = Kdf::load_master_secret_proposal(&storage, &runtime_id, generation); assert!(result.is_none()); // Happy path. - Kdf::store_master_secret_proposal(&storage, &secret); - let loaded = - Kdf::load_master_secret_proposal(&storage).expect("master secret should be loaded"); + Kdf::store_master_secret_proposal(&storage, &runtime_id, &secret, generation); + let loaded = Kdf::load_master_secret_proposal(&storage, &runtime_id, generation) + .expect("master secret should be loaded"); assert_eq!(secret.0, loaded.0); + // Decryption returns None (invalid generation). + let loaded = Kdf::load_master_secret_proposal(&storage, &runtime_id, generation + 1); + assert!(loaded.is_none()); + + // Decryption returns None (invalid runtime ID). + let invalid_runtime_id = Namespace([3; NAMESPACE_SIZE]); + let loaded = Kdf::load_master_secret_proposal(&storage, &invalid_runtime_id, generation); + assert!(loaded.is_none()); + // Overwrite the proposal and check if the last secret is kept. 
- Kdf::store_master_secret_proposal(&storage, &new_secret); - let loaded = - Kdf::load_master_secret_proposal(&storage).expect("master secret should be loaded"); + Kdf::store_master_secret_proposal(&storage, &runtime_id, &new_secret, generation); + let loaded = Kdf::load_master_secret_proposal(&storage, &runtime_id, generation) + .expect("master secret should be loaded"); assert_eq!(new_secret.0, loaded.0); } } diff --git a/keymanager/src/lib.rs b/keymanager/src/lib.rs index 524d4a0276d..67b40f33744 100644 --- a/keymanager/src/lib.rs +++ b/keymanager/src/lib.rs @@ -3,3 +3,4 @@ pub mod client; pub mod crypto; pub mod policy; pub mod runtime; +pub mod secrets; diff --git a/keymanager/src/policy/cached.rs b/keymanager/src/policy/cached.rs index 1f05054653c..7a9c7072cd9 100644 --- a/keymanager/src/policy/cached.rs +++ b/keymanager/src/policy/cached.rs @@ -31,7 +31,7 @@ lazy_static! { } const POLICY_STORAGE_KEY: &[u8] = b"keymanager_policy"; -const POLICY_SEAL_CONTEXT: &[u8] = b"Ekiden Keymanager Seal policy v0"; +const POLICY_SEAL_CONTEXT: &[u8] = b"oasis-core/keymanager: policy seal"; /// Policy, which manages the key manager policy. 
pub struct Policy { diff --git a/keymanager/src/runtime/methods.rs b/keymanager/src/runtime/methods.rs index e1ab8470973..844c0988fcb 100644 --- a/keymanager/src/runtime/methods.rs +++ b/keymanager/src/runtime/methods.rs @@ -46,14 +46,15 @@ use crate::{ ReplicateEphemeralSecretRequest, ReplicateEphemeralSecretResponse, ReplicateMasterSecretRequest, ReplicateMasterSecretResponse, SignedInitResponse, }, - client::{KeyManagerClient, RemoteClient}, + client::RemoteClient, crypto::{ kdf::{Kdf, State}, pack_runtime_id_epoch, pack_runtime_id_generation_epoch, unpack_encrypted_secret_nonce, - KeyPair, Secret, SignedPublicKey, VerifiableSecret, SECRET_SIZE, + KeyPair, Secret, SignedPublicKey, SECRET_SIZE, }, policy::Policy, runtime::context::Context as KmContext, + secrets::{KeyManagerSecretProvider, SecretProvider}, }; /// Maximum age of an ephemeral key in the number of epochs. @@ -85,19 +86,10 @@ pub fn init_kdf(ctx: &mut RpcContext, req: &InitRequest) -> Result { let nodes = nodes_with_ephemeral_secret(ctx, &signed_secret)?; let client = key_manager_client_for_replication(ctx); - fetch_ephemeral_secret(signed_secret.epoch, &nodes, &client)? + + KeyManagerSecretProvider::new(client, nodes) + .ephemeral_secret_iter(signed_secret.epoch) + .find(|secret| { + let checksum = Kdf::checksum_ephemeral_secret( + &signed_secret.runtime_id, + secret, + signed_secret.epoch, + ); + checksum == signed_secret.secret.checksum + }) + .ok_or(KeyManagerError::EphemeralSecretNotReplicated( + signed_secret.epoch, + ))? } }; @@ -415,40 +420,6 @@ fn decrypt_ephemeral_secret( Ok(Some(secret)) } -/// Fetch master secret from another key manager enclave. 
-fn fetch_master_secret( - generation: u64, - nodes: &Vec, - client: &RemoteClient, -) -> Result { - for node in nodes.iter() { - client.set_nodes(vec![*node]); - let result = block_on(client.replicate_master_secret(generation)); - if let Ok(secret) = result { - return Ok(secret); - } - } - - Err(KeyManagerError::MasterSecretNotReplicated(generation).into()) -} - -/// Fetch ephemeral secret from another key manager enclave. -fn fetch_ephemeral_secret( - epoch: EpochTime, - nodes: &Vec, - client: &RemoteClient, -) -> Result { - for node in nodes.iter() { - client.set_nodes(vec![*node]); - let result = block_on(client.replicate_ephemeral_secret(epoch)); - if let Ok(secret) = result { - return Ok(secret); - } - } - - Err(KeyManagerError::EphemeralSecretNotReplicated(epoch).into()) -} - /// Key manager client for master and ephemeral secret replication. fn key_manager_client_for_replication(ctx: &mut RpcContext) -> RemoteClient { let rctx = runtime_context!(ctx, KmContext); diff --git a/keymanager/src/secrets/interface.rs b/keymanager/src/secrets/interface.rs new file mode 100644 index 00000000000..5c76ede7ade --- /dev/null +++ b/keymanager/src/secrets/interface.rs @@ -0,0 +1,17 @@ +use oasis_core_runtime::consensus::beacon::EpochTime; + +use crate::crypto::{Secret, VerifiableSecret}; + +/// Interface for providing master and ephemeral secrets. +pub trait SecretProvider { + /// Returns an iterator that provides access to all the replicas of the master secret + /// for the given generation. + fn master_secret_iter( + &self, + generation: u64, + ) -> Box + '_>; + + /// Returns an iterator that provides access to all the replicas of the ephemeral secret + /// for the given epoch. 
+ fn ephemeral_secret_iter(&self, epoch: EpochTime) -> Box + '_>; +} diff --git a/keymanager/src/secrets/mock.rs b/keymanager/src/secrets/mock.rs new file mode 100644 index 00000000000..71e6023d551 --- /dev/null +++ b/keymanager/src/secrets/mock.rs @@ -0,0 +1,78 @@ +use oasis_core_runtime::{common::namespace::Namespace, consensus::beacon::EpochTime}; + +use crate::crypto::{kdf::Kdf, Secret, VerifiableSecret, SECRET_SIZE}; + +use super::SecretProvider; + +/// Mock secret provider generates fixed master and ephemeral secrets instead of retrieving them +/// from remote key manager enclaves. Intended for testing purposes only. +pub struct MockSecretProvider { + runtime_id: Namespace, + disabled: bool, +} + +impl MockSecretProvider { + /// Create a new mock secret provider. + /// + /// The disabled provider does not return any secrets. + pub fn new(runtime_id: Namespace, disabled: bool) -> Self { + Self { + runtime_id, + disabled, + } + } + + /// Get master secret for the given generation. + pub fn master_secret(&self, generation: u64) -> Secret { + Secret([generation as u8; SECRET_SIZE]) + } + + /// Get ephemeral secret for the given epoch. + pub fn ephemeral_secret(&self, epoch: EpochTime) -> Secret { + Secret([epoch as u8; SECRET_SIZE]) + } + + /// Compute the checksum of the master secret that corresponds to the given generation. 
+ pub fn checksum_master_secret(&self, generation: u64) -> Vec { + let mut checksum = self.runtime_id.0.to_vec(); + + for generation in 0..=generation { + let secret = self.master_secret(generation); + checksum = Kdf::checksum_master_secret(&secret, &checksum); + } + + checksum + } +} + +impl SecretProvider for MockSecretProvider { + fn master_secret_iter( + &self, + generation: u64, + ) -> Box + '_> { + let secret = self.master_secret(generation); + let checksum = if generation == 0 { + self.runtime_id.0.to_vec() + } else { + self.checksum_master_secret(generation - 1) + }; + let mut result = Some(VerifiableSecret { secret, checksum }); + + if self.disabled { + result = None; + } + + Box::new(std::iter::from_fn(move || result.take())) + } + + fn ephemeral_secret_iter(&self, epoch: EpochTime) -> Box + '_> { + let secret = self.ephemeral_secret(epoch); + let mut result = Some(secret); + + if self.disabled { + result = None; + } + + Box::new(std::iter::from_fn(move || result.take())) + } +} diff --git a/keymanager/src/secrets/mod.rs b/keymanager/src/secrets/mod.rs new file mode 100644 index 00000000000..d5ea59c6c75 --- /dev/null +++ b/keymanager/src/secrets/mod.rs @@ -0,0 +1,9 @@ +//! Key manager secret provider. +mod interface; +mod mock; +mod provider; + +// Re-exports. 
+pub use self::{ + interface::SecretProvider, mock::MockSecretProvider, provider::KeyManagerSecretProvider, +}; diff --git a/keymanager/src/secrets/provider.rs b/keymanager/src/secrets/provider.rs new file mode 100644 index 00000000000..3ba228d45f1 --- /dev/null +++ b/keymanager/src/secrets/provider.rs @@ -0,0 +1,90 @@ +use std::sync::Mutex; + +use oasis_core_runtime::{common::crypto::signature, future::block_on}; + +use crate::client::{KeyManagerClient, RemoteClient}; + +use super::SecretProvider; + +struct Inner { + client: RemoteClient, + nodes: Vec, + last_node: usize, +} + +/// Key manager secret provider facilitates access to master and ephemeral secrets retrieved +/// from remote key manager enclaves. +pub struct KeyManagerSecretProvider { + inner: Mutex, +} + +impl KeyManagerSecretProvider { + /// Create a new key manager secret provider. + pub fn new(client: RemoteClient, nodes: Vec) -> Self { + Self { + inner: Mutex::new(Inner { + client, + nodes, + last_node: 0, + }), + } + } +} + +impl SecretProvider for KeyManagerSecretProvider { + fn master_secret_iter( + &self, + generation: u64, + ) -> Box + '_> { + // Start fetching secrets from the last connected node. + let start = { self.inner.lock().unwrap().last_node }; + let mut counter = 0; + + // Iterate over all nodes, ignoring errors. + Box::new(std::iter::from_fn(move || { + let mut inner = self.inner.lock().unwrap(); + let total = inner.nodes.len(); + + while counter < total { + let idx = (start + counter) % total; + inner.last_node = idx; + counter += 1; + + inner.client.set_nodes(vec![inner.nodes[idx]]); + if let Ok(secret) = block_on(inner.client.replicate_master_secret(generation)) { + return Some(secret); + } + } + + None + })) + } + + fn ephemeral_secret_iter( + &self, + epoch: oasis_core_runtime::consensus::beacon::EpochTime, + ) -> Box + '_> { + // Start fetching secrets from the last connected node. 
+ let start = { self.inner.lock().unwrap().last_node }; + let mut counter = 0; + + // Iterate over all nodes, ignoring errors. + Box::new(std::iter::from_fn(move || { + let mut inner = self.inner.lock().unwrap(); + let total = inner.nodes.len(); + + while counter < total { + let idx = (start + counter) % total; + inner.last_node = idx; + counter += 1; + + inner.client.set_nodes(vec![inner.nodes[idx]]); + if let Ok(secret) = block_on(inner.client.replicate_ephemeral_secret(epoch)) { + return Some(secret); + } + } + + None + })) + } +} diff --git a/runtime/src/common/sgx/seal.rs b/runtime/src/common/sgx/seal.rs index d01b718080d..d320cdac02c 100644 --- a/runtime/src/common/sgx/seal.rs +++ b/runtime/src/common/sgx/seal.rs @@ -17,7 +17,7 @@ pub fn seal(key_policy: Keypolicy, context: &[u8], data: &[u8]) -> Vec { // Encrypt the raw policy. let mut nonce = [0u8; NONCE_SIZE]; rng.fill(&mut nonce); - let d2 = new_d2(key_policy, context); + let d2 = new_deoxysii(key_policy, context); let mut ciphertext = d2.seal(&nonce, data, vec![]); ciphertext.extend_from_slice(&nonce); @@ -48,7 +48,7 @@ pub fn unseal(key_policy: Keypolicy, context: &[u8], ciphertext: &[u8]) -> Optio nonce.copy_from_slice(&ciphertext[ct_len..]); let ciphertext = &ciphertext[..ct_len]; - let d2 = new_d2(key_policy, context); + let d2 = new_deoxysii(key_policy, context); let plaintext = d2 .open(&nonce, ciphertext.to_vec(), vec![]) .expect("ciphertext is corrupted"); @@ -56,7 +56,11 @@ pub fn unseal(key_policy: Keypolicy, context: &[u8], ciphertext: &[u8]) -> Optio Some(plaintext) } -fn new_d2(key_policy: Keypolicy, context: &[u8]) -> DeoxysII { +/// Creates a new Deoxys-II instance initialized with an SGX sealing key derived +/// from the results of the `EGETKEY`instruction. +/// +/// The `context` field is a domain separation tag. 
+pub fn new_deoxysii(key_policy: Keypolicy, context: &[u8]) -> DeoxysII { let mut seal_key = egetkey(key_policy, context); let d2 = DeoxysII::new(&seal_key); seal_key.zeroize(); From c069bb3a1484875a7446ec17fa106c092b91689a Mon Sep 17 00:00:00 2001 From: Peter Nose Date: Sun, 30 Apr 2023 14:54:28 +0200 Subject: [PATCH 7/7] go/worker/keymanager: Add and refine key manager worker metrics --- .changelog/5196.feature.md | 47 +++++++++++++ docs/oasis-node/metrics.md | 16 ++++- go/worker/keymanager/init.go | 2 + go/worker/keymanager/metrics.go | 114 +++++++++++++++++++++++++++++++- go/worker/keymanager/worker.go | 51 +++++++++++--- 5 files changed, 215 insertions(+), 15 deletions(-) diff --git a/.changelog/5196.feature.md b/.changelog/5196.feature.md index bd1756c5e8f..0a580ac2359 100644 --- a/.changelog/5196.feature.md +++ b/.changelog/5196.feature.md @@ -9,3 +9,50 @@ one of the key manager enclaves must publish a proposal for the next generation of the master secret, which must then be replicated by the majority of enclaves. If the replication process is not completed by the end of the epoch, the proposal can be replaced with a new one. + +The following metrics have been added: + +- `oasis_worker_keymanager_consensus_ephemeral_secret_epoch_number` + is the epoch number of the latest ephemeral secret. + +- `oasis_worker_keymanager_consensus_master_secret_generation_number` + is the generation number of the latest master secret. + +- `oasis_worker_keymanager_consensus_master_secret_rotation_epoch_number` + is the epoch number of the latest master secret rotation. + +- `oasis_worker_keymanager_consensus_master_secret_proposal_generation_number` + is the generation number of the latest master secret proposal. + +- `oasis_worker_keymanager_consensus_master_secret_proposal_epoch_number` + is the epoch number of the latest master secret proposal. 
+ +- `oasis_worker_keymanager_enclave_ephemeral_secret_epoch_number` + is the epoch number of the latest ephemeral secret loaded into the enclave. + +- `oasis_worker_keymanager_enclave_master_secret_generation_number` + is the generation number of the latest master secret as seen by the enclave. + +- `oasis_worker_keymanager_enclave_master_secret_proposal_generation_number` + is the generation number of the latest master secret proposal loaded + into the enclave. + +- `oasis_worker_keymanager_enclave_master_secret_proposal_epoch_number` + is the epoch number of the latest master secret proposal loaded + into the enclave. + +- `oasis_worker_keymanager_enclave_generated_master_secret_generation_number` + is the generation number of the latest master secret generated + by the enclave. + +- `oasis_worker_keymanager_enclave_generated_master_secret_epoch_number` + is the epoch number of the latest master secret generated by the enclave. + +- `oasis_worker_keymanager_enclave_generated_ephemeral_secret_epoch_number` + is the epoch number of the latest ephemeral secret generated by the enclave. + +The following metrics have had runtime labels added: + +- `oasis_worker_keymanager_compute_runtime_count`, + +- `oasis_worker_keymanager_policy_update_count`. diff --git a/docs/oasis-node/metrics.md b/docs/oasis-node/metrics.md index cfacbbf35fe..8ee67c69b75 100644 --- a/docs/oasis-node/metrics.md +++ b/docs/oasis-node/metrics.md @@ -101,9 +101,21 @@ oasis_worker_executor_liveness_live_ratio | Gauge | Ratio between live and total oasis_worker_executor_liveness_live_rounds | Gauge | Number of live rounds in last epoch. | runtime | [worker/common/committee](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/common/committee/node.go) oasis_worker_executor_liveness_total_rounds | Gauge | Number of total rounds in last epoch. 
| runtime | [worker/common/committee](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/common/committee/node.go) oasis_worker_failed_round_count | Counter | Number of failed roothash rounds. | runtime | [worker/common/committee](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/common/committee/node.go) -oasis_worker_keymanager_compute_runtime_count | Counter | Number of compute runtimes using the key manager. | | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_compute_runtime_count | Counter | Number of compute runtimes using the key manager. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_consensus_ephemeral_secret_epoch_number | Gauge | Epoch number of the latest ephemeral secret. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_consensus_master_secret_generation_number | Gauge | Generation number of the latest master secret. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_consensus_master_secret_proposal_epoch_number | Gauge | Epoch number of the latest master secret proposal. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_consensus_master_secret_proposal_generation_number | Gauge | Generation number of the latest master secret proposal. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_consensus_master_secret_rotation_epoch_number | Gauge | Epoch number of the latest master secret rotation. 
| runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_enclave_ephemeral_secret_epoch_number | Gauge | Epoch number of the latest ephemeral secret loaded into the enclave. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_enclave_generated_ephemeral_secret_epoch_number | Gauge | Epoch number of the latest ephemeral secret generated by the enclave. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_enclave_generated_master_secret_epoch_number | Gauge | Epoch number of the latest master secret generated by the enclave. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_enclave_generated_master_secret_generation_number | Gauge | Generation number of the latest master secret generated by the enclave. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_enclave_master_secret_generation_number | Gauge | Generation number of the latest master secret as seen by the enclave. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_enclave_master_secret_proposal_epoch_number | Gauge | Epoch number of the latest master secret proposal loaded into the enclave. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_enclave_master_secret_proposal_generation_number | Gauge | Generation number of the latest master secret proposal loaded into the enclave. 
| runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) oasis_worker_keymanager_enclave_rpc_count | Counter | Number of remote Enclave RPC requests via P2P. | method | [worker/keymanager/p2p](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/p2p/metrics.go) -oasis_worker_keymanager_policy_update_count | Counter | Number of key manager policy updates. | | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_policy_update_count | Counter | Number of key manager policy updates. | runtime | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) oasis_worker_node_registered | Gauge | Is oasis node registered (binary). | | [worker/registration](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/registration/worker.go) oasis_worker_node_registration_eligible | Gauge | Is oasis node eligible for registration (binary). | | [worker/registration](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/registration/worker.go) oasis_worker_node_status_frozen | Gauge | Is oasis node frozen (binary). | | [worker/registration](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/registration/worker.go) diff --git a/go/worker/keymanager/init.go b/go/worker/keymanager/init.go index 2464082a7eb..19d56b90382 100644 --- a/go/worker/keymanager/init.go +++ b/go/worker/keymanager/init.go @@ -85,9 +85,11 @@ func New( w.privatePeers[peerID] = struct{}{} } + // Parse runtime ID. 
if err := w.runtimeID.UnmarshalHex(config.GlobalConfig.Keymanager.RuntimeID); err != nil { return nil, fmt.Errorf("worker/keymanager: failed to parse runtime ID: %w", err) } + w.runtimeLabel = w.runtimeID.String() var err error w.roleProvider, err = r.NewRuntimeRoleProvider(node.RoleKeyManager, w.runtimeID) diff --git a/go/worker/keymanager/metrics.go b/go/worker/keymanager/metrics.go index c2ee196a79e..23216c56cee 100644 --- a/go/worker/keymanager/metrics.go +++ b/go/worker/keymanager/metrics.go @@ -7,23 +7,133 @@ import ( ) var ( - computeRuntimeCount = prometheus.NewCounter( + computeRuntimeCount = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "oasis_worker_keymanager_compute_runtime_count", Help: "Number of compute runtimes using the key manager.", }, + []string{"runtime"}, ) - policyUpdateCount = prometheus.NewCounter( + policyUpdateCount = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "oasis_worker_keymanager_policy_update_count", Help: "Number of key manager policy updates.", }, + []string{"runtime"}, + ) + + consensusEphemeralSecretEpochNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_keymanager_consensus_ephemeral_secret_epoch_number", + Help: "Epoch number of the latest ephemeral secret.", + }, + []string{"runtime"}, + ) + + consensusMasterSecretGenerationNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_keymanager_consensus_master_secret_generation_number", + Help: "Generation number of the latest master secret.", + }, + []string{"runtime"}, + ) + + consensusMasterSecretRotationEpochNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_keymanager_consensus_master_secret_rotation_epoch_number", + Help: "Epoch number of the latest master secret rotation.", + }, + []string{"runtime"}, + ) + + consensusMasterSecretProposalEpochNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: 
"oasis_worker_keymanager_consensus_master_secret_proposal_epoch_number", + Help: "Epoch number of the latest master secret proposal.", + }, + []string{"runtime"}, + ) + + consensusMasterSecretProposalGenerationNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_keymanager_consensus_master_secret_proposal_generation_number", + Help: "Generation number of the latest master secret proposal.", + }, + []string{"runtime"}, + ) + + enclaveEphemeralSecretEpochNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_keymanager_enclave_ephemeral_secret_epoch_number", + Help: "Epoch number of the latest ephemeral secret loaded into the enclave.", + }, + []string{"runtime"}, + ) + + enclaveMasterSecretGenerationNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_keymanager_enclave_master_secret_generation_number", + Help: "Generation number of the latest master secret as seen by the enclave.", + }, + []string{"runtime"}, + ) + + enclaveMasterSecretProposalEpochNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_keymanager_enclave_master_secret_proposal_epoch_number", + Help: "Epoch number of the latest master secret proposal loaded into the enclave.", + }, + []string{"runtime"}, + ) + + enclaveMasterSecretProposalGenerationNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_keymanager_enclave_master_secret_proposal_generation_number", + Help: "Generation number of the latest master secret proposal loaded into the enclave.", + }, + []string{"runtime"}, + ) + + enclaveGeneratedMasterSecretEpochNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_keymanager_enclave_generated_master_secret_epoch_number", + Help: "Epoch number of the latest master secret generated by the enclave.", + }, + []string{"runtime"}, + ) + + enclaveGeneratedMasterSecretGenerationNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: 
"oasis_worker_keymanager_enclave_generated_master_secret_generation_number", + Help: "Generation number of the latest master secret generated by the enclave.", + }, + []string{"runtime"}, + ) + + enclaveGeneratedEphemeralSecretEpochNumber = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_keymanager_enclave_generated_ephemeral_secret_epoch_number", + Help: "Epoch number of the latest ephemeral secret generated by the enclave.", + }, + []string{"runtime"}, ) keymanagerWorkerCollectors = []prometheus.Collector{ computeRuntimeCount, policyUpdateCount, + consensusEphemeralSecretEpochNumber, + consensusMasterSecretGenerationNumber, + consensusMasterSecretRotationEpochNumber, + consensusMasterSecretProposalEpochNumber, + consensusMasterSecretProposalGenerationNumber, + enclaveEphemeralSecretEpochNumber, + enclaveMasterSecretGenerationNumber, + enclaveMasterSecretProposalEpochNumber, + enclaveMasterSecretProposalGenerationNumber, + enclaveGeneratedMasterSecretEpochNumber, + enclaveGeneratedMasterSecretGenerationNumber, + enclaveGeneratedEphemeralSecretEpochNumber, } metricsOnce sync.Once diff --git a/go/worker/keymanager/worker.go b/go/worker/keymanager/worker.go index 2b2be92ea7a..f7a0a1c55a8 100644 --- a/go/worker/keymanager/worker.go +++ b/go/worker/keymanager/worker.go @@ -77,8 +77,9 @@ type Worker struct { // nolint: maligned quitCh chan struct{} initCh chan struct{} - runtime runtimeRegistry.Runtime - runtimeID common.Namespace + runtime runtimeRegistry.Runtime + runtimeID common.Namespace + runtimeLabel string clientRuntimes map[common.Namespace]*clientRuntimeWatcher @@ -383,14 +384,16 @@ func (w *Worker) initEnclave(kmStatus *api.Status, rtStatus *runtimeStatus) (*ap "next_rsk", signedInitResp.InitResponse.NextRSK, ) - // Cache the key manager enclave status and the currently active policy. w.Lock() defer w.Unlock() + // Update metrics. 
+ enclaveMasterSecretGenerationNumber.WithLabelValues(w.runtimeLabel).Set(float64(kmStatus.Generation)) if w.enclaveStatus == nil || !bytes.Equal(w.enclaveStatus.InitResponse.PolicyChecksum, signedInitResp.InitResponse.PolicyChecksum) { - policyUpdateCount.Inc() + policyUpdateCount.WithLabelValues(w.runtimeLabel).Inc() } + // Cache the key manager enclave status and the currently active policy. w.enclaveStatus = &signedInitResp w.policy = kmStatus.Policy @@ -554,7 +557,8 @@ func (w *Worker) startClientRuntimeWatcher(rt *registry.Runtime, kmStatus *api.S w.addClientRuntimeWatcher(rt.ID, crw) - computeRuntimeCount.Inc() + // Update metrics. + computeRuntimeCount.WithLabelValues(w.runtimeLabel).Inc() return nil } @@ -700,6 +704,11 @@ func (w *Worker) generateMasterSecret(runtimeID common.Namespace, generation uin return err } + // Update metrics. + enclaveGeneratedMasterSecretGenerationNumber.WithLabelValues(w.runtimeLabel).Set(float64(rsp.SignedSecret.Secret.Generation)) + enclaveGeneratedMasterSecretEpochNumber.WithLabelValues(w.runtimeLabel).Set(float64(rsp.SignedSecret.Secret.Epoch)) + w.setLastGeneratedMasterSecretGeneration(rsp.SignedSecret.Secret.Generation) + return err } @@ -771,6 +780,10 @@ func (w *Worker) generateEphemeralSecret(runtimeID common.Namespace, epoch beaco return err } + // Update metrics. + enclaveGeneratedEphemeralSecretEpochNumber.WithLabelValues(w.runtimeLabel).Set(float64(rsp.SignedSecret.Secret.Epoch)) + w.setLastGeneratedEphemeralSecretEpoch(rsp.SignedSecret.Secret.Epoch) + return err } @@ -854,6 +867,11 @@ func (w *Worker) loadMasterSecret(sigSecret *api.SignedEncryptedMasterSecret) er return fmt.Errorf("failed to load master secret: %w", err) } + // Update metrics. 
+ enclaveMasterSecretProposalGenerationNumber.WithLabelValues(w.runtimeLabel).Set(float64(w.mstSecret.Secret.Generation)) + enclaveMasterSecretProposalEpochNumber.WithLabelValues(w.runtimeLabel).Set(float64(w.mstSecret.Secret.Epoch)) + w.setLastLoadedMasterSecretGeneration(w.mstSecret.Secret.Generation) + return nil } @@ -874,6 +892,10 @@ func (w *Worker) loadEphemeralSecret(sigSecret *api.SignedEncryptedEphemeralSecr return fmt.Errorf("failed to load ephemeral secret: %w", err) } + // Update metrics. + enclaveEphemeralSecretEpochNumber.WithLabelValues(w.runtimeLabel).Set(float64(w.ephSecret.Secret.Epoch)) + w.setLastLoadedEphemeralSecretEpoch(w.ephSecret.Secret.Epoch) + return nil } @@ -946,6 +968,10 @@ func (w *Worker) handleStatusUpdate(kmStatus *api.Status) { "checksum", hex.EncodeToString(kmStatus.Checksum), ) + // Update metrics. + consensusMasterSecretGenerationNumber.WithLabelValues(w.runtimeLabel).Set(float64(kmStatus.Generation)) + consensusMasterSecretRotationEpochNumber.WithLabelValues(w.runtimeLabel).Set(float64(kmStatus.RotationEpoch)) + // Cache the latest status. w.setStatus(kmStatus) w.kmStatus = kmStatus @@ -1140,6 +1166,11 @@ func (w *Worker) handleNewMasterSecret(secret *api.SignedEncryptedMasterSecret) "checksum", hex.EncodeToString(secret.Secret.Secret.Checksum), ) + // Update metrics. + consensusMasterSecretProposalGenerationNumber.WithLabelValues(w.runtimeLabel).Set(float64(secret.Secret.Generation)) + consensusMasterSecretProposalEpochNumber.WithLabelValues(w.runtimeLabel).Set(float64(secret.Secret.Epoch)) + + // Rearm master secret loading. w.mstSecret = secret w.loadMstSecRetry = 0 @@ -1179,8 +1210,6 @@ func (w *Worker) handleGenerateMasterSecret(height int64, epoch beacon.EpochTime w.genMstSecDoneCh <- false return } - - w.setLastGeneratedMasterSecretGeneration(nextGen) w.genMstSecDoneCh <- true } @@ -1218,7 +1247,6 @@ func (w *Worker) handleLoadMasterSecret() { // Disarm master secret loading. 
w.loadMstSecRetry = math.MaxInt64 - w.setLastLoadedMasterSecretGeneration(w.mstSecret.Secret.Generation) // Announce that the enclave has replicated the proposal for the next master // secret and is ready for rotation. @@ -1234,6 +1262,10 @@ func (w *Worker) handleNewEphemeralSecret(secret *api.SignedEncryptedEphemeralSe "epoch", secret.Secret.Epoch, ) + // Update metrics. + consensusEphemeralSecretEpochNumber.WithLabelValues(w.runtimeLabel).Set(float64(secret.Secret.Epoch)) + + // Rearm ephemeral secret loading. w.ephSecret = secret w.loadEphSecRetry = 0 @@ -1276,8 +1308,6 @@ func (w *Worker) handleGenerateEphemeralSecret(height int64, epoch beacon.EpochT w.genEphSecDoneCh <- false return } - - w.setLastGeneratedEphemeralSecretEpoch(nextEpoch) w.genEphSecDoneCh <- true } @@ -1314,7 +1344,6 @@ func (w *Worker) handleLoadEphemeralSecret() { // Disarm ephemeral secret loading. w.loadEphSecRetry = math.MaxInt64 - w.setLastLoadedEphemeralSecretEpoch(w.ephSecret.Secret.Epoch) } func (w *Worker) handleStop() {