Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow overriding gRPC's connection timeout with VAULT_GRPC_MIN_CONNECT_TIMEOUT #19676

Merged
merged 3 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog/19676.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
```release-note:improvement
core: Allow overriding gRPC connect timeout via VAULT_GRPC_MIN_CONNECT_TIMEOUT. This is an env var rather than a config setting because we don't expect this to ever be needed. It's being added as a last-ditch
option in case all else fails for some replication issues we may not have fully reproduced.
```
3 changes: 2 additions & 1 deletion vault/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,8 @@ func (c *Core) startClusterListener(ctx context.Context) error {
c.clusterListener.Store(cluster.NewListener(networkLayer,
c.clusterCipherSuites,
listenerLogger,
5*c.clusterHeartbeatInterval))
5*c.clusterHeartbeatInterval,
c.grpcMinConnectTimeout))

c.AddLogger(listenerLogger)

Expand Down
17 changes: 15 additions & 2 deletions vault/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,10 @@ type Listener struct {
logger log.Logger
l sync.RWMutex
tlsConnectionLoggingLevel log.Level
grpcMinConnectTimeout time.Duration
}

func NewListener(networkLayer NetworkLayer, cipherSuites []uint16, logger log.Logger, idleTimeout time.Duration) *Listener {
func NewListener(networkLayer NetworkLayer, cipherSuites []uint16, logger log.Logger, idleTimeout, grpcMinConnectTimeout time.Duration) *Listener {
var maxStreams uint32 = math.MaxUint32
if override := os.Getenv("VAULT_GRPC_MAX_STREAMS"); override != "" {
i, err := strconv.ParseUint(override, 10, 32)
Expand Down Expand Up @@ -114,6 +115,7 @@ func NewListener(networkLayer NetworkLayer, cipherSuites []uint16, logger log.Lo
cipherSuites: cipherSuites,
logger: logger,
tlsConnectionLoggingLevel: log.LevelFromString(os.Getenv("VAULT_CLUSTER_TLS_SESSION_LOG_LEVEL")),
grpcMinConnectTimeout: grpcMinConnectTimeout,
}
}

Expand Down Expand Up @@ -464,10 +466,21 @@ func (cl *Listener) GetDialerFunc(ctx context.Context, alpn string) func(string,
}

tlsConfig.NextProtos = []string{alpn}
cl.logger.Debug("creating rpc dialer", "address", addr, "alpn", alpn, "host", tlsConfig.ServerName)
args := []interface{}{
"address", addr,
"alpn", alpn,
"host", tlsConfig.ServerName,
"timeout", fmt.Sprintf("%s", timeout),
}
if cl.grpcMinConnectTimeout != 0 {
args = append(args, "timeout_env_override", fmt.Sprintf("%s", cl.grpcMinConnectTimeout))
}
cl.logger.Debug("creating rpc dialer", args...)

start := time.Now()
conn, err := cl.networkLayer.Dial(addr, timeout, tlsConfig)
if err != nil {
cl.logger.Debug("dial failure", "address", addr, "alpn", alpn, "host", tlsConfig.ServerName, "duration", fmt.Sprintf("%s", time.Since(start)), "error", err)
return nil, err
}
cl.logTLSSessionStart(conn.RemoteAddr().String(), conn.ConnectionState())
Expand Down
2 changes: 1 addition & 1 deletion vault/cluster/inmem_layer.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func (l *InmemLayer) Dial(addr string, timeout time.Duration, tlsConfig *tls.Con
if l.forceTimeout == addr {
l.logger.Debug("forcing timeout", "addr", addr, "me", l.addr)

// gRPC sets a deadline of 20 seconds on the dail attempt, so
// gRPC sets a deadline of 20 seconds on the dial attempt, so
// matching that here.
time.Sleep(time.Second * 20)
l.l.Unlock()
Expand Down
13 changes: 13 additions & 0 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,9 @@ type Core struct {
// if populated, the callback is called for every request
// for testing purposes
requestResponseCallback func(logical.Backend, *logical.Request, *logical.Response)

// if non-zero, overrides the default gRPC min connect timeout (currently 20s in grpc 1.51)
grpcMinConnectTimeout time.Duration
}

// c.stateLock needs to be held in read mode before calling this function.
Expand Down Expand Up @@ -1286,6 +1289,16 @@ func NewCore(conf *CoreConfig) (*Core, error) {
c.events.Start()
}

minConnectTimeoutRaw := os.Getenv("VAULT_GRPC_MIN_CONNECT_TIMEOUT")
if minConnectTimeoutRaw != "" {
dur, err := time.ParseDuration(minConnectTimeoutRaw)
if err != nil {
c.logger.Warn("VAULT_GRPC_MIN_CONNECT_TIMEOUT contains non-duration value, ignoring")
} else if dur != 0 {
c.grpcMinConnectTimeout = dur
}
}

return c, nil
}

Expand Down