Skip to content

Commit

Permalink
chore(telemetry): faster fail in case of dial error (#2694)
Browse files (browse the repository at this point in the history)
- Time-bound the dialing with a 3s timeout (otherwise it blocks for 30s)
- Remove the 15s retry-delay timer after a dial failure
- Lower the maximum number of dial attempts per endpoint from 5 to 3
  • Loading branch information
qdm12 committed Jul 26, 2022
1 parent c74a5b0 commit b9449d1
Showing 1 changed file with 9 additions and 15 deletions.
24 changes: 9 additions & 15 deletions dot/telemetry/mailer.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,28 +54,22 @@ func BootstrapMailer(ctx context.Context, conns []*genesis.TelemetryEndpoint, en
}

for _, v := range conns {
const maxRetries = 5
const maxRetries = 3

for connAttempts := 0; connAttempts < maxRetries; connAttempts++ {
conn, response, err := websocket.DefaultDialer.Dial(v.Endpoint, nil)
const dialTimeout = 3 * time.Second
dialCtx, dialCancel := context.WithTimeout(ctx, dialTimeout)
conn, response, err := websocket.DefaultDialer.DialContext(dialCtx, v.Endpoint, nil)
dialCancel()
if err != nil {
mailer.logger.Debugf("cannot dial telemetry endpoint %s (try %d of %d): %s",
v.Endpoint, connAttempts+1, maxRetries, err)

const retryDelay = time.Second * 15
timer := time.NewTimer(retryDelay)

select {
case <-timer.C:
continue
case <-ctx.Done():
mailer.logger.Debugf("bootstrap telemetry issue: %w", ctx.Err())
if !timer.Stop() {
<-timer.C
}

return nil, ctx.Err()
if ctxErr := ctx.Err(); ctxErr != nil {
return nil, ctxErr
}

continue
}

err = response.Body.Close()
Expand Down

0 comments on commit b9449d1

Please sign in to comment.