Skip to content
This repository has been archived by the owner on May 12, 2021. It is now read-only.

Commit

Permalink
runtime: sleep 1 second after GetOOMEvent fails
Browse files Browse the repository at this point in the history
In some cases, for example when the agent has crashed but has not yet been marked
dead, GetOOMEvent returns errors such as `connection reset by peer` or `ttrpc: closed`.
Sleep for 1 second (the agent check interval) before retrying, and let the agent health check detect the failure.

Fixes: #3064

Signed-off-by: bin liu <[email protected]>
  • Loading branch information
liubin committed Nov 12, 2020
1 parent 36d541c commit 559ba41
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
4 changes: 3 additions & 1 deletion containerd-shim-v2/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types/task"
"github.com/containerd/containerd/mount"
vc "github.com/kata-containers/runtime/virtcontainers"
"github.com/kata-containers/runtime/virtcontainers/pkg/oci"
"github.com/sirupsen/logrus"
"google.golang.org/grpc/codes"
Expand Down Expand Up @@ -147,9 +148,10 @@ func watchOOMEvents(ctx context.Context, s *service) {
logrus.WithField("sandbox", s.sandbox.ID()).WithError(err).Warn("failed to get OOM event from sandbox")
// If the GetOOMEvent call is not implemented, then the agent is most likely an older version,
// stop attempting to get OOM events.
if isGRPCErrorCode(codes.Unimplemented, err) {
if isGRPCErrorCode(codes.NotFound, err) || err.Error() == "Dead agent" {
return
}
time.Sleep(vc.DefaultMonitorCheckInterval)
continue
}

Expand Down
6 changes: 3 additions & 3 deletions virtcontainers/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ import (
)

const (
defaultCheckInterval = 1 * time.Second
watcherChannelSize = 128
DefaultMonitorCheckInterval = 1 * time.Second
watcherChannelSize = 128
)

type monitor struct {
Expand All @@ -31,7 +31,7 @@ type monitor struct {
func newMonitor(s *Sandbox) *monitor {
return &monitor{
sandbox: s,
checkInterval: defaultCheckInterval,
checkInterval: DefaultMonitorCheckInterval,
stopCh: make(chan bool, 1),
}
}
Expand Down

0 comments on commit 559ba41

Please sign in to comment.