From c01e297b788cfe2ea3e82f60180e3e851bbef6ae Mon Sep 17 00:00:00 2001
From: fanmin shi
Date: Fri, 5 May 2017 16:34:22 -0700
Subject: [PATCH] backend: add prometheus metric for large snapshot duration.

FIXES #7878
---
Review notes (ignored by git am):
  - dropped the leftover debugging time.Sleep(10 * time.Second) that
    delayed every Snapshot() call before b.batchTx.Commit()
  - corrected the bucket-range comment: ExponentialBuckets(1, 2, 10)
    tops out at 512 seconds, not 1024
  - the post-image blob hashes on the index lines predate these edits

 mvcc/backend/backend.go |  1 +
 mvcc/backend/metrics.go | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/mvcc/backend/backend.go b/mvcc/backend/backend.go
index a94cd65a70b3..89881e9b78bd 100644
--- a/mvcc/backend/backend.go
+++ b/mvcc/backend/backend.go
@@ -176,11 +176,12 @@ func (b *backend) Snapshot() Snapshot {
 			case <-ticker.C:
 				plog.Warningf("snapshotting is taking more than %v seconds to finish [started at %v]", time.Since(start).Seconds(), start)
 			case <-stopc:
+				snapShotDurations.Observe(time.Since(start).Seconds())
 				return
 			}
 		}
 	}()
 
 	b.batchTx.Commit()
 
 	b.mu.RLock()
diff --git a/mvcc/backend/metrics.go b/mvcc/backend/metrics.go
index 34a56a91956f..ff43dc0f582a 100644
--- a/mvcc/backend/metrics.go
+++ b/mvcc/backend/metrics.go
@@ -24,8 +24,20 @@ var (
 		Help:      "The latency distributions of commit called by backend.",
 		Buckets:   prometheus.ExponentialBuckets(0.001, 2, 14),
 	})
+
+	// snapShotDurations is observed by the goroutine started in Snapshot
+	// with the seconds elapsed since start once it receives on stopc.
+	snapShotDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Namespace: "etcd",
+		Subsystem: "disk",
+		Name:      "backend_snapshot_duration_seconds",
+		Help:      "The latency distributions of Snapshot called by backend.",
+		// 10 buckets whose upper bounds double from 1 second to 512 seconds.
+		Buckets: prometheus.ExponentialBuckets(1, 2, 10),
+	})
 )
 
 func init() {
 	prometheus.MustRegister(commitDurations)
+	prometheus.MustRegister(snapShotDurations)
 }