Skip to content

Commit

Permalink
runtime,runtime/metrics: track on-cpu time per goroutine
Browse files Browse the repository at this point in the history
Fixes github.com/golang/issues/41554.

This commit introduces a /sched/goroutine/running:nanoseconds metric,
defined as the total time spent by a goroutine in the running state.
This measurement is useful for systems that would benefit from
fine-grained CPU attribution for.

An alternative for scheduler-backed CPU attribution would be the use of
profiler labels. Given it's common to spawn multiple goroutines for the
same task, goroutine-backed statistics can easily become misleading.
Profiler labels instead let you measure CPU performance across a set of
cooperating goroutines. That said, it comes with overhead that makes it
unfeasible to always enable. For high-performance systems that care
about fine-grained CPU attribution (databases for e.g. that want to
measure total CPU time spent processing each request), profiler labels
are too cost-prohibitive, especially given the Go runtime has a much
cheaper view of the data needed.

It's worth noting that we already export /sched/latencies:seconds to
track scheduling latencies, i.e. time spent by a goroutine in the
runnable state. This commit does effectively the same, except for the
running state. Users are free to use this metric to power histograms or
tracking on-CPU time across a set of goroutines.

Change-Id: Id21ae4fcee0cd5f983604d61dad373098a0966bc
  • Loading branch information
irfansharif committed Feb 24, 2022
1 parent 0261fa6 commit 1d36b93
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 2 deletions.
6 changes: 6 additions & 0 deletions src/runtime/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,12 @@ func initMetrics() {
in.sysStats.gcMiscSys + in.sysStats.otherSys
},
},
"/sched/goroutine/running:nanoseconds": {
compute: func(_ *statAggregate, out *metricValue) {
out.kind = metricKindUint64
out.scalar = uint64(grunningnanos())
},
},
"/sched/goroutines:goroutines": {
compute: func(_ *statAggregate, out *metricValue) {
out.kind = metricKindUint64
Expand Down
5 changes: 5 additions & 0 deletions src/runtime/metrics/description.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,11 @@ var allDesc = []Description{
Description: "All memory mapped by the Go runtime into the current process as read-write. Note that this does not include memory mapped by code called via cgo or via the syscall package. Sum of all metrics in /memory/classes.",
Kind: KindUint64,
},
{
Name: "/sched/goroutine/running:nanoseconds",
Description: "Time spent by the current goroutine in the running state.",
Kind: KindUint64,
},
{
Name: "/sched/goroutines:goroutines",
Description: "Count of live goroutines.",
Expand Down
3 changes: 3 additions & 0 deletions src/runtime/metrics/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ Below is the full list of supported metrics, ordered lexicographically.
by code called via cgo or via the syscall package.
Sum of all metrics in /memory/classes.
/sched/goroutine/running:nanoseconds
Time spent by the current goroutine in the running state.
/sched/goroutines:goroutines
Count of live goroutines.
Expand Down
4 changes: 4 additions & 0 deletions src/runtime/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,10 @@ func TestReadMetricsConsistency(t *testing.T) {
for i := range h.Counts {
gc.pauses += h.Counts[i]
}
case "/sched/goroutine/running:nanoseconds":
if samples[i].Value.Uint64() < 1 {
t.Error("goroutine running nanoseconds is less than one")
}
case "/sched/goroutines:goroutines":
if samples[i].Value.Uint64() < 1 {
t.Error("number of goroutines is less than one")
Expand Down
20 changes: 19 additions & 1 deletion src/runtime/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -990,8 +990,18 @@ func casgstatus(gp *g, oldval, newval uint32) {
}
}

// Handle tracking for scheduling latencies.
// Handle tracking for scheduling and running latencies.
now := nanotime()
if newval == _Grunning {
// We're transitioning into the running state, record the timestamp for
// subsequent use.
gp.lastsched = now
}
if oldval == _Grunning {
// We're transitioning out of running, record how long we were in the
// state.
gp.runningnanos += now - gp.lastsched

// Track every 8th time a goroutine transitions out of running.
if gp.trackingSeq%gTrackingPeriod == 0 {
gp.tracking = true
Expand Down Expand Up @@ -3240,6 +3250,12 @@ func dropg() {
setGNoWB(&_g_.m.curg, nil)
}

// grunningnanos returns the running time observed by the current g.
func grunningnanos() int64 {
gp := getg()
return gp.runningnanos + nanotime() - gp.lastsched
}

// checkTimers runs any timers for the P that are ready.
// If now is not 0 it is the current time.
// It returns the passed time or the current time if now was passed as 0.
Expand Down Expand Up @@ -3472,6 +3488,8 @@ func goexit0(gp *g) {
gp.param = nil
gp.labels = nil
gp.timer = nil
gp.lastsched = 0
gp.runningnanos = 0

if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
// Flush assist credit to the global pool. This gives
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/runtime2.go
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,8 @@ type g struct {
labels unsafe.Pointer // profiler labels
timer *timer // cached timer for time.Sleep
selectDone uint32 // are we participating in a select and did someone win the race?
lastsched int64 // timestamp when the G last started running
runningnanos int64 // total time spent in the running state

// Per-G GC state

Expand Down
2 changes: 1 addition & 1 deletion src/runtime/sizeof_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
_32bit uintptr // size on 32bit platforms
_64bit uintptr // size on 64bit platforms
}{
{runtime.G{}, 236, 392}, // g, but exported for testing
{runtime.G{}, 236, 408}, // g, but exported for testing
{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
}

Expand Down

0 comments on commit 1d36b93

Please sign in to comment.