Skip to content

Commit

Permalink
benchmarks/gc_latency: add new microbenchmark for tricky mutator late…
Browse files Browse the repository at this point in the history
…ncy issues

When run with -bench, emits latency measurements in benchmark
format.  Includes -trace option for debugging latency issues.

go run . -help
Usage of /<yuck>/gc_latency:
  -fluff
 insert 'fluff' into allocation runs to break up sweeps
  -how string
 how the buffer is allocated = {stack,heap,global} (default "stack")
  -trace string
 name of trace file to create

-OR-

runs as a go benchmark:

go test -bench=B -count=2 .
goos: darwin
goarch: amd64
pkg: golang.org/x/benchmarks/gc_latency
cpu: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
BenchmarkGCLatency/how=stack-fluff=false-8  1  3746777895 ns/op 564.0 avg-ns 409.0 median-ns 1266 p29-ns 21452 p39-ns 40774 p49-ns 122054 p59-ns 4629961 p69-ns  5152588 worst-ns
BenchmarkGCLatency/how=stack-fluff=false-8  1  3960773922 ns/op 607.0 avg-ns 410.0 median-ns 1386 p29-ns 30254 p39-ns 48302 p49-ns 117086 p59-ns 6043114 p69-ns  8353592 worst-ns
BenchmarkGCLatency/how=heap-fluff=false-8   1  3832759983 ns/op 582.0 avg-ns 411.0 median-ns 1519 p29-ns 26273 p39-ns 47897 p49-ns 114378 p59-ns  216399 p69-ns 484183 worst-ns
BenchmarkGCLatency/how=heap-fluff=false-8   1  4079520582 ns/op 618.0 avg-ns 417.0 median-ns 2263 p29-ns 29662 p39-ns 60282 p49-ns 147765 p59-ns  344228 p69-ns 638498 worst-ns
BenchmarkGCLatency/how=global-fluff=false-8 1  3688413511 ns/op 562.0 avg-ns 409.0 median-ns 1351 p29-ns 28113 p39-ns 40730 p49-ns 104352 p59-ns  202013 p69-ns 468936 worst-ns
BenchmarkGCLatency/how=global-fluff=false-8 1  3590957128 ns/op 544.0 avg-ns 409.0 median-ns 1397 p29-ns 25142 p39-ns 40059 p49-ns 101923 p59-ns  431897 p69-ns 658914 worst-ns
BenchmarkGCLatency/how=stack-fluff=true-8   1  3645081827 ns/op 552.0 avg-ns 408.0 median-ns 1387 p29-ns 14013 p39-ns 39086 p49-ns  81027 p59-ns 5174202 p69-ns  5994034 worst-ns
BenchmarkGCLatency/how=stack-fluff=true-8   1  3761717212 ns/op 573.0 avg-ns 409.0 median-ns 1648 p29-ns 20665 p39-ns 39151 p49-ns  92594 p59-ns 5747097 p69-ns  6512030 worst-ns
BenchmarkGCLatency/how=heap-fluff=true-8    1  3686269094 ns/op 559.0 avg-ns 410.0 median-ns 1441 p29-ns 14351 p39-ns 39182 p49-ns  96212 p59-ns  334248 p69-ns  1308733 worst-ns
BenchmarkGCLatency/how=heap-fluff=true-8    1  3642812482 ns/op 549.0 avg-ns 409.0 median-ns 1363 p29-ns 14274 p39-ns 38962 p49-ns  97677 p59-ns  267289 p69-ns 662851 worst-ns
BenchmarkGCLatency/how=global-fluff=true-8  1  3618725949 ns/op 549.0 avg-ns 409.0 median-ns 1369 p29-ns 13421 p39-ns 39061 p49-ns 103189 p59-ns  227176 p69-ns 541692 worst-ns
BenchmarkGCLatency/how=global-fluff=true-8  1  3634821717 ns/op 550.0 avg-ns 408.0 median-ns 1350 p29-ns 15195 p39-ns 39664 p49-ns  90323 p59-ns  529328 p69-ns 590649 worst-ns
PASS
ok    golang.org/x/benchmarks/gc_latency  55.136s

Updates golang/go#27732.

Change-Id: I19b9060f24cda1547b8d75f762316dd5271e32c6
  • Loading branch information
dr2chase committed Aug 14, 2023
1 parent d20d691 commit 4fbeb67
Show file tree
Hide file tree
Showing 3 changed files with 238 additions and 0 deletions.
195 changes: 195 additions & 0 deletions gc_latency/latency.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
"flag"
"fmt"
"os"
"runtime/trace"
"sort"
"testing"
"time"
"unsafe"
)

const (
bufferLen = 200000
msgCount0 = 1000000 // warmup
msgCount1 = 5000000 // run (5 million)
)

type kbyte []byte
type circularBuffer [bufferLen]kbyte

type LB struct {
// Performance measurement stuff
delays []time.Duration
worst time.Duration
// For making sense of the bad outcome.
total time.Duration
worstIndex int
allStart time.Time
worstElapsed time.Duration

// How to allocate
doFluff bool
howAllocated string
}

var fluff kbyte

// newSlice allocates a 1k slice of bytes and initializes them all to byte(n)
func (lb *LB) newSlice(n int) kbyte {
m := make(kbyte, 1024)
if lb.doFluff && n&63 == 0 {
fluff = make(kbyte, 1024)
}
for i := range m {
m[i] = byte(n)
}
return m
}

// storeSlice stores a newly allocated 1k slice of bytes at circularBuffer[count%bufferLen]
// (bufferLen is the length of the array circularBuffer)
// It also checks the time needed to do this and records the worst case.
func (lb *LB) storeSlice(c *circularBuffer, highID int) {
start := time.Now()
c[highID%bufferLen] = lb.newSlice(highID)
elapsed := time.Since(start)

candElapsed := time.Since(lb.allStart) // Record location of worst in trace
if elapsed > lb.worst {
lb.worst = elapsed
lb.worstIndex = highID
lb.worstElapsed = candElapsed
}
lb.total = time.Duration(lb.total.Nanoseconds() + elapsed.Nanoseconds())
lb.delays = append(lb.delays, elapsed)
}

//go:noinline
func (lb *LB) work(c *circularBuffer, count int) {
for i := 0; i < count; i++ {
lb.storeSlice(c, i)
}
}

func (lb *LB) doAllocations() {
switch lb.howAllocated {
case "stack":
lb.stack()
case "heap":
lb.heap()
case "global":
lb.global()
}
}

//go:noinline
func (lb *LB) stack() {
var c circularBuffer
lb.work(&c, msgCount1)
}

var global_c circularBuffer

//go:noinline
func (lb *LB) global() {
lb.work(&global_c, msgCount1)
}

var sink *circularBuffer

//go:noinline
func (lb *LB) heap() {
c := &circularBuffer{}
sink = c // force to heap
lb.work(c, msgCount1)
}

var traceFile string

func flags() (string, bool) {
var howAllocated = "stack"
var doFluff bool
flag.StringVar(&traceFile, "trace", traceFile, "name of trace file to create")
flag.StringVar(&howAllocated, "how", howAllocated, "how the buffer is allocated = {stack,heap,global}")
flag.BoolVar(&doFluff, "fluff", doFluff, "insert 'fluff' into allocation runs to break up sweeps")

flag.Parse()

switch howAllocated {
case "stack", "heap", "global":
break
default:
fmt.Fprintf(os.Stderr, "-how needs to be one of 'heap', 'stack' or 'global, saw '%s' instead\n", howAllocated)
os.Exit(1)
}
return howAllocated, doFluff
}

func (lb0 *LB) bench(b *testing.B) {
var c *circularBuffer = &circularBuffer{}
lb0.delays = make([]time.Duration, 0, msgCount1)
// Warmup
lb0.work(c, msgCount0)
c = nil

lb := &LB{doFluff: lb0.doFluff, howAllocated: lb0.howAllocated}

if traceFile != "" {
f, err := os.Create(traceFile)
if err != nil {
if b != nil {
b.Fatalf("Could not create trace file '%s'\n", traceFile)
} else {
fmt.Fprintf(os.Stderr, "Could not create trace file '%s'\n", traceFile)
os.Exit(1)
}
}
defer f.Close()
trace.Start(f)
defer trace.Stop()
}
if b != nil {
b.ResetTimer()
}
lb.allStart = time.Now()

lb.doAllocations()

sort.Slice(lb.delays, func(i, j int) bool { return lb.delays[i] < lb.delays[j] })

delays := lb.delays
delayLen := float64(len(delays))
average, median := time.Duration(lb.total.Nanoseconds()/msgCount1), delays[len(delays)/2]
p29, p39, p49, p59, p69 := lb.delays[int(0.99*delayLen)], delays[int(0.999*delayLen)], delays[int(0.9999*delayLen)], delays[int(0.99999*delayLen)], delays[int(0.999999*delayLen)]

if b != nil {
b.ReportMetric(float64(average.Nanoseconds()), "avg-ns")
b.ReportMetric(float64(median), "median-ns")
b.ReportMetric(float64(p29), "p29-ns")
b.ReportMetric(float64(p39), "p39-ns")
b.ReportMetric(float64(p49), "p49-ns")
b.ReportMetric(float64(p59), "p59-ns")
b.ReportMetric(float64(p69), "p69-ns")
b.ReportMetric(float64(lb.worst), "worst-ns")
} else {
fmt.Printf("GC latency benchmark, how=%s, fluff=%v\n", lb.howAllocated, lb.doFluff)
fmt.Println("Worst allocation latency:", lb.worst)
fmt.Println("Worst allocation index:", lb.worstIndex)
fmt.Println("Worst allocation occurs at run elapsed time:", lb.worstElapsed)
fmt.Println("Average allocation latency:", average)
fmt.Println("Median allocation latency:", median)
fmt.Println("99% allocation latency:", p29)
fmt.Println("99.9% allocation latency:", p39)
fmt.Println("99.99% allocation latency:", p49)
fmt.Println("99.999% allocation latency:", p59)
fmt.Println("Sizeof(circularBuffer) =", unsafe.Sizeof(*c))
fmt.Println("Approximate live memory =", unsafe.Sizeof(*c)+bufferLen*1024)
}
}
32 changes: 32 additions & 0 deletions gc_latency/latency_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
"fmt"
"testing"
)

type testCase struct {
howAlloc string
withFluff bool
}

func BenchmarkGCLatency(b *testing.B) {
tcs := []testCase{
{"stack", false},
{"heap", false},
{"global", false},
{"stack", true},
{"heap", true},
{"global", true},
}

for _, tc := range tcs {
lb0 := &LB{doFluff: tc.withFluff, howAllocated: tc.howAlloc}
b.Run(fmt.Sprintf("how=%s-fluff=%v", tc.howAlloc, tc.withFluff),
func(b *testing.B) { lb0.bench(b) })
}
}
11 changes: 11 additions & 0 deletions gc_latency/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

func main() {
howAllocated, doFluffFlag := flags()
lb := &LB{howAllocated: howAllocated, doFluff: doFluffFlag}
lb.bench(nil)
}

0 comments on commit 4fbeb67

Please sign in to comment.