Skip to content

Commit

Permalink
benchmarks/gc_latency: add new microbenchmark for tricky mutator late…
Browse files Browse the repository at this point in the history
…ncy issues

When run with -bench, emits latency measurements in benchmark
format.  Includes -trace option for debugging latency issues
(omit -bench for location of worst delay).

go run . -help
Usage of /<yuck>/gc_latency:
  -bench
    	output in Go benchmark format
  -fluff
    	insert 'fluff' into allocation runs to break up sweeps
  -how string
    	how the buffer is allocated = {stack,heap,global} (default "stack")
  -trace string
    	name of trace file to create

go run . -bench

goos: darwin
goarch: amd64
BenchmarkAverageLatency 1     574ns
BenchmarkMedianLatency  1     412ns
Benchmark99Latency      1    1791ns
Benchmark999Latency     1   25843ns
Benchmark9999Latency    1   35585ns
Benchmark99999Latency   1  109314ns
BenchmarkWorstLatency   1 6169500ns

Updates golang/go#27732.

Change-Id: I19b9060f24cda1547b8d75f762316dd5271e32c6
  • Loading branch information
dr2chase committed Oct 5, 2022
1 parent 793a452 commit 6857d06
Showing 1 changed file with 176 additions and 0 deletions.
176 changes: 176 additions & 0 deletions gc_latency/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package main

import (
"flag"
"fmt"
"math"
"os"
"runtime"
"runtime/trace"
"sort"
"time"
"unsafe"
)

const (
bufferLen = 200000
msgCount0 = 1000000 // warmup
msgCount1 = 5000000 // run (5 million)
)

type kbyte []byte
type circularBuffer [bufferLen]kbyte

var worst time.Duration

// For making sense of the bad outcome.
var total time.Duration
var worstIndex int
var allStart time.Time
var worstElapsed time.Duration

var fluff kbyte
var doFluff bool

// newSlice allocates a 1k slice of bytes and initializes them all to byte(n)
func newSlice(n int) kbyte {
m := make(kbyte, 1024)
if doFluff && n&63 == 0 {
fluff = make(kbyte, 1024)
}
for i := range m {
m[i] = byte(n)
}
return m
}

var delays []time.Duration

// storeSlice stores a newly allocated 1k slice of bytes at circularBuffer[count%bufferLen]
// (bufferLen is the length of the array circularBuffer)
// It also checks the time needed to do this and records the worst case.
func storeSlice(c *circularBuffer, highID int) {
start := time.Now()
c[highID%bufferLen] = newSlice(highID)
elapsed := time.Since(start)

candElapsed := time.Since(allStart) // Record location of worst in trace
if elapsed > worst {
worst = elapsed
worstIndex = highID
worstElapsed = candElapsed
}
total = time.Duration(total.Nanoseconds() + elapsed.Nanoseconds())
delays = append(delays, elapsed)
}

//go:noinline
func work(c *circularBuffer, count int) {
for i := 0; i < count; i++ {
storeSlice(c, i)
}
}

//go:noinline
func stack() {
var c circularBuffer
work(&c, msgCount1)
}

var global_c circularBuffer

//go:noinline
func global() {
work(&global_c, msgCount1)
}

var sink *circularBuffer

//go:noinline
func heap() {
c := &circularBuffer{}
sink = c // force to heap
work(c, msgCount1)
}

func main() {
var traceFile string
var howAllocated = "stack"
var asBench bool

flag.StringVar(&traceFile, "trace", traceFile, "name of trace file to create")
flag.StringVar(&howAllocated, "how", howAllocated, "how the buffer is allocated = {stack,heap,global}")
flag.BoolVar(&doFluff, "fluff", doFluff, "insert 'fluff' into allocation runs to break up sweeps")
flag.BoolVar(&asBench, "bench", asBench, "output in Go benchmark format")
flag.Parse()

var c *circularBuffer = &circularBuffer{}
delays = make([]time.Duration, 0, msgCount1)
// Warmup
work(c, msgCount0)
c = nil
delays = delays[:0]

total = time.Duration(0)
worstElapsed = time.Duration(0)
worst = time.Duration(0)
worstIndex = 0

if traceFile != "" {
f, err := os.Create(traceFile)
if err != nil {
fmt.Fprintf(os.Stderr, "Could not create trace file '%s'\n", traceFile)
os.Exit(1)
}
defer f.Close()
trace.Start(f)
defer trace.Stop()
}
allStart = time.Now()
c = nil

switch howAllocated {
case "stack":
stack()
case "heap":
heap()
case "global":
global()
default:
fmt.Fprintf(os.Stderr, "-how needs to be one of 'heap', 'stack' or 'global, saw '%s' instead\n", howAllocated)
os.Exit(1)
}

sort.Slice(delays, func(i, j int) bool { return delays[i] < delays[j] })

if asBench {
fmt.Printf("goos: %s\n", runtime.GOOS)
fmt.Printf("goarch: %s\n", runtime.GOARCH)
fmt.Printf("cpu: \n") // reset this key
fmt.Printf("pkg: gc_latency\n")

width := int(math.Ceil(math.Log10(float64(1 + worst))))

fmt.Printf("BenchmarkAverageLatency 1 %[1]*dns\n", width, time.Duration(total.Nanoseconds()/msgCount1))
fmt.Printf("BenchmarkMedianLatency 1 %[1]*dns\n", width, delays[len(delays)/2])
fmt.Printf("Benchmark99Latency 1 %[1]*dns\n", width, delays[int(0.99*float64(len(delays)))])
fmt.Printf("Benchmark999Latency 1 %[1]*dns\n", width, delays[int(0.999*float64(len(delays)))])
fmt.Printf("Benchmark9999Latency 1 %[1]*dns\n", width, delays[int(0.9999*float64(len(delays)))])
fmt.Printf("Benchmark99999Latency 1 %[1]*dns\n", width, delays[int(0.99999*float64(len(delays)))])
fmt.Printf("Benchmark999999Latency 1 %[1]*dns\n", width, delays[int(0.999999*float64(len(delays)))])
fmt.Printf("BenchmarkWorstLatency 1 %[1]*dns\n", width, worst)
} else {

fmt.Println("Worst allocation latency:", worst)
fmt.Println("Worst allocation index:", worstIndex)
fmt.Println("Worst allocation occurs at run elapsed time:", worstElapsed)
fmt.Println("Average allocation latency:", time.Duration(total.Nanoseconds()/msgCount1))
fmt.Println("Median allocation latency:", delays[len(delays)/2])
fmt.Println("99% allocation latency:", delays[int(0.99*float64(len(delays)))])
fmt.Println("99.9% allocation latency:", delays[int(0.999*float64(len(delays)))])
fmt.Println("99.99% allocation latency:", delays[int(0.9999*float64(len(delays)))])
fmt.Println("99.999% allocation latency:", delays[int(0.99999*float64(len(delays)))])
fmt.Println("Sizeof(circularBuffer) =", unsafe.Sizeof(*c))
fmt.Println("Approximate live memory =", unsafe.Sizeof(*c)+bufferLen*1024)
}
}

0 comments on commit 6857d06

Please sign in to comment.