Improve percentiles computation with HDR histogram #92

Closed · wants to merge 4 commits
57 changes: 29 additions & 28 deletions README.md
@@ -159,50 +159,51 @@ Specifies the kind of report to be generated. It defaults to text.

##### text
```
- Requests      [total]                            1200
- Duration      [total, attack, wait]              10.094965987s, 9.949883921s, 145.082066ms
- Latencies     [mean, 50, 95, 99, max]            113.172398ms, 108.272568ms, 140.18235ms, 247.771566ms, 264.815246ms
- Bytes In      [total, mean]                      3714690, 3095.57
- Bytes Out     [total, mean]                      0, 0.00
- Success       [ratio]                            55.42%
- Status Codes  [code:count]                       0:535  200:665
+ Requests      [total]                            3000
+ Duration      [total, attack, wait]              10.000908765s, 10.000333187s, 575.578us
+ Latencies     [mean, min, 50, 95, 99, 999, max]  290.959305ms, 239.872us, 655.359us, 1.871052799s, 2.239496191s, 2.318794751s, 2.327768447s
+ Bytes In      [total, mean]                      17478504, 5826.17
+ Bytes Out     [total, mean]                      0, 0.00
+ Success       [ratio]                            99.90%
+ Status Codes  [code:count]                       200:2997  0:3
Error Set:
- Get http://localhost:6060: dial tcp 127.0.0.1:6060: connection refused
- Get http://localhost:6060: read tcp 127.0.0.1:6060: connection reset by peer
- Get http://localhost:6060: dial tcp 127.0.0.1:6060: connection reset by peer
- Get http://localhost:6060: write tcp 127.0.0.1:6060: broken pipe
- Get http://localhost:6060: net/http: transport closed before response was received
- Get http://localhost:6060: http: can't write HTTP request on broken connection
+ Get http://:6060: read tcp 127.0.0.1:6060: connection reset by peer
+ Get http://:6060: net/http: transport closed before response was received
+ Get http://:6060: write tcp 127.0.0.1:6060: broken pipe
```

##### json
```json
{
  "latencies": {
-   "mean": 9093653647,
-   "50th": 2401223400,
-   "95th": 12553709381,
-   "99th": 12604629125,
-   "max": 12604629125
+   "mean": 290954696,
+   "50th": 655359,
+   "95th": 1871708159,
+   "99th": 2239758335,
+   "999th": 2319450111,
+   "max": 2327768447,
+   "min": 239872
  },
  "bytes_in": {
-   "total": 782040,
-   "mean": 651.7
+   "total": 17478504,
+   "mean": 5826.168
  },
  "bytes_out": {
    "total": 0,
    "mean": 0
  },
- "duration": 9949883921,
- "wait": 145082066,
- "requests": 1200,
- "success": 0.11666666666666667,
+ "duration": 10000333187,
+ "wait": 575578,
+ "requests": 3000,
+ "success": 0.999,
  "status_codes": {
-   "0": 1060,
-   "200": 140
+   "0": 3,
+   "200": 2997
  },
  "errors": [
-   "Get http://localhost:6060: dial tcp 127.0.0.1:6060: operation timed out"
+   "Get http://:6060: read tcp 127.0.0.1:6060: connection reset by peer",
+   "Get http://:6060: net/http: transport closed before response was received",
+   "Get http://:6060: write tcp 127.0.0.1:6060: broken pipe"
  ]
}
```
@@ -214,7 +215,7 @@ out.
Input a different number on the bottom left corner input field
to change the moving average window size (in data points).

- ![Plot](http://i.imgur.com/oi0cgGq.png)
+ ![Plot](http://i.imgur.com/uCdxdaC.png)


## Usage (Library)
2 changes: 1 addition & 1 deletion lib/attack.go
@@ -124,13 +124,13 @@ func TLSConfig(c *tls.Config) func(*Attacker) {
// as soon as they arrive.
func (a *Attacker) Attack(tr Targeter, rate uint64, du time.Duration) chan *Result {
	resc := make(chan *Result)
-	throttle := time.NewTicker(time.Duration(1e9 / rate))
	hits := rate * uint64(du.Seconds())
	wrk := a.workers
	if wrk == 0 || wrk > hits {
		wrk = hits
	}
	share := hits / wrk
+	throttle := time.NewTicker(time.Duration(1e9 / rate))

	var wg sync.WaitGroup
	for i := uint64(0); i < wrk; i++ {
47 changes: 28 additions & 19 deletions lib/metrics.go
@@ -4,18 +4,20 @@ import (
"strconv"
"time"

"github.com/bmizerany/perks/quantile"
hdr "github.com/codahale/hdrhistogram"
)

// Metrics holds the stats computed out of a slice of Results
// that is used for some of the Reporters
type Metrics struct {
	Latencies struct {
		Mean time.Duration `json:"mean"`
-		P50  time.Duration `json:"50th"` // P50 is the 50th percentile upper value
-		P95  time.Duration `json:"95th"` // P95 is the 95th percentile upper value
-		P99  time.Duration `json:"99th"` // P99 is the 99th percentile upper value
+		P50  time.Duration `json:"50th"`  // P50 is the 50th percentile upper value
+		P95  time.Duration `json:"95th"`  // P95 is the 95th percentile upper value
+		P99  time.Duration `json:"99th"`  // P99 is the 99th percentile upper value
+		P999 time.Duration `json:"999th"` // P999 is the 99.9th percentile upper value
		Max time.Duration `json:"max"`
+		Min time.Duration `json:"min"`
	} `json:"latencies"`

	BytesIn struct {
@@ -51,48 +53,55 @@ func NewMetrics(r Results) *Metrics {
	}

	var (
-		errorSet       = map[string]struct{}{}
-		quants         = quantile.NewTargeted(0.50, 0.95, 0.99)
-		totalSuccess   int
-		totalLatencies time.Duration
-		latest         time.Time
+		errorSet = map[string]struct{}{}
+		success  int
+		latest   time.Time
	)

+	m.Latencies.Min = r[0].Latency
+	m.Latencies.Max = m.Latencies.Min
+
	for _, result := range r {
-		quants.Insert(float64(result.Latency))
		m.StatusCodes[strconv.Itoa(int(result.Code))]++
-		totalLatencies += result.Latency
		m.BytesOut.Total += result.BytesOut
		m.BytesIn.Total += result.BytesIn
		if result.Latency > m.Latencies.Max {
			m.Latencies.Max = result.Latency
		}
+		if result.Latency < m.Latencies.Min {
+			m.Latencies.Min = result.Latency
+		}
		if end := result.Timestamp.Add(result.Latency); end.After(latest) {
			latest = end
		}
-		if result.Code >= 200 && result.Code < 300 {
-			totalSuccess++
+		if result.Code >= 200 && result.Code < 400 {
+			success++
		}
		if result.Error != "" {
			errorSet[result.Error] = struct{}{}
		}
	}

+	hist := hdr.New(int64(m.Latencies.Min), int64(m.Latencies.Max), 5)
+	for _, result := range r {
+		hist.RecordValue(int64(result.Latency))

---

**Reviewer:** If you're sending requests at a fixed rate, you should use `RecordCorrectedValue` to compensate for GC pauses in vegeta itself.
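
As a hedged illustration of that suggestion (not code from this PR): the sketch below records latencies with `RecordCorrectedValue`, assuming the attack rate were available at metrics time. Today `NewMetrics` does not receive the rate, so the `rate` parameter is hypothetical.

```go
package main

import (
	"fmt"
	"time"

	hdr "github.com/codahale/hdrhistogram"
)

// recordCorrected records latencies with coordinated-omission compensation.
// The rate parameter (requests per second) is hypothetical: NewMetrics does
// not currently know the attack rate.
func recordCorrected(latencies []time.Duration, rate uint64) *hdr.Histogram {
	hist := hdr.New(1, int64(time.Minute), 5) // assumed value bounds, 5 significant figures
	expectedInterval := int64(time.Second) / int64(rate)
	for _, l := range latencies {
		// For a stall much longer than expectedInterval, this also records
		// the samples that would have been taken during the stall, one per
		// interval, so percentiles reflect the experienced latency.
		hist.RecordCorrectedValue(int64(l), expectedInterval)
	}
	return hist
}

func main() {
	h := recordCorrected([]time.Duration{time.Millisecond, 2 * time.Second}, 100)
	fmt.Println(time.Duration(h.ValueAtQuantile(99)))
}
```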


**Reviewer:** In the specific case where the load gen is truly constant throughput (i.e. it will speed up to catch up if it falls behind), you can avoid using correction/compensation in the histogram by computing the latency correctly:

    latency = requestEndTime - (systemStartTime + requestNumber / throughput)

This strongly depends on the loader actually working to stay on pace. It wouldn't be valid otherwise.
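
A minimal Go reading of that formula, with illustrative names only: the nth request is timed from when it was scheduled to start, not from when it actually started.

```go
import "time"

// intendedLatency measures request n against its scheduled start time,
// per the formula above. systemStart, requestNumber, and throughput
// (requests per second) are illustrative parameters, not Vegeta's API.
func intendedLatency(systemStart time.Time, requestNumber uint64, throughput float64, requestEnd time.Time) time.Duration {
	scheduledStart := systemStart.Add(time.Duration(float64(requestNumber) / throughput * float64(time.Second)))
	return requestEnd.Sub(scheduledStart)
}
```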


**Owner (author):** Vegeta relies on a periodic timer to maintain a constant request rate. From the `time.NewTicker` documentation:

> NewTicker returns a new Ticker containing a channel that will send the time with a period specified by the duration argument. It adjusts the intervals or drops ticks to make up for slow receivers. The duration d must be greater than zero; if not, NewTicker will panic. Stop the ticker to release associated resources.

From this, I take it that if a GC pause is in progress while a tick is due to be delivered, the tick will be dropped. I could be wrong, though.

Regarding scheduling, Vegeta uses one goroutine per request by default. This puts considerable pressure on the runtime scheduler, but in theory it reduces coordinated omission by making it possible for each request to be issued promptly. The user can limit this with the workers=n flag, in which case the probability of dropping ticks increases.

In conclusion, it seems we have two possible sources of execution delays:

  • GC pauses
  • Scheduling contention (by limiting workers)

With these in mind, I wouldn't say Vegeta is always constant throughput, but it does a pretty reliable job in most situations, with sane defaults. So I would say that using RecordCorrectedValue is the best approach. Let me know what you guys think.
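
For reference, a stripped-down sketch of the pacing model this comment describes (not the actual Attack implementation; `hit` is a stand-in for issuing one request):

```go
import "time"

// pace fires hits off a ticker at the given rate for the given duration.
// Each hit runs in its own goroutine, so ticks are consumed promptly even
// while earlier requests are still in flight.
func pace(rate uint64, du time.Duration, hit func()) {
	throttle := time.NewTicker(time.Duration(1e9 / rate))
	defer throttle.Stop()
	deadline := time.After(du)
	for {
		select {
		case <-throttle.C:
			go hit() // one goroutine per request, as described above
		case <-deadline:
			return
		}
	}
}
```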


**Reviewer (Gil Tene):** The RecordCorrectedValue math/logic will work if you know that there were no interleaved requests that happened during the expected interval (e.g. it is valid within each serial request-issuing connection), and therefore know that there is a specific number of "missing" requests that should have been sent but weren't. But it's not clear to me that your attacker knows that (can there be another attacker sending on the same connection?).

Given that your goal is to maintain a constant throughput, I would avoid relying on the scheduler to do time math for you, or on compensation inside the histogram recording. I would instead modify the code to make decisions based on actual time accounting, and to make sure that the right number of requests is actually sent, timing the latency of each from the time it was supposed to be sent until it actually responded. The simplest way to do that is to keep issuing requests as long as "not enough requests have been issued up to now". In order to avoid back-to-back sends when "behind" (which can create an unrealistic huge spike after a pause on either the sender or receiver), it is usually useful to do "catch-up" at a limited throughput (e.g. 2x the normal throughput).

This usually involves keeping track of how many requests have actually been completed in a "connection", and doing the math based purely on that count, the required throughput (in that connection), and the current time.

E.g. you can have hits daisy-chain within a "connection" by kicking off another hit each time they are done doing what they need to do (scheduled for when the next request in this "connection" needs to be sent according to the connection's required throughput). What a hit "needs to do" is either send the request (if the time has arrived) or skip the request (if it is not yet time to send it). The hit should always re-schedule another hit for the proper time to send the next request (which may be "now").

You can see an example of the logic to calculate the time until the next request should be sent here: https://github.com/giltene/wrk2/blob/master/src/wrk.c#L461

It basically uses the current number of completed requests and the current time to decide how long to wait before the next request is sent (or 0 if it is time to send the request). Providing the time until the next send, rather than just a "send now" boolean indication, is useful because it allows you to sleep/wait/delay checking again until that time arrives.

You can see the logic for computing the latency when a response is complete here: https://github.com/giltene/wrk2/blob/master/src/wrk.c#L539 (ignore all the nice debug-related output and the u_latency_histogram math; focus on the calculation that feeds the recording into latency_histogram).

If you use this logic, your throughput (averaged over time) will remain constant, catching up on missed requests at an increased (but not back-to-back) rate. With such a constant-throughput plan actually being executed, your latency reporting will be precise, since you have a known plan (to execute requests at a given constant pace) and can determine exactly what the latency is for each response based purely on its arrival time (ignoring its actual send time, which is susceptible to coordination when you are temporarily off-pace).
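
A hedged Go translation of the linked wrk.c pacing calculation (illustrative names; `done` is the count of completed requests in the "connection"):

```go
import "time"

// timeToNextSend decides how long to wait before the next request using
// only the completed-request count, the required throughput, and the
// current time. A zero result means "behind schedule: send now"; a real
// implementation could additionally cap catch-up at, say, 2x the pace.
func timeToNextSend(start time.Time, done uint64, throughput float64) time.Duration {
	next := start.Add(time.Duration(float64(done) / throughput * float64(time.Second)))
	if wait := next.Sub(time.Now()); wait > 0 {
		return wait
	}
	return 0
}
```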


**Owner (author), replying to Gil Tene by email (11 Nov 2014):**

> The RecordCorrectedValue math/logic will work if you know that there were no interleaved requests that happened during the expected interval […] But it's not clear to me that your attacker knows that (can there be another attacker sending on the same connection?).

Vegeta differs from other HTTP load testing tools in that it maintains a user-specified request rate for a given duration, instead of issuing as many requests as possible, as fast as possible. It answers "what do my latencies look like at this request rate?" instead of the usual "how many requests per second can I do?", which is benchmarking. I am not sure this is what you meant by "connection", but an attack is executed by a single HTTP client, shared by all goroutines. This HTTP client can optionally disable HTTP keep-alive, which forces a new TCP connection on every request/hit.

> Given that your goal is to maintain a constant throughput, I would avoid relying on the scheduler to do time math for you, or on compensation inside the histogram recording. […]

I am not relying on the scheduler to do time math. I am relying on a high-precision timer which ticks at the required request rate. There are pre-scheduled goroutines which instantly issue a request on each tick, regardless of whether previous requests have returned. I'd say the only way to fall behind is to have a massive GC pause, in which case your algorithm would be immensely helpful. Until today, I haven't seen a single tick dropped, under various heavy load generation conditions. But I suppose it is possible. I have to investigate further.

Thank you so much for the detailed insight. I really appreciate it. Before I dive into this, though, I'd like to first simulate a load test where the potential issues with my current approach become obvious. If you have anything in mind for this, please let me know!


**Reviewer:** The trivial way to simulate the load test in question is to ^Z the server being tested for 20 seconds while your load generator is trying to generate 200K reqs/sec. If at the end of the test the number of issued requests equals actual_run_time * 200K, then you are not dropping things. If it's not, you have a problem.

You can also use this test to sanity-check your reported latencies, by hand-computing some percentiles from the known behavior. E.g. if the total run time is 40 seconds and the pause is 20 seconds, half of all requests stall behind the pause with latencies spread evenly between 0 and 20 seconds, so the top 1% of all requests is the top 2% of the stalled half: your 99%'ile should show 0.98 × 20 s = 19.6 seconds, and your 95%'ile should show 18 seconds. If that's not what they show, they are wrong.


**Owner (author):** Now everything makes sense. Vegeta wasn't built to generate that kind of throughput, and its upper bound is significantly lower. Vegeta makes the distinction between load testing (analysing your server under different kinds of load) and benchmarking (testing your server at peak capacity). It was built to load test individual HTTP services within a Service Oriented Architecture, which sustain several orders of magnitude fewer requests per second.

I am not so sure that Go would be my first choice for building a benchmarking tool in wrk's category, but I will definitely play around with your algorithms. Once again, thanks for your input!


**Reviewer:** You can replace 200K/sec with 200/sec above and still do the same test... The ^Z sniff test applies to anything that reports latency stats.


**Owner (author):** Here are the results for that request rate, suspending the server for ~10s.

    $ echo "GET http://:9888" | ./vegeta attack -duration=30s -rate=200 | tee results.bin | ./vegeta report
    Requests      [total]                            6000
    Duration      [total, attack, wait]              30.010435746s, 30.009939779s, 495.967us
    Latencies     [mean, min, 50, 95, 99, 999, max]  2.170981624s, 139.87us, 786.431us, 9.890955263s, 11.086594047s, 11.354374143s, 11.384070415s
    Bytes In      [total, mean]                      66000, 11.00
    Bytes Out     [total, mean]                      0, 0.00
    Success       [ratio]                            100.00%
    Status Codes  [code:count]                       200:6000
    Error Set:

(screenshot attached: "screen shot 2014-11-12 at 01 16 26")

You may explore the data further with this interactive plot.


**Reviewer:** That looks perfect! Well calibrated.

---

+	}

	m.Requests = uint64(len(r))
	m.Duration = r[len(r)-1].Timestamp.Sub(r[0].Timestamp)
	m.Wait = latest.Sub(r[len(r)-1].Timestamp)
-	m.Latencies.Mean = time.Duration(float64(totalLatencies) / float64(m.Requests))
-	m.Latencies.P50 = time.Duration(quants.Query(0.50))
-	m.Latencies.P95 = time.Duration(quants.Query(0.95))
-	m.Latencies.P99 = time.Duration(quants.Query(0.99))
+	m.Latencies.Mean = time.Duration(hist.Mean())
+	m.Latencies.P50 = time.Duration(hist.ValueAtQuantile(50))
+	m.Latencies.P95 = time.Duration(hist.ValueAtQuantile(95))
+	m.Latencies.P99 = time.Duration(hist.ValueAtQuantile(99))
+	m.Latencies.P999 = time.Duration(hist.ValueAtQuantile(99.9))
	m.BytesIn.Mean = float64(m.BytesIn.Total) / float64(m.Requests)
	m.BytesOut.Mean = float64(m.BytesOut.Total) / float64(m.Requests)
-	m.Success = float64(totalSuccess) / float64(m.Requests)
+	m.Success = float64(success) / float64(m.Requests)

	m.Errors = make([]string, 0, len(errorSet))
	for err := range errorSet {
		m.Errors = append(m.Errors, err)
	}

	return m
}
6 changes: 3 additions & 3 deletions lib/reporters.go
@@ -26,11 +26,11 @@ var ReportText ReporterFunc = func(r Results) ([]byte, error) {
	m := NewMetrics(r)
	out := &bytes.Buffer{}

-	w := tabwriter.NewWriter(out, 0, 8, 2, '\t', tabwriter.StripEscape)
+	w := tabwriter.NewWriter(out, 0, 8, 2, ' ', tabwriter.StripEscape)
	fmt.Fprintf(w, "Requests\t[total]\t%d\n", m.Requests)
	fmt.Fprintf(w, "Duration\t[total, attack, wait]\t%s, %s, %s\n", m.Duration+m.Wait, m.Duration, m.Wait)
-	fmt.Fprintf(w, "Latencies\t[mean, 50, 95, 99, max]\t%s, %s, %s, %s, %s\n",
-		m.Latencies.Mean, m.Latencies.P50, m.Latencies.P95, m.Latencies.P99, m.Latencies.Max)
+	fmt.Fprintf(w, "Latencies\t[mean, min, 50, 95, 99, 999, max]\t%s, %s, %s, %s, %s, %s, %s\n",
+		m.Latencies.Mean, m.Latencies.Min, m.Latencies.P50, m.Latencies.P95, m.Latencies.P99, m.Latencies.P999, m.Latencies.Max)
	fmt.Fprintf(w, "Bytes In\t[total, mean]\t%d, %.2f\n", m.BytesIn.Total, m.BytesIn.Mean)
	fmt.Fprintf(w, "Bytes Out\t[total, mean]\t%d, %.2f\n", m.BytesOut.Total, m.BytesOut.Mean)
	fmt.Fprintf(w, "Success\t[ratio]\t%.2f%%\n", m.Success*100)