Skip to content

Commit

Permalink
improve tracking errors in reservation requests
Browse files Browse the repository at this point in the history
  • Loading branch information
sukunrt committed Mar 21, 2023
1 parent 9379c25 commit f2d358e
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 76 deletions.
56 changes: 14 additions & 42 deletions dashboards/autorelay/autorelay.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.6"
"version": "9.4.1"
},
{
"type": "panel",
Expand Down Expand Up @@ -247,7 +247,7 @@
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "9.3.6",
"pluginVersion": "9.4.1",
"targets": [
{
"datasource": {
Expand Down Expand Up @@ -350,22 +350,7 @@
{
"matcher": {
"id": "byName",
"options": "refresh reservation: failed"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "new reservation: success"
"options": "new: success"
},
"properties": [
{
Expand All @@ -380,28 +365,13 @@
{
"matcher": {
"id": "byName",
"options": "refresh reservation: success"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "new reservation: failed"
"options": "refresh: success"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"fixedColor": "super-light-green",
"mode": "fixed"
}
}
Expand Down Expand Up @@ -440,7 +410,7 @@
"format": "time_series",
"instant": false,
"interval": "",
"legendFormat": "{{request_type}} reservation: {{outcome}}",
"legendFormat": "{{request_type}}: {{outcome}}",
"range": true,
"refId": "A"
}
Expand Down Expand Up @@ -506,7 +476,7 @@
},
"textMode": "auto"
},
"pluginVersion": "9.3.6",
"pluginVersion": "9.4.1",
"targets": [
{
"datasource": {
Expand Down Expand Up @@ -667,7 +637,7 @@
},
"textMode": "auto"
},
"pluginVersion": "9.3.6",
"pluginVersion": "9.4.1",
"targets": [
{
"datasource": {
Expand Down Expand Up @@ -742,7 +712,7 @@
},
"textMode": "auto"
},
"pluginVersion": "9.3.6",
"pluginVersion": "9.4.1",
"targets": [
{
"datasource": {
Expand Down Expand Up @@ -868,20 +838,22 @@
"type": "timeseries"
}
],
"schemaVersion": 37,
"refresh": "",
"revision": 1,
"schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "libp2p Autorelay",
"uid": "deQ_uf-4k",
"version": 5,
"version": 3,
"weekStart": ""
}
47 changes: 34 additions & 13 deletions p2p/host/autorelay/metrics.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package autorelay

import (
"errors"

"github.com/libp2p/go-libp2p/p2p/metricshelper"
"github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/client"
pbv2 "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/pb"
"github.com/prometheus/client_golang/prometheus"
)

Expand Down Expand Up @@ -104,7 +108,7 @@ type MetricsTracer interface {
RelayFinderStatus(isActive bool)

ReservationEnded()
ReservationRequestFinished(isRefresh bool, success bool)
ReservationRequestFinished(isRefresh bool, err error)

RelayAddressCount(int)
RelayAddressUpdated()
Expand Down Expand Up @@ -146,9 +150,7 @@ func NewMetricsTracer(opts ...MetricsTracerOption) MetricsTracer {
// Initialise these counters to 0 otherwise the first reservation requests aren't handled
// correctly when using the PromQL increase function
reservationRequestsOutcomeTotal.WithLabelValues("refresh", "success")
reservationRequestsOutcomeTotal.WithLabelValues("refresh", "failed")
reservationRequestsOutcomeTotal.WithLabelValues("new", "success")
reservationRequestsOutcomeTotal.WithLabelValues("new", "failed")
candidatesCircuitV2SupportTotal.WithLabelValues("yes")
candidatesCircuitV2SupportTotal.WithLabelValues("no")
return &metricsTracer{}
Expand All @@ -166,7 +168,7 @@ func (mt *metricsTracer) ReservationEnded() {
reservationsClosedTotal.Inc()
}

func (mt *metricsTracer) ReservationRequestFinished(isRefresh bool, success bool) {
func (mt *metricsTracer) ReservationRequestFinished(isRefresh bool, err error) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)

Expand All @@ -175,15 +177,10 @@ func (mt *metricsTracer) ReservationRequestFinished(isRefresh bool, success bool
} else {
*tags = append(*tags, "new")
}

if success {
*tags = append(*tags, "success")
} else {
*tags = append(*tags, "failed")
}
*tags = append(*tags, getReservationRequestStatus(err))
reservationRequestsOutcomeTotal.WithLabelValues(*tags...).Inc()

if !isRefresh && success {
if !isRefresh && err == nil {
reservationsOpenedTotal.Inc()
}
}
Expand Down Expand Up @@ -245,6 +242,30 @@ func (mt *metricsTracer) DesiredReservations(cnt int) {
desiredReservations.Set(float64(cnt))
}

// getReservationRequestStatus converts the result of a reservation request
// into a short, human-readable outcome string.
//
// A nil err yields "success". When err is (or wraps) a client.ReservationError,
// the outcome is derived from its Status field for the statuses listed below.
// Every other error, including a ReservationError carrying a status that is
// not listed, yields "err other".
func getReservationRequestStatus(err error) string {
	if err == nil {
		return "success"
	}

	// Only reservation errors carry a protocol status; anything else is
	// reported under the catch-all outcome.
	var rsvpErr client.ReservationError
	if !errors.As(err, &rsvpErr) {
		return "err other"
	}

	switch rsvpErr.Status {
	case pbv2.Status_CONNECTION_FAILED:
		return "connection failed"
	case pbv2.Status_MALFORMED_MESSAGE:
		return "malformed message"
	case pbv2.Status_RESERVATION_REFUSED:
		return "reservation refused"
	case pbv2.Status_PERMISSION_DENIED:
		return "permission denied"
	case pbv2.Status_RESOURCE_LIMIT_EXCEEDED:
		return "resource limit exceeded"
	default:
		return "err other"
	}
}

// wrappedMetricsTracer wraps MetricsTracer and ignores all calls when mt is nil
type wrappedMetricsTracer struct {
mt MetricsTracer
Expand All @@ -264,9 +285,9 @@ func (mt *wrappedMetricsTracer) ReservationEnded() {
}
}

func (mt *wrappedMetricsTracer) ReservationRequestFinished(isRefresh bool, success bool) {
func (mt *wrappedMetricsTracer) ReservationRequestFinished(isRefresh bool, err error) {
if mt.mt != nil {
mt.mt.ReservationRequestFinished(isRefresh, success)
mt.mt.ReservationRequestFinished(isRefresh, err)
}
}

Expand Down
10 changes: 9 additions & 1 deletion p2p/host/autorelay/metrics_noalloc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ import (
"math/rand"
"testing"
"time"

"github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/client"
pbv2 "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/pb"
)

func getRandScheduledWork() scheduledWorkTimes {
Expand All @@ -26,11 +29,16 @@ func TestMetricsNoAllocNoCover(t *testing.T) {
for i := 0; i < 10; i++ {
scheduledWork = append(scheduledWork, getRandScheduledWork())
}
errs := []error{
client.ReservationError{Status: pbv2.Status_MALFORMED_MESSAGE},
client.ReservationError{Status: pbv2.Status_MALFORMED_MESSAGE},
nil,
}
tr := NewMetricsTracer()
tests := map[string]func(){
"RelayFinderStatus": func() { tr.RelayFinderStatus(rand.Intn(2) == 1) },
"ReservationEnded": func() { tr.ReservationEnded() },
"ReservationRequestFinished": func() { tr.ReservationRequestFinished(rand.Intn(2) == 1, rand.Intn(2) == 1) },
"ReservationRequestFinished": func() { tr.ReservationRequestFinished(rand.Intn(2) == 1, errs[rand.Intn(len(errs))]) },
"RelayAddressCount": func() { tr.RelayAddressCount(rand.Intn(10)) },
"RelayAddressUpdated": func() { tr.RelayAddressUpdated() },
"CandidateChecked": func() { tr.CandidateChecked(rand.Intn(2) == 1) },
Expand Down
14 changes: 7 additions & 7 deletions p2p/host/autorelay/relay_finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ func (rf *relayFinder) maybeConnectToRelay(ctx context.Context) {
if err != nil {
log.Debugw("failed to connect to relay", "peer", id, "error", err)
rf.notifyMaybeNeedNewCandidates()
rf.metricsTracer.ReservationRequestFinished(false, false)
rf.metricsTracer.ReservationRequestFinished(false, err)
continue
}
log.Debugw("adding new relay", "id", id)
Expand All @@ -556,7 +556,7 @@ func (rf *relayFinder) maybeConnectToRelay(ctx context.Context) {
default:
}

rf.metricsTracer.ReservationRequestFinished(false, true)
rf.metricsTracer.ReservationRequestFinished(false, nil)

if numRelays >= rf.conf.desiredRelays {
break
Expand Down Expand Up @@ -603,15 +603,15 @@ func (rf *relayFinder) refreshReservations(ctx context.Context, now time.Time) b

// find reservations about to expire and refresh them in parallel
g := new(errgroup.Group)
for p, rsvp := range rf.relays {
if now.Add(rsvpExpirationSlack).Before(rsvp.Expiration) {
continue
}
for p := range rf.relays {
// if now.Add(rsvpExpirationSlack).Before(rsvp.Expiration) {
// continue
// }

p := p
g.Go(func() error {
err := rf.refreshRelayReservation(ctx, p)
rf.metricsTracer.ReservationRequestFinished(true, err == nil)
rf.metricsTracer.ReservationRequestFinished(true, err)

return err
})
Expand Down
Loading

0 comments on commit f2d358e

Please sign in to comment.