diff --git a/dashboards/autorelay/autorelay.json b/dashboards/autorelay/autorelay.json index dd7ee06ab1..e7a981381c 100644 --- a/dashboards/autorelay/autorelay.json +++ b/dashboards/autorelay/autorelay.json @@ -21,7 +21,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "9.3.6" + "version": "9.4.1" }, { "type": "panel", @@ -247,7 +247,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.3.6", + "pluginVersion": "9.4.1", "targets": [ { "datasource": { @@ -350,22 +350,7 @@ { "matcher": { "id": "byName", - "options": "refresh reservation: failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "new reservation: success" + "options": "new: success" }, "properties": [ { @@ -380,28 +365,13 @@ { "matcher": { "id": "byName", - "options": "refresh reservation: success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "new reservation: failed" + "options": "refresh: success" }, "properties": [ { "id": "color", "value": { - "fixedColor": "orange", + "fixedColor": "super-light-green", "mode": "fixed" } } @@ -440,7 +410,7 @@ "format": "time_series", "instant": false, "interval": "", - "legendFormat": "{{request_type}} reservation: {{outcome}}", + "legendFormat": "{{request_type}}: {{outcome}}", "range": true, "refId": "A" } @@ -506,7 +476,7 @@ }, "textMode": "auto" }, - "pluginVersion": "9.3.6", + "pluginVersion": "9.4.1", "targets": [ { "datasource": { @@ -667,7 +637,7 @@ }, "textMode": "auto" }, - "pluginVersion": "9.3.6", + "pluginVersion": "9.4.1", "targets": [ { "datasource": { @@ -742,7 +712,7 @@ }, "textMode": "auto" }, - "pluginVersion": "9.3.6", + "pluginVersion": "9.4.1", "targets": [ { "datasource": { @@ -868,20 +838,22 @@ "type": "timeseries" } ], - "schemaVersion": 37, + "refresh": "", + "revision": 1, + "schemaVersion": 38, "style": "dark", "tags": [], "templating": { "list": [] }, "time": { - "from": "now-1h", + "from": "now-5m", "to": "now" }, "timepicker": {}, "timezone": "", "title": "libp2p Autorelay", "uid": "deQ_uf-4k", - "version": 5, + "version": 3, "weekStart": "" } \ No newline at end of file diff --git a/p2p/host/autorelay/metrics.go b/p2p/host/autorelay/metrics.go index a83b2df4b5..8a56c705c1 100644 --- a/p2p/host/autorelay/metrics.go +++ b/p2p/host/autorelay/metrics.go @@ -1,7 +1,11 @@ package autorelay import ( + "errors" + "github.com/libp2p/go-libp2p/p2p/metricshelper" + "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/client" + pbv2 "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/pb" "github.com/prometheus/client_golang/prometheus" ) @@ -104,7 +108,7 @@ type MetricsTracer interface { RelayFinderStatus(isActive bool) ReservationEnded() - ReservationRequestFinished(isRefresh bool, success bool) + ReservationRequestFinished(isRefresh bool, err error) RelayAddressCount(int) RelayAddressUpdated() @@ -146,9 +150,7 @@ func NewMetricsTracer(opts ...MetricsTracerOption) MetricsTracer { // Initialise these counters to 0 otherwise the first reservation requests aren't handled // correctly when using promql increse function reservationRequestsOutcomeTotal.WithLabelValues("refresh", "success") - reservationRequestsOutcomeTotal.WithLabelValues("refresh", "failed") reservationRequestsOutcomeTotal.WithLabelValues("new", "success") - reservationRequestsOutcomeTotal.WithLabelValues("new", "failed") candidatesCircuitV2SupportTotal.WithLabelValues("yes") candidatesCircuitV2SupportTotal.WithLabelValues("no") return &metricsTracer{} @@ -166,7 +168,7 @@ func (mt *metricsTracer) ReservationEnded() { reservationsClosedTotal.Inc() } -func (mt *metricsTracer) ReservationRequestFinished(isRefresh bool, success bool) { +func (mt *metricsTracer) ReservationRequestFinished(isRefresh bool, err error) { tags := metricshelper.GetStringSlice() defer metricshelper.PutStringSlice(tags) @@ -175,15 +177,10 @@ func (mt *metricsTracer) ReservationRequestFinished(isRefresh bool, success bool } else { *tags = append(*tags, "new") } - - if success { - *tags = append(*tags, "success") - } else { - *tags = append(*tags, "failed") - } + *tags = append(*tags, getReservationRequestStatus(err)) reservationRequestsOutcomeTotal.WithLabelValues(*tags...).Inc() - if !isRefresh && success { + if !isRefresh && err == nil { reservationsOpenedTotal.Inc() } } @@ -245,6 +242,30 @@ func (mt *metricsTracer) DesiredReservations(cnt int) { desiredReservations.Set(float64(cnt)) } +func getReservationRequestStatus(err error) string { + if err == nil { + return "success" + } + + status := "err other" + var re client.ReservationError + if errors.As(err, &re) { + switch re.Status { + case pbv2.Status_CONNECTION_FAILED: + return "connection failed" + case pbv2.Status_MALFORMED_MESSAGE: + return "malformed message" + case pbv2.Status_RESERVATION_REFUSED: + return "reservation refused" + case pbv2.Status_PERMISSION_DENIED: + return "permission denied" + case pbv2.Status_RESOURCE_LIMIT_EXCEEDED: + return "resource limit exceeded" + } + } + return status +} + // wrappedMetricsTracer wraps MetricsTracer and ignores all calls when mt is nil type wrappedMetricsTracer struct { mt MetricsTracer @@ -264,9 +285,9 @@ func (mt *wrappedMetricsTracer) ReservationEnded() { } } -func (mt *wrappedMetricsTracer) ReservationRequestFinished(isRefresh bool, success bool) { +func (mt *wrappedMetricsTracer) ReservationRequestFinished(isRefresh bool, err error) { if mt.mt != nil { - mt.mt.ReservationRequestFinished(isRefresh, success) + mt.mt.ReservationRequestFinished(isRefresh, err) } } diff --git a/p2p/host/autorelay/metrics_noalloc_test.go b/p2p/host/autorelay/metrics_noalloc_test.go index c6abfafff0..de3492094e 100644 --- a/p2p/host/autorelay/metrics_noalloc_test.go +++ b/p2p/host/autorelay/metrics_noalloc_test.go @@ -6,6 +6,9 @@ import ( "math/rand" "testing" "time" + + "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/client" + pbv2 "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/pb" ) func getRandScheduledWork() scheduledWorkTimes { @@ -26,11 +29,16 @@ func TestMetricsNoAllocNoCover(t *testing.T) { for i := 0; i < 10; i++ { scheduledWork = append(scheduledWork, getRandScheduledWork()) } + errs := []error{ + client.ReservationError{Status: pbv2.Status_MALFORMED_MESSAGE}, + client.ReservationError{Status: pbv2.Status_MALFORMED_MESSAGE}, + nil, + } tr := NewMetricsTracer() tests := map[string]func(){ "RelayFinderStatus": func() { tr.RelayFinderStatus(rand.Intn(2) == 1) }, "ReservationEnded": func() { tr.ReservationEnded() }, - "ReservationRequestFinished": func() { tr.ReservationRequestFinished(rand.Intn(2) == 1, rand.Intn(2) == 1) }, + "ReservationRequestFinished": func() { tr.ReservationRequestFinished(rand.Intn(2) == 1, errs[rand.Intn(len(errs))]) }, "RelayAddressCount": func() { tr.RelayAddressCount(rand.Intn(10)) }, "RelayAddressUpdated": func() { tr.RelayAddressUpdated() }, "CandidateChecked": func() { tr.CandidateChecked(rand.Intn(2) == 1) }, diff --git a/p2p/host/autorelay/relay_finder.go b/p2p/host/autorelay/relay_finder.go index d839ac70f0..1f9bda5784 100644 --- a/p2p/host/autorelay/relay_finder.go +++ b/p2p/host/autorelay/relay_finder.go @@ -539,7 +539,7 @@ func (rf *relayFinder) maybeConnectToRelay(ctx context.Context) { if err != nil { log.Debugw("failed to connect to relay", "peer", id, "error", err) rf.notifyMaybeNeedNewCandidates() - rf.metricsTracer.ReservationRequestFinished(false, false) + rf.metricsTracer.ReservationRequestFinished(false, err) continue } log.Debugw("adding new relay", "id", id) @@ -556,7 +556,7 @@ func (rf *relayFinder) maybeConnectToRelay(ctx context.Context) { default: } - rf.metricsTracer.ReservationRequestFinished(false, true) + rf.metricsTracer.ReservationRequestFinished(false, nil) if numRelays >= rf.conf.desiredRelays { break @@ -603,15 +603,15 @@ func (rf *relayFinder) refreshReservations(ctx context.Context, now time.Time) b // find reservations about to expire and refresh them in parallel g := new(errgroup.Group) - for p, rsvp := range rf.relays { - if now.Add(rsvpExpirationSlack).Before(rsvp.Expiration) { - continue - } + for p := range rf.relays { + // if now.Add(rsvpExpirationSlack).Before(rsvp.Expiration) { + // continue + // } p := p g.Go(func() error { err := rf.refreshRelayReservation(ctx, p) - rf.metricsTracer.ReservationRequestFinished(true, err == nil) + rf.metricsTracer.ReservationRequestFinished(true, err) return err }) diff --git a/p2p/protocol/circuitv2/client/reservation.go b/p2p/protocol/circuitv2/client/reservation.go index 1cd451ad13..dbb9241937 100644 --- a/p2p/protocol/circuitv2/client/reservation.go +++ b/p2p/protocol/circuitv2/client/reservation.go @@ -37,6 +37,27 @@ type Reservation struct { Voucher *proto.ReservationVoucher } +// ReservationError is the error returned on failure to reserve a slot in the relay +type ReservationError struct { + + // Status is the status returned by the relay for rejecting the reservation + // request. It is set to pbv2.Status_CONNECTION_FAILED on other failures + Status pbv2.Status + + // Reason is the reason for reservation failure + Reason string + + err error +} + +func (re ReservationError) Error() string { + return fmt.Sprintf("reservation error: status: %s reason: %s err: %s", pbv2.Status_name[int32(re.Status)], re.Reason, re.err) +} + +func (re ReservationError) Unwrap() error { + return re.err +} + // Reserve reserves a slot in a relay and returns the reservation information. // Clients must reserve slots in order for the relay to relay connections to them. func Reserve(ctx context.Context, h host.Host, ai peer.AddrInfo) (*Reservation, error) { @@ -46,7 +67,7 @@ func Reserve(ctx context.Context, h host.Host, ai peer.AddrInfo) (*Reservation, s, err := h.NewStream(ctx, ai.ID, proto.ProtoIDv2Hop) if err != nil { - return nil, err + return nil, ReservationError{Status: pbv2.Status_CONNECTION_FAILED, Reason: "failed to open stream", err: err} } defer s.Close() @@ -61,33 +82,39 @@ func Reserve(ctx context.Context, h host.Host, ai peer.AddrInfo) (*Reservation, if err := wr.WriteMsg(&msg); err != nil { s.Reset() - return nil, fmt.Errorf("error writing reservation message: %w", err) + return nil, ReservationError{Status: pbv2.Status_CONNECTION_FAILED, Reason: "error writing reservation message", err: err} } msg.Reset() if err := rd.ReadMsg(&msg); err != nil { s.Reset() - return nil, fmt.Errorf("error reading reservation response message: %w", err) + return nil, ReservationError{Status: pbv2.Status_CONNECTION_FAILED, Reason: "error reading reservation response message: %w", err: err} } if msg.GetType() != pbv2.HopMessage_STATUS { - return nil, fmt.Errorf("unexpected relay response: not a status message (%d)", msg.GetType()) + return nil, ReservationError{ + Status: pbv2.Status_MALFORMED_MESSAGE, + Reason: fmt.Sprintf("unexpected relay response: not a status message (%d)", msg.GetType()), + err: err} } if status := msg.GetStatus(); status != pbv2.Status_OK { - return nil, fmt.Errorf("reservation failed: %s (%d)", pbv2.Status_name[int32(status)], status) + return nil, ReservationError{Status: msg.GetStatus(), Reason: "reservation failed"} } rsvp := msg.GetReservation() if rsvp == nil { - return nil, fmt.Errorf("missing reservation info") + return nil, ReservationError{Status: pbv2.Status_MALFORMED_MESSAGE, Reason: "missing reservation info"} } result := &Reservation{} result.Expiration = time.Unix(int64(rsvp.GetExpire()), 0) if result.Expiration.Before(time.Now()) { - return nil, fmt.Errorf("received reservation with expiration date in the past: %s", result.Expiration) + return nil, ReservationError{ + Status: pbv2.Status_MALFORMED_MESSAGE, + Reason: fmt.Sprintf("received reservation with expiration date in the past: %s", result.Expiration), + } } addrs := rsvp.GetAddrs() @@ -105,12 +132,19 @@ func Reserve(ctx context.Context, h host.Host, ai peer.AddrInfo) (*Reservation, if voucherBytes != nil { _, rec, err := record.ConsumeEnvelope(voucherBytes, proto.RecordDomain) if err != nil { - return nil, fmt.Errorf("error consuming voucher envelope: %w", err) + return nil, ReservationError{ + Status: pbv2.Status_MALFORMED_MESSAGE, + Reason: fmt.Sprintf("error consuming voucher envelope: %s", err), + err: err, + } } voucher, ok := rec.(*proto.ReservationVoucher) if !ok { - return nil, fmt.Errorf("unexpected voucher record type: %+T", rec) + return nil, ReservationError{ + Status: pbv2.Status_MALFORMED_MESSAGE, + Reason: fmt.Sprintf("unexpected voucher record type: %+T", rec), + } } result.Voucher = voucher } diff --git a/p2p/protocol/circuitv2/client/reservation_test.go b/p2p/protocol/circuitv2/client/reservation_test.go index 31e2865448..decb3e71de 100644 --- a/p2p/protocol/circuitv2/client/reservation_test.go +++ b/p2p/protocol/circuitv2/client/reservation_test.go @@ -2,6 +2,7 @@ package client_test import ( "context" + "errors" "math" "testing" "time" @@ -22,6 +23,7 @@ func TestReservationFailures(t *testing.T) { name string streamHandler network.StreamHandler err string + status pbv2.Status } testcases := []testcase{ { @@ -36,7 +38,8 @@ func TestReservationFailures(t *testing.T) { Type: pbv2.HopMessage_RESERVE.Enum(), }) }, - err: "unexpected relay response: not a status message", + err: "unexpected relay response: not a status message", + status: pbv2.Status_MALFORMED_MESSAGE, }, { name: "unknown status", @@ -47,7 +50,8 @@ func TestReservationFailures(t *testing.T) { Status: &status, }) }, - err: "reservation failed", + err: "reservation failed", + status: pbv2.Status(1337), }, { name: "invalid time", @@ -60,7 +64,8 @@ func TestReservationFailures(t *testing.T) { Reservation: &pbv2.Reservation{Expire: &expire}, }) }, - err: "received reservation with expiration date in the past", + err: "received reservation with expiration date in the past", + status: pbv2.Status_MALFORMED_MESSAGE, }, { name: "invalid voucher", @@ -76,7 +81,8 @@ func TestReservationFailures(t *testing.T) { }, }) }, - err: "error consuming voucher envelope: failed when unmarshalling the envelope", + err: "error consuming voucher envelope: failed when unmarshalling the envelope", + status: pbv2.Status_MALFORMED_MESSAGE, }, } @@ -98,6 +104,15 @@ func TestReservationFailures(t *testing.T) { } else { require.Error(t, err) require.Contains(t, err.Error(), tc.err) + if tc.status != 0 { + var re client.ReservationError + if !errors.As(err, &re) { + t.Errorf("expected error to be of type %T", re) + } + if re.Status != tc.status { + t.Errorf("expected status %d got %d", tc.status, re.Status) + } + } } }) }