diff --git a/ring/replication_strategy.go b/ring/replication_strategy.go index e572cb77a..44e05a538 100644 --- a/ring/replication_strategy.go +++ b/ring/replication_strategy.go @@ -2,9 +2,8 @@ package ring import ( "fmt" + "strings" "time" - - "github.com/pkg/errors" ) type ReplicationStrategy interface { @@ -40,10 +39,12 @@ func (s *defaultReplicationStrategy) Filter(instances []InstanceDesc, op Operati // Skip those that have not heartbeated in a while. NB these are still // included in the calculation of minSuccess, so if too many failed instances // will cause the whole write to fail. + var unhealthy []string for i := 0; i < len(instances); { if instances[i].IsHealthy(op, heartbeatTimeout, now) { i++ } else { + unhealthy = append(unhealthy, instances[i].Addr) instances = append(instances[:i], instances[i+1:]...) } } @@ -52,11 +53,15 @@ func (s *defaultReplicationStrategy) Filter(instances []InstanceDesc, op Operati // after filtering out dead ones, don't even bother trying. if len(instances) < minSuccess { var err error + var unhealthyStr string + if len(unhealthy) > 0 { + unhealthyStr = fmt.Sprintf(" - unhealthy instances: %s", strings.Join(unhealthy, ",")) + } if zoneAwarenessEnabled { - err = fmt.Errorf("at least %d live replicas required across different availability zones, could only find %d", minSuccess, len(instances)) + err = fmt.Errorf("at least %d live replicas required across different availability zones, could only find %d%s", minSuccess, len(instances), unhealthyStr) } else { - err = fmt.Errorf("at least %d live replicas required, could only find %d", minSuccess, len(instances)) + err = fmt.Errorf("at least %d live replicas required, could only find %d%s", minSuccess, len(instances), unhealthyStr) } return nil, 0, err @@ -74,17 +79,23 @@ func NewIgnoreUnhealthyInstancesReplicationStrategy() ReplicationStrategy { func (r *ignoreUnhealthyInstancesReplicationStrategy) Filter(instances []InstanceDesc, op Operation, _ int, heartbeatTimeout time.Duration, _ bool) (healthy []InstanceDesc, maxFailures int, err error) { now := time.Now() // Filter out unhealthy instances. + var unhealthy []string for i := 0; i < len(instances); { if instances[i].IsHealthy(op, heartbeatTimeout, now) { i++ } else { + unhealthy = append(unhealthy, instances[i].Addr) instances = append(instances[:i], instances[i+1:]...) } } // We need at least 1 healthy instance no matter what is the replication factor set to. if len(instances) == 0 { - return nil, 0, errors.New("at least 1 healthy replica required, could only find 0") + var unhealthyStr string + if len(unhealthy) > 0 { + unhealthyStr = fmt.Sprintf(" - unhealthy instances: %s", strings.Join(unhealthy, ",")) + } + return nil, 0, fmt.Errorf("at least 1 healthy replica required, could only find 0%s", unhealthyStr) } return instances, len(instances) - 1, nil diff --git a/ring/replication_strategy_test.go b/ring/replication_strategy_test.go index 0bce73350..1fe5d0e91 100644 --- a/ring/replication_strategy_test.go +++ b/ring/replication_strategy_test.go @@ -24,7 +24,7 @@ func TestRingReplicationStrategy(t *testing.T) { { replicationFactor: 1, deadIngesters: 1, - expectedError: "at least 1 live replicas required, could only find 0", + expectedError: "at least 1 live replicas required, could only find 0 - unhealthy instances: dead1", }, // Ensure it works for RF=3 and 2 ingesters. @@ -52,7 +52,7 @@ func TestRingReplicationStrategy(t *testing.T) { replicationFactor: 3, liveIngesters: 1, deadIngesters: 2, - expectedError: "at least 2 live replicas required, could only find 1", + expectedError: "at least 2 live replicas required, could only find 1 - unhealthy instances: dead1,dead2", }, // Ensure it works when adding / removing nodes. @@ -75,7 +75,7 @@ func TestRingReplicationStrategy(t *testing.T) { replicationFactor: 3, liveIngesters: 2, deadIngesters: 2, - expectedError: "at least 3 live replicas required, could only find 2", + expectedError: "at least 3 live replicas required, could only find 2 - unhealthy instances: dead1,dead2", }, } { ingesters := []InstanceDesc{} @@ -85,7 +85,7 @@ func TestRingReplicationStrategy(t *testing.T) { }) } for i := 0; i < tc.deadIngesters; i++ { - ingesters = append(ingesters, InstanceDesc{}) + ingesters = append(ingesters, InstanceDesc{Addr: fmt.Sprintf("dead%d", i+1)}) } t.Run(fmt.Sprintf("[%d]", i), func(t *testing.T) { @@ -137,7 +137,7 @@ func TestIgnoreUnhealthyInstancesReplicationStrategy(t *testing.T) { liveIngesters: 0, deadIngesters: 3, expectedMaxFailure: 0, - expectedError: "at least 1 healthy replica required, could only find 0", + expectedError: "at least 1 healthy replica required, could only find 0 - unhealthy instances: dead1,dead2,dead3", }, } { ingesters := []InstanceDesc{} @@ -147,7 +147,7 @@ func TestIgnoreUnhealthyInstancesReplicationStrategy(t *testing.T) { }) } for i := 0; i < tc.deadIngesters; i++ { - ingesters = append(ingesters, InstanceDesc{}) + ingesters = append(ingesters, InstanceDesc{Addr: fmt.Sprintf("dead%d", i+1)}) } t.Run(tc.name, func(t *testing.T) {