Skip to content

Commit

Permalink
rpk: introduce cluster partitions balancer-status
Browse files Browse the repository at this point in the history
This command will query the cluster via admin api
to retrieve information about the partition auto
balancer.
  • Loading branch information
r-vasquez committed Aug 2, 2022
1 parent 5d623ae commit f02d9c2
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 1 deletion.
38 changes: 38 additions & 0 deletions src/go/rpk/pkg/api/admin/api_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,45 @@ type ClusterHealthOverview struct {
LeaderlessPartitions []string `json:"leaderless_partitions"`
}

// PartitionBalancerStatus is the status of the partition auto balancer.
type PartitionBalancerStatus struct {
// Status is either off, ready, starting, in_progress or stalled.
//
// off: The balancer is disabled.
// ready: The balancer is active but there is nothing to do.
// starting: The balancer is starting but has not run yet.
// in_progress: The balancer is active and is in the process of
// scheduling partition movements.
// stalled: There are some violations, but for some reason, the
// balancer cannot make progress in mitigating them.
Status string `json:"status,omitempty"`
// Violations are the partition balancer violations.
Violations PartitionBalancerViolations `json:"violations,omitempty"`
// SecondsSinceLastTick is the last time the partition balancer ran.
SecondsSinceLastTick int `json:"seconds_since_last_tick,omitempty"`
// CurrentReassignmentsCount is the current number of partition
// reassignments in progress.
CurrentReassignmentsCount int `json:"current_reassignments_count,omitempty"`
}

// PartitionBalancerViolations describe the violations of the partition
// auto balancer.
type PartitionBalancerViolations struct {
// UnavailableNodes are the nodes that have been unavailable after a time
// set by 'partition_autobalancing_node_availability_timeout_sec' property.
UnavailableNodes []int `json:"unavailable_nodes,omitempty"`
// OverDiskLimitNodes are the nodes that surpassed the threshold of used
// disk percentage set by 'partition_autobalancing_max_disk_usage_percent'
// property.
OverDiskLimitNodes []int `json:"over_disk_limit_nodes,omitempty"`
}

func (a *AdminAPI) GetHealthOverview(ctx context.Context) (ClusterHealthOverview, error) {
var response ClusterHealthOverview
return response, a.sendAny(ctx, http.MethodGet, "/v1/cluster/health_overview", nil, &response)
}

func (a *AdminAPI) GetPartitionStatus(ctx context.Context) (PartitionBalancerStatus, error) {
var response PartitionBalancerStatus
return response, a.sendAny(ctx, http.MethodGet, "/v1/cluster/partition_balancer/status", nil, &response)
}
2 changes: 2 additions & 0 deletions src/go/rpk/pkg/cli/cmd/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/cluster/config"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/cluster/license"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/cluster/maintenance"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/cluster/partitions"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/common"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/group"
"github.com/spf13/afero"
Expand Down Expand Up @@ -60,6 +61,7 @@ func NewClusterCommand(fs afero.Fs) *cobra.Command {
config.NewConfigCommand(fs),
license.NewLicenseCommand(fs),
maintenance.NewMaintenanceCommand(fs),
partitions.NewPartitionsCommand(fs),
offsets,
)

Expand Down
43 changes: 43 additions & 0 deletions src/go/rpk/pkg/cli/cmd/cluster/partitions/partitions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package partitions

import (
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/common"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/spf13/afero"
"github.com/spf13/cobra"
)

func NewPartitionsCommand(fs afero.Fs) *cobra.Command {
var (
adminURL string
adminEnableTLS bool
adminCertFile string
adminKeyFile string
adminCAFile string
)

cmd := &cobra.Command{
Use: "partitions",
Args: cobra.ExactArgs(0),
Short: "Manage cluster partitions",
}

common.AddAdminAPITLSFlags(cmd,
&adminEnableTLS,
&adminCertFile,
&adminKeyFile,
&adminCAFile,
)

cmd.AddCommand(
NewBalancerStatusCommand(fs),
)

cmd.PersistentFlags().StringVar(
&adminURL,
config.FlagAdminHosts2,
"",
"Comma-separated list of admin API addresses (<IP>:<port>)")

return cmd
}
68 changes: 68 additions & 0 deletions src/go/rpk/pkg/cli/cmd/cluster/partitions/status.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package partitions

import (
"fmt"

"github.com/redpanda-data/redpanda/src/go/rpk/pkg/api/admin"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/out"
"github.com/spf13/afero"
"github.com/spf13/cobra"
)

func NewBalancerStatusCommand(fs afero.Fs) *cobra.Command {
cmd := &cobra.Command{
Use: "balancer-status",
Short: "Queries cluster for partition balancer status",
Long: `Queries cluster for partition balancer status:
FIELDS DESCRIPTION:
Status: Either off, ready, starting, in_progress or stalled.
Seconds Since Last Tick: The last time the partition balancer ran.
Current Reassignments Count: Current number of partition reassignments in progress.
Unavailable Nodes: The nodes that have been unavailable after a time set by 'partition_autobalancing_node_availability_timeout_sec' property.
Over Disk Limit Nodes: The nodes that surpassed the threshold of used disk percentage set by 'partition_autobalancing_max_disk_usage_percent' property.
BALANCER STATUS:
off: The balancer is disabled.
ready: The balancer is active but there is nothing to do.
starting: The balancer is starting but has not run yet.
in_progress: The balancer is active and is in the process of scheduling partition movements.
stalled: There are some violations, but for some reason, the balancer cannot make progress in mitigating them.
`,
Args: cobra.ExactArgs(0),
Run: func(cmd *cobra.Command, _ []string) {
p := config.ParamsFromCommand(cmd)
cfg, err := p.Load(fs)
out.MaybeDie(err, "unable to load config: %v", err)

cl, err := admin.NewClient(fs, cfg)
out.MaybeDie(err, "unable to initialize admin client: %v", err)

status, err := cl.GetPartitionStatus(cmd.Context())
out.MaybeDie(err, "unable to request balancer status: %v", err)

printBalancerStatus(status)
},
}

return cmd
}

func printBalancerStatus(pbs admin.PartitionBalancerStatus) {
format := `Status: %v
Seconds Since Last Tick: %v
Current Reassignment Count: %v
`
fmt.Printf(format, pbs.Status, pbs.SecondsSinceLastTick, pbs.CurrentReassignmentsCount)

v := pbs.Violations
if len(v.OverDiskLimitNodes) > 0 || len(v.UnavailableNodes) > 0 {
vFormat := `Unavailable Nodes: %v
Over Disk Limit Nodes: %v
`
fmt.Printf(vFormat, v.UnavailableNodes, v.OverDiskLimitNodes)
}
}
2 changes: 1 addition & 1 deletion src/go/rpk/pkg/cli/cmd/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func AddKafkaFlags(
"brokers",
[]string{},
"Comma-separated list of broker ip:port pairs (e.g."+
" --brokers '192.168.78.34:9092,192.168.78.35:9092,192.179.23.54:9092' )."+
" --brokers '192.168.78.34:9092,192.168.78.35:9092,192.179.23.54:9092')."+
" Alternatively, you may set the REDPANDA_BROKERS environment"+
" variable with the comma-separated list of broker addresses",
)
Expand Down

0 comments on commit f02d9c2

Please sign in to comment.