Skip to content

Commit

Permalink
rpk: introduce cluster partitions balancer-status
Browse files Browse the repository at this point in the history
This command will query the cluster via admin api
to retrieve information about the partition auto
balancer.
  • Loading branch information
r-vasquez committed Aug 5, 2022
1 parent ff6bc3f commit 10a3c79
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 1 deletion.
38 changes: 38 additions & 0 deletions src/go/rpk/pkg/api/admin/api_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,45 @@ type ClusterHealthOverview struct {
LeaderlessPartitions []string `json:"leaderless_partitions"`
}

// PartitionBalancerStatus is the status of the partition auto balancer.
type PartitionBalancerStatus struct {
// Status is either off, ready, starting, in_progress or stalled.
//
// off: The balancer is disabled.
// ready: The balancer is active but there is nothing to do.
// starting: The balancer is starting but has not run yet.
// in_progress: The balancer is active and is in the process of
// scheduling partition movements.
// stalled: There are some violations, but for some reason, the
// balancer cannot make progress in mitigating them.
Status string `json:"status,omitempty"`
// Violations are the partition balancer violations.
Violations PartitionBalancerViolations `json:"violations,omitempty"`
// SecondsSinceLastTick is the last time the partition balancer ran.
SecondsSinceLastTick int `json:"seconds_since_last_tick,omitempty"`
// CurrentReassignmentsCount is the current number of partition
// reassignments in progress.
CurrentReassignmentsCount int `json:"current_reassignments_count,omitempty"`
}

// PartitionBalancerViolations describe the violations of the partition
// auto balancer.
type PartitionBalancerViolations struct {
// UnavailableNodes are the nodes that have been unavailable after a time
// set by 'partition_autobalancing_node_availability_timeout_sec' property.
UnavailableNodes []int `json:"unavailable_nodes,omitempty"`
// OverDiskLimitNodes are the nodes that surpassed the threshold of used
// disk percentage set by 'partition_autobalancing_max_disk_usage_percent'
// property.
OverDiskLimitNodes []int `json:"over_disk_limit_nodes,omitempty"`
}

func (a *AdminAPI) GetHealthOverview(ctx context.Context) (ClusterHealthOverview, error) {
var response ClusterHealthOverview
return response, a.sendAny(ctx, http.MethodGet, "/v1/cluster/health_overview", nil, &response)
}

func (a *AdminAPI) GetPartitionStatus(ctx context.Context) (PartitionBalancerStatus, error) {
var response PartitionBalancerStatus
return response, a.sendAny(ctx, http.MethodGet, "/v1/cluster/partition_balancer/status", nil, &response)
}
2 changes: 2 additions & 0 deletions src/go/rpk/pkg/cli/cmd/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/cluster/config"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/cluster/license"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/cluster/maintenance"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/cluster/partitions"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/common"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/group"
"github.com/spf13/afero"
Expand Down Expand Up @@ -61,6 +62,7 @@ func NewClusterCommand(fs afero.Fs) *cobra.Command {
config.NewConfigCommand(fs),
license.NewLicenseCommand(fs),
maintenance.NewMaintenanceCommand(fs),
partitions.NewPartitionsCommand(fs),
offsets,
)

Expand Down
43 changes: 43 additions & 0 deletions src/go/rpk/pkg/cli/cmd/cluster/partitions/partitions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package partitions

import (
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/cli/cmd/common"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/spf13/afero"
"github.com/spf13/cobra"
)

func NewPartitionsCommand(fs afero.Fs) *cobra.Command {
var (
adminURL string
adminEnableTLS bool
adminCertFile string
adminKeyFile string
adminCAFile string
)

cmd := &cobra.Command{
Use: "partitions",
Args: cobra.ExactArgs(0),
Short: "Manage cluster partitions",
}

common.AddAdminAPITLSFlags(cmd,
&adminEnableTLS,
&adminCertFile,
&adminKeyFile,
&adminCAFile,
)

cmd.AddCommand(
NewBalancerStatusCommand(fs),
)

cmd.PersistentFlags().StringVar(
&adminURL,
config.FlagAdminHosts2,
"",
"Comma-separated list of admin API addresses (<IP>:<port>)")

return cmd
}
79 changes: 79 additions & 0 deletions src/go/rpk/pkg/cli/cmd/cluster/partitions/status.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package partitions

import (
"fmt"

"github.com/redpanda-data/redpanda/src/go/rpk/pkg/api/admin"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/out"
"github.com/spf13/afero"
"github.com/spf13/cobra"
)

func NewBalancerStatusCommand(fs afero.Fs) *cobra.Command {
cmd := &cobra.Command{
Use: "balancer-status",
Short: "Queries cluster for partition balancer status",
Long: `Queries cluster for partition balancer status:
If continuous partition balancing is enabled, redpanda will continuously
reassign partitions from unavailable nodes and from nodes using more disk space
than configured limit.
This command can be used to monitor the partition balancer status.
FIELDS
Status: Either off, ready, starting, in_progress or stalled.
Seconds Since Last Tick: The last time the partition balancer ran.
Current Reassignments Count: Current number of partition reassignments in progress.
Unavailable Nodes: The nodes that have been unavailable after a time set
by the 'partition_autobalancing_node_availability_timeout_sec'
cluster property.
Over Disk Limit Nodes: The nodes that surpassed the threshold of used disk
percentage set by the 'partition_autobalancing_max_disk_usage_percent'
cluster property.
BALANCER STATUS
off: The balancer is disabled.
ready: The balancer is active but there is nothing to do.
starting: The balancer is starting but has not run yet.
in_progress: The balancer is active and is in the process of scheduling partition movements.
stalled: There are some violations, but for some reason, the balancer cannot make
progress in mitigating them.
`,
Args: cobra.ExactArgs(0),
Run: func(cmd *cobra.Command, _ []string) {
p := config.ParamsFromCommand(cmd)
cfg, err := p.Load(fs)
out.MaybeDie(err, "unable to load config: %v", err)

cl, err := admin.NewClient(fs, cfg)
out.MaybeDie(err, "unable to initialize admin client: %v", err)

status, err := cl.GetPartitionStatus(cmd.Context())
out.MaybeDie(err, "unable to request balancer status: %v", err)

printBalancerStatus(status)
},
}

return cmd
}

func printBalancerStatus(pbs admin.PartitionBalancerStatus) {
const format = `Status: %v
Seconds Since Last Tick: %v
Current Reassignment Count: %v
`
fmt.Printf(format, pbs.Status, pbs.SecondsSinceLastTick, pbs.CurrentReassignmentsCount)

v := pbs.Violations
if len(v.OverDiskLimitNodes) > 0 || len(v.UnavailableNodes) > 0 {
const vFormat = `Unavailable Nodes: %v
Over Disk Limit Nodes: %v
`
fmt.Printf(vFormat, v.UnavailableNodes, v.OverDiskLimitNodes)
}
}
2 changes: 1 addition & 1 deletion src/go/rpk/pkg/cli/cmd/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func AddKafkaFlags(
"brokers",
[]string{},
"Comma-separated list of broker ip:port pairs (e.g."+
" --brokers '192.168.78.34:9092,192.168.78.35:9092,192.179.23.54:9092' )."+
" --brokers '192.168.78.34:9092,192.168.78.35:9092,192.179.23.54:9092')."+
" Alternatively, you may set the REDPANDA_BROKERS environment"+
" variable with the comma-separated list of broker addresses",
)
Expand Down

0 comments on commit 10a3c79

Please sign in to comment.