Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partition autobalancer full disk test #5839

Merged
2 changes: 2 additions & 0 deletions src/v/cluster/controller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,8 @@ ss::future<> controller::start() {
.partition_autobalancing_node_availability_timeout_sec.bind(),
config::shard_local_cfg()
.partition_autobalancing_max_disk_usage_percent.bind(),
config::shard_local_cfg()
.storage_space_alert_free_threshold_percent.bind(),
config::shard_local_cfg()
.partition_autobalancing_tick_interval_ms.bind(),
config::shard_local_cfg()
Expand Down
31 changes: 26 additions & 5 deletions src/v/cluster/node/local_monitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,14 @@ local_monitor::local_monitor(
, _free_percent_alert_threshold(alert_percent)
, _min_free_bytes(min_bytes)
, _storage_node_api(node_api)
, _storage_api(api) {}
, _storage_api(api) {
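// __REDPANDA_TEST_DISK_SIZE is an intentionally undocumented environment
// variable, for use only in integration tests. When it is set, its value
// is parsed as an unsigned integer and stored as the mock disk size.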
const char* test_disk_size_str = std::getenv("__REDPANDA_TEST_DISK_SIZE");
if (test_disk_size_str) {
_disk_size_for_test = std::stoul(std::string(test_disk_size_str));
}
}

ss::future<> local_monitor::update_state() {
// grab new snapshot of local state
Expand Down Expand Up @@ -89,11 +96,25 @@ ss::future<std::vector<storage::disk>> local_monitor::get_disks() {

auto svfs = co_await get_statvfs(path);

// f_bsize is a historical Linux-ism; use f_frsize (the fragment size),
// which is the unit f_blocks is counted in.
uint64_t free = svfs.f_bfree * svfs.f_frsize;
uint64_t total = svfs.f_blocks * svfs.f_frsize;

if (_disk_size_for_test) {
uint64_t used = total - free;
vassert(
used < *_disk_size_for_test,
"mock disk size {} must be > used size {}",
*_disk_size_for_test,
used);
total = *_disk_size_for_test;
free = total - used;
}

co_return std::vector<storage::disk>{storage::disk{
.path = config::node().data_directory().as_sstring(),
// f_bsize is a historical linux-ism, use f_frsize
.free = svfs.f_bfree * svfs.f_frsize,
.total = svfs.f_blocks * svfs.f_frsize,
.free = free,
.total = total,
}};
}

Expand Down Expand Up @@ -196,4 +217,4 @@ ss::future<> local_monitor::update_disk_metrics() {
&storage::node_api::set_disk_metrics, total_space, free_space, alert);
}

} // namespace cluster::node
} // namespace cluster::node
4 changes: 3 additions & 1 deletion src/v/cluster/node/local_monitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ class local_monitor {
// Injection points for unit tests
ss::sstring _path_for_test;
std::function<struct statvfs(const ss::sstring)> _statvfs_for_test;

std::optional<size_t> _disk_size_for_test;
};

} // namespace cluster::node
} // namespace cluster::node
9 changes: 8 additions & 1 deletion src/v/cluster/partition_balancer_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ partition_balancer_backend::partition_balancer_backend(
config::binding<model::partition_autobalancing_mode>&& mode,
config::binding<std::chrono::seconds>&& availability_timeout,
config::binding<unsigned>&& max_disk_usage_percent,
config::binding<unsigned>&& storage_space_alert_free_threshold_percent,
config::binding<std::chrono::milliseconds>&& tick_interval,
config::binding<size_t>&& movement_batch_size_bytes)
: _raft0(std::move(raft0))
Expand All @@ -49,6 +50,8 @@ partition_balancer_backend::partition_balancer_backend(
, _mode(std::move(mode))
, _availability_timeout(std::move(availability_timeout))
, _max_disk_usage_percent(std::move(max_disk_usage_percent))
, _storage_space_alert_free_threshold_percent(
std::move(storage_space_alert_free_threshold_percent))
, _tick_interval(std::move(tick_interval))
, _movement_batch_size_bytes(std::move(movement_batch_size_bytes))
, _timer([this] { tick(); }) {}
Expand Down Expand Up @@ -119,10 +122,14 @@ ss::future<> partition_balancer_backend::do_tick() {
= co_await _health_monitor.get_current_cluster_health_snapshot(
cluster_report_filter{});

double soft_max_disk_usage_ratio = _max_disk_usage_percent() / 100.0;
double hard_max_disk_usage_ratio
= (100 - _storage_space_alert_free_threshold_percent()) / 100.0;
auto plan_data
= partition_balancer_planner(
planner_config{
.max_disk_usage_ratio = _max_disk_usage_percent() / 100.0,
.soft_max_disk_usage_ratio = soft_max_disk_usage_ratio,
.hard_max_disk_usage_ratio = hard_max_disk_usage_ratio,
.movement_disk_size_batch = _movement_batch_size_bytes(),
.node_availability_timeout_sec = _availability_timeout(),
},
Expand Down
2 changes: 2 additions & 0 deletions src/v/cluster/partition_balancer_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class partition_balancer_backend {
config::binding<model::partition_autobalancing_mode>&& mode,
config::binding<std::chrono::seconds>&& availability_timeout,
config::binding<unsigned>&& max_disk_usage_percent,
config::binding<unsigned>&& storage_space_alert_free_threshold_percent,
config::binding<std::chrono::milliseconds>&& tick_interval,
config::binding<size_t>&& movement_batch_size_bytes);

Expand Down Expand Up @@ -76,6 +77,7 @@ class partition_balancer_backend {
config::binding<model::partition_autobalancing_mode> _mode;
config::binding<std::chrono::seconds> _availability_timeout;
config::binding<unsigned> _max_disk_usage_percent;
config::binding<unsigned> _storage_space_alert_free_threshold_percent;
config::binding<std::chrono::milliseconds> _tick_interval;
config::binding<size_t> _movement_batch_size_bytes;

Expand Down
Loading