From 3cbcaf11ce9c5c386f7a8b53fe690a53f257a6b3 Mon Sep 17 00:00:00 2001 From: Rob Blafford Date: Fri, 15 Jul 2022 15:30:21 -0400 Subject: [PATCH 1/7] cluster: Configurable license nag interval - This is exclusively for use within ducktape tests, so the log may be printed sooner than on a 5min interval - Users could not use this to sidestep the nag as the source takes the min of the passed in value and 5min. --- src/v/cluster/feature_manager.cc | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/v/cluster/feature_manager.cc b/src/v/cluster/feature_manager.cc index faf855e8d184..35d5b6b9569e 100644 --- a/src/v/cluster/feature_manager.cc +++ b/src/v/cluster/feature_manager.cc @@ -163,7 +163,25 @@ ss::future<> feature_manager::stop() { } ss::future<> feature_manager::maybe_log_license_check_info() { - static constexpr std::chrono::seconds license_check_retry = 5min; + auto license_check_retry = std::chrono::seconds(60 * 5); + auto interval_override = std::getenv( + "__REDPANDA_LICENSE_CHECK_INTERVAL_SEC"); + if (interval_override != nullptr) { + try { + license_check_retry = std::min( + std::chrono::seconds{license_check_retry}, + std::chrono::seconds{std::stoi(interval_override)}); + vlog( + clusterlog.info, + "Overriding default license log annoy interval to: {}s", + license_check_retry.count()); + } catch (...) { + vlog( + clusterlog.error, + "Invalid license check interval override '{}'", + interval_override); + } + } const auto& cfg = config::shard_local_cfg(); std::stringstream warn_ss; if (cfg.cloud_storage_enabled) { From 678d4d66cf5019e715e1683c5a994a21a621d985 Mon Sep 17 00:00:00 2001 From: Rob Blafford Date: Fri, 15 Jul 2022 15:32:55 -0400 Subject: [PATCH 2/7] cluster: Don't nag messages until feature enabled - The cluster must be fully upgraded to the min supported version for license checks before any license nags are printed to the logs. 
--- src/v/cluster/feature_manager.cc | 34 ++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/v/cluster/feature_manager.cc b/src/v/cluster/feature_manager.cc index 35d5b6b9569e..668dd2e1edcb 100644 --- a/src/v/cluster/feature_manager.cc +++ b/src/v/cluster/feature_manager.cc @@ -182,21 +182,25 @@ ss::future<> feature_manager::maybe_log_license_check_info() { interval_override); } } - const auto& cfg = config::shard_local_cfg(); - std::stringstream warn_ss; - if (cfg.cloud_storage_enabled) { - fmt::print(warn_ss, "{}", "Tired Storage(cloud_storage)"); - } - const auto& warn_log = warn_ss.str(); - if (!warn_log.empty()) { - const auto& license = _feature_table.local().get_license(); - if (!license || license->is_expired()) { - vlog( - clusterlog.warn, - "Enterprise feature(s) {} detected as enabled without a valid " - "license, please contact support and/or upload a valid redpanda " - "license", - warn_log); + if (_feature_table.local().is_active(feature::license)) { + const auto& cfg = config::shard_local_cfg(); + std::stringstream warn_ss; + if (cfg.cloud_storage_enabled) { + fmt::print(warn_ss, "{}", "Tired Storage(cloud_storage)"); + } + const auto& warn_log = warn_ss.str(); + if (!warn_log.empty()) { + const auto& license = _feature_table.local().get_license(); + if (!license || license->is_expired()) { + vlog( + clusterlog.warn, + "Enterprise feature(s) {} detected as enabled without a " + "valid " + "license, please contact support and/or upload a valid " + "redpanda " + "license", + warn_log); + } } } try { From b539a1b7a8a55db7273863f1a7b34a995ba0d9b2 Mon Sep 17 00:00:00 2001 From: Rob Blafford Date: Fri, 15 Jul 2022 15:34:13 -0400 Subject: [PATCH 3/7] cluster: Fix typo in nag message - Tired -> Tiered --- src/v/cluster/feature_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/v/cluster/feature_manager.cc b/src/v/cluster/feature_manager.cc index 668dd2e1edcb..d66ec10f8060 100644 
--- a/src/v/cluster/feature_manager.cc +++ b/src/v/cluster/feature_manager.cc @@ -186,7 +186,7 @@ ss::future<> feature_manager::maybe_log_license_check_info() { const auto& cfg = config::shard_local_cfg(); std::stringstream warn_ss; if (cfg.cloud_storage_enabled) { - fmt::print(warn_ss, "{}", "Tired Storage(cloud_storage)"); + fmt::print(warn_ss, "{}", "Tiered Storage(cloud_storage)"); } const auto& warn_log = warn_ss.str(); if (!warn_log.empty()) { From 8daa000786aa9c31f655f10b22feeebbfa9b984a Mon Sep 17 00:00:00 2001 From: Rob Blafford Date: Fri, 15 Jul 2022 15:35:51 -0400 Subject: [PATCH 4/7] cluster: Nag if cont rebalance used w/o license --- src/v/cluster/feature_manager.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/v/cluster/feature_manager.cc b/src/v/cluster/feature_manager.cc index d66ec10f8060..2a9f91709222 100644 --- a/src/v/cluster/feature_manager.cc +++ b/src/v/cluster/feature_manager.cc @@ -188,13 +188,18 @@ ss::future<> feature_manager::maybe_log_license_check_info() { if (cfg.cloud_storage_enabled) { fmt::print(warn_ss, "{}", "Tiered Storage(cloud_storage)"); } + if ( + cfg.partition_autobalancing_mode + == model::partition_autobalancing_mode::continuous) { + fmt::print(warn_ss, "{} & ", "Continuous partition autobalancing"); + } const auto& warn_log = warn_ss.str(); if (!warn_log.empty()) { const auto& license = _feature_table.local().get_license(); if (!license || license->is_expired()) { vlog( clusterlog.warn, - "Enterprise feature(s) {} detected as enabled without a " + "Enterprise feature(s) ({}) detected as enabled without a " "valid " "license, please contact support and/or upload a valid " "redpanda " From c9ba9e2ed29ccdeb468dcbe8932884393711827f Mon Sep 17 00:00:00 2001 From: Rob Blafford Date: Fri, 15 Jul 2022 15:29:05 -0400 Subject: [PATCH 5/7] rptest: Move search_log to wider scope - So that other tests may perform assertions on logs existing or not --- tests/rptest/services/redpanda.py | 18 
+++++++++++++++ tests/rptest/tests/cluster_config_test.py | 27 ++++------------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/tests/rptest/services/redpanda.py b/tests/rptest/services/redpanda.py index df39db951456..b865f9b3a600 100644 --- a/tests/rptest/services/redpanda.py +++ b/tests/rptest/services/redpanda.py @@ -1772,3 +1772,21 @@ def save_executable(self): else: self._saved_executable = True self._context.log_collect['executable', self] = True + + def search_log(self, pattern): + """ + Test helper for grepping the redpanda log + + :return: true if any instances of `pattern` found + """ + for node in self.nodes: + for line in node.account.ssh_capture( + f"grep \"{pattern}\" {RedpandaService.STDOUT_STDERR_CAPTURE} || true" + ): + # We got a match + self.logger.debug( + f"Found {pattern} on node {node.name}: {line}") + return True + + # Fall through, no matches + return False diff --git a/tests/rptest/tests/cluster_config_test.py b/tests/rptest/tests/cluster_config_test.py index 56bd89c0331b..290a0ddac6c5 100644 --- a/tests/rptest/tests/cluster_config_test.py +++ b/tests/rptest/tests/cluster_config_test.py @@ -33,25 +33,6 @@ SECRET_CONFIG_NAMES = frozenset(["cloud_storage_secret_key"]) -def search_log(redpanda, pattern): - """ - Test helper for grepping the redpanda log - - :return: true if any instances of `pattern` found - """ - for node in redpanda.nodes: - for line in node.account.ssh_capture( - f"grep \"{pattern}\" {redpanda.STDOUT_STDERR_CAPTURE} || true" - ): - # We got a match - redpanda.logger.debug( - f"Found {pattern} on node {node.name}: {line}") - return True - - # Fall through, no matches - return False - - class ClusterConfigUpgradeTest(RedpandaTest): def __init__(self, *args, **kwargs): super().__init__(*args, extra_rp_conf={}, **kwargs) @@ -84,8 +65,8 @@ def test_upgrade_redpanda_yaml(self): self.redpanda.restart_nodes( [node], override_cfg_params={'delete_retention_ms': '1234'}) assert 
admin.get_cluster_config()['delete_retention_ms'] == 9876 - assert search_log(self.redpanda, - "Ignoring value for 'delete_retention_ms'") + assert self.redpanda.search_log( + "Ignoring value for 'delete_retention_ms'") class ClusterConfigTest(RedpandaTest): @@ -948,11 +929,11 @@ def set_and_search(key, value, expect_log): self._wait_for_version_sync(patch_result['config_version']) # Check value was/was not printed to log while applying - assert search_log(self.redpanda, value) is expect_log + assert self.redpanda.search_log(value) is expect_log # Check we do/don't print on next startup self.redpanda.restart_nodes(self.redpanda.nodes) - assert search_log(self.redpanda, value) is expect_log + assert self.redpanda.search_log(value) is expect_log # Default valued secrets are still shown. self._check_value_everywhere("cloud_storage_secret_key", None) From fcc7b4b60107268d0d587d8682ea9d4cf0736c66 Mon Sep 17 00:00:00 2001 From: Rob Blafford Date: Fri, 15 Jul 2022 16:42:50 -0400 Subject: [PATCH 6/7] rptest: Break out get sample license into utils - So that other tests may correctly obtain the sample license stored in the test env --- tests/rptest/tests/cluster_features_test.py | 5 ++--- tests/rptest/utils/rpenv.py | 25 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 tests/rptest/utils/rpenv.py diff --git a/tests/rptest/tests/cluster_features_test.py b/tests/rptest/tests/cluster_features_test.py index ab7c91c43df8..59772320f620 100644 --- a/tests/rptest/tests/cluster_features_test.py +++ b/tests/rptest/tests/cluster_features_test.py @@ -11,6 +11,7 @@ import time import datetime +from rptest.utils.rpenv import sample_license from rptest.services.admin import Admin from rptest.services.redpanda import RESTART_LOG_ALLOW_LIST from rptest.tests.redpanda_test import RedpandaTest @@ -213,10 +214,8 @@ def test_license_upload_and_query(self): """ Test uploading and retrieval of license """ - license = 
os.environ.get("REDPANDA_SAMPLE_LICENSE", None) + license = sample_license() if license is None: - is_ci = os.environ.get("CI", "false") - assert is_ci == "false" self.logger.info( "Skipping test, REDPANDA_SAMPLE_LICENSE env var not found") return diff --git a/tests/rptest/utils/rpenv.py b/tests/rptest/utils/rpenv.py new file mode 100644 index 000000000000..8a45fdbb5496 --- /dev/null +++ b/tests/rptest/utils/rpenv.py @@ -0,0 +1,25 @@ +# Copyright 2022 Redpanda Data, Inc. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.md +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0 + +# Utilities for checking the environment of a test + +import os + + +def sample_license(): + """ + Returns the sample license from the env if it exists, asserts if its + missing and the environment is CI + """ + license = os.environ.get("REDPANDA_SAMPLE_LICENSE", None) + if license is None: + is_ci = os.environ.get("CI", "false") + assert is_ci == "false" + return None + return license From bd873807fe395faa20b60ed3fed4654b3cfa77ab Mon Sep 17 00:00:00 2001 From: Rob Blafford Date: Fri, 15 Jul 2022 15:31:32 -0400 Subject: [PATCH 7/7] rptest: Upgrade test for license feature - Asserts that nag messages are not printed before the cluster is fully updated, even when a feature that is license blocked is currently enabled, pre-upgrade. --- tests/rptest/tests/license_upgrade_test.py | 103 +++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 tests/rptest/tests/license_upgrade_test.py diff --git a/tests/rptest/tests/license_upgrade_test.py b/tests/rptest/tests/license_upgrade_test.py new file mode 100644 index 000000000000..465c53196a0d --- /dev/null +++ b/tests/rptest/tests/license_upgrade_test.py @@ -0,0 +1,103 @@ +# Copyright 2022 Redpanda Data, Inc. 
+# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.md +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0 + +import os +import re +import time + +from ducktape.utils.util import wait_until +from rptest.utils.rpenv import sample_license +from rptest.services.admin import Admin +from ducktape.utils.util import wait_until +from rptest.tests.redpanda_test import RedpandaTest +from rptest.services.redpanda import SISettings +from rptest.services.cluster import cluster +from requests.exceptions import HTTPError +from rptest.services.redpanda import RESTART_LOG_ALLOW_LIST +from rptest.services.redpanda_installer import RedpandaInstaller, wait_for_num_versions + + +class UpgradeToLicenseChecks(RedpandaTest): + """ + Test that ensures the licensing work does not incorrectly print license + enforcement errors during upgrade when a guarded feature is already + enabled. Also tests that the license can only be uploaded once the cluster + has completed upgrade to the latest version. + """ + LICENSE_CHECK_INTERVAL_SEC = 1 + + def __init__(self, test_context): + # Setting 'si_settings' enables a licensed feature, however at v22.1.4 there + # are no license checks present. This test verifies behavior between versions + # of redpanda that do and do not have the licensing feature built-in. 
+ super(UpgradeToLicenseChecks, self).__init__(test_context=test_context, + num_brokers=3, + si_settings=SISettings()) + self.installer = self.redpanda._installer + self.admin = Admin(self.redpanda) + + def setUp(self): + self.installer.install(self.redpanda.nodes, (22, 1, 4)) + super(UpgradeToLicenseChecks, self).setUp() + + @cluster(num_nodes=3, log_allow_list=RESTART_LOG_ALLOW_LIST) + def test_basic_upgrade(self): + # Modified environment variables apply to processes restarted from this point onwards + self.redpanda.set_environment({ + '__REDPANDA_LICENSE_CHECK_INTERVAL_SEC': + f'{UpgradeToLicenseChecks.LICENSE_CHECK_INTERVAL_SEC}' + }) + + license = sample_license() + if license is None: + self.logger.info( + "Skipping test, REDPANDA_SAMPLE_LICENSE env var not found") + return + + unique_versions = wait_for_num_versions(self.redpanda, 1) + assert 'v22.1.4' in unique_versions, unique_versions + + # These logs can't exist in v22.1.4 but double check anyway... + assert self.redpanda.search_log("Enterprise feature(s).*") is False + + # Update one node to newest version + self.installer.install([self.redpanda.nodes[0]], + RedpandaInstaller.HEAD) + self.redpanda.restart_nodes([self.redpanda.nodes[0]]) + unique_versions = wait_for_num_versions(self.redpanda, 2) + + try: + # Ensure a valid license cannot be uploaded in this cluster state + self.admin.put_license(license) + assert False + except HTTPError as e: + assert e.response.status_code == 400 + + # Ensure the log is not written, if the fiber was enabled a log should + # appear within one interval of the license check fiber + time.sleep(UpgradeToLicenseChecks.LICENSE_CHECK_INTERVAL_SEC * 2) + assert self.redpanda.search_log("Enterprise feature(s).*") is False + + # Install new version on all nodes + self.installer.install(self.redpanda.nodes, RedpandaInstaller.HEAD) + + # Restart nodes 2 and 3 + self.redpanda.restart_nodes( + [self.redpanda.nodes[1], self.redpanda.nodes[2]]) + _ = 
wait_for_num_versions(self.redpanda, 1) + + # Assert that the log was found + wait_until( + lambda: self.redpanda.search_log("Enterprise feature(s).*"), + timeout_sec=UpgradeToLicenseChecks.LICENSE_CHECK_INTERVAL_SEC * 4, + backoff_sec=1, + err_msg="Timeout waiting for enterprise nag log") + + # Install license + assert self.admin.put_license(license).status_code == 200