def parse_most_used_components(subparsers):
    """Register the ``most-used-components`` subcommand on *subparsers*.

    The new sub-parser accepts a single ``--format`` option whose value must
    be ``plain`` (the default), ``json`` or ``csv``.  Nothing is returned;
    the sub-parser is attached to *subparsers* as a side effect.
    """
    # The short help text is reused as the first sentence of the description.
    summary = "Generates list of all components used by the rules in existing profiles."
    components_parser = subparsers.add_parser(
        "most-used-components",
        description=summary + " In various formats.",
        help=summary,
    )
    components_parser.add_argument(
        "--format",
        choices=["plain", "json", "csv"],
        default="plain",
        help="Which format to use for output.",
    )
import csv
import json
import sys


def generate_output(dict_, format, csv_header):
    """Print a ``name -> count`` mapping to standard output.

    Args:
        dict_: Mapping whose items are printed in iteration order.
        format: ``"json"`` dumps the whole mapping with a 4-space indent;
            ``"csv"`` prints *csv_header* followed by one ``name,count`` row
            per item; any other value prints ``name: count`` lines.
        csv_header: Header line printed verbatim before the CSV rows.  It is
            only used when *format* is ``"csv"``.
    """
    if format == "json":
        print(json.dumps(dict_, indent=4))
        return

    if format == "csv":
        print(csv_header)
        # Use the csv module so that names containing commas, quotes or
        # newlines are quoted instead of silently producing broken rows.
        # "\n" keeps the same line ending that print() emitted before.
        writer = csv.writer(sys.stdout, lineterminator="\n")
        writer.writerows(dict_.items())
        return

    for name, count in dict_.items():
        print("{}: {}".format(name, count))
# Bucket used for rules that are not listed by any component.
_NO_COMPONENT = "without_component"


def _index_rules_by_component(components):
    """Map every rule ID to the name of the first component that lists it."""
    index = {}
    for component in components.values():
        for rule_id in component.rules:
            # setdefault keeps the first component seen, which matches the
            # first-match behaviour of get_component_name_by_rule_id().
            index.setdefault(rule_id, component.name)
    return index


def _count_components(components, rules_list, components_out):
    """Add one to the counter of the owning component for each rule.

    Args:
        components: Mapping whose values expose ``name`` and ``rules``.
        rules_list: Iterable of rule IDs to attribute to components.
        components_out: Mapping ``component name -> count`` updated in place.
            Any dict-like object works; it does not have to be a defaultdict.
    """
    # Build the lookup once instead of rescanning every component per rule.
    rule_to_component = _index_rules_by_component(components)
    for rule in rules_list:
        name = rule_to_component.get(rule, _NO_COMPONENT)
        components_out[name] = components_out.get(name, 0) + 1


def get_component_name_by_rule_id(rule_id, components):
    """Return the name of the first component whose rules contain *rule_id*.

    Returns ``"without_component"`` when no component lists the rule.
    """
    for component in components.values():
        if rule_id in component.rules:
            return component.name
    return _NO_COMPONENT


def load_components(product):
    """Load the components of *product*.

    Returns ``None`` when the product YAML has no ``components_root`` entry;
    otherwise returns whatever ``ssg.components.load`` yields for the
    absolute path of ``product_dir`` joined with ``components_root``.
    """
    product_yaml = load_product_yaml(product)
    components_root = product_yaml.get("components_root")
    if components_root is None:
        return None
    product_dir = product_yaml.get("product_dir")
    components_dir = os.path.abspath(os.path.join(product_dir, components_root))
    return ssg.components.load(components_dir)


def _process_all_products_from_controls(components_out):
    """Count component usage over the profiles of every available product.

    Products for which load_components() returns ``None`` are skipped.

    Raises:
        Exception: when running under Python 2.
    """
    if PYTHON_2:
        raise Exception("This feature is not supported for python2.")

    for product in get_available_products():
        components = load_components(product)
        if components is None:
            continue
        # The controls directory is given relative to the working directory.
        controls_manager = load_controls_manager("./controls/", product)
        for profile in _get_profiles_for_product(controls_manager, product):
            _count_components(components, profile.rules, components_out)


def command_most_used_components(args):
    """Entry point of the ``most-used-components`` subcommand.

    Counts component usage, sorts the counts in descending order and prints
    them in the format selected by ``args.format``.
    """
    components = defaultdict(int)

    _process_all_products_from_controls(components)

    sorted_components = _sorted_dict_by_num_value(components)
    csv_header = "component_name,count_of_rules"
    generate_output(sorted_components, args.format, csv_header)
def _count_rules_per_rules_list(rules_list, rules):
    """Increment the counter in *rules* for every rule ID in *rules_list*.

    *rules* is updated in place.  It may be a plain dict as well as a
    defaultdict: missing keys start from zero either way.
    """
    for rule in rules_list:
        rules[rule] = rules.get(rule, 0) + 1


def _sorted_dict_by_num_value(dict_):
    """Return the items of *dict_* ordered by value, largest first.

    Items with equal values keep their relative order from *dict_*.  An
    OrderedDict (a dict subclass) is returned so that the ordering does not
    depend on the interpreter preserving plain-dict insertion order.
    """
    from collections import OrderedDict

    return OrderedDict(sorted(dict_.items(), key=lambda item: item[1], reverse=True))


def command_most_used_rules(args):
    """Entry point of the ``most-used-rules`` subcommand.

    Counts rules from the controls of all products when no benchmark is
    given in ``args.BENCHMARKS``, otherwise from each listed benchmark, and
    prints the counts in descending order using ``args.format``.
    """
    rules = defaultdict(int)

    if not args.BENCHMARKS:
        _process_all_products_from_controls(rules)
    # NOTE(review): the patch elides one line between its hunks here; it is
    # assumed to be this `else:` -- confirm against the full file.
    else:
        for benchmark in args.BENCHMARKS:
            _count_rules_per_benchmark(benchmark, rules)

    sorted_rules = _sorted_dict_by_num_value(rules)
    csv_header = "rule_id,count_of_profiles"
    generate_output(sorted_rules, args.format, csv_header)