From 354fdb4327ce4cb1fd5065e7b37a204cffe289fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20=C4=8Cern=C3=BD?= Date: Thu, 12 Oct 2023 09:02:12 +0200 Subject: [PATCH] Avoid duplicate loading of component files As discovered in https://github.com/ComplianceAsCode/content/pull/11190, the `components.load()` function is called many times during a build of product content. However, the component files need to be loaded only once. When the build system resolves the content using `compile_all.py`, `compile_all.py` creates an instance of the `ssg.build_yaml.BuildLoader` class. Moreover, many other instances of the `ssg.build_yaml.BuildLoader` class are created recursively as the loader recurses into sub-directories of the given benchmark root directory. When we iterate over the directories, for each child directory the script creates a new instance of `ssg.build_yaml.BuildLoader` using the `ssg.build_yaml.BuildLoader._get_new_loader()` method. This method should ensure that the component data and other data won't be unnecessarily loaded again but will just be referenced from the parent `ssg.build_yaml.BuildLoader` that handles the parent directory. The problem is the way the `ssg.build_yaml.BuildLoader._load_components()` method is called in the ctor of `ssg.build_yaml.BuildLoader`. The ctor is called before the initialization of the `rule_to_components` attribute. Also, there is no code in this ctor or elsewhere assuring that it won't load the components data if it was already loaded. In this commit, we won't call `_load_components` in the ctor, which gives `_get_new_loader` the opportunity to swap in the data, as the comment says. This should bring a speed-up of about 5 seconds. 
--- build-scripts/compile_all.py | 1 + ssg/build_yaml.py | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build-scripts/compile_all.py b/build-scripts/compile_all.py index 90d4b4d774d..76ee26032f0 100644 --- a/build-scripts/compile_all.py +++ b/build-scripts/compile_all.py @@ -133,6 +133,7 @@ def main(): loader = ssg.build_yaml.BuildLoader( None, env_yaml, product_cpes, args.sce_metadata, args.stig_references) + loader.load_components() load_benchmark_source_data_from_directory_tree(loader, env_yaml, product_yaml) project_root_abspath = os.path.abspath(args.project_root) diff --git a/ssg/build_yaml.py b/ssg/build_yaml.py index c4ef4581031..67803752b6b 100644 --- a/ssg/build_yaml.py +++ b/ssg/build_yaml.py @@ -1331,9 +1331,9 @@ def __init__( if stig_reference_path: self.stig_references = ssg.build_stig.map_versions_to_rule_ids(stig_reference_path) self.components_dir = None - self.rule_to_components = self._load_components() + self.rule_to_components = None - def _load_components(self): + def load_components(self): if "components_root" not in self.env_yaml: return None product_dir = self.env_yaml["product_dir"] @@ -1341,9 +1341,8 @@ def _load_components(self): self.components_dir = os.path.abspath( os.path.join(product_dir, components_root)) components = ssg.components.load(self.components_dir) - rule_to_components = ssg.components.rule_component_mapping( + self.rule_to_components = ssg.components.rule_component_mapping( components) - return rule_to_components def _process_values(self): for value_yaml in self.value_files: