From 508a09ef25b429456eac152aae98ac20cf893097 Mon Sep 17 00:00:00 2001 From: Fariss Date: Tue, 4 Jun 2024 18:47:41 +0200 Subject: [PATCH] include rule caching in PyInstaller build process (#2097) * include rule caching in PyInstaller build process The following commit introduces a new function that caches the capa rule set, so that users don't have to manually run ./scripts/cache-ruleset.py before running pyinstaller. * ci: omit Cache rule set step from build.yml workflow * refactor: move cache generation to cache.py * mkdir cache directory when it does not exist --------- Co-authored-by: Soufiane Fariss Co-authored-by: Moritz --- .github/pyinstaller/pyinstaller.spec | 15 ++++++++++++--- .github/workflows/build.yml | 2 -- CHANGELOG.md | 1 + capa/rules/cache.py | 22 ++++++++++++++++++++++ 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/.github/pyinstaller/pyinstaller.spec b/.github/pyinstaller/pyinstaller.spec index f103ba16e..021a2b294 100644 --- a/.github/pyinstaller/pyinstaller.spec +++ b/.github/pyinstaller/pyinstaller.spec @@ -1,10 +1,19 @@ # -*- mode: python -*- # Copyright (C) 2020 Mandiant, Inc. All Rights Reserved. -import os.path -import subprocess +import sys import wcwidth +import capa.rules.cache +from pathlib import Path + +# SPECPATH is a global variable which points to .spec file path +capa_dir = Path(SPECPATH).parent.parent +rules_dir = capa_dir / 'rules' +cache_dir = capa_dir / 'cache' + +if not capa.rules.cache.generate_rule_cache(rules_dir, cache_dir): + sys.exit(-1) a = Analysis( # when invoking pyinstaller from the project root, @@ -26,7 +35,7 @@ a = Analysis( # so we manually embed the wcwidth resources here. # # ref: https://stackoverflow.com/a/62278462/87207 - (os.path.dirname(wcwidth.__file__), "wcwidth"), + (Path(wcwidth.__file__).parent, "wcwidth"), ], # when invoking pyinstaller from the project root, # this gets run from the project root. 
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bc9c3908c..4a9bf555f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -52,8 +52,6 @@ jobs: run: python -m pip install --upgrade pip setuptools - name: Install capa with build requirements run: pip install -e .[build] - - name: Cache the rule set - run: python ./scripts/cache-ruleset.py ./rules/ ./cache/ - name: Build standalone executable run: pyinstaller --log-level DEBUG .github/pyinstaller/pyinstaller.spec - name: Does it run (PE)? diff --git a/CHANGELOG.md b/CHANGELOG.md index 5962ade6a..f27121a8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ - ci: update github workflows to use latest version of actions that were using a deprecated version of node #1967 #2003 capa-rules#883 @sjha2048 @Ana06 - ci: update binja version to stable 4.0 #2016 @xusheng6 - ci: update github workflows to reflect the latest ghidrathon installation and bumped up jep, ghidra versions #2020 @psahithireddy +- ci: include rule caching in PyInstaller build process #2097 @s-ff - add deptry support #1497 @s-ff ### Raw diffs diff --git a/capa/rules/cache.py b/capa/rules/cache.py index 5cc1d1d4a..6f87570ef 100644 --- a/capa/rules/cache.py +++ b/capa/rules/cache.py @@ -159,3 +159,25 @@ def load_cached_ruleset(cache_dir: Path, rule_contents: List[bytes]) -> Optional return None else: return cache.ruleset + + +def generate_rule_cache(rules_dir: Path, cache_dir: Path) -> bool: + if not rules_dir.is_dir(): + logger.error("rules directory %s does not exist", rules_dir) + return False + + try: + cache_dir.mkdir(parents=True, exist_ok=True) + rules = capa.rules.get_rules([rules_dir], cache_dir) + except (IOError, capa.rules.InvalidRule, capa.rules.InvalidRuleSet) as e: + logger.error("%s", str(e)) + return False + + content = capa.rules.cache.get_ruleset_content(rules) + id = capa.rules.cache.compute_cache_identifier(content) + path = capa.rules.cache.get_cache_path(cache_dir, id) + 
+ assert path.exists() + logger.info("rules cache saved to: %s", path) + + return True