From 97a3fba2c911278d7481ddf6446fba3027c00d3d Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 12 Jun 2024 09:24:16 +0000 Subject: [PATCH 1/7] fix black --- scripts/capa2sarif.py | 32 ++++++++------------------------ tests/test_scripts.py | 5 ++++- 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/scripts/capa2sarif.py b/scripts/capa2sarif.py index 62f8e47ae..c783354e2 100644 --- a/scripts/capa2sarif.py +++ b/scripts/capa2sarif.py @@ -72,9 +72,7 @@ def _parse_args() -> argparse.Namespace: help="Compatibility for Radare r2sarif plugin v2.0", ) parser.add_argument("-t", "--tag", help="Filter on rule meta field values (ruleid)") - parser.add_argument( - "--version", action="version", version=f"%(prog)s {__version__}" - ) + parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}") return parser.parse_args() @@ -89,22 +87,16 @@ def main() -> int: with Path(args.capa_output).open() as capa_output: json_data = json.load(capa_output) except ValueError: - logger.error( - "Input data was not valid JSON, input should be a capa json output file." - ) + logger.error("Input data was not valid JSON, input should be a capa json output file.") return -1 except json.JSONDecodeError: # An exception has occured - logger.error( - "Input data was not valid JSON, input should be a capa json output file." - ) + logger.error("Input data was not valid JSON, input should be a capa json output file.") return -2 # Marshall json into Sarif # Create baseline sarif structure to be populated from json data - sarif_structure: Optional[dict] = _sarif_boilerplate( - json_data["meta"], json_data["rules"] - ) + sarif_structure: Optional[dict] = _sarif_boilerplate(json_data["meta"], json_data["rules"]) if sarif_structure is None: logger.errort("An Error has occured creating default sarif structure.") return -3 @@ -120,9 +112,7 @@ def main() -> int: # artifacts must include a description as well with a text field. if "artifacts" in sarif_structure["runs"][0]: - sarif_structure["runs"][0]["artifacts"][0]["description"] = { - "text": "placeholder" - } + sarif_structure["runs"][0]["artifacts"][0]["description"] = {"text": "placeholder"} # For better compliance with Ghidra table. Iteraction through properties['additionalProperties'] """ @@ -170,13 +160,9 @@ def _sarif_boilerplate(data_meta: dict, data_rules: dict) -> Optional[dict]: "id": id, "name": data_rules[key]["meta"]["name"], "shortDescription": {"text": data_rules[key]["meta"]["name"]}, - "messageStrings": { - "default": {"text": data_rules[key]["meta"]["name"]} - }, + "messageStrings": {"default": {"text": data_rules[key]["meta"]["name"]}}, "properties": { - "namespace": data_rules[key]["meta"]["namespace"] - if "namespace" in data_rules[key]["meta"] - else [], + "namespace": data_rules[key]["meta"]["namespace"] if "namespace" in data_rules[key]["meta"] else [], "scopes": data_rules[key]["meta"]["scopes"], "references": data_rules[key]["meta"]["references"], "lib": data_rules[key]["meta"]["lib"], @@ -285,9 +271,7 @@ def _enumerate_evidence(node: dict, related_count: int) -> List[dict]: ) return [] else: - logger.error( - "Not implemented %s", node.get("node").get("type"), file=sys.stderr - ) + logger.error("Not implemented %s", node.get("node").get("type"), file=sys.stderr) return [] for loc in node.get("locations"): diff --git a/tests/test_scripts.py b/tests/test_scripts.py index f6f12fd68..9bad30132 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -40,7 +40,10 @@ def get_rule_path(): [ pytest.param("capa2yara.py", [get_rules_path()]), pytest.param("capafmt.py", [get_rule_path()]), - pytest.param("capa2sarif.py", [Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"]), + pytest.param( + "capa2sarif.py", + [Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 01-01.dll_.json"], + ), # testing some variations of linter script pytest.param("lint.py", ["-t", "create directory", get_rules_path()]), # `create directory` rule has native and .NET example PEs From 9ec9a6f43970705b6d31a7c4d0493eef0c32f86d Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 12 Jun 2024 09:27:13 +0000 Subject: [PATCH 2/7] fix mypy issues --- scripts/capa2sarif.py | 49 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/scripts/capa2sarif.py b/scripts/capa2sarif.py index c783354e2..63e0c6e29 100644 --- a/scripts/capa2sarif.py +++ b/scripts/capa2sarif.py @@ -98,7 +98,7 @@ def main() -> int: # Create baseline sarif structure to be populated from json data sarif_structure: Optional[dict] = _sarif_boilerplate(json_data["meta"], json_data["rules"]) if sarif_structure is None: - logger.errort("An Error has occured creating default sarif structure.") + logger.error("An Error has occured creating default sarif structure.") return -3 _populate_artifact(sarif_structure, json_data["meta"]) @@ -244,37 +244,36 @@ def _populate_invocations(sarif_log: dict, meta_data: dict) -> None: def _enumerate_evidence(node: dict, related_count: int) -> List[dict]: related_locations = [] - if node.get("success") and node.get("node").get("type") != "statement": + if node.get("success") and node.get("node", {}).get("type") != "statement": label = "" - if node.get("node").get("type") == "feature": - if node.get("node").get("feature").get("type") == "api": - label = "api: " + node.get("node").get("feature").get("api") - elif node.get("node").get("feature").get("type") == "match": - label = "match: " + node.get("node").get("feature").get("match") - elif node.get("node").get("feature").get("type") == "number": - label = f"number: {node.get('node').get('feature').get('description')} ({node.get('node').get('feature').get('number')})" - elif node.get("node").get("feature").get("type") == "offset": - label = f"offset: {node.get('node').get('feature').get('description')} ({node.get('node').get('feature').get('offset')})" - elif node.get("node").get("feature").get("type") == "mnemonic": - label = f"mnemonic: {node.get('node').get('feature').get('mnemonic')}" - elif node.get("node").get("feature").get("type") == "characteristic": - label = f"characteristic: {node.get('node').get('feature').get('characteristic')}" - elif node.get("node").get("feature").get("type") == "os": - label = f"os: {node.get('node').get('feature').get('os')}" - elif node.get("node").get("feature").get("type") == "operand number": - label = f"operand: ({node.get('node').get('feature').get('index')} ) {node.get('node').get('feature').get('description')} ({node.get('node').get('feature').get('operand_number')})" + if node.get("node", {}).get("type") == "feature": + if node.get("node", {}).get("feature", {}).get("type") == "api": + label = "api: " + node.get("node", {}).get("feature", {}).get("api") + elif node.get("node", {}).get("feature", {}).get("type") == "match": + label = "match: " + node.get("node", {}).get("feature", {}).get("match") + elif node.get("node", {}).get("feature", {}).get("type") == "number": + label = f"number: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('number')})" + elif node.get("node", {}).get("feature", {}).get("type") == "offset": + label = f"offset: {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('offset')})" + elif node.get("node", {}).get("feature", {}).get("type") == "mnemonic": + label = f"mnemonic: {node.get('node', {}).get('feature', {}).get('mnemonic')}" + elif node.get("node", {}).get("feature", {}).get("type") == "characteristic": + label = f"characteristic: {node.get('node', {}).get('feature', {}).get('characteristic')}" + elif node.get("node", {}).get("feature", {}).get("type") == "os": + label = f"os: {node.get('node', {}).get('feature', {}).get('os')}" + elif node.get("node", {}).get("feature", {}).get("type") == "operand number": + label = f"operand: ({node.get('node', {}).get('feature', {}).get('index')} ) {node.get('node', {}).get('feature', {}).get('description')} ({node.get('node', {}).get('feature', {}).get('operand_number')})" else: logger.error( "Not implemented %s", - node.get("node").get("feature").get("type"), - file=sys.stderr, + node.get("node", {}).get("feature", {}).get("type"), ) return [] else: - logger.error("Not implemented %s", node.get("node").get("type"), file=sys.stderr) + logger.error("Not implemented %s", node.get("node", {}).get("type")) return [] - for loc in node.get("locations"): + for loc in node.get("locations", []): if loc["type"] != "absolute": continue @@ -287,8 +286,8 @@ def _enumerate_evidence(node: dict, related_count: int) -> List[dict]: ) related_count += 1 - if node.get("success") and node.get("node").get("type") == "statement": - for child in node.get("children"): + if node.get("success") and node.get("node", {}).get("type") == "statement": + for child in node.get("children", []): related_locations += _enumerate_evidence(child, related_count) return related_locations From cce7774705fa9b40037109e172d5b5b2faa5fa3a Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 12 Jun 2024 15:17:31 +0000 Subject: [PATCH 3/7] add scripts section --- pyproject.toml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bd4f87a95..45cb169d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -138,9 +138,6 @@ dev = [ "black==24.4.2", "isort==5.13.2", "mypy==1.10.0", - "psutil==5.9.2", - "stix2==3.0.1", - "requests==2.31.0", "mypy-protobuf==3.6.0", # type stubs for mypy "types-backports==0.1.3", @@ -148,8 +145,6 @@ dev = [ "types-PyYAML==6.0.8", "types-tabulate==0.9.0.20240106", "types-termcolor==1.1.4", - "types-psutil==5.8.23", - "types_requests==2.32.0.20240602", "types-protobuf==5.26.0.20240422", "deptry==0.16.1" ] @@ -162,6 +157,13 @@ build = [ "setuptools==70.0.0", "build==1.2.1" ] +scripts = [ + "psutil==5.9.2", + "stix2==3.0.1", + "requests==2.31.0", + "types-psutil==5.8.23", + "types_requests==2.32.0.20240602", +] [tool.deptry] extend_exclude = [ From 7b0270980d5c98b62b888d123e84af9c5f0bd533 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 12 Jun 2024 15:19:24 +0000 Subject: [PATCH 4/7] add capa2sarif dependencies --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 45cb169d1..4f9b3dbb8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -158,8 +158,10 @@ build = [ "build==1.2.1" ] scripts = [ + "jschema_to_python==1.2.3", "psutil==5.9.2", "stix2==3.0.1", + "sarif_om==1.0.4", "requests==2.31.0", "types-psutil==5.8.23", "types_requests==2.32.0.20240602", From 93cd1dceddf4e580aa0fcc92478ede8e02a3927d Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 12 Jun 2024 15:24:10 +0000 Subject: [PATCH 5/7] add scripts to install step --- .github/workflows/tests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5553ceae5..eb8ec1ced 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,7 +37,7 @@ jobs: - name: Install dependencies run: | pip install -r requirements.txt - pip install -e .[dev] + pip install -e .[dev,scripts] - name: Lint with ruff run: pre-commit run ruff - name: Lint with isort @@ -65,7 +65,7 @@ jobs: - name: Install capa run: | pip install -r requirements.txt - pip install -e .[dev] + pip install -e .[dev,scripts] - name: Run rule linter run: python scripts/lint.py rules/ @@ -102,7 +102,7 @@ jobs: - name: Install capa run: | pip install -r requirements.txt - pip install -e .[dev] + pip install -e .[dev,scripts] - name: Run tests (fast) # this set of tests runs about 80% of the cases in 20% of the time, # and should catch most errors quickly. @@ -139,7 +139,7 @@ jobs: if: ${{ env.BN_SERIAL != 0 }} run: | pip install -r requirements.txt - pip install -e .[dev] + pip install -e .[dev,scripts] - name: install Binary Ninja if: ${{ env.BN_SERIAL != 0 }} run: | @@ -198,7 +198,7 @@ jobs: - name: Install capa run: | pip install -r requirements.txt - pip install -e .[dev] + pip install -e .[dev,scripts] - name: Run tests run: | mkdir ./.github/ghidra/project From e3ea60d3546709ebe69e3d251eee2cd3965ded51 Mon Sep 17 00:00:00 2001 From: Moritz Date: Thu, 13 Jun 2024 09:36:12 +0200 Subject: [PATCH 6/7] Apply suggestions from code review Co-authored-by: Willi Ballenthin --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4f9b3dbb8..c7076823f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -145,6 +145,8 @@ dev = [ "types-PyYAML==6.0.8", "types-tabulate==0.9.0.20240106", "types-termcolor==1.1.4", + "types-psutil==5.8.23", + "types_requests==2.32.0.20240602", "types-protobuf==5.26.0.20240422", "deptry==0.16.1" ] @@ -163,8 +165,6 @@ scripts = [ "stix2==3.0.1", "sarif_om==1.0.4", "requests==2.31.0", - "types-psutil==5.8.23", - "types_requests==2.32.0.20240602", ] [tool.deptry] From 1febb224d189880e4e7dd7312babfdd7d59df9e2 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Thu, 13 Jun 2024 07:50:58 +0000 Subject: [PATCH 7/7] add scripts dependency group --- CHANGELOG.md | 1 + doc/installation.md | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index be8017180..36b26937c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - optimize rule matching #2080 @williballenthin - add aarch64 as a valid architecture #2144 mehunhoff@google.com @williballenthin - relax dependency version requirements for the capa library #2053 @williballenthin +- add scripts dependency group and update documentation #2145 @mr-tz ### Breaking Changes diff --git a/doc/installation.md b/doc/installation.md index 93df732c2..e5e7135e0 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -107,7 +107,8 @@ We use the following tools to ensure consistent code style and formatting: To install these development dependencies, run: -`$ pip install -e /local/path/to/src[dev]` +- `$ pip install -e /local/path/to/src[dev]` or +- `$ pip install -e /local/path/to/src[dev,scripts]` to also install all script dependencies We use [pre-commit](https://pre-commit.com/) so that its trivial to run the same linters & configuration locally as in CI.