From 68a38b6e6ff775641b1a1e50c8ae146ea81c5a33 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Thu, 22 Aug 2024 10:05:17 +0000 Subject: [PATCH] rules: deduplicate API features with stripped DLL closes #2323 --- CHANGELOG.md | 2 ++ capa/rules/__init__.py | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 98b69e7c7..b88f11b53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ ### Bug Fixes +- fix duplicate features shown in vverbose mode @williballenthin #2323 + ### capa explorer IDA Pro plugin ### Development diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 5f567ea10..5ecad5cce 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -575,6 +575,15 @@ def trim_dll_part(api: str) -> str: return api +def unique(sequence): + """deduplicate the items in the given sequence, returning a list with the same order. + + via: https://stackoverflow.com/a/58666031 + """ + seen = set() + return [x for x in sequence if not (x in seen or seen.add(x))] # type: ignore [func-returns-value] + + def build_statements(d, scopes: Scopes): if len(d.keys()) > 2: raise InvalidRule("too many statements") @@ -582,21 +591,21 @@ def build_statements(d, scopes: Scopes): key = list(d.keys())[0] description = pop_statement_description_entry(d[key]) if key == "and": - return ceng.And([build_statements(dd, scopes) for dd in d[key]], description=description) + return ceng.And(unique(build_statements(dd, scopes) for dd in d[key]), description=description) elif key == "or": - return ceng.Or([build_statements(dd, scopes) for dd in d[key]], description=description) + return ceng.Or(unique(build_statements(dd, scopes) for dd in d[key]), description=description) elif key == "not": if len(d[key]) != 1: raise InvalidRule("not statement must have exactly one child statement") return ceng.Not(build_statements(d[key][0], scopes), description=description) elif key.endswith(" or more"): count = int(key[: -len("or more")]) - return ceng.Some(count, [build_statements(dd, scopes) for dd in d[key]], description=description) + return ceng.Some(count, unique(build_statements(dd, scopes) for dd in d[key]), description=description) elif key == "optional": # `optional` is an alias for `0 or more` # which is useful for documenting behaviors, # like with `write file`, we might say that `WriteFile` is optionally found alongside `CreateFileA`. - return ceng.Some(0, [build_statements(dd, scopes) for dd in d[key]], description=description) + return ceng.Some(0, unique(build_statements(dd, scopes) for dd in d[key]), description=description) elif key == "process": if Scope.FILE not in scopes: @@ -672,7 +681,7 @@ def build_statements(d, scopes: Scopes): # - arch: i386 # - mnemonic: cmp # - statements = ceng.And([build_statements(dd, Scopes(static=Scope.INSTRUCTION)) for dd in d[key]]) + statements = ceng.And(unique(build_statements(dd, Scopes(static=Scope.INSTRUCTION)) for dd in d[key])) return ceng.Subscope(Scope.INSTRUCTION, statements, description=description)