From 1c64001ed8ec65d44f73c29056d01d0840050659 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Tue, 17 Sep 2024 11:51:08 +0000 Subject: [PATCH] fix byte search IDA Pro 7.5 compatibility --- CHANGELOG.md | 1 + capa/features/extractors/ida/helpers.py | 67 ++++++++++++++++--------- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9dbfbf816..71a23c6a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Unlock powerful malware analysis with capa's new [VMRay sandbox](https://www.vmr - fix code path reference in linter @williballenthin #2350 ### capa explorer IDA Pro plugin +- fix byte search IDA Pro 7.5 compatibility @mr-tz #2371 ### Development diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 5ddc3c7d2..a40ca3fda 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -21,30 +21,49 @@ IDA_NALT_ENCODING = ida_nalt.get_default_encoding_idx(ida_nalt.BPU_1B) # use one byte-per-character encoding -# TODO (mr): use find_bytes -# https://github.com/mandiant/capa/issues/2339 -def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]: - """yield all ea of a given byte sequence - - args: - start: min virtual address - end: max virtual address - seq: bytes to search e.g. 
b"\x01\x03" - """ - patterns = ida_bytes.compiled_binpat_vec_t() - - seqstr = " ".join([f"{b:02x}" for b in seq]) - err = ida_bytes.parse_binpat_str(patterns, 0, seqstr, 16, IDA_NALT_ENCODING) - - if err: - return - - while True: - ea, _ = ida_bytes.bin_search3(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD) - if ea == idaapi.BADADDR: - break - start = ea + 1 - yield ea +if hasattr(ida_bytes, "parse_binpat_str"): + # TODO (mr): use find_bytes + # https://github.com/mandiant/capa/issues/2339 + def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]: + """yield all ea of a given byte sequence + + args: + start: min virtual address + end: max virtual address + seq: bytes to search e.g. b"\x01\x03" + """ + patterns = ida_bytes.compiled_binpat_vec_t() + + seqstr = " ".join([f"{b:02x}" for b in seq]) + err = ida_bytes.parse_binpat_str(patterns, 0, seqstr, 16, IDA_NALT_ENCODING) + + if err: + return + + while True: + ea, _ = ida_bytes.bin_search3(start, end, patterns, ida_bytes.BIN_SEARCH_FORWARD) + if ea == idaapi.BADADDR: + break + start = ea + 1 + yield ea + +else: + # for IDA 7.5 and older; using deprecated find_binary instead of bin_search + def find_byte_sequence(start: int, end: int, seq: bytes) -> Iterator[int]: + """yield all ea of a given byte sequence + + args: + start: min virtual address + end: max virtual address + seq: bytes to search e.g. b"\x01\x03" + """ + seqstr = " ".join([f"{b:02x}" for b in seq]) + while True: + ea = idaapi.find_binary(start, end, seqstr, 0, idaapi.SEARCH_DOWN) + if ea == idaapi.BADADDR: + break + start = ea + 1 + yield ea def get_functions(