diff --git a/CHANGELOG.md b/CHANGELOG.md index fab4d23a5..1c64822e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ ### Bug Fixes +- elf: extract import / export symbols from stripped binaries #2096 @ygasparis + ### capa explorer IDA Pro plugin ### Development diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index 9dac99838..c499ac4f0 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -10,8 +10,7 @@ from typing import Tuple, Iterator from pathlib import Path -from elftools.elf.elffile import ELFFile, SymbolTableSection -from elftools.elf.relocation import RelocationSection +from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection import capa.features.extractors.common from capa.features.file import Export, Import, Section @@ -47,17 +46,37 @@ def extract_file_export_names(elf: ELFFile, **kwargs): yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value) + for segment in elf.iter_segments(): + if not isinstance(segment, DynamicSegment): + continue + + logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols()) + + for symbol in segment.iter_symbols(): + # The following conditions are based on the following article + # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html + if not symbol.name: + continue + if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]: + continue + if symbol.entry.st_value == 0: + continue + if symbol.entry.st_shndx == "SHN_UNDEF": + continue + + yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value) + def extract_file_import_names(elf: ELFFile, **kwargs): # Create a dictionary to store symbol names by their index symbol_names = {} # Extract symbol names and store them in the dictionary - for section in elf.iter_sections(): - if not isinstance(section, SymbolTableSection): + for segment in elf.iter_segments(): + if not 
isinstance(segment, DynamicSegment): continue - for _, symbol in enumerate(section.iter_symbols()): + for _, symbol in enumerate(segment.iter_symbols()): # The following conditions are based on the following article # http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html if not symbol.name: @@ -73,21 +92,19 @@ def extract_file_import_names(elf: ELFFile, **kwargs): symbol_names[_] = symbol.name - for section in elf.iter_sections(): - if not isinstance(section, RelocationSection): - continue - - if section["sh_entsize"] == 0: - logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name) + for segment in elf.iter_segments(): + if not isinstance(segment, DynamicSegment): continue - logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations()) + relocation_tables = segment.get_relocation_tables() + logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables)) - for relocation in section.iter_relocations(): - # Extract the symbol name from the symbol table using the symbol index in the relocation - if relocation["r_info_sym"] not in symbol_names: - continue - yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"]) + for relocation_table in relocation_tables.values(): + for relocation in relocation_table.iter_relocations(): + # Extract the symbol name from the symbol table using the symbol index in the relocation + if relocation["r_info_sym"] not in symbol_names: + continue + yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"]) def extract_file_section_names(elf: ELFFile, **kwargs): diff --git a/tests/test_elffile_features.py b/tests/test_elffile_features.py index 7c10bc480..91deff8eb 100644 --- a/tests/test_elffile_features.py +++ b/tests/test_elffile_features.py @@ -14,17 +14,11 @@ CD = Path(__file__).resolve().parent SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_" 
+STRIPPED_SAMPLE_PATH = CD / "data" / "bb38149ff4b5c95722b83f24ca27a42b.elf_" -def test_elffile_import_features(): - expected_imports = [ - "memfrob", - "puts", - "__libc_start_main", - "malloc", - "__cxa_finalize", - ] - path = Path(SAMPLE_PATH) +def check_import_features(sample_path, expected_imports): + path = Path(sample_path) elf = ELFFile(io.BytesIO(path.read_bytes())) # Extract imports imports = list(extract_file_import_names(elf)) @@ -40,6 +34,52 @@ def test_elffile_import_features(): assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports." +def check_export_features(sample_path, expected_exports): + path = Path(sample_path) + elf = ELFFile(io.BytesIO(path.read_bytes())) + # Extract exports + exports = list(extract_file_export_names(elf)) + + # Verify that at least one export was found + assert len(exports) > 0, "No exports were found." + + # Extract the symbol names from the extracted exports + extracted_symbol_names = [exported[0].value for exported in exports] + + # Check if all expected symbol names are found + for symbol_name in expected_exports: + assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports." 
+ + +def test_stripped_elffile_import_features(): + expected_imports = ["__cxa_atexit", "__cxa_finalize", "__stack_chk_fail", "fclose", "fopen", "__android_log_print"] + check_import_features(STRIPPED_SAMPLE_PATH, expected_imports) + + +def test_stripped_elffile_export_features(): + expected_exports = [ + "_ZN7_JNIEnv14GetArrayLengthEP7_jarray", + "Java_o_ac_a", + "Java_o_ac_b", + "_Z6existsPKc", + "_ZN7_JNIEnv17GetStringUTFCharsEP8_jstringPh", + "_ZN7_JNIEnv21GetObjectArrayElementEP13_jobjectArrayi", + "_ZN7_JNIEnv21ReleaseStringUTFCharsEP8_jstringPKc", + ] + check_export_features(STRIPPED_SAMPLE_PATH, expected_exports) + + +def test_elffile_import_features(): + expected_imports = [ + "memfrob", + "puts", + "__libc_start_main", + "malloc", + "__cxa_finalize", + ] + check_import_features(SAMPLE_PATH, expected_imports) + + def test_elffile_export_features(): expected_exports = [ "deregister_tm_clones", @@ -55,17 +95,4 @@ def test_elffile_export_features(): "_IO_stdin_used", "__libc_csu_init", ] - path = Path(SAMPLE_PATH) - elf = ELFFile(io.BytesIO(path.read_bytes())) - # Extract imports - exports = list(extract_file_export_names(elf)) - - # Verify that at least one export was found - assert len(exports) > 0, "No exports were found." - - # Extract the symbol names from the extracted imports - extracted_symbol_names = [exported[0].value for exported in exports] - - # Check if all expected symbol names are found - for symbol_name in expected_exports: - assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports." + check_export_features(SAMPLE_PATH, expected_exports)