Skip to content

Commit

Permalink
extract import / export symbols from stripped elf binaries (#2142)
Browse files Browse the repository at this point in the history
  • Loading branch information
ygasparis committed Jun 18, 2024
1 parent 1360e08 commit 1975b64
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 40 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

### Bug Fixes

- elf: extract import / export symbols from stripped binaries #2096 @ygasparis

### capa explorer IDA Pro plugin

### Development
Expand Down
51 changes: 34 additions & 17 deletions capa/features/extractors/elffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from typing import Tuple, Iterator
from pathlib import Path

from elftools.elf.elffile import ELFFile, SymbolTableSection
from elftools.elf.relocation import RelocationSection
from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection

import capa.features.extractors.common
from capa.features.file import Export, Import, Section
Expand Down Expand Up @@ -47,17 +46,37 @@ def extract_file_export_names(elf: ELFFile, **kwargs):

yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)

for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue

logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())

for symbol in segment.iter_symbols():
# The filter conditions below are based on this article:
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
continue
if symbol.entry.st_info.type not in ["STT_FUNC", "STT_OBJECT", "STT_IFUNC"]:
continue
if symbol.entry.st_value == 0:
continue
if symbol.entry.st_shndx == "SHN_UNDEF":
continue

yield Export(symbol.name), AbsoluteVirtualAddress(symbol.entry.st_value)


def extract_file_import_names(elf: ELFFile, **kwargs):
# Create a dictionary to store symbol names by their index
symbol_names = {}

# Extract symbol names and store them in the dictionary
for section in elf.iter_sections():
if not isinstance(section, SymbolTableSection):
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue

for _, symbol in enumerate(section.iter_symbols()):
for _, symbol in enumerate(segment.iter_symbols()):
# The filter conditions below are based on this article:
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
if not symbol.name:
Expand All @@ -73,21 +92,19 @@ def extract_file_import_names(elf: ELFFile, **kwargs):

symbol_names[_] = symbol.name

for section in elf.iter_sections():
if not isinstance(section, RelocationSection):
continue

if section["sh_entsize"] == 0:
logger.debug("Symbol table '%s' has a sh_entsize of zero!", section.name)
for segment in elf.iter_segments():
if not isinstance(segment, DynamicSegment):
continue

logger.debug("Symbol table '%s' contains %s entries:", section.name, section.num_relocations())
relocation_tables = segment.get_relocation_tables()
logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))

for relocation in section.iter_relocations():
# Extract the symbol name from the symbol table using the symbol index in the relocation
if relocation["r_info_sym"] not in symbol_names:
continue
yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])
for relocation_table in relocation_tables.values():
for relocation in relocation_table.iter_relocations():
# Extract the symbol name from the symbol table using the symbol index in the relocation
if relocation["r_info_sym"] not in symbol_names:
continue
yield Import(symbol_names[relocation["r_info_sym"]]), FileOffsetAddress(relocation["r_offset"])


def extract_file_section_names(elf: ELFFile, **kwargs):
Expand Down
73 changes: 50 additions & 23 deletions tests/test_elffile_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,11 @@

CD = Path(__file__).resolve().parent
SAMPLE_PATH = CD / "data" / "055da8e6ccfe5a9380231ea04b850e18.elf_"
STRIPPED_SAMPLE_PATH = CD / "data" / "bb38149ff4b5c95722b83f24ca27a42b.elf_"


def test_elffile_import_features():
expected_imports = [
"memfrob",
"puts",
"__libc_start_main",
"malloc",
"__cxa_finalize",
]
path = Path(SAMPLE_PATH)
def check_import_features(sample_path, expected_imports):
path = Path(sample_path)
elf = ELFFile(io.BytesIO(path.read_bytes()))
# Extract imports
imports = list(extract_file_import_names(elf))
Expand All @@ -40,6 +34,52 @@ def test_elffile_import_features():
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in imports."


def check_export_features(sample_path, expected_exports):
    """Assert that every expected export name is extracted from an ELF sample.

    The file at ``sample_path`` is read fully into memory, parsed with
    ``ELFFile``, and passed to ``extract_file_export_names``.

    Args:
        sample_path: location of the ELF sample on disk.
        expected_exports: iterable of symbol names that must be present
            among the extracted exports.

    Raises:
        AssertionError: if no exports are extracted at all, or if any
            expected name is missing from the extracted exports.
    """
    raw_bytes = Path(sample_path).read_bytes()
    elf = ELFFile(io.BytesIO(raw_bytes))

    # Materialise the generator so it can be both counted and searched.
    exports = list(extract_file_export_names(elf))
    assert len(exports) > 0, "No exports were found."

    # Each extracted item is indexable; its first element carries the
    # exported symbol name in ``.value``.
    extracted_symbol_names = [item[0].value for item in exports]

    # Every expected name must appear among the extracted exports.
    for symbol_name in expected_exports:
        assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."


def test_stripped_elffile_import_features():
    """Check that the known imports are extracted from the stripped ELF sample."""
    check_import_features(
        STRIPPED_SAMPLE_PATH,
        [
            "__cxa_atexit",
            "__cxa_finalize",
            "__stack_chk_fail",
            "fclose",
            "fopen",
            "__android_log_print",
        ],
    )


def test_stripped_elffile_export_features():
    """Check that the known exports are extracted from the stripped ELF sample."""
    check_export_features(
        STRIPPED_SAMPLE_PATH,
        [
            "_ZN7_JNIEnv14GetArrayLengthEP7_jarray",
            "Java_o_ac_a",
            "Java_o_ac_b",
            "_Z6existsPKc",
            "_ZN7_JNIEnv17GetStringUTFCharsEP8_jstringPh",
            "_ZN7_JNIEnv21GetObjectArrayElementEP13_jobjectArrayi",
            "_ZN7_JNIEnv21ReleaseStringUTFCharsEP8_jstringPKc",
        ],
    )


def test_elffile_import_features():
    """Check that the known imports are extracted from the default ELF sample."""
    check_import_features(
        SAMPLE_PATH,
        [
            "memfrob",
            "puts",
            "__libc_start_main",
            "malloc",
            "__cxa_finalize",
        ],
    )


def test_elffile_export_features():
expected_exports = [
"deregister_tm_clones",
Expand All @@ -55,17 +95,4 @@ def test_elffile_export_features():
"_IO_stdin_used",
"__libc_csu_init",
]
path = Path(SAMPLE_PATH)
elf = ELFFile(io.BytesIO(path.read_bytes()))
# Extract imports
exports = list(extract_file_export_names(elf))

# Verify that at least one export was found
assert len(exports) > 0, "No exports were found."

# Extract the symbol names from the extracted imports
extracted_symbol_names = [exported[0].value for exported in exports]

# Check if all expected symbol names are found
for symbol_name in expected_exports:
assert symbol_name in extracted_symbol_names, f"Symbol '{symbol_name}' not found in exports."
check_export_features(SAMPLE_PATH, expected_exports)

0 comments on commit 1975b64

Please sign in to comment.