Skip to content

Commit

Permalink
ELF: better handle corrupt files (#2227)
Browse files Browse the repository at this point in the history
such as when there's a missing symbol table or an invalid relocation table,
and handle the case where Viv fails to load a workspace.

closes #2226
  • Loading branch information
williballenthin committed Jul 24, 2024
1 parent fd686ac commit e2e84f7
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
### Bug Fixes

- elf: extract import / export symbols from stripped binaries #2096 @ygasparis
- elf: fix handling of symbols in corrupt ELF files #2226 @williballenthin

### capa explorer IDA Pro plugin

Expand Down
23 changes: 21 additions & 2 deletions capa/features/extractors/elffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ def extract_file_export_names(elf: ELFFile, **kwargs):
if not isinstance(segment, DynamicSegment):
continue

logger.debug("Dynamic Segment contains %s symbols: ", segment.num_symbols())
tab_ptr, tab_offset = segment.get_table_offset("DT_SYMTAB")
if tab_ptr is None or tab_offset is None:
logger.debug("Dynamic segment doesn't contain DT_SYMTAB")
continue

logger.debug("Dynamic segment contains %s symbols: ", segment.num_symbols())

for symbol in segment.iter_symbols():
# The following conditions are based on the following article
Expand All @@ -76,6 +81,11 @@ def extract_file_import_names(elf: ELFFile, **kwargs):
if not isinstance(segment, DynamicSegment):
continue

tab_ptr, tab_offset = segment.get_table_offset("DT_SYMTAB")
if tab_ptr is None or tab_offset is None:
logger.debug("Dynamic segment doesn't contain DT_SYMTAB")
continue

for _, symbol in enumerate(segment.iter_symbols()):
# The following conditions are based on the following article
# http://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
Expand All @@ -100,7 +110,16 @@ def extract_file_import_names(elf: ELFFile, **kwargs):
logger.debug("Dynamic Segment contains %s relocation tables:", len(relocation_tables))

for relocation_table in relocation_tables.values():
for relocation in relocation_table.iter_relocations():
relocations = []
for i in range(relocation_table.num_relocations()):
try:
relocations.append(relocation_table.get_relocation(i))
except TypeError:
# ELF is corrupt and the relocation table is invalid,
# so stop processing it.
break

for relocation in relocations:
# Extract the symbol name from the symbol table using the symbol index in the relocation
if relocation["r_info_sym"] not in symbol_names:
continue
Expand Down
39 changes: 25 additions & 14 deletions capa/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@
BACKEND_FREEZE = "freeze"


class CorruptFile(ValueError):
    """Raised when an input file is too malformed to be loaded for analysis.

    Derives from ValueError, so handlers that catch ValueError also catch
    this exception. The message passed to the constructor should describe
    what was wrong with the file.
    """


def is_supported_format(sample: Path) -> bool:
"""
Return if this is a supported file based on magic header values
Expand Down Expand Up @@ -137,21 +141,28 @@ def get_workspace(path: Path, input_format: str, sigpaths: List[Path]):
import viv_utils.flirt

logger.debug("generating vivisect workspace for: %s", path)
if input_format == FORMAT_AUTO:
if not is_supported_format(path):
raise UnsupportedFormatError()

# don't analyze, so that we can add our Flirt function analyzer first.
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
elif input_format in {FORMAT_PE, FORMAT_ELF}:
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
elif input_format == FORMAT_SC32:
# these are not analyzed nor saved.
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False)
elif input_format == FORMAT_SC64:
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
else:
raise ValueError("unexpected format: " + input_format)
try:
if input_format == FORMAT_AUTO:
if not is_supported_format(path):
raise UnsupportedFormatError()

# don't analyze, so that we can add our Flirt function analyzer first.
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
elif input_format in {FORMAT_PE, FORMAT_ELF}:
vw = viv_utils.getWorkspace(str(path), analyze=False, should_save=False)
elif input_format == FORMAT_SC32:
# these are not analyzed nor saved.
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="i386", analyze=False)
elif input_format == FORMAT_SC64:
vw = viv_utils.getShellcodeWorkspaceFromFile(str(path), arch="amd64", analyze=False)
else:
raise ValueError("unexpected format: " + input_format)
except Exception as e:
# vivisect raises raw Exception instances, and we don't want
# to do a subclass check via isinstance.
if type(e) is Exception and "Couldn't convert rva" in e.args[0]:
raise CorruptFile(e.args[0]) from e

viv_utils.flirt.register_flirt_signature_analyzers(vw, [str(s) for s in sigpaths])

Expand Down
3 changes: 3 additions & 0 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,9 @@ def get_extractor_from_cli(args, input_format: str, backend: str) -> FeatureExtr
except UnsupportedOSError as e:
log_unsupported_os_error()
raise ShouldExitError(E_INVALID_FILE_OS) from e
except capa.loader.CorruptFile as e:
logger.error("Input file '%s' is not a valid file: %s", args.input_file, str(e))
raise ShouldExitError(E_CORRUPT_FILE) from e


def main(argv: Optional[List[str]] = None):
Expand Down

0 comments on commit e2e84f7

Please sign in to comment.