Skip to content

Commit

Permalink
Merge pull request #1623 from Aayush-Goel-04/Aayush-Goel-04/Issue#1534
Browse files (browse the repository at this point in the history)
Updated file paths to use pathlib.Path instance
  • Loading branch information
williballenthin committed Jul 12, 2023
2 parents deb6114 + d438b90 commit 90a2fd9
Show file tree
Hide file tree
Showing 39 changed files with 366 additions and 389 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
### Breaking Changes
- Update Metadata type in capa main [#1411](https://github.com/mandiant/capa/issues/1411) [@Aayush-Goel-04](https://github.com/aayush-goel-04) @manasghandat
- Python 3.8 is now the minimum supported Python version #1578 @williballenthin
- Updated file paths to use pathlib.Path for improved path handling and compatibility [#1534](https://github.com/mandiant/capa/issues/1534) [@Aayush-Goel-04](https://github.com/aayush-goel-04)

### New Rules (24)

Expand Down
5 changes: 3 additions & 2 deletions capa/features/extractors/binja/find_binja_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import subprocess
from pathlib import Path

# When the script gets executed as a standalone executable (via PyInstaller), `import binaryninja` does not work because
# we have excluded the binaryninja module in `pyinstaller.spec`. The trick here is to call the system Python and try
Expand All @@ -25,9 +26,9 @@
"""


def find_binja_path() -> str:
def find_binja_path() -> Path:
raw_output = subprocess.check_output(["python", "-c", code]).decode("ascii").strip()
return bytes.fromhex(raw_output).decode("utf8")
return Path(bytes.fromhex(raw_output).decode("utf8"))


if __name__ == "__main__":
Expand Down
5 changes: 3 additions & 2 deletions capa/features/extractors/dnfile/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from __future__ import annotations

from typing import Dict, List, Tuple, Union, Iterator, Optional
from pathlib import Path

import dnfile
from dncil.cil.opcode import OpCodes
Expand Down Expand Up @@ -68,9 +69,9 @@ def get_type(self, token: int) -> Optional[Union[DnType, DnUnmanagedMethod]]:


class DnfileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.pe: dnfile.dnPE = dnfile.dnPE(path)
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))

# pre-compute .NET token lookup tables; each .NET method has access to this cache for feature extraction
# most relevant at instruction scope
Expand Down
7 changes: 4 additions & 3 deletions capa/features/extractors/dnfile_.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from typing import Tuple, Iterator
from pathlib import Path

import dnfile
import pefile
Expand Down Expand Up @@ -74,10 +75,10 @@ def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]


class DnfileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path: str = path
self.pe: dnfile.dnPE = dnfile.dnPE(path)
self.path: Path = path
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))

def get_base_address(self) -> AbsoluteVirtualAddress:
return AbsoluteVirtualAddress(0x0)
Expand Down
7 changes: 4 additions & 3 deletions capa/features/extractors/dotnetfile.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from typing import Tuple, Iterator
from pathlib import Path

import dnfile
import pefile
Expand Down Expand Up @@ -158,10 +159,10 @@ def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]


class DotnetFileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path: str = path
self.pe: dnfile.dnPE = dnfile.dnPE(path)
self.path: Path = path
self.pe: dnfile.dnPE = dnfile.dnPE(str(path))

def get_base_address(self):
return NO_ADDRESS
Expand Down
14 changes: 6 additions & 8 deletions capa/features/extractors/elffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import io
import logging
from typing import Tuple, Iterator
from pathlib import Path

from elftools.elf.elffile import ELFFile, SymbolTableSection

Expand Down Expand Up @@ -107,11 +108,10 @@ def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature,


class ElfFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path = path
with open(self.path, "rb") as f:
self.elf = ELFFile(io.BytesIO(f.read()))
self.path: Path = path
self.elf = ELFFile(io.BytesIO(path.read_bytes()))

def get_base_address(self):
# virtual address of the first segment with type LOAD
Expand All @@ -120,15 +120,13 @@ def get_base_address(self):
return AbsoluteVirtualAddress(segment.header.p_vaddr)

def extract_global_features(self):
with open(self.path, "rb") as f:
buf = f.read()
buf = self.path.read_bytes()

for feature, addr in extract_global_features(self.elf, buf):
yield feature, addr

def extract_file_features(self):
with open(self.path, "rb") as f:
buf = f.read()
buf = self.path.read_bytes()

for feature, addr in extract_file_features(self.elf, buf):
yield feature, addr
Expand Down
13 changes: 6 additions & 7 deletions capa/features/extractors/pefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License.

import logging
from pathlib import Path

import pefile

Expand Down Expand Up @@ -173,23 +174,21 @@ def extract_global_features(pe, buf):


class PefileFeatureExtractor(FeatureExtractor):
def __init__(self, path: str):
def __init__(self, path: Path):
super().__init__()
self.path = path
self.pe = pefile.PE(path)
self.path: Path = path
self.pe = pefile.PE(str(path))

def get_base_address(self):
return AbsoluteVirtualAddress(self.pe.OPTIONAL_HEADER.ImageBase)

def extract_global_features(self):
with open(self.path, "rb") as f:
buf = f.read()
buf = Path(self.path).read_bytes()

yield from extract_global_features(self.pe, buf)

def extract_file_features(self):
with open(self.path, "rb") as f:
buf = f.read()
buf = Path(self.path).read_bytes()

yield from extract_file_features(self.pe, buf)

Expand Down
6 changes: 3 additions & 3 deletions capa/features/extractors/viv/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License.
import logging
from typing import Any, Dict, List, Tuple, Iterator
from pathlib import Path

import viv_utils
import viv_utils.flirt
Expand All @@ -25,12 +26,11 @@


class VivisectFeatureExtractor(FeatureExtractor):
def __init__(self, vw, path, os):
def __init__(self, vw, path: Path, os):
super().__init__()
self.vw = vw
self.path = path
with open(self.path, "rb") as f:
self.buf = f.read()
self.buf = path.read_bytes()

# pre-compute these because we'll yield them at *every* scope.
self.global_features: List[Tuple[Feature, Address]] = []
Expand Down
4 changes: 2 additions & 2 deletions capa/features/freeze/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ def load(buf: bytes) -> capa.features.extractors.base_extractor.FeatureExtractor
def main(argv=None):
import sys
import argparse
from pathlib import Path

import capa.main

Expand All @@ -398,8 +399,7 @@ def main(argv=None):

extractor = capa.main.get_extractor(args.sample, args.format, args.os, args.backend, sigpaths, False)

with open(args.output, "wb") as f:
f.write(dump(extractor))
Path(args.output).write_bytes(dump(extractor))

return 0

Expand Down
22 changes: 10 additions & 12 deletions capa/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import os
import inspect
import logging
import contextlib
import importlib.util
from typing import NoReturn
from pathlib import Path

import tqdm

Expand All @@ -32,11 +32,10 @@ def hex(n: int) -> str:
return f"0x{(n):X}"


def get_file_taste(sample_path: str) -> bytes:
if not os.path.exists(sample_path):
def get_file_taste(sample_path: Path) -> bytes:
if not sample_path.exists():
raise IOError(f"sample path {sample_path} does not exist or cannot be accessed")
with open(sample_path, "rb") as f:
taste = f.read(8)
taste = sample_path.open("rb").read(8)
return taste


Expand All @@ -50,15 +49,15 @@ def assert_never(value) -> NoReturn:
assert False, f"Unhandled value: {value} ({type(value).__name__})" # noqa: B011


def get_format_from_extension(sample: str) -> str:
if sample.endswith(EXTENSIONS_SHELLCODE_32):
def get_format_from_extension(sample: Path) -> str:
if sample.name.endswith(EXTENSIONS_SHELLCODE_32):
return FORMAT_SC32
elif sample.endswith(EXTENSIONS_SHELLCODE_64):
elif sample.name.endswith(EXTENSIONS_SHELLCODE_64):
return FORMAT_SC64
return FORMAT_UNKNOWN


def get_auto_format(path: str) -> str:
def get_auto_format(path: Path) -> str:
format_ = get_format(path)
if format_ == FORMAT_UNKNOWN:
format_ = get_format_from_extension(path)
Expand All @@ -67,13 +66,12 @@ def get_auto_format(path: str) -> str:
return format_


def get_format(sample: str) -> str:
def get_format(sample: Path) -> str:
# imported locally to avoid import cycle
from capa.features.extractors.common import extract_format
from capa.features.extractors.dnfile_ import DnfileFeatureExtractor

with open(sample, "rb") as f:
buf = f.read()
buf = sample.read_bytes()

for feature, _ in extract_format(buf):
if feature == Format(FORMAT_PE):
Expand Down
7 changes: 4 additions & 3 deletions capa/ida/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
import logging
import datetime
import contextlib
from typing import Optional
from typing import List, Optional
from pathlib import Path

import idc
import idaapi
Expand Down Expand Up @@ -120,7 +121,7 @@ def get_file_sha256():
return sha256


def collect_metadata(rules):
def collect_metadata(rules: List[Path]):
""" """
md5 = get_file_md5()
sha256 = get_file_sha256()
Expand Down Expand Up @@ -157,7 +158,7 @@ def collect_metadata(rules):
arch=arch,
os=os,
extractor="ida",
rules=rules,
rules=tuple(r.resolve().absolute().as_posix() for r in rules),
base_address=capa.features.freeze.Address.from_capa(idaapi.get_imagebase()),
layout=rdoc.Layout(
functions=(),
Expand Down
Loading

0 comments on commit 90a2fd9

Please sign in to comment.