From c45ebac1b83656bfdd92d6675f08fa33627785e6 Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Mon, 27 May 2024 17:52:33 -0400 Subject: [PATCH] perf: cache alias mapping --- CHANGES.rst | 2 +- coverage/control.py | 2 +- coverage/data.py | 8 ++++++- coverage/sqldata.py | 56 +++++++++++++++++++++++---------------------- 4 files changed, 38 insertions(+), 30 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 6da222b4d..20486a560 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -23,7 +23,7 @@ upgrading your version of coverage.py. Unreleased ---------- -Nothing yet. +- Performance improvement for combining data files. .. scriv-start-here diff --git a/coverage/control.py b/coverage/control.py index dbca2013d..614dd8d49 100644 --- a/coverage/control.py +++ b/coverage/control.py @@ -998,7 +998,7 @@ def _prepare_data_for_reporting(self) -> None: if self.config.paths: mapped_data = CoverageData(warn=self._warn, debug=self._debug, no_disk=True) if self._data is not None: - mapped_data.update(self._data, aliases=self._make_aliases()) + mapped_data.update(self._data, map_path=self._make_aliases().map) self._data = mapped_data def report( diff --git a/coverage/data.py b/coverage/data.py index 9513adfca..1252e4147 100644 --- a/coverage/data.py +++ b/coverage/data.py @@ -12,6 +12,7 @@ from __future__ import annotations +import functools import glob import hashlib import os.path @@ -134,6 +135,11 @@ def combine_parallel_data( if strict and not files_to_combine: raise NoDataError("No data to combine") + if aliases is None: + map_path = None + else: + map_path = functools.lru_cache(maxsize=None)(aliases.map) + file_hashes = set() combined_any = False @@ -176,7 +182,7 @@ def combine_parallel_data( message(f"Couldn't combine data file {rel_file_name}: {exc}") delete_this_one = False else: - data.update(new_data, aliases=aliases) + data.update(new_data, map_path=map_path) combined_any = True if message: message(f"Combined data file {rel_file_name}") diff --git 
a/coverage/sqldata.py b/coverage/sqldata.py index 66613a4d2..5dd29f67c 100644 --- a/coverage/sqldata.py +++ b/coverage/sqldata.py @@ -21,13 +21,12 @@ import zlib from typing import ( - cast, Any, Collection, Mapping, + cast, Any, Callable, Collection, Mapping, Sequence, ) from coverage.debug import NoDebugging, auto_repr from coverage.exceptions import CoverageException, DataError -from coverage.files import PathAliases from coverage.misc import file_be_gone, isolate_module from coverage.numbits import numbits_to_nums, numbits_union, nums_to_numbits from coverage.sqlitedb import SqliteDb @@ -647,12 +646,16 @@ def purge_files(self, filenames: Collection[str]) -> None: continue con.execute_void(sql, (file_id,)) - def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -> None: - """Update this data with data from several other :class:`CoverageData` instances. + def update( + self, + other_data: CoverageData, + map_path: Callable[[str], str] | None = None, + ) -> None: + """Update this data with data from another :class:`CoverageData`. - If `aliases` is provided, it's a `PathAliases` object that is used to - re-map paths to match the local machine's. Note: `aliases` is None - only when called directly from the test suite. + If `map_path` is provided, it's a function that re-map paths to match + the local machine's. Note: `map_path` is None only when called + directly from the test suite. """ if self._debug.should("dataop"): @@ -664,7 +667,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) - if self._has_arcs and other_data._has_lines: raise DataError("Can't combine line data with arc data") - aliases = aliases or PathAliases() + map_path = map_path or (lambda p: p) # Force the database we're writing to to exist before we start nesting contexts. 
self._start_using() @@ -674,7 +677,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) - with other_data._connect() as con: # Get files data. with con.execute("select path from file") as cur: - files = {path: aliases.map(path) for (path,) in cur} + files = {path: map_path(path) for (path,) in cur} # Get contexts data. with con.execute("select context from context") as cur: @@ -729,7 +732,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) - "inner join file on file.id = tracer.file_id", ) as cur: this_tracers.update({ - aliases.map(path): tracer + map_path(path): tracer for path, tracer in cur }) @@ -768,7 +771,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) - # and context strings with integer ids. Then use the efficient # `executemany()` to insert all rows at once. - # Get line data. + if arcs: + self._choose_lines_or_arcs(arcs=True) + + arc_rows = ( + (file_ids[file], context_ids[context], fromno, tono) + for file, context, fromno, tono in arcs + ) + + # Write the combined data. + con.executemany_void( + "insert or ignore into arc " + + "(file_id, context_id, fromno, tono) values (?, ?, ?, ?)", + arc_rows, + ) + if lines: self._choose_lines_or_arcs(lines=True) @@ -779,7 +796,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) - "inner join context on context.id = line_bits.context_id", ) as cur: for path, context, numbits in cur: - key = (aliases.map(path), context) + key = (map_path(path), context) if key in lines: lines[key] = numbits_union(lines[key], numbits) @@ -792,21 +809,6 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) - ], ) - if arcs: - self._choose_lines_or_arcs(arcs=True) - - arc_rows = ( - (file_ids[file], context_ids[context], fromno, tono) - for file, context, fromno, tono in arcs - ) - - # Write the combined data. 
- con.executemany_void( - "insert or ignore into arc " + - "(file_id, context_id, fromno, tono) values (?, ?, ?, ?)", - arc_rows, - ) - con.executemany_void( "insert or ignore into tracer (file_id, tracer) values (?, ?)", ((file_ids[filename], tracer) for filename, tracer in tracer_map.items()),