Skip to content

Commit

Permalink
perf: cache alias mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
nedbat committed May 28, 2024
1 parent 390cb97 commit c45ebac
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 30 deletions.
2 changes: 1 addition & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ upgrading your version of coverage.py.
Unreleased
----------

Nothing yet.
- Performance improvement for combining data files.


.. scriv-start-here
Expand Down
2 changes: 1 addition & 1 deletion coverage/control.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,7 @@ def _prepare_data_for_reporting(self) -> None:
if self.config.paths:
mapped_data = CoverageData(warn=self._warn, debug=self._debug, no_disk=True)
if self._data is not None:
mapped_data.update(self._data, aliases=self._make_aliases())
mapped_data.update(self._data, map_path=self._make_aliases().map)
self._data = mapped_data

def report(
Expand Down
8 changes: 7 additions & 1 deletion coverage/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from __future__ import annotations

import functools
import glob
import hashlib
import os.path
Expand Down Expand Up @@ -134,6 +135,11 @@ def combine_parallel_data(
if strict and not files_to_combine:
raise NoDataError("No data to combine")

if aliases is None:
map_path = None
else:
map_path = functools.lru_cache(maxsize=None)(aliases.map)

file_hashes = set()
combined_any = False

Expand Down Expand Up @@ -176,7 +182,7 @@ def combine_parallel_data(
message(f"Couldn't combine data file {rel_file_name}: {exc}")
delete_this_one = False
else:
data.update(new_data, aliases=aliases)
data.update(new_data, map_path=map_path)
combined_any = True
if message:
message(f"Combined data file {rel_file_name}")
Expand Down
56 changes: 29 additions & 27 deletions coverage/sqldata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@
import zlib

from typing import (
cast, Any, Collection, Mapping,
cast, Any, Callable, Collection, Mapping,
Sequence,
)

from coverage.debug import NoDebugging, auto_repr
from coverage.exceptions import CoverageException, DataError
from coverage.files import PathAliases
from coverage.misc import file_be_gone, isolate_module
from coverage.numbits import numbits_to_nums, numbits_union, nums_to_numbits
from coverage.sqlitedb import SqliteDb
Expand Down Expand Up @@ -647,12 +646,16 @@ def purge_files(self, filenames: Collection[str]) -> None:
continue
con.execute_void(sql, (file_id,))

def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -> None:
"""Update this data with data from several other :class:`CoverageData` instances.
def update(
self,
other_data: CoverageData,
map_path: Callable[[str], str] | None = None,
) -> None:
"""Update this data with data from another :class:`CoverageData`.
If `aliases` is provided, it's a `PathAliases` object that is used to
re-map paths to match the local machine's. Note: `aliases` is None
only when called directly from the test suite.
If `map_path` is provided, it's a function that re-maps paths to match
the local machine's. Note: `map_path` is None only when called
directly from the test suite.
"""
if self._debug.should("dataop"):
Expand All @@ -664,7 +667,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
if self._has_arcs and other_data._has_lines:
raise DataError("Can't combine line data with arc data")

aliases = aliases or PathAliases()
map_path = map_path or (lambda p: p)

# Force the database we're writing to to exist before we start nesting contexts.
self._start_using()
Expand All @@ -674,7 +677,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
with other_data._connect() as con:
# Get files data.
with con.execute("select path from file") as cur:
files = {path: aliases.map(path) for (path,) in cur}
files = {path: map_path(path) for (path,) in cur}

# Get contexts data.
with con.execute("select context from context") as cur:
Expand Down Expand Up @@ -729,7 +732,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
"inner join file on file.id = tracer.file_id",
) as cur:
this_tracers.update({
aliases.map(path): tracer
map_path(path): tracer
for path, tracer in cur
})

Expand Down Expand Up @@ -768,7 +771,21 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
# and context strings with integer ids. Then use the efficient
# `executemany()` to insert all rows at once.

# Get line data.
if arcs:
self._choose_lines_or_arcs(arcs=True)

arc_rows = (
(file_ids[file], context_ids[context], fromno, tono)
for file, context, fromno, tono in arcs
)

# Write the combined data.
con.executemany_void(
"insert or ignore into arc " +
"(file_id, context_id, fromno, tono) values (?, ?, ?, ?)",
arc_rows,
)

if lines:
self._choose_lines_or_arcs(lines=True)

Expand All @@ -779,7 +796,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
"inner join context on context.id = line_bits.context_id",
) as cur:
for path, context, numbits in cur:
key = (aliases.map(path), context)
key = (map_path(path), context)
if key in lines:
lines[key] = numbits_union(lines[key], numbits)

Expand All @@ -792,21 +809,6 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
],
)

if arcs:
self._choose_lines_or_arcs(arcs=True)

arc_rows = (
(file_ids[file], context_ids[context], fromno, tono)
for file, context, fromno, tono in arcs
)

# Write the combined data.
con.executemany_void(
"insert or ignore into arc " +
"(file_id, context_id, fromno, tono) values (?, ?, ?, ?)",
arc_rows,
)

con.executemany_void(
"insert or ignore into tracer (file_id, tracer) values (?, ?)",
((file_ids[filename], tracer) for filename, tracer in tracer_map.items()),
Expand Down

0 comments on commit c45ebac

Please sign in to comment.