diff --git a/dvc/commands/add.py b/dvc/commands/add.py
index 38146b30e1..471fea8716 100644
--- a/dvc/commands/add.py
+++ b/dvc/commands/add.py
@@ -52,6 +52,7 @@ def run(self):
                 remote_jobs=self.args.remote_jobs,
                 force=self.args.force,
                 relink=self.args.relink,
+                skip_graph_checks=self.args.skip_graph_checks,
             )
         except FileNotFoundError:
             logger.exception("")
@@ -127,6 +128,16 @@ def add_parser(subparsers, parent_parser):
         help="Don't recreate links from cache to workspace.",
     )
     parser.set_defaults(relink=True)
+    parser.add_argument(
+        # Do we want a short code here?
+        "--skip-graph-checks",
+        action="store_true",
+        help=(
+            "Can speed up simple add operations by avoiding graph checks. "
+            "Warning: partial or virtual will not work when enabled."
+        ),
+    )
+
     parser.add_argument(
         "targets", nargs="+", help="Input files/directories to add."
     ).complete = completion.FILE
diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py
index 8d67614e0a..54c3352209 100644
--- a/dvc/repo/__init__.py
+++ b/dvc/repo/__init__.py
@@ -289,6 +289,11 @@ def check_graph(
             if callable(callback):
                 callback()
             new.check_graph()
+        else:
+            logger.warning(
+                "partial or virtual add does not work "
+                "when --skip-graph-checks are enabled"
+            )

     @staticmethod
     def open(url: Optional[str], *args, **kwargs) -> "Repo":
diff --git a/dvc/repo/add.py b/dvc/repo/add.py
index 7cbacad793..871adcd008 100644
--- a/dvc/repo/add.py
+++ b/dvc/repo/add.py
@@ -17,6 +17,7 @@
 from . import locked

 if TYPE_CHECKING:
+    from dvc.output import Output
     from dvc.repo import Repo
     from dvc.stage import Stage
     from dvc.types import StrOrBytesPath
@@ -50,11 +51,31 @@ def get_or_create_stage(
     to_remote: bool = False,
     force: bool = False,
 ) -> StageInfo:
+    """
+    Get the stage that tracks ``target`` or create a new one.
+
+    Used in the context of dvc-add.
+
+    Args:
+        target: path of the data being added.
+        out: optional destination; when given, ``target`` is re-resolved
+            against it through ``resolve_output``.
+        to_remote: when true and ``out`` is empty, ``resolve_paths`` is
+            called with a truthy ``always_local``.
+        force: forwarded to ``resolve_output``.
+    """
     if out:
         target = resolve_output(target, out, force=force)
     path, wdir, out = resolve_paths(repo, target, always_local=to_remote and not out)

     try:
+        if getattr(repo, "_skip_graph_checks", False):
+            # FIXME: this probably is not the correct implementation. when
+            # skip_graph_checks is enabled, we just want to avoid touching the
+            # graph. The output might already exist and need to be updated.
+            raise OutputNotFoundError(path)
+
+        out_obj: Output
         (out_obj,) = repo.find_outs_by_path(target, strict=False)
         stage = out_obj.stage
         if not stage.is_data_source:
@@ -187,6 +208,34 @@ def _add(
     stage.dump()


+class _contextual_setattr:
+    """
+    Sets an attribute on an object within the context and then restores it.
+
+    If the attribute did not exist on entry it is deleted again on exit.
+    The same instance may be entered more than once, one after another.
+    """
+
+    def __init__(self, obj, attr_name, attr_value):
+        self.obj = obj
+        self.attr_name = attr_name
+        self.attr_value = attr_value
+        self._prev_value = None
+        self._had_prev_value = None
+
+    def __enter__(self):
+        self._had_prev_value = hasattr(self.obj, self.attr_name)
+        if self._had_prev_value:
+            self._prev_value = getattr(self.obj, self.attr_name)
+        setattr(self.obj, self.attr_name, self.attr_value)
+
+    def __exit__(self, ex_type, ex_value, ex_traceback):
+        if self._had_prev_value:
+            setattr(self.obj, self.attr_name, self._prev_value)
+        else:
+            delattr(self.obj, self.attr_name)
+
+
 @locked
 @scm_context
 def add(
@@ -200,26 +249,31 @@ def add(
     remote_jobs: Optional[int] = None,
     force: bool = False,
     relink: bool = True,
+    skip_graph_checks: bool = False,
 ) -> list["Stage"]:
     add_targets = find_targets(targets, glob=glob)
     if not add_targets:
         return []

-    stages_with_targets = {
-        target: get_or_create_stage(
-            repo,
-            target,
-            out=out,
-            to_remote=to_remote,
-            force=force,
-        )
-        for target in add_targets
-    }
+    # Expose the flag to get_or_create_stage() and Repo.check_graph() only;
+    # the previous value of the attribute is restored when each block exits.
+    attr_context = _contextual_setattr(repo, "_skip_graph_checks", skip_graph_checks)
+    with attr_context:
+        stages_with_targets = {
+            target: get_or_create_stage(
+                repo,
+                target,
+                out=out,
+                to_remote=to_remote,
+                force=force,
+            )
+            for target in add_targets
+        }

     stages = [stage for stage, _ in stages_with_targets.values()]
     msg = "Collecting stages from the workspace"
-    with translate_graph_error(stages), ui.status(msg) as st:
+    with attr_context, translate_graph_error(stages), ui.status(msg) as st:
         repo.check_graph(stages=stages, callback=lambda: st.update("Checking graph"))

     if to_remote or out:
         assert len(stages_with_targets) == 1, "multiple targets are unsupported"
diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py
index b9deed4646..730beb1073 100644
--- a/dvc/stage/__init__.py
+++ b/dvc/stage/__init__.py
@@ -144,6 +144,13 @@ def __init__(  # noqa: PLR0913
         desc: Optional[str] = None,
         meta=None,
     ):
+        """
+        A stage represents a dvc file?
+
+        Attributes:
+            path (str): the absolute path to a .dvc file
+            outs (List[Output]): the "outs" associated with this .dvc file
+        """
         if deps is None:
             deps = []
         if outs is None:
diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py
index a43d11ac79..e95340d7d5 100644
--- a/dvc/utils/__init__.py
+++ b/dvc/utils/__init__.py
@@ -267,6 +267,15 @@ def resolve_output(inp: str, out: Optional[str], force=False) -> str:


 def resolve_paths(repo, out, always_local=False):
+    """
+    Get the path to a DVC file that corresponds to a specific "out" in the repo.
+
+    Returns:
+        Tuple[str, str, str]:
+            path - the path to the .dvc file
+            wdir - the directory containing the .dvc file
+            out - the name of the tracked file relative to wdir.
+    """
     from urllib.parse import urlparse

     from dvc.dvcfile import DVC_FILE_SUFFIX