From eae66356eb5f45fe27f620c3a5bef11429cd5c6d Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 30 May 2024 16:33:33 -0500 Subject: [PATCH] refactor(source): Fork PathSource into RecursivePathSource These are dramatically different and the shared implementation is getting in the way of #10752. --- src/cargo/ops/resolve.rs | 4 +- src/cargo/sources/git/source.rs | 8 +- src/cargo/sources/mod.rs | 1 + src/cargo/sources/path.rs | 204 ++++++++++++++++++++++++++++++-- 4 files changed, 201 insertions(+), 16 deletions(-) diff --git a/src/cargo/ops/resolve.rs b/src/cargo/ops/resolve.rs index 1b54bbc2745..a1075bdb7fa 100644 --- a/src/cargo/ops/resolve.rs +++ b/src/cargo/ops/resolve.rs @@ -73,7 +73,7 @@ use crate::core::PackageSet; use crate::core::SourceId; use crate::core::Workspace; use crate::ops; -use crate::sources::PathSource; +use crate::sources::RecursivePathSource; use crate::util::cache_lock::CacheLockMode; use crate::util::errors::CargoResult; use crate::util::CanonicalUrl; @@ -453,7 +453,7 @@ pub fn add_overrides<'a>( for (path, definition) in paths { let id = SourceId::for_path(&path)?; - let mut source = PathSource::new_recursive(&path, id, ws.gctx()); + let mut source = RecursivePathSource::new(&path, id, ws.gctx()); source.update().with_context(|| { format!( "failed to update path override `{}` \ diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 8beacdd456d..f38c880f695 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -10,7 +10,7 @@ use crate::sources::source::MaybePackage; use crate::sources::source::QueryKind; use crate::sources::source::Source; use crate::sources::IndexSummary; -use crate::sources::PathSource; +use crate::sources::RecursivePathSource; use crate::util::cache_lock::CacheLockMode; use crate::util::errors::CargoResult; use crate::util::hex::short_hash; @@ -24,7 +24,7 @@ use tracing::trace; use url::Url; /// `GitSource` contains one or more packages gathering from a Git 
repository. -/// Under the hood it uses [`PathSource`] to discover packages inside the +/// Under the hood it uses [`RecursivePathSource`] to discover packages inside the /// repository. /// /// ## Filesystem layout @@ -79,7 +79,7 @@ pub struct GitSource<'gctx> { /// /// This gets set to `Some` after the git repo has been checked out /// (automatically handled via [`GitSource::block_until_ready`]). - path_source: Option<PathSource<'gctx>>, + path_source: Option<RecursivePathSource<'gctx>>, /// A short string that uniquely identifies the version of the checkout. /// /// This is typically a 7-character string of the OID hash, automatically @@ -356,7 +356,7 @@ impl<'gctx> Source for GitSource<'gctx> { let source_id = self .source_id .with_git_precise(Some(actual_rev.to_string())); - let path_source = PathSource::new_recursive(&checkout_path, source_id, self.gctx); + let path_source = RecursivePathSource::new(&checkout_path, source_id, self.gctx); self.path_source = Some(path_source); self.short_id = Some(short_id.as_str().into()); diff --git a/src/cargo/sources/mod.rs b/src/cargo/sources/mod.rs index 1d2f51a3718..c487aada522 100644 --- a/src/cargo/sources/mod.rs +++ b/src/cargo/sources/mod.rs @@ -30,6 +30,7 @@ pub use self::config::SourceConfigMap; pub use self::directory::DirectorySource; pub use self::git::GitSource; pub use self::path::PathSource; +pub use self::path::RecursivePathSource; pub use self::registry::{ IndexSummary, RegistrySource, CRATES_IO_DOMAIN, CRATES_IO_INDEX, CRATES_IO_REGISTRY, }; diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index 6a5ce318ca8..c5ed3bec892 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -20,13 +20,9 @@ use ignore::gitignore::GitignoreBuilder; use tracing::{debug, trace, warn}; use walkdir::WalkDir; -/// A source represents one or multiple packages gathering from a given root +/// A source that represents a package gathered at the root /// path on the filesystem. 
/// -/// It's the cornerstone of every other source --- other implementations -/// eventually need to call `PathSource` to read local packages somewhere on -/// the filesystem. -/// /// It also provides convenient methods like [`PathSource::list_files`] to /// list all files in a package, given its ability to walk the filesystem. pub struct PathSource<'gctx> { @@ -39,7 +35,6 @@ pub struct PathSource<'gctx> { /// Packages that this sources has discovered. packages: Vec<Package>, /// Whether this source should discover nested packages recursively. - /// See [`PathSource::new_recursive`] for more. recursive: bool, gctx: &'gctx GlobalContext, } @@ -60,6 +55,191 @@ impl<'gctx> PathSource<'gctx> { } } + /// Preloads a package for this source. The source is assumed that it has + /// yet loaded any other packages. + pub fn preload_with(&mut self, pkg: Package) { + assert!(!self.updated); + assert!(!self.recursive); + assert!(self.packages.is_empty()); + self.updated = true; + self.packages.push(pkg); + } + + /// Gets the package on the root path. + pub fn root_package(&mut self) -> CargoResult<Package> { + trace!("root_package; source={:?}", self); + + self.update()?; + + match self.packages.iter().find(|p| p.root() == &*self.path) { + Some(pkg) => Ok(pkg.clone()), + None => Err(internal(format!( + "no package found in source {:?}", + self.path + ))), + } + } + + /// Returns the packages discovered by this source. It may walk the + /// filesystem if package information haven't yet updated. + pub fn read_packages(&self) -> CargoResult<Vec<Package>> { + if self.updated { + Ok(self.packages.clone()) + } else if self.recursive { + ops::read_packages(&self.path, self.source_id, self.gctx) + } else { + let path = self.path.join("Cargo.toml"); + let pkg = ops::read_package(&path, self.source_id, self.gctx)?; + Ok(vec![pkg]) + } + } + + /// List all files relevant to building this package inside this source. 
+ /// + /// This function will use the appropriate methods to determine the + /// set of files underneath this source's directory which are relevant for + /// building `pkg`. + /// + /// The basic assumption of this method is that all files in the directory + /// are relevant for building this package, but it also contains logic to + /// use other methods like `.gitignore`, `package.include`, or + /// `package.exclude` to filter the list of files. + pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathBuf>> { + list_files(pkg, self.gctx) + } + + /// Gets the last modified file in a package. + pub fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> { + if !self.updated { + return Err(internal(format!( + "BUG: source `{:?}` was not updated", + self.path + ))); + } + last_modified_file(&self.path, pkg, self.gctx) + } + + /// Returns the root path of this source. + pub fn path(&self) -> &Path { + &self.path + } + + /// Discovers packages inside this source if it hasn't yet done. 
+ pub fn update(&mut self) -> CargoResult<()> { + if !self.updated { + let packages = self.read_packages()?; + self.packages.extend(packages.into_iter()); + self.updated = true; + } + + Ok(()) + } +} + +impl<'gctx> Debug for PathSource<'gctx> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "the paths source") + } +} + +impl<'gctx> Source for PathSource<'gctx> { + fn query( + &mut self, + dep: &Dependency, + kind: QueryKind, + f: &mut dyn FnMut(IndexSummary), + ) -> Poll<CargoResult<()>> { + self.update()?; + for s in self.packages.iter().map(|p| p.summary()) { + let matched = match kind { + QueryKind::Exact => dep.matches(s), + QueryKind::Alternatives => true, + QueryKind::Normalized => dep.matches(s), + }; + if matched { + f(IndexSummary::Candidate(s.clone())) + } + } + Poll::Ready(Ok(())) + } + + fn supports_checksums(&self) -> bool { + false + } + + fn requires_precise(&self) -> bool { + false + } + + fn source_id(&self) -> SourceId { + self.source_id + } + + fn download(&mut self, id: PackageId) -> CargoResult<MaybePackage> { + trace!("getting packages; id={}", id); + self.update()?; + let pkg = self.packages.iter().find(|pkg| pkg.package_id() == id); + pkg.cloned() + .map(MaybePackage::Ready) + .ok_or_else(|| internal(format!("failed to find {} in path source", id))) + } + + fn finish_download(&mut self, _id: PackageId, _data: Vec<u8>) -> CargoResult<Package> { + panic!("no download should have started") + } + + fn fingerprint(&self, pkg: &Package) -> CargoResult<String> { + let (max, max_path) = self.last_modified_file(pkg)?; + // Note that we try to strip the prefix of this package to get a + // relative path to ensure that the fingerprint remains consistent + // across entire project directory renames. 
+ let max_path = max_path.strip_prefix(&self.path).unwrap_or(&max_path); + Ok(format!("{} ({})", max, max_path.display())) + } + + fn describe(&self) -> String { + match self.source_id.url().to_file_path() { + Ok(path) => path.display().to_string(), + Err(_) => self.source_id.to_string(), + } + } + + fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {} + + fn is_yanked(&mut self, _pkg: PackageId) -> Poll<CargoResult<bool>> { + Poll::Ready(Ok(false)) + } + + fn block_until_ready(&mut self) -> CargoResult<()> { + self.update() + } + + fn invalidate_cache(&mut self) { + // Path source has no local cache. + } + + fn set_quiet(&mut self, _quiet: bool) { + // Path source does not display status + } +} + +/// A source that represents one or multiple packages gathered from a given root +/// path on the filesystem. +pub struct RecursivePathSource<'gctx> { + /// The unique identifier of this source. + source_id: SourceId, + /// The root path of this source. + path: PathBuf, + /// Whether this source has updated all package information it may contain. + updated: bool, + /// Packages that this sources has discovered. + packages: Vec<Package>, + /// Whether this source should discover nested packages recursively. + recursive: bool, + gctx: &'gctx GlobalContext, +} + +impl<'gctx> RecursivePathSource<'gctx> { /// Creates a new source which is walked recursively to discover packages. /// /// This is similar to the [`PathSource::new`] method except that instead @@ -68,10 +248,14 @@ impl<'gctx> PathSource<'gctx> { /// /// Note that this should be used with care and likely shouldn't be chosen /// by default! 
- pub fn new_recursive(root: &Path, id: SourceId, gctx: &'gctx GlobalContext) -> Self { + pub fn new(root: &Path, source_id: SourceId, gctx: &'gctx GlobalContext) -> Self { Self { + source_id, + path: root.to_path_buf(), + updated: false, + packages: Vec::new(), + gctx, recursive: true, - ..Self::new(root, id, gctx) } } @@ -156,13 +340,13 @@ impl<'gctx> PathSource<'gctx> { } } -impl<'gctx> Debug for PathSource<'gctx> { +impl<'gctx> Debug for RecursivePathSource<'gctx> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "the paths source") } } -impl<'gctx> Source for PathSource<'gctx> { +impl<'gctx> Source for RecursivePathSource<'gctx> { fn query( &mut self, dep: &Dependency,