From 287fcec30354ef2c620321c3f8ea717d4feb3be5 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 12 Jul 2024 20:47:35 +0100 Subject: [PATCH 01/17] [red-knot] Add support for editable installs to the module resolver --- crates/red_knot_module_resolver/src/path.rs | 75 ++++- .../red_knot_module_resolver/src/resolver.rs | 306 ++++++++++++++++-- .../red_knot_module_resolver/src/testing.rs | 20 ++ 3 files changed, 367 insertions(+), 34 deletions(-) diff --git a/crates/red_knot_module_resolver/src/path.rs b/crates/red_knot_module_resolver/src/path.rs index e529d32bc5095..4fc2f3554d469 100644 --- a/crates/red_knot_module_resolver/src/path.rs +++ b/crates/red_knot_module_resolver/src/path.rs @@ -73,6 +73,7 @@ enum ModuleResolutionPathBufInner { FirstParty(SystemPathBuf), StandardLibrary(FilePath), SitePackages(SystemPathBuf), + EditableInstall(SystemPathBuf), } impl ModuleResolutionPathBufInner { @@ -134,6 +135,19 @@ impl ModuleResolutionPathBufInner { ); path.push(component); } + Self::EditableInstall(ref mut path) => { + if let Some(extension) = extension { + assert!( + matches!(extension, "pyi" | "py"), + "Extension must be `py` or `pyi`; got `{extension}`" + ); + } + assert!( + path.extension().is_none(), + "Cannot push part {component} to {path}, which already has an extension" + ); + path.push(component); + } } } } @@ -197,6 +211,18 @@ impl ModuleResolutionPathBuf { .then_some(Self(ModuleResolutionPathBufInner::SitePackages(path))) } + #[must_use] + pub(crate) fn editable_installation_root( + db: &dyn Db, + path: impl Into, + ) -> Option { + let path = path.into(); + // TODO: Add Salsa invalidation to this system call: + db.system() + .is_directory(&path) + .then_some(Self(ModuleResolutionPathBufInner::EditableInstall(path))) + } + #[must_use] pub(crate) fn is_regular_package(&self, search_path: &Self, resolver: &ResolverState) -> bool { ModuleResolutionPathRef::from(self).is_regular_package(search_path, resolver) @@ -229,6 +255,16 @@ impl ModuleResolutionPathBuf { pub(crate) fn to_file(&self, search_path: &Self, resolver: &ResolverState) -> Option { ModuleResolutionPathRef::from(self).to_file(search_path, resolver) } + + pub(crate) fn as_system_path(&self) -> Option<&SystemPath> { + match &self.0 { + ModuleResolutionPathBufInner::Extra(path) => Some(path), + ModuleResolutionPathBufInner::FirstParty(path) => Some(path), + ModuleResolutionPathBufInner::StandardLibrary(_) => None, + ModuleResolutionPathBufInner::SitePackages(path) => Some(path), + ModuleResolutionPathBufInner::EditableInstall(path) => Some(path), + } + } } impl fmt::Debug for ModuleResolutionPathBuf { @@ -250,6 +286,10 @@ impl fmt::Debug for ModuleResolutionPathBuf { .debug_tuple("ModuleResolutionPathBuf::StandardLibrary") .field(path) .finish(), + ModuleResolutionPathBufInner::EditableInstall(path) => f + .debug_tuple("ModuleResolutionPathBuf::EditableInstall") + .field(path) + .finish(), } } } @@ -272,6 +312,7 @@ enum ModuleResolutionPathRefInner<'a> { FirstParty(&'a SystemPath), StandardLibrary(FilePathRef<'a>), SitePackages(&'a SystemPath), + EditableInstall(&'a SystemPath), } impl<'a> ModuleResolutionPathRefInner<'a> { @@ -306,6 +347,7 @@ impl<'a> ModuleResolutionPathRefInner<'a> { (Self::Extra(path), Self::Extra(_)) => resolver.system().is_directory(path), (Self::FirstParty(path), Self::FirstParty(_)) => resolver.system().is_directory(path), (Self::SitePackages(path), Self::SitePackages(_)) => resolver.system().is_directory(path), + (Self::EditableInstall(path), Self::EditableInstall(_)) => resolver.system().is_directory(path), (Self::StandardLibrary(path), Self::StandardLibrary(stdlib_root)) => { match Self::query_stdlib_version(path, search_path, &stdlib_root, resolver) { TypeshedVersionsQueryResult::DoesNotExist => false, @@ -333,6 +375,7 @@ impl<'a> ModuleResolutionPathRefInner<'a> { (Self::Extra(path), Self::Extra(_)) => is_non_stdlib_pkg(resolver, path), (Self::FirstParty(path), Self::FirstParty(_)) => is_non_stdlib_pkg(resolver, path), (Self::SitePackages(path), Self::SitePackages(_)) => is_non_stdlib_pkg(resolver, path), + (Self::EditableInstall(path), Self::EditableInstall(_)) => is_non_stdlib_pkg(resolver, path), // Unlike the other variants: // (1) Account for VERSIONS // (2) Only test for `__init__.pyi`, not `__init__.py` @@ -358,6 +401,7 @@ impl<'a> ModuleResolutionPathRefInner<'a> { (Self::SitePackages(path), Self::SitePackages(_)) => { system_path_to_file(resolver.db.upcast(), path) } + (Self::EditableInstall(path), Self::EditableInstall(_)) => system_path_to_file(resolver.db.upcast(), path), (Self::StandardLibrary(path), Self::StandardLibrary(stdlib_root)) => { match Self::query_stdlib_version(&path, search_path, &stdlib_root, resolver) { TypeshedVersionsQueryResult::DoesNotExist => None, @@ -374,7 +418,10 @@ impl<'a> ModuleResolutionPathRefInner<'a> { #[must_use] fn to_module_name(self) -> Option { match self { - Self::Extra(path) | Self::FirstParty(path) | Self::SitePackages(path) => { + Self::Extra(path) + | Self::FirstParty(path) + | Self::SitePackages(path) + | Self::EditableInstall(path) => { let parent = path.parent()?; let parent_components = parent.components().map(|component| component.as_str()); let skip_final_part = @@ -421,6 +468,9 @@ impl<'a> ModuleResolutionPathRefInner<'a> { Self::SitePackages(path) => { ModuleResolutionPathBufInner::SitePackages(path.with_extension("pyi")) } + Self::EditableInstall(path) => { + ModuleResolutionPathBufInner::EditableInstall(path.with_extension("pyi")) + } } } @@ -437,6 +487,9 @@ impl<'a> ModuleResolutionPathRefInner<'a> { Self::SitePackages(path) => Some(ModuleResolutionPathBufInner::SitePackages( path.with_extension("py"), )), + Self::EditableInstall(path) => Some(ModuleResolutionPathBufInner::EditableInstall( + path.with_extension("py"), + )), } } @@ -474,6 +527,13 @@ impl<'a> ModuleResolutionPathRefInner<'a> { .then_some(Self::SitePackages(path)) }) } + (Self::EditableInstall(root), FilePathRef::System(absolute_path)) => { + absolute_path.strip_prefix(root).ok().and_then(|path| { + path.extension() + .map_or(true, |ext| matches!(ext, "pyi" | "py")) + .then_some(Self::EditableInstall(path)) + }) + } (Self::Extra(_), FilePathRef::Vendored(_)) => None, (Self::FirstParty(_), FilePathRef::Vendored(_)) => None, (Self::StandardLibrary(root), FilePathRef::Vendored(absolute_path)) => match root { @@ -487,6 +547,7 @@ impl<'a> ModuleResolutionPathRefInner<'a> { } }, (Self::SitePackages(_), FilePathRef::Vendored(_)) => None, + (Self::EditableInstall(_), FilePathRef::Vendored(_)) => None, } } } @@ -562,6 +623,10 @@ impl fmt::Debug for ModuleResolutionPathRef<'_> { .debug_tuple("ModuleResolutionPathRef::StandardLibrary") .field(path) .finish(), + ModuleResolutionPathRefInner::EditableInstall(path) => f + .debug_tuple("ModuleResolutionPathRef::EditableInstall") + .field(path) + .finish(), } } } @@ -582,6 +647,9 @@ impl<'a> From<&'a ModuleResolutionPathBuf> for ModuleResolutionPathRef<'a> { ModuleResolutionPathBufInner::SitePackages(path) => { ModuleResolutionPathRefInner::SitePackages(path) } + ModuleResolutionPathBufInner::EditableInstall(path) => { + ModuleResolutionPathRefInner::EditableInstall(path) + } }; ModuleResolutionPathRef(inner) } @@ -593,6 +661,7 @@ impl PartialEq for ModuleResolutionPathRef<'_> { ModuleResolutionPathRefInner::Extra(path) => path == other, ModuleResolutionPathRefInner::FirstParty(path) => path == other, ModuleResolutionPathRefInner::SitePackages(path) => path == other, + ModuleResolutionPathRefInner::EditableInstall(path) => path == other, ModuleResolutionPathRefInner::StandardLibrary(FilePathRef::System(path)) => { path == other } @@ -625,6 +694,7 @@ impl PartialEq for ModuleResolutionPathRef<'_> { ModuleResolutionPathRefInner::Extra(_) => false, ModuleResolutionPathRefInner::FirstParty(_) => false, ModuleResolutionPathRefInner::SitePackages(_) => false, + ModuleResolutionPathRefInner::EditableInstall(_) => false, ModuleResolutionPathRefInner::StandardLibrary(FilePathRef::System(_)) => false, ModuleResolutionPathRefInner::StandardLibrary(FilePathRef::Vendored(path)) => { path == other @@ -707,6 +777,9 @@ mod tests { ModuleResolutionPathRefInner::SitePackages(path) => { ModuleResolutionPathBufInner::SitePackages(path.to_path_buf()) } + ModuleResolutionPathRefInner::EditableInstall(path) => { + ModuleResolutionPathBufInner::EditableInstall(path.to_path_buf()) + } }; ModuleResolutionPathBuf(inner) } diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 71b80787115a7..73f15844a7415 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -1,8 +1,8 @@ -use std::ops::Deref; use std::sync::Arc; -use ruff_db::files::{File, FilePath}; -use ruff_db::system::SystemPathBuf; +use ruff_db::files::{system_path_to_file, File, FilePath}; +use ruff_db::source::{source_text, SourceText}; +use ruff_db::system::{DirectoryEntry, SystemPath, SystemPathBuf}; use crate::db::Db; use crate::module::{Module, ModuleKind}; @@ -82,12 +82,14 @@ pub(crate) fn file_to_module(db: &dyn Db, file: File) -> Option { let resolver_settings = module_resolver_settings(db); - let relative_path = resolver_settings - .search_paths() - .iter() - .find_map(|root| root.relativize_path(path))?; + let mut search_paths = resolver_settings.search_paths(db).into_iter(); - let module_name = relative_path.to_module_name()?; + let module_name = loop { + let candidate = search_paths.next()?; + if let Some(relative_path) = candidate.relativize_path(path) { + break relative_path.to_module_name()?; + } + }; // Resolve the module name to see if Python would resolve the name to the same path. // If it doesn't, then that means that multiple modules have the same name in different @@ -152,54 +154,183 @@ impl RawModuleResolutionSettings { custom_typeshed, } = self; - let mut paths: Vec = extra_paths + let extra_paths: Vec> = extra_paths .into_iter() - .map(|fs_path| ModuleResolutionPathBuf::extra(fs_path).unwrap()) + .map(|fs_path| Arc::new(ModuleResolutionPathBuf::extra(fs_path).unwrap())) .collect(); - paths.push(ModuleResolutionPathBuf::first_party(workspace_root).unwrap()); + let workspace_root = + Arc::new(ModuleResolutionPathBuf::first_party(workspace_root).unwrap()); - paths.push( + let stdlib = Arc::new( custom_typeshed.map_or_else(ModuleResolutionPathBuf::vendored_stdlib, |custom| { ModuleResolutionPathBuf::stdlib_from_custom_typeshed_root(&custom).unwrap() }), ); // TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step - if let Some(site_packages) = site_packages { - paths.push(ModuleResolutionPathBuf::site_packages(site_packages).unwrap()); - } + let site_packages = site_packages + .map(|path| Arc::new(ModuleResolutionPathBuf::site_packages(path).unwrap())); ModuleResolutionSettings { target_version, - search_paths: OrderedSearchPaths(paths.into_iter().map(Arc::new).collect()), + search_path_settings: ValidatedSearchPathSettings { + extra_paths, + workspace_root, + stdlib, + site_packages, + }, } } } -/// A resolved module resolution order as per the [typing spec] -/// -/// [typing spec]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub(crate) struct OrderedSearchPaths(Vec>); +#[derive(Debug, PartialEq, Eq, Clone)] +pub(crate) struct ValidatedSearchPathSettings { + extra_paths: Vec>, + workspace_root: Arc, + stdlib: Arc, + site_packages: Option>, +} + +impl ValidatedSearchPathSettings { + fn search_paths(&self, db: &dyn Db) -> Vec> { + let ValidatedSearchPathSettings { + extra_paths, + workspace_root, + stdlib, + site_packages, + } = self; + let mut search_paths: Vec> = extra_paths + .iter() + .cloned() + .chain([workspace_root, stdlib].into_iter().cloned()) + .collect(); + if let Some(site_packages) = site_packages { + search_paths.push(Arc::clone(site_packages)); + let site_packages = site_packages + .as_system_path() + .expect("Expected site-packages never to be a VendoredPath!"); + let Ok(pth_file_iterator) = PthFileIterator::new(db, site_packages) else { + return search_paths; + }; + + // The Python documentation specifies that `.pth` files in `site-packages` + // are processed in alphabetical order. + // We know that files returned from [`ruff_db::system::MemoryFileSystem::read_directory()`] + // will be in alphabetical order already. However, [`ruff_db::system::OsSystem::read_directory()`] + // gives no such guarantees. Therefore, collecting and then sorting is necessary. + // https://docs.python.org/3/library/site.html#module-site + let mut all_pth_files: Vec = pth_file_iterator.collect(); + all_pth_files.sort_by(|a, b| a.path.cmp(&b.path)); + + for pth_file in &all_pth_files { + search_paths.extend(pth_file.iter_editable_installations().map(Arc::new)); + } + } + search_paths + } +} -impl Deref for OrderedSearchPaths { - type Target = [Arc]; +struct PthFile<'db> { + db: &'db dyn Db, + path: SystemPathBuf, + contents: SourceText, + site_packages: &'db SystemPath, +} - fn deref(&self) -> &Self::Target { - &self.0 +impl<'db> PthFile<'db> { + fn new( + db: &'db dyn Db, + pth_path: SystemPathBuf, + file: File, + site_packages: &'db SystemPath, + ) -> Self { + Self { + db, + path: pth_path, + contents: source_text(db.upcast(), file), + site_packages, + } + } + + fn iter_editable_installations<'a>( + &'a self, + ) -> impl Iterator + 'db + where + 'a: 'db, + { + // Empty lines or lines starting with '#' are ignored by the Python interpreter. + // Lines that start with "import " do not represent editable installs at all; + // instead, these are files that are executed by Python at startup. + // https://docs.python.org/3/library/site.html#module-site + self.contents.lines().filter_map(|line| { + if line.is_empty() || line.starts_with('#') || line.starts_with("import ") { + return None; + } + let possible_editable_install = self.site_packages.join(line); + + ModuleResolutionPathBuf::editable_installation_root(self.db, possible_editable_install) + }) + } +} + +struct PthFileIterator<'db> { + db: &'db dyn Db, + directory_iterator: Box> + 'db>, + site_packages: &'db SystemPath, +} + +impl<'db> PthFileIterator<'db> { + fn new(db: &'db dyn Db, site_packages: &'db SystemPath) -> std::io::Result { + Ok(Self { + db, + directory_iterator: db.system().read_directory(site_packages)?, + site_packages, + }) + } +} + +impl<'db> Iterator for PthFileIterator<'db> { + type Item = PthFile<'db>; + + fn next(&mut self) -> Option { + loop { + let entry_result = self.directory_iterator.next()?; + let Ok(entry) = entry_result else { + continue; + }; + let Ok(file_type) = entry.file_type() else { + continue; + }; + if file_type.is_directory() { + continue; + } + let path = entry.path(); + if path.extension() != Some("pth") { + continue; + } + let Some(file) = system_path_to_file(self.db.upcast(), path) else { + continue; + }; + return Some(PthFile::new( + self.db, + path.to_path_buf(), + file, + self.site_packages, + )); + } } } #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ModuleResolutionSettings { - search_paths: OrderedSearchPaths, + search_path_settings: ValidatedSearchPathSettings, target_version: TargetVersion, } impl ModuleResolutionSettings { - pub(crate) fn search_paths(&self) -> &[Arc] { - &self.search_paths + pub(crate) fn search_paths(&self, db: &dyn Db) -> Vec> { + self.search_path_settings.search_paths(db) } pub(crate) fn target_version(&self) -> TargetVersion { @@ -245,18 +376,18 @@ fn resolve_name( let resolver_settings = module_resolver_settings(db); let resolver_state = ResolverState::new(db, resolver_settings.target_version()); - for search_path in resolver_settings.search_paths() { + for search_path in resolver_settings.search_paths(db) { let mut components = name.components(); let module_name = components.next_back()?; - match resolve_package(search_path, components, &resolver_state) { + match resolve_package(&search_path, components, &resolver_state) { Ok(resolved_package) => { let mut package_path = resolved_package.path; package_path.push(module_name); // Must be a `__init__.pyi` or `__init__.py` or it isn't a package. - let kind = if package_path.is_directory(search_path, &resolver_state) { + let kind = if package_path.is_directory(&search_path, &resolver_state) { package_path.push("__init__"); ModuleKind::Package } else { @@ -266,14 +397,14 @@ fn resolve_name( // TODO Implement full https://peps.python.org/pep-0561/#type-checker-module-resolution-order resolution if let Some(stub) = package_path .with_pyi_extension() - .to_file(search_path, &resolver_state) + .to_file(&search_path, &resolver_state) { return Some((search_path.clone(), stub, kind)); } if let Some(module) = package_path .with_py_extension() - .and_then(|path| path.to_file(search_path, &resolver_state)) + .and_then(|path| path.to_file(&search_path, &resolver_state)) { return Some((search_path.clone(), module, kind)); } @@ -1140,4 +1271,113 @@ mod tests { system_path_to_file(&db, stdlib.join("functools.pyi")) ); } + + #[test] + fn editable_install_absolute_path() { + const SITE_PACKAGES: &[FileSpec] = &[("_foo.pth", "/x/src")]; + const X_DIRECTORY: &[FileSpec] = + &[("/x/src/foo/__init__.py", ""), ("/x/src/foo/bar.py", "")]; + + let TestCase { db, .. } = TestCaseBuilder::new() + .with_site_packages_files(SITE_PACKAGES) + .with_other_files(X_DIRECTORY) + .build(); + + let foo_module_name = ModuleName::new_static("foo").unwrap(); + let foo_bar_module_name = ModuleName::new_static("foo.bar").unwrap(); + + let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); + let foo_bar_module = resolve_module(&db, foo_bar_module_name.clone()).unwrap(); + + assert_eq!( + Some(foo_module.file()), + system_path_to_file(&db, SystemPathBuf::from("/x/src/foo/__init__.py")) + ); + assert_eq!( + Some(foo_bar_module.file()), + system_path_to_file(&db, SystemPathBuf::from("/x/src/foo/bar.py")) + ); + } + + #[test] + fn editable_install_relative_path() { + const SITE_PACKAGES: &[FileSpec] = &[ + ("_foo.pth", "../../x/../x/y/src"), + ("../x/y/src/foo.pyi", ""), + ]; + + let TestCase { + db, site_packages, .. + } = TestCaseBuilder::new() + .with_site_packages_files(SITE_PACKAGES) + .build(); + + let foo_module_name = ModuleName::new_static("foo").unwrap(); + let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); + + assert_eq!( + Some(foo_module.file()), + system_path_to_file(&db, site_packages.join("../x/y/src/foo.pyi")) + ); + } + + #[test] + fn editable_install_multiple_pth_files_with_multiple_paths() { + const COMPLEX_PTH_FILE: &str = "\ +/ + +# a comment +/baz + +import not_an_editable_install; do_something_else_crazy_dynamic() + +# another comment +spam + +not_a_directory +"; + + const SITE_PACKAGES: &[FileSpec] = &[ + ("_foo.pth", "../../x/../x/y/src"), + ("_lots_of_others.pth", COMPLEX_PTH_FILE), + ("../x/y/src/foo.pyi", ""), + ("spam/spam.py", ""), + ]; + + const ROOT: &[FileSpec] = &[("/a.py", ""), ("/baz/b.py", "")]; + + let TestCase { + db, site_packages, .. + } = TestCaseBuilder::new() + .with_site_packages_files(SITE_PACKAGES) + .with_other_files(ROOT) + .build(); + + let foo_module_name = ModuleName::new_static("foo").unwrap(); + let a_module_name = ModuleName::new_static("a").unwrap(); + let b_module_name = ModuleName::new_static("b").unwrap(); + let spam_module_name = ModuleName::new_static("spam").unwrap(); + + let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); + let a_module = resolve_module(&db, a_module_name.clone()).unwrap(); + let b_module = resolve_module(&db, b_module_name.clone()).unwrap(); + let spam_module = resolve_module(&db, spam_module_name.clone()).unwrap(); + + assert_eq!( + Some(foo_module.file()), + system_path_to_file(&db, site_packages.join("../x/y/src/foo.pyi")) + ); + assert_eq!( + Some(a_module.file()), + system_path_to_file(&db, SystemPathBuf::from("/a.py")) + ); + assert_eq!( + Some(b_module.file()), + system_path_to_file(&db, SystemPathBuf::from("/baz/b.py")) + ); + assert_eq!( + Some(spam_module.file()), + system_path_to_file(&db, site_packages.join("spam/spam.py")) + ); + } } diff --git a/crates/red_knot_module_resolver/src/testing.rs b/crates/red_knot_module_resolver/src/testing.rs index b927ae7a1ef2e..56d8e66de7912 100644 --- a/crates/red_knot_module_resolver/src/testing.rs +++ b/crates/red_knot_module_resolver/src/testing.rs @@ -99,6 +99,7 @@ pub(crate) struct TestCaseBuilder { target_version: TargetVersion, first_party_files: Vec, site_packages_files: Vec, + other_files: Vec, } impl TestCaseBuilder { @@ -120,6 +121,16 @@ impl TestCaseBuilder { self } + /// Add other files external to any of the pre-configured search paths. + /// + /// (For example, an editable installation in `site-packages` + /// might point to a directory outside of any of the typing spec's + /// listed module resolution sources) + pub(crate) fn with_other_files(mut self, files: &[FileSpec]) -> Self { + self.other_files.extend(files.iter().copied()); + self + } + fn write_mock_directory( db: &mut TestDb, location: impl AsRef, @@ -143,6 +154,7 @@ impl TestCaseBuilder { target_version: TargetVersion::default(), first_party_files: vec![], site_packages_files: vec![], + other_files: vec![], } } @@ -153,12 +165,14 @@ impl TestCaseBuilder { target_version, first_party_files, site_packages_files, + other_files, } = self; TestCaseBuilder { typeshed_option: VendoredTypeshed, target_version, first_party_files, site_packages_files, + other_files, } } @@ -172,12 +186,14 @@ impl TestCaseBuilder { target_version, first_party_files, site_packages_files, + other_files, } = self; TestCaseBuilder { typeshed_option: typeshed, target_version, first_party_files, site_packages_files, + other_files, } } @@ -206,6 +222,7 @@ impl TestCaseBuilder { target_version, first_party_files, site_packages_files, + other_files, } = self; let mut db = TestDb::new(); @@ -214,6 +231,7 @@ impl TestCaseBuilder { Self::write_mock_directory(&mut db, "/site-packages", site_packages_files); let src = Self::write_mock_directory(&mut db, "/src", first_party_files); let typeshed = Self::build_typeshed_mock(&mut db, &typeshed_option); + Self::write_mock_directory(&mut db, "", other_files); set_module_resolution_settings( &mut db, @@ -260,6 +278,7 @@ impl TestCaseBuilder { target_version, first_party_files, site_packages_files, + other_files, } = self; let mut db = TestDb::new(); @@ -267,6 +286,7 @@ impl TestCaseBuilder { let site_packages = Self::write_mock_directory(&mut db, "/site-packages", site_packages_files); let src = Self::write_mock_directory(&mut db, "/src", first_party_files); + Self::write_mock_directory(&mut db, "", other_files); set_module_resolution_settings( &mut db, From 645532b12265054e24a007c22d8477dbda3a4fd6 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 12 Jul 2024 21:20:27 +0100 Subject: [PATCH 02/17] Deduplicate search path roots --- Cargo.lock | 1 + crates/red_knot_module_resolver/Cargo.toml | 1 + .../red_knot_module_resolver/src/resolver.rs | 28 +++++++++++++++---- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9d64570b9a718..9df065e8bb608 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1890,6 +1890,7 @@ dependencies = [ "compact_str", "insta", "once_cell", + "ordermap", "path-slash", "ruff_db", "ruff_python_stdlib", diff --git a/crates/red_knot_module_resolver/Cargo.toml b/crates/red_knot_module_resolver/Cargo.toml index a6761665d6116..a4a14c6fdb19f 100644 --- a/crates/red_knot_module_resolver/Cargo.toml +++ b/crates/red_knot_module_resolver/Cargo.toml @@ -16,6 +16,7 @@ ruff_python_stdlib = { workspace = true } compact_str = { workspace = true } camino = { workspace = true } +ordermap = { workspace = true } once_cell = { workspace = true } rustc-hash = { workspace = true } salsa = { workspace = true } diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 73f15844a7415..d886fc91202dd 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -1,5 +1,9 @@ +use std::hash::BuildHasherDefault; use std::sync::Arc; +use ordermap::OrderSet; +use rustc_hash::FxHasher; + use ruff_db::files::{system_path_to_file, File, FilePath}; use ruff_db::source::{source_text, SourceText}; use ruff_db::system::{DirectoryEntry, SystemPath, SystemPathBuf}; @@ -12,6 +16,14 @@ use crate::resolver::internal::ModuleResolverSettings; use crate::state::ResolverState; use crate::supported_py_version::TargetVersion; +/// A sequence of search paths that maintains its insertion order, but never contains duplicates. +/// +/// This follows the invariants maintained by [`sys.path` at runtime]: +/// "No item is added to `sys.path` more than once." +/// +/// [`sys.path` at runtime]: https://docs.python.org/3/library/site.html#module-site +type OrderedSearchPaths = OrderSet, BuildHasherDefault>; + /// Configures the module resolver settings. /// /// Must be called before calling any other module resolution functions. @@ -193,20 +205,20 @@ pub(crate) struct ValidatedSearchPathSettings { } impl ValidatedSearchPathSettings { - fn search_paths(&self, db: &dyn Db) -> Vec> { + fn search_paths(&self, db: &dyn Db) -> OrderedSearchPaths { let ValidatedSearchPathSettings { extra_paths, workspace_root, stdlib, site_packages, } = self; - let mut search_paths: Vec> = extra_paths + let mut search_paths: OrderedSearchPaths = extra_paths .iter() .cloned() .chain([workspace_root, stdlib].into_iter().cloned()) .collect(); if let Some(site_packages) = site_packages { - search_paths.push(Arc::clone(site_packages)); + search_paths.insert(Arc::clone(site_packages)); let site_packages = site_packages .as_system_path() .expect("Expected site-packages never to be a VendoredPath!"); @@ -260,11 +272,15 @@ impl<'db> PthFile<'db> { 'a: 'db, { // Empty lines or lines starting with '#' are ignored by the Python interpreter. - // Lines that start with "import " do not represent editable installs at all; + // Lines that start with "import " or "import\t" do not represent editable installs at all; // instead, these are files that are executed by Python at startup. // https://docs.python.org/3/library/site.html#module-site self.contents.lines().filter_map(|line| { - if line.is_empty() || line.starts_with('#') || line.starts_with("import ") { + if line.is_empty() + || line.starts_with('#') + || line.starts_with("import ") + || line.starts_with("import\t") + { return None; } let possible_editable_install = self.site_packages.join(line); @@ -329,7 +345,7 @@ pub(crate) struct ModuleResolutionSettings { } impl ModuleResolutionSettings { - pub(crate) fn search_paths(&self, db: &dyn Db) -> Vec> { + pub(crate) fn search_paths(&self, db: &dyn Db) -> OrderedSearchPaths { self.search_path_settings.search_paths(db) } From a456ef9f07c886bdb549c9407e65b62afaa2be68 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sat, 13 Jul 2024 13:52:05 +0100 Subject: [PATCH 03/17] Make all module resolution search paths absolute --- .../red_knot_module_resolver/src/resolver.rs | 57 ++++++++++++++----- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index d886fc91202dd..564ad8abb5ac1 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -31,7 +31,7 @@ pub fn set_module_resolution_settings(db: &mut dyn Db, config: RawModuleResoluti // There's no concurrency issue here because we hold a `&mut dyn Db` reference. No other // thread can mutate the `Db` while we're in this call, so using `try_get` to test if // the settings have already been set is safe. - let resolved_settings = config.into_configuration_settings(); + let resolved_settings = config.into_configuration_settings(db.system().current_directory()); if let Some(existing) = ModuleResolverSettings::try_get(db) { existing.set_settings(db).to(resolved_settings); } else { @@ -157,7 +157,10 @@ impl RawModuleResolutionSettings { /// This validation should probably be done outside of Salsa? /// /// [module resolution order]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering - fn into_configuration_settings(self) -> ModuleResolutionSettings { + fn into_configuration_settings( + self, + current_directory: &SystemPath, + ) -> ModuleResolutionSettings { let RawModuleResolutionSettings { target_version, extra_paths, @@ -168,21 +171,46 @@ impl RawModuleResolutionSettings { let extra_paths: Vec> = extra_paths .into_iter() - .map(|fs_path| Arc::new(ModuleResolutionPathBuf::extra(fs_path).unwrap())) + .map(|fs_path| { + Arc::new( + ModuleResolutionPathBuf::extra(SystemPath::absolute( + fs_path, + current_directory, + )) + .unwrap(), + ) + }) .collect(); - let workspace_root = - Arc::new(ModuleResolutionPathBuf::first_party(workspace_root).unwrap()); - - let stdlib = Arc::new( - custom_typeshed.map_or_else(ModuleResolutionPathBuf::vendored_stdlib, |custom| { - ModuleResolutionPathBuf::stdlib_from_custom_typeshed_root(&custom).unwrap() - }), + let workspace_root = Arc::new( + ModuleResolutionPathBuf::first_party(SystemPath::absolute( + workspace_root, + current_directory, + )) + .unwrap(), ); + let stdlib = Arc::new(custom_typeshed.map_or_else( + ModuleResolutionPathBuf::vendored_stdlib, + |custom| { + ModuleResolutionPathBuf::stdlib_from_custom_typeshed_root(&SystemPath::absolute( + custom, + current_directory, + )) + .unwrap() + }, + )); + // TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step - let site_packages = site_packages - .map(|path| Arc::new(ModuleResolutionPathBuf::site_packages(path).unwrap())); + let site_packages = site_packages.map(|path| { + Arc::new( + ModuleResolutionPathBuf::site_packages(SystemPath::absolute( + path, + current_directory, + )) + .unwrap(), + ) + }); ModuleResolutionSettings { target_version, @@ -283,7 +311,10 @@ impl<'db> PthFile<'db> { { return None; } - let possible_editable_install = self.site_packages.join(line); + let possible_editable_install = SystemPath::absolute( + self.site_packages.join(line), + self.db.system().current_directory(), + ); ModuleResolutionPathBuf::editable_installation_root(self.db, possible_editable_install) }) From 9d68c078eb4638496e64962c10db1cfdabb76328 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sat, 13 Jul 2024 14:16:12 +0100 Subject: [PATCH 04/17] Improve docs --- .../red_knot_module_resolver/src/resolver.rs | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 564ad8abb5ac1..ca58ba673e231 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -147,7 +147,8 @@ pub struct RawModuleResolutionSettings { } impl RawModuleResolutionSettings { - /// Implementation of the typing spec's [module resolution order] + /// Validate and normalize the raw settings given by the user + /// into settings we can use for module resolution /// /// TODO(Alex): this method does multiple `.unwrap()` calls when it should really return an error. /// Each `.unwrap()` call is a point where we're validating a setting that the user would pass @@ -155,8 +156,6 @@ impl RawModuleResolutionSettings { /// Rather than panicking if a path fails to validate, we should display an error message to the user /// and exit the process with a nonzero exit code. /// This validation should probably be done outside of Salsa? - /// - /// [module resolution order]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering fn into_configuration_settings( self, current_directory: &SystemPath, @@ -233,6 +232,9 @@ pub(crate) struct ValidatedSearchPathSettings { } impl ValidatedSearchPathSettings { + /// Implementation of the typing spec's [module resolution order] + /// + /// [module resolution order]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering fn search_paths(&self, db: &dyn Db) -> OrderedSearchPaths { let ValidatedSearchPathSettings { extra_paths, @@ -250,6 +252,13 @@ impl ValidatedSearchPathSettings { let site_packages = site_packages .as_system_path() .expect("Expected site-packages never to be a VendoredPath!"); + + // As well as modules installed directly into `site-packages`, + // the directory may also contain `.pth` files. + // Each `.pth` file in `site-packages` may contain one or more lines + // containing a (relative or absolute) path. + // Each of these paths may point to an editable install of a package, + // so should be considered an additional search path. let Ok(pth_file_iterator) = PthFileIterator::new(db, site_packages) else { return search_paths; }; @@ -271,6 +280,9 @@ impl ValidatedSearchPathSettings { } } +/// Represents a single `.pth` file in a `site-packages` directory. +/// One or more lines in a `.pth` file may be a (relative or absolute) +/// path that represents an editable installation of a package. struct PthFile<'db> { db: &'db dyn Db, path: SystemPathBuf, @@ -293,6 +305,8 @@ impl<'db> PthFile<'db> { } } + /// Yield paths in this `.pth` file that appear to represent editable installations, + /// and should therefore be added as module-resolution search paths. fn iter_editable_installations<'a>( &'a self, ) -> impl Iterator + 'db @@ -321,6 +335,8 @@ impl<'db> PthFile<'db> { } } +/// Iterator that yields a [`PthFile`] instance for every `.pth` file +/// found in a given `site-packages` directory. struct PthFileIterator<'db> { db: &'db dyn Db, directory_iterator: Box> + 'db>, @@ -369,6 +385,7 @@ impl<'db> Iterator for PthFileIterator<'db> { } } +/// Validated and normalized module-resolution settings. #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ModuleResolutionSettings { search_path_settings: ValidatedSearchPathSettings, From 99d87b4651ceaa89d9b6f4686b4cbd30563146c5 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sat, 13 Jul 2024 14:35:49 +0100 Subject: [PATCH 05/17] Be more lenient about trailing and leading whitespace --- crates/red_knot_module_resolver/src/resolver.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index ca58ba673e231..d0765cebbca88 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -318,6 +318,7 @@ impl<'db> PthFile<'db> { // instead, these are files that are executed by Python at startup. // https://docs.python.org/3/library/site.html#module-site self.contents.lines().filter_map(|line| { + let line = line.trim(); if line.is_empty() || line.starts_with('#') || line.starts_with("import ") @@ -1396,7 +1397,7 @@ mod tests { import not_an_editable_install; do_something_else_crazy_dynamic() # another comment -spam + spam not_a_directory "; From f7219f21d3eda0b6c99edaaf90cc297390fe0e2c Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sat, 13 Jul 2024 15:44:04 +0100 Subject: [PATCH 06/17] Make it a Salsa query and add caching tests --- crates/red_knot_module_resolver/src/db.rs | 2 + .../red_knot_module_resolver/src/resolver.rs | 122 ++++++++++++++++-- 2 files changed, 111 insertions(+), 13 deletions(-) diff --git a/crates/red_knot_module_resolver/src/db.rs b/crates/red_knot_module_resolver/src/db.rs index 82da0e6e94d10..1756559bc7d81 100644 --- a/crates/red_knot_module_resolver/src/db.rs +++ b/crates/red_knot_module_resolver/src/db.rs @@ -3,6 +3,7 @@ use ruff_db::Upcast; use crate::resolver::{ file_to_module, internal::{ModuleNameIngredient, ModuleResolverSettings}, + module_search_paths, resolve_module_query, }; use crate::typeshed::parse_typeshed_versions; @@ -11,6 +12,7 @@ use crate::typeshed::parse_typeshed_versions; pub struct Jar( ModuleNameIngredient<'_>, ModuleResolverSettings, + module_search_paths, resolve_module_query, file_to_module, parse_typeshed_versions, diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index d0765cebbca88..75cdb9abeb4a4 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -92,9 +92,7 @@ pub(crate) fn file_to_module(db: &dyn Db, file: File) -> Option { let path = file.path(db.upcast()); - let resolver_settings = module_resolver_settings(db); - - let mut search_paths = resolver_settings.search_paths(db).into_iter(); + let mut search_paths = module_search_paths(db).into_iter(); let module_name = loop { let candidate = search_paths.next()?; @@ -224,7 +222,7 @@ impl RawModuleResolutionSettings { } #[derive(Debug, PartialEq, Eq, Clone)] -pub(crate) struct ValidatedSearchPathSettings { +struct ValidatedSearchPathSettings { extra_paths: Vec>, workspace_root: Arc, stdlib: Arc, @@ -346,6 +344,8 @@ struct PthFileIterator<'db> { impl<'db> PthFileIterator<'db> { fn new(db: &'db dyn Db, site_packages: &'db SystemPath) -> std::io::Result { + // TODO: The salsa cache should be invalidated every time a `.pth` file is added to `site-packages`, + // as well as every time an existing one is removed. Ok(Self { db, directory_iterator: db.system().read_directory(site_packages)?, @@ -394,10 +394,6 @@ pub(crate) struct ModuleResolutionSettings { } impl ModuleResolutionSettings { - pub(crate) fn search_paths(&self, db: &dyn Db) -> OrderedSearchPaths { - self.search_path_settings.search_paths(db) - } - pub(crate) fn target_version(&self) -> TargetVersion { self.target_version } @@ -432,6 +428,13 @@ fn module_resolver_settings(db: &dyn Db) -> &ModuleResolutionSettings { ModuleResolverSettings::get(db).settings(db) } +#[salsa::tracked(return_ref)] +pub(crate) fn module_search_paths(db: &dyn Db) -> OrderedSearchPaths { + module_resolver_settings(db) + .search_path_settings + .search_paths(db) +} + /// Given a module name and a list of search paths in which to lookup modules, /// attempt to resolve the module name fn resolve_name( @@ -441,18 +444,18 @@ fn resolve_name( let resolver_settings = module_resolver_settings(db); let resolver_state = ResolverState::new(db, resolver_settings.target_version()); - for search_path in resolver_settings.search_paths(db) { + for search_path in module_search_paths(db) { let mut components = name.components(); let module_name = components.next_back()?; - match resolve_package(&search_path, components, &resolver_state) { + match resolve_package(search_path, components, &resolver_state) { Ok(resolved_package) => { let mut package_path = resolved_package.path; package_path.push(module_name); // Must be a `__init__.pyi` or `__init__.py` or it isn't a package. - let kind = if package_path.is_directory(&search_path, &resolver_state) { + let kind = if package_path.is_directory(search_path, &resolver_state) { package_path.push("__init__"); ModuleKind::Package } else { @@ -462,14 +465,14 @@ fn resolve_name( // TODO Implement full https://peps.python.org/pep-0561/#type-checker-module-resolution-order resolution if let Some(stub) = package_path .with_pyi_extension() - .to_file(&search_path, &resolver_state) + .to_file(search_path, &resolver_state) { return Some((search_path.clone(), stub, kind)); } if let Some(module) = package_path .with_py_extension() - .and_then(|path| path.to_file(&search_path, &resolver_state)) + .and_then(|path| path.to_file(search_path, &resolver_state)) { return Some((search_path.clone(), module, kind)); } @@ -1445,4 +1448,97 @@ not_a_directory system_path_to_file(&db, site_packages.join("spam/spam.py")) ); } + + #[test] + fn module_resolution_paths_cached_between_different_module_resolutions() { + const SRC: &[FileSpec] = &[("foo.py", "")]; + const SITE_PACKAGES: &[FileSpec] = &[("bar.py", "")]; + + let TestCase { + mut db, + src, + site_packages, + .. + } = TestCaseBuilder::new() + .with_src_files(SRC) + .with_site_packages_files(SITE_PACKAGES) + .build(); + + let foo_module_name = ModuleName::new_static("foo").unwrap(); + let bar_module_name = ModuleName::new_static("bar").unwrap(); + + let foo_module = resolve_module(&db, foo_module_name).unwrap(); + assert_eq!( + Some(foo_module.file()), + system_path_to_file(&db, src.join("foo.py")) + ); + + db.clear_salsa_events(); + let bar_module = resolve_module(&db, bar_module_name).unwrap(); + assert_eq!( + Some(bar_module.file()), + system_path_to_file(&db, site_packages.join("bar.py")) + ); + let events = db.take_salsa_events(); + assert_function_query_was_not_run::( + &db, + |res| &res.function, + &(), + &events, + ); + } + + #[test] + fn deleting_pth_file_on_which_module_resolution_depends_invalidates_cache() { + const SITE_PACKAGES: &[FileSpec] = &[("_foo.pth", "/x/src")]; + const X_DIRECTORY: &[FileSpec] = &[("/x/src/foo.py", "")]; + + let TestCase { + mut db, + site_packages, + .. + } = TestCaseBuilder::new() + .with_site_packages_files(SITE_PACKAGES) + .with_other_files(X_DIRECTORY) + .build(); + + let foo_module_name = ModuleName::new_static("foo").unwrap(); + let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); + assert_eq!( + Some(foo_module.file()), + system_path_to_file(&db, SystemPathBuf::from("/x/src/foo.py")) + ); + + let pth_file = site_packages.join("_foo.pth"); + db.memory_file_system().remove_file(&pth_file).unwrap(); + File::touch_path(&mut db, &pth_file); + assert_eq!(resolve_module(&db, foo_module_name.clone()), None); + } + + #[test] + fn deleting_editable_install_on_which_module_resolution_depends_invalidates_cache() { + const SITE_PACKAGES: &[FileSpec] = &[("_foo.pth", "/x/src")]; + const X_DIRECTORY: &[FileSpec] = &[("/x/src/foo.py", "")]; + + let TestCase { mut db, .. } = TestCaseBuilder::new() + .with_site_packages_files(SITE_PACKAGES) + .with_other_files(X_DIRECTORY) + .build(); + + let foo_module_name = ModuleName::new_static("foo").unwrap(); + let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); + let src_path = SystemPathBuf::from("/x/src"); + assert_eq!( + Some(foo_module.file()), + system_path_to_file(&db, src_path.join("foo.py")) + ); + + db.memory_file_system() + .remove_file(src_path.join("foo.py")) + .unwrap(); + db.memory_file_system().remove_directory(&src_path).unwrap(); + File::touch_path(&mut db, &src_path.join("foo.py")); + File::touch_path(&mut db, &src_path); + assert_eq!(resolve_module(&db, foo_module_name.clone()), None); + } } From f664391240c50420ed4693a57b0bd4896092f226 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sun, 14 Jul 2024 16:08:14 +0100 Subject: [PATCH 07/17] Lazily discover editable installations --- crates/red_knot_module_resolver/src/db.rs | 5 +- .../red_knot_module_resolver/src/resolver.rs | 194 +++++++++++------- 2 files changed, 127 insertions(+), 72 deletions(-) diff --git a/crates/red_knot_module_resolver/src/db.rs b/crates/red_knot_module_resolver/src/db.rs index 1756559bc7d81..fe9cb798c3235 100644 --- a/crates/red_knot_module_resolver/src/db.rs +++ b/crates/red_knot_module_resolver/src/db.rs @@ -1,9 +1,8 @@ use ruff_db::Upcast; use crate::resolver::{ - file_to_module, + dynamic_module_resolution_paths, file_to_module, internal::{ModuleNameIngredient, ModuleResolverSettings}, - module_search_paths, resolve_module_query, }; use crate::typeshed::parse_typeshed_versions; @@ -12,7 +11,7 @@ use crate::typeshed::parse_typeshed_versions; pub struct Jar( ModuleNameIngredient<'_>, ModuleResolverSettings, - module_search_paths, + dynamic_module_resolution_paths, resolve_module_query, file_to_module, parse_typeshed_versions, diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 75cdb9abeb4a4..752e1c6756917 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -1,7 +1,7 @@ use std::hash::BuildHasherDefault; +use std::iter::FusedIterator; use std::sync::Arc; -use ordermap::OrderSet; use rustc_hash::FxHasher; use ruff_db::files::{system_path_to_file, File, FilePath}; @@ -16,13 +16,8 @@ use crate::resolver::internal::ModuleResolverSettings; use crate::state::ResolverState; use crate::supported_py_version::TargetVersion; -/// A sequence of search paths that maintains its insertion order, but never contains duplicates. -/// -/// This follows the invariants maintained by [`sys.path` at runtime]: -/// "No item is added to `sys.path` more than once." -/// -/// [`sys.path` at runtime]: https://docs.python.org/3/library/site.html#module-site -type OrderedSearchPaths = OrderSet, BuildHasherDefault>; +type SearchPathRoot = Arc; +type OrderSet = ordermap::OrderSet>; /// Configures the module resolver settings. /// @@ -92,7 +87,7 @@ pub(crate) fn file_to_module(db: &dyn Db, file: File) -> Option { let path = file.path(db.upcast()); - let mut search_paths = module_search_paths(db).into_iter(); + let mut search_paths = iter_module_search_paths(db); let module_name = loop { let candidate = search_paths.next()?; @@ -166,7 +161,7 @@ impl RawModuleResolutionSettings { custom_typeshed, } = self; - let extra_paths: Vec> = extra_paths + let extra_paths: Vec = extra_paths .into_iter() .map(|fs_path| { Arc::new( @@ -223,61 +218,134 @@ impl RawModuleResolutionSettings { #[derive(Debug, PartialEq, Eq, Clone)] struct ValidatedSearchPathSettings { - extra_paths: Vec>, - workspace_root: Arc, - stdlib: Arc, - site_packages: Option>, + extra_paths: Vec, + workspace_root: SearchPathRoot, + stdlib: SearchPathRoot, + site_packages: Option, } impl ValidatedSearchPathSettings { - /// Implementation of the typing spec's [module resolution order] + /// Implementation of the typing spec's [module resolution order]. + /// + /// It's important that we collect these instead of iterating over them lazily, + /// so that we can be sure that it doesn't contain any duplicates. + /// At runtime, [`sys.path` never contains duplicate entries] /// /// [module resolution order]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering - fn search_paths(&self, db: &dyn Db) -> OrderedSearchPaths { + /// [`sys.path` never contains duplicate entries]: https://docs.python.org/3/library/site.html#module-site + fn static_search_paths(&self) -> OrderSet<&SearchPathRoot> { let ValidatedSearchPathSettings { extra_paths, workspace_root, stdlib, site_packages, } = self; - let mut search_paths: OrderedSearchPaths = extra_paths + extra_paths .iter() - .cloned() - .chain([workspace_root, stdlib].into_iter().cloned()) - .collect(); - if let Some(site_packages) = site_packages { - search_paths.insert(Arc::clone(site_packages)); - let site_packages = site_packages - .as_system_path() - .expect("Expected site-packages never to be a VendoredPath!"); - - // As well as modules installed directly into `site-packages`, - // the directory may also contain `.pth` files. - // Each `.pth` file in `site-packages` may contain one or more lines - // containing a (relative or absolute) path. - // Each of these paths may point to an editable install of a package, - // so should be considered an additional search path. - let Ok(pth_file_iterator) = PthFileIterator::new(db, site_packages) else { - return search_paths; - }; + .chain(std::iter::once(workspace_root)) + .chain(std::iter::once(stdlib)) + .chain(site_packages) + .collect() + } +} - // The Python documentation specifies that `.pth` files in `site-packages` - // are processed in alphabetical order. - // We know that files returned from [`ruff_db::system::MemoryFileSystem::read_directory()`] - // will be in alphabetical order already. However, [`ruff_db::system::OsSystem::read_directory()`] - // gives no such guarantees. Therefore, collecting and then sorting is necessary. - // https://docs.python.org/3/library/site.html#module-site - let mut all_pth_files: Vec = pth_file_iterator.collect(); - all_pth_files.sort_by(|a, b| a.path.cmp(&b.path)); - - for pth_file in &all_pth_files { - search_paths.extend(pth_file.iter_editable_installations().map(Arc::new)); - } +/// Collect all dynamic search paths: +/// search paths listed in `.pth` files in the `site-packages` directory +/// due to editable installations of third-party packages. +#[salsa::tracked(return_ref)] +pub(crate) fn dynamic_module_resolution_paths(db: &dyn Db) -> OrderSet { + let settings = &module_resolver_settings(db).search_path_settings; + let static_paths = settings.static_search_paths(); + let mut dynamic_paths = OrderSet::default(); + + if let Some(site_packages) = &settings.site_packages { + let site_packages = site_packages + .as_system_path() + .expect("Expected site-packages never to be a VendoredPath!"); + + // As well as modules installed directly into `site-packages`, + // the directory may also contain `.pth` files. + // Each `.pth` file in `site-packages` may contain one or more lines + // containing a (relative or absolute) path. + // Each of these paths may point to an editable install of a package, + // so should be considered an additional search path. + let Ok(pth_file_iterator) = PthFileIterator::new(db, site_packages) else { + return dynamic_paths; + }; + + // The Python documentation specifies that `.pth` files in `site-packages` + // are processed in alphabetical order. + // We know that files returned from [`ruff_db::system::MemoryFileSystem::read_directory()`] + // will be in alphabetical order already. However, [`ruff_db::system::OsSystem::read_directory()`] + // gives no such guarantees. Therefore, collecting and then sorting is necessary. + // https://docs.python.org/3/library/site.html#module-site + let mut all_pth_files: Vec = pth_file_iterator.collect(); + all_pth_files.sort_by(|a, b| a.path.cmp(&b.path)); + + for pth_file in &all_pth_files { + dynamic_paths.extend(pth_file.iter_editable_installations().filter_map( + |editable_path| { + let possible_search_path = Arc::new(editable_path); + (!static_paths.contains(&possible_search_path)).then_some(possible_search_path) + }, + )); + } + } + dynamic_paths +} + +/// Iterate over the available module-resolution search paths, +/// following the invariants maintained by [`sys.path` at runtime]: +/// "No item is added to `sys.path` more than once." +/// Dynamic search paths (required for editable installs into `site-packages`) +/// are only calculated lazily. +/// +/// [`sys.path` at runtime]: https://docs.python.org/3/library/site.html#module-site +struct SearchPathIterator<'db> { + db: &'db dyn Db, + static_paths: ordermap::set::IntoIter<&'db SearchPathRoot>, + dynamic_paths: Option>, +} + +impl<'db> SearchPathIterator<'db> { + fn new(db: &'db dyn Db) -> Self { + let static_paths = module_resolver_settings(db) + .search_path_settings + .static_search_paths() + .into_iter(); + + Self { + db, + static_paths, + dynamic_paths: None, } - search_paths } } +impl<'db> Iterator for SearchPathIterator<'db> { + type Item = &'db SearchPathRoot; + + fn next(&mut self) -> Option { + let SearchPathIterator { + db, + static_paths, + dynamic_paths, + } = self; + + static_paths.next().or_else(|| { + dynamic_paths + .get_or_insert_with(|| dynamic_module_resolution_paths(*db).into_iter()) + .next() + }) + } +} + +impl<'db> FusedIterator for SearchPathIterator<'db> {} + +fn iter_module_search_paths(db: &dyn Db) -> SearchPathIterator { + SearchPathIterator::new(db) +} + /// Represents a single `.pth` file in a `site-packages` directory. /// One or more lines in a `.pth` file may be a (relative or absolute) /// path that represents an editable installation of a package. @@ -394,7 +462,7 @@ pub(crate) struct ModuleResolutionSettings { } impl ModuleResolutionSettings { - pub(crate) fn target_version(&self) -> TargetVersion { + fn target_version(&self) -> TargetVersion { self.target_version } } @@ -428,13 +496,6 @@ fn module_resolver_settings(db: &dyn Db) -> &ModuleResolutionSettings { ModuleResolverSettings::get(db).settings(db) } -#[salsa::tracked(return_ref)] -pub(crate) fn module_search_paths(db: &dyn Db) -> OrderedSearchPaths { - module_resolver_settings(db) - .search_path_settings - .search_paths(db) -} - /// Given a module name and a list of search paths in which to lookup modules, /// attempt to resolve the module name fn resolve_name( @@ -444,7 +505,7 @@ fn resolve_name( let resolver_settings = module_resolver_settings(db); let resolver_state = ResolverState::new(db, resolver_settings.target_version()); - for search_path in module_search_paths(db) { + for search_path in iter_module_search_paths(db) { let mut components = name.components(); let module_name = components.next_back()?; @@ -1451,17 +1512,12 @@ not_a_directory #[test] fn module_resolution_paths_cached_between_different_module_resolutions() { - const SRC: &[FileSpec] = &[("foo.py", "")]; - const SITE_PACKAGES: &[FileSpec] = &[("bar.py", "")]; + const SITE_PACKAGES: &[FileSpec] = &[("_foo.pth", "/x/src"), ("_bar.pth", "/y/src")]; + const EXTERNAL_DIRECTORIES: &[FileSpec] = &[("/x/src/foo.py", ""), ("/y/src/bar.py", "")]; - let TestCase { - mut db, - src, - site_packages, - .. - } = TestCaseBuilder::new() - .with_src_files(SRC) + let TestCase { mut db, .. } = TestCaseBuilder::new() .with_site_packages_files(SITE_PACKAGES) + .with_other_files(EXTERNAL_DIRECTORIES) .build(); let foo_module_name = ModuleName::new_static("foo").unwrap(); @@ -1470,17 +1526,17 @@ not_a_directory let foo_module = resolve_module(&db, foo_module_name).unwrap(); assert_eq!( Some(foo_module.file()), - system_path_to_file(&db, src.join("foo.py")) + system_path_to_file(&db, "/x/src/foo.py") ); db.clear_salsa_events(); let bar_module = resolve_module(&db, bar_module_name).unwrap(); assert_eq!( Some(bar_module.file()), - system_path_to_file(&db, site_packages.join("bar.py")) + system_path_to_file(&db, "/y/src/bar.py") ); let events = db.take_salsa_events(); - assert_function_query_was_not_run::( + assert_function_query_was_not_run::( &db, |res| &res.function, &(), From 42551d1d193ec4364d03cb7376c07d927eeed6af Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 15 Jul 2024 12:51:14 +0100 Subject: [PATCH 08/17] Misc review comments --- Cargo.lock | 2 +- Cargo.toml | 1 + crates/red_knot_module_resolver/Cargo.toml | 2 +- .../red_knot_module_resolver/src/resolver.rs | 144 ++++++++---------- 4 files changed, 67 insertions(+), 82 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9df065e8bb608..4aec86ce00c6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1888,9 +1888,9 @@ dependencies = [ "anyhow", "camino", "compact_str", + "indexmap", "insta", "once_cell", - "ordermap", "path-slash", "ruff_db", "ruff_python_stdlib", diff --git a/Cargo.toml b/Cargo.toml index 0cb4f2e88ea17..57dd88431f3e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,6 +72,7 @@ hashbrown = "0.14.3" ignore = { version = "0.4.22" } imara-diff = { version = "0.1.5" } imperative = { version = "1.0.4" } +indexmap = { version = "2.2.6" } indicatif = { version = "0.17.8" } indoc = { version = "2.0.4" } insta = { version = "1.35.1" } diff --git a/crates/red_knot_module_resolver/Cargo.toml b/crates/red_knot_module_resolver/Cargo.toml index a4a14c6fdb19f..f97c26812354a 100644 --- a/crates/red_knot_module_resolver/Cargo.toml +++ b/crates/red_knot_module_resolver/Cargo.toml @@ -16,7 +16,7 @@ ruff_python_stdlib = { workspace = true } compact_str = { workspace = true } camino = { workspace = true } -ordermap = { workspace = true } +indexmap = { workspace = true } once_cell = { workspace = true } rustc-hash = { workspace = true } salsa = { workspace = true } diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 752e1c6756917..728b7f56b50c8 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -17,7 +17,7 @@ use crate::state::ResolverState; use crate::supported_py_version::TargetVersion; type SearchPathRoot = Arc; -type OrderSet = ordermap::OrderSet>; +type OrderSet = indexmap::IndexSet>; /// Configures the module resolver settings. /// @@ -87,7 +87,9 @@ pub(crate) fn file_to_module(db: &dyn Db, file: File) -> Option { let path = file.path(db.upcast()); - let mut search_paths = iter_module_search_paths(db); + let resolver_settings = module_resolver_settings(db); + + let mut search_paths = resolver_settings.search_paths(db); let module_name = loop { let candidate = search_paths.next()?; @@ -143,12 +145,16 @@ impl RawModuleResolutionSettings { /// Validate and normalize the raw settings given by the user /// into settings we can use for module resolution /// + /// This method also implements the typing spec's [module resolution order]. + /// /// TODO(Alex): this method does multiple `.unwrap()` calls when it should really return an error. /// Each `.unwrap()` call is a point where we're validating a setting that the user would pass /// and transforming it into an internal representation for a validated path. /// Rather than panicking if a path fails to validate, we should display an error message to the user /// and exit the process with a nonzero exit code. /// This validation should probably be done outside of Salsa? + /// + /// [module resolution order]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering fn into_configuration_settings( self, current_directory: &SystemPath, @@ -157,11 +163,11 @@ impl RawModuleResolutionSettings { target_version, extra_paths, workspace_root, - site_packages, + site_packages: site_packages_setting, custom_typeshed, } = self; - let extra_paths: Vec = extra_paths + let mut static_search_paths: OrderSet = extra_paths .into_iter() .map(|fs_path| { Arc::new( @@ -174,15 +180,15 @@ impl RawModuleResolutionSettings { }) .collect(); - let workspace_root = Arc::new( + static_search_paths.insert(Arc::new( ModuleResolutionPathBuf::first_party(SystemPath::absolute( workspace_root, current_directory, )) .unwrap(), - ); + )); - let stdlib = Arc::new(custom_typeshed.map_or_else( + static_search_paths.insert(Arc::new(custom_typeshed.map_or_else( ModuleResolutionPathBuf::vendored_stdlib, |custom| { ModuleResolutionPathBuf::stdlib_from_custom_typeshed_root(&SystemPath::absolute( @@ -191,25 +197,28 @@ impl RawModuleResolutionSettings { )) .unwrap() }, - )); + ))); - // TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step - let site_packages = site_packages.map(|path| { - Arc::new( + let mut site_packages = None; + + if let Some(path) = site_packages_setting { + let site_packages_root = Arc::new( ModuleResolutionPathBuf::site_packages(SystemPath::absolute( path, current_directory, )) .unwrap(), - ) - }); + ); + site_packages = Some(site_packages_root.clone()); + static_search_paths.insert(site_packages_root); + } + + // TODO vendor typeshed's third-party stubs as well as the stdlib and fallback to them as a final step ModuleResolutionSettings { target_version, search_path_settings: ValidatedSearchPathSettings { - extra_paths, - workspace_root, - stdlib, + static_search_paths, site_packages, }, } @@ -218,35 +227,18 @@ impl RawModuleResolutionSettings { #[derive(Debug, PartialEq, Eq, Clone)] struct ValidatedSearchPathSettings { - extra_paths: Vec, - workspace_root: SearchPathRoot, - stdlib: SearchPathRoot, - site_packages: Option, -} - -impl ValidatedSearchPathSettings { - /// Implementation of the typing spec's [module resolution order]. + /// Search paths that have been statically determined purely from reading Ruff's configuration settings. + /// These shouldn't ever change unless the config settings themselves change. /// - /// It's important that we collect these instead of iterating over them lazily, - /// so that we can be sure that it doesn't contain any duplicates. - /// At runtime, [`sys.path` never contains duplicate entries] + /// We use an `OrderSet` for storing the paths, since the order is important, + /// but at runtime, [`sys.path` never contains duplicate entries] + /// + /// Note that `site-packages` *is included* as a search path in this sequence, + /// but it is also stored separately so that we're able to find editable installs later. /// - /// [module resolution order]: https://typing.readthedocs.io/en/latest/spec/distributing.html#import-resolution-ordering /// [`sys.path` never contains duplicate entries]: https://docs.python.org/3/library/site.html#module-site - fn static_search_paths(&self) -> OrderSet<&SearchPathRoot> { - let ValidatedSearchPathSettings { - extra_paths, - workspace_root, - stdlib, - site_packages, - } = self; - extra_paths - .iter() - .chain(std::iter::once(workspace_root)) - .chain(std::iter::once(stdlib)) - .chain(site_packages) - .collect() - } + static_search_paths: OrderSet, + site_packages: Option, } /// Collect all dynamic search paths: @@ -254,11 +246,14 @@ impl ValidatedSearchPathSettings { /// due to editable installations of third-party packages. #[salsa::tracked(return_ref)] pub(crate) fn dynamic_module_resolution_paths(db: &dyn Db) -> OrderSet { - let settings = &module_resolver_settings(db).search_path_settings; - let static_paths = settings.static_search_paths(); + let ValidatedSearchPathSettings { + static_search_paths, + site_packages, + } = &module_resolver_settings(db).search_path_settings; + let mut dynamic_paths = OrderSet::default(); - if let Some(site_packages) = &settings.site_packages { + if let Some(site_packages) = site_packages { let site_packages = site_packages .as_system_path() .expect("Expected site-packages never to be a VendoredPath!"); @@ -274,21 +269,21 @@ pub(crate) fn dynamic_module_resolution_paths(db: &dyn Db) -> OrderSet = pth_file_iterator.collect(); all_pth_files.sort_by(|a, b| a.path.cmp(&b.path)); for pth_file in &all_pth_files { - dynamic_paths.extend(pth_file.iter_editable_installations().filter_map( - |editable_path| { - let possible_search_path = Arc::new(editable_path); - (!static_paths.contains(&possible_search_path)).then_some(possible_search_path) - }, - )); + dynamic_paths.extend( + pth_file + .editable_installations() + .filter_map(|editable_path| { + let possible_search_path = Arc::new(editable_path); + (!static_search_paths.contains(&possible_search_path)) + .then_some(possible_search_path) + }), + ); } } dynamic_paths @@ -303,23 +298,8 @@ pub(crate) fn dynamic_module_resolution_paths(db: &dyn Db) -> OrderSet { db: &'db dyn Db, - static_paths: ordermap::set::IntoIter<&'db SearchPathRoot>, - dynamic_paths: Option>, -} - -impl<'db> SearchPathIterator<'db> { - fn new(db: &'db dyn Db) -> Self { - let static_paths = module_resolver_settings(db) - .search_path_settings - .static_search_paths() - .into_iter(); - - Self { - db, - static_paths, - dynamic_paths: None, - } - } + static_paths: indexmap::set::Iter<'db, SearchPathRoot>, + dynamic_paths: Option>, } impl<'db> Iterator for SearchPathIterator<'db> { @@ -342,10 +322,6 @@ impl<'db> Iterator for SearchPathIterator<'db> { impl<'db> FusedIterator for SearchPathIterator<'db> {} -fn iter_module_search_paths(db: &dyn Db) -> SearchPathIterator { - SearchPathIterator::new(db) -} - /// Represents a single `.pth` file in a `site-packages` directory. /// One or more lines in a `.pth` file may be a (relative or absolute) /// path that represents an editable installation of a package. @@ -373,9 +349,7 @@ impl<'db> PthFile<'db> { /// Yield paths in this `.pth` file that appear to represent editable installations, /// and should therefore be added as module-resolution search paths. - fn iter_editable_installations<'a>( - &'a self, - ) -> impl Iterator + 'db + fn editable_installations<'a>(&'a self) -> impl Iterator + 'db where 'a: 'db, { @@ -465,6 +439,16 @@ impl ModuleResolutionSettings { fn target_version(&self) -> TargetVersion { self.target_version } + + fn search_paths<'db>(&'db self, db: &'db dyn Db) -> SearchPathIterator<'db> { + let static_paths = self.search_path_settings.static_search_paths.iter(); + + SearchPathIterator { + db, + static_paths, + dynamic_paths: None, + } + } } // The singleton methods generated by salsa are all `pub` instead of `pub(crate)` which triggers @@ -505,7 +489,7 @@ fn resolve_name( let resolver_settings = module_resolver_settings(db); let resolver_state = ResolverState::new(db, resolver_settings.target_version()); - for search_path in iter_module_search_paths(db) { + for search_path in resolver_settings.search_paths(db) { let mut components = name.components(); let module_name = components.next_back()?; From 1709e91e1a6b0934f4524a9d90b88ffe8fbe684f Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 15 Jul 2024 13:03:12 +0100 Subject: [PATCH 09/17] Simplify --- crates/red_knot_module_resolver/src/resolver.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 728b7f56b50c8..7e1242a723599 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -366,11 +366,7 @@ impl<'db> PthFile<'db> { { return None; } - let possible_editable_install = SystemPath::absolute( - self.site_packages.join(line), - self.db.system().current_directory(), - ); - + let possible_editable_install = SystemPath::absolute(line, self.site_packages); ModuleResolutionPathBuf::editable_installation_root(self.db, possible_editable_install) }) } From 64f03c66556bb39132a6267ad592e654df584afe Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 15 Jul 2024 13:18:27 +0100 Subject: [PATCH 10/17] Use `report_untracked_read()` --- crates/red_knot_module_resolver/src/resolver.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 7e1242a723599..bc8d296eec23a 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -246,6 +246,8 @@ struct ValidatedSearchPathSettings { /// due to editable installations of third-party packages. #[salsa::tracked(return_ref)] pub(crate) fn dynamic_module_resolution_paths(db: &dyn Db) -> OrderSet { + db.report_untracked_read(); + let ValidatedSearchPathSettings { static_search_paths, site_packages, From b1b2ffb9f057778de676cc33aa5107855a522acc Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 15 Jul 2024 13:31:06 +0100 Subject: [PATCH 11/17] Avoid an unnecessary allocation --- crates/red_knot_module_resolver/src/resolver.rs | 15 ++++----------- crates/ruff_db/src/system.rs | 4 ++++ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index bc8d296eec23a..fd56a7f92be84 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -409,19 +409,14 @@ impl<'db> Iterator for PthFileIterator<'db> { if file_type.is_directory() { continue; } - let path = entry.path(); + let path = entry.into_path(); if path.extension() != Some("pth") { continue; } - let Some(file) = system_path_to_file(self.db.upcast(), path) else { + let Some(file) = system_path_to_file(self.db.upcast(), &path) else { continue; }; - return Some(PthFile::new( - self.db, - path.to_path_buf(), - file, - self.site_packages, - )); + return Some(PthFile::new(self.db, path, file, self.site_packages)); } } } @@ -439,11 +434,9 @@ impl ModuleResolutionSettings { } fn search_paths<'db>(&'db self, db: &'db dyn Db) -> SearchPathIterator<'db> { - let static_paths = self.search_path_settings.static_search_paths.iter(); - SearchPathIterator { db, - static_paths, + static_paths: self.search_path_settings.static_search_paths.iter(), dynamic_paths: None, } } diff --git a/crates/ruff_db/src/system.rs b/crates/ruff_db/src/system.rs index 80250fd3fb3e3..52228d0623381 100644 --- a/crates/ruff_db/src/system.rs +++ b/crates/ruff_db/src/system.rs @@ -138,6 +138,10 @@ impl DirectoryEntry { Self { path, file_type } } + pub fn into_path(self) -> SystemPathBuf { + self.path + } + pub fn path(&self) -> &SystemPath { &self.path } From 1adf37d575a6d3571389a8fae3503f41060b5da7 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 15 Jul 2024 13:54:01 +0100 Subject: [PATCH 12/17] Don't add a new method for creating arbitrary directories in tests --- .../red_knot_module_resolver/src/resolver.rs | 32 +++++++++++-------- .../red_knot_module_resolver/src/testing.rs | 20 ------------ 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index fd56a7f92be84..9218597d87437 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -1379,14 +1379,14 @@ mod tests { #[test] fn editable_install_absolute_path() { const SITE_PACKAGES: &[FileSpec] = &[("_foo.pth", "/x/src")]; - const X_DIRECTORY: &[FileSpec] = - &[("/x/src/foo/__init__.py", ""), ("/x/src/foo/bar.py", "")]; + let x_directory = [("/x/src/foo/__init__.py", ""), ("/x/src/foo/bar.py", "")]; - let TestCase { db, .. } = TestCaseBuilder::new() + let TestCase { mut db, .. } = TestCaseBuilder::new() .with_site_packages_files(SITE_PACKAGES) - .with_other_files(X_DIRECTORY) .build(); + db.write_files(x_directory).unwrap(); + let foo_module_name = ModuleName::new_static("foo").unwrap(); let foo_bar_module_name = ModuleName::new_static("foo.bar").unwrap(); @@ -1448,15 +1448,18 @@ not_a_directory ("spam/spam.py", ""), ]; - const ROOT: &[FileSpec] = &[("/a.py", ""), ("/baz/b.py", "")]; + let root_files = [("/a.py", ""), ("/baz/b.py", "")]; let TestCase { - db, site_packages, .. + mut db, + site_packages, + .. } = TestCaseBuilder::new() .with_site_packages_files(SITE_PACKAGES) - .with_other_files(ROOT) .build(); + db.write_files(root_files).unwrap(); + let foo_module_name = ModuleName::new_static("foo").unwrap(); let a_module_name = ModuleName::new_static("a").unwrap(); let b_module_name = ModuleName::new_static("b").unwrap(); @@ -1488,13 +1491,14 @@ not_a_directory #[test] fn module_resolution_paths_cached_between_different_module_resolutions() { const SITE_PACKAGES: &[FileSpec] = &[("_foo.pth", "/x/src"), ("_bar.pth", "/y/src")]; - const EXTERNAL_DIRECTORIES: &[FileSpec] = &[("/x/src/foo.py", ""), ("/y/src/bar.py", "")]; + let external_directories = [("/x/src/foo.py", ""), ("/y/src/bar.py", "")]; let TestCase { mut db, .. } = TestCaseBuilder::new() .with_site_packages_files(SITE_PACKAGES) - .with_other_files(EXTERNAL_DIRECTORIES) .build(); + db.write_files(external_directories).unwrap(); + let foo_module_name = ModuleName::new_static("foo").unwrap(); let bar_module_name = ModuleName::new_static("bar").unwrap(); @@ -1522,7 +1526,7 @@ not_a_directory #[test] fn deleting_pth_file_on_which_module_resolution_depends_invalidates_cache() { const SITE_PACKAGES: &[FileSpec] = &[("_foo.pth", "/x/src")]; - const X_DIRECTORY: &[FileSpec] = &[("/x/src/foo.py", "")]; + let x_directory = [("/x/src/foo.py", "")]; let TestCase { mut db, @@ -1530,9 +1534,10 @@ not_a_directory .. } = TestCaseBuilder::new() .with_site_packages_files(SITE_PACKAGES) - .with_other_files(X_DIRECTORY) .build(); + db.write_files(x_directory).unwrap(); + let foo_module_name = ModuleName::new_static("foo").unwrap(); let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); assert_eq!( @@ -1549,13 +1554,14 @@ not_a_directory #[test] fn deleting_editable_install_on_which_module_resolution_depends_invalidates_cache() { const SITE_PACKAGES: &[FileSpec] = &[("_foo.pth", "/x/src")]; - const X_DIRECTORY: &[FileSpec] = &[("/x/src/foo.py", "")]; + let x_directory = [("/x/src/foo.py", "")]; let TestCase { mut db, .. } = TestCaseBuilder::new() .with_site_packages_files(SITE_PACKAGES) - .with_other_files(X_DIRECTORY) .build(); + db.write_files(x_directory).unwrap(); + let foo_module_name = ModuleName::new_static("foo").unwrap(); let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); let src_path = SystemPathBuf::from("/x/src"); diff --git a/crates/red_knot_module_resolver/src/testing.rs b/crates/red_knot_module_resolver/src/testing.rs index 56d8e66de7912..b927ae7a1ef2e 100644 --- a/crates/red_knot_module_resolver/src/testing.rs +++ b/crates/red_knot_module_resolver/src/testing.rs @@ -99,7 +99,6 @@ pub(crate) struct TestCaseBuilder { target_version: TargetVersion, first_party_files: Vec, site_packages_files: Vec, - other_files: Vec, } impl TestCaseBuilder { @@ -121,16 +120,6 @@ impl TestCaseBuilder { self } - /// Add other files external to any of the pre-configured search paths. - /// - /// (For example, an editable installation in `site-packages` - /// might point to a directory outside of any of the typing spec's - /// listed module resolution sources) - pub(crate) fn with_other_files(mut self, files: &[FileSpec]) -> Self { - self.other_files.extend(files.iter().copied()); - self - } - fn write_mock_directory( db: &mut TestDb, location: impl AsRef, @@ -154,7 +143,6 @@ impl TestCaseBuilder { target_version: TargetVersion::default(), first_party_files: vec![], site_packages_files: vec![], - other_files: vec![], } } @@ -165,14 +153,12 @@ impl TestCaseBuilder { target_version, first_party_files, site_packages_files, - other_files, } = self; TestCaseBuilder { typeshed_option: VendoredTypeshed, target_version, first_party_files, site_packages_files, - other_files, } } @@ -186,14 +172,12 @@ impl TestCaseBuilder { target_version, first_party_files, site_packages_files, - other_files, } = self; TestCaseBuilder { typeshed_option: typeshed, target_version, first_party_files, site_packages_files, - other_files, } } @@ -222,7 +206,6 @@ impl TestCaseBuilder { target_version, first_party_files, site_packages_files, - other_files, } = self; let mut db = TestDb::new(); @@ -231,7 +214,6 @@ impl TestCaseBuilder { Self::write_mock_directory(&mut db, "/site-packages", site_packages_files); let src = Self::write_mock_directory(&mut db, "/src", first_party_files); let typeshed = Self::build_typeshed_mock(&mut db, &typeshed_option); - Self::write_mock_directory(&mut db, "", other_files); set_module_resolution_settings( &mut db, @@ -278,7 +260,6 @@ impl TestCaseBuilder { target_version, first_party_files, site_packages_files, - other_files, } = self; let mut db = TestDb::new(); @@ -286,7 +267,6 @@ impl TestCaseBuilder { let site_packages = Self::write_mock_directory(&mut db, "/site-packages", site_packages_files); let src = Self::write_mock_directory(&mut db, "/src", first_party_files); - Self::write_mock_directory(&mut db, "", other_files); set_module_resolution_settings( &mut db, From 388fdca6a9b5a687e796a88b555204239b044a30 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 15 Jul 2024 15:27:42 +0100 Subject: [PATCH 13/17] Change test so that it asserts the cache is invalidated after modification rather than deletion of the `pth` file --- crates/red_knot_module_resolver/src/path.rs | 6 +- .../red_knot_module_resolver/src/resolver.rs | 66 +++++++++++-------- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/crates/red_knot_module_resolver/src/path.rs b/crates/red_knot_module_resolver/src/path.rs index 4fc2f3554d469..8a79c666fcd4f 100644 --- a/crates/red_knot_module_resolver/src/path.rs +++ b/crates/red_knot_module_resolver/src/path.rs @@ -6,7 +6,7 @@ use std::fmt; use ruff_db::files::{system_path_to_file, vendored_path_to_file, File, FilePath}; -use ruff_db::system::{SystemPath, SystemPathBuf}; +use ruff_db::system::{System, SystemPath, SystemPathBuf}; use ruff_db::vendored::{VendoredPath, VendoredPathBuf}; use crate::db::Db; @@ -213,12 +213,12 @@ impl ModuleResolutionPathBuf { #[must_use] pub(crate) fn editable_installation_root( - db: &dyn Db, + system: &dyn System, path: impl Into, ) -> Option { let path = path.into(); // TODO: Add Salsa invalidation to this system call: - db.system() + system .is_directory(&path) .then_some(Self(ModuleResolutionPathBufInner::EditableInstall(path))) } diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 9218597d87437..6a5364baf16d9 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -2,11 +2,11 @@ use std::hash::BuildHasherDefault; use std::iter::FusedIterator; use std::sync::Arc; +use ruff_db::source::{source_text, SourceText}; use rustc_hash::FxHasher; use ruff_db::files::{system_path_to_file, File, FilePath}; -use ruff_db::source::{source_text, SourceText}; -use ruff_db::system::{DirectoryEntry, SystemPath, SystemPathBuf}; +use ruff_db::system::{DirectoryEntry, System, SystemPath, SystemPathBuf}; use crate::db::Db; use crate::module::{Module, ModuleKind}; @@ -328,38 +328,31 @@ impl<'db> FusedIterator for SearchPathIterator<'db> {} /// One or more lines in a `.pth` file may be a (relative or absolute) /// path that represents an editable installation of a package. struct PthFile<'db> { - db: &'db dyn Db, + system: &'db dyn System, path: SystemPathBuf, contents: SourceText, site_packages: &'db SystemPath, } impl<'db> PthFile<'db> { - fn new( - db: &'db dyn Db, - pth_path: SystemPathBuf, - file: File, - site_packages: &'db SystemPath, - ) -> Self { - Self { - db, - path: pth_path, - contents: source_text(db.upcast(), file), - site_packages, - } - } - /// Yield paths in this `.pth` file that appear to represent editable installations, /// and should therefore be added as module-resolution search paths. fn editable_installations<'a>(&'a self) -> impl Iterator + 'db where 'a: 'db, { + let PthFile { + system, + path: _, + contents, + site_packages, + } = self; + // Empty lines or lines starting with '#' are ignored by the Python interpreter. // Lines that start with "import " or "import\t" do not represent editable installs at all; // instead, these are files that are executed by Python at startup. // https://docs.python.org/3/library/site.html#module-site - self.contents.lines().filter_map(|line| { + contents.lines().filter_map(move |line| { let line = line.trim(); if line.is_empty() || line.starts_with('#') @@ -368,8 +361,8 @@ impl<'db> PthFile<'db> { { return None; } - let possible_editable_install = SystemPath::absolute(line, self.site_packages); - ModuleResolutionPathBuf::editable_installation_root(self.db, possible_editable_install) + let possible_editable_install = SystemPath::absolute(line, site_packages); + ModuleResolutionPathBuf::editable_installation_root(*system, possible_editable_install) }) } } @@ -398,8 +391,16 @@ impl<'db> Iterator for PthFileIterator<'db> { type Item = PthFile<'db>; fn next(&mut self) -> Option { + let PthFileIterator { + db, + directory_iterator, + site_packages, + } = self; + + let system = db.system(); + loop { - let entry_result = self.directory_iterator.next()?; + let entry_result = directory_iterator.next()?; let Ok(entry) = entry_result else { continue; }; @@ -413,10 +414,19 @@ impl<'db> Iterator for PthFileIterator<'db> { if path.extension() != Some("pth") { continue; } - let Some(file) = system_path_to_file(self.db.upcast(), &path) else { + + let Some(file) = system_path_to_file(db.upcast(), &path) else { continue; }; - return Some(PthFile::new(self.db, path, file, self.site_packages)); + + let contents = source_text(db.upcast(), file); + + return Some(PthFile { + system, + path, + contents, + site_packages, + }); } } } @@ -1524,7 +1534,7 @@ not_a_directory } #[test] - fn deleting_pth_file_on_which_module_resolution_depends_invalidates_cache() { + fn changing_pth_file_on_which_module_resolution_depends_invalidates_cache() { const SITE_PACKAGES: &[FileSpec] = &[("_foo.pth", "/x/src")]; let x_directory = [("/x/src/foo.py", "")]; @@ -1545,9 +1555,11 @@ not_a_directory system_path_to_file(&db, SystemPathBuf::from("/x/src/foo.py")) ); - let pth_file = site_packages.join("_foo.pth"); - db.memory_file_system().remove_file(&pth_file).unwrap(); - File::touch_path(&mut db, &pth_file); + let pth_file_path = site_packages.join("_foo.pth"); + db.memory_file_system() + .write_file(&pth_file_path, "/y/src") + .unwrap(); + File::touch_path(&mut db, &pth_file_path); assert_eq!(resolve_module(&db, foo_module_name.clone()), None); } From 1c267b5e053852852fc3bf4da8f0da4c8ed6b18d Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 15 Jul 2024 17:13:39 +0100 Subject: [PATCH 14/17] Update crates/red_knot_module_resolver/src/resolver.rs Co-authored-by: Micha Reiser --- crates/red_knot_module_resolver/src/resolver.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 6a5364baf16d9..bc5cfd942831c 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -1404,8 +1404,8 @@ mod tests { let foo_bar_module = resolve_module(&db, foo_bar_module_name.clone()).unwrap(); assert_eq!( - Some(foo_module.file()), - system_path_to_file(&db, SystemPathBuf::from("/x/src/foo/__init__.py")) + foo_module.file().path, + FilePath::System(SystemPathBuf::from("/x/src/foo/__init__.py")) ); assert_eq!( Some(foo_bar_module.file()), From 6d8acb90915a5f7cf780d1685006f141ebb797be Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 15 Jul 2024 17:55:04 +0100 Subject: [PATCH 15/17] Cleanup tests --- .../red_knot_module_resolver/src/resolver.rs | 50 ++++++++----------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index bc5cfd942831c..22bd94cb3e021 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -1404,12 +1404,12 @@ mod tests { let foo_bar_module = resolve_module(&db, foo_bar_module_name.clone()).unwrap(); assert_eq!( - foo_module.file().path, - FilePath::System(SystemPathBuf::from("/x/src/foo/__init__.py")) + foo_module.file().path(&db), + &FilePath::system("/x/src/foo/__init__.py") ); assert_eq!( - Some(foo_bar_module.file()), - system_path_to_file(&db, SystemPathBuf::from("/x/src/foo/bar.py")) + foo_bar_module.file().path(&db), + &FilePath::system("/x/src/foo/bar.py") ); } @@ -1420,9 +1420,7 @@ mod tests { ("../x/y/src/foo.pyi", ""), ]; - let TestCase { - db, site_packages, .. - } = TestCaseBuilder::new() + let TestCase { db, .. } = TestCaseBuilder::new() .with_site_packages_files(SITE_PACKAGES) .build(); @@ -1430,8 +1428,8 @@ mod tests { let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); assert_eq!( - Some(foo_module.file()), - system_path_to_file(&db, site_packages.join("../x/y/src/foo.pyi")) + foo_module.file().path(&db), + &FilePath::system("/x/y/src/foo.pyi") ); } @@ -1481,20 +1479,14 @@ not_a_directory let spam_module = resolve_module(&db, spam_module_name.clone()).unwrap(); assert_eq!( - Some(foo_module.file()), - system_path_to_file(&db, site_packages.join("../x/y/src/foo.pyi")) - ); - assert_eq!( - Some(a_module.file()), - system_path_to_file(&db, SystemPathBuf::from("/a.py")) - ); - assert_eq!( - Some(b_module.file()), - system_path_to_file(&db, SystemPathBuf::from("/baz/b.py")) + foo_module.file().path(&db), + &FilePath::system("/x/y/src/foo.pyi") ); + assert_eq!(a_module.file().path(&db), &FilePath::system("/a.py")); + assert_eq!(b_module.file().path(&db), &FilePath::system("/baz/b.py")); assert_eq!( - Some(spam_module.file()), - system_path_to_file(&db, site_packages.join("spam/spam.py")) + spam_module.file().path(&db), + &FilePath::System(site_packages.join("spam/spam.py")) ); } @@ -1514,15 +1506,15 @@ not_a_directory let foo_module = resolve_module(&db, foo_module_name).unwrap(); assert_eq!( - Some(foo_module.file()), - system_path_to_file(&db, "/x/src/foo.py") + foo_module.file().path(&db), + &FilePath::system("/x/src/foo.py") ); db.clear_salsa_events(); let bar_module = resolve_module(&db, bar_module_name).unwrap(); assert_eq!( - Some(bar_module.file()), - system_path_to_file(&db, "/y/src/bar.py") + bar_module.file().path(&db), + &FilePath::system("/y/src/bar.py") ); let events = db.take_salsa_events(); assert_function_query_was_not_run::( @@ -1551,8 +1543,8 @@ not_a_directory let foo_module_name = ModuleName::new_static("foo").unwrap(); let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); assert_eq!( - Some(foo_module.file()), - system_path_to_file(&db, SystemPathBuf::from("/x/src/foo.py")) + foo_module.file().path(&db), + &FilePath::system("/x/src/foo.py") ); let pth_file_path = site_packages.join("_foo.pth"); @@ -1578,8 +1570,8 @@ not_a_directory let foo_module = resolve_module(&db, foo_module_name.clone()).unwrap(); let src_path = SystemPathBuf::from("/x/src"); assert_eq!( - Some(foo_module.file()), - system_path_to_file(&db, src_path.join("foo.py")) + foo_module.file().path(&db), + &FilePath::System(src_path.join("foo.py")) ); db.memory_file_system() From e29d3d44f54200e5e089978a1dd3002884f6c582 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 15 Jul 2024 20:26:11 +0100 Subject: [PATCH 16/17] Fix comment Co-authored-by: Carl Meyer --- crates/red_knot_module_resolver/src/resolver.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 22bd94cb3e021..18480725cfab5 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -350,7 +350,7 @@ impl<'db> PthFile<'db> { // Empty lines or lines starting with '#' are ignored by the Python interpreter. // Lines that start with "import " or "import\t" do not represent editable installs at all; - // instead, these are files that are executed by Python at startup. + // instead, these are lines that are executed by Python at startup. // https://docs.python.org/3/library/site.html#module-site contents.lines().filter_map(move |line| { let line = line.trim(); From 23d2d8b501e49811f1f0e74ed773440a77afef28 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Tue, 16 Jul 2024 19:09:01 +0100 Subject: [PATCH 17/17] Fix bugs, address review, add TODOs --- Cargo.lock | 1 - Cargo.toml | 1 - crates/red_knot_module_resolver/Cargo.toml | 1 - crates/red_knot_module_resolver/src/db.rs | 4 +- crates/red_knot_module_resolver/src/path.rs | 2 +- .../red_knot_module_resolver/src/resolver.rs | 192 +++++++++++++++--- 6 files changed, 163 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4aec86ce00c6a..9d64570b9a718 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1888,7 +1888,6 @@ dependencies = [ "anyhow", "camino", "compact_str", - "indexmap", "insta", "once_cell", "path-slash", diff --git a/Cargo.toml b/Cargo.toml index 57dd88431f3e8..0cb4f2e88ea17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,7 +72,6 @@ hashbrown = "0.14.3" ignore = { version = "0.4.22" } imara-diff = { version = "0.1.5" } imperative = { version = "1.0.4" } -indexmap = { version = "2.2.6" } indicatif = { version = "0.17.8" } indoc = { version = "2.0.4" } insta = { version = "1.35.1" } diff --git a/crates/red_knot_module_resolver/Cargo.toml b/crates/red_knot_module_resolver/Cargo.toml index f97c26812354a..a6761665d6116 100644 --- a/crates/red_knot_module_resolver/Cargo.toml +++ b/crates/red_knot_module_resolver/Cargo.toml @@ -16,7 +16,6 @@ ruff_python_stdlib = { workspace = true } compact_str = { workspace = true } camino = { workspace = true } -indexmap = { workspace = true } once_cell = { workspace = true } rustc-hash = { workspace = true } salsa = { workspace = true } diff --git a/crates/red_knot_module_resolver/src/db.rs b/crates/red_knot_module_resolver/src/db.rs index fe9cb798c3235..9d6d6419117d3 100644 --- a/crates/red_knot_module_resolver/src/db.rs +++ b/crates/red_knot_module_resolver/src/db.rs @@ -1,7 +1,7 @@ use ruff_db::Upcast; use crate::resolver::{ - dynamic_module_resolution_paths, file_to_module, + editable_install_resolution_paths, file_to_module, internal::{ModuleNameIngredient, ModuleResolverSettings}, resolve_module_query, }; @@ -11,7 +11,7 @@ use crate::typeshed::parse_typeshed_versions; pub struct Jar( ModuleNameIngredient<'_>, ModuleResolverSettings, - dynamic_module_resolution_paths, + editable_install_resolution_paths, resolve_module_query, file_to_module, parse_typeshed_versions, diff --git a/crates/red_knot_module_resolver/src/path.rs b/crates/red_knot_module_resolver/src/path.rs index 8a79c666fcd4f..9ad4463f525a0 100644 --- a/crates/red_knot_module_resolver/src/path.rs +++ b/crates/red_knot_module_resolver/src/path.rs @@ -256,7 +256,7 @@ impl ModuleResolutionPathBuf { ModuleResolutionPathRef::from(self).to_file(search_path, resolver) } - pub(crate) fn as_system_path(&self) -> Option<&SystemPath> { + pub(crate) fn as_system_path(&self) -> Option<&SystemPathBuf> { match &self.0 { ModuleResolutionPathBufInner::Extra(path) => Some(path), ModuleResolutionPathBufInner::FirstParty(path) => Some(path), diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 22bd94cb3e021..bbc78912dda1d 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -1,11 +1,11 @@ +use std::collections; use std::hash::BuildHasherDefault; use std::iter::FusedIterator; use std::sync::Arc; -use ruff_db::source::{source_text, SourceText}; use rustc_hash::FxHasher; -use ruff_db::files::{system_path_to_file, File, FilePath}; +use ruff_db::files::{File, FilePath}; use ruff_db::system::{DirectoryEntry, System, SystemPath, SystemPathBuf}; use crate::db::Db; @@ -17,7 +17,77 @@ use crate::state::ResolverState; use crate::supported_py_version::TargetVersion; type SearchPathRoot = Arc; -type OrderSet = indexmap::IndexSet>; + +/// An ordered sequence of search paths. +/// +/// The sequence respects the invariant maintained by [`sys.path` at runtime] +/// where no two module-resolution paths ever point to the same directory on disk. +/// (Paths may, however, *overlap* -- e.g. you could have both `src/` and `src/foo` +/// as module resolution paths simultaneously.) +/// +/// [`sys.path` at runtime]: https://docs.python.org/3/library/site.html#module-site +#[derive(Debug, PartialEq, Eq, Default, Clone)] +pub(crate) struct SearchPathSequence { + raw_paths: collections::HashSet>, + search_paths: Vec, +} + +impl SearchPathSequence { + fn insert(&mut self, path: SearchPathRoot) -> bool { + // Just assume that all search paths that aren't SystemPaths are unique + if let Some(fs_path) = path.as_system_path() { + if self.raw_paths.contains(fs_path) { + false + } else { + let raw_path = fs_path.to_owned(); + self.search_paths.push(path); + self.raw_paths.insert(raw_path) + } + } else { + self.search_paths.push(path); + true + } + } + + fn contains(&self, path: &SearchPathRoot) -> bool { + if let Some(fs_path) = path.as_system_path() { + self.raw_paths.contains(fs_path) + } else { + self.search_paths.contains(path) + } + } + + fn iter(&self) -> std::slice::Iter { + self.search_paths.iter() + } +} + +impl<'a> IntoIterator for &'a SearchPathSequence { + type IntoIter = std::slice::Iter<'a, SearchPathRoot>; + type Item = &'a SearchPathRoot; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl FromIterator for SearchPathSequence { + fn from_iter>(iter: T) -> Self { + let mut sequence = Self::default(); + for item in iter { + sequence.insert(item); + } + sequence + } +} + +impl Extend for SearchPathSequence { + fn extend>(&mut self, iter: T) { + for item in iter { + self.insert(item); + } + } +} /// Configures the module resolver settings. /// @@ -167,7 +237,7 @@ impl RawModuleResolutionSettings { custom_typeshed, } = self; - let mut static_search_paths: OrderSet = extra_paths + let mut static_search_paths: SearchPathSequence = extra_paths .into_iter() .map(|fs_path| { Arc::new( @@ -230,14 +300,9 @@ struct ValidatedSearchPathSettings { /// Search paths that have been statically determined purely from reading Ruff's configuration settings. /// These shouldn't ever change unless the config settings themselves change. /// - /// We use an `OrderSet` for storing the paths, since the order is important, - /// but at runtime, [`sys.path` never contains duplicate entries] - /// /// Note that `site-packages` *is included* as a search path in this sequence, /// but it is also stored separately so that we're able to find editable installs later. - /// - /// [`sys.path` never contains duplicate entries]: https://docs.python.org/3/library/site.html#module-site - static_search_paths: OrderSet, + static_search_paths: SearchPathSequence, site_packages: Option, } @@ -245,7 +310,18 @@ struct ValidatedSearchPathSettings { /// search paths listed in `.pth` files in the `site-packages` directory /// due to editable installations of third-party packages. #[salsa::tracked(return_ref)] -pub(crate) fn dynamic_module_resolution_paths(db: &dyn Db) -> OrderSet { +pub(crate) fn editable_install_resolution_paths(db: &dyn Db) -> SearchPathSequence { + // This query needs to be re-executed each time a `.pth` file + // is added, modified or removed from the `site-packages` directory. + // However, we don't use Salsa queries to read the source text of `.pth` files; + // we use the APIs on the `System` trait directly. As such, for now we simply ask + // Salsa to recompute this query on each new revision. + // + // TODO: add some kind of watcher for the `site-packages` directory that looks + // for `site-packages/*.pth` files being added/modified/removed; get rid of this. + // When doing so, also make the test + // `deleting_pth_file_on_which_module_resolution_depends_invalidates_cache()` + // more principled! db.report_untracked_read(); let ValidatedSearchPathSettings { @@ -253,7 +329,7 @@ pub(crate) fn dynamic_module_resolution_paths(db: &dyn Db) -> OrderSet OrderSet { db: &'db dyn Db, - static_paths: indexmap::set::Iter<'db, SearchPathRoot>, - dynamic_paths: Option>, + static_paths: std::slice::Iter<'db, SearchPathRoot>, + dynamic_paths: Option>, } impl<'db> Iterator for SearchPathIterator<'db> { @@ -316,7 +392,7 @@ impl<'db> Iterator for SearchPathIterator<'db> { static_paths.next().or_else(|| { dynamic_paths - .get_or_insert_with(|| dynamic_module_resolution_paths(*db).into_iter()) + .get_or_insert_with(|| editable_install_resolution_paths(*db).into_iter()) .next() }) } @@ -330,17 +406,14 @@ impl<'db> FusedIterator for SearchPathIterator<'db> {} struct PthFile<'db> { system: &'db dyn System, path: SystemPathBuf, - contents: SourceText, + contents: String, site_packages: &'db SystemPath, } impl<'db> PthFile<'db> { /// Yield paths in this `.pth` file that appear to represent editable installations, /// and should therefore be added as module-resolution search paths. - fn editable_installations<'a>(&'a self) -> impl Iterator + 'db - where - 'a: 'db, - { + fn editable_installations(&'db self) -> impl Iterator + 'db { let PthFile { system, path: _, @@ -353,7 +426,7 @@ impl<'db> PthFile<'db> { // instead, these are files that are executed by Python at startup. // https://docs.python.org/3/library/site.html#module-site contents.lines().filter_map(move |line| { - let line = line.trim(); + let line = line.trim_end(); if line.is_empty() || line.starts_with('#') || line.starts_with("import ") @@ -377,8 +450,6 @@ struct PthFileIterator<'db> { impl<'db> PthFileIterator<'db> { fn new(db: &'db dyn Db, site_packages: &'db SystemPath) -> std::io::Result { - // TODO: The salsa cache should be invalidated every time a `.pth` file is added to `site-packages`, - // as well as every time an existing one is removed. Ok(Self { db, directory_iterator: db.system().read_directory(site_packages)?, @@ -415,12 +486,10 @@ impl<'db> Iterator for PthFileIterator<'db> { continue; } - let Some(file) = system_path_to_file(db.upcast(), &path) else { + let Ok(contents) = db.system().read_to_string(&path) else { continue; }; - let contents = source_text(db.upcast(), file); - return Some(PthFile { system, path, @@ -633,6 +702,7 @@ mod tests { use ruff_db::files::{system_path_to_file, File, FilePath}; use ruff_db::system::{DbWithTestSystem, OsSystem, SystemPath}; use ruff_db::testing::assert_function_query_was_not_run; + use ruff_db::Db; use crate::db::tests::TestDb; use crate::module::ModuleKind; @@ -1413,6 +1483,33 @@ mod tests { ); } + #[test] + fn editable_install_pth_file_with_whitespace() { + const SITE_PACKAGES: &[FileSpec] = &[ + ("_foo.pth", " /x/src"), + ("_bar.pth", "/y/src "), + ]; + let external_files = [("/x/src/foo.py", ""), ("/y/src/bar.py", "")]; + + let TestCase { mut db, .. } = TestCaseBuilder::new() + .with_site_packages_files(SITE_PACKAGES) + .build(); + + db.write_files(external_files).unwrap(); + + // Lines with leading whitespace in `.pth` files do not parse: + let foo_module_name = ModuleName::new_static("foo").unwrap(); + assert_eq!(resolve_module(&db, foo_module_name), None); + + // Lines with trailing whitespace in `.pth` files do: + let bar_module_name = ModuleName::new_static("bar").unwrap(); + let bar_module = resolve_module(&db, bar_module_name.clone()).unwrap(); + assert_eq!( + bar_module.file().path(&db), + &FilePath::system("/y/src/bar.py") + ); + } + #[test] fn editable_install_relative_path() { const SITE_PACKAGES: &[FileSpec] = &[ @@ -1444,7 +1541,7 @@ mod tests { import not_an_editable_install; do_something_else_crazy_dynamic() # another comment - spam +spam not_a_directory "; @@ -1517,7 +1614,7 @@ not_a_directory &FilePath::system("/y/src/bar.py") ); let events = db.take_salsa_events(); - assert_function_query_was_not_run::( + assert_function_query_was_not_run::( &db, |res| &res.function, &(), @@ -1526,7 +1623,7 @@ not_a_directory } #[test] - fn changing_pth_file_on_which_module_resolution_depends_invalidates_cache() { + fn deleting_pth_file_on_which_module_resolution_depends_invalidates_cache() { const SITE_PACKAGES: &[FileSpec] = &[("_foo.pth", "/x/src")]; let x_directory = [("/x/src/foo.py", "")]; @@ -1547,11 +1644,23 @@ not_a_directory &FilePath::system("/x/src/foo.py") ); - let pth_file_path = site_packages.join("_foo.pth"); db.memory_file_system() - .write_file(&pth_file_path, "/y/src") + .remove_file(site_packages.join("_foo.pth")) .unwrap(); - File::touch_path(&mut db, &pth_file_path); + + // Why are we touching a random file in the path that's been editably installed, + // rather than the `.pth` file, when the `.pth` file is the one that has been deleted? + // It's because the `.pth` file isn't directly tracked as a dependency by Salsa + // currently (we don't use `system_path_to_file()` to get the file, and we don't use + // `source_text()` to read the source of the file). Instead of using these APIs which + // would automatically add the existence and contents of the file as a Salsa-tracked + // dependency, we use `.report_untracked_read()` to force Salsa to re-parse all + // `.pth` files on each new "revision". Making a random modification to a tracked + // Salsa file forces a new revision. + // + // TODO: get rid of the `.report_untracked_read()` call... + File::touch_path(&mut db, SystemPath::new("/x/src/foo.py")); + assert_eq!(resolve_module(&db, foo_module_name.clone()), None); } @@ -1582,4 +1691,23 @@ not_a_directory File::touch_path(&mut db, &src_path); assert_eq!(resolve_module(&db, foo_module_name.clone()), None); } + + #[test] + fn no_duplicate_search_paths_added() { + let TestCase { db, .. } = TestCaseBuilder::new() + .with_src_files(&[("foo.py", "")]) + .with_site_packages_files(&[("_foo.pth", "/src")]) + .build(); + + let search_paths: Vec<&SearchPathRoot> = + module_resolver_settings(&db).search_paths(&db).collect(); + + assert!(search_paths.contains(&&Arc::new( + ModuleResolutionPathBuf::first_party("/src").unwrap() + ))); + + assert!(!search_paths.contains(&&Arc::new( + ModuleResolutionPathBuf::editable_installation_root(db.system(), "/src").unwrap() + ))); + } }