From c4147a07c2b0fc0ac1becbf5957f50aaa25d33ab Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Tue, 10 Sep 2024 09:50:13 +0200 Subject: [PATCH] refactor: use typed path in lock file --- crates/file_url/src/lib.rs | 38 +++++--- crates/rattler_lock/Cargo.toml | 1 + crates/rattler_lock/src/url_or_path.rs | 88 +++++++++++++------ .../src/utils/serde/url_or_path.rs | 8 +- 4 files changed, 91 insertions(+), 44 deletions(-) diff --git a/crates/file_url/src/lib.rs b/crates/file_url/src/lib.rs index a9c73c6dd..1f0775b7c 100644 --- a/crates/file_url/src/lib.rs +++ b/crates/file_url/src/lib.rs @@ -8,9 +8,7 @@ use std::fmt::Write; use std::path::PathBuf; use std::str::FromStr; use thiserror::Error; -use typed_path::{ - Utf8TypedComponent, Utf8TypedPath, Utf8UnixComponent, Utf8WindowsComponent, Utf8WindowsPrefix, -}; +use typed_path::{Utf8TypedComponent, Utf8TypedPath, Utf8TypedPathBuf, Utf8UnixComponent, Utf8WindowsComponent, Utf8WindowsPrefix}; use url::{Host, Url}; /// Returns true if the specified segment is considered to be a Windows drive letter segment. @@ -33,14 +31,8 @@ fn is_windows_drive_letter_segment(segment: &str) -> Option { None } -/// Tries to convert a `file://` based URL to a path. -/// -/// We assume that any passed URL that represents a path is an absolute path. -/// -/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this -/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function -/// tries to parse the URL as a path on all operating systems. -pub fn url_to_path(url: &Url) -> Option { + +fn url_to_path_inner>(url: &Url) -> Option { if url.scheme() != "file" { return None; } @@ -77,7 +69,29 @@ pub fn url_to_path(url: &Url) -> Option { } } - Some(PathBuf::from(path)) + Some(T::from(path)) +} + +/// Tries to convert a `file://` based URL to a path. +/// +/// We assume that any passed URL that represents a path is an absolute path. +/// +/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this +/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function +/// tries to parse the URL as a path on all operating systems. +pub fn url_to_path(url: &Url) -> Option { + url_to_path_inner(url) +} + +/// Tries to convert a `file://` based URL to a path. +/// +/// We assume that any passed URL that represents a path is an absolute path. +/// +/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this +/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function +/// tries to parse the URL as a path on all operating systems. +pub fn url_to_typed_path(url: &Url) -> Option { + url_to_path_inner(url) } const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); diff --git a/crates/rattler_lock/Cargo.toml b/crates/rattler_lock/Cargo.toml index 6cb3b05a2..b9a9005c4 100644 --- a/crates/rattler_lock/Cargo.toml +++ b/crates/rattler_lock/Cargo.toml @@ -26,6 +26,7 @@ serde_with = { workspace = true, features = ["indexmap_2"] } serde_repr = { workspace = true } thiserror = { workspace = true } url = { workspace = true, features = ["serde"] } +typed-path = { workspace = true } [dev-dependencies] insta = { workspace = true, features = ["yaml"] } diff --git a/crates/rattler_lock/src/url_or_path.rs b/crates/rattler_lock/src/url_or_path.rs index ad290c817..7c1b84fb9 100644 --- a/crates/rattler_lock/src/url_or_path.rs +++ b/crates/rattler_lock/src/url_or_path.rs @@ -1,16 +1,15 @@ -use file_url::url_to_path; +use file_url::{url_to_typed_path}; use itertools::Itertools; use serde_with::{DeserializeFromStr, SerializeDisplay}; use std::borrow::Cow; use std::cmp::Ordering; use std::hash::Hash; -use std::path::Path; use std::{ fmt::{Display, Formatter}, - path::PathBuf, str::FromStr, }; use thiserror::Error; +use typed_path::{Utf8TypedPath, Utf8TypedPathBuf}; use url::Url; /// Represents either a URL or a path. @@ -25,7 +24,7 @@ pub enum UrlOrPath { Url(Url), /// A local (or networked) path. - Path(PathBuf), + Path(Utf8TypedPathBuf), } impl PartialOrd for UrlOrPath { @@ -47,7 +46,7 @@ impl Ord for UrlOrPath { impl PartialEq for UrlOrPath { fn eq(&self, other: &Self) -> bool { - match (self.canonicalize().as_ref(), other.canonicalize().as_ref()) { + match (self.normalize().as_ref(), other.normalize().as_ref()) { (UrlOrPath::Path(a), UrlOrPath::Path(b)) => a == b, (UrlOrPath::Url(a), UrlOrPath::Url(b)) => a == b, _ => false, @@ -57,21 +56,21 @@ impl PartialEq for UrlOrPath { impl Hash for UrlOrPath { fn hash(&self, state: &mut H) { - match self.canonicalize().as_ref() { + match self.normalize().as_ref() { UrlOrPath::Url(url) => url.hash(state), UrlOrPath::Path(path) => path.hash(state), } } } -impl From for UrlOrPath { - fn from(value: PathBuf) -> Self { +impl From for UrlOrPath { + fn from(value: Utf8TypedPathBuf) -> Self { UrlOrPath::Path(value) } } -impl From<&Path> for UrlOrPath { - fn from(value: &Path) -> Self { +impl<'a> From> for UrlOrPath { + fn from(value: Utf8TypedPath<'a>) -> Self { UrlOrPath::Path(value.to_path_buf()) } } @@ -79,7 +78,7 @@ impl From<&Path> for UrlOrPath { impl From for UrlOrPath { fn from(value: Url) -> Self { // Try to normalize the URL to a path if possible. - if let Some(path) = url_to_path(&value) { + if let Some(path) = url_to_typed_path(&value) { UrlOrPath::Path(path) } else { UrlOrPath::Url(value) @@ -90,7 +89,7 @@ impl From for UrlOrPath { impl Display for UrlOrPath { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - UrlOrPath::Path(path) => write!(f, "{}", path.display()), + UrlOrPath::Path(path) => write!(f, "{path}"), UrlOrPath::Url(url) => write!(f, "{url}"), } } @@ -106,22 +105,37 @@ impl UrlOrPath { } /// Returns the path if this is a path. - pub fn as_path(&self) -> Option<&Path> { + pub fn as_path(&self) -> Option> { match self { - UrlOrPath::Path(path) => Some(path), + UrlOrPath::Path(path) => Some(path.to_path()), UrlOrPath::Url(_) => None, } } - /// Canonicalizes the instance to be a path if possible. + /// Normalizes the instance to be a path if possible and resolving `..` and `.` segments. /// /// If this instance is a URL with a `file://` scheme, this will try to convert it to a path. - pub fn canonicalize(&self) -> Cow<'_, Self> { - if let Some(path) = self.as_url().and_then(url_to_path) { - return Cow::Owned(UrlOrPath::Path(path)); + pub fn normalize(&self) -> Cow<'_, Self> { + match self { + UrlOrPath::Url(url) => { + if let Some(path) = url_to_typed_path(url) { + return Cow::Owned(UrlOrPath::Path(path.normalize())); + } + Cow::Borrowed(self) + } + UrlOrPath::Path(path) => { + Cow::Owned(UrlOrPath::Path(path.normalize())) + } } + } - Cow::Borrowed(self) + /// Returns the file name of the path or url. If the path or url ends in a directory seperator `None` is returned. + pub fn file_name(&self) -> Option<&str> { + match self { + UrlOrPath::Path(path) if !path.as_str().ends_with(['/', '\\']) => path.file_name(), + UrlOrPath::Url(url) if !url.as_str().ends_with('/') => url.path_segments()?.last(), + _ => None, + } } } @@ -136,22 +150,22 @@ impl FromStr for UrlOrPath { fn from_str(s: &str) -> Result { fn scheme_is_drive_letter(scheme: &str) -> bool { - let Some((drive_letter,)) = scheme.chars().collect_tuple() else { + let Some((drive_letter, )) = scheme.chars().collect_tuple() else { return false; }; drive_letter.is_ascii_alphabetic() } // First try to parse the string as a path. - return match Url::from_str(s) { + match Url::from_str(s) { Ok(url) => Ok(if scheme_is_drive_letter(url.scheme()) { - UrlOrPath::Path(PathBuf::from(s)) + UrlOrPath::Path(s.into()) } else { - UrlOrPath::Url(url).canonicalize().into_owned() + UrlOrPath::Url(url).normalize().into_owned() }), - Err(url::ParseError::RelativeUrlWithoutBase) => Ok(UrlOrPath::Path(PathBuf::from(s))), + Err(url::ParseError::RelativeUrlWithoutBase) => Ok(UrlOrPath::Path(s.into())), Err(e) => Err(PathOrUrlError::InvalidUrl(e)), - }; + } } } @@ -159,6 +173,24 @@ impl FromStr for UrlOrPath { mod test { use super::*; use std::str::FromStr; + use rstest::*; + + #[rstest] + #[case("https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2", + Some("_libgcc_mutex-0.1-conda_forge.tar.bz2") + )] + #[case("C:\\packages\\_libgcc_mutex-0.1-conda_forge.tar.bz2", + Some("_libgcc_mutex-0.1-conda_forge.tar.bz2") + )] + #[case("/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2", + Some("_libgcc_mutex-0.1-conda_forge.tar.bz2") + )] + #[case("https://conda.anaconda.org/conda-forge/linux-64/", None)] + #[case("C:\\packages\\", None)] + #[case("/packages/", None)] + fn test_file_name(#[case] case: UrlOrPath, #[case] expected_filename: Option<&str>) { + assert_eq!(case.file_name(), expected_filename); + } #[test] fn test_equality() { @@ -169,7 +201,7 @@ mod test { // Absolute paths as file and direct path (UrlOrPath::Url("file:///home/bob/test-file.txt".parse().unwrap()), - UrlOrPath::Path("/home/bob/test-file.txt".parse().unwrap())), + UrlOrPath::Path("/home/bob/test-file.txt".into())), ]; for (a, b) in &tests { @@ -228,10 +260,10 @@ mod test { "\\\\127.0.0.1\\c$\\temp\\test-file.txt", ]; - for path in &paths { + for path in paths { assert_eq!( UrlOrPath::from_str(path).unwrap(), - UrlOrPath::Path(path.parse().unwrap()) + UrlOrPath::Path(path.into()) ); } } diff --git a/crates/rattler_lock/src/utils/serde/url_or_path.rs b/crates/rattler_lock/src/utils/serde/url_or_path.rs index 001a0bea3..3dc2f30f6 100644 --- a/crates/rattler_lock/src/utils/serde/url_or_path.rs +++ b/crates/rattler_lock/src/utils/serde/url_or_path.rs @@ -9,7 +9,7 @@ use crate::UrlOrPath; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::{borrow::Cow, path::PathBuf}; +use std::{borrow::Cow}; use url::Url; #[derive(Serialize, Deserialize)] @@ -17,7 +17,7 @@ struct RawUrlOrPath<'a> { #[serde(skip_serializing_if = "Option::is_none")] url: Option>, #[serde(skip_serializing_if = "Option::is_none")] - path: Option>, + path: Option>, } pub fn serialize(value: &UrlOrPath, serializer: S) -> Result @@ -31,7 +31,7 @@ where }, UrlOrPath::Path(path) => RawUrlOrPath { url: None, - path: Some(Cow::Borrowed(path)), + path: Some(Cow::Borrowed(path.as_str())), }, }; @@ -45,7 +45,7 @@ where let raw = RawUrlOrPath::<'de>::deserialize(deserializer)?; match (raw.url, raw.path) { (Some(url), None) => Ok(UrlOrPath::Url(url.into_owned())), - (None, Some(path)) => Ok(UrlOrPath::Path(path.into_owned())), + (None, Some(path)) => Ok(UrlOrPath::Path(path.into_owned().into())), _ => Err(serde::de::Error::custom("expected either a url or a path")), } }