Skip to content

Commit

Permalink
refactor: use typed path in lock file
Browse files Browse the repository at this point in the history
  • Loading branch information
baszalmstra committed Sep 10, 2024
1 parent 4885895 commit c4147a0
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 44 deletions.
38 changes: 26 additions & 12 deletions crates/file_url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ use std::fmt::Write;
use std::path::PathBuf;

Check warning on line 8 in crates/file_url/src/lib.rs

View workflow job for this annotation

GitHub Actions / Format and Lint

Diff in /home/runner/work/rattler/rattler/crates/file_url/src/lib.rs
use std::str::FromStr;
use thiserror::Error;
use typed_path::{
Utf8TypedComponent, Utf8TypedPath, Utf8UnixComponent, Utf8WindowsComponent, Utf8WindowsPrefix,
};
use typed_path::{Utf8TypedComponent, Utf8TypedPath, Utf8TypedPathBuf, Utf8UnixComponent, Utf8WindowsComponent, Utf8WindowsPrefix};
use url::{Host, Url};

/// Returns true if the specified segment is considered to be a Windows drive letter segment.
Expand All @@ -33,14 +31,8 @@ fn is_windows_drive_letter_segment(segment: &str) -> Option<String> {
None
}

/// Tries to convert a `file://` based URL to a path.
///
/// We assume that any passed URL that represents a path is an absolute path.
///
/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this
/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function
/// tries to parse the URL as a path on all operating systems.
pub fn url_to_path(url: &Url) -> Option<PathBuf> {

fn url_to_path_inner<T: From<String>>(url: &Url) -> Option<T> {
if url.scheme() != "file" {
return None;
}
Expand Down Expand Up @@ -77,7 +69,29 @@ pub fn url_to_path(url: &Url) -> Option<PathBuf> {
}
}

Some(PathBuf::from(path))
Some(T::from(path))
}

/// Tries to convert a `file://` based URL to a path.
///
/// We assume that any passed URL that represents a path is an absolute path.
///
/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this
/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function
/// tries to parse the URL as a path on all operating systems.
pub fn url_to_path(url: &Url) -> Option<PathBuf> {
url_to_path_inner(url)
}

/// Tries to convert a `file://` based URL to a path.
///
/// We assume that any passed URL that represents a path is an absolute path.
///
/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this
/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function
/// tries to parse the URL as a path on all operating systems.
pub fn url_to_typed_path(url: &Url) -> Option<Utf8TypedPathBuf> {
url_to_path_inner(url)
}

const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
Expand Down
1 change: 1 addition & 0 deletions crates/rattler_lock/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ serde_with = { workspace = true, features = ["indexmap_2"] }
serde_repr = { workspace = true }
thiserror = { workspace = true }
url = { workspace = true, features = ["serde"] }
typed-path = { workspace = true }

[dev-dependencies]
insta = { workspace = true, features = ["yaml"] }
Expand Down
88 changes: 60 additions & 28 deletions crates/rattler_lock/src/url_or_path.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
use file_url::url_to_path;
use file_url::{url_to_typed_path};

Check warning on line 1 in crates/rattler_lock/src/url_or_path.rs

View workflow job for this annotation

GitHub Actions / Format and Lint

Diff in /home/runner/work/rattler/rattler/crates/rattler_lock/src/url_or_path.rs
use itertools::Itertools;
use serde_with::{DeserializeFromStr, SerializeDisplay};
use std::borrow::Cow;
use std::cmp::Ordering;
use std::hash::Hash;
use std::path::Path;
use std::{
fmt::{Display, Formatter},
path::PathBuf,
str::FromStr,
};
use thiserror::Error;
use typed_path::{Utf8TypedPath, Utf8TypedPathBuf};
use url::Url;

/// Represents either a URL or a path.
Expand All @@ -25,7 +24,7 @@ pub enum UrlOrPath {
Url(Url),

/// A local (or networked) path.
Path(PathBuf),
Path(Utf8TypedPathBuf),
}

impl PartialOrd<Self> for UrlOrPath {
Expand All @@ -47,7 +46,7 @@ impl Ord for UrlOrPath {

impl PartialEq for UrlOrPath {
fn eq(&self, other: &Self) -> bool {
match (self.canonicalize().as_ref(), other.canonicalize().as_ref()) {
match (self.normalize().as_ref(), other.normalize().as_ref()) {
(UrlOrPath::Path(a), UrlOrPath::Path(b)) => a == b,
(UrlOrPath::Url(a), UrlOrPath::Url(b)) => a == b,
_ => false,
Expand All @@ -57,29 +56,29 @@ impl PartialEq for UrlOrPath {

impl Hash for UrlOrPath {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
match self.canonicalize().as_ref() {
match self.normalize().as_ref() {
UrlOrPath::Url(url) => url.hash(state),
UrlOrPath::Path(path) => path.hash(state),

Check failure on line 61 in crates/rattler_lock/src/url_or_path.rs

View workflow job for this annotation

GitHub Actions / Format, Lint and Test the Python bindings

the method `hash` exists for reference `&Utf8TypedPathBuf`, but its trait bounds were not satisfied
}
}
}

impl From<PathBuf> for UrlOrPath {
fn from(value: PathBuf) -> Self {
impl From<Utf8TypedPathBuf> for UrlOrPath {
fn from(value: Utf8TypedPathBuf) -> Self {
UrlOrPath::Path(value)
}
}

impl From<&Path> for UrlOrPath {
fn from(value: &Path) -> Self {
impl<'a> From<Utf8TypedPath<'a>> for UrlOrPath {
fn from(value: Utf8TypedPath<'a>) -> Self {
UrlOrPath::Path(value.to_path_buf())
}
}

impl From<Url> for UrlOrPath {
fn from(value: Url) -> Self {
// Try to normalize the URL to a path if possible.
if let Some(path) = url_to_path(&value) {
if let Some(path) = url_to_typed_path(&value) {
UrlOrPath::Path(path)
} else {
UrlOrPath::Url(value)
Expand All @@ -90,7 +89,7 @@ impl From<Url> for UrlOrPath {
impl Display for UrlOrPath {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
UrlOrPath::Path(path) => write!(f, "{}", path.display()),
UrlOrPath::Path(path) => write!(f, "{path}"),
UrlOrPath::Url(url) => write!(f, "{url}"),
}
}
Expand All @@ -106,22 +105,37 @@ impl UrlOrPath {
}

/// Returns the path if this is a path.
pub fn as_path(&self) -> Option<&Path> {
pub fn as_path(&self) -> Option<Utf8TypedPath<'_>> {
match self {
UrlOrPath::Path(path) => Some(path),
UrlOrPath::Path(path) => Some(path.to_path()),
UrlOrPath::Url(_) => None,
}
}

/// Canonicalizes the instance to be a path if possible.
/// Normalizes the instance to be a path if possible and resolving `..` and `.` segments.
///
/// If this instance is a URL with a `file://` scheme, this will try to convert it to a path.
pub fn canonicalize(&self) -> Cow<'_, Self> {
if let Some(path) = self.as_url().and_then(url_to_path) {
return Cow::Owned(UrlOrPath::Path(path));
pub fn normalize(&self) -> Cow<'_, Self> {
match self {
UrlOrPath::Url(url) => {
if let Some(path) = url_to_typed_path(url) {
return Cow::Owned(UrlOrPath::Path(path.normalize()));
}

Check warning on line 123 in crates/rattler_lock/src/url_or_path.rs

View workflow job for this annotation

GitHub Actions / Format and Lint

Diff in /home/runner/work/rattler/rattler/crates/rattler_lock/src/url_or_path.rs
Cow::Borrowed(self)
}
UrlOrPath::Path(path) => {
Cow::Owned(UrlOrPath::Path(path.normalize()))
}
}
}

Cow::Borrowed(self)
/// Returns the file name of the path or url. If the path or url ends in a directory seperator `None` is returned.
pub fn file_name(&self) -> Option<&str> {
match self {
UrlOrPath::Path(path) if !path.as_str().ends_with(['/', '\\']) => path.file_name(),
UrlOrPath::Url(url) if !url.as_str().ends_with('/') => url.path_segments()?.last(),
_ => None,
}
}
}

Expand All @@ -136,29 +150,47 @@ impl FromStr for UrlOrPath {

Check warning on line 150 in crates/rattler_lock/src/url_or_path.rs

View workflow job for this annotation

GitHub Actions / Format and Lint

Diff in /home/runner/work/rattler/rattler/crates/rattler_lock/src/url_or_path.rs
fn from_str(s: &str) -> Result<Self, Self::Err> {
fn scheme_is_drive_letter(scheme: &str) -> bool {
let Some((drive_letter,)) = scheme.chars().collect_tuple() else {
let Some((drive_letter, )) = scheme.chars().collect_tuple() else {
return false;
};
drive_letter.is_ascii_alphabetic()
}

// First try to parse the string as a path.
return match Url::from_str(s) {
match Url::from_str(s) {
Ok(url) => Ok(if scheme_is_drive_letter(url.scheme()) {
UrlOrPath::Path(PathBuf::from(s))
UrlOrPath::Path(s.into())
} else {
UrlOrPath::Url(url).canonicalize().into_owned()
UrlOrPath::Url(url).normalize().into_owned()
}),
Err(url::ParseError::RelativeUrlWithoutBase) => Ok(UrlOrPath::Path(PathBuf::from(s))),
Err(url::ParseError::RelativeUrlWithoutBase) => Ok(UrlOrPath::Path(s.into())),
Err(e) => Err(PathOrUrlError::InvalidUrl(e)),
};
}
}
}

#[cfg(test)]

Check warning on line 172 in crates/rattler_lock/src/url_or_path.rs

View workflow job for this annotation

GitHub Actions / Format and Lint

Diff in /home/runner/work/rattler/rattler/crates/rattler_lock/src/url_or_path.rs
mod test {
use super::*;
use std::str::FromStr;
use rstest::*;

#[rstest]
#[case("https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2",
Some("_libgcc_mutex-0.1-conda_forge.tar.bz2")
)]
#[case("C:\\packages\\_libgcc_mutex-0.1-conda_forge.tar.bz2",
Some("_libgcc_mutex-0.1-conda_forge.tar.bz2")
)]
#[case("/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2",
Some("_libgcc_mutex-0.1-conda_forge.tar.bz2")
)]
#[case("https://conda.anaconda.org/conda-forge/linux-64/", None)]
#[case("C:\\packages\\", None)]
#[case("/packages/", None)]
fn test_file_name(#[case] case: UrlOrPath, #[case] expected_filename: Option<&str>) {
assert_eq!(case.file_name(), expected_filename);
}

#[test]
fn test_equality() {
Expand All @@ -169,7 +201,7 @@ mod test {

// Absolute paths as file and direct path
(UrlOrPath::Url("file:///home/bob/test-file.txt".parse().unwrap()),
UrlOrPath::Path("/home/bob/test-file.txt".parse().unwrap())),
UrlOrPath::Path("/home/bob/test-file.txt".into())),
];

for (a, b) in &tests {
Expand Down Expand Up @@ -228,10 +260,10 @@ mod test {
"\\\\127.0.0.1\\c$\\temp\\test-file.txt",
];

for path in &paths {
for path in paths {
assert_eq!(
UrlOrPath::from_str(path).unwrap(),
UrlOrPath::Path(path.parse().unwrap())
UrlOrPath::Path(path.into())
);
}
}
Expand Down
8 changes: 4 additions & 4 deletions crates/rattler_lock/src/utils/serde/url_or_path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@

Check warning on line 9 in crates/rattler_lock/src/utils/serde/url_or_path.rs

View workflow job for this annotation

GitHub Actions / Format and Lint

Diff in /home/runner/work/rattler/rattler/crates/rattler_lock/src/utils/serde/url_or_path.rs
use crate::UrlOrPath;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::{borrow::Cow, path::PathBuf};
use std::{borrow::Cow};
use url::Url;

#[derive(Serialize, Deserialize)]
struct RawUrlOrPath<'a> {
#[serde(skip_serializing_if = "Option::is_none")]
url: Option<Cow<'a, Url>>,
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<Cow<'a, PathBuf>>,
path: Option<Cow<'a, str>>,
}

pub fn serialize<S>(value: &UrlOrPath, serializer: S) -> Result<S::Ok, S::Error>
Expand All @@ -31,7 +31,7 @@ where
},
UrlOrPath::Path(path) => RawUrlOrPath {
url: None,
path: Some(Cow::Borrowed(path)),
path: Some(Cow::Borrowed(path.as_str())),
},
};

Expand All @@ -45,7 +45,7 @@ where
let raw = RawUrlOrPath::<'de>::deserialize(deserializer)?;
match (raw.url, raw.path) {
(Some(url), None) => Ok(UrlOrPath::Url(url.into_owned())),
(None, Some(path)) => Ok(UrlOrPath::Path(path.into_owned())),
(None, Some(path)) => Ok(UrlOrPath::Path(path.into_owned().into())),
_ => Err(serde::de::Error::custom("expected either a url or a path")),
}
}

0 comments on commit c4147a0

Please sign in to comment.