diff --git a/src/cargo/core/resolver/encode.rs b/src/cargo/core/resolver/encode.rs index f73d023b184..000044d2168 100644 --- a/src/cargo/core/resolver/encode.rs +++ b/src/cargo/core/resolver/encode.rs @@ -196,13 +196,13 @@ impl EncodableResolve { let enc_id = EncodablePackageId { name: pkg.name.clone(), version: Some(pkg.version.clone()), - source: pkg.source, + source: pkg.source.clone(), }; if !all_pkgs.insert(enc_id.clone()) { anyhow::bail!("package `{}` is specified twice in the lockfile", pkg.name); } - let id = match pkg.source.as_ref().or_else(|| path_deps.get(&pkg.name)) { + let id = match pkg.source.as_deref().or_else(|| path_deps.get(&pkg.name)) { // We failed to find a local package in the workspace. // It must have been removed and should be ignored. None => { @@ -366,7 +366,7 @@ impl EncodableResolve { let mut unused_patches = Vec::new(); for pkg in self.patch.unused { - let id = match pkg.source.as_ref().or_else(|| path_deps.get(&pkg.name)) { + let id = match pkg.source.as_deref().or_else(|| path_deps.get(&pkg.name)) { Some(&src) => PackageId::new(&pkg.name, &pkg.version, src)?, None => continue, }; @@ -488,17 +488,95 @@ impl Patch { pub struct EncodableDependency { name: String, version: String, - source: Option, + source: Option, checksum: Option, dependencies: Option>, replace: Option, } +/// Pretty much equivalent to [`SourceId`] with a different serialization method. +/// +/// The serialization for `SourceId` doesn't do URL encode for parameters. +/// In contrast, this type is aware of that whenever [`ResolveVersion`] allows +/// us to do so (v4 or later). +/// +/// [`EncodableResolve`] turns into a ` +#[derive(Deserialize, Debug, PartialOrd, Ord, Clone)] +#[serde(transparent)] +pub struct EncodableSourceId { + inner: SourceId, + /// We don't care about the deserialization of this, as the `url` crate + /// will always decode as the URL was encoded. Only when a [`Resolve`] + /// turns into a [`EncodableResolve`] will it set the value accordingly + /// via [`encodable_source_id`]. + #[serde(skip)] + encoded: bool, +} + +impl EncodableSourceId { + /// Creates a `EncodableSourceId` that always encodes URL params. + fn new(inner: SourceId) -> Self { + Self { + inner, + encoded: true, + } + } + + /// Creates a `EncodableSourceId` that doesn't encode URL params. This is + /// for backward compatibility for order lockfile version. + fn without_url_encoded(inner: SourceId) -> Self { + Self { + inner, + encoded: false, + } + } + + /// Encodes the inner [`SourceId`] as a URL. + fn as_url(&self) -> impl fmt::Display + '_ { + if self.encoded { + self.inner.as_encoded_url() + } else { + self.inner.as_url() + } + } +} + +impl std::ops::Deref for EncodableSourceId { + type Target = SourceId; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl ser::Serialize for EncodableSourceId { + fn serialize(&self, s: S) -> Result + where + S: ser::Serializer, + { + s.collect_str(&self.as_url()) + } +} + +impl std::hash::Hash for EncodableSourceId { + fn hash(&self, state: &mut H) { + self.inner.hash(state) + } +} + +impl std::cmp::PartialEq for EncodableSourceId { + fn eq(&self, other: &Self) -> bool { + self.inner == other.inner + } +} + +impl std::cmp::Eq for EncodableSourceId {} + #[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Hash, Clone)] pub struct EncodablePackageId { name: String, version: Option, - source: Option, + source: Option, } impl fmt::Display for EncodablePackageId { @@ -535,7 +613,8 @@ impl FromStr for EncodablePackageId { Ok(EncodablePackageId { name: name.to_string(), version: version.map(|v| v.to_string()), - source: source_id, + // Default to url encoded. + source: source_id.map(EncodableSourceId::new), }) } } @@ -603,7 +682,7 @@ impl ser::Serialize for Resolve { .map(|id| EncodableDependency { name: id.name().to_string(), version: id.version().to_string(), - source: encode_source(id.source_id()), + source: encodable_source_id(id.source_id(), self.version()), dependencies: None, replace: None, checksum: if self.version() >= ResolveVersion::V2 { @@ -676,7 +755,7 @@ fn encodable_resolve_node( EncodableDependency { name: id.name().to_string(), version: id.version().to_string(), - source: encode_source(id.source_id()), + source: encodable_source_id(id.source_id(), resolve.version()), dependencies: deps, replace, checksum: if resolve.version() >= ResolveVersion::V2 { @@ -702,7 +781,7 @@ pub fn encodable_package_id( } } } - let mut source = encode_source(id_to_encode).map(|s| s.with_precise(None)); + let mut source = encodable_source_id(id_to_encode.with_precise(None), resolve_version); if let Some(counts) = &state.counts { let version_counts = &counts[&id.name()]; if version_counts[&id.version()] == 1 { @@ -719,10 +798,13 @@ pub fn encodable_package_id( } } -fn encode_source(id: SourceId) -> Option { +fn encodable_source_id(id: SourceId, version: ResolveVersion) -> Option { if id.is_path() { None } else { - Some(id) + Some(match version { + ResolveVersion::V4 => EncodableSourceId::new(id), + _ => EncodableSourceId::without_url_encoded(id), + }) } } diff --git a/src/cargo/core/resolver/resolve.rs b/src/cargo/core/resolver/resolve.rs index 8405a12450b..18a389773da 100644 --- a/src/cargo/core/resolver/resolve.rs +++ b/src/cargo/core/resolver/resolve.rs @@ -83,6 +83,8 @@ pub enum ResolveVersion { /// Unstable. Will collect a certain amount of changes and then go. /// /// Changes made: + /// + /// * SourceId URL serialization is aware of URL encoding. V4, } diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs index 4064364d5e2..43c141b7a24 100644 --- a/src/cargo/core/source/source_id.rs +++ b/src/cargo/core/source/source_id.rs @@ -195,6 +195,15 @@ impl SourceId { pub fn as_url(&self) -> SourceIdAsUrl<'_> { SourceIdAsUrl { inner: &*self.inner, + encoded: false, + } + } + + /// Like [`Self::as_url`] but with URL parameters encoded. + pub fn as_encoded_url(&self) -> SourceIdAsUrl<'_> { + SourceIdAsUrl { + inner: &*self.inner, + encoded: true, } } @@ -566,7 +575,10 @@ impl fmt::Display for SourceId { // Don't replace the URL display for git references, // because those are kind of expected to be URLs. write!(f, "{}", self.inner.url)?; - if let Some(pretty) = reference.pretty_ref() { + // TODO(-Znext-lockfile-bump): set it to true when stabilizing + // lockfile v4, because we want Source ID serialization to be + // consistent with lockfile. + if let Some(pretty) = reference.pretty_ref(false) { write!(f, "?{}", pretty)?; } @@ -714,6 +726,7 @@ impl Ord for SourceKind { /// A `Display`able view into a `SourceId` that will write it as a url pub struct SourceIdAsUrl<'a> { inner: &'a SourceIdInner, + encoded: bool, } impl<'a> fmt::Display for SourceIdAsUrl<'a> { @@ -731,7 +744,7 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> { .. } => { write!(f, "git+{}", url)?; - if let Some(pretty) = reference.pretty_ref() { + if let Some(pretty) = reference.pretty_ref(self.encoded) { write!(f, "?{}", pretty)?; } if let Some(precise) = precise.as_ref() { @@ -771,10 +784,13 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> { impl GitReference { /// Returns a `Display`able view of this git reference, or None if using /// the head of the default branch - pub fn pretty_ref(&self) -> Option> { + pub fn pretty_ref(&self, url_encoded: bool) -> Option> { match self { GitReference::DefaultBranch => None, - _ => Some(PrettyRef { inner: self }), + _ => Some(PrettyRef { + inner: self, + url_encoded, + }), } } } @@ -782,16 +798,35 @@ impl GitReference { /// A git reference that can be `Display`ed pub struct PrettyRef<'a> { inner: &'a GitReference, + url_encoded: bool, } impl<'a> fmt::Display for PrettyRef<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self.inner { - GitReference::Branch(ref b) => write!(f, "branch={}", b), - GitReference::Tag(ref s) => write!(f, "tag={}", s), - GitReference::Rev(ref s) => write!(f, "rev={}", s), + let value: &str; + match self.inner { + GitReference::Branch(s) => { + write!(f, "branch=")?; + value = s; + } + GitReference::Tag(s) => { + write!(f, "tag=")?; + value = s; + } + GitReference::Rev(s) => { + write!(f, "rev=")?; + value = s; + } GitReference::DefaultBranch => unreachable!(), } + if self.url_encoded { + for value in url::form_urlencoded::byte_serialize(value.as_bytes()) { + write!(f, "{value}")?; + } + } else { + write!(f, "{value}")?; + } + Ok(()) } } @@ -905,6 +940,27 @@ mod tests { assert_eq!(formatted, "sparse+https://my-crates.io/"); assert_eq!(source_id, deserialized); } + + #[test] + fn gitrefs_roundtrip() { + let base = "https://host/path".into_url().unwrap(); + let branch = GitReference::Branch("*-._+20%30 Z/z#foo=bar&zap[]?to\\()'\"".to_string()); + let s1 = SourceId::for_git(&base, branch).unwrap(); + let ser1 = format!("{}", s1.as_encoded_url()); + let s2 = SourceId::from_url(&ser1).expect("Failed to deserialize"); + let ser2 = format!("{}", s2.as_encoded_url()); + // Serializing twice should yield the same result + assert_eq!(ser1, ser2, "Serialized forms don't match"); + // SourceId serializing the same should have the same semantics + // This used to not be the case (# was ambiguous) + assert_eq!(s1, s2, "SourceId doesn't round-trip"); + // Freeze the format to match an x-www-form-urlencoded query string + // https://url.spec.whatwg.org/#application/x-www-form-urlencoded + assert_eq!( + ser1, + "git+https://host/path?branch=*-._%2B20%2530+Z%2Fz%23foo%3Dbar%26zap%5B%5D%3Fto%5C%28%29%27%22" + ); + } } /// Check if `url` equals to the overridden crates.io URL. diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index b021d23a074..7cba2169362 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -164,7 +164,10 @@ impl<'cfg> Debug for GitSource<'cfg> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "git repo at {}", self.remote.url())?; - match self.manifest_reference.pretty_ref() { + // TODO(-Znext-lockfile-bump): set it to true when stabilizing + // lockfile v4, because we want Source ID serialization to be + // consistent with lockfile. + match self.manifest_reference.pretty_ref(false) { Some(s) => write!(f, " ({})", s), None => Ok(()), } diff --git a/src/cargo/util/toml_mut/dependency.rs b/src/cargo/util/toml_mut/dependency.rs index 1b24833c1ee..2f39b7ab4e4 100644 --- a/src/cargo/util/toml_mut/dependency.rs +++ b/src/cargo/util/toml_mut/dependency.rs @@ -881,7 +881,11 @@ impl GitSource { impl std::fmt::Display for GitSource { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let git_ref = self.git_ref(); - if let Some(pretty_ref) = git_ref.pretty_ref() { + + // TODO(-Znext-lockfile-bump): set it to true when stabilizing + // lockfile v4, because we want Source ID serialization to be + // consistent with lockfile. + if let Some(pretty_ref) = git_ref.pretty_ref(false) { write!(f, "{}?{}", self.git, pretty_ref) } else { write!(f, "{}", self.git) diff --git a/tests/testsuite/lockfile_compat.rs b/tests/testsuite/lockfile_compat.rs index 63148cc072b..97dcff1231a 100644 --- a/tests/testsuite/lockfile_compat.rs +++ b/tests/testsuite/lockfile_compat.rs @@ -966,3 +966,198 @@ version = "0.0.1" let lock = p.read_lockfile(); assert_match_exact(lockfile, &lock); } + +fn create_branch(repo: &git2::Repository, branch: &str, head_id: git2::Oid) { + repo.branch(branch, &repo.find_commit(head_id).unwrap(), true) + .unwrap(); +} + +fn create_tag(repo: &git2::Repository, tag: &str, head_id: git2::Oid) { + repo.tag( + tag, + &repo.find_object(head_id, None).unwrap(), + &repo.signature().unwrap(), + "make a new tag", + false, + ) + .unwrap(); +} + +fn v3_and_git_url_encoded(ref_kind: &str, f: impl FnOnce(&git2::Repository, &str, git2::Oid)) { + let (git_project, repo) = git::new_repo("dep1", |project| { + project + .file("Cargo.toml", &basic_lib_manifest("dep1")) + .file("src/lib.rs", "") + }); + let url = git_project.url(); + let head_id = repo.head().unwrap().target().unwrap(); + // Ref name with special characters + let git_ref = "a-_+#$)"; + f(&repo, git_ref, head_id); + + let lockfile = format!( + r#"# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "dep1" +version = "0.5.0" +source = "git+{url}?{ref_kind}={git_ref}#{head_id}" + +[[package]] +name = "foo" +version = "0.0.1" +dependencies = [ + "dep1", +] +"#, + ); + + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.0.1" + + [dependencies] + dep1 = {{ git = '{url}', {ref_kind} = '{git_ref}' }} + "#, + ), + ) + .file("src/lib.rs", "") + .file("Cargo.lock", "version = 3") + .build(); + + p.cargo("check") + .with_stderr(format!( + "\ +[UPDATING] git repository `{url}` +[CHECKING] dep1 v0.5.0 ({url}?{ref_kind}={git_ref}#[..]) +[CHECKING] foo v0.0.1 ([CWD]) +[FINISHED] dev [..] +" + )) + .run(); + + let lock = p.read_lockfile(); + assert_match_exact(&lockfile, &lock); + + // v3 doesn't URL-encode URL parameters, but `url` crate does decode as it + // was URL-encoded. Therefore Cargo thinks they are from different source + // and clones the repository again. + p.cargo("check") + .with_stderr(format!( + "\ +[UPDATING] git repository `{url}` +[FINISHED] dev [..] +" + )) + .run(); +} + +#[cargo_test] +fn v3_and_git_url_encoded_branch() { + v3_and_git_url_encoded("branch", create_branch); +} + +#[cargo_test] +fn v3_and_git_url_encoded_tag() { + v3_and_git_url_encoded("tag", create_tag); +} + +#[cargo_test] +fn v3_and_git_url_encoded_rev() { + v3_and_git_url_encoded("rev", create_tag); +} + +fn v4_and_git_url_encoded(ref_kind: &str, f: impl FnOnce(&git2::Repository, &str, git2::Oid)) { + let (git_project, repo) = git::new_repo("dep1", |project| { + project + .file("Cargo.toml", &basic_lib_manifest("dep1")) + .file("src/lib.rs", "") + }); + let url = git_project.url(); + let head_id = repo.head().unwrap().target().unwrap(); + // Ref name with special characters + let git_ref = "a-_+#$)"; + let encoded_ref = "a-_%2B%23%24%29"; + f(&repo, git_ref, head_id); + + let lockfile = format!( + r#"# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "dep1" +version = "0.5.0" +source = "git+{url}?{ref_kind}={encoded_ref}#{head_id}" + +[[package]] +name = "foo" +version = "0.0.1" +dependencies = [ + "dep1", +] +"#, + ); + + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.0.1" + + [dependencies] + dep1 = {{ git = '{url}', {ref_kind} = '{git_ref}' }} + "#, + ), + ) + .file("src/lib.rs", "") + .file("Cargo.lock", "version = 4") + .build(); + + p.cargo("check -Znext-lockfile-bump") + .masquerade_as_nightly_cargo(&["-Znext-lockfile-bump"]) + .with_stderr(format!( + "\ +[UPDATING] git repository `{url}` +[CHECKING] dep1 v0.5.0 ({url}?{ref_kind}={git_ref}#[..]) +[CHECKING] foo v0.0.1 ([CWD]) +[FINISHED] dev [..] +" + )) + .run(); + + let lock = p.read_lockfile(); + assert_match_exact(&lockfile, &lock); + + // Unlike v3_and_git_url_encoded, v4 encodes URL parameters so no git + // repository re-clone happen. + p.cargo("check -Znext-lockfile-bump") + .masquerade_as_nightly_cargo(&["-Znext-lockfile-bump"]) + .with_stderr("[FINISHED] dev [..]") + .run(); +} + +#[cargo_test] +fn v4_and_git_url_encoded_branch() { + v4_and_git_url_encoded("branch", create_branch); +} + +#[cargo_test] +fn v4_and_git_url_encoded_tag() { + v4_and_git_url_encoded("tag", create_tag); +} + +#[cargo_test] +fn v4_and_git_url_encoded_rev() { + v4_and_git_url_encoded("rev", create_tag) +}