Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: encode URL params correctly for SourceId in Cargo.lock #12280

Merged
merged 5 commits into from
Jul 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 93 additions & 11 deletions src/cargo/core/resolver/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,13 +196,13 @@ impl EncodableResolve {
let enc_id = EncodablePackageId {
name: pkg.name.clone(),
version: Some(pkg.version.clone()),
source: pkg.source,
source: pkg.source.clone(),
};

if !all_pkgs.insert(enc_id.clone()) {
anyhow::bail!("package `{}` is specified twice in the lockfile", pkg.name);
}
let id = match pkg.source.as_ref().or_else(|| path_deps.get(&pkg.name)) {
let id = match pkg.source.as_deref().or_else(|| path_deps.get(&pkg.name)) {
// We failed to find a local package in the workspace.
// It must have been removed and should be ignored.
None => {
Expand Down Expand Up @@ -366,7 +366,7 @@ impl EncodableResolve {

let mut unused_patches = Vec::new();
for pkg in self.patch.unused {
let id = match pkg.source.as_ref().or_else(|| path_deps.get(&pkg.name)) {
let id = match pkg.source.as_deref().or_else(|| path_deps.get(&pkg.name)) {
Some(&src) => PackageId::new(&pkg.name, &pkg.version, src)?,
None => continue,
};
Expand Down Expand Up @@ -488,17 +488,95 @@ impl Patch {
pub struct EncodableDependency {
name: String,
version: String,
source: Option<SourceId>,
source: Option<EncodableSourceId>,
checksum: Option<String>,
dependencies: Option<Vec<EncodablePackageId>>,
replace: Option<EncodablePackageId>,
}

/// Pretty much equivalent to [`SourceId`] with a different serialization method.
///
/// The serialization for `SourceId` doesn't do URL encode for parameters.
/// In contrast, this type is aware of that whenever [`ResolveVersion`] allows
/// us to do so (v4 or later).
///
/// [`EncodableResolve`] turns into a `
#[derive(Deserialize, Debug, PartialOrd, Ord, Clone)]
#[serde(transparent)]
pub struct EncodableSourceId {
inner: SourceId,
/// We don't care about the deserialization of this, as the `url` crate
/// will always decode as the URL was encoded. Only when a [`Resolve`]
/// turns into a [`EncodableResolve`] will it set the value accordingly
/// via [`encodable_source_id`].
#[serde(skip)]
encoded: bool,
}

impl EncodableSourceId {
/// Creates a `EncodableSourceId` that always encodes URL params.
fn new(inner: SourceId) -> Self {
Self {
inner,
encoded: true,
}
}

/// Creates a `EncodableSourceId` that doesn't encode URL params. This is
/// for backward compatibility for order lockfile version.
fn without_url_encoded(inner: SourceId) -> Self {
Self {
inner,
encoded: false,
}
}

/// Encodes the inner [`SourceId`] as a URL.
fn as_url(&self) -> impl fmt::Display + '_ {
if self.encoded {
self.inner.as_encoded_url()
} else {
self.inner.as_url()
}
}
}

impl std::ops::Deref for EncodableSourceId {
type Target = SourceId;

fn deref(&self) -> &Self::Target {
&self.inner
}
}

impl ser::Serialize for EncodableSourceId {
fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
where
S: ser::Serializer,
{
s.collect_str(&self.as_url())
}
}

impl std::hash::Hash for EncodableSourceId {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.inner.hash(state)
}
}

impl std::cmp::PartialEq for EncodableSourceId {
fn eq(&self, other: &Self) -> bool {
self.inner == other.inner
}
}

impl std::cmp::Eq for EncodableSourceId {}

#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Hash, Clone)]
pub struct EncodablePackageId {
name: String,
version: Option<String>,
source: Option<SourceId>,
source: Option<EncodableSourceId>,
}

impl fmt::Display for EncodablePackageId {
Expand Down Expand Up @@ -535,7 +613,8 @@ impl FromStr for EncodablePackageId {
Ok(EncodablePackageId {
name: name.to_string(),
version: version.map(|v| v.to_string()),
source: source_id,
// Default to url encoded.
source: source_id.map(EncodableSourceId::new),
})
}
}
Expand Down Expand Up @@ -603,7 +682,7 @@ impl ser::Serialize for Resolve {
.map(|id| EncodableDependency {
name: id.name().to_string(),
version: id.version().to_string(),
source: encode_source(id.source_id()),
source: encodable_source_id(id.source_id(), self.version()),
dependencies: None,
replace: None,
checksum: if self.version() >= ResolveVersion::V2 {
Expand Down Expand Up @@ -676,7 +755,7 @@ fn encodable_resolve_node(
EncodableDependency {
name: id.name().to_string(),
version: id.version().to_string(),
source: encode_source(id.source_id()),
source: encodable_source_id(id.source_id(), resolve.version()),
dependencies: deps,
replace,
checksum: if resolve.version() >= ResolveVersion::V2 {
Expand All @@ -702,7 +781,7 @@ pub fn encodable_package_id(
}
}
}
let mut source = encode_source(id_to_encode).map(|s| s.with_precise(None));
let mut source = encodable_source_id(id_to_encode.with_precise(None), resolve_version);
if let Some(counts) = &state.counts {
let version_counts = &counts[&id.name()];
if version_counts[&id.version()] == 1 {
Expand All @@ -719,10 +798,13 @@ pub fn encodable_package_id(
}
}

fn encode_source(id: SourceId) -> Option<SourceId> {
fn encodable_source_id(id: SourceId, version: ResolveVersion) -> Option<EncodableSourceId> {
if id.is_path() {
None
} else {
Some(id)
Some(match version {
ResolveVersion::V4 => EncodableSourceId::new(id),
_ => EncodableSourceId::without_url_encoded(id),
})
}
}
2 changes: 2 additions & 0 deletions src/cargo/core/resolver/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ pub enum ResolveVersion {
/// Unstable. Will collect a certain amount of changes and then go.
///
/// Changes made:
///
/// * SourceId URL serialization is aware of URL encoding.
V4,
}

Expand Down
72 changes: 64 additions & 8 deletions src/cargo/core/source/source_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,15 @@ impl SourceId {
pub fn as_url(&self) -> SourceIdAsUrl<'_> {
SourceIdAsUrl {
inner: &*self.inner,
encoded: false,
}
}

/// Like [`Self::as_url`] but with URL parameters encoded.
pub fn as_encoded_url(&self) -> SourceIdAsUrl<'_> {
SourceIdAsUrl {
inner: &*self.inner,
encoded: true,
}
}

Expand Down Expand Up @@ -566,7 +575,10 @@ impl fmt::Display for SourceId {
// Don't replace the URL display for git references,
// because those are kind of expected to be URLs.
write!(f, "{}", self.inner.url)?;
if let Some(pretty) = reference.pretty_ref() {
// TODO(-Znext-lockfile-bump): set it to true when stabilizing
// lockfile v4, because we want Source ID serialization to be
// consistent with lockfile.
if let Some(pretty) = reference.pretty_ref(false) {
write!(f, "?{}", pretty)?;
}

Expand Down Expand Up @@ -714,6 +726,7 @@ impl Ord for SourceKind {
/// A `Display`able view into a `SourceId` that will write it as a url
pub struct SourceIdAsUrl<'a> {
inner: &'a SourceIdInner,
encoded: bool,
}

impl<'a> fmt::Display for SourceIdAsUrl<'a> {
Expand All @@ -731,7 +744,7 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> {
..
} => {
write!(f, "git+{}", url)?;
if let Some(pretty) = reference.pretty_ref() {
if let Some(pretty) = reference.pretty_ref(self.encoded) {
write!(f, "?{}", pretty)?;
}
if let Some(precise) = precise.as_ref() {
Expand Down Expand Up @@ -771,27 +784,49 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> {
impl GitReference {
/// Returns a `Display`able view of this git reference, or None if using
/// the head of the default branch
pub fn pretty_ref(&self) -> Option<PrettyRef<'_>> {
pub fn pretty_ref(&self, url_encoded: bool) -> Option<PrettyRef<'_>> {
match self {
GitReference::DefaultBranch => None,
_ => Some(PrettyRef { inner: self }),
_ => Some(PrettyRef {
inner: self,
url_encoded,
}),
}
}
}

/// A git reference that can be `Display`ed
pub struct PrettyRef<'a> {
inner: &'a GitReference,
url_encoded: bool,
}

impl<'a> fmt::Display for PrettyRef<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self.inner {
GitReference::Branch(ref b) => write!(f, "branch={}", b),
GitReference::Tag(ref s) => write!(f, "tag={}", s),
GitReference::Rev(ref s) => write!(f, "rev={}", s),
let value: &str;
match self.inner {
GitReference::Branch(s) => {
write!(f, "branch=")?;
value = s;
}
GitReference::Tag(s) => {
write!(f, "tag=")?;
value = s;
}
GitReference::Rev(s) => {
write!(f, "rev=")?;
value = s;
}
GitReference::DefaultBranch => unreachable!(),
}
if self.url_encoded {
for value in url::form_urlencoded::byte_serialize(value.as_bytes()) {
write!(f, "{value}")?;
}
} else {
write!(f, "{value}")?;
}
Ok(())
}
}

Expand Down Expand Up @@ -905,6 +940,27 @@ mod tests {
assert_eq!(formatted, "sparse+https://my-crates.io/");
assert_eq!(source_id, deserialized);
}

#[test]
fn gitrefs_roundtrip() {
let base = "https://host/path".into_url().unwrap();
let branch = GitReference::Branch("*-._+20%30 Z/z#foo=bar&zap[]?to\\()'\"".to_string());
let s1 = SourceId::for_git(&base, branch).unwrap();
let ser1 = format!("{}", s1.as_encoded_url());
let s2 = SourceId::from_url(&ser1).expect("Failed to deserialize");
let ser2 = format!("{}", s2.as_encoded_url());
// Serializing twice should yield the same result
assert_eq!(ser1, ser2, "Serialized forms don't match");
// SourceId serializing the same should have the same semantics
// This used to not be the case (# was ambiguous)
assert_eq!(s1, s2, "SourceId doesn't round-trip");
// Freeze the format to match an x-www-form-urlencoded query string
// https://url.spec.whatwg.org/#application/x-www-form-urlencoded
assert_eq!(
ser1,
"git+https://host/path?branch=*-._%2B20%2530+Z%2Fz%23foo%3Dbar%26zap%5B%5D%3Fto%5C%28%29%27%22"
);
}
}

/// Check if `url` equals to the overridden crates.io URL.
Expand Down
5 changes: 4 additions & 1 deletion src/cargo/sources/git/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,10 @@ impl<'cfg> Debug for GitSource<'cfg> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "git repo at {}", self.remote.url())?;

match self.manifest_reference.pretty_ref() {
// TODO(-Znext-lockfile-bump): set it to true when stabilizing
// lockfile v4, because we want Source ID serialization to be
// consistent with lockfile.
match self.manifest_reference.pretty_ref(false) {
Some(s) => write!(f, " ({})", s),
None => Ok(()),
}
Expand Down
6 changes: 5 additions & 1 deletion src/cargo/util/toml_mut/dependency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,11 @@ impl GitSource {
impl std::fmt::Display for GitSource {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let git_ref = self.git_ref();
if let Some(pretty_ref) = git_ref.pretty_ref() {

// TODO(-Znext-lockfile-bump): set it to true when stabilizing
// lockfile v4, because we want Source ID serialization to be
// consistent with lockfile.
if let Some(pretty_ref) = git_ref.pretty_ref(false) {
write!(f, "{}?{}", self.git, pretty_ref)
} else {
write!(f, "{}", self.git)
Expand Down
Loading