From 56eeec98b9e70d818a4c663263ff0a0e1b4abac5 Mon Sep 17 00:00:00 2001 From: Urgau Date: Mon, 15 Jul 2024 10:42:43 +0200 Subject: [PATCH 1/4] Change stable hasher to Blake3 --- Cargo.lock | 31 +++++++++++++++++-- compiler/rustc_data_structures/Cargo.toml | 2 +- .../rustc_data_structures/src/fingerprint.rs | 8 +++-- compiler/rustc_data_structures/src/hashes.rs | 10 +++--- .../src/stable_hasher.rs | 4 +-- 5 files changed, 44 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cafc623c185a6..549733a68c742 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -243,6 +243,12 @@ dependencies = [ "object 0.32.2", ] +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + [[package]] name = "arrayvec" version = "0.7.4" @@ -362,6 +368,19 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +[[package]] +name = "blake3" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -848,6 +867,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + [[package]] name = "core" version = "0.0.0" @@ -3518,8 +3543,10 @@ checksum = "5be1bdc7edf596692617627bbfeaba522131b18e06ca4df2b6b689e3c5d5ce84" [[package]] name = "rustc-stable-hash" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5c9f15eec8235d7cb775ee6f81891db79b98fd54ba1ad8fae565b88ef1ae4e2" +source = "git+https://github.com/Urgau/rustc-stable-hash.git?rev=368f4ef#368f4ef468ca97fd4757c896f53349d9fa4def1b" +dependencies = [ + "blake3", +] [[package]] name = "rustc-std-workspace-alloc" diff --git a/compiler/rustc_data_structures/Cargo.toml b/compiler/rustc_data_structures/Cargo.toml index e5e733439ea05..706ce2e9b236a 100644 --- a/compiler/rustc_data_structures/Cargo.toml +++ b/compiler/rustc_data_structures/Cargo.toml @@ -15,7 +15,7 @@ jobserver_crate = { version = "0.1.28", package = "jobserver" } measureme = "11" rustc-hash = "1.1.0" rustc-rayon = { version = "0.5.0", optional = true } -rustc-stable-hash = { version = "0.1.0", features = ["nightly"] } +rustc-stable-hash = { git = "https://github.com/Urgau/rustc-stable-hash.git", rev = "368f4ef", features = ["nightly", "blake3"] } rustc_arena = { path = "../rustc_arena" } rustc_graphviz = { path = "../rustc_graphviz" } rustc_index = { path = "../rustc_index", package = "rustc_index" } diff --git a/compiler/rustc_data_structures/src/fingerprint.rs b/compiler/rustc_data_structures/src/fingerprint.rs index 30e3d6aa86ce9..5c369ebb5b45d 100644 --- a/compiler/rustc_data_structures/src/fingerprint.rs +++ b/compiler/rustc_data_structures/src/fingerprint.rs @@ -158,8 +158,12 @@ impl FromStableHash for Fingerprint { type Hash = StableHasherHash; #[inline] - fn from(StableHasherHash([_0, _1]): Self::Hash) -> Self { - Fingerprint(_0, _1) + fn from(hash: Self::Hash) -> Self { + let bytes = hash.as_bytes(); + Fingerprint( + u64::from_ne_bytes(bytes[0..8].try_into().unwrap()), + u64::from_ne_bytes(bytes[8..16].try_into().unwrap()), + ) } } diff --git a/compiler/rustc_data_structures/src/hashes.rs b/compiler/rustc_data_structures/src/hashes.rs index ef5d2e845ef07..935fc6b417a33 100644 --- a/compiler/rustc_data_structures/src/hashes.rs +++ b/compiler/rustc_data_structures/src/hashes.rs @@ -60,8 +60,9 @@ impl FromStableHash for Hash64 { type Hash = StableHasherHash; #[inline] - fn from(StableHasherHash([_0, __1]): Self::Hash) -> Self { - Self { inner: _0 } + fn from(hash: Self::Hash) -> Self { + let bytes = hash.as_bytes(); + Self { inner: u64::from_ne_bytes(bytes[0..8].try_into().unwrap()) } } } @@ -127,8 +128,9 @@ impl FromStableHash for Hash128 { type Hash = StableHasherHash; #[inline] - fn from(StableHasherHash([_0, _1]): Self::Hash) -> Self { - Self { inner: u128::from(_0) | (u128::from(_1) << 64) } + fn from(hash: Self::Hash) -> Self { + let bytes = hash.as_bytes(); + Self { inner: u128::from_ne_bytes(bytes[0..16].try_into().unwrap()) } } } diff --git a/compiler/rustc_data_structures/src/stable_hasher.rs b/compiler/rustc_data_structures/src/stable_hasher.rs index 83883eeba9ca0..ebdff2d6215bb 100644 --- a/compiler/rustc_data_structures/src/stable_hasher.rs +++ b/compiler/rustc_data_structures/src/stable_hasher.rs @@ -11,9 +11,9 @@ mod tests; pub use crate::hashes::{Hash128, Hash64}; +pub use rustc_stable_hash::hashers::Blake3Hash as StableHasherHash; +pub use rustc_stable_hash::hashers::StableBlake3Hasher as StableHasher; pub use rustc_stable_hash::FromStableHash; -pub use rustc_stable_hash::SipHasher128Hash as StableHasherHash; -pub use rustc_stable_hash::StableSipHasher128 as StableHasher; /// Something that implements `HashStable` can be hashed in a way that is /// stable across multiple compilation sessions. From 22bc40df2f319ea1b2820891b80044cf283bc1c8 Mon Sep 17 00:00:00 2001 From: Urgau Date: Mon, 15 Jul 2024 10:55:05 +0200 Subject: [PATCH 2/4] Allow blake3+deps and licenses --- src/tools/tidy/src/deps.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 3c72fae0881e9..a19685417b142 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -21,6 +21,9 @@ const LICENSES: &[&str] = &[ "Apache-2.0", "Apache-2.0/MIT", "BSD-2-Clause OR Apache-2.0 OR MIT", // zerocopy + "BSD-2-Clause", // arrayref + "CC0-1.0 OR Apache-2.0 OR Apache-2.0 WITH LLVM-exception", // blake3 + "CC0-1.0 OR MIT-0 OR Apache-2.0", // constant_time_eq "ISC", "MIT / Apache-2.0", "MIT OR Apache-2.0 OR LGPL-2.1-or-later", // r-efi, r-efi-alloc @@ -232,15 +235,18 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "annotate-snippets", "anstyle", "ar_archive_writer", + "arrayref", "arrayvec", "autocfg", "bitflags", + "blake3", "block-buffer", "byteorder", // via ruzstd in object in thorin-dwp "cc", "cfg-if", "cfg_aliases", "compiler_builtins", + "constant_time_eq", "cpufeatures", "crc32fast", "crossbeam-channel", From 38b97bcf97f5afbb8124fcca3dec2d055241eec3 Mon Sep 17 00:00:00 2001 From: Urgau Date: Mon, 15 Jul 2024 10:58:26 +0200 Subject: [PATCH 3/4] [DO NOT MERGE] Allow git version of rustc-stable-hash --- src/tools/tidy/src/extdeps.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tools/tidy/src/extdeps.rs b/src/tools/tidy/src/extdeps.rs index 8bb80f1171184..50ee6eda6d2c5 100644 --- a/src/tools/tidy/src/extdeps.rs +++ b/src/tools/tidy/src/extdeps.rs @@ -9,6 +9,8 @@ const ALLOWED_SOURCES: &[&str] = &[ r#""registry+https://github.com/rust-lang/crates.io-index""#, // This is `rust_team_data` used by `site` in src/tools/rustc-perf, r#""git+https://github.com/rust-lang/team#a5260e76d3aa894c64c56e6ddc8545b9a98043ec""#, + // WIP blake3 + r#""git+https://github.com/Urgau/rustc-stable-hash.git?rev=368f4ef#368f4ef468ca97fd4757c896f53349d9fa4def1b""#, ]; /// Checks for external package sources. `root` is the path to the directory that contains the From 311050d17f95b4d67187593625aae91b92efe056 Mon Sep 17 00:00:00 2001 From: Urgau Date: Mon, 15 Jul 2024 13:12:15 +0200 Subject: [PATCH 4/4] Use full hash bytes and use fixed endianness --- .../rustc_data_structures/src/fingerprint.rs | 15 +++++++---- compiler/rustc_data_structures/src/hashes.rs | 27 +++++++++++++++++-- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/compiler/rustc_data_structures/src/fingerprint.rs b/compiler/rustc_data_structures/src/fingerprint.rs index 5c369ebb5b45d..8225ad6a2d635 100644 --- a/compiler/rustc_data_structures/src/fingerprint.rs +++ b/compiler/rustc_data_structures/src/fingerprint.rs @@ -159,11 +159,16 @@ impl FromStableHash for Fingerprint { #[inline] fn from(hash: Self::Hash) -> Self { - let bytes = hash.as_bytes(); - Fingerprint( - u64::from_ne_bytes(bytes[0..8].try_into().unwrap()), - u64::from_ne_bytes(bytes[8..16].try_into().unwrap()), - ) + let bytes: &[u8; 32] = hash.as_bytes(); + + let p0 = u64::from_le_bytes(bytes[0..8].try_into().unwrap()); + let p1 = u64::from_le_bytes(bytes[8..16].try_into().unwrap()); + let p2 = u64::from_le_bytes(bytes[16..24].try_into().unwrap()); + let p3 = u64::from_le_bytes(bytes[24..32].try_into().unwrap()); + + // See https://stackoverflow.com/a/27952689 on why this function is + // implemented this way. + Fingerprint(p0.wrapping_mul(3).wrapping_add(p1), p2.wrapping_mul(3).wrapping_add(p3)) } } diff --git a/compiler/rustc_data_structures/src/hashes.rs b/compiler/rustc_data_structures/src/hashes.rs index 935fc6b417a33..1733e64cf4cd5 100644 --- a/compiler/rustc_data_structures/src/hashes.rs +++ b/compiler/rustc_data_structures/src/hashes.rs @@ -62,7 +62,19 @@ impl FromStableHash for Hash64 { #[inline] fn from(hash: Self::Hash) -> Self { let bytes = hash.as_bytes(); - Self { inner: u64::from_ne_bytes(bytes[0..8].try_into().unwrap()) } + + let p0 = u64::from_le_bytes(bytes[0..8].try_into().unwrap()); + let p1 = u64::from_le_bytes(bytes[8..16].try_into().unwrap()); + let p2 = u64::from_le_bytes(bytes[16..24].try_into().unwrap()); + let p3 = u64::from_le_bytes(bytes[24..32].try_into().unwrap()); + + // See https://stackoverflow.com/a/27952689 on why this function is + // implemented this way. + let m0 = p0.wrapping_mul(3).wrapping_add(p1); + let m1 = p2.wrapping_mul(3).wrapping_add(p3); + let h = m0.wrapping_mul(3).wrapping_add(m1); + + Self { inner: h } } } @@ -130,7 +142,18 @@ impl FromStableHash for Hash128 { #[inline] fn from(hash: Self::Hash) -> Self { let bytes = hash.as_bytes(); - Self { inner: u128::from_ne_bytes(bytes[0..16].try_into().unwrap()) } + + let p0 = u64::from_le_bytes(bytes[0..8].try_into().unwrap()); + let p1 = u64::from_le_bytes(bytes[8..16].try_into().unwrap()); + let p2 = u64::from_le_bytes(bytes[16..24].try_into().unwrap()); + let p3 = u64::from_le_bytes(bytes[24..32].try_into().unwrap()); + + // See https://stackoverflow.com/a/27952689 on why this function is + // implemented this way. + let upper = p0.wrapping_mul(3).wrapping_add(p1); + let lower = p2.wrapping_mul(3).wrapping_add(p3); + + Self { inner: u128::from(lower) | (u128::from(upper) << 64) } } }