Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add checksums cache to build-manifest #78409

Merged
merged 2 commits into from
Oct 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions src/tools/build-manifest/src/checksum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
use crate::manifest::{FileHash, Manifest};
use rayon::prelude::*;
use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use std::time::Instant;

/// Store of file checksums, optionally backed by a JSON cache file on disk.
pub(crate) struct Checksums {
/// Location of the on-disk cache, read from the `BUILD_MANIFEST_CHECKSUM_CACHE`
/// environment variable; `None` means nothing is loaded from or written to disk.
cache_path: Option<PathBuf>,
/// Hex-encoded SHA-256 hashes keyed by canonicalized file path. Wrapped in a
/// `Mutex` because hashes are inserted from rayon worker threads.
collected: Mutex<HashMap<PathBuf, String>>,
}

impl Checksums {
pub(crate) fn new() -> Result<Self, Box<dyn Error>> {
let cache_path = std::env::var_os("BUILD_MANIFEST_CHECKSUM_CACHE").map(PathBuf::from);

let mut collected = HashMap::new();
if let Some(path) = &cache_path {
if path.is_file() {
collected = serde_json::from_slice(&std::fs::read(path)?)?;
}
}

Ok(Checksums { cache_path, collected: Mutex::new(collected) })
}

pub(crate) fn store_cache(&self) -> Result<(), Box<dyn Error>> {
if let Some(path) = &self.cache_path {
std::fs::write(path, &serde_json::to_vec(&self.collected)?)?;
}
Ok(())
}

pub(crate) fn fill_missing_checksums(&mut self, manifest: &mut Manifest) {
let need_checksums = self.find_missing_checksums(manifest);
if !need_checksums.is_empty() {
self.collect_checksums(&need_checksums);
}
self.replace_checksums(manifest);
}

fn find_missing_checksums(&mut self, manifest: &mut Manifest) -> HashSet<PathBuf> {
let collected = self.collected.lock().unwrap();
let mut need_checksums = HashSet::new();
crate::manifest::visit_file_hashes(manifest, |file_hash| {
if let FileHash::Missing(path) = file_hash {
let path = std::fs::canonicalize(path).unwrap();
if !collected.contains_key(&path) {
need_checksums.insert(path);
}
}
});
need_checksums
}

fn replace_checksums(&mut self, manifest: &mut Manifest) {
let collected = self.collected.lock().unwrap();
crate::manifest::visit_file_hashes(manifest, |file_hash| {
if let FileHash::Missing(path) = file_hash {
let path = std::fs::canonicalize(path).unwrap();
match collected.get(&path) {
Some(hash) => *file_hash = FileHash::Present(hash.clone()),
None => panic!("missing hash for file {}", path.display()),
}
}
});
}

fn collect_checksums(&mut self, files: &HashSet<PathBuf>) {
let collection_start = Instant::now();
println!(
"collecting hashes for {} tarballs across {} threads",
files.len(),
rayon::current_num_threads().min(files.len()),
);

files.par_iter().for_each(|path| match hash(path) {
Ok(hash) => {
self.collected.lock().unwrap().insert(path.clone(), hash);
}
Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
});

println!("collected {} hashes in {:.2?}", files.len(), collection_start.elapsed());
}
}

/// Computes the SHA-256 digest of the file at `path` and returns it hex-encoded.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read.
fn hash(path: &Path) -> Result<String, Box<dyn Error>> {
    let source = File::open(path)?;
    let mut reader = BufReader::new(source);
    let mut hasher = Sha256::default();
    // Stream the file through the hasher instead of loading it into memory.
    std::io::copy(&mut reader, &mut hasher)?;
    let digest = hasher.finalize();
    Ok(hex::encode(digest))
}
59 changes: 9 additions & 50 deletions src/tools/build-manifest/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,19 @@
//! via `x.py dist hash-and-sign`; the cmdline arguments are set up
//! by rustbuild (in `src/bootstrap/dist.rs`).

mod checksum;
mod manifest;
mod versions;

use crate::manifest::{Component, FileHash, Manifest, Package, Rename, Target};
use crate::checksum::Checksums;
use crate::manifest::{Component, Manifest, Package, Rename, Target};
use crate::versions::{PkgType, Versions};
use rayon::prelude::*;
use sha2::Digest;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::env;
use std::error::Error;
use std::fs::{self, File};
use std::io::{self, BufReader, Read, Write};
use std::io::{self, Read, Write};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::Mutex;
use std::time::Instant;

static HOSTS: &[&str] = &[
"aarch64-apple-darwin",
Expand Down Expand Up @@ -186,6 +183,7 @@ macro_rules! t {

struct Builder {
versions: Versions,
checksums: Checksums,
shipped_files: HashSet<String>,

input: PathBuf,
Expand Down Expand Up @@ -240,6 +238,7 @@ fn main() {

Builder {
versions: Versions::new(&channel, &input).unwrap(),
checksums: t!(Checksums::new()),
shipped_files: HashSet::new(),

input,
Expand Down Expand Up @@ -276,6 +275,8 @@ impl Builder {
if let Some(path) = std::env::var_os("BUILD_MANIFEST_SHIPPED_FILES_PATH") {
self.write_shipped_files(&Path::new(&path));
}

t!(self.checksums.store_cache());
}

/// If a tool does not pass its tests, don't ship it.
Expand Down Expand Up @@ -321,7 +322,7 @@ impl Builder {
self.add_renames_to(&mut manifest);
manifest.pkg.insert("rust".to_string(), self.rust_package(&manifest));

self.fill_missing_hashes(&mut manifest);
self.checksums.fill_missing_checksums(&mut manifest);

manifest
}
Expand Down Expand Up @@ -595,41 +596,6 @@ impl Builder {
assert!(t!(child.wait()).success());
}

/// Hashes every file the manifest marks as `FileHash::Missing` and writes the
/// resulting hashes back into the manifest.
///
/// # Panics
///
/// Panics if any missing file could not be hashed.
fn fill_missing_hashes(&self, manifest: &mut Manifest) {
    // Pass 1: gather the distinct paths that still lack a hash.
    let mut pending = HashSet::new();
    crate::manifest::visit_file_hashes(manifest, |entry| {
        if let FileHash::Missing(path) = entry {
            pending.insert(path.clone());
        }
    });

    // Pass 2: hash in parallel; a failure is reported and the file is skipped.
    let results = Mutex::new(HashMap::new());
    let started = Instant::now();
    let threads = rayon::current_num_threads().min(pending.len());
    println!("collecting hashes for {} tarballs across {} threads", pending.len(), threads);
    pending.par_iter().for_each(|path| {
        match fetch_hash(path) {
            Err(err) => {
                eprintln!("error while fetching the hash for {}: {}", path.display(), err)
            }
            Ok(digest) => {
                results.lock().unwrap().insert(path, digest);
            }
        }
    });
    let results = results.into_inner().unwrap();
    println!("collected {} hashes in {:.2?}", results.len(), started.elapsed());

    // Pass 3: substitute the computed hashes into the manifest.
    crate::manifest::visit_file_hashes(manifest, |entry| {
        if let FileHash::Missing(path) = entry {
            let digest = match results.get(path) {
                Some(found) => found.clone(),
                None => panic!("missing hash for file {}", path.display()),
            };
            *entry = FileHash::Present(digest);
        }
    })
}

fn write_channel_files(&mut self, channel_name: &str, manifest: &Manifest) {
self.write(&toml::to_string(&manifest).unwrap(), channel_name, ".toml");
self.write(&manifest.date, channel_name, "-date.txt");
Expand Down Expand Up @@ -660,10 +626,3 @@ impl Builder {
t!(std::fs::write(path, content.as_bytes()));
}
}

/// Computes the SHA-256 digest of the file at `path` and returns it hex-encoded.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read.
fn fetch_hash(path: &Path) -> Result<String, Box<dyn Error>> {
    let source = File::open(path)?;
    let mut reader = BufReader::new(source);
    let mut hasher = sha2::Sha256::default();
    // Stream the file through the hasher instead of loading it into memory.
    std::io::copy(&mut reader, &mut hasher)?;
    let digest = hasher.finalize();
    Ok(hex::encode(digest))
}