From 783a18d98df447dd2db4568a1d30a0c04e063852 Mon Sep 17 00:00:00 2001
From: Alex Huszagh
Date: Fri, 13 Sep 2024 16:50:04 -0500
Subject: [PATCH] Add more comprehensive miri coverage.

---
 CHANGELOG                                         |   2 +-
 ci/miri.sh                                        |   9 +-
 .../etc/correctness/test-parse-golang/main.rs     | 104 +++++++++---------
 .../correctness/test-parse-random/_common.rs      |   5 +-
 .../test-parse-random/many-digits.rs              |   4 +-
 .../correctness/test-parse-random/rand-f64.rs     |   2 +-
 .../correctness/test-parse-random/u64-pow2.rs     |   1 -
 .../correctness/test-parse-unittests/main.rs      |  74 ++++++-------
 .../rust_parse_tests.toml                         |   2 +-
 .../test-parse-unittests/strtod_tests.toml        |   2 +-
 10 files changed, 107 insertions(+), 98 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index c08ef207..24ab8422 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added fuzzing and miri code safety analysis to our CI pipelines.
 - Removed requirement of `alloc` in `no_std` ennvironments without the `write` feature.
 - Make multi-digit optimizations in integer parsing optional.
-- Much higher miri coverage including for proptests.
+- Much higher miri coverage, including for proptests and the corner cases from the golang test suite.
 
 ### Changed

diff --git a/ci/miri.sh b/ci/miri.sh
index 689a7436..0f524364 100755
--- a/ci/miri.sh
+++ b/ci/miri.sh
@@ -29,6 +29,9 @@ fi
 # Test the write-float correctness tests.
 cd "${home}"
 cd lexical-write-float/etc/correctness
-cargo run $FEATURES --release --bin shorter_interval
-cargo run $FEATURES --release --bin random
-cargo run $FEATURES --release --bin simple_random -- --iterations 1000000
+cargo +nightly miri run $FEATURES --release --bin test-parse-golang
+# NOTE: This is **extraordinarily** slow, mostly because the data is parsed
+# as TOML, which makes loading it take forever.
+if [ -n "$COMPREHENSIVE" ]; then
+    cargo +nightly miri run $FEATURES --release --bin test-parse-unittests
+fi
diff --git a/lexical-parse-float/etc/correctness/test-parse-golang/main.rs b/lexical-parse-float/etc/correctness/test-parse-golang/main.rs
index c3468988..919e7e89 100644
--- a/lexical-parse-float/etc/correctness/test-parse-golang/main.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-golang/main.rs
@@ -2,40 +2,7 @@
 // See https://unlicense.org/
 
 use lexical_parse_float::FromLexical;
-
-use std::io::prelude::*;
-use std::path::PathBuf;
-use std::{env, fs, io};
-
-// PATH
-
-/// Return the `target/debug` or `target/release` directory path.
-pub fn build_dir() -> PathBuf {
-    env::current_exe()
-        .expect("unittest executable path")
-        .parent()
-        .expect("debug/release directory")
-        .to_path_buf()
-}
-
-/// Return the `target` directory path.
-pub fn target_dir() -> PathBuf {
-    build_dir().parent().expect("target directory").to_path_buf()
-}
-
-/// Return the project directory path.
-pub fn project_dir() -> PathBuf {
-    target_dir().parent().expect("project directory").to_path_buf()
-}
-
-/// Return the `data` directory path.
-pub fn data_dir() -> PathBuf {
-    let mut dir = project_dir();
-    dir.push("test-parse-golang");
-    dir.push("parse-number-fxx-test-data");
-    dir.push("data");
-    dir
-}
+use std::collections::HashMap;
 
 fn run_test(line: &str) {
     // Tests have the following format:
@@ -56,22 +23,61 @@ fn run_test(line: &str) {
 
 fn main() {
     // Iterate over all .txt files in the directory.
-    let paths = fs::read_dir(data_dir()).expect("Please update the Git submodule");
-    for direntry in paths {
-        let path = direntry.unwrap().path();
-        if path.extension().unwrap() == "txt" {
-            // Have a data file, parse and run the tests.
-            let filename = path.file_name().unwrap().to_str().unwrap();
-            println!("Running Test: {}", filename);
-            let file = fs::File::open(path).unwrap();
-            let reader = io::BufReader::new(file);
-            let mut count: usize = 0;
-            for line in reader.lines() {
-                let line = line.unwrap();
-                run_test(&line);
-                count += 1;
+    // NOTE: Miri does not play nicely with directories, so we compile the test data in.
+    let tests: HashMap<&str, &str> = HashMap::from([
+        ("freetype-2-7.txt", include_str!("parse-number-fxx-test-data/data/freetype-2-7.txt")),
+        (
+            "google-double-conversion.txt",
+            include_str!("parse-number-fxx-test-data/data/google-double-conversion.txt"),
+        ),
+        ("google-wuffs.txt", include_str!("parse-number-fxx-test-data/data/google-wuffs.txt")),
+        ("ibm-fpgen.txt", include_str!("parse-number-fxx-test-data/data/ibm-fpgen.txt")),
+        (
+            "lemire-fast-double-parser.txt",
+            include_str!("parse-number-fxx-test-data/data/lemire-fast-double-parser.txt"),
+        ),
+        (
+            "lemire-fast-float.txt",
+            include_str!("parse-number-fxx-test-data/data/lemire-fast-float.txt"),
+        ),
+        (
+            "more-test-cases.txt",
+            include_str!("parse-number-fxx-test-data/data/more-test-cases.txt"),
+        ),
+        (
+            "remyoudompheng-fptest-0.txt",
+            include_str!("parse-number-fxx-test-data/data/remyoudompheng-fptest-0.txt"),
+        ),
+        (
+            "remyoudompheng-fptest-1.txt",
+            include_str!("parse-number-fxx-test-data/data/remyoudompheng-fptest-1.txt"),
+        ),
+        (
+            "remyoudompheng-fptest-2.txt",
+            include_str!("parse-number-fxx-test-data/data/remyoudompheng-fptest-2.txt"),
+        ),
+        (
+            "remyoudompheng-fptest-3.txt",
+            include_str!("parse-number-fxx-test-data/data/remyoudompheng-fptest-3.txt"),
+        ),
+        (
+            "tencent-rapidjson.txt",
+            include_str!("parse-number-fxx-test-data/data/tencent-rapidjson.txt"),
+        ),
+        ("ulfjack-ryu.txt", include_str!("parse-number-fxx-test-data/data/ulfjack-ryu.txt")),
+    ]);
+
+    // Unfortunately, randomizing the data under miri is too expensive, so we just iterate in order.
+    for (&filename, data) in tests.iter() {
+        println!("Running Test: {}", filename);
+        for (count, line) in data.lines().enumerate() {
+            if cfg!(miri) && count % 10 == 0 {
+                println!("Running test {count} for {filename}.");
+            }
+            run_test(line);
+            if cfg!(miri) && count > 3000 {
+                break;
             }
-            println!("Ran {} tests.", count);
         }
     }
 }
diff --git a/lexical-parse-float/etc/correctness/test-parse-random/_common.rs b/lexical-parse-float/etc/correctness/test-parse-random/_common.rs
index ba7ef6fa..4d415ddd 100644
--- a/lexical-parse-float/etc/correctness/test-parse-random/_common.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-random/_common.rs
@@ -17,7 +17,10 @@ use std::mem::transmute;
 #[allow(dead_code)]
 pub const SEED: [u32; 3] = [0x243f_6a88, 0x85a3_08d3, 0x1319_8a2e];
 #[allow(dead_code)]
-pub const ISAAC_SEED: [u8; 32] = [49, 52, 49, 53, 57, 50, 54, 53, 51, 53, 56, 57, 55, 57, 51, 50, 51, 56, 52, 54, 50, 54, 52, 51, 51, 56, 51, 50, 55, 57, 53, 48];
+pub const ISAAC_SEED: [u8; 32] = [
+    49, 52, 49, 53, 57, 50, 54, 53, 51, 53, 56, 57, 55, 57, 51, 50, 51, 56, 52, 54, 50, 54, 52, 51,
+    51, 56, 51, 50, 55, 57, 53, 48,
+];
 
 pub fn validate(text: &str) {
     let mut out = io::stdout();
diff --git a/lexical-parse-float/etc/correctness/test-parse-random/many-digits.rs b/lexical-parse-float/etc/correctness/test-parse-random/many-digits.rs
index a9dd2f07..47c36e16 100644
--- a/lexical-parse-float/etc/correctness/test-parse-random/many-digits.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-random/many-digits.rs
@@ -11,10 +11,10 @@ mod _common;
 
 use _common::{validate, ISAAC_SEED};
-use rand_isaac::Isaac64Rng;
-use rand::distributions::Distribution;
 use rand::distributions::uniform::Uniform;
+use rand::distributions::Distribution;
 use rand::{Rng, SeedableRng};
+use rand_isaac::Isaac64Rng;
 use std::char;
 
 fn main() {
diff --git a/lexical-parse-float/etc/correctness/test-parse-random/rand-f64.rs b/lexical-parse-float/etc/correctness/test-parse-random/rand-f64.rs
index c40235f9..17e32c8a 100644
--- a/lexical-parse-float/etc/correctness/test-parse-random/rand-f64.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-random/rand-f64.rs
@@ -11,8 +11,8 @@ mod _common;
 
 use _common::{validate, ISAAC_SEED};
-use rand_isaac::Isaac64Rng;
 use rand::{RngCore, SeedableRng};
+use rand_isaac::Isaac64Rng;
 use std::mem::transmute;
 
 fn main() {
diff --git a/lexical-parse-float/etc/correctness/test-parse-random/u64-pow2.rs b/lexical-parse-float/etc/correctness/test-parse-random/u64-pow2.rs
index 5b25c839..84927855 100644
--- a/lexical-parse-float/etc/correctness/test-parse-random/u64-pow2.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-random/u64-pow2.rs
@@ -11,7 +11,6 @@ mod _common;
 
 use _common::validate;
-use std::u64;
 
 fn main() {
     for exp in 19..64 {
diff --git a/lexical-parse-float/etc/correctness/test-parse-unittests/main.rs b/lexical-parse-float/etc/correctness/test-parse-unittests/main.rs
index 1399285c..993f07a2 100644
--- a/lexical-parse-float/etc/correctness/test-parse-unittests/main.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-unittests/main.rs
@@ -5,13 +5,14 @@
 use lexical_parse_float::FromLexical;
 use serde::Deserialize;
+use std::collections::HashMap;
 
 // STRUCTS
 
 // Derived structs for the Toml parser.
 #[derive(Debug, Deserialize)]
 struct StrtodTests {
-    negativeFormattingTests: Vec<String>,
+    NegativeFormattingTests: Vec<String>,
     FormattingTests: Vec<FormattingTest>,
     ConversionTests: Vec<ConversionTest>,
 }
@@ -36,25 +37,6 @@ struct ConversionTest {
 
 // PATH
 
-/// Return the `target/debug` or `target/release` directory path.
-pub fn build_dir() -> std::path::PathBuf {
-    std::env::current_exe()
-        .expect("unittest executable path")
-        .parent()
-        .expect("debug/release directory")
-        .to_path_buf()
-}
-
-/// Return the `target` directory path.
-pub fn target_dir() -> std::path::PathBuf {
-    build_dir().parent().expect("target directory").to_path_buf()
-}
-
-/// Return the project directory path.
-pub fn project_dir() -> std::path::PathBuf {
-    target_dir().parent().expect("project directory").to_path_buf()
-}
-
 fn run_test(string: &str, hex: &str) {
     // We toggle between "inf" and "infinity" as valid Infinity identifiers.
     let lower = string.to_lowercase();
@@ -71,37 +53,53 @@ fn run_test(string: &str, hex: &str) {
 }
 
 fn run_tests(tests: StrtodTests) {
-    let negative_tests_count = tests.negativeFormattingTests.len();
+    let negative_tests_count = tests.NegativeFormattingTests.len();
     let formatting_tests_count = tests.FormattingTests.len();
     let conversion_tests_count = tests.ConversionTests.len();
-    for test in tests.negativeFormattingTests {
+    // Unfortunately, randomizing the data under miri is too expensive, so we just iterate in order.
+    let mut count = 0;
+    for test in tests.NegativeFormattingTests {
+        if cfg!(miri) && count % 10 == 0 {
+            println!("Running test {count} for negative formatting.");
+        }
         assert!(f64::from_lexical(test.as_bytes()).is_err());
+        count += 1;
+        if cfg!(miri) && count > 500 {
+            break;
+        }
     }
     for test in tests.FormattingTests {
-        run_test(&test.str, &test.hex)
+        if cfg!(miri) && count % 10 == 0 {
+            println!("Running test {count} for positive formatting.");
+        }
+        run_test(&test.str, &test.hex);
+        count += 1;
+        if cfg!(miri) && count > 1500 {
+            break;
+        }
     }
     for test in tests.ConversionTests {
-        run_test(&test.str, &test.hex)
+        if cfg!(miri) && count % 10 == 0 {
+            println!("Running test {count} for conversion tests.");
+        }
+        run_test(&test.str, &test.hex);
+        if cfg!(miri) && count > 2500 {
+            break;
+        }
     }
     println!("Ran {} negative tests.", negative_tests_count);
     println!("Ran {} formatting tests.", formatting_tests_count);
-    println!("Ran {} conversion tests.", conversion_tests_count);
-    println!("");
-}
-
-fn parse_tests(name: &str) -> StrtodTests {
-    let mut test_path = project_dir();
-    test_path.push("test-parse-unittests");
-    test_path.push(name);
-    let test_data = std::fs::read_to_string(test_path).unwrap();
-
-    toml::from_str(&test_data).unwrap()
+    println!("Ran {} conversion tests.\n", conversion_tests_count);
 }
 
 fn main() {
-    let filenames = ["strtod_tests.toml", "rust_parse_tests.toml"];
-    for filename in filenames.iter() {
+    // NOTE: Miri does not play nicely with directories, so we compile the test data in.
+    let tests: HashMap<&str, &str> = HashMap::from([
+        ("strtod_tests.toml", include_str!("strtod_tests.toml")),
+        ("rust_parse_tests.toml", include_str!("rust_parse_tests.toml")),
+    ]);
+    for (&filename, &data) in tests.iter() {
         println!("Running Test: {}", filename);
-        run_tests(parse_tests(filename));
+        run_tests(toml::from_str(data).unwrap());
     }
 }
diff --git a/lexical-parse-float/etc/correctness/test-parse-unittests/rust_parse_tests.toml b/lexical-parse-float/etc/correctness/test-parse-unittests/rust_parse_tests.toml
index c8b5d391..7ad31340 100644
--- a/lexical-parse-float/etc/correctness/test-parse-unittests/rust_parse_tests.toml
+++ b/lexical-parse-float/etc/correctness/test-parse-unittests/rust_parse_tests.toml
@@ -6,7 +6,7 @@
 # License: MIT
 
 # strtod(string) method should not accept the following inputs
-negativeFormattingTests = [
+NegativeFormattingTests = [
     "inf1", "inf+", ".E", "1.0e", "2.45+e+3", "23e.23", "e9", "+e", "e+", ".", "e",
     ".7+", ".21e", "+", "", "infe", "nan(err", "nan)", "NAN(test_)_)", "nan0", "-.e+",
     "-+12.34"
diff --git a/lexical-parse-float/etc/correctness/test-parse-unittests/strtod_tests.toml b/lexical-parse-float/etc/correctness/test-parse-unittests/strtod_tests.toml
index bb22cdeb..248d75f5 100644
--- a/lexical-parse-float/etc/correctness/test-parse-unittests/strtod_tests.toml
+++ b/lexical-parse-float/etc/correctness/test-parse-unittests/strtod_tests.toml
@@ -36,7 +36,7 @@
 # [5] http://bugs.python.org/
 
 # strtod(string) method should not accept the following inputs
-negativeFormattingTests = [
+NegativeFormattingTests = [
     "inf1", "inf+", ".E", "1.0e", "2.45+e+3", "23e.23", "e9", "+e", "e+", ".", "e",
     ".7+", ".21e", "+", "", "infe", "nan(err", "nan)", "NAN(test_)_)", "nan0", "-.e+",
     "-+12.34"