From 783a18d98df447dd2db4568a1d30a0c04e063852 Mon Sep 17 00:00:00 2001
From: Alex Huszagh
Date: Fri, 13 Sep 2024 16:50:04 -0500
Subject: [PATCH] Add more comprehensive miri coverage.

---
 CHANGELOG                                         |   2 +-
 ci/miri.sh                                        |   9 +-
 .../etc/correctness/test-parse-golang/main.rs     | 104 +++++++++---------
 .../correctness/test-parse-random/_common.rs      |   5 +-
 .../test-parse-random/many-digits.rs              |   4 +-
 .../correctness/test-parse-random/rand-f64.rs     |   2 +-
 .../correctness/test-parse-random/u64-pow2.rs     |   1 -
 .../correctness/test-parse-unittests/main.rs      |  74 ++++++-------
 .../rust_parse_tests.toml                         |   2 +-
 .../test-parse-unittests/strtod_tests.toml        |   2 +-
 10 files changed, 107 insertions(+), 98 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index c08ef207..24ab8422 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added fuzzing and miri code safety analysis to our CI pipelines.
 - Removed requirement of `alloc` in `no_std` ennvironments without the `write` feature.
 - Make multi-digit optimizations in integer parsing optional.
-- Much higher miri coverage including for proptests.
+- Much higher miri coverage, including for proptests and the corner cases from the golang test suite.
 
 ### Changed

diff --git a/ci/miri.sh b/ci/miri.sh
index 689a7436..0f524364 100755
--- a/ci/miri.sh
+++ b/ci/miri.sh
@@ -29,6 +29,9 @@ fi
 # Test the write-float correctness tests.
 cd "${home}"
 cd lexical-write-float/etc/correctness
-cargo run $FEATURES --release --bin shorter_interval
-cargo run $FEATURES --release --bin random
-cargo run $FEATURES --release --bin simple_random -- --iterations 1000000
+cargo +nightly miri run $FEATURES --release --bin test-parse-golang
+# NOTE: This is **extraordinarily** slow, mostly because the data is parsed
+# as TOML, which makes loading it take forever.
+if [ -n "$COMPREHENSIVE" ]; then
+    cargo +nightly miri run $FEATURES --release --bin test-parse-unittests
+fi
diff --git a/lexical-parse-float/etc/correctness/test-parse-golang/main.rs b/lexical-parse-float/etc/correctness/test-parse-golang/main.rs
index c3468988..919e7e89 100644
--- a/lexical-parse-float/etc/correctness/test-parse-golang/main.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-golang/main.rs
@@ -2,40 +2,7 @@
 // See https://unlicense.org/
 
 use lexical_parse_float::FromLexical;
-
-use std::io::prelude::*;
-use std::path::PathBuf;
-use std::{env, fs, io};
-
-// PATH
-
-/// Return the `target/debug` or `target/release` directory path.
-pub fn build_dir() -> PathBuf {
-    env::current_exe()
-        .expect("unittest executable path")
-        .parent()
-        .expect("debug/release directory")
-        .to_path_buf()
-}
-
-/// Return the `target` directory path.
-pub fn target_dir() -> PathBuf {
-    build_dir().parent().expect("target directory").to_path_buf()
-}
-
-/// Return the project directory path.
-pub fn project_dir() -> PathBuf {
-    target_dir().parent().expect("project directory").to_path_buf()
-}
-
-/// Return the `data` directory path.
-pub fn data_dir() -> PathBuf {
-    let mut dir = project_dir();
-    dir.push("test-parse-golang");
-    dir.push("parse-number-fxx-test-data");
-    dir.push("data");
-    dir
-}
+use std::collections::HashMap;
 
 fn run_test(line: &str) {
     // Tests have the following format:
@@ -56,22 +23,61 @@ fn run_test(line: &str) {
 
 fn main() {
     // Iterate over all .txt files in the directory.
-    let paths = fs::read_dir(data_dir()).expect("Please update the Git submodule");
-    for direntry in paths {
-        let path = direntry.unwrap().path();
-        if path.extension().unwrap() == "txt" {
-            // Have a data file, parse and run the tests.
-            let filename = path.file_name().unwrap().to_str().unwrap();
-            println!("Running Test: {}", filename);
-            let file = fs::File::open(path).unwrap();
-            let reader = io::BufReader::new(file);
-            let mut count: usize = 0;
-            for line in reader.lines() {
-                let line = line.unwrap();
-                run_test(&line);
-                count += 1;
+    // NOTE: Miri does not play nicely with directories, so we compile the test data in.
+    let tests: HashMap<&str, &str> = HashMap::from([
+        ("freetype-2-7.txt", include_str!("parse-number-fxx-test-data/data/freetype-2-7.txt")),
+        (
+            "google-double-conversion.txt",
+            include_str!("parse-number-fxx-test-data/data/google-double-conversion.txt"),
+        ),
+        ("google-wuffs.txt", include_str!("parse-number-fxx-test-data/data/google-wuffs.txt")),
+        ("ibm-fpgen.txt", include_str!("parse-number-fxx-test-data/data/ibm-fpgen.txt")),
+        (
+            "lemire-fast-double-parser.txt",
+            include_str!("parse-number-fxx-test-data/data/lemire-fast-double-parser.txt"),
+        ),
+        (
+            "lemire-fast-float.txt",
+            include_str!("parse-number-fxx-test-data/data/lemire-fast-float.txt"),
+        ),
+        (
+            "more-test-cases.txt",
+            include_str!("parse-number-fxx-test-data/data/more-test-cases.txt"),
+        ),
+        (
+            "remyoudompheng-fptest-0.txt",
+            include_str!("parse-number-fxx-test-data/data/remyoudompheng-fptest-0.txt"),
+        ),
+        (
+            "remyoudompheng-fptest-1.txt",
+            include_str!("parse-number-fxx-test-data/data/remyoudompheng-fptest-1.txt"),
+        ),
+        (
+            "remyoudompheng-fptest-2.txt",
+            include_str!("parse-number-fxx-test-data/data/remyoudompheng-fptest-2.txt"),
+        ),
+        (
+            "remyoudompheng-fptest-3.txt",
+            include_str!("parse-number-fxx-test-data/data/remyoudompheng-fptest-3.txt"),
+        ),
+        (
+            "tencent-rapidjson.txt",
+            include_str!("parse-number-fxx-test-data/data/tencent-rapidjson.txt"),
+        ),
+        ("ulfjack-ryu.txt", include_str!("parse-number-fxx-test-data/data/ulfjack-ryu.txt")),
+    ]);
+
+    // Unfortunately, randomizing the data under miri is too expensive, so we just iterate in order.
+    for (&filename, data) in tests.iter() {
+        println!("Running Test: {}", filename);
+        for (count, line) in data.lines().enumerate() {
+            if cfg!(miri) && count % 10 == 0 {
+                println!("Running test {count} for {filename}.");
+            }
+            run_test(line);
+            if cfg!(miri) && count > 3000 {
+                break;
             }
-            println!("Ran {} tests.", count);
         }
     }
 }
diff --git a/lexical-parse-float/etc/correctness/test-parse-random/_common.rs b/lexical-parse-float/etc/correctness/test-parse-random/_common.rs
index ba7ef6fa..4d415ddd 100644
--- a/lexical-parse-float/etc/correctness/test-parse-random/_common.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-random/_common.rs
@@ -17,7 +17,10 @@ use std::mem::transmute;
 #[allow(dead_code)]
 pub const SEED: [u32; 3] = [0x243f_6a88, 0x85a3_08d3, 0x1319_8a2e];
 #[allow(dead_code)]
-pub const ISAAC_SEED: [u8; 32] = [49, 52, 49, 53, 57, 50, 54, 53, 51, 53, 56, 57, 55, 57, 51, 50, 51, 56, 52, 54, 50, 54, 52, 51, 51, 56, 51, 50, 55, 57, 53, 48];
+pub const ISAAC_SEED: [u8; 32] = [
+    49, 52, 49, 53, 57, 50, 54, 53, 51, 53, 56, 57, 55, 57, 51, 50, 51, 56, 52, 54, 50, 54, 52, 51,
+    51, 56, 51, 50, 55, 57, 53, 48,
+];
 
 pub fn validate(text: &str) {
     let mut out = io::stdout();
diff --git a/lexical-parse-float/etc/correctness/test-parse-random/many-digits.rs b/lexical-parse-float/etc/correctness/test-parse-random/many-digits.rs
index a9dd2f07..47c36e16 100644
--- a/lexical-parse-float/etc/correctness/test-parse-random/many-digits.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-random/many-digits.rs
@@ -11,10 +11,10 @@ mod _common;
 
 use _common::{validate, ISAAC_SEED};
-use rand_isaac::Isaac64Rng;
-use rand::distributions::Distribution;
 use rand::distributions::uniform::Uniform;
+use rand::distributions::Distribution;
 use rand::{Rng, SeedableRng};
+use rand_isaac::Isaac64Rng;
 use std::char;
 
 fn main() {
diff --git a/lexical-parse-float/etc/correctness/test-parse-random/rand-f64.rs b/lexical-parse-float/etc/correctness/test-parse-random/rand-f64.rs
index c40235f9..17e32c8a 100644
--- a/lexical-parse-float/etc/correctness/test-parse-random/rand-f64.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-random/rand-f64.rs
@@ -11,8 +11,8 @@ mod _common;
 
 use _common::{validate, ISAAC_SEED};
-use rand_isaac::Isaac64Rng;
 use rand::{RngCore, SeedableRng};
+use rand_isaac::Isaac64Rng;
 use std::mem::transmute;
 
 fn main() {
diff --git a/lexical-parse-float/etc/correctness/test-parse-random/u64-pow2.rs b/lexical-parse-float/etc/correctness/test-parse-random/u64-pow2.rs
index 5b25c839..84927855 100644
--- a/lexical-parse-float/etc/correctness/test-parse-random/u64-pow2.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-random/u64-pow2.rs
@@ -11,7 +11,6 @@ mod _common;
 
 use _common::validate;
-use std::u64;
 
 fn main() {
     for exp in 19..64 {
diff --git a/lexical-parse-float/etc/correctness/test-parse-unittests/main.rs b/lexical-parse-float/etc/correctness/test-parse-unittests/main.rs
index 1399285c..993f07a2 100644
--- a/lexical-parse-float/etc/correctness/test-parse-unittests/main.rs
+++ b/lexical-parse-float/etc/correctness/test-parse-unittests/main.rs
@@ -5,13 +5,14 @@
 use lexical_parse_float::FromLexical;
 use serde::Deserialize;
+use std::collections::HashMap;
 
 // STRUCTS
 
 // Derived structs for the Toml parser.
 #[derive(Debug, Deserialize)]
 struct StrtodTests {
-    negativeFormattingTests: Vec<String>,
+    NegativeFormattingTests: Vec<String>,
     FormattingTests: Vec<FormattingTest>,
     ConversionTests: Vec<ConversionTest>,
 }
@@ -36,25 +37,6 @@ struct ConversionTest {
 
 // PATH
 
-/// Return the `target/debug` or `target/release` directory path.
-pub fn build_dir() -> std::path::PathBuf {
-    std::env::current_exe()
-        .expect("unittest executable path")
-        .parent()
-        .expect("debug/release directory")
-        .to_path_buf()
-}
-
-/// Return the `target` directory path.
-pub fn target_dir() -> std::path::PathBuf {
-    build_dir().parent().expect("target directory").to_path_buf()
-}
-
-/// Return the project directory path.
-pub fn project_dir() -> std::path::PathBuf {
-    target_dir().parent().expect("project directory").to_path_buf()
-}
-
 fn run_test(string: &str, hex: &str) {
     // We toggle between "inf" and "infinity" as valid Infinity identifiers.
     let lower = string.to_lowercase();
@@ -71,37 +53,53 @@ fn run_test(string: &str, hex: &str) {
 }
 
 fn run_tests(tests: StrtodTests) {
-    let negative_tests_count = tests.negativeFormattingTests.len();
+    let negative_tests_count = tests.NegativeFormattingTests.len();
     let formatting_tests_count = tests.FormattingTests.len();
     let conversion_tests_count = tests.ConversionTests.len();
-    for test in tests.negativeFormattingTests {
+    // Unfortunately, randomizing the data under miri is too expensive, so we just iterate in order.
+    let mut count = 0;
+    for test in tests.NegativeFormattingTests {
+        if cfg!(miri) && count % 10 == 0 {
+            println!("Running test {count} for negative formatting.");
+        }
         assert!(f64::from_lexical(test.as_bytes()).is_err());
+        count += 1;
+        if cfg!(miri) && count > 500 {
+            break;
+        }
     }
     for test in tests.FormattingTests {
-        run_test(&test.str, &test.hex)
+        if cfg!(miri) && count % 10 == 0 {
+            println!("Running test {count} for positive formatting.");
+        }
+        run_test(&test.str, &test.hex);
+        count += 1;
+        if cfg!(miri) && count > 1500 {
+            break;
+        }
     }
     for test in tests.ConversionTests {
-        run_test(&test.str, &test.hex)
+        if cfg!(miri) && count % 10 == 0 {
+            println!("Running test {count} for conversion tests.");
+        }
+        run_test(&test.str, &test.hex);
+        if cfg!(miri) && count > 2500 {
+            break;
+        }
     }
     println!("Ran {} negative tests.", negative_tests_count);
     println!("Ran {} formatting tests.", formatting_tests_count);
-    println!("Ran {} conversion tests.", conversion_tests_count);
-    println!("");
-}
-
-fn parse_tests(name: &str) -> StrtodTests {
-    let mut test_path = project_dir();
-    test_path.push("test-parse-unittests");
-    test_path.push(name);
-    let test_data = std::fs::read_to_string(test_path).unwrap();
-
-    toml::from_str(&test_data).unwrap()
+    println!("Ran {} conversion tests.\n", conversion_tests_count);
 }
 
 fn main() {
-    let filenames = ["strtod_tests.toml", "rust_parse_tests.toml"];
-    for filename in filenames.iter() {
+    // NOTE: Miri does not play nicely with directories, so we compile the test data in.
+    let tests: HashMap<&str, &str> = HashMap::from([
+        ("strtod_tests.toml", include_str!("strtod_tests.toml")),
+        ("rust_parse_tests.toml", include_str!("rust_parse_tests.toml")),
+    ]);
+    for (&filename, &data) in tests.iter() {
         println!("Running Test: {}", filename);
-        run_tests(parse_tests(filename));
+        run_tests(toml::from_str(data).unwrap());
     }
 }
diff --git a/lexical-parse-float/etc/correctness/test-parse-unittests/rust_parse_tests.toml b/lexical-parse-float/etc/correctness/test-parse-unittests/rust_parse_tests.toml
index c8b5d391..7ad31340 100644
--- a/lexical-parse-float/etc/correctness/test-parse-unittests/rust_parse_tests.toml
+++ b/lexical-parse-float/etc/correctness/test-parse-unittests/rust_parse_tests.toml
@@ -6,7 +6,7 @@
 # License: MIT
 
 # strtod(string) method should not accept the following inputs
-negativeFormattingTests = [
+NegativeFormattingTests = [
     "inf1", "inf+", ".E", "1.0e", "2.45+e+3", "23e.23", "e9", "+e", "e+", ".", "e",
     ".7+", ".21e", "+", "", "infe", "nan(err", "nan)", "NAN(test_)_)", "nan0", "-.e+",
     "-+12.34"
diff --git a/lexical-parse-float/etc/correctness/test-parse-unittests/strtod_tests.toml b/lexical-parse-float/etc/correctness/test-parse-unittests/strtod_tests.toml
index bb22cdeb..248d75f5 100644
--- a/lexical-parse-float/etc/correctness/test-parse-unittests/strtod_tests.toml
+++ b/lexical-parse-float/etc/correctness/test-parse-unittests/strtod_tests.toml
@@ -36,7 +36,7 @@
 # [5] http://bugs.python.org/
 
 # strtod(string) method should not accept the following inputs
-negativeFormattingTests = [
+NegativeFormattingTests = [
     "inf1", "inf+", ".E", "1.0e", "2.45+e+3", "23e.23", "e9", "+e", "e+", ".", "e",
     ".7+", ".21e", "+", "", "infe", "nan(err", "nan)", "NAN(test_)_)", "nan0", "-.e+",
     "-+12.34"