From f090ec1216eb52c7d0fed53986c7e02f90464130 Mon Sep 17 00:00:00 2001 From: Tomas Tauber <2410580+tomtau@users.noreply.github.com> Date: Wed, 1 May 2024 20:20:22 +0800 Subject: [PATCH] make tracking for better error details optional (fixes #1009) (#1013) * make tracking for better error details optional (fixes #1009) * Update pest/src/parser_state.rs * bump toolchain for msrv --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .github/workflows/ci.yml | 2 +- debugger/Cargo.toml | 8 ++-- derive/Cargo.toml | 6 +-- generator/Cargo.toml | 6 +-- grammars/Cargo.toml | 6 +-- grammars/src/lib.rs | 2 + meta/Cargo.toml | 4 +- pest/Cargo.toml | 2 +- pest/src/lib.rs | 3 +- pest/src/parser_state.rs | 96 ++++++++++++++++++++++++++++------------ vm/Cargo.toml | 6 +-- 11 files changed, 91 insertions(+), 50 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 04abff93..ecb75f4f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,7 +79,7 @@ jobs: with: kind: msrv tools: cargo-msrv - toolchain: 1.70.0 # needed for cargo-msrv due to cargo-platform v0.1.6 + toolchain: 1.73.0 # needed for cargo-msrv due to cargo-platform v0.1.8 - name: Check msrv shell: sh run: for crate in "derive" "generator" "grammars" "meta" "pest" "vm"; do cd "$crate" && cargo msrv verify && cd ..; done diff --git a/debugger/Cargo.toml b/debugger/Cargo.toml index 8819f549..e87337c4 100644 --- a/debugger/Cargo.toml +++ b/debugger/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_debugger" description = "pest grammar debugger" -version = "2.7.9" +version = "2.7.10" edition = "2021" authors = [ "Dragoș Tiselice ", @@ -17,9 +17,9 @@ readme = "_README.md" rust-version = "1.61" [dependencies] -pest = { path = "../pest", version = "2.7.9" } -pest_meta = { path = "../meta", version = "2.7.9" } -pest_vm = { path = "../vm", version = "2.7.9" } +pest = { path = "../pest", version = "2.7.10" } +pest_meta = { path = 
"../meta", version = "2.7.10" } +pest_vm = { path = "../vm", version = "2.7.10" } reqwest = { version = "= 0.11.13", default-features = false, features = [ "blocking", "json", diff --git a/derive/Cargo.toml b/derive/Cargo.toml index ace3c4c0..dbdfb25c 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_derive" description = "pest's derive macro" -version = "2.7.9" +version = "2.7.10" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -25,5 +25,5 @@ grammar-extras = ["pest_generator/grammar-extras"] [dependencies] # for tests, included transitively anyway -pest = { path = "../pest", version = "2.7.9", default-features = false } -pest_generator = { path = "../generator", version = "2.7.9", default-features = false } +pest = { path = "../pest", version = "2.7.10", default-features = false } +pest_generator = { path = "../generator", version = "2.7.10", default-features = false } diff --git a/generator/Cargo.toml b/generator/Cargo.toml index fa16808b..77b5196c 100644 --- a/generator/Cargo.toml +++ b/generator/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_generator" description = "pest code generator" -version = "2.7.9" +version = "2.7.10" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -22,8 +22,8 @@ grammar-extras = ["pest_meta/grammar-extras"] export-internal = [] [dependencies] -pest = { path = "../pest", version = "2.7.9", default-features = false } -pest_meta = { path = "../meta", version = "2.7.9" } +pest = { path = "../pest", version = "2.7.10", default-features = false } +pest_meta = { path = "../meta", version = "2.7.10" } proc-macro2 = "1.0" quote = "1.0" syn = "2.0" diff --git a/grammars/Cargo.toml b/grammars/Cargo.toml index 6224c9f2..0b5393cc 100644 --- a/grammars/Cargo.toml +++ b/grammars/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_grammars" description = "pest popular grammar implementations" -version = "2.7.9" +version = "2.7.10" edition = 
"2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -14,8 +14,8 @@ readme = "_README.md" rust-version = "1.61" [dependencies] -pest = { path = "../pest", version = "2.7.9" } -pest_derive = { path = "../derive", version = "2.7.9" } +pest = { path = "../pest", version = "2.7.10" } +pest_derive = { path = "../derive", version = "2.7.10" } [dev-dependencies] criterion = "0.5" diff --git a/grammars/src/lib.rs b/grammars/src/lib.rs index 94c26694..0e7ae42e 100644 --- a/grammars/src/lib.rs +++ b/grammars/src/lib.rs @@ -279,6 +279,8 @@ mod tests { #[test] fn sql_parse_attempts_error() { + pest::set_error_detail(true); + fn is_whitespace(string: String) -> bool { string == "\r\n" || (string.len() == 1 && string.chars().next().unwrap().is_whitespace()) diff --git a/meta/Cargo.toml b/meta/Cargo.toml index 535ed115..30927647 100644 --- a/meta/Cargo.toml +++ b/meta/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_meta" description = "pest meta language parser and validator" -version = "2.7.9" +version = "2.7.10" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -22,7 +22,7 @@ include = [ rust-version = "1.61" [dependencies] -pest = { path = "../pest", version = "2.7.9" } +pest = { path = "../pest", version = "2.7.10" } once_cell = "1.8.0" [build-dependencies] diff --git a/pest/Cargo.toml b/pest/Cargo.toml index eec4c9b2..508b1b83 100644 --- a/pest/Cargo.toml +++ b/pest/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest" description = "The Elegant Parser" -version = "2.7.9" +version = "2.7.10" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" diff --git a/pest/src/lib.rs b/pest/src/lib.rs index 2ec2688c..0dde153f 100644 --- a/pest/src/lib.rs +++ b/pest/src/lib.rs @@ -336,7 +336,8 @@ extern crate std; pub use crate::parser::Parser; pub use crate::parser_state::{ - set_call_limit, state, Atomicity, Lookahead, MatchDir, ParseResult, ParserState, + set_call_limit, set_error_detail, state, Atomicity, 
Lookahead, MatchDir, ParseResult, + ParserState, }; pub use crate::position::Position; pub use crate::span::{merge_spans, Lines, LinesSpan, Span}; diff --git a/pest/src/parser_state.rs b/pest/src/parser_state.rs index 1193eb33..276e81f5 100644 --- a/pest/src/parser_state.rs +++ b/pest/src/parser_state.rs @@ -20,7 +20,7 @@ use alloc::vec::Vec; use core::fmt::{Debug, Display, Formatter}; use core::num::NonZeroUsize; use core::ops::Range; -use core::sync::atomic::{AtomicUsize, Ordering}; +use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use crate::error::{Error, ErrorVariant}; use crate::iterators::pairs::new; @@ -103,6 +103,22 @@ pub fn set_call_limit(limit: Option<NonZeroUsize>) { CALL_LIMIT.store(limit.map(|f| f.get()).unwrap_or(0), Ordering::Relaxed); } +static ERROR_DETAIL: AtomicBool = AtomicBool::new(false); + +/// Sets whether information for more error details +/// should be collected. This is useful for debugging +/// parser errors (as it leads to more comprehensive +/// error messages), but it has a higher performance cost. +/// (hence, it's off by default) +/// +/// # Arguments +/// +/// * `enabled` - Whether to enable the collection for +/// more error details. +pub fn set_error_detail(enabled: bool) { + ERROR_DETAIL.store(enabled, Ordering::Relaxed); +} + #[derive(Debug)] struct CallLimitTracker { current_call_limit: Option<(usize, usize)>, @@ -204,6 +220,8 @@ impl Display for ParsingToken { /// The intuition is such rules will be most likely the query user initially wanted to write. #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ParseAttempts<R> { + /// Indicates whether the parsing attempts are tracked. + enabled: bool, /// Vec of rule calls sequences awaiting tokens at the same `max_position`. /// If there are several stacks in vec, it means all those rule stacks are "equal" /// because their attempts occurred on the same position.
@@ -227,6 +245,7 @@ impl ParseAttempts { expected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY), unexpected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY), max_position: 0, + enabled: ERROR_DETAIL.load(Ordering::Relaxed), } } @@ -461,11 +480,18 @@ where } }; - Err(Error::new_from_pos_with_parsing_attempts( - variant, - Position::new_internal(input, state.attempt_pos), - state.parse_attempts.clone(), - )) + if state.parse_attempts.enabled { + Err(Error::new_from_pos_with_parsing_attempts( + variant, + Position::new_internal(input, state.attempt_pos), + state.parse_attempts.clone(), + )) + } else { + Err(Error::new_from_pos( + variant, + Position::new_internal(input, state.attempt_pos), + )) + } } } } @@ -675,7 +701,9 @@ impl<'i, R: RuleType> ParserState<'i, R> { // Note, that we need to count positive parsing results too, because we can fail in // optional rule call inside which may lie the farthest // parsed token. - try_add_rule_to_stack(&mut new_state); + if new_state.parse_attempts.enabled { + try_add_rule_to_stack(&mut new_state); + } Ok(new_state) } Err(mut new_state) => { @@ -687,7 +715,9 @@ impl<'i, R: RuleType> ParserState<'i, R> { neg_attempts_index, attempts, ); - try_add_rule_to_stack(&mut new_state); + if new_state.parse_attempts.enabled { + try_add_rule_to_stack(&mut new_state); + } } if new_state.lookahead == Lookahead::None @@ -981,13 +1011,15 @@ impl<'i, R: RuleType> ParserState<'i, R> { where F: FnOnce(char) -> bool, { - let token = ParsingToken::BuiltInRule; let start_position = self.position.pos(); - if self.position.match_char_by(f) { - self.handle_token_parse_result(start_position, token, true); + let succeeded = self.position.match_char_by(f); + if self.parse_attempts.enabled { + let token = ParsingToken::BuiltInRule; + self.handle_token_parse_result(start_position, token, succeeded); + } + if succeeded { Ok(self) } else { - self.handle_token_parse_result(start_position, token, false); Err(self) } } @@ -1016,15 
+1048,17 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// ``` #[inline] pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> { - let token = ParsingToken::Sensitive { - token: String::from(string), - }; let start_position = self.position.pos(); - if self.position.match_string(string) { - self.handle_token_parse_result(start_position, token, true); + let succeeded = self.position.match_string(string); + if self.parse_attempts.enabled { + let token = ParsingToken::Sensitive { + token: String::from(string), + }; + self.handle_token_parse_result(start_position, token, succeeded); + } + if succeeded { Ok(self) } else { - self.handle_token_parse_result(start_position, token, false); Err(self) } } @@ -1053,15 +1087,17 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// ``` #[inline] pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> { - let token = ParsingToken::Insensitive { - token: String::from(string), - }; - let start_position = self.position().pos(); - if self.position.match_insensitive(string) { - self.handle_token_parse_result(start_position, token, true); + let start_position: usize = self.position().pos(); + let succeeded = self.position.match_insensitive(string); + if self.parse_attempts.enabled { + let token = ParsingToken::Insensitive { + token: String::from(string), + }; + self.handle_token_parse_result(start_position, token, succeeded); + } + if succeeded { Ok(self) } else { - self.handle_token_parse_result(start_position, token, false); Err(self) } } @@ -1093,16 +1129,18 @@ impl<'i, R: RuleType> ParserState<'i, R> { /// ``` #[inline] pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> { + let start_position = self.position().pos(); let token = ParsingToken::Range { start: range.start, end: range.end, }; - let start_position = self.position().pos(); - if self.position.match_range(range) { - self.handle_token_parse_result(start_position, token, true); + let succeeded = self.position.match_range(range); + if
self.parse_attempts.enabled { + self.handle_token_parse_result(start_position, token, succeeded); + } + if succeeded { Ok(self) } else { - self.handle_token_parse_result(start_position, token, false); Err(self) } } diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 493557a1..f8c8e550 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_vm" description = "pest grammar virtual machine" -version = "2.7.9" +version = "2.7.10" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -14,8 +14,8 @@ readme = "_README.md" rust-version = "1.61" [dependencies] -pest = { path = "../pest", version = "2.7.9" } -pest_meta = { path = "../meta", version = "2.7.9" } +pest = { path = "../pest", version = "2.7.10" } +pest_meta = { path = "../meta", version = "2.7.10" } [features] grammar-extras = ["pest_meta/grammar-extras"]