pest-parser · tomtau · Aug 2, 2023 · Jul 31, 2023 · Jul 31, 2023 · Jul 31, 2023
@@ -25,6 +25,13 @@ const_prec_climber = []
 [dependencies]
 ucd-trie = { version = "0.1.5", default-features = false }
 serde = { version = "1.0.145", optional = true }
-serde_json = { version = "1.0.85", optional = true}
+serde_json = { version = "1.0.85", optional = true }
 thiserror = { version = "1.0.37", optional = true }
 memchr = { version = "2", optional = true }
+
+[dev-dependencies]
+criterion = { version = "0.5.1", features = ["html_reports"] }
+
+[[bench]]
+name = "stack"
+harness = false
@@ -0,0 +1,88 @@
+// pest. The Elegant Parser
+// Copyright (c) 2018 Dragoș Tiselice
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use pest::Stack;
+
+fn snapshot_push_restore<T: Clone>(elements: impl Iterator<Item = T> + Clone) {
+    let mut stack = Stack::<T>::new();
+    for elem in elements {
+        stack.snapshot();
+        stack.push(elem.clone());
+        stack.restore();
+        stack.push(elem);
+    }
+}
+
+fn snapshot_push_clear_snapshot<T: Clone>(elements: impl Iterator<Item = T> + Clone) {
+    let mut stack = Stack::<T>::new();
+    for elem in elements {
+        stack.snapshot();
+        stack.push(elem);
+        stack.clear_snapshot();
+    }
+}
+
+fn snapshot_pop_restore<T: Clone>(elements: impl Iterator<Item = T>) {
+    let mut stack = Stack::<T>::new();
+    for elem in elements {
+        stack.push(elem);
+    }
+    while !stack.is_empty() {
+        stack.snapshot();
+        stack.pop();
+        stack.restore();
+        stack.pop();
+    }
+}
+
+fn snapshot_pop_clear<T: Clone>(elements: impl Iterator<Item = T>) {
+    let mut stack = Stack::<T>::new();
+    for elem in elements {
+        stack.push(elem);
+    }
+    while !stack.is_empty() {
+        stack.snapshot();
+        stack.pop();
+        stack.clear_snapshot();
+    }
+}
+
+fn benchmark(b: &mut Criterion) {
+    use core::iter::repeat;
+    // use criterion::black_box;
+    let times = 10000usize;
+    let small = 0..times;
+    let medium = ("", 0usize, 1usize);
+    let medium = repeat(medium).take(times);
+    let large = [""; 64];
+    let large = repeat(large).take(times);
+    macro_rules! test_series {
+        ($kind:ident) => {
+            b.bench_function(stringify!(push - restore - $kind), |b| {
+                b.iter(|| snapshot_push_restore($kind.clone()))
+            })
+            .bench_function(stringify!(push - clear - $kind), |b| {
+                b.iter(|| snapshot_push_clear_snapshot($kind.clone()))
+            })
+            .bench_function(stringify!(pop - restore - $kind), |b| {
+                b.iter(|| snapshot_pop_restore($kind.clone()))
+            })
+            .bench_function(stringify!(pop - clear - $kind), |b| {
+                b.iter(|| snapshot_pop_clear($kind.clone()))
+            })
+        };
+    }
+    test_series!(small);
+    test_series!(medium);
+    test_series!(large);
+}
+
+criterion_group!(benchmarks, benchmark);
+criterion_main!(benchmarks);
@@ -340,6 +340,7 @@ pub use crate::parser_state::{
 };
 pub use crate::position::Position;
 pub use crate::span::{merge_spans, Lines, LinesSpan, Span};
+pub use crate::stack::Stack;
 pub use crate::token::Token;
 use core::fmt::Debug;
 use core::hash::Hash;

@@ -11,22 +11,48 @@ use alloc::vec;
 use alloc::vec::Vec;
 use core::ops::{Index, Range};
 
-/// Implementation of a `Stack` which maintains an log of `StackOp`s in order to rewind the stack
-/// to a previous state.
+/// Implementation of a `Stack` which maintains popped elements and length of previous states
+/// in order to rewind the stack to a previous state.
 #[derive(Debug)]
 pub struct Stack<T: Clone> {
-    ops: Vec<StackOp<T>>,
+    /// All elements in the stack.
     cache: Vec<T>,
-    snapshots: Vec<usize>,
+    /// All elements that are in previous snapshots but may not be in the next state.
+    /// They will be pushed back to `cache` if the snapshot is restored,
+    /// otherwise be dropped if the snapshot is cleared.
+    ///
+    /// Those elements from a sequence of snapshots are stacked in one [`Vec`], and
+    /// `popped.len() == lengths.iter().map(|(len, remained)| len - remained).sum()`
+    popped: Vec<T>,
+    /// Every element corresponds to a snapshot, and each element has two fields:
+    /// - Length of `cache` when corresponding snapshot is taken (AKA `len`).
+    /// - Count of elements that come from corresponding snapshot
+    ///   and are still in next snapshot or current state (AKA `remained`).
+    ///
+    /// And `len` is never less than `remained`.
+    ///
+    /// On restoring, the `cache` can be divided into two parts:
+    /// - `0..remained` are untouched since the snapshot is taken.
+    ///
+    ///   There's nothing to do with those elements. Just let them stay where they are.
+    ///
+    /// - `remained..cache.len()` are pushed after the snapshot is taken.
+    lengths: Vec<(usize, usize)>,
+}
+
+impl<T: Clone> Default for Stack<T> {
+    fn default() -> Self {
+        Self::new()
+    }
 }
 
 impl<T: Clone> Stack<T> {
     /// Creates a new `Stack`.
     pub fn new() -> Self {
         Stack {
-            ops: vec![],
             cache: vec![],
-            snapshots: vec![],
+            popped: vec![],
+            lengths: vec![],
         }
     }
 
@@ -43,15 +69,21 @@ impl<T: Clone> Stack<T> {
 
     /// Pushes a `T` onto the `Stack`.
     pub fn push(&mut self, elem: T) {
-        self.ops.push(StackOp::Push(elem.clone()));
         self.cache.push(elem);
     }
 
     /// Pops the top-most `T` from the `Stack`.
     pub fn pop(&mut self) -> Option<T> {
+        let len = self.cache.len();
         let popped = self.cache.pop();
-        if let Some(ref val) = popped {
-            self.ops.push(StackOp::Pop(val.clone()));
+        if let Some(popped) = &popped {
+            if let Some((_, remained_count)) = self.lengths.last_mut() {
+                // `len >= *unpopped_count`
+                if len == *remained_count {
+                    *remained_count -= 1;
+                    self.popped.push(popped.clone());
+                }
+            }
         }
         popped
     }
@@ -63,40 +95,40 @@ impl<T: Clone> Stack<T> {
 
     /// Takes a snapshot of the current `Stack`.
     pub fn snapshot(&mut self) {
-        self.snapshots.push(self.ops.len());
+        self.lengths.push((self.cache.len(), self.cache.len()))
     }
 
     /// The parsing after the last snapshot was successful so clearing it.
     pub fn clear_snapshot(&mut self) {
-        self.snapshots.pop();
+        if let Some((len, unpopped)) = self.lengths.pop() {
+            // Popped elements from previous state are no longer needed.
+            self.popped.truncate(self.popped.len() - (len - unpopped));
+        }
     }
 
     /// Rewinds the `Stack` to the most recent `snapshot()`. If no `snapshot()` has been taken, this
     /// function return the stack to its initial state.
     pub fn restore(&mut self) {
-        match self.snapshots.pop() {
-            Some(ops_index) => {
-                self.rewind_to(ops_index);
-                self.ops.truncate(ops_index);
+        match self.lengths.pop() {
+            Some((len_stack, remained)) => {
+                if remained < self.cache.len() {
+                    // Remove those elements that are pushed after the snapshot.
+                    self.cache.truncate(remained);
+                }
+                if len_stack > remained {
+                    let rewind_count = len_stack - remained;
+                    let new_len = self.popped.len() - rewind_count;
+                    let recovered_elements = self.popped.drain(new_len..);
+                    self.cache.extend(recovered_elements.rev());
+                    debug_assert_eq!(self.popped.len(), new_len);
+                }
             }
             None => {
                 self.cache.clear();
-                self.ops.clear();
-            }
-        }
-    }
-
-    // Rewind the stack to a particular index
-    fn rewind_to(&mut self, index: usize) {
-        let ops_to_rewind = &self.ops[index..];
-        for op in ops_to_rewind.iter().rev() {
-            match *op {
-                StackOp::Push(_) => {
-                    self.cache.pop();
-                }
-                StackOp::Pop(ref elem) => {
-                    self.cache.push(elem.clone());
-                }
+                // As `self.popped` and `self.lengths` should already be empty,
+                // there is no need to clear it.
+                debug_assert!(self.popped.is_empty());
+                debug_assert!(self.lengths.is_empty());
             }
         }
     }
@@ -110,12 +142,6 @@ impl<T: Clone> Index<Range<usize>> for Stack<T> {
     }
 }
 
-#[derive(Debug)]
-enum StackOp<T> {
-    Push(T),
-    Pop(T),
-}
-
 #[cfg(test)]
 mod test {
     use super::Stack;
@@ -146,6 +172,79 @@ mod test {
 
         assert_eq!(stack[0..stack.len()], [0]);
     }
+    #[test]
+    fn restore_without_snapshot() {
+        let mut stack = Stack::new();
+
+        stack.push(0);
+        stack.restore();
+
+        assert_eq!(stack[0..stack.len()], [0; 0]);
+    }
+
+    #[test]
+    fn snapshot_pop_restore() {
+        let mut stack = Stack::new();
+
+        stack.push(0);
+        stack.snapshot();
+        stack.pop();
+        stack.restore();
+
+        assert_eq!(stack[0..stack.len()], [0]);
+    }
+
+    #[test]
+    fn snapshot_pop_push_restore() {
+        let mut stack = Stack::new();
+
+        stack.push(0);
+        stack.snapshot();
+        stack.pop();
+        stack.push(1);
+        stack.restore();
+
+        assert_eq!(stack[0..stack.len()], [0]);
+    }
+
+    #[test]
+    fn snapshot_push_pop_restore() {
+        let mut stack = Stack::new();
+
+        stack.push(0);
+        stack.snapshot();
+        stack.push(1);
+        stack.push(2);
+        stack.pop();
+        stack.restore();
+
+        assert_eq!(stack[0..stack.len()], [0]);
+    }
+
+    #[test]
+    fn snapshot_push_clear() {
+        let mut stack = Stack::new();
+
+        stack.push(0);
+        stack.snapshot();
+        stack.push(1);
+        stack.clear_snapshot();
+
+        assert_eq!(stack[0..stack.len()], [0, 1]);
+    }
+
+    #[test]
+    fn snapshot_pop_clear() {
+        let mut stack = Stack::new();
+
+        stack.push(0);
+        stack.push(1);
+        stack.snapshot();
+        stack.pop();
+        stack.clear_snapshot();
+
+        assert_eq!(stack[0..stack.len()], [0]);
+    }
 
     #[test]
     fn stack_ops() {