Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A new stack implementation that should be faster and less space-consuming #905

Merged
merged 4 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion pest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ const_prec_climber = []
[dependencies]
ucd-trie = { version = "0.1.5", default-features = false }
serde = { version = "1.0.145", optional = true }
serde_json = { version = "1.0.85", optional = true}
serde_json = { version = "1.0.85", optional = true }
thiserror = { version = "1.0.37", optional = true }
memchr = { version = "2", optional = true }

[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }

[[bench]]
name = "stack"
harness = false
88 changes: 88 additions & 0 deletions pest/benches/stack.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use criterion::{criterion_group, criterion_main, Criterion};
use pest::Stack;

fn snapshot_push_restore<T: Clone>(elements: impl Iterator<Item = T> + Clone) {
let mut stack = Stack::<T>::new();
for elem in elements {
stack.snapshot();
stack.push(elem.clone());
stack.restore();
stack.push(elem);
}
}

fn snapshot_push_clear_snapshot<T: Clone>(elements: impl Iterator<Item = T> + Clone) {
let mut stack = Stack::<T>::new();
for elem in elements {
stack.snapshot();
stack.push(elem);
stack.clear_snapshot();
}
}

fn snapshot_pop_restore<T: Clone>(elements: impl Iterator<Item = T>) {
let mut stack = Stack::<T>::new();
for elem in elements {
stack.push(elem);
}
while !stack.is_empty() {
stack.snapshot();
stack.pop();
stack.restore();
stack.pop();
}
}

fn snapshot_pop_clear<T: Clone>(elements: impl Iterator<Item = T>) {
let mut stack = Stack::<T>::new();
for elem in elements {
stack.push(elem);
}
while !stack.is_empty() {
stack.snapshot();
stack.pop();
stack.clear_snapshot();
}
}

/// Registers the stack benchmarks with Criterion.
///
/// Three input series of `TIMES` items each are used: `small` (a range of `usize`),
/// `medium` (a repeated `(&str, usize, usize)` tuple) and `large` (a repeated `[&str; 64]`).
/// Each series is run through all four workloads defined in this file.
fn benchmark(b: &mut Criterion) {
    use core::iter::repeat;

    const TIMES: usize = 10000;

    let small = 0..TIMES;
    let medium = repeat(("", 0usize, 1usize)).take(TIMES);
    let large = repeat([""; 64]).take(TIMES);

    // Registers the four workloads for one input series; the series identifier is
    // also embedded in each benchmark name through `stringify!`.
    macro_rules! test_series {
        ($input:ident) => {
            b.bench_function(stringify!(push - restore - $input), |bencher| {
                bencher.iter(|| snapshot_push_restore($input.clone()))
            })
            .bench_function(stringify!(push - clear - $input), |bencher| {
                bencher.iter(|| snapshot_push_clear_snapshot($input.clone()))
            })
            .bench_function(stringify!(pop - restore - $input), |bencher| {
                bencher.iter(|| snapshot_pop_restore($input.clone()))
            })
            .bench_function(stringify!(pop - clear - $input), |bencher| {
                bencher.iter(|| snapshot_pop_clear($input.clone()))
            })
        };
    }

    test_series!(small);
    test_series!(medium);
    test_series!(large);
}

criterion_group!(benchmarks, benchmark);
criterion_main!(benchmarks);
1 change: 1 addition & 0 deletions pest/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ pub use crate::parser_state::{
};
pub use crate::position::Position;
pub use crate::span::{merge_spans, Lines, LinesSpan, Span};
pub use crate::stack::Stack;
pub use crate::token::Token;
use core::fmt::Debug;
use core::hash::Hash;
Expand Down
173 changes: 136 additions & 37 deletions pest/src/stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,48 @@ use alloc::vec;
use alloc::vec::Vec;
use core::ops::{Index, Range};

/// Implementation of a `Stack` which maintains an log of `StackOp`s in order to rewind the stack
/// to a previous state.
/// Implementation of a `Stack` which maintains popped elements and length of previous states
/// in order to rewind the stack to a previous state.
#[derive(Debug)]
pub struct Stack<T: Clone> {
ops: Vec<StackOp<T>>,
/// All elements in the stack.
cache: Vec<T>,
snapshots: Vec<usize>,
/// All elements that are in previous snapshots but may not be in the next state.
/// They will be pushed back to `cache` if the snapshot is restored,
/// otherwise be dropped if the snapshot is cleared.
///
/// Those elements from a sequence of snapshots are stacked in one [`Vec`], and
/// `popped.len() == lengths.iter().map(|(len, remained)| len - remained).sum()`
popped: Vec<T>,
/// Every element corresponds to a snapshot, and each element has two fields:
/// - Length of `cache` when corresponding snapshot is taken (AKA `len`).
/// - Count of elements that come from corresponding snapshot
/// and are still in next snapshot or current state (AKA `remained`).
///
/// And `len` is never less than `remained`.
///
/// On restoring, the `cache` can be divided into two parts:
/// - `0..remained` are untouched since the snapshot is taken.
///
/// There's nothing to do with those elements. Just let them stay where they are.
///
/// - `remained..cache.len()` are pushed after the snapshot is taken.
lengths: Vec<(usize, usize)>,
}

impl<T: Clone> Default for Stack<T> {
fn default() -> Self {
Self::new()
}
}

impl<T: Clone> Stack<T> {
/// Creates a new `Stack`.
pub fn new() -> Self {
Stack {
ops: vec![],
cache: vec![],
snapshots: vec![],
popped: vec![],
lengths: vec![],
}
}

Expand All @@ -43,15 +69,21 @@ impl<T: Clone> Stack<T> {

/// Pushes a `T` onto the `Stack`.
pub fn push(&mut self, elem: T) {
self.ops.push(StackOp::Push(elem.clone()));
self.cache.push(elem);
}

/// Pops the top-most `T` from the `Stack`.
pub fn pop(&mut self) -> Option<T> {
let len = self.cache.len();
let popped = self.cache.pop();
if let Some(ref val) = popped {
self.ops.push(StackOp::Pop(val.clone()));
if let Some(popped) = &popped {
if let Some((_, remained_count)) = self.lengths.last_mut() {
// Invariant: `len >= *remained_count`.
if len == *remained_count {
*remained_count -= 1;
self.popped.push(popped.clone());
}
}
}
popped
}
Expand All @@ -63,40 +95,40 @@ impl<T: Clone> Stack<T> {

/// Takes a snapshot of the current `Stack`.
pub fn snapshot(&mut self) {
self.snapshots.push(self.ops.len());
self.lengths.push((self.cache.len(), self.cache.len()))
}

/// Discards the most recent `snapshot()` because the parsing after it was successful.
///
/// The current contents of the stack are kept. If no snapshot has been taken,
/// this is a no-op.
///
/// If an enclosing (older) snapshot is still active, the elements of that snapshot
/// which were popped while the discarded snapshot was on top are handed over to it,
/// so a later `restore()` of the enclosing snapshot can still bring them back.
pub fn clear_snapshot(&mut self) {
    if let Some((len, remained)) = self.lengths.pop() {
        // Elements popped under the discarded snapshot form the tail of `popped`,
        // ordered from the highest stack index down to index `remained`.
        let segment_len = len - remained;
        let segment_start = self.popped.len() - segment_len;

        // Count of trailing elements in that segment that the enclosing snapshot
        // still regards as untouched (stack indices `remained..outer_remained`).
        let mut keep = 0;
        if let Some((_, outer_remained)) = self.lengths.last_mut() {
            if *outer_remained > remained {
                keep = (*outer_remained - remained).min(segment_len);
                // Those elements are no longer in place; they now belong to the
                // enclosing snapshot's share of `popped`.
                *outer_remained = remained;
            }
        }

        // Everything before the kept tail was pushed after the enclosing snapshot
        // (or there is no enclosing snapshot), so it is no longer needed.
        drop(
            self.popped
                .drain(segment_start..segment_start + (segment_len - keep)),
        );
    }
}

/// Rewinds the `Stack` to the most recent `snapshot()`. If no `snapshot()` has been taken, this
/// function returns the stack to its initial (empty) state.
pub fn restore(&mut self) {
match self.snapshots.pop() {
Some(ops_index) => {
self.rewind_to(ops_index);
self.ops.truncate(ops_index);
match self.lengths.pop() {
Some((len_stack, remained)) => {
if remained < self.cache.len() {
// Remove those elements that are pushed after the snapshot.
self.cache.truncate(remained);
}
if len_stack > remained {
let rewind_count = len_stack - remained;
let new_len = self.popped.len() - rewind_count;
let recovered_elements = self.popped.drain(new_len..);
self.cache.extend(recovered_elements.rev());
debug_assert_eq!(self.popped.len(), new_len);
}
}
None => {
self.cache.clear();
self.ops.clear();
}
}
}

// Rewind the stack to a particular index
fn rewind_to(&mut self, index: usize) {
let ops_to_rewind = &self.ops[index..];
for op in ops_to_rewind.iter().rev() {
match *op {
StackOp::Push(_) => {
self.cache.pop();
}
StackOp::Pop(ref elem) => {
self.cache.push(elem.clone());
}
// As `self.popped` and `self.lengths` should already be empty,
// there is no need to clear it.
debug_assert!(self.popped.is_empty());
debug_assert!(self.lengths.is_empty());
}
}
}
Expand All @@ -110,12 +142,6 @@ impl<T: Clone> Index<Range<usize>> for Stack<T> {
}
}

#[derive(Debug)]
enum StackOp<T> {
Push(T),
Pop(T),
}

#[cfg(test)]
mod test {
use super::Stack;
Expand Down Expand Up @@ -146,6 +172,79 @@ mod test {

assert_eq!(stack[0..stack.len()], [0]);
}
#[test]
fn restore_without_snapshot() {
    // With no snapshot taken, `restore` is expected to leave the stack empty.
    let mut s = Stack::new();
    s.push(0);

    s.restore();

    let end = s.len();
    assert_eq!(s[0..end], [0; 0]);
}

#[test]
fn snapshot_pop_restore() {
    // An element popped after the snapshot must be back after `restore`.
    let mut s = Stack::new();
    s.push(0);

    s.snapshot();
    s.pop();
    s.restore();

    let end = s.len();
    assert_eq!(s[0..end], [0]);
}

#[test]
fn snapshot_pop_push_restore() {
    // Replacing the top element after the snapshot must be undone by `restore`.
    let mut s = Stack::new();
    s.push(0);

    s.snapshot();
    s.pop();
    s.push(1);
    s.restore();

    let end = s.len();
    assert_eq!(s[0..end], [0]);
}

#[test]
fn snapshot_push_pop_restore() {
    // Elements pushed after the snapshot, whether popped again or not,
    // must be gone after `restore`.
    let mut s = Stack::new();
    s.push(0);

    s.snapshot();
    s.push(1);
    s.push(2);
    s.pop();
    s.restore();

    let end = s.len();
    assert_eq!(s[0..end], [0]);
}

#[test]
fn snapshot_push_clear() {
    // Clearing the snapshot keeps the element pushed after it.
    let mut s = Stack::new();
    s.push(0);

    s.snapshot();
    s.push(1);
    s.clear_snapshot();

    let end = s.len();
    assert_eq!(s[0..end], [0, 1]);
}

#[test]
fn snapshot_pop_clear() {
    // Clearing the snapshot keeps the effect of the pop made after it.
    let mut s = Stack::new();
    s.push(0);
    s.push(1);

    s.snapshot();
    s.pop();
    s.clear_snapshot();

    let end = s.len();
    assert_eq!(s[0..end], [0]);
}

#[test]
fn stack_ops() {
Expand Down