Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

Commit

Permalink
refactor(rome_js_parser): Refactor Parser Events
Browse files Browse the repository at this point in the history
Reduce the size of a single parser event from 16 bytes to 8 bytes each by:

* Using a `NonZeroU32` for the forward parent. The forward parent can never be 0 because it stores the offset from the current event to the start of the "forwarded" parent.
* Storing the `start` of a node in the `CompletedMarker` (it can't be computed because of forward parents).
* Removing `end` from the `Finish` event and instead retrieving the end of the node's last token when queried (mainly to produce diagnostics).
* Storing only the end offset for each token instead of the full range. The end offset is sufficient to reconstruct the length in the tree sink.

This reduces the memory consumption during the parse phase significantly:

* `jquery`:
  * Current Bytes: 4.12 MB -> 2.12 MB
  * Max Bytes: 5.82 MB -> 3.82 MB
  * Total Bytes: 8.45 MB -> 4.37 MB
* `tex-chtml-full`:
  * Current Bytes: 33.11 MB -> 17.11 MB
  * Max Bytes: 46 MB -> 30 MB
  * Total Bytes: 67.78 MB -> 34.92 MB

It also reduces the max bytes required during the tree sink phase.

The changes do improve parse times, but not as much as I had hoped:

```
group                                    event                                  main
-----                                    -----                                  ----
parser/checker.ts                        1.00     63.6±1.84ms    40.9 MB/sec    1.00     63.8±0.45ms    40.8 MB/sec
parser/compiler.js                       1.00     36.3±0.77ms    28.9 MB/sec    1.03     37.5±0.38ms    27.9 MB/sec
parser/d3.min.js                         1.00     24.3±0.25ms    10.8 MB/sec    1.03     25.1±2.39ms    10.4 MB/sec
parser/dojo.js                           1.00      2.2±0.00ms    30.9 MB/sec    1.03      2.3±0.02ms    30.0 MB/sec
parser/ios.d.ts                          1.00     52.7±0.55ms    35.4 MB/sec    1.19     62.6±0.58ms    29.8 MB/sec
parser/jquery.min.js                     1.00      6.6±0.13ms    12.6 MB/sec    1.05      6.9±0.26ms    12.0 MB/sec
parser/math.js                           1.00     45.4±0.90ms    14.3 MB/sec    1.02     46.3±0.59ms    14.0 MB/sec
parser/parser.ts                         1.00  1525.9±16.73µs    31.7 MB/sec    1.02  1556.6±21.54µs    31.0 MB/sec
parser/pixi.min.js                       1.00     28.9±0.67ms    15.2 MB/sec    1.01     29.3±0.14ms    15.0 MB/sec
parser/react-dom.production.min.js       1.00      9.0±0.01ms    12.7 MB/sec    1.02      9.2±0.05ms    12.5 MB/sec
parser/react.production.min.js           1.00    466.9±1.03µs    13.2 MB/sec    1.03    481.5±3.49µs    12.8 MB/sec
parser/router.ts                         1.00   1186.9±8.65µs    50.4 MB/sec    1.03  1222.2±10.20µs    48.9 MB/sec
parser/tex-chtml-full.js                 1.00     60.5±0.68ms    15.1 MB/sec    1.10     66.4±1.53ms    13.7 MB/sec
parser/three.min.js                      1.00     32.1±0.24ms    18.3 MB/sec    1.03     33.0±0.43ms    17.8 MB/sec
parser/typescript.js                     1.00    279.9±4.87ms    33.9 MB/sec    1.04    292.2±2.93ms    32.5 MB/sec
parser/vue.global.prod.js                1.00     11.4±0.34ms    10.6 MB/sec    1.01     11.5±0.03ms    10.5 MB/sec
```

## Tests

`cargo test`
  • Loading branch information
MichaReiser committed Mar 30, 2022
1 parent 956566d commit 350dfe5
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 117 deletions.
41 changes: 17 additions & 24 deletions crates/rome_js_parser/src/event.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
//! Events emitted by the Parser which are then constructed into a syntax tree

use std::mem;
use std::num::NonZeroU32;

use crate::lexer::TextSize;
use crate::{ParseDiagnostic, Parser, TreeSink};
use rome_js_syntax::JsSyntaxKind::{self, *};
use rome_rowan::TextRange;

use crate::parser::rewrite_parser::{RewriteParser, RewriteToken};
use crate::parser::Checkpoint;
use crate::{ParseDiagnostic, Parser, TreeSink};
use rome_js_syntax::JsSyntaxKind::{self, *};

/// Events emitted by the Parser, these events are later
/// made into a syntax tree with `process` into TreeSink.
Expand All @@ -23,29 +22,25 @@ pub enum Event {
/// become the children of the respective node.
Start {
kind: JsSyntaxKind,
start: TextSize,
forward_parent: Option<u32>,
forward_parent: Option<NonZeroU32>,
},

/// Complete the previous `Start` event
Finish { end: TextSize },
Finish,

/// Produce a single leaf-element.
/// `n_raw_tokens` is used to glue complex contextual tokens.
/// For example, lexer tokenizes `>>` as `>`, `>`, and
/// `n_raw_tokens = 2` is used to produce a single `>>`.
Token {
kind: JsSyntaxKind,
range: TextRange,
/// The end offset of this token.
end: TextSize,
},
}

impl Event {
pub fn tombstone(start: TextSize) -> Self {
pub fn tombstone() -> Self {
Event::Start {
kind: TOMBSTONE,
forward_parent: None,
start,
}
}
}
Expand All @@ -57,7 +52,7 @@ pub fn process(sink: &mut impl TreeSink, mut events: Vec<Event>, errors: Vec<Par
let mut forward_parents = Vec::new();

for i in 0..events.len() {
match mem::replace(&mut events[i], Event::tombstone(TextSize::default())) {
match &mut events[i] {
Event::Start {
kind: TOMBSTONE, ..
} => (),
Expand All @@ -72,14 +67,13 @@ pub fn process(sink: &mut impl TreeSink, mut events: Vec<Event>, errors: Vec<Par
// while with the magic forward_parent, it writes: `C <- B <- A`.

// append `A` into parents.
forward_parents.push(kind);
forward_parents.push(*kind);
let mut idx = i;
let mut fp = forward_parent;
let mut fp = *forward_parent;
while let Some(fwd) = fp {
idx += fwd as usize;
idx += u32::from(fwd) as usize;
// append `A`'s forward_parent `B`
fp = match mem::replace(&mut events[idx], Event::tombstone(TextSize::default()))
{
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
Event::Start {
kind,
forward_parent,
Expand All @@ -100,8 +94,8 @@ pub fn process(sink: &mut impl TreeSink, mut events: Vec<Event>, errors: Vec<Par
}
}
Event::Finish { .. } => sink.finish_node(),
Event::Token { kind, range } => {
sink.token(kind, range.len());
Event::Token { kind, end } => {
sink.token(*kind, *end);
}
}
}
Expand All @@ -113,9 +107,9 @@ struct RewriteParseEventsTreeSink<'r, 'p, T> {
}

impl<'r, 'p, T: RewriteParseEvents> TreeSink for RewriteParseEventsTreeSink<'r, 'p, T> {
fn token(&mut self, kind: JsSyntaxKind, length: TextSize) {
fn token(&mut self, kind: JsSyntaxKind, end: TextSize) {
self.reparse
.token(RewriteToken::new(kind, length), &mut self.parser);
.token(RewriteToken::new(kind, end), &mut self.parser);
}

fn start_node(&mut self, kind: JsSyntaxKind) {
Expand Down Expand Up @@ -156,7 +150,6 @@ pub(crate) fn rewrite_events<T: RewriteParseEvents>(
// The current parsed grammar is a super-set of the grammar that gets re-parsed. Thus, any
// error that applied to the old grammar also applies to the sub-grammar.
let events: Vec<_> = p.events.split_off(checkpoint.event_pos + 1);
p.last_token_event_pos = checkpoint.last_token_pos;

let mut sink = RewriteParseEventsTreeSink {
parser: RewriteParser::new(p, checkpoint.token_source),
Expand Down
2 changes: 1 addition & 1 deletion crates/rome_js_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ use std::path::Path;
/// An abstraction for syntax tree implementations
pub trait TreeSink {
/// Adds new token to the current branch.
fn token(&mut self, kind: JsSyntaxKind, length: TextSize);
fn token(&mut self, kind: JsSyntaxKind, end: TextSize);

/// Start new branch and make it current.
fn start_node(&mut self, kind: JsSyntaxKind);
Expand Down
10 changes: 5 additions & 5 deletions crates/rome_js_parser/src/lossless_tree_sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ pub struct LosslessTreeSink<'a> {
}

impl<'a> TreeSink for LosslessTreeSink<'a> {
fn token(&mut self, kind: JsSyntaxKind, length: TextSize) {
self.do_token(kind, length);
fn token(&mut self, kind: JsSyntaxKind, end: TextSize) {
self.do_token(kind, end);
}

fn start_node(&mut self, kind: JsSyntaxKind) {
Expand All @@ -34,7 +34,7 @@ impl<'a> TreeSink for LosslessTreeSink<'a> {
self.parents_count -= 1;

if self.parents_count == 0 && self.needs_eof {
self.do_token(JsSyntaxKind::EOF, TextSize::default());
self.do_token(JsSyntaxKind::EOF, TextSize::from(self.text.len() as u32));
}

self.inner.finish_node();
Expand Down Expand Up @@ -69,7 +69,7 @@ impl<'a> LosslessTreeSink<'a> {
}

#[inline]
fn do_token(&mut self, kind: JsSyntaxKind, length: TextSize) {
fn do_token(&mut self, kind: JsSyntaxKind, token_end: TextSize) {
if kind == JsSyntaxKind::EOF {
self.needs_eof = false;
}
Expand All @@ -80,7 +80,7 @@ impl<'a> LosslessTreeSink<'a> {
self.eat_trivia(false);
let trailing_start = self.trivia_pieces.len();

self.text_pos += length;
self.text_pos = token_end;

// Everything until the next linebreak (but not including it)
// will be the trailing trivia...
Expand Down
102 changes: 38 additions & 64 deletions crates/rome_js_parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use rome_js_syntax::{
JsSyntaxKind::{self},
TextRange,
};
use std::num::NonZeroU32;

pub(crate) use parse_error::*;
pub(crate) use parse_lists::{ParseNodeList, ParseSeparatedList};
Expand Down Expand Up @@ -73,8 +74,6 @@ pub(crate) struct Parser<'s> {
pub(super) state: ParserState,
pub source_type: SourceType,
pub diagnostics: Vec<ParseDiagnostic>,
// A `u32` is sufficient because the parser only supports files up to `u32` bytes.
pub(super) last_token_event_pos: Option<u32>,
// If the parser should skip tokens as trivia
skipping: bool,
}
Expand All @@ -89,7 +88,6 @@ impl<'s> Parser<'s> {
events: vec![],
state: ParserState::new(&source_type),
tokens: token_source,
last_token_event_pos: None,
source_type,
diagnostics: vec![],
skipping: false,
Expand Down Expand Up @@ -135,20 +133,18 @@ impl<'s> Parser<'s> {

/// Returns the kind of the last bumped token.
pub fn last(&self) -> Option<JsSyntaxKind> {
self.last_token_event_pos
.map(|pos| match self.events[pos as usize] {
Event::Token { kind, .. } => kind,
_ => unreachable!(),
})
self.events.iter().rev().find_map(|event| match event {
Event::Token { kind, .. } => Some(*kind),
_ => None,
})
}

/// Returns the range of the last bumped token.
pub fn last_range(&self) -> Option<TextRange> {
self.last_token_event_pos
.map(|pos| match self.events[pos as usize] {
Event::Token { range, .. } => range,
_ => unreachable!(),
})
/// Returns the end offset of the last bumped token.
pub fn last_end(&self) -> Option<TextSize> {
self.events.iter().rev().find_map(|event| match event {
Event::Token { end, .. } => Some(*end),
_ => None,
})
}

/// Consume the next token if `kind` matches.
Expand All @@ -169,7 +165,7 @@ impl<'s> Parser<'s> {
pub fn start(&mut self) -> Marker {
let pos = self.events.len() as u32;
let start = self.tokens.position();
self.push_event(Event::tombstone(start));
self.push_event(Event::tombstone());
Marker::new(pos, start)
}

Expand Down Expand Up @@ -271,13 +267,12 @@ impl<'s> Parser<'s> {
} else {
let range = self.cur_range();
self.tokens.bump(context);
self.push_token(kind, range);
self.push_token(kind, range.end());
}
}

fn push_token(&mut self, kind: JsSyntaxKind, range: TextRange) {
self.last_token_event_pos = Some(self.events.len() as u32);
self.push_event(Event::Token { kind, range });
fn push_token(&mut self, kind: JsSyntaxKind, end: TextSize) {
self.push_event(Event::Token { kind, end });
}

fn push_event(&mut self, event: Event) {
Expand Down Expand Up @@ -312,10 +307,8 @@ impl<'s> Parser<'s> {
event_pos,
errors_pos,
state,
last_token_pos,
} = checkpoint;
self.tokens.rewind(token_source);
self.last_token_event_pos = last_token_pos;
self.drain_events(self.cur_event_pos() - event_pos);
self.diagnostics.truncate(errors_pos as usize);
self.state.restore(state)
Expand All @@ -326,7 +319,6 @@ impl<'s> Parser<'s> {
pub fn checkpoint(&self) -> Checkpoint {
Checkpoint {
token_source: self.tokens.checkpoint(),
last_token_pos: self.last_token_event_pos,
event_pos: self.cur_event_pos(),
errors_pos: self.diagnostics.len() as u32,
state: self.state.checkpoint(),
Expand Down Expand Up @@ -432,21 +424,7 @@ impl Marker {
/// Finishes the syntax tree node and assigns `kind` to it,
/// and creates a `CompletedMarker` for possible future
/// operations like `.precede()` to deal with forward_parent.
pub fn complete(self, p: &mut Parser, kind: JsSyntaxKind) -> CompletedMarker {
let end_pos = TextSize::max(
p.last_range().map(|t| t.end()).unwrap_or(self.start),
self.start,
);

self.complete_at(p, kind, end_pos)
}

fn complete_at(
mut self,
p: &mut Parser,
kind: JsSyntaxKind,
end_pos: TextSize,
) -> CompletedMarker {
pub fn complete(mut self, p: &mut Parser, kind: JsSyntaxKind) -> CompletedMarker {
self.bomb.defuse();
let idx = self.pos as usize;
match p.events[idx] {
Expand All @@ -458,11 +436,9 @@ impl Marker {
_ => unreachable!(),
}
let finish_pos = p.events.len() as u32;
p.push_event(Event::Finish);

assert!(end_pos >= self.start);
p.push_event(Event::Finish { end: end_pos });

let new = CompletedMarker::new(self.pos, finish_pos, kind);
let new = CompletedMarker::new(self.pos, finish_pos, self.start, kind);
new.old_start(self.old_start)
}

Expand Down Expand Up @@ -503,6 +479,7 @@ impl Marker {
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct CompletedMarker {
start_pos: u32,
offset: TextSize,
// Hack for parsing completed markers which have been preceded
// This should be redone completely in the future
old_start: u32,
Expand All @@ -511,9 +488,10 @@ pub(crate) struct CompletedMarker {
}

impl CompletedMarker {
pub fn new(start_pos: u32, finish_pos: u32, kind: JsSyntaxKind) -> Self {
pub fn new(start_pos: u32, finish_pos: u32, offset: TextSize, kind: JsSyntaxKind) -> Self {
CompletedMarker {
start_pos,
offset,
old_start: start_pos,
finish_pos,
kind,
Expand Down Expand Up @@ -549,15 +527,16 @@ impl CompletedMarker {

/// Get the range of the marker
pub fn range(&self, p: &Parser) -> TextRange {
let start = match p.events[self.old_start as usize] {
Event::Start { start, .. } => start,
_ => unreachable!(),
};
let end = match p.events[self.finish_pos as usize] {
Event::Finish { end } => end,
_ => unreachable!(),
};
TextRange::new(start, end)
let end = p.events[self.old_start as usize..self.finish_pos as usize]
.iter()
.rev()
.find_map(|event| match event {
Event::Token { end, .. } => Some(*end),
_ => None,
})
.unwrap_or(self.offset);

TextRange::new(self.offset, end)
}

/// Get the underlying text of a marker
Expand All @@ -583,40 +562,36 @@ impl CompletedMarker {
match p.events[idx] {
Event::Start {
ref mut forward_parent,
start,
..
} => {
*forward_parent = Some(new_pos.pos - self.start_pos);
new_pos.start = start;
// Safety: The new marker is always inserted after the start marker of this node, thus
// subtracting the two positions can never be 0.
*forward_parent = Some(NonZeroU32::try_from(new_pos.pos - self.start_pos).unwrap());
}
_ => unreachable!(),
}
new_pos.child_idx = Some(self.start_pos as usize);
new_pos.start = self.offset;
new_pos.old_start(self.old_start as u32)
}

/// Undo this completion and turns into a `Marker`
pub fn undo_completion(self, p: &mut Parser) -> Marker {
let start_idx = self.start_pos as usize;
let finish_idx = self.finish_pos as usize;
let start_pos;

match p.events[start_idx] {
Event::Start {
ref mut kind,
forward_parent: None,
start,
} => {
start_pos = start;
*kind = JsSyntaxKind::TOMBSTONE
}
} => *kind = JsSyntaxKind::TOMBSTONE,
_ => unreachable!(),
}
match p.events[finish_idx] {
ref mut slot @ Event::Finish { .. } => *slot = Event::tombstone(start_pos),
ref mut slot @ Event::Finish { .. } => *slot = Event::tombstone(),
_ => unreachable!(),
}
Marker::new(self.start_pos, start_pos)
Marker::new(self.start_pos, self.offset)
}

pub fn kind(&self) -> JsSyntaxKind {
Expand All @@ -632,7 +607,6 @@ pub struct Checkpoint {
/// Safety: The parser only supports files <= 4Gb. Storing a `u32` is sufficient to store one error
/// for each single character in the file, which should be sufficient for any realistic file.
errors_pos: u32,
pub(super) last_token_pos: Option<u32>,
state: ParserStateCheckpoint,
pub(super) token_source: TokenSourceCheckpoint,
}
Expand Down
Loading

0 comments on commit 350dfe5

Please sign in to comment.