Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

Commit

Permalink
refactor(rome_js_parser): Refactor Parser Events
Browse files Browse the repository at this point in the history
Reduce the size of each parser event from 16 bytes to 8 bytes by:

* Using a `NonZeroU32` for the forward parent. The forward parent can never be 0 because it stores the offset from the current event to the start of the "forwarded" parent.
* Storing the `start` of a node in the `CompletedMarker` (it can't be computed because of forward parents).
* Removing `end` from the `Finish` event and instead retrieving the last token of the node when queried (mainly to produce diagnostics).
* Storing only the end offset for each Token instead of the full range. The end offset is sufficient to reconstruct the length in the tree sink.

This reduces the memory consumption during the parse phase significantly:

* `jquery`:
  * Current Bytes: 4.12 MB -> 2.12 MB
  * Max Bytes: 5.82 MB -> 3.82 MB
  * Total Bytes: 8.45 MB -> 4.37 MB
* `tex-chtml-full`:
  * Current Bytes: 33.11 MB -> 17.11 MB
  * Max Bytes: 46 MB -> 30 MB
  * Total Bytes: 67.78 MB -> 34.92 MB

It also reduces the max bytes required during the tree sink phase.

The changes do improve parse times, but not as much as I had hoped:

```
group                                    event                                  main
-----                                    -----                                  ----
parser/checker.ts                        1.00     63.6±1.84ms    40.9 MB/sec    1.00     63.8±0.45ms    40.8 MB/sec
parser/compiler.js                       1.00     36.3±0.77ms    28.9 MB/sec    1.03     37.5±0.38ms    27.9 MB/sec
parser/d3.min.js                         1.00     24.3±0.25ms    10.8 MB/sec    1.03     25.1±2.39ms    10.4 MB/sec
parser/dojo.js                           1.00      2.2±0.00ms    30.9 MB/sec    1.03      2.3±0.02ms    30.0 MB/sec
parser/ios.d.ts                          1.00     52.7±0.55ms    35.4 MB/sec    1.19     62.6±0.58ms    29.8 MB/sec
parser/jquery.min.js                     1.00      6.6±0.13ms    12.6 MB/sec    1.05      6.9±0.26ms    12.0 MB/sec
parser/math.js                           1.00     45.4±0.90ms    14.3 MB/sec    1.02     46.3±0.59ms    14.0 MB/sec
parser/parser.ts                         1.00  1525.9±16.73µs    31.7 MB/sec    1.02  1556.6±21.54µs    31.0 MB/sec
parser/pixi.min.js                       1.00     28.9±0.67ms    15.2 MB/sec    1.01     29.3±0.14ms    15.0 MB/sec
parser/react-dom.production.min.js       1.00      9.0±0.01ms    12.7 MB/sec    1.02      9.2±0.05ms    12.5 MB/sec
parser/react.production.min.js           1.00    466.9±1.03µs    13.2 MB/sec    1.03    481.5±3.49µs    12.8 MB/sec
parser/router.ts                         1.00   1186.9±8.65µs    50.4 MB/sec    1.03  1222.2±10.20µs    48.9 MB/sec
parser/tex-chtml-full.js                 1.00     60.5±0.68ms    15.1 MB/sec    1.10     66.4±1.53ms    13.7 MB/sec
parser/three.min.js                      1.00     32.1±0.24ms    18.3 MB/sec    1.03     33.0±0.43ms    17.8 MB/sec
parser/typescript.js                     1.00    279.9±4.87ms    33.9 MB/sec    1.04    292.2±2.93ms    32.5 MB/sec
parser/vue.global.prod.js                1.00     11.4±0.34ms    10.6 MB/sec    1.01     11.5±0.03ms    10.5 MB/sec
```

## Tests

`cargo test`
  • Loading branch information
MichaReiser committed Mar 30, 2022
1 parent 956566d commit caabf9d
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 117 deletions.
41 changes: 17 additions & 24 deletions crates/rome_js_parser/src/event.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
//! Events emitted by the Parser which are then constructed into a syntax tree

use std::mem;
use std::num::NonZeroU32;

use crate::lexer::TextSize;
use crate::{ParseDiagnostic, Parser, TreeSink};
use rome_js_syntax::JsSyntaxKind::{self, *};
use rome_rowan::TextRange;

use crate::parser::rewrite_parser::{RewriteParser, RewriteToken};
use crate::parser::Checkpoint;
use crate::{ParseDiagnostic, Parser, TreeSink};
use rome_js_syntax::JsSyntaxKind::{self, *};

/// Events emitted by the Parser, these events are later
/// made into a syntax tree with `process` into TreeSink.
Expand All @@ -23,29 +22,25 @@ pub enum Event {
/// become the children of the respective node.
Start {
kind: JsSyntaxKind,
start: TextSize,
forward_parent: Option<u32>,
forward_parent: Option<NonZeroU32>,
},

/// Complete the previous `Start` event
Finish { end: TextSize },
Finish,

/// Produce a single leaf-element.
/// `n_raw_tokens` is used to glue complex contextual tokens.
/// For example, lexer tokenizes `>>` as `>`, `>`, and
/// `n_raw_tokens = 2` is used to produced a single `>>`.
Token {
kind: JsSyntaxKind,
range: TextRange,
/// The end offset of this token.
end: TextSize,
},
}

impl Event {
pub fn tombstone(start: TextSize) -> Self {
pub fn tombstone() -> Self {
Event::Start {
kind: TOMBSTONE,
forward_parent: None,
start,
}
}
}
Expand All @@ -57,7 +52,7 @@ pub fn process(sink: &mut impl TreeSink, mut events: Vec<Event>, errors: Vec<Par
let mut forward_parents = Vec::new();

for i in 0..events.len() {
match mem::replace(&mut events[i], Event::tombstone(TextSize::default())) {
match &mut events[i] {
Event::Start {
kind: TOMBSTONE, ..
} => (),
Expand All @@ -72,14 +67,13 @@ pub fn process(sink: &mut impl TreeSink, mut events: Vec<Event>, errors: Vec<Par
// while with the magic forward_parent, it writes: `C <- B <- A`.

// append `A` into parents.
forward_parents.push(kind);
forward_parents.push(*kind);
let mut idx = i;
let mut fp = forward_parent;
let mut fp = *forward_parent;
while let Some(fwd) = fp {
idx += fwd as usize;
idx += u32::from(fwd) as usize;
// append `A`'s forward_parent `B`
fp = match mem::replace(&mut events[idx], Event::tombstone(TextSize::default()))
{
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
Event::Start {
kind,
forward_parent,
Expand All @@ -100,8 +94,8 @@ pub fn process(sink: &mut impl TreeSink, mut events: Vec<Event>, errors: Vec<Par
}
}
Event::Finish { .. } => sink.finish_node(),
Event::Token { kind, range } => {
sink.token(kind, range.len());
Event::Token { kind, end } => {
sink.token(*kind, *end);
}
}
}
Expand All @@ -113,9 +107,9 @@ struct RewriteParseEventsTreeSink<'r, 'p, T> {
}

impl<'r, 'p, T: RewriteParseEvents> TreeSink for RewriteParseEventsTreeSink<'r, 'p, T> {
fn token(&mut self, kind: JsSyntaxKind, length: TextSize) {
fn token(&mut self, kind: JsSyntaxKind, end: TextSize) {
self.reparse
.token(RewriteToken::new(kind, length), &mut self.parser);
.token(RewriteToken::new(kind, end), &mut self.parser);
}

fn start_node(&mut self, kind: JsSyntaxKind) {
Expand Down Expand Up @@ -156,7 +150,6 @@ pub(crate) fn rewrite_events<T: RewriteParseEvents>(
// The current parsed grammar is a super-set of the grammar that gets re-parsed. Thus, any
// error that applied to the old grammar also applies to the sub-grammar.
let events: Vec<_> = p.events.split_off(checkpoint.event_pos + 1);
p.last_token_event_pos = checkpoint.last_token_pos;

let mut sink = RewriteParseEventsTreeSink {
parser: RewriteParser::new(p, checkpoint.token_source),
Expand Down
2 changes: 1 addition & 1 deletion crates/rome_js_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ use std::path::Path;
/// An abstraction for syntax tree implementations
pub trait TreeSink {
/// Adds new token to the current branch.
fn token(&mut self, kind: JsSyntaxKind, length: TextSize);
fn token(&mut self, kind: JsSyntaxKind, end: TextSize);

/// Start new branch and make it current.
fn start_node(&mut self, kind: JsSyntaxKind);
Expand Down
10 changes: 5 additions & 5 deletions crates/rome_js_parser/src/lossless_tree_sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ pub struct LosslessTreeSink<'a> {
}

impl<'a> TreeSink for LosslessTreeSink<'a> {
fn token(&mut self, kind: JsSyntaxKind, length: TextSize) {
self.do_token(kind, length);
fn token(&mut self, kind: JsSyntaxKind, end: TextSize) {
self.do_token(kind, end);
}

fn start_node(&mut self, kind: JsSyntaxKind) {
Expand All @@ -34,7 +34,7 @@ impl<'a> TreeSink for LosslessTreeSink<'a> {
self.parents_count -= 1;

if self.parents_count == 0 && self.needs_eof {
self.do_token(JsSyntaxKind::EOF, TextSize::default());
self.do_token(JsSyntaxKind::EOF, TextSize::from(self.text.len() as u32));
}

self.inner.finish_node();
Expand Down Expand Up @@ -69,7 +69,7 @@ impl<'a> LosslessTreeSink<'a> {
}

#[inline]
fn do_token(&mut self, kind: JsSyntaxKind, length: TextSize) {
fn do_token(&mut self, kind: JsSyntaxKind, token_end: TextSize) {
if kind == JsSyntaxKind::EOF {
self.needs_eof = false;
}
Expand All @@ -80,7 +80,7 @@ impl<'a> LosslessTreeSink<'a> {
self.eat_trivia(false);
let trailing_start = self.trivia_pieces.len();

self.text_pos += length;
self.text_pos = token_end;

// Everything until the next linebreak (but not including it)
// will be the trailing trivia...
Expand Down
Loading

0 comments on commit caabf9d

Please sign in to comment.