Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

refactor(rome_js_parser): Streamline parser events #2327

Merged
merged 1 commit into from
Mar 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 17 additions & 24 deletions crates/rome_js_parser/src/event.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
//! Events emitted by the Parser which are then constructed into a syntax tree

use std::mem;
use std::num::NonZeroU32;

use crate::lexer::TextSize;
use crate::{ParseDiagnostic, Parser, TreeSink};
use rome_js_syntax::JsSyntaxKind::{self, *};
use rome_rowan::TextRange;

use crate::parser::rewrite_parser::{RewriteParser, RewriteToken};
use crate::parser::Checkpoint;
use crate::{ParseDiagnostic, Parser, TreeSink};
use rome_js_syntax::JsSyntaxKind::{self, *};

/// Events emitted by the Parser, these events are later
/// made into a syntax tree with `process` into TreeSink.
Expand All @@ -23,29 +22,25 @@ pub enum Event {
/// become the children of the respective node.
Start {
kind: JsSyntaxKind,
start: TextSize,
forward_parent: Option<u32>,
forward_parent: Option<NonZeroU32>,
},

/// Complete the previous `Start` event
Finish { end: TextSize },
Finish,

/// Produce a single leaf-element.
/// `n_raw_tokens` is used to glue complex contextual tokens.
/// For example, lexer tokenizes `>>` as `>`, `>`, and
/// `n_raw_tokens = 2` is used to produce a single `>>`.
Token {
kind: JsSyntaxKind,
range: TextRange,
/// The end offset of this token.
end: TextSize,
},
}

impl Event {
pub fn tombstone(start: TextSize) -> Self {
pub fn tombstone() -> Self {
Event::Start {
kind: TOMBSTONE,
forward_parent: None,
start,
}
}
}
Expand All @@ -57,7 +52,7 @@ pub fn process(sink: &mut impl TreeSink, mut events: Vec<Event>, errors: Vec<Par
let mut forward_parents = Vec::new();

for i in 0..events.len() {
match mem::replace(&mut events[i], Event::tombstone(TextSize::default())) {
match &mut events[i] {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to replace the events with tombstone because the iterator is only moving forward from here

Event::Start {
kind: TOMBSTONE, ..
} => (),
Expand All @@ -72,14 +67,13 @@ pub fn process(sink: &mut impl TreeSink, mut events: Vec<Event>, errors: Vec<Par
// while with the magic forward_parent, it writes: `C <- B <- A`.

// append `A` into parents.
forward_parents.push(kind);
forward_parents.push(*kind);
let mut idx = i;
let mut fp = forward_parent;
let mut fp = *forward_parent;
while let Some(fwd) = fp {
idx += fwd as usize;
idx += u32::from(fwd) as usize;
// append `A`'s forward_parent `B`
fp = match mem::replace(&mut events[idx], Event::tombstone(TextSize::default()))
{
fp = match mem::replace(&mut events[idx], Event::tombstone()) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replacing is necessary here because we don't want to visit the start node of a forwarded parent again.

Event::Start {
kind,
forward_parent,
Expand All @@ -100,8 +94,8 @@ pub fn process(sink: &mut impl TreeSink, mut events: Vec<Event>, errors: Vec<Par
}
}
Event::Finish { .. } => sink.finish_node(),
Event::Token { kind, range } => {
sink.token(kind, range.len());
Event::Token { kind, end } => {
sink.token(*kind, *end);
}
}
}
Expand All @@ -113,9 +107,9 @@ struct RewriteParseEventsTreeSink<'r, 'p, T> {
}

impl<'r, 'p, T: RewriteParseEvents> TreeSink for RewriteParseEventsTreeSink<'r, 'p, T> {
fn token(&mut self, kind: JsSyntaxKind, length: TextSize) {
fn token(&mut self, kind: JsSyntaxKind, end: TextSize) {
self.reparse
.token(RewriteToken::new(kind, length), &mut self.parser);
.token(RewriteToken::new(kind, end), &mut self.parser);
}

fn start_node(&mut self, kind: JsSyntaxKind) {
Expand Down Expand Up @@ -156,7 +150,6 @@ pub(crate) fn rewrite_events<T: RewriteParseEvents>(
// The current parsed grammar is a super-set of the grammar that gets re-parsed. Thus, any
// error that applied to the old grammar also applies to the sub-grammar.
let events: Vec<_> = p.events.split_off(checkpoint.event_pos + 1);
p.last_token_event_pos = checkpoint.last_token_pos;

let mut sink = RewriteParseEventsTreeSink {
parser: RewriteParser::new(p, checkpoint.token_source),
Expand Down
2 changes: 1 addition & 1 deletion crates/rome_js_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ use std::path::Path;
/// An abstraction for syntax tree implementations
pub trait TreeSink {
/// Adds new token to the current branch.
fn token(&mut self, kind: JsSyntaxKind, length: TextSize);
fn token(&mut self, kind: JsSyntaxKind, end: TextSize);

/// Start new branch and make it current.
fn start_node(&mut self, kind: JsSyntaxKind);
Expand Down
10 changes: 5 additions & 5 deletions crates/rome_js_parser/src/lossless_tree_sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ pub struct LosslessTreeSink<'a> {
}

impl<'a> TreeSink for LosslessTreeSink<'a> {
fn token(&mut self, kind: JsSyntaxKind, length: TextSize) {
self.do_token(kind, length);
fn token(&mut self, kind: JsSyntaxKind, end: TextSize) {
self.do_token(kind, end);
}

fn start_node(&mut self, kind: JsSyntaxKind) {
Expand All @@ -34,7 +34,7 @@ impl<'a> TreeSink for LosslessTreeSink<'a> {
self.parents_count -= 1;

if self.parents_count == 0 && self.needs_eof {
self.do_token(JsSyntaxKind::EOF, TextSize::default());
self.do_token(JsSyntaxKind::EOF, TextSize::from(self.text.len() as u32));
}

self.inner.finish_node();
Expand Down Expand Up @@ -69,7 +69,7 @@ impl<'a> LosslessTreeSink<'a> {
}

#[inline]
fn do_token(&mut self, kind: JsSyntaxKind, length: TextSize) {
fn do_token(&mut self, kind: JsSyntaxKind, token_end: TextSize) {
if kind == JsSyntaxKind::EOF {
self.needs_eof = false;
}
Expand All @@ -80,7 +80,7 @@ impl<'a> LosslessTreeSink<'a> {
self.eat_trivia(false);
let trailing_start = self.trivia_pieces.len();

self.text_pos += length;
self.text_pos = token_end;

// Everything until the next linebreak (but not including it)
// will be the trailing trivia...
Expand Down
102 changes: 38 additions & 64 deletions crates/rome_js_parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use rome_js_syntax::{
JsSyntaxKind::{self},
TextRange,
};
use std::num::NonZeroU32;

pub(crate) use parse_error::*;
pub(crate) use parse_lists::{ParseNodeList, ParseSeparatedList};
Expand Down Expand Up @@ -73,8 +74,6 @@ pub(crate) struct Parser<'s> {
pub(super) state: ParserState,
pub source_type: SourceType,
pub diagnostics: Vec<ParseDiagnostic>,
// A `u32` is sufficient because the parser only supports files up to `u32` bytes.
pub(super) last_token_event_pos: Option<u32>,
// If the parser should skip tokens as trivia
skipping: bool,
}
Expand All @@ -89,7 +88,6 @@ impl<'s> Parser<'s> {
events: vec![],
state: ParserState::new(&source_type),
tokens: token_source,
last_token_event_pos: None,
source_type,
diagnostics: vec![],
skipping: false,
Expand Down Expand Up @@ -135,20 +133,18 @@ impl<'s> Parser<'s> {

/// Returns the kind of the last bumped token.
pub fn last(&self) -> Option<JsSyntaxKind> {
self.last_token_event_pos
.map(|pos| match self.events[pos as usize] {
Event::Token { kind, .. } => kind,
_ => unreachable!(),
})
self.events.iter().rev().find_map(|event| match event {
Event::Token { kind, .. } => Some(*kind),
_ => None,
})
}

/// Returns the range of the last bumped token.
pub fn last_range(&self) -> Option<TextRange> {
self.last_token_event_pos
.map(|pos| match self.events[pos as usize] {
Event::Token { range, .. } => range,
_ => unreachable!(),
})
/// Returns the end offset of the last bumped token.
pub fn last_end(&self) -> Option<TextSize> {
self.events.iter().rev().find_map(|event| match event {
Event::Token { end, .. } => Some(*end),
_ => None,
})
}

/// Consume the next token if `kind` matches.
Expand All @@ -169,7 +165,7 @@ impl<'s> Parser<'s> {
pub fn start(&mut self) -> Marker {
let pos = self.events.len() as u32;
let start = self.tokens.position();
self.push_event(Event::tombstone(start));
self.push_event(Event::tombstone());
Marker::new(pos, start)
}

Expand Down Expand Up @@ -271,13 +267,12 @@ impl<'s> Parser<'s> {
} else {
let range = self.cur_range();
self.tokens.bump(context);
self.push_token(kind, range);
self.push_token(kind, range.end());
}
}

fn push_token(&mut self, kind: JsSyntaxKind, range: TextRange) {
self.last_token_event_pos = Some(self.events.len() as u32);
self.push_event(Event::Token { kind, range });
fn push_token(&mut self, kind: JsSyntaxKind, end: TextSize) {
self.push_event(Event::Token { kind, end });
}

fn push_event(&mut self, event: Event) {
Expand Down Expand Up @@ -312,10 +307,8 @@ impl<'s> Parser<'s> {
event_pos,
errors_pos,
state,
last_token_pos,
} = checkpoint;
self.tokens.rewind(token_source);
self.last_token_event_pos = last_token_pos;
self.drain_events(self.cur_event_pos() - event_pos);
self.diagnostics.truncate(errors_pos as usize);
self.state.restore(state)
Expand All @@ -326,7 +319,6 @@ impl<'s> Parser<'s> {
pub fn checkpoint(&self) -> Checkpoint {
Checkpoint {
token_source: self.tokens.checkpoint(),
last_token_pos: self.last_token_event_pos,
event_pos: self.cur_event_pos(),
errors_pos: self.diagnostics.len() as u32,
state: self.state.checkpoint(),
Expand Down Expand Up @@ -432,21 +424,7 @@ impl Marker {
/// Finishes the syntax tree node and assigns `kind` to it,
/// and mark the create a `CompletedMarker` for possible future
/// operation like `.precede()` to deal with forward_parent.
pub fn complete(self, p: &mut Parser, kind: JsSyntaxKind) -> CompletedMarker {
let end_pos = TextSize::max(
p.last_range().map(|t| t.end()).unwrap_or(self.start),
self.start,
);

self.complete_at(p, kind, end_pos)
}

fn complete_at(
mut self,
p: &mut Parser,
kind: JsSyntaxKind,
end_pos: TextSize,
) -> CompletedMarker {
pub fn complete(mut self, p: &mut Parser, kind: JsSyntaxKind) -> CompletedMarker {
self.bomb.defuse();
let idx = self.pos as usize;
match p.events[idx] {
Expand All @@ -458,11 +436,9 @@ impl Marker {
_ => unreachable!(),
}
let finish_pos = p.events.len() as u32;
p.push_event(Event::Finish);

assert!(end_pos >= self.start);
p.push_event(Event::Finish { end: end_pos });

let new = CompletedMarker::new(self.pos, finish_pos, kind);
let new = CompletedMarker::new(self.pos, finish_pos, self.start, kind);
new.old_start(self.old_start)
}

Expand Down Expand Up @@ -503,6 +479,7 @@ impl Marker {
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct CompletedMarker {
start_pos: u32,
offset: TextSize,
// Hack for parsing completed markers which have been preceded
// This should be redone completely in the future
old_start: u32,
Expand All @@ -511,9 +488,10 @@ pub(crate) struct CompletedMarker {
}

impl CompletedMarker {
pub fn new(start_pos: u32, finish_pos: u32, kind: JsSyntaxKind) -> Self {
pub fn new(start_pos: u32, finish_pos: u32, offset: TextSize, kind: JsSyntaxKind) -> Self {
CompletedMarker {
start_pos,
offset,
old_start: start_pos,
finish_pos,
kind,
Expand Down Expand Up @@ -549,15 +527,16 @@ impl CompletedMarker {

/// Get the range of the marker
pub fn range(&self, p: &Parser) -> TextRange {
let start = match p.events[self.old_start as usize] {
Event::Start { start, .. } => start,
_ => unreachable!(),
};
let end = match p.events[self.finish_pos as usize] {
Event::Finish { end } => end,
_ => unreachable!(),
};
TextRange::new(start, end)
let end = p.events[self.old_start as usize..self.finish_pos as usize]
.iter()
.rev()
.find_map(|event| match event {
Event::Token { end, .. } => Some(*end),
_ => None,
})
.unwrap_or(self.offset);

TextRange::new(self.offset, end)
}

/// Get the underlying text of a marker
Expand All @@ -583,40 +562,36 @@ impl CompletedMarker {
match p.events[idx] {
Event::Start {
ref mut forward_parent,
start,
..
} => {
*forward_parent = Some(new_pos.pos - self.start_pos);
new_pos.start = start;
// Safety: The new marker is always inserted after the start marker of this node, thus
// subtracting the two positions can never be 0.
*forward_parent = Some(NonZeroU32::try_from(new_pos.pos - self.start_pos).unwrap());
}
_ => unreachable!(),
}
new_pos.child_idx = Some(self.start_pos as usize);
new_pos.start = self.offset;
new_pos.old_start(self.old_start as u32)
}

/// Undo this completion and turns into a `Marker`
pub fn undo_completion(self, p: &mut Parser) -> Marker {
let start_idx = self.start_pos as usize;
let finish_idx = self.finish_pos as usize;
let start_pos;

match p.events[start_idx] {
Event::Start {
ref mut kind,
forward_parent: None,
start,
} => {
start_pos = start;
*kind = JsSyntaxKind::TOMBSTONE
}
} => *kind = JsSyntaxKind::TOMBSTONE,
_ => unreachable!(),
}
match p.events[finish_idx] {
ref mut slot @ Event::Finish { .. } => *slot = Event::tombstone(start_pos),
ref mut slot @ Event::Finish { .. } => *slot = Event::tombstone(),
_ => unreachable!(),
}
Marker::new(self.start_pos, start_pos)
Marker::new(self.start_pos, self.offset)
}

pub fn kind(&self) -> JsSyntaxKind {
Expand All @@ -632,7 +607,6 @@ pub struct Checkpoint {
/// Safety: The parser only supports files <= 4Gb. Storing a `u32` is sufficient to store one error
/// for each single character in the file, which should be sufficient for any realistic file.
errors_pos: u32,
pub(super) last_token_pos: Option<u32>,
state: ParserStateCheckpoint,
pub(super) token_source: TokenSourceCheckpoint,
}
Expand Down
Loading