Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor/feat: refactor identifier parsing a bit #109203

Merged
merged 3 commits into from
Mar 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ parse_expected_identifier_found_reserved_keyword = expected identifier, found re
parse_expected_identifier_found_doc_comment = expected identifier, found doc comment
parse_expected_identifier = expected identifier
parse_sugg_escape_to_use_as_identifier = escape `{$ident_name}` to use it as an identifier
parse_sugg_escape_identifier = escape `{$ident_name}` to use it as an identifier
parse_sugg_remove_comma = remove this comma
Expand Down
11 changes: 7 additions & 4 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -888,12 +888,12 @@ pub(crate) struct InvalidMetaItem {

#[derive(Subdiagnostic)]
#[suggestion(
parse_sugg_escape_to_use_as_identifier,
parse_sugg_escape_identifier,
style = "verbose",
applicability = "maybe-incorrect",
code = "r#"
)]
pub(crate) struct SuggEscapeToUseAsIdentifier {
pub(crate) struct SuggEscapeIdentifier {
#[primary_span]
pub span: Span,
pub ident_name: String,
Expand Down Expand Up @@ -937,7 +937,7 @@ impl ExpectedIdentifierFound {
pub(crate) struct ExpectedIdentifier {
pub span: Span,
pub token: Token,
pub suggest_raw: Option<SuggEscapeToUseAsIdentifier>,
pub suggest_raw: Option<SuggEscapeIdentifier>,
pub suggest_remove_comma: Option<SuggRemoveComma>,
pub help_cannot_start_number: Option<HelpIdentifierStartsWithNumber>,
}
Expand Down Expand Up @@ -986,7 +986,10 @@ impl<'a, G: EmissionGuarantee> IntoDiagnostic<'a, G> for ExpectedIdentifier {

#[derive(Subdiagnostic)]
#[help(parse_invalid_identifier_with_leading_number)]
pub(crate) struct HelpIdentifierStartsWithNumber;
pub(crate) struct HelpIdentifierStartsWithNumber {
#[primary_span]
pub num_span: Span,
}

pub(crate) struct ExpectedSemi {
pub span: Span,
Expand Down
123 changes: 89 additions & 34 deletions compiler/rustc_parse/src/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ use super::{
use crate::errors::{
AmbiguousPlus, AttributeOnParamType, BadQPathStage2, BadTypePlus, BadTypePlusSub,
ComparisonOperatorsCannotBeChained, ComparisonOperatorsCannotBeChainedSugg,
ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentOnParamType,
DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg,
ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentDoesNotDocumentAnything,
DocCommentOnParamType, DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg,
GenericParamsWithoutAngleBrackets, GenericParamsWithoutAngleBracketsSugg,
HelpIdentifierStartsWithNumber, InInTypo, IncorrectAwait, IncorrectSemicolon,
IncorrectUseOfAwait, ParenthesesInForHead, ParenthesesInForHeadSugg,
PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, SelfParamNotFirst,
StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg, StructLiteralNeedingParens,
StructLiteralNeedingParensSugg, SuggEscapeToUseAsIdentifier, SuggRemoveComma,
StructLiteralNeedingParensSugg, SuggEscapeIdentifier, SuggRemoveComma,
UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration,
UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, UseEqInstead,
};
Expand All @@ -38,7 +38,7 @@ use rustc_errors::{
use rustc_session::errors::ExprParenthesesNeeded;
use rustc_span::source_map::Spanned;
use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::{Span, SpanSnippetError, DUMMY_SP};
use rustc_span::{Span, SpanSnippetError, Symbol, DUMMY_SP};
use std::mem::take;
use std::ops::{Deref, DerefMut};
use thin_vec::{thin_vec, ThinVec};
Expand Down Expand Up @@ -268,7 +268,21 @@ impl<'a> Parser<'a> {
self.sess.source_map().span_to_snippet(span)
}

pub(super) fn expected_ident_found(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
/// Emits an error with suggestions if an identifier was expected but not found.
///
/// Returns a possibly recovered identifier.
pub(super) fn expected_ident_found(
&mut self,
recover: bool,
) -> PResult<'a, (Ident, /* is_raw */ bool)> {
if let TokenKind::DocComment(..) = self.prev_token.kind {
return Err(DocCommentDoesNotDocumentAnything {
span: self.prev_token.span,
missing_comma: None,
}
.into_diagnostic(&self.sess.span_diagnostic));
}

let valid_follow = &[
TokenKind::Eq,
TokenKind::Colon,
Expand All @@ -281,31 +295,51 @@ impl<'a> Parser<'a> {
TokenKind::CloseDelim(Delimiter::Parenthesis),
];

let suggest_raw = match self.token.ident() {
Some((ident, false))
if ident.is_raw_guess()
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind)) =>
{
Some(SuggEscapeToUseAsIdentifier {
span: ident.span.shrink_to_lo(),
// `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`,
// which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#`
ident_name: ident.name.to_string(),
})
}
_ => None,
};
let mut recovered_ident = None;
// we take this here so that the correct original token is retained in
// the diagnostic, regardless of eager recovery.
let bad_token = self.token.clone();

// suggest prepending a keyword in identifier position with `r#`
let suggest_raw = if let Some((ident, false)) = self.token.ident()
&& ident.is_raw_guess()
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind))
{
recovered_ident = Some((ident, true));

// `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`,
// which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#`
let ident_name = ident.name.to_string();

Some(SuggEscapeIdentifier {
span: ident.span.shrink_to_lo(),
ident_name
})
} else { None };

let suggest_remove_comma =
if self.token == token::Comma && self.look_ahead(1, |t| t.is_ident()) {
if recover {
self.bump();
recovered_ident = self.ident_or_err(false).ok();
};

let suggest_remove_comma = (self.token == token::Comma
&& self.look_ahead(1, |t| t.is_ident()))
.then_some(SuggRemoveComma { span: self.token.span });
Some(SuggRemoveComma { span: bad_token.span })
} else {
None
};

let help_cannot_start_number =
self.is_lit_bad_ident().then_some(HelpIdentifierStartsWithNumber);
let help_cannot_start_number = self.is_lit_bad_ident().map(|(len, valid_portion)| {
let (invalid, valid) = self.token.span.split_at(len as u32);

recovered_ident = Some((Ident::new(valid_portion, valid), false));

HelpIdentifierStartsWithNumber { num_span: invalid }
});

let err = ExpectedIdentifier {
span: self.token.span,
token: self.token.clone(),
span: bad_token.span,
token: bad_token,
suggest_raw,
suggest_remove_comma,
help_cannot_start_number,
Expand All @@ -314,6 +348,7 @@ impl<'a> Parser<'a> {

// if the token we have is a `<`
// it *might* be a misplaced generic
// FIXME: could we recover with this?
if self.token == token::Lt {
// all keywords that could have generic applied
let valid_prev_keywords =
Expand Down Expand Up @@ -364,18 +399,38 @@ impl<'a> Parser<'a> {
}
}

err
if let Some(recovered_ident) = recovered_ident && recover {
err.emit();
Ok(recovered_ident)
} else {
Err(err)
}
}

pub(super) fn expected_ident_found_err(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
self.expected_ident_found(false).unwrap_err()
}

/// Checks if the current token is a integer or float literal and looks like
/// it could be a invalid identifier with digits at the start.
pub(super) fn is_lit_bad_ident(&mut self) -> bool {
matches!(self.token.uninterpolate().kind, token::Literal(Lit { kind: token::LitKind::Integer | token::LitKind::Float, .. })
// ensure that the integer literal is followed by a *invalid*
// suffix: this is how we know that it is a identifier with an
// invalid beginning.
if rustc_ast::MetaItemLit::from_token(&self.token).is_none()
)
///
/// Returns the number of characters (bytes) composing the invalid portion
/// of the identifier and the valid portion of the identifier.
pub(super) fn is_lit_bad_ident(&mut self) -> Option<(usize, Symbol)> {
// ensure that the integer literal is followed by a *invalid*
// suffix: this is how we know that it is a identifier with an
// invalid beginning.
if let token::Literal(Lit {
kind: token::LitKind::Integer | token::LitKind::Float,
symbol,
suffix,
}) = self.token.kind
&& rustc_ast::MetaItemLit::from_token(&self.token).is_none()
{
Some((symbol.as_str().len(), suffix.unwrap()))
Comment on lines +426 to +430
Copy link
Member

@compiler-errors compiler-errors Apr 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re: #110014

-             suffix,
+             suffix: Some(suffix),
         }) = self.token.kind
             && rustc_ast::MetaItemLit::from_token(&self.token).is_none()
         {
-            Some((symbol.as_str().len(), suffix.unwrap()))
+            Some((symbol.as_str().len(), suffix))

Copy link
Contributor Author

@Ezrashaw Ezrashaw Apr 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you like me to PR this? Done.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would be nice if you could put up the fix, yes :)

} else {
None
}
}

pub(super) fn expected_one_of_not_found(
Expand Down
10 changes: 5 additions & 5 deletions compiler/rustc_parse/src/parser/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1181,7 +1181,7 @@ impl<'a> Parser<'a> {
defaultness: Defaultness,
) -> PResult<'a, ItemInfo> {
let impl_span = self.token.span;
let mut err = self.expected_ident_found();
let mut err = self.expected_ident_found_err();

// Only try to recover if this is implementing a trait for a type
let mut impl_info = match self.parse_item_impl(attrs, defaultness) {
Expand Down Expand Up @@ -1744,7 +1744,7 @@ impl<'a> Parser<'a> {
/// Parses a field identifier. Specialized version of `parse_ident_common`
/// for better diagnostics and suggestions.
fn parse_field_ident(&mut self, adt_ty: &str, lo: Span) -> PResult<'a, Ident> {
let (ident, is_raw) = self.ident_or_err()?;
let (ident, is_raw) = self.ident_or_err(true)?;
if !is_raw && ident.is_reserved() {
let snapshot = self.create_snapshot_for_diagnostic();
let err = if self.check_fn_front_matter(false, Case::Sensitive) {
Expand Down Expand Up @@ -1776,7 +1776,7 @@ impl<'a> Parser<'a> {
Err(err) => {
err.cancel();
self.restore_snapshot(snapshot);
self.expected_ident_found()
self.expected_ident_found_err()
}
}
} else if self.eat_keyword(kw::Struct) {
Expand All @@ -1792,11 +1792,11 @@ impl<'a> Parser<'a> {
Err(err) => {
err.cancel();
self.restore_snapshot(snapshot);
self.expected_ident_found()
self.expected_ident_found_err()
}
}
} else {
let mut err = self.expected_ident_found();
let mut err = self.expected_ident_found_err();
if self.eat_keyword_noexpect(kw::Let)
&& let removal_span = self.prev_token.span.until(self.token.span)
&& let Ok(ident) = self.parse_ident_common(false)
Expand Down
34 changes: 19 additions & 15 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ use thin_vec::ThinVec;
use tracing::debug;

use crate::errors::{
DocCommentDoesNotDocumentAnything, IncorrectVisibilityRestriction, MismatchedClosingDelimiter,
NonStringAbiLiteral,
IncorrectVisibilityRestriction, MismatchedClosingDelimiter, NonStringAbiLiteral,
};

bitflags::bitflags! {
Expand Down Expand Up @@ -552,21 +551,11 @@ impl<'a> Parser<'a> {
self.parse_ident_common(true)
}

fn ident_or_err(&mut self) -> PResult<'a, (Ident, /* is_raw */ bool)> {
self.token.ident().ok_or_else(|| match self.prev_token.kind {
TokenKind::DocComment(..) => DocCommentDoesNotDocumentAnything {
span: self.prev_token.span,
missing_comma: None,
}
.into_diagnostic(&self.sess.span_diagnostic),
_ => self.expected_ident_found(),
})
}

fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, Ident> {
let (ident, is_raw) = self.ident_or_err()?;
let (ident, is_raw) = self.ident_or_err(recover)?;

if !is_raw && ident.is_reserved() {
let mut err = self.expected_ident_found();
let mut err = self.expected_ident_found_err();
if recover {
err.emit();
} else {
Expand All @@ -577,6 +566,21 @@ impl<'a> Parser<'a> {
Ok(ident)
}

fn ident_or_err(&mut self, recover: bool) -> PResult<'a, (Ident, /* is_raw */ bool)> {
let result = self.token.ident().ok_or_else(|| self.expected_ident_found(recover));

let (ident, is_raw) = match result {
Ok(ident) => ident,
Err(err) => match err {
// we recovered!
Ok(ident) => ident,
Err(err) => return Err(err),
},
};

Ok((ident, is_raw))
}

/// Checks if the next token is `tok`, and returns `true` if so.
///
/// This method will automatically add `tok` to `expected_tokens` if `tok` is not
Expand Down
14 changes: 8 additions & 6 deletions compiler/rustc_parse/src/parser/pat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,6 @@ impl<'a> Parser<'a> {
lo = self.token.span;
}

if self.is_lit_bad_ident() {
return Err(self.expected_ident_found());
}

let pat = if self.check(&token::BinOp(token::And)) || self.token.kind == token::AndAnd {
self.parse_pat_deref(expected)?
} else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) {
Expand Down Expand Up @@ -395,7 +391,13 @@ impl<'a> Parser<'a> {
} else {
PatKind::Lit(const_expr)
}
} else if self.can_be_ident_pat() {
// Don't eagerly error on semantically invalid tokens when matching
// declarative macros, as the input to those doesn't have to be
// semantically valid. For attribute/derive proc macros this is not the
// case, so doing the recovery for them is fine.
} else if self.can_be_ident_pat()
|| (self.is_lit_bad_ident().is_some() && self.may_recover())
{
// Parse `ident @ pat`
// This can give false positives and parse nullary enums,
// they are dealt with later in resolve.
Expand Down Expand Up @@ -594,7 +596,7 @@ impl<'a> Parser<'a> {
// Make sure we don't allow e.g. `let mut $p;` where `$p:pat`.
if let token::Interpolated(nt) = &self.token.kind {
if let token::NtPat(_) = **nt {
self.expected_ident_found().emit();
self.expected_ident_found_err().emit();
}
}

Expand Down
12 changes: 12 additions & 0 deletions compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,18 @@ impl Span {
})
}

/// Splits a span into two composite spans around a certain position.
pub fn split_at(self, pos: u32) -> (Span, Span) {
let len = self.hi().0 - self.lo().0;
debug_assert!(pos <= len);

let split_pos = BytePos(self.lo().0 + pos);
(
Span::new(self.lo(), split_pos, self.ctxt(), self.parent()),
Span::new(split_pos, self.hi(), self.ctxt(), self.parent()),
)
}

/// Returns a `Span` that would enclose both `self` and `end`.
///
/// Note that this can also be used to extend the span "backwards":
Expand Down
16 changes: 16 additions & 0 deletions tests/ui/parser/ident-recovery.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
fn ,comma() {
//~^ ERROR expected identifier, found `,`
struct Foo {
x: i32,,
//~^ ERROR expected identifier, found `,`
y: u32,
}
}

fn break() {
//~^ ERROR expected identifier, found keyword `break`
let continue = 5;
//~^ ERROR expected identifier, found keyword `continue`
}

fn main() {}
Loading