Skip to content

Commit

Permalink
Rollup merge of #81307 - estebank:invalid-byte-str-span, r=petrochenkov
Browse files Browse the repository at this point in the history
Handle `Span`s for byte and raw strings and add more detail

CC #81208.
  • Loading branch information
m-ou-se committed Feb 5, 2021
2 parents 676ff77 + 3b5d018 commit 8d49ca1
Show file tree
Hide file tree
Showing 35 changed files with 278 additions and 183 deletions.
30 changes: 20 additions & 10 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ mod tokentrees;
mod unescape_error_reporting;
mod unicode_chars;

use unescape_error_reporting::{emit_unescape_error, push_escaped_char};
use unescape_error_reporting::{emit_unescape_error, escaped_char};

#[derive(Clone, Debug)]
pub struct UnmatchedBrace {
Expand Down Expand Up @@ -122,11 +122,9 @@ impl<'a> StringReader<'a> {
m: &str,
c: char,
) -> DiagnosticBuilder<'a> {
let mut m = m.to_string();
m.push_str(": ");
push_escaped_char(&mut m, c);

self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
self.sess
.span_diagnostic
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
}

/// Turns simple `rustc_lexer::TokenKind` enum into a rich
Expand Down Expand Up @@ -421,7 +419,7 @@ impl<'a> StringReader<'a> {
let content_start = start + BytePos(prefix_len);
let content_end = suffix_start - BytePos(postfix_len);
let id = self.symbol_from_to(content_start, content_end);
self.validate_literal_escape(mode, content_start, content_end);
self.validate_literal_escape(mode, content_start, content_end, prefix_len, postfix_len);
(lit_kind, id)
}

Expand Down Expand Up @@ -525,17 +523,29 @@ impl<'a> StringReader<'a> {
.raise();
}

fn validate_literal_escape(&self, mode: Mode, content_start: BytePos, content_end: BytePos) {
fn validate_literal_escape(
&self,
mode: Mode,
content_start: BytePos,
content_end: BytePos,
prefix_len: u32,
postfix_len: u32,
) {
let lit_content = self.str_from_to(content_start, content_end);
unescape::unescape_literal(lit_content, mode, &mut |range, result| {
// Here we only check for errors. The actual unescaping is done later.
if let Err(err) = result {
let span_with_quotes =
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
let span_with_quotes = self
.mk_sp(content_start - BytePos(prefix_len), content_end + BytePos(postfix_len));
let (start, end) = (range.start as u32, range.end as u32);
let lo = content_start + BytePos(start);
let hi = lo + BytePos(end - start);
let span = self.mk_sp(lo, hi);
emit_unescape_error(
&self.sess.span_diagnostic,
lit_content,
span_with_quotes,
span,
mode,
range,
err,
Expand Down
186 changes: 116 additions & 70 deletions compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ pub(crate) fn emit_unescape_error(
lit: &str,
// full span of the literal, including quotes
span_with_quotes: Span,
// interior span of the literal, without quotes
span: Span,
mode: Mode,
// range of the error inside `lit`
range: Range<usize>,
Expand All @@ -26,13 +28,6 @@ pub(crate) fn emit_unescape_error(
range,
error
);
let span = {
let Range { start, end } = range;
let (start, end) = (start as u32, end as u32);
let lo = span_with_quotes.lo() + BytePos(start + 1);
let hi = lo + BytePos(end - start);
span_with_quotes.with_lo(lo).with_hi(hi)
};
let last_char = || {
let c = lit[range.clone()].chars().rev().next().unwrap();
let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
Expand All @@ -42,20 +37,22 @@ pub(crate) fn emit_unescape_error(
EscapeError::LoneSurrogateUnicodeEscape => {
handler
.struct_span_err(span, "invalid unicode character escape")
.span_label(span, "invalid escape")
.help("unicode escape must not be a surrogate")
.emit();
}
EscapeError::OutOfRangeUnicodeEscape => {
handler
.struct_span_err(span, "invalid unicode character escape")
.span_label(span, "invalid escape")
.help("unicode escape must be at most 10FFFF")
.emit();
}
EscapeError::MoreThanOneChar => {
let msg = if mode.is_bytes() {
"if you meant to write a byte string literal, use double quotes"
let (prefix, msg) = if mode.is_bytes() {
("b", "if you meant to write a byte string literal, use double quotes")
} else {
"if you meant to write a `str` literal, use double quotes"
("", "if you meant to write a `str` literal, use double quotes")
};

handler
Expand All @@ -66,31 +63,44 @@ pub(crate) fn emit_unescape_error(
.span_suggestion(
span_with_quotes,
msg,
format!("\"{}\"", lit),
format!("{}\"{}\"", prefix, lit),
Applicability::MachineApplicable,
)
.emit();
}
EscapeError::EscapeOnlyChar => {
let (c, _span) = last_char();
let (c, char_span) = last_char();

let mut msg = if mode.is_bytes() {
"byte constant must be escaped: "
let msg = if mode.is_bytes() {
"byte constant must be escaped"
} else {
"character constant must be escaped: "
}
.to_string();
push_escaped_char(&mut msg, c);

handler.span_err(span, msg.as_str())
"character constant must be escaped"
};
handler
.struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
.span_suggestion(
char_span,
"escape the character",
c.escape_default().to_string(),
Applicability::MachineApplicable,
)
.emit()
}
EscapeError::BareCarriageReturn => {
let msg = if mode.in_double_quotes() {
"bare CR not allowed in string, use \\r instead"
"bare CR not allowed in string, use `\\r` instead"
} else {
"character constant must be escaped: \\r"
"character constant must be escaped: `\\r`"
};
handler.span_err(span, msg);
handler
.struct_span_err(span, msg)
.span_suggestion(
span,
"escape the character",
"\\r".to_string(),
Applicability::MachineApplicable,
)
.emit();
}
EscapeError::BareCarriageReturnInRawString => {
assert!(mode.in_double_quotes());
Expand All @@ -102,21 +112,22 @@ pub(crate) fn emit_unescape_error(

let label =
if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" };
let mut msg = label.to_string();
msg.push_str(": ");
push_escaped_char(&mut msg, c);

let mut diag = handler.struct_span_err(span, msg.as_str());
let ec = escaped_char(c);
let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
diag.span_label(span, label);
if c == '{' || c == '}' && !mode.is_bytes() {
diag.help(
"if used in a formatting string, \
curly braces are escaped with `{{` and `}}`",
"if used in a formatting string, curly braces are escaped with `{{` and `}}`",
);
} else if c == '\r' {
diag.help(
"this is an isolated carriage return; \
consider checking your editor and version control settings",
"this is an isolated carriage return; consider checking your editor and \
version control settings",
);
} else {
diag.help(
"for more information, visit \
<https://static.rust-lang.org/doc/master/reference.html#literals>",
);
}
diag.emit();
Expand All @@ -127,45 +138,70 @@ pub(crate) fn emit_unescape_error(
EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
let (c, span) = last_char();

let mut msg = if error == EscapeError::InvalidCharInHexEscape {
"invalid character in numeric character escape: "
let msg = if error == EscapeError::InvalidCharInHexEscape {
"invalid character in numeric character escape"
} else {
"invalid character in unicode escape: "
}
.to_string();
push_escaped_char(&mut msg, c);
"invalid character in unicode escape"
};
let c = escaped_char(c);

handler.span_err(span, msg.as_str())
handler
.struct_span_err(span, &format!("{}: `{}`", msg, c))
.span_label(span, msg)
.emit();
}
EscapeError::NonAsciiCharInByte => {
assert!(mode.is_bytes());
let (_c, span) = last_char();
handler.span_err(
span,
"byte constant must be ASCII. \
Use a \\xHH escape for a non-ASCII byte",
)
let (c, span) = last_char();
handler
.struct_span_err(span, "non-ASCII character in byte constant")
.span_label(span, "byte constant must be ASCII")
.span_suggestion(
span,
"use a \\xHH escape for a non-ASCII byte",
format!("\\x{:X}", c as u32),
Applicability::MachineApplicable,
)
.emit();
}
EscapeError::NonAsciiCharInByteString => {
assert!(mode.is_bytes());
let (_c, span) = last_char();
handler.span_err(span, "raw byte string must be ASCII")
handler
.struct_span_err(span, "raw byte string must be ASCII")
.span_label(span, "must be ASCII")
.emit();
}
EscapeError::OutOfRangeHexEscape => {
handler
.struct_span_err(span, "out of range hex escape")
.span_label(span, "must be a character in the range [\\x00-\\x7f]")
.emit();
}
EscapeError::OutOfRangeHexEscape => handler.span_err(
span,
"this form of character escape may only be used \
with characters in the range [\\x00-\\x7f]",
),
EscapeError::LeadingUnderscoreUnicodeEscape => {
let (_c, span) = last_char();
handler.span_err(span, "invalid start of unicode escape")
let (c, span) = last_char();
let msg = "invalid start of unicode escape";
handler
.struct_span_err(span, &format!("{}: `{}`", msg, c))
.span_label(span, msg)
.emit();
}
EscapeError::OverlongUnicodeEscape => {
handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)")
}
EscapeError::UnclosedUnicodeEscape => {
handler.span_err(span, "unterminated unicode escape (needed a `}`)")
handler
.struct_span_err(span, "overlong unicode escape")
.span_label(span, "must have at most 6 hex digits")
.emit();
}
EscapeError::UnclosedUnicodeEscape => handler
.struct_span_err(span, "unterminated unicode escape")
.span_label(span, "missing a closing `}`")
.span_suggestion_verbose(
span.shrink_to_hi(),
"terminate the unicode escape",
"}".to_string(),
Applicability::MaybeIncorrect,
)
.emit(),
EscapeError::NoBraceInUnicodeEscape => {
let msg = "incorrect unicode escape sequence";
let mut diag = handler.struct_span_err(span, msg);
Expand Down Expand Up @@ -195,28 +231,38 @@ pub(crate) fn emit_unescape_error(

diag.emit();
}
EscapeError::UnicodeEscapeInByte => handler.span_err(
span,
"unicode escape sequences cannot be used \
as a byte or in a byte string",
),
EscapeError::UnicodeEscapeInByte => {
let msg = "unicode escape in byte string";
handler
.struct_span_err(span, msg)
.span_label(span, msg)
.help("unicode escape sequences cannot be used as a byte or in a byte string")
.emit();
}
EscapeError::EmptyUnicodeEscape => {
handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)")
handler
.struct_span_err(span, "empty unicode escape")
.span_label(span, "this escape must have at least 1 hex digit")
.emit();
}
EscapeError::ZeroChars => {
let msg = "empty character literal";
handler.struct_span_err(span, msg).span_label(span, msg).emit()
}
EscapeError::LoneSlash => {
let msg = "invalid trailing slash in literal";
handler.struct_span_err(span, msg).span_label(span, msg).emit();
}
EscapeError::ZeroChars => handler.span_err(span, "empty character literal"),
EscapeError::LoneSlash => handler.span_err(span, "invalid trailing slash in literal"),
}
}

/// Pushes a character to a message string for error reporting
pub(crate) fn push_escaped_char(msg: &mut String, c: char) {
pub(crate) fn escaped_char(c: char) -> String {
match c {
'\u{20}'..='\u{7e}' => {
// Don't escape \, ' or " for user-facing messages
msg.push(c);
}
_ => {
msg.extend(c.escape_default());
c.to_string()
}
_ => c.escape_default().to_string(),
}
}
2 changes: 1 addition & 1 deletion src/test/ui/attributes/key-value-non-ascii.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#![feature(rustc_attrs)]

#[rustc_dummy = b"ffi.rs"] //~ ERROR byte constant must be ASCII
#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte constant
fn main() {}
5 changes: 4 additions & 1 deletion src/test/ui/attributes/key-value-non-ascii.stderr
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
error: byte constant must be ASCII. Use a \xHH escape for a non-ASCII byte
error: non-ASCII character in byte constant
--> $DIR/key-value-non-ascii.rs:3:19
|
LL | #[rustc_dummy = b"ffi.rs"]
| ^
| |
| byte constant must be ASCII
| help: use a \xHH escape for a non-ASCII byte: `\xFB03`

error: aborting due to previous error

6 changes: 3 additions & 3 deletions src/test/ui/parser/ascii-only-character-escape.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
fn main() {
let x = "\x80"; //~ ERROR may only be used
let y = "\xff"; //~ ERROR may only be used
let z = "\xe2"; //~ ERROR may only be used
let x = "\x80"; //~ ERROR out of range hex escape
let y = "\xff"; //~ ERROR out of range hex escape
let z = "\xe2"; //~ ERROR out of range hex escape
let a = b"\x00e2"; // ok because byte literal
}
Loading

0 comments on commit 8d49ca1

Please sign in to comment.