Skip to content

Commit

Permalink
Rollup merge of rust-lang#119532 - GKFX:offset-of-parse-expr, r=est31
Browse files Browse the repository at this point in the history
Make offset_of field parsing use metavariable which handles any spacing

As discussed in and around the comments rust-lang#106655 (comment) and rust-lang#106655 (comment), the current arguments to `offset_of` do not accept all whitespace combinations: `0. 1.1.1` and `0.1.1. 1` are currently treated specially in `tests/ui/offset-of/offset-of-tuple-nested.rs`.

They also do not allow forwarding individual fields, as in [this playground example](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=444cdf0ec02b99e8fd5fd8d8ecb312ca):
```rust
macro_rules! off {
    ($a:expr) => {
        offset_of!(m::S, 0. $a)
    }
}
```

This PR replaces the macro arguments with `($Container:ty, $($fields:expr)+ $(,)?)`, which allows every arrangement of whitespace that I could come up with, as well as the field-forwarding example above.

This also allows for array indexing in the future, which I think is the last future extension to the syntax suggested in the offset_of RFC.

Tracking issue for offset_of: rust-lang#106655
`@rustbot` label F-offset_of

`@est31`
  • Loading branch information
matthiaskrgr committed Jan 4, 2024
2 parents 6129278 + f0c0a49 commit 94b03af
Show file tree
Hide file tree
Showing 13 changed files with 293 additions and 263 deletions.
4 changes: 4 additions & 0 deletions compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ parse_ambiguous_range_pattern = the range pattern here has ambiguous interpretat
parse_array_brackets_instead_of_braces = this is a block expression, not an array
.suggestion = to make an array, use square brackets instead of curly braces
parse_array_index_offset_of = array indexing not supported in offset_of
parse_assignment_else_not_allowed = <assignment> ... else {"{"} ... {"}"} is not allowed
parse_assoc_lifetime = associated lifetimes are not supported
Expand Down Expand Up @@ -405,6 +407,8 @@ parse_invalid_logical_operator = `{$incorrect}` is not a logical operator
parse_invalid_meta_item = expected unsuffixed literal or identifier, found `{$token}`
parse_invalid_offset_of = offset_of expects dot-separated field and variant names
parse_invalid_unicode_escape = invalid unicode character escape
.label = invalid escape
.help = unicode escape must {$surrogate ->
Expand Down
8 changes: 8 additions & 0 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2887,3 +2887,11 @@ pub(crate) struct TransposeDynOrImplSugg<'a> {
pub insertion_span: Span,
pub kw: &'a str,
}

/// Diagnostic for the `parse_array_index_offset_of` message ("array indexing
/// not supported in offset_of"). The parser emits it when an index expression
/// appears among the fields passed to `offset_of`; the wrapped `Span` is the
/// primary span of the error.
#[derive(Diagnostic)]
#[diag(parse_array_index_offset_of)]
pub(crate) struct ArrayIndexInOffsetOf(#[primary_span] pub Span);

/// Diagnostic for the `parse_invalid_offset_of` message ("offset_of expects
/// dot-separated field and variant names"). The parser emits it when the
/// field argument of `offset_of` is not a plain chain of field accesses; the
/// wrapped `Span` is the primary span of the error.
#[derive(Diagnostic)]
#[diag(parse_invalid_offset_of)]
pub(crate) struct InvalidOffsetOf(#[primary_span] pub Span);
164 changes: 121 additions & 43 deletions compiler/rustc_parse/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ impl<'a> Parser<'a> {
// we should break everything including floats into more basic proc-macro style
// tokens in the lexer (probably preferable).
// See also `TokenKind::break_two_token_op` which does similar splitting of `>>` into `>`.
fn break_up_float(&mut self, float: Symbol) -> DestructuredFloat {
fn break_up_float(&self, float: Symbol, span: Span) -> DestructuredFloat {
#[derive(Debug)]
enum FloatComponent {
IdentLike(String),
Expand Down Expand Up @@ -1053,7 +1053,6 @@ impl<'a> Parser<'a> {
// With proc macros the span can refer to anything, the source may be too short,
// or too long, or non-ASCII. It only makes sense to break our span into components
// if its underlying text is identical to our float literal.
let span = self.token.span;
let can_take_span_apart =
|| self.span_to_snippet(span).as_deref() == Ok(float_str).as_deref();

Expand Down Expand Up @@ -1115,7 +1114,7 @@ impl<'a> Parser<'a> {
float: Symbol,
suffix: Option<Symbol>,
) -> P<Expr> {
match self.break_up_float(float) {
match self.break_up_float(float, self.token.span) {
// 1e2
DestructuredFloat::Single(sym, _sp) => {
self.parse_expr_tuple_field_access(lo, base, sym, suffix, None)
Expand Down Expand Up @@ -1143,40 +1142,105 @@ impl<'a> Parser<'a> {
}
}

fn parse_field_name_maybe_tuple(&mut self) -> PResult<'a, ThinVec<Ident>> {
let token::Literal(token::Lit { kind: token::Float, symbol, suffix }) = self.token.kind
else {
return Ok(thin_vec![self.parse_field_name()?]);
};
Ok(match self.break_up_float(symbol) {
// 1e2
DestructuredFloat::Single(sym, sp) => {
self.bump();
thin_vec![Ident::new(sym, sp)]
}
// 1.
DestructuredFloat::TrailingDot(sym, sym_span, dot_span) => {
assert!(suffix.is_none());
// Analogous to `Self::break_and_eat`
self.break_last_token = true;
// This might work, in cases like `1. 2`, and might not,
// in cases like `offset_of!(Ty, 1.)`. It depends on what comes
// after the float-like token, and therefore we have to make
// the other parts of the parser think that there is a dot literal.
self.token = Token::new(token::Ident(sym, false), sym_span);
self.bump_with((Token::new(token::Dot, dot_span), self.token_spacing));
thin_vec![Ident::new(sym, sym_span)]
}
// 1.2 | 1.2e3
DestructuredFloat::MiddleDot(symbol1, ident1_span, _dot_span, symbol2, ident2_span) => {
self.bump();
thin_vec![Ident::new(symbol1, ident1_span), Ident::new(symbol2, ident2_span)]
/// Parse the field access used in offset_of, matched by `$($fields:expr)+`.
/// Currently returns a list of idents. However, it should be possible in
/// future to also do array indices, which might be arbitrary expressions.
fn parse_floating_field_access(&mut self) -> PResult<'a, P<[Ident]>> {
let mut fields = Vec::new();
let mut trailing_dot = None;

loop {
// This is expected to use a metavariable $(args:expr)+, but the builtin syntax
// could be called directly. Calling `parse_expr` allows this function to only
// consider `Expr`s.
let expr = self.parse_expr()?;
let mut current = &expr;
let start_idx = fields.len();
loop {
match current.kind {
ExprKind::Field(ref left, right) => {
// Field access is read right-to-left.
fields.insert(start_idx, right);
trailing_dot = None;
current = left;
}
// Parse this both to give helpful error messages and to
// verify it can be done with this parser setup.
ExprKind::Index(ref left, ref _right, span) => {
self.dcx().emit_err(errors::ArrayIndexInOffsetOf(span));
current = left;
}
ExprKind::Lit(token::Lit {
kind: token::Float | token::Integer,
symbol,
suffix,
}) => {
if let Some(suffix) = suffix {
self.expect_no_tuple_index_suffix(current.span, suffix);
}
match self.break_up_float(symbol, current.span) {
// 1e2
DestructuredFloat::Single(sym, sp) => {
trailing_dot = None;
fields.insert(start_idx, Ident::new(sym, sp));
}
// 1.
DestructuredFloat::TrailingDot(sym, sym_span, dot_span) => {
assert!(suffix.is_none());
trailing_dot = Some(dot_span);
fields.insert(start_idx, Ident::new(sym, sym_span));
}
// 1.2 | 1.2e3
DestructuredFloat::MiddleDot(
symbol1,
span1,
_dot_span,
symbol2,
span2,
) => {
trailing_dot = None;
fields.insert(start_idx, Ident::new(symbol2, span2));
fields.insert(start_idx, Ident::new(symbol1, span1));
}
DestructuredFloat::Error => {
trailing_dot = None;
fields.insert(start_idx, Ident::new(symbol, self.prev_token.span));
}
}
break;
}
ExprKind::Path(None, Path { ref segments, .. }) => {
match &segments[..] {
[PathSegment { ident, args: None, .. }] => {
trailing_dot = None;
fields.insert(start_idx, *ident)
}
_ => {
self.dcx().emit_err(errors::InvalidOffsetOf(current.span));
break;
}
}
break;
}
_ => {
self.dcx().emit_err(errors::InvalidOffsetOf(current.span));
break;
}
}
}
DestructuredFloat::Error => {
self.bump();
thin_vec![Ident::new(symbol, self.prev_token.span)]

if matches!(self.token.kind, token::CloseDelim(..) | token::Comma) {
break;
} else if trailing_dot.is_none() {
// This loop should only repeat if there is a trailing dot.
self.dcx().emit_err(errors::InvalidOffsetOf(self.token.span));
break;
}
})
}
if let Some(dot) = trailing_dot {
self.dcx().emit_err(errors::InvalidOffsetOf(dot));
}
Ok(fields.into_iter().collect())
}

fn parse_expr_tuple_field_access(
Expand Down Expand Up @@ -1907,15 +1971,29 @@ impl<'a> Parser<'a> {
let container = self.parse_ty()?;
self.expect(&TokenKind::Comma)?;

let seq_sep = SeqSep { sep: Some(token::Dot), trailing_sep_allowed: false };
let (fields, _trailing, _recovered) = self.parse_seq_to_before_end(
&TokenKind::CloseDelim(Delimiter::Parenthesis),
seq_sep,
Parser::parse_field_name_maybe_tuple,
)?;
let fields = fields.into_iter().flatten().collect::<Vec<_>>();
let fields = self.parse_floating_field_access()?;
let trailing_comma = self.eat_noexpect(&TokenKind::Comma);

if let Err(mut e) =
self.expect_one_of(&[], &[TokenKind::CloseDelim(Delimiter::Parenthesis)])
{
if trailing_comma {
e.note("unexpected third argument to offset_of");
} else {
e.note("offset_of expects dot-separated field and variant names");
}
e.emit();
}

// Eat tokens until the macro call ends.
if self.may_recover() {
while !matches!(self.token.kind, token::CloseDelim(..) | token::Eof) {
self.bump();
}
}

let span = lo.to(self.token.span);
Ok(self.mk_expr(span, ExprKind::OffsetOf(container, fields.into())))
Ok(self.mk_expr(span, ExprKind::OffsetOf(container, fields)))
}

/// Returns a string literal if the next token is a string literal.
Expand Down
10 changes: 10 additions & 0 deletions library/core/src/mem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1395,8 +1395,18 @@ impl<T> SizedTypeProperties for T {}
///
/// assert_eq!(mem::offset_of!(Option<&u8>, Some.0), 0);
/// ```
#[cfg(not(bootstrap))]
#[unstable(feature = "offset_of", issue = "106655")]
#[allow_internal_unstable(builtin_syntax, hint_must_use)]
pub macro offset_of($Container:ty, $($fields:expr)+ $(,)?) {
// The `{}` is for better error messages
crate::hint::must_use({builtin # offset_of($Container, $($fields)+)})
}

#[cfg(bootstrap)]
#[unstable(feature = "offset_of", issue = "106655")]
#[allow_internal_unstable(builtin_syntax, hint_must_use)]
#[allow(missing_docs)]
pub macro offset_of($Container:ty, $($fields:tt).+ $(,)?) {
// The `{}` is for better error messages
crate::hint::must_use({builtin # offset_of($Container, $($fields).+)})
Expand Down
8 changes: 4 additions & 4 deletions tests/ui/offset-of/offset-of-arg-count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ fn main() {
offset_of!(Container, field, too many arguments); //~ ERROR no rules expected the token `too`
offset_of!(S, f); // compiles fine
offset_of!(S, f,); // also compiles fine
offset_of!(S, f.); //~ ERROR unexpected end of macro invocation
offset_of!(S, f.,); //~ ERROR expected identifier
offset_of!(S, f..); //~ ERROR no rules expected the token
offset_of!(S, f..,); //~ ERROR no rules expected the token
offset_of!(S, f.); //~ ERROR unexpected token: `)`
offset_of!(S, f.,); //~ ERROR unexpected token: `,`
offset_of!(S, f..); //~ ERROR offset_of expects dot-separated field and variant names
offset_of!(S, f..,); //~ ERROR offset_of expects dot-separated field and variant names
offset_of!(Lt<'static>, bar); // issue #111657
offset_of!(Lt<'_>, bar); // issue #111678
}
Expand Down
29 changes: 11 additions & 18 deletions tests/ui/offset-of/offset-of-arg-count.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ error: unexpected end of macro invocation
LL | offset_of!(NotEnoughArgumentsWithAComma, );
| ^ missing tokens in macro arguments
|
note: while trying to match meta-variable `$fields:tt`
note: while trying to match meta-variable `$fields:expr`
--> $SRC_DIR/core/src/mem/mod.rs:LL:COL

error: no rules expected the token `too`
Expand All @@ -24,36 +24,29 @@ LL | offset_of!(Container, field, too many arguments);
|
= note: while trying to match sequence end

error: unexpected end of macro invocation
error: unexpected token: `)`
--> $DIR/offset-of-arg-count.rs:11:21
|
LL | offset_of!(S, f.);
| ^ missing tokens in macro arguments
|
note: while trying to match meta-variable `$fields:tt`
--> $SRC_DIR/core/src/mem/mod.rs:LL:COL
| ^

error: expected identifier, found `,`
error: unexpected token: `,`
--> $DIR/offset-of-arg-count.rs:12:21
|
LL | offset_of!(S, f.,);
| ^ expected identifier
| ^

error: no rules expected the token `..`
--> $DIR/offset-of-arg-count.rs:13:20
error: offset_of expects dot-separated field and variant names
--> $DIR/offset-of-arg-count.rs:13:19
|
LL | offset_of!(S, f..);
| ^^ no rules expected this token in macro call
|
= note: while trying to match sequence start
| ^^^

error: no rules expected the token `..`
--> $DIR/offset-of-arg-count.rs:14:20
error: offset_of expects dot-separated field and variant names
--> $DIR/offset-of-arg-count.rs:14:19
|
LL | offset_of!(S, f..,);
| ^^ no rules expected this token in macro call
|
= note: while trying to match sequence start
| ^^^

error: aborting due to 7 previous errors

24 changes: 6 additions & 18 deletions tests/ui/offset-of/offset-of-builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,25 @@ fn main() {
builtin # offset_of(NotEnoughArguments); //~ ERROR expected one of
}
fn t1() {
// Already errored upon at the macro level. Yielding an error would require
// extra effort.
builtin # offset_of(NotEnoughArgumentsWithAComma, );
builtin # offset_of(NotEnoughArgumentsWithAComma, ); //~ ERROR expected expression
}
fn t2() {
builtin # offset_of(Container, field, too many arguments); //~ ERROR expected identifier, found
//~| ERROR found `,`
//~| ERROR found `many`
//~| ERROR found `arguments`
builtin # offset_of(S, f, too many arguments); //~ ERROR expected `)`, found `too`
}
fn t3() {
builtin # offset_of(S, f); // compiles fine
}
fn t4() {
// Already errored upon at the macro level. Yielding an error would require
// extra effort.
builtin # offset_of(S, f);
builtin # offset_of(S, f.); //~ ERROR unexpected token
}
fn t5() {
builtin # offset_of(S, f.); //~ ERROR expected identifier
builtin # offset_of(S, f.,); //~ ERROR unexpected token
}
fn t6() {
builtin # offset_of(S, f.,); //~ ERROR expected identifier
builtin # offset_of(S, f..); //~ ERROR offset_of expects dot-separated field and variant names
}
fn t7() {
builtin # offset_of(S, f..); //~ ERROR expected one of
}
fn t8() {
// Already errored upon at the macro level. Yielding an error would require
// extra effort.
builtin # offset_of(S, f..,);
builtin # offset_of(S, f..,); //~ ERROR offset_of expects dot-separated field and variant names
}

struct S { f: u8, }
Loading

0 comments on commit 94b03af

Please sign in to comment.