Auto merge of #112216 - est31:offset_of_deep_tuple, r=petrochenkov

Support float-like tuple indices in offset_of!() Supports invocations like `offset_of!((((), ()), ()), 0.0)`. This `0.0` gets tokenized as float literal, so it has to be broken up again. The code that did the breaking up was returning a finished `Expr`, while we need a `Ident`, so this PR splits up the `parse_expr_tuple_field_access_float` function into: * a function that breaks up the float literal (similar to `TokenKind::break_two_token_op`, but we do access the parser during this splitting operation, so we keep it as an inherent function on the parser) * and a function that constructs an `Expr` from it The former we can then re-use in `offset_of` parsing. The edge cases especially involving whitespaces are tricky so this adds a bunch of new tests as well. fixes #112204
rust-lang · Jun 9, 2023 · 43062c4 · 43062c4
2 parents 397641f + 9fb266b
commit 43062c4
Show file tree

Hide file tree

Showing 4 changed files with 390 additions and 42 deletions.
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
@@ -91,6 +91,18 @@ impl From<P<Expr>> for LhsExpr {
     }
 }
 
+#[derive(Debug)]
+enum DestructuredFloat {
+    /// 1e2
+    Single(Symbol, Span),
+    /// 1.
+    TrailingDot(Symbol, Span, Span),
+    /// 1.2 | 1.2e3
+    MiddleDot(Symbol, Span, Span, Symbol, Span),
+    /// Invalid
+    Error,
+}
+
 impl<'a> Parser<'a> {
     /// Parses an expression.
     #[inline]
@@ -1013,13 +1025,8 @@ impl<'a> Parser<'a> {
     // support pushing "future tokens" (would be also helpful to `break_and_eat`), or
     // we should break everything including floats into more basic proc-macro style
     // tokens in the lexer (probably preferable).
-    fn parse_expr_tuple_field_access_float(
-        &mut self,
-        lo: Span,
-        base: P<Expr>,
-        float: Symbol,
-        suffix: Option<Symbol>,
-    ) -> P<Expr> {
+    // See also `TokenKind::break_two_token_op` which does similar splitting of `>>` into `>`.
+    fn break_up_float(&mut self, float: Symbol) -> DestructuredFloat {
         #[derive(Debug)]
         enum FloatComponent {
             IdentLike(String),
@@ -1056,7 +1063,7 @@ impl<'a> Parser<'a> {
         match &*components {
             // 1e2
             [IdentLike(i)] => {
-                self.parse_expr_tuple_field_access(lo, base, Symbol::intern(&i), suffix, None)
+                DestructuredFloat::Single(Symbol::intern(&i), span)
             }
             // 1.
             [IdentLike(i), Punct('.')] => {
@@ -1068,11 +1075,8 @@ impl<'a> Parser<'a> {
                 } else {
                     (span, span)
                 };
-                assert!(suffix.is_none());
                 let symbol = Symbol::intern(&i);
-                self.token = Token::new(token::Ident(symbol, false), ident_span);
-                let next_token = (Token::new(token::Dot, dot_span), self.token_spacing);
-                self.parse_expr_tuple_field_access(lo, base, symbol, None, Some(next_token))
+                DestructuredFloat::TrailingDot(symbol, ident_span, dot_span)
             }
             // 1.2 | 1.2e3
             [IdentLike(i1), Punct('.'), IdentLike(i2)] => {
@@ -1088,16 +1092,8 @@ impl<'a> Parser<'a> {
                     (span, span, span)
                 };
                 let symbol1 = Symbol::intern(&i1);
-                self.token = Token::new(token::Ident(symbol1, false), ident1_span);
-                // This needs to be `Spacing::Alone` to prevent regressions.
-                // See issue #76399 and PR #76285 for more details
-                let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone);
-                let base1 =
-                    self.parse_expr_tuple_field_access(lo, base, symbol1, None, Some(next_token1));
                 let symbol2 = Symbol::intern(&i2);
-                let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span);
-                self.bump_with((next_token2, self.token_spacing)); // `.`
-                self.parse_expr_tuple_field_access(lo, base1, symbol2, suffix, None)
+                DestructuredFloat::MiddleDot(symbol1, ident1_span, dot_span, symbol2, ident2_span)
             }
             // 1e+ | 1e- (recovered)
             [IdentLike(_), Punct('+' | '-')] |
@@ -1109,12 +1105,83 @@ impl<'a> Parser<'a> {
             [IdentLike(_), Punct('.'), IdentLike(_), Punct('+' | '-'), IdentLike(_)] => {
                 // See the FIXME about `TokenCursor` above.
                 self.error_unexpected_after_dot();
-                base
+                DestructuredFloat::Error
             }
             _ => panic!("unexpected components in a float token: {:?}", components),
         }
     }
 
+    fn parse_expr_tuple_field_access_float(
+        &mut self,
+        lo: Span,
+        base: P<Expr>,
+        float: Symbol,
+        suffix: Option<Symbol>,
+    ) -> P<Expr> {
+        match self.break_up_float(float) {
+            // 1e2
+            DestructuredFloat::Single(sym, _sp) => {
+                self.parse_expr_tuple_field_access(lo, base, sym, suffix, None)
+            }
+            // 1.
+            DestructuredFloat::TrailingDot(sym, ident_span, dot_span) => {
+                assert!(suffix.is_none());
+                self.token = Token::new(token::Ident(sym, false), ident_span);
+                let next_token = (Token::new(token::Dot, dot_span), self.token_spacing);
+                self.parse_expr_tuple_field_access(lo, base, sym, None, Some(next_token))
+            }
+            // 1.2 | 1.2e3
+            DestructuredFloat::MiddleDot(symbol1, ident1_span, dot_span, symbol2, ident2_span) => {
+                self.token = Token::new(token::Ident(symbol1, false), ident1_span);
+                // This needs to be `Spacing::Alone` to prevent regressions.
+                // See issue #76399 and PR #76285 for more details
+                let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone);
+                let base1 =
+                    self.parse_expr_tuple_field_access(lo, base, symbol1, None, Some(next_token1));
+                let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span);
+                self.bump_with((next_token2, self.token_spacing)); // `.`
+                self.parse_expr_tuple_field_access(lo, base1, symbol2, suffix, None)
+            }
+            DestructuredFloat::Error => base,
+        }
+    }
+
+    fn parse_field_name_maybe_tuple(&mut self) -> PResult<'a, ThinVec<Ident>> {
+        let token::Literal(token::Lit { kind: token::Float, symbol, suffix }) = self.token.kind
+        else {
+            return Ok(thin_vec![self.parse_field_name()?]);
+        };
+        Ok(match self.break_up_float(symbol) {
+            // 1e2
+            DestructuredFloat::Single(sym, sp) => {
+                self.bump();
+                thin_vec![Ident::new(sym, sp)]
+            }
+            // 1.
+            DestructuredFloat::TrailingDot(sym, sym_span, dot_span) => {
+                assert!(suffix.is_none());
+                // Analogous to `Self::break_and_eat`
+                self.token_cursor.break_last_token = true;
+                // This might work, in cases like `1. 2`, and might not,
+                // in cases like `offset_of!(Ty, 1.)`. It depends on what comes
+                // after the float-like token, and therefore we have to make
+                // the other parts of the parser think that there is a dot literal.
+                self.token = Token::new(token::Ident(sym, false), sym_span);
+                self.bump_with((Token::new(token::Dot, dot_span), self.token_spacing));
+                thin_vec![Ident::new(sym, sym_span)]
+            }
+            // 1.2 | 1.2e3
+            DestructuredFloat::MiddleDot(symbol1, ident1_span, _dot_span, symbol2, ident2_span) => {
+                self.bump();
+                thin_vec![Ident::new(symbol1, ident1_span), Ident::new(symbol2, ident2_span)]
+            }
+            DestructuredFloat::Error => {
+                self.bump();
+                thin_vec![Ident::new(symbol, self.prev_token.span)]
+            }
+        })
+    }
+
     fn parse_expr_tuple_field_access(
         &mut self,
         lo: Span,
@@ -1821,10 +1888,11 @@ impl<'a> Parser<'a> {
         let (fields, _trailing, _recovered) = self.parse_seq_to_before_end(
             &TokenKind::CloseDelim(Delimiter::Parenthesis),
             seq_sep,
-            Parser::parse_field_name,
+            Parser::parse_field_name_maybe_tuple,
         )?;
+        let fields = fields.into_iter().flatten().collect::<Vec<_>>();
         let span = lo.to(self.token.span);
-        Ok(self.mk_expr(span, ExprKind::OffsetOf(container, fields.to_vec().into())))
+        Ok(self.mk_expr(span, ExprKind::OffsetOf(container, fields.into())))
     }
 
     /// Returns a string literal if the next token is a string literal.

diff --git a/tests/ui/offset-of/offset-of-tuple-nested.rs b/tests/ui/offset-of/offset-of-tuple-nested.rs
@@ -0,0 +1,32 @@
+// run-pass
+// Test for issue #112204 -- make sure this goes through the entire compilation pipeline,
+// similar to why `offset-of-unsized.rs` is also build-pass
+
+#![feature(offset_of)]
+#![feature(builtin_syntax)]
+
+use std::mem::offset_of;
+
+type ComplexTup = ((u8, (u8, (u8, u16), u8)), (u8, u32, u16));
+
+fn main() {
+    println!("{}", offset_of!(((u8, u8), u8), 0));
+    println!("{}", offset_of!(((u8, u8), u8), 1));
+    println!("{}", offset_of!(((u8, (u8, u8)), (u8, u8, u8)), 0.1.0));
+
+    // Complex case: do all combinations of spacings because the spacing determines what gets
+    // sent to the lexer.
+    println!("{}", offset_of!(ComplexTup, 0.1.1.1));
+    println!("{}", builtin # offset_of(ComplexTup, 0. 1.1.1));
+    println!("{}", offset_of!(ComplexTup, 0 . 1.1.1));
+    println!("{}", offset_of!(ComplexTup, 0 .1.1.1));
+    println!("{}", offset_of!(ComplexTup, 0.1 .1.1));
+    println!("{}", offset_of!(ComplexTup, 0.1 . 1.1));
+    println!("{}", offset_of!(ComplexTup, 0.1. 1.1));
+    println!("{}", builtin # offset_of(ComplexTup, 0.1.1. 1));
+    println!("{}", offset_of!(ComplexTup, 0.1.1 . 1));
+    println!("{}", offset_of!(ComplexTup, 0.1.1 .1));
+
+    println!("{}", offset_of!(((u8, u16), (u32, u16, u8)), 0.0));
+    println!("{}", offset_of!(((u8, u16), (u32, u16, u8)), 1.2));
+}
diff --git a/tests/ui/offset-of/offset-of-tuple.rs b/tests/ui/offset-of/offset-of-tuple.rs
@@ -1,10 +1,54 @@
 #![feature(offset_of)]
 #![feature(builtin_syntax)]
 
+use std::mem::offset_of;
+
 fn main() {
-    core::mem::offset_of!((u8, u8), _0); //~ ERROR no field `_0`
-    core::mem::offset_of!((u8, u8), +1); //~ ERROR no rules expected
-    core::mem::offset_of!((u8, u8), -1); //~ ERROR no rules expected
+    offset_of!((u8, u8), _0); //~ ERROR no field `_0`
+    offset_of!((u8, u8), 01); //~ ERROR no field `01`
+    offset_of!((u8, u8), 1e2); //~ ERROR no field `1e2`
+    offset_of!((u8, u8), 1_u8); //~ ERROR no field `1_`
+    //~| ERROR suffixes on a tuple index
+    offset_of!((u8, u8), +1); //~ ERROR no rules expected
+    offset_of!((u8, u8), -1); //~ ERROR no rules expected
+    offset_of!((u8, u8), 1.); //~ ERROR expected identifier, found `)`
+    offset_of!((u8, u8), 1 .); //~ ERROR unexpected end of macro
+    builtin # offset_of((u8, u8), 1e2); //~ ERROR no field `1e2`
     builtin # offset_of((u8, u8), _0); //~ ERROR no field `_0`
-    builtin # offset_of((u8, u8), +1); //~ ERROR expected identifier
+    builtin # offset_of((u8, u8), 01); //~ ERROR no field `01`
+    builtin # offset_of((u8, u8), 1_u8); //~ ERROR no field `1_`
+    //~| ERROR suffixes on a tuple index
+    // We need to put these into curly braces, otherwise only one of the
+    // errors will be emitted and the others suppressed.
+    { builtin # offset_of((u8, u8), +1) }; //~ ERROR expected identifier, found `+`
+    { builtin # offset_of((u8, u8), 1.) }; //~ ERROR expected identifier, found `)`
+    { builtin # offset_of((u8, u8), 1 .) }; //~ ERROR expected identifier, found `)`
+}
+
+type ComplexTup = ((u8, (u8, u8)), u8);
+
+fn nested() {
+    offset_of!(((u8, u16), (u32, u16, u8)), 0.2); //~ ERROR no field `2`
+    offset_of!(((u8, u16), (u32, u16, u8)), 1.2);
+    offset_of!(((u8, u16), (u32, u16, u8)), 1.2.0); //~ ERROR no field `0`
+
+    // All combinations of spaces (this sends different tokens to the parser)
+    offset_of!(ComplexTup, 0.0.1.); //~ ERROR expected identifier
+    offset_of!(ComplexTup, 0 .0.1.); //~ ERROR unexpected end of macro
+    offset_of!(ComplexTup, 0 . 0.1.); //~ ERROR unexpected end of macro
+    offset_of!(ComplexTup, 0. 0.1.); //~ ERROR no rules expected
+    offset_of!(ComplexTup, 0.0 .1.); //~ ERROR expected identifier, found `)`
+    offset_of!(ComplexTup, 0.0 . 1.); //~ ERROR expected identifier, found `)`
+    offset_of!(ComplexTup, 0.0. 1.); //~ ERROR expected identifier, found `)`
+
+    // Test for builtin too to ensure that the builtin syntax can also handle these cases
+    // We need to put these into curly braces, otherwise only one of the
+    // errors will be emitted and the others suppressed.
+    { builtin # offset_of(ComplexTup, 0.0.1.) }; //~ ERROR expected identifier, found `)`
+    { builtin # offset_of(ComplexTup, 0 .0.1.) }; //~ ERROR expected identifier, found `)`
+    { builtin # offset_of(ComplexTup, 0 . 0.1.) }; //~ ERROR expected identifier, found `)`
+    { builtin # offset_of(ComplexTup, 0. 0.1.) }; //~ ERROR expected identifier, found `)`
+    { builtin # offset_of(ComplexTup, 0.0 .1.) }; //~ ERROR expected identifier, found `)`
+    { builtin # offset_of(ComplexTup, 0.0 . 1.) }; //~ ERROR expected identifier, found `)`
+    { builtin # offset_of(ComplexTup, 0.0. 1.) }; //~ ERROR expected identifier, found `)`
 }