Auto merge of #114915 - nnethercote:Nonterminal-cleanups, r=petrochenkov

`Nonterminal`-related cleanups In #114647 I am trying to remove `Nonterminal`. It has a number of preliminary cleanups that are worth merging even if #114647 doesn't merge, so let's do them in this PR. r? `@petrochenkov`
rust-lang · Aug 18, 2023 · ee5cb9e · ee5cb9e
2 parents 2ceed0b + 9e22351
commit ee5cb9e
Show file tree

Hide file tree

Showing 11 changed files with 120 additions and 92 deletions.
diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -150,6 +150,8 @@ pub fn print_crate<'a>(
 /// and also addresses some specific regressions described in #63896 and #73345.
 fn tt_prepend_space(tt: &TokenTree, prev: &TokenTree) -> bool {
     if let TokenTree::Token(token, _) = prev {
+        // No space after these tokens, e.g. `x.y`, `$e`
+        // (The carets point to `prev`.)       ^     ^
         if matches!(token.kind, token::Dot | token::Dollar) {
             return false;
         }
@@ -158,10 +160,19 @@ fn tt_prepend_space(tt: &TokenTree, prev: &TokenTree) -> bool {
         }
     }
     match tt {
+        // No space before these tokens, e.g. `foo,`, `println!`, `x.y`
+        // (The carets point to `token`.)         ^           ^     ^
+        //
+        // FIXME: having `Not` here works well for macro invocations like
+        // `println!()`, but is bad when `!` means "logical not" or "the never
+        // type", where the lack of space causes ugliness like this:
+        // `Fn() ->!`, `x =! y`, `if! x { f(); }`.
         TokenTree::Token(token, _) => !matches!(token.kind, token::Comma | token::Not | token::Dot),
+        // No space before parentheses if preceded by these tokens, e.g. `foo(...)`
         TokenTree::Delimited(_, Delimiter::Parenthesis, _) => {
             !matches!(prev, TokenTree::Token(Token { kind: token::Ident(..), .. }, _))
         }
+        // No space before brackets if preceded by these tokens, e.g. `#[...]`
         TokenTree::Delimited(_, Delimiter::Bracket, _) => {
             !matches!(prev, TokenTree::Token(Token { kind: token::Pound, .. }, _))
         }

diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -81,7 +81,7 @@ use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
 use rustc_errors::ErrorGuaranteed;
 use rustc_lint_defs::pluralize;
-use rustc_parse::parser::{NtOrTt, Parser};
+use rustc_parse::parser::{ParseNtResult, Parser};
 use rustc_span::symbol::Ident;
 use rustc_span::symbol::MacroRulesNormalizedIdent;
 use rustc_span::Span;
@@ -692,8 +692,8 @@ impl TtParser {
                             Ok(nt) => nt,
                         };
                         let m = match nt {
-                            NtOrTt::Nt(nt) => MatchedNonterminal(Lrc::new(nt)),
-                            NtOrTt::Tt(tt) => MatchedTokenTree(tt),
+                            ParseNtResult::Nt(nt) => MatchedNonterminal(Lrc::new(nt)),
+                            ParseNtResult::Tt(tt) => MatchedTokenTree(tt),
                         };
                         mp.push_match(next_metavar, seq_depth, m);
                         mp.idx += 1;

diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs
@@ -1328,7 +1328,7 @@ fn is_in_follow(tok: &mbe::TokenTree, kind: NonterminalKind) -> IsInFollow {
                     _ => IsInFollow::No(TOKENS),
                 }
             }
-            NonterminalKind::PatWithOr { .. } => {
+            NonterminalKind::PatWithOr => {
                 const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`if`", "`in`"];
                 match tok {
                     TokenTree::Token(token) => match token.kind {

diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs
@@ -220,16 +220,15 @@ pub(super) fn transcribe<'a>(
                         MatchedTokenTree(tt) => {
                             // `tt`s are emitted into the output stream directly as "raw tokens",
                             // without wrapping them into groups.
-                            let token = tt.clone();
-                            result.push(token);
+                            result.push(tt.clone());
                         }
                         MatchedNonterminal(nt) => {
                             // Other variables are emitted into the output stream as groups with
                             // `Delimiter::Invisible` to maintain parsing priorities.
                             // `Interpolated` is currently used for such groups in rustc parser.
                             marker.visit_span(&mut sp);
-                            let token = TokenTree::token_alone(token::Interpolated(nt.clone()), sp);
-                            result.push(token);
+                            result
+                                .push(TokenTree::token_alone(token::Interpolated(nt.clone()), sp));
                         }
                         MatchedSeq(..) => {
                             // We were unable to descend far enough. This is an error.

diff --git a/compiler/rustc_macros/src/serialize.rs b/compiler/rustc_macros/src/serialize.rs
@@ -59,14 +59,14 @@ fn decodable_body(
                 })
                 .collect();
             let message = format!(
-                "invalid enum variant tag while decoding `{}`, expected 0..{}",
+                "invalid enum variant tag while decoding `{}`, expected 0..{}, actual {{}}",
                 ty_name,
                 variants.len()
             );
             quote! {
                 match ::rustc_serialize::Decoder::read_usize(__decoder) {
                     #match_inner
-                    _ => panic!(#message),
+                    n => panic!(#message, n),
                 }
             }
         }

diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
@@ -193,13 +193,7 @@ impl<'a> Parser<'a> {
 
         self.expected_tokens.push(TokenType::Operator);
         while let Some(op) = self.check_assoc_op() {
-            // Adjust the span for interpolated LHS to point to the `$lhs` token
-            // and not to what it refers to.
-            let lhs_span = match self.prev_token.kind {
-                TokenKind::Interpolated(..) => self.prev_token.span,
-                _ => lhs.span,
-            };
-
+            let lhs_span = self.interpolated_or_expr_span(&lhs);
             let cur_op_span = self.token.span;
             let restrictions = if op.node.is_assign_like() {
                 self.restrictions & Restrictions::NO_STRUCT_LITERAL
@@ -626,8 +620,8 @@ impl<'a> Parser<'a> {
 
     fn parse_expr_prefix_common(&mut self, lo: Span) -> PResult<'a, (Span, P<Expr>)> {
         self.bump();
-        let expr = self.parse_expr_prefix(None);
-        let (span, expr) = self.interpolated_or_expr_span(expr)?;
+        let expr = self.parse_expr_prefix(None)?;
+        let span = self.interpolated_or_expr_span(&expr);
         Ok((lo.to(span), expr))
     }
 
@@ -702,20 +696,12 @@ impl<'a> Parser<'a> {
         self.parse_expr_unary(lo, UnOp::Not)
     }
 
-    /// Returns the span of expr, if it was not interpolated or the span of the interpolated token.
-    fn interpolated_or_expr_span(
-        &self,
-        expr: PResult<'a, P<Expr>>,
-    ) -> PResult<'a, (Span, P<Expr>)> {
-        expr.map(|e| {
-            (
-                match self.prev_token.kind {
-                    TokenKind::Interpolated(..) => self.prev_token.span,
-                    _ => e.span,
-                },
-                e,
-            )
-        })
+    /// Returns the span of expr if it was not interpolated, or the span of the interpolated token.
+    fn interpolated_or_expr_span(&self, expr: &Expr) -> Span {
+        match self.prev_token.kind {
+            TokenKind::Interpolated(..) => self.prev_token.span,
+            _ => expr.span,
+        }
     }
 
     fn parse_assoc_op_cast(
@@ -898,8 +884,8 @@ impl<'a> Parser<'a> {
             self.parse_expr_prefix_range(None)
         } else {
             self.parse_expr_prefix(None)
-        };
-        let (hi, expr) = self.interpolated_or_expr_span(expr)?;
+        }?;
+        let hi = self.interpolated_or_expr_span(&expr);
         let span = lo.to(hi);
         if let Some(lt) = lifetime {
             self.error_remove_borrow_lifetime(span, lt.ident.span);
@@ -930,8 +916,8 @@ impl<'a> Parser<'a> {
     fn parse_expr_dot_or_call(&mut self, attrs: Option<AttrWrapper>) -> PResult<'a, P<Expr>> {
         let attrs = self.parse_or_use_outer_attributes(attrs)?;
         self.collect_tokens_for_expr(attrs, |this, attrs| {
-            let base = this.parse_expr_bottom();
-            let (span, base) = this.interpolated_or_expr_span(base)?;
+            let base = this.parse_expr_bottom()?;
+            let span = this.interpolated_or_expr_span(&base);
             this.parse_expr_dot_or_call_with(base, span, attrs)
         })
     }

diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
@@ -1052,33 +1052,48 @@ impl<'a> Parser<'a> {
     }
 
     /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
-    /// When `dist == 0` then the current token is looked at.
+    /// When `dist == 0` then the current token is looked at. `Eof` will be
+    /// returned if the look-ahead is any distance past the end of the tokens.
     pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R {
         if dist == 0 {
             return looker(&self.token);
         }
 
-        let tree_cursor = &self.token_cursor.tree_cursor;
         if let Some(&(_, delim, span)) = self.token_cursor.stack.last()
             && delim != Delimiter::Invisible
         {
+            // We are not in the outermost token stream, and the token stream
+            // we are in has non-skipped delimiters. Look for skipped
+            // delimiters in the lookahead range.
+            let tree_cursor = &self.token_cursor.tree_cursor;
             let all_normal = (0..dist).all(|i| {
                 let token = tree_cursor.look_ahead(i);
                 !matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
             });
             if all_normal {
+                // There were no skipped delimiters. Do lookahead by plain indexing.
                 return match tree_cursor.look_ahead(dist - 1) {
-                    Some(tree) => match tree {
-                        TokenTree::Token(token, _) => looker(token),
-                        TokenTree::Delimited(dspan, delim, _) => {
-                            looker(&Token::new(token::OpenDelim(*delim), dspan.open))
+                    Some(tree) => {
+                        // Indexing stayed within the current token stream.
+                        match tree {
+                            TokenTree::Token(token, _) => looker(token),
+                            TokenTree::Delimited(dspan, delim, _) => {
+                                looker(&Token::new(token::OpenDelim(*delim), dspan.open))
+                            }
                         }
-                    },
-                    None => looker(&Token::new(token::CloseDelim(delim), span.close)),
+                    }
+                    None => {
+                        // Indexing went past the end of the current token
+                        // stream. Use the close delimiter, no matter how far
+                        // ahead `dist` went.
+                        looker(&Token::new(token::CloseDelim(delim), span.close))
+                    }
                 };
             }
         }
 
+        // We are in a more complex case. Just clone the token cursor and use
+        // `next`, skipping delimiters as necessary. Slow but simple.
         let mut cursor = self.token_cursor.clone();
         let mut i = 0;
         let mut token = Token::dummy();
@@ -1476,7 +1491,7 @@ pub enum FlatToken {
 }
 
 #[derive(Debug)]
-pub enum NtOrTt {
+pub enum ParseNtResult {
     Nt(Nonterminal),
     Tt(TokenTree),
 }