From 904e70a7b00f41b168add13a33bc14f200442ad0 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 23 Mar 2022 11:44:13 +1100
Subject: [PATCH 1/4] Add a size assertion for `NamedMatchVec`.

---
 compiler/rustc_expand/src/mbe/macro_parser.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
index d8071bf159a74..8eaa6d3131c06 100644
--- a/compiler/rustc_expand/src/mbe/macro_parser.rs
+++ b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -103,6 +103,10 @@ struct MatcherTtFrame<'tt> {
 
 type NamedMatchVec = SmallVec<[NamedMatch; 4]>;
 
+// This type is used a lot. Make sure it doesn't unintentionally get bigger.
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+rustc_data_structures::static_assert_size!(NamedMatchVec, 72);
+
 /// Represents a single "position" (aka "matcher position", aka "item"), as
 /// described in the module documentation.
 #[derive(Clone)]

From 6817442ec7d9f29abfb080314f3c45ff5e3e633a Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 23 Mar 2022 11:46:22 +1100
Subject: [PATCH 2/4] Split `NamedMatch::MatchNonterminal` in two.

The `Lrc` is only relevant within `transcribe()`. There, the `Lrc` is
helpful for the non-`NtTT` cases, because the entire nonterminal is
cloned. But for the `NtTT` cases the inner token tree is cloned (a full
clone) and so the `Lrc` is of no help.

This commit splits the `NtTT` and non-`NtTT` cases, avoiding the useless
`Lrc` in the former case, for the following effect on macro-heavy
crates.
- It reduces the total number of allocations a lot.
- It increases the size of some of the remaining allocations.
- It doesn't affect *peak* memory usage, because the larger allocations
  are short-lived.

This overall gives a speed win.
---
 compiler/rustc_expand/src/mbe/macro_parser.rs | 52 +++++++++-------
 compiler/rustc_expand/src/mbe/macro_rules.rs  | 60 +++++++++----------
 compiler/rustc_expand/src/mbe/transcribe.rs   | 40 +++++++------
 3 files changed, 81 insertions(+), 71 deletions(-)

diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
index 8eaa6d3131c06..f2090899e99a1 100644
--- a/compiler/rustc_expand/src/mbe/macro_parser.rs
+++ b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -105,7 +105,7 @@ type NamedMatchVec = SmallVec<[NamedMatch; 4]>;
 
 // This type is used a lot. Make sure it doesn't unintentionally get bigger.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(NamedMatchVec, 72);
+rustc_data_structures::static_assert_size!(NamedMatchVec, 168);
 
 /// Represents a single "position" (aka "matcher position", aka "item"), as
 /// described in the module documentation.
@@ -278,22 +278,20 @@ pub(super) fn count_names(ms: &[TokenTree]) -> usize {
     })
 }
 
-/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
-/// so it is associated with a single ident in a parse, and all
-/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
-/// (expr, item, etc). Each leaf in a single `NamedMatch` corresponds to a
-/// single `token::MATCH_NONTERMINAL` in the `TokenTree` that produced it.
+/// `NamedMatch` is a pattern-match result for a single metavar. All
+/// `MatchedNtNonTt`s in the `NamedMatch` have the same non-terminal type
+/// (expr, item, etc).
 ///
 /// The in-memory structure of a particular `NamedMatch` represents the match
 /// that occurred when a particular subset of a matcher was applied to a
 /// particular token tree.
 ///
 /// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
-/// the `MatchedNonterminal`s, will depend on the token tree it was applied
-/// to: each `MatchedSeq` corresponds to a single `TTSeq` in the originating
+/// the `MatchedNtNonTts`s, will depend on the token tree it was applied
+/// to: each `MatchedSeq` corresponds to a single repetition in the originating
 /// token tree. The depth of the `NamedMatch` structure will therefore depend
-/// only on the nesting depth of `ast::TTSeq`s in the originating
-/// token tree it was derived from.
+/// only on the nesting depth of repetitions in the originating token tree it
+/// was derived from.
 ///
 /// In layman's terms: `NamedMatch` will form a tree representing nested matches of a particular
 /// meta variable. For example, if we are matching the following macro against the following
@@ -312,24 +310,32 @@ pub(super) fn count_names(ms: &[TokenTree]) -> usize {
 /// ```rust
 /// MatchedSeq([
 ///   MatchedSeq([
-///     MatchedNonterminal(a),
-///     MatchedNonterminal(b),
-///     MatchedNonterminal(c),
-///     MatchedNonterminal(d),
+///     MatchedNtNonTt(a),
+///     MatchedNtNonTt(b),
+///     MatchedNtNonTt(c),
+///     MatchedNtNonTt(d),
 ///   ]),
 ///   MatchedSeq([
-///     MatchedNonterminal(a),
-///     MatchedNonterminal(b),
-///     MatchedNonterminal(c),
-///     MatchedNonterminal(d),
-///     MatchedNonterminal(e),
+///     MatchedNtNonTt(a),
+///     MatchedNtNonTt(b),
+///     MatchedNtNonTt(c),
+///     MatchedNtNonTt(d),
+///     MatchedNtNonTt(e),
 ///   ])
 /// ])
 /// ```
 #[derive(Debug, Clone)]
 crate enum NamedMatch {
     MatchedSeq(Lrc<NamedMatchVec>),
-    MatchedNonterminal(Lrc<Nonterminal>),
+
+    // This variant should never hold an `NtTT`. `MatchedNtTt` should be used
+    // for that case.
+    MatchedNtNonTt(Lrc<Nonterminal>),
+
+    // `NtTT` is handled without any cloning when transcribing, unlike other
+    // nonterminals. Therefore, an `Lrc` isn't helpful and causes unnecessary
+    // allocations. Hence this separate variant.
+    MatchedNtTt(rustc_ast::tokenstream::TokenTree),
 }
 
 /// Takes a slice of token trees `ms` representing a matcher which successfully matched input
@@ -669,7 +675,11 @@ impl<'tt> TtParser<'tt> {
                             }
                             Ok(nt) => nt,
                         };
-                        item.push_match(match_cur, MatchedNonterminal(Lrc::new(nt)));
+                        let m = match nt {
+                            Nonterminal::NtTT(tt) => MatchedNtTt(tt),
+                            _ => MatchedNtNonTt(Lrc::new(nt)),
+                        };
+                        item.push_match(match_cur, m);
                         item.idx += 1;
                         item.match_cur += 1;
                     } else {
diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs
index f13b97251d210..7837de5c18dba 100644
--- a/compiler/rustc_expand/src/mbe/macro_rules.rs
+++ b/compiler/rustc_expand/src/mbe/macro_rules.rs
@@ -4,11 +4,11 @@ use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstF
 use crate::mbe;
 use crate::mbe::macro_check;
 use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser};
-use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
+use crate::mbe::macro_parser::{MatchedNtTt, MatchedSeq};
 use crate::mbe::transcribe::transcribe;
 
 use rustc_ast as ast;
-use rustc_ast::token::{self, NonterminalKind, NtTT, Token, TokenKind::*};
+use rustc_ast::token::{self, NonterminalKind, Token, TokenKind::*};
 use rustc_ast::tokenstream::{DelimSpan, TokenStream};
 use rustc_ast::{NodeId, DUMMY_NODE_ID};
 use rustc_ast_pretty::pprust;
@@ -470,22 +470,20 @@ pub fn compile_declarative_macro(
         MatchedSeq(ref s) => s
             .iter()
             .map(|m| {
-                if let MatchedNonterminal(ref nt) = *m {
-                    if let NtTT(ref tt) = **nt {
-                        let mut tts = vec![];
-                        mbe::quoted::parse(
-                            tt.clone().into(),
-                            true,
-                            &sess.parse_sess,
-                            def.id,
-                            features,
-                            edition,
-                            &mut tts,
-                        );
-                        let tt = tts.pop().unwrap();
-                        valid &= check_lhs_nt_follows(&sess.parse_sess, features, &def, &tt);
-                        return tt;
-                    }
+                if let MatchedNtTt(ref tt) = *m {
+                    let mut tts = vec![];
+                    mbe::quoted::parse(
+                        tt.clone().into(),
+                        true,
+                        &sess.parse_sess,
+                        def.id,
+                        features,
+                        edition,
+                        &mut tts,
+                    );
+                    let tt = tts.pop().unwrap();
+                    valid &= check_lhs_nt_follows(&sess.parse_sess, features, &def, &tt);
+                    return tt;
                 }
                 sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
             })
@@ -497,20 +495,18 @@ pub fn compile_declarative_macro(
         MatchedSeq(ref s) => s
             .iter()
             .map(|m| {
-                if let MatchedNonterminal(ref nt) = *m {
-                    if let NtTT(ref tt) = **nt {
-                        let mut tts = vec![];
-                        mbe::quoted::parse(
-                            tt.clone().into(),
-                            false,
-                            &sess.parse_sess,
-                            def.id,
-                            features,
-                            edition,
-                            &mut tts,
-                        );
-                        return tts.pop().unwrap();
-                    }
+                if let MatchedNtTt(ref tt) = *m {
+                    let mut tts = vec![];
+                    mbe::quoted::parse(
+                        tt.clone().into(),
+                        false,
+                        &sess.parse_sess,
+                        def.id,
+                        features,
+                        edition,
+                        &mut tts,
+                    );
+                    return tts.pop().unwrap();
                 }
                 sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
             })
diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs
index e097f9d9c02ed..228ed04548df4 100644
--- a/compiler/rustc_expand/src/mbe/transcribe.rs
+++ b/compiler/rustc_expand/src/mbe/transcribe.rs
@@ -1,8 +1,8 @@
 use crate::base::ExtCtxt;
-use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, NamedMatch};
+use crate::mbe::macro_parser::{MatchedNtNonTt, MatchedNtTt, MatchedSeq, NamedMatch};
 use crate::mbe::{self, MetaVarExpr};
 use rustc_ast::mut_visit::{self, MutVisitor};
-use rustc_ast::token::{self, NtTT, Token, TokenKind};
+use rustc_ast::token::{self, Nonterminal, Token, TokenKind};
 use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree, TreeAndSpacing};
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
@@ -233,25 +233,29 @@ pub(super) fn transcribe<'a>(
                 // the meta-var.
                 let ident = MacroRulesNormalizedIdent::new(orignal_ident);
                 if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
-                    if let MatchedNonterminal(nt) = cur_matched {
-                        let token = if let NtTT(tt) = &**nt {
+                    match cur_matched {
+                        MatchedNtTt(ref tt) => {
                             // `tt`s are emitted into the output stream directly as "raw tokens",
                             // without wrapping them into groups.
-                            tt.clone()
-                        } else {
+                            let token = tt.clone();
+                            result.push(token.into());
+                        }
+                        MatchedNtNonTt(ref nt) => {
                             // Other variables are emitted into the output stream as groups with
                             // `Delimiter::None` to maintain parsing priorities.
                             // `Interpolated` is currently used for such groups in rustc parser.
+                            debug_assert!(!matches!(**nt, Nonterminal::NtTT(_)));
                             marker.visit_span(&mut sp);
-                            TokenTree::token(token::Interpolated(nt.clone()), sp)
-                        };
-                        result.push(token.into());
-                    } else {
-                        // We were unable to descend far enough. This is an error.
-                        return Err(cx.struct_span_err(
-                            sp, /* blame the macro writer */
-                            &format!("variable '{}' is still repeating at this depth", ident),
-                        ));
+                            let token = TokenTree::token(token::Interpolated(nt.clone()), sp);
+                            result.push(token.into());
+                        }
+                        MatchedSeq(..) => {
+                            // We were unable to descend far enough. This is an error.
+                            return Err(cx.struct_span_err(
+                                sp, /* blame the macro writer */
+                                &format!("variable '{}' is still repeating at this depth", ident),
+                            ));
+                        }
                     }
                 } else {
                     // If we aren't able to match the meta-var, we push it back into the result but
@@ -308,7 +312,7 @@ fn lookup_cur_matched<'a>(
         let mut matched = matched;
         for &(idx, _) in repeats {
             match matched {
-                MatchedNonterminal(_) => break,
+                MatchedNtTt(_) | MatchedNtNonTt(_) => break,
                 MatchedSeq(ref ads) => matched = ads.get(idx).unwrap(),
             }
         }
@@ -398,7 +402,7 @@ fn lockstep_iter_size(
             let name = MacroRulesNormalizedIdent::new(name);
             match lookup_cur_matched(name, interpolations, repeats) {
                 Some(matched) => match matched {
-                    MatchedNonterminal(_) => LockstepIterSize::Unconstrained,
+                    MatchedNtTt(_) | MatchedNtNonTt(_) => LockstepIterSize::Unconstrained,
                     MatchedSeq(ref ads) => LockstepIterSize::Constraint(ads.len(), name),
                 },
                 _ => LockstepIterSize::Unconstrained,
@@ -445,7 +449,7 @@ fn count_repetitions<'a>(
         sp: &DelimSpan,
     ) -> PResult<'a, usize> {
         match matched {
-            MatchedNonterminal(_) => {
+            MatchedNtTt(_) | MatchedNtNonTt(_) => {
                 if declared_lhs_depth == 0 {
                     return Err(cx.struct_span_err(
                         sp.entire(),

From cad5f1e7742e46ce16dc54d38516301e3ae72a9e Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 23 Mar 2022 13:09:55 +1100
Subject: [PATCH 3/4] Shrink `NamedMatchVec` to one inline element.

This counters the `NamedMatchVec` size increase from the previous
commit, leaving `NamedMatchVec` smaller than before.
---
 compiler/rustc_expand/src/mbe/macro_parser.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
index f2090899e99a1..63c43cc8a33d3 100644
--- a/compiler/rustc_expand/src/mbe/macro_parser.rs
+++ b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -101,11 +101,14 @@ struct MatcherTtFrame<'tt> {
     idx: usize,
 }
 
-type NamedMatchVec = SmallVec<[NamedMatch; 4]>;
+// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
+// vectors longer than one frequently have many elements, not just two or
+// three.
+type NamedMatchVec = SmallVec<[NamedMatch; 1]>;
 
 // This type is used a lot. Make sure it doesn't unintentionally get bigger.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(NamedMatchVec, 168);
+rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
 
 /// Represents a single "position" (aka "matcher position", aka "item"), as
 /// described in the module documentation.

From fdec26ddad3f54c2606afff830d293468edc4c33 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 23 Mar 2022 16:04:33 +1100
Subject: [PATCH 4/4] Shrink `MatcherPosRepetition`.

Currently it copies a `KleeneOp` and a `Token` out of a
`SequenceRepetition`. It's better to store a reference to the
`SequenceRepetition`, which is now possible due to #95159 having changed
the lifetimes.
---
 compiler/rustc_expand/src/mbe/macro_parser.rs | 28 +++++++++----------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
index 63c43cc8a33d3..5e97fc903209f 100644
--- a/compiler/rustc_expand/src/mbe/macro_parser.rs
+++ b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -160,7 +160,7 @@ struct MatcherPos<'tt> {
 
 // This type is used a lot. Make sure it doesn't unintentionally get bigger.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(MatcherPos<'_>, 136);
+rustc_data_structures::static_assert_size!(MatcherPos<'_>, 112);
 
 impl<'tt> MatcherPos<'tt> {
     /// `len` `Vec`s (initially shared and empty) that will store matches of metavars.
@@ -209,11 +209,7 @@ impl<'tt> MatcherPos<'tt> {
             match_lo: up.match_cur,
             match_cur: up.match_cur,
             match_hi: up.match_cur + seq.num_captures,
-            repetition: Some(MatcherPosRepetition {
-                up,
-                sep: seq.separator.clone(),
-                seq_op: seq.kleene.op,
-            }),
+            repetition: Some(MatcherPosRepetition { up, seq }),
             stack: smallvec![],
         }
     }
@@ -227,15 +223,12 @@ impl<'tt> MatcherPos<'tt> {
 
 #[derive(Clone)]
 struct MatcherPosRepetition<'tt> {
-    /// The KleeneOp of this sequence.
-    seq_op: mbe::KleeneOp,
-
-    /// The separator.
-    sep: Option<Token>,
-
     /// The "parent" matcher position. That is, the matcher position just before we enter the
     /// sequence.
     up: Box<MatcherPos<'tt>>,
+
+    /// The sequence itself.
+    seq: &'tt SequenceRepetition,
 }
 
 enum EofItems<'tt> {
@@ -559,14 +552,19 @@ impl<'tt> TtParser<'tt> {
                     self.cur_items.push(new_pos);
                 }
 
-                if idx == len && repetition.sep.is_some() {
-                    if repetition.sep.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
+                if idx == len && repetition.seq.separator.is_some() {
+                    if repetition
+                        .seq
+                        .separator
+                        .as_ref()
+                        .map_or(false, |sep| token_name_eq(token, sep))
+                    {
                         // The matcher has a separator, and it matches the current token. We can
                         // advance past the separator token.
                         item.idx += 1;
                         self.next_items.push(item);
                     }
-                } else if repetition.seq_op != mbe::KleeneOp::ZeroOrOne {
+                } else if repetition.seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
                     // We don't need a separator. Move the "dot" back to the beginning of the
                     // matcher and try to match again UNLESS we are only allowed to have _one_
                     // repetition.