diff --git a/crates/cfg/src/cfg_expr.rs b/crates/cfg/src/cfg_expr.rs index 35c0c89c70cf..b0936cf48f93 100644 --- a/crates/cfg/src/cfg_expr.rs +++ b/crates/cfg/src/cfg_expr.rs @@ -45,8 +45,8 @@ impl From for CfgExpr { impl CfgExpr { #[cfg(feature = "tt")] - pub fn parse(tt: &tt::Subtree) -> CfgExpr { - next_cfg_expr(&mut tt.token_trees.iter()).unwrap_or(CfgExpr::Invalid) + pub fn parse(tt: &tt::TokenStream) -> CfgExpr { + next_cfg_expr(tt.trees()).unwrap_or(CfgExpr::Invalid) } /// Fold the cfg by querying all basic `Atom` and `KeyValue` predicates. @@ -66,48 +66,52 @@ impl CfgExpr { } #[cfg(feature = "tt")] -fn next_cfg_expr(it: &mut std::slice::Iter<'_, tt::TokenTree>) -> Option { +fn next_cfg_expr(mut it: tt::RefTokenTreeCursor<'_, S>) -> Option { use intern::sym; let name = match it.next() { None => return None, - Some(tt::TokenTree::Leaf(tt::Leaf::Ident(ident))) => ident.sym.clone(), + Some(tt::TokenTree::Token(tt::Token { kind: tt::TokenKind::Ident(ident, _), .. }, _)) => { + ident.clone() + } Some(_) => return Some(CfgExpr::Invalid), }; // Peek - let ret = match it.as_slice().first() { - Some(tt::TokenTree::Leaf(tt::Leaf::Punct(punct))) if punct.char == '=' => { - match it.as_slice().get(1) { - Some(tt::TokenTree::Leaf(tt::Leaf::Literal(literal))) => { - it.next(); - it.next(); - CfgAtom::KeyValue { key: name, value: literal.symbol.clone() }.into() + let ret = match it.look_ahead(0) { + Some(tt::TokenTree::Token(tt::Token { kind: tt::TokenKind::Eq, .. }, _)) => { + match it.look_ahead(1) { + Some(tt::TokenTree::Token( + tt::Token { kind: tt::TokenKind::Literal(lit), .. }, + _, + )) => { + let res = CfgAtom::KeyValue { key: name, value: lit.symbol.clone() }.into(); + _ = it.next(); + _ = it.next(); + res } _ => return Some(CfgExpr::Invalid), } } - Some(tt::TokenTree::Subtree(subtree)) => { - it.next(); - let mut sub_it = subtree.token_trees.iter(); - let mut subs = std::iter::from_fn(|| next_cfg_expr(&mut sub_it)); - match name { + Some(tt::TokenTree::Delimited(_, _, _, stream)) => { + let mut subs = std::iter::from_fn(|| next_cfg_expr(stream.trees())); + let res = match name { s if s == sym::all => CfgExpr::All(subs.collect()), s if s == sym::any => CfgExpr::Any(subs.collect()), s if s == sym::not => { CfgExpr::Not(Box::new(subs.next().unwrap_or(CfgExpr::Invalid))) } _ => CfgExpr::Invalid, - } + }; + it.next(); + res } _ => CfgAtom::Flag(name).into(), }; // Eat comma separator - if let Some(tt::TokenTree::Leaf(tt::Leaf::Punct(punct))) = it.as_slice().first() { - if punct.char == ',' { - it.next(); - } + if let Some(tt::TokenTree::Token(tt::Token { kind: tt::TokenKind::Comma, .. }, _)) = it.next() { + it.next(); } Some(ret) } diff --git a/crates/hir-expand/src/db.rs b/crates/hir-expand/src/db.rs index 0d19ae202ce0..ed0f314a2745 100644 --- a/crates/hir-expand/src/db.rs +++ b/crates/hir-expand/src/db.rs @@ -28,7 +28,7 @@ use crate::{ MacroDefId, MacroDefKind, MacroFileId, }; /// This is just to ensure the types of smart_macro_arg and macro_arg are the same -type MacroArgResult = (Arc, SyntaxFixupUndoInfo, Span); +type MacroArgResult = (Arc, SyntaxFixupUndoInfo, Span); /// Total limit on the number of tokens produced by any macro invocation. /// /// If an invocation produces more tokens than this limit, it will not be stored in the database and diff --git a/crates/hir-expand/src/lib.rs b/crates/hir-expand/src/lib.rs index 2ee598dfbfdc..51e22c08e9eb 100644 --- a/crates/hir-expand/src/lib.rs +++ b/crates/hir-expand/src/lib.rs @@ -75,6 +75,7 @@ pub mod tt { pub type Punct = ::tt::Punct; pub type Ident = ::tt::Ident; pub type TokenTree = ::tt::TokenTree; + pub type TokenStream = ::tt::TokenStream; } #[macro_export] diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs index ca10a2be2732..d5debaf7bd71 100644 --- a/crates/mbe/src/lib.rs +++ b/crates/mbe/src/lib.rs @@ -23,15 +23,14 @@ mod tests; use span::{Edition, Span, SyntaxContextId}; use syntax_bridge::to_parser_input; -use tt::iter::TtIter; -use tt::DelimSpan; +use tt::{DelimSpan, RefTokenCursor, Token, TokenCursor, TokenKind, TokenStream}; use std::fmt; use std::sync::Arc; use crate::parser::{MetaTemplate, MetaVarKind, Op}; -pub use tt::{Delimiter, DelimiterKind, Punct}; +pub use tt::Delimiter; #[derive(Debug, PartialEq, Eq, Clone)] pub enum ParseError { @@ -148,18 +147,18 @@ impl DeclarativeMacro { /// The old, `macro_rules! m {}` flavor. pub fn parse_macro_rules( - tt: &tt::Subtree, + stream: &TokenStream, ctx_edition: impl Copy + Fn(SyntaxContextId) -> Edition, ) -> DeclarativeMacro { // Note: this parsing can be implemented using mbe machinery itself, by // matching against `$($lhs:tt => $rhs:tt);*` pattern, but implementing // manually seems easier. - let mut src = TtIter::new(tt); + let mut cursor = RefTokenCursor::new(stream); let mut rules = Vec::new(); let mut err = None; - while src.len() > 0 { - let rule = match Rule::parse(ctx_edition, &mut src) { + while let Some((token, _)) = cursor.next() { + let rule = match Rule::parse(ctx_edition, &mut cursor) { Ok(it) => it, Err(e) => { err = Some(Box::new(e)); @@ -167,11 +166,13 @@ impl DeclarativeMacro { } }; rules.push(rule); - if let Err(()) = src.expect_char(';') { - if src.len() > 0 { + match cursor.next() { + Some((Token { kind: TokenKind::Semi, .. }, _)) => (), + Some((Token { span, .. }, _)) => { err = Some(Box::new(ParseError::expected("expected `;`"))); + break; } - break; + None => break, } } @@ -187,8 +188,8 @@ impl DeclarativeMacro { /// The new, unstable `macro m {}` flavor. pub fn parse_macro2( - args: Option<&tt::Subtree>, - body: &tt::Subtree, + args: Option<&tt::TokenStream>, + body: &tt::TokenStream, ctx_edition: impl Copy + Fn(SyntaxContextId) -> Edition, ) -> DeclarativeMacro { let mut rules = Vec::new(); @@ -210,9 +211,9 @@ impl DeclarativeMacro { } } else { cov_mark::hit!(parse_macro_def_rules); - let mut src = TtIter::new(body); - while src.len() > 0 { - let rule = match Rule::parse(ctx_edition, &mut src) { + let mut cursor = RefTokenCursor::new(body); + while let Some((token, _)) = cursor.next() { + let rule = match Rule::parse(ctx_edition, &mut cursor) { Ok(it) => it, Err(e) => { err = Some(Box::new(e)); @@ -220,13 +221,15 @@ impl DeclarativeMacro { } }; rules.push(rule); - if let Err(()) = src.expect_any_char(&[';', ',']) { - if src.len() > 0 { + match cursor.next() { + Some((Token { kind: TokenKind::Semi | TokenKind::Comma, .. }, _)) => (), + Some((Token { span, .. }, _)) => { err = Some(Box::new(ParseError::expected( "expected `;` or `,` to delimit rules", ))); + break; } - break; + None => break, } } } @@ -251,11 +254,11 @@ impl DeclarativeMacro { pub fn expand( &self, - tt: &tt::Subtree, + tt: &tt::TokenStream, marker: impl Fn(&mut Span) + Copy, call_site: Span, def_site_edition: Edition, - ) -> ExpandResult<(tt::Subtree, MatchedArmIndex)> { + ) -> ExpandResult<(tt::TokenStream, MatchedArmIndex)> { expander::expand_rules(&self.rules, tt, marker, call_site, def_site_edition) } } @@ -263,7 +266,7 @@ impl DeclarativeMacro { impl Rule { fn parse( edition: impl Copy + Fn(SyntaxContextId) -> Edition, - src: &mut TtIter<'_, Span>, + src: &mut RefTokenCursor<'_, Span>, ) -> Result { let lhs = src.expect_subtree().map_err(|()| ParseError::expected("expected subtree"))?; src.expect_char('=').map_err(|()| ParseError::expected("expected `=`"))?; @@ -360,31 +363,26 @@ impl From> for ValueResult { } pub fn expect_fragment( - tt_iter: &mut TtIter<'_, Span>, + cursor: &mut RefTokenCursor<'_, Span>, entry_point: ::parser::PrefixEntryPoint, edition: ::parser::Edition, delim_span: DelimSpan, ) -> ExpandResult>> { use ::parser; - let buffer = tt::buffer::TokenBuffer::from_tokens(tt_iter.as_slice()); - let parser_input = to_parser_input(edition, &buffer); + let parser_input = to_parser_input(edition, cursor.clone()); let tree_traversal = entry_point.parse(&parser_input, edition); - let mut cursor = buffer.begin(); let mut error = false; for step in tree_traversal.iter() { match step { parser::Step::Token { kind, mut n_input_tokens } => { - if kind == ::parser::SyntaxKind::LIFETIME_IDENT { - n_input_tokens = 2; - } for _ in 0..n_input_tokens { - cursor = cursor.bump_subtree(); + cursor.next(); } } parser::Step::FloatSplit { .. } => { // FIXME: We need to split the tree properly here, but mutating the token trees // in the buffer is somewhat tricky to pull off. - cursor = cursor.bump_subtree(); + cursor.next(); } parser::Step::Enter { .. } | parser::Step::Exit => (), parser::Step::Error { .. } => error = true, @@ -409,7 +407,7 @@ pub fn expect_fragment( curr = curr.bump(); } - *tt_iter = TtIter::new_iter(tt_iter.as_slice()[res.len()..].iter()); + *cursor = TtIter::new_iter(cursor.as_slice()[res.len()..].iter()); let res = match &*res { [] | [_] => res.pop(), [first, ..] => Some(tt::TokenTree::Subtree(tt::Subtree { diff --git a/crates/syntax-bridge/src/lib.rs b/crates/syntax-bridge/src/lib.rs index 3a05b83e4970..8be84937c7aa 100644 --- a/crates/syntax-bridge/src/lib.rs +++ b/crates/syntax-bridge/src/lib.rs @@ -2,7 +2,7 @@ use std::fmt; -use intern::Symbol; +use intern::{sym, Symbol}; use rustc_hash::{FxHashMap, FxHashSet}; use span::{Edition, SpanAnchor, SpanData, SpanMap}; use stdx::{format_to, never, non_empty_vec::NonEmptyVec}; @@ -13,8 +13,8 @@ use syntax::{ SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T, }; use tt::{ - buffer::{Cursor, TokenBuffer}, - token_to_literal, + token_to_literal, BinOpToken, Delimiter, IdentIsRaw, RefTokenCursor, Spacing, TokenKind, + TokenStream, }; pub mod prettify_macro_expansion; @@ -99,13 +99,13 @@ pub fn syntax_node_to_token_tree( map: SpanMap, span: SpanData, mode: DocCommentDesugarMode, -) -> tt::Subtree> +) -> tt::TokenStream> where SpanData: Copy + fmt::Debug, SpanMap: SpanMapper>, { let mut c = Converter::new(node, map, Default::default(), Default::default(), span, mode); - convert_tokens(&mut c) + convert_to_token_tree(&mut c) } /// Converts a syntax tree to a [`tt::Subtree`] using the provided span map to populate the @@ -114,17 +114,17 @@ where pub fn syntax_node_to_token_tree_modified( node: &SyntaxNode, map: SpanMap, - append: FxHashMap>>>, + append: FxHashMap>>>, remove: FxHashSet, call_site: SpanData, mode: DocCommentDesugarMode, -) -> tt::Subtree> +) -> tt::TokenStream> where SpanMap: SpanMapper>, SpanData: Copy + fmt::Debug, { let mut c = Converter::new(node, map, append, remove, call_site, mode); - convert_tokens(&mut c) + convert_to_token_tree(&mut c) } // The following items are what `rustc` macro can be parsed into : @@ -139,10 +139,10 @@ where // * AssocItems(SmallVec<[ast::AssocItem; 1]>) // * ForeignItems(SmallVec<[ast::ForeignItem; 1]> -/// Converts a [`tt::Subtree`] back to a [`SyntaxNode`]. +/// Converts a [`tt::TokenStream`] back to a [`SyntaxNode`]. /// The produced `SpanMap` contains a mapping from the syntax nodes offsets to the subtree's spans. pub fn token_tree_to_syntax_node( - tt: &tt::Subtree>, + tt: &tt::TokenStream>, entry_point: parser::TopEntryPoint, edition: parser::Edition, ) -> (Parse, SpanMap) @@ -150,23 +150,17 @@ where SpanData: Copy + fmt::Debug, Ctx: PartialEq, { - let buffer = match tt { - tt::Subtree { - delimiter: tt::Delimiter { kind: tt::DelimiterKind::Invisible, .. }, - token_trees, - } => TokenBuffer::from_tokens(token_trees), - _ => TokenBuffer::from_subtree(tt), - }; - let parser_input = to_parser_input(edition, &buffer); + let parser_input = to_parser_input(edition, RefTokenCursor::new(tt)); let parser_output = entry_point.parse(&parser_input, edition); - let mut tree_sink = TtTreeSink::new(buffer.begin()); + let mut tree_sink = TtTreeSink::new(RefTokenCursor::new(tt)); for event in parser_output.iter() { match event { parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => { tree_sink.token(kind, n_raw_tokens) } parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => { - tree_sink.float_split(has_pseudo_dot) + todo!() + // tree_sink.float_split(has_pseudo_dot) } parser::Step::Enter { kind } => tree_sink.start_node(kind), parser::Step::Exit => tree_sink.finish_node(), @@ -183,7 +177,7 @@ pub fn parse_to_token_tree( anchor: SpanAnchor, ctx: Ctx, text: &str, -) -> Option>> +) -> Option>> where SpanData: Copy + fmt::Debug, Ctx: Copy, @@ -194,7 +188,7 @@ where } let mut conv = RawConverter { lexed, anchor, pos: 0, ctx, mode: DocCommentDesugarMode::ProcMacro }; - Some(convert_tokens(&mut conv)) + Some(convert_to_token_tree(&mut conv)) } /// Convert a string to a `TokenTree`. The passed span will be used for all spans of the produced subtree. @@ -202,7 +196,7 @@ pub fn parse_to_token_tree_static_span( edition: Edition, span: S, text: &str, -) -> Option> +) -> Option> where S: Copy + fmt::Debug, { @@ -212,165 +206,162 @@ where } let mut conv = StaticRawConverter { lexed, pos: 0, span, mode: DocCommentDesugarMode::ProcMacro }; - Some(convert_tokens(&mut conv)) + Some(convert_to_token_tree(&mut conv)) +} + +struct TokenStreamBuilder { + tt: Vec>, } -fn convert_tokens(conv: &mut C) -> tt::Subtree +fn convert_to_token_tree(conv: &mut C) -> tt::TokenStream where C: TokenConverter, S: Copy + fmt::Debug, - C::Token: fmt::Debug, { - let entry = tt::SubtreeBuilder { - delimiter: tt::Delimiter::invisible_spanned(conv.call_site()), - token_trees: vec![], - }; - let mut stack = NonEmptyVec::new(entry); - - while let Some((token, abs_range)) = conv.bump() { - let tt::SubtreeBuilder { delimiter, token_trees } = stack.last_mut(); - - let tt = match token.as_leaf() { - Some(leaf) => tt::TokenTree::Leaf(leaf.clone()), - None => match token.kind(conv) { - // Desugar doc comments into doc attributes - COMMENT => { - let span = conv.span_for(abs_range); - if let Some(tokens) = conv.convert_doc_comment(&token, span) { - token_trees.extend(tokens); - } - continue; - } - kind if kind.is_punct() && kind != UNDERSCORE => { - let expected = match delimiter.kind { - tt::DelimiterKind::Parenthesis => Some(T![')']), - tt::DelimiterKind::Brace => Some(T!['}']), - tt::DelimiterKind::Bracket => Some(T![']']), - tt::DelimiterKind::Invisible => None, - }; - - // Current token is a closing delimiter that we expect, fix up the closing span - // and end the subtree here - if matches!(expected, Some(expected) if expected == kind) { - if let Some(mut subtree) = stack.pop() { - subtree.delimiter.close = conv.span_for(abs_range); - stack.last_mut().token_trees.push(subtree.build().into()); - } - continue; - } + let mut stack = NonEmptyVec::new((None, vec![])); - let delim = match kind { - T!['('] => Some(tt::DelimiterKind::Parenthesis), - T!['{'] => Some(tt::DelimiterKind::Brace), - T!['['] => Some(tt::DelimiterKind::Bracket), - _ => None, - }; - - // Start a new subtree - if let Some(kind) = delim { - let open = conv.span_for(abs_range); - stack.push(tt::SubtreeBuilder { - delimiter: tt::Delimiter { - open, - // will be overwritten on subtree close above - close: open, - kind, - }, - token_trees: vec![], - }); - continue; - } + while let Some((token, abs_range, text)) = conv.bump() { + let (b, tt) = stack.last_mut(); - let spacing = match conv.peek().map(|next| next.kind(conv)) { - Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint, - _ => tt::Spacing::Alone, - }; - let Some(char) = token.to_char(conv) else { - panic!("Token from lexer must be single char: token = {token:#?}") - }; - tt::Leaf::from(tt::Punct { char, spacing, span: conv.span_for(abs_range) }) - .into() - } - kind => { - macro_rules! make_ident { - () => { - tt::Ident { - span: conv.span_for(abs_range), - sym: Symbol::intern(&token.to_text(conv)), - is_raw: tt::IdentIsRaw::No, - } - .into() - }; - } - let leaf: tt::Leaf<_> = match kind { - k if k.is_any_identifier() => { - let text = token.to_text(conv); - tt::Ident::new(&text, conv.span_for(abs_range)).into() - } - UNDERSCORE => make_ident!(), - k if k.is_literal() => { - let text = token.to_text(conv); - let span = conv.span_for(abs_range); - token_to_literal(&text, span).into() - } - LIFETIME_IDENT => { - let apostrophe = tt::Leaf::from(tt::Punct { - char: '\'', - spacing: tt::Spacing::Joint, - span: conv - .span_for(TextRange::at(abs_range.start(), TextSize::of('\''))), - }); - token_trees.push(apostrophe.into()); - - let ident = tt::Leaf::from(tt::Ident { - sym: Symbol::intern(&token.to_text(conv)[1..]), - span: conv.span_for(TextRange::new( - abs_range.start() + TextSize::of('\''), - abs_range.end(), - )), - is_raw: tt::IdentIsRaw::No, - }); - token_trees.push(ident.into()); - continue; - } - _ => continue, - }; + let kind = match token { + L_PAREN => { + stack.push((Some((Delimiter::Parenthesis, conv.span_for(abs_range))), vec![])); + continue; + } + L_CURLY => { + stack.push((Some((Delimiter::Brace, conv.span_for(abs_range))), vec![])); + continue; + } + L_BRACK => { + stack.push((Some((Delimiter::Bracket, conv.span_for(abs_range))), vec![])); + continue; + } - leaf.into() + R_CURLY if matches!(b, Some((Delimiter::Brace, _))) => { + if let Some((Some((delim, open)), tt)) = stack.pop() { + stack.last_mut().1.push(tt::TokenTree::Delimited( + tt::DelimSpan { open, close: conv.span_for(abs_range) }, + tt::DelimSpacing::new(Spacing::Alone, Spacing::Alone), + delim, + TokenStream(tt.into_boxed_slice()), + )); } - }, + continue; + } + R_PAREN if matches!(b, Some((Delimiter::Parenthesis, _))) => { + if let Some((Some((delim, open)), tt)) = stack.pop() { + stack.last_mut().1.push(tt::TokenTree::Delimited( + tt::DelimSpan { open, close: conv.span_for(abs_range) }, + tt::DelimSpacing::new(Spacing::Alone, Spacing::Alone), + delim, + TokenStream(tt.into_boxed_slice()), + )); + } + continue; + } + R_BRACK if matches!(b, Some((Delimiter::Bracket, _))) => { + if let Some((Some((delim, open)), tt)) = stack.pop() { + stack.last_mut().1.push(tt::TokenTree::Delimited( + tt::DelimSpan { open, close: conv.span_for(abs_range) }, + tt::DelimSpacing::new(Spacing::Alone, Spacing::Alone), + delim, + TokenStream(tt.into_boxed_slice()), + )); + } + continue; + } + + L_ANGLE => TokenKind::Lt, + R_ANGLE => TokenKind::Gt, + + DOLLAR => TokenKind::Dollar, + SEMICOLON => TokenKind::Semi, + COMMA => TokenKind::Comma, + AT => TokenKind::At, + POUND => TokenKind::Pound, + TILDE => TokenKind::Tilde, + QUESTION => TokenKind::Question, + AMP => TokenKind::BinOp(BinOpToken::And), + PIPE => TokenKind::BinOp(BinOpToken::Or), + PLUS => TokenKind::BinOp(BinOpToken::Plus), + STAR => TokenKind::BinOp(BinOpToken::Star), + SLASH => TokenKind::BinOp(BinOpToken::Slash), + CARET => TokenKind::BinOp(BinOpToken::Caret), + PERCENT => TokenKind::BinOp(BinOpToken::Percent), + MINUS => TokenKind::BinOp(BinOpToken::Minus), + UNDERSCORE => TokenKind::Ident(sym::underscore.clone(), tt::IdentIsRaw::No), + DOT => TokenKind::Dot, + DOT2 => unreachable!(), + DOT3 => unreachable!(), + DOT2EQ => unreachable!(), + COLON => TokenKind::Colon, + COLON2 => unreachable!(), + EQ => TokenKind::Eq, + EQ2 => TokenKind::EqEq, + FAT_ARROW => TokenKind::FatArrow, + BANG => TokenKind::Not, + NEQ => TokenKind::Ne, + THIN_ARROW => TokenKind::RArrow, + LTEQ => TokenKind::Le, + GTEQ => TokenKind::Ge, + PLUSEQ => unreachable!(), + MINUSEQ => unreachable!(), + PIPEEQ => unreachable!(), + AMPEQ => unreachable!(), + CARETEQ => unreachable!(), + SLASHEQ => unreachable!(), + STAREQ => unreachable!(), + PERCENTEQ => unreachable!(), + AMP2 => unreachable!(), + PIPE2 => unreachable!(), + SHL => TokenKind::Lt, + SHR => TokenKind::Gt, + SHLEQ => unreachable!(), + SHREQ => unreachable!(), + // FIXME: split up (raw) c string literals to an ident and a string literal when edition < 2021. + k if k.is_literal() => TokenKind::Literal(Box::new(token_to_literal(&text))), + // FIXME: Doc desugaring + COMMENT => continue, + LIFETIME_IDENT => TokenKind::Lifetime(Symbol::intern(text)), + ident if ident.is_any_identifier() => { + let (raw, sym) = IdentIsRaw::split_from_symbol(text); + TokenKind::Ident(Symbol::intern(sym), raw) + } + WHITESPACE => continue, + kind => unreachable!("{kind:?}"), + }; + let spacing = match conv.peek() { + Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint, + _ => tt::Spacing::Alone, }; - token_trees.push(tt); + tt.push(tt::TokenTree::Token(tt::Token::new(kind, conv.span_for(abs_range)), spacing)); } // If we get here, we've consumed all input tokens. // We might have more than one subtree in the stack, if the delimiters are improperly balanced. // Merge them so we're left with one. - while let Some(entry) = stack.pop() { - let parent = stack.last_mut(); - - let leaf: tt::Leaf<_> = tt::Punct { - span: entry.delimiter.open, - char: match entry.delimiter.kind { - tt::DelimiterKind::Parenthesis => '(', - tt::DelimiterKind::Brace => '{', - tt::DelimiterKind::Bracket => '[', - tt::DelimiterKind::Invisible => '$', - }, - spacing: tt::Spacing::Alone, - } - .into(); - parent.token_trees.push(leaf.into()); - parent.token_trees.extend(entry.token_trees); - } - - let subtree = stack.into_last().build(); - if let [tt::TokenTree::Subtree(first)] = &*subtree.token_trees { - first.clone() - } else { - subtree - } + // while let Some(entry) = stack.pop() { + // let parent = stack.last_mut(); + + // let leaf: tt::Leaf<_> = tt::Punct { + // span: entry.delimiter.open, + // char: match entry.delimiter.kind { + // tt::DelimiterKind::Parenthesis => '(', + // tt::DelimiterKind::Brace => '{', + // tt::DelimiterKind::Bracket => '[', + // tt::DelimiterKind::Invisible => '$', + // }, + // spacing: tt::Spacing::Alone, + // } + // .into(); + // parent.token_trees.push(leaf.into()); + // parent.token_trees.extend(entry.token_trees); + // } + + let (_delim, token_trees) = stack.into_last(); + assert!(_delim.is_none()); + TokenStream(token_trees.into_boxed_slice()) } fn is_single_token_op(kind: SyntaxKind) -> bool { @@ -432,60 +423,60 @@ pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (Sym } } -fn convert_doc_comment( - token: &syntax::SyntaxToken, - span: S, - mode: DocCommentDesugarMode, -) -> Option>> { - let comment = ast::Comment::cast(token.clone())?; - let doc = comment.kind().doc?; - - let mk_ident = |s: &str| { - tt::TokenTree::from(tt::Leaf::from(tt::Ident { - sym: Symbol::intern(s), - span, - is_raw: tt::IdentIsRaw::No, - })) - }; - - let mk_punct = |c: char| { - tt::TokenTree::from(tt::Leaf::from(tt::Punct { - char: c, - spacing: tt::Spacing::Alone, - span, - })) - }; - - let mk_doc_literal = |comment: &ast::Comment| { - let prefix_len = comment.prefix().len(); - let mut text = &comment.text()[prefix_len..]; - - // Remove ending "*/" - if comment.kind().shape == ast::CommentShape::Block { - text = &text[0..text.len() - 2]; - } - let (text, kind) = desugar_doc_comment_text(text, mode); - let lit = tt::Literal { symbol: text, span, kind, suffix: None }; - - tt::TokenTree::from(tt::Leaf::from(lit)) - }; - - // Make `doc="\" Comments\"" - let meta_tkns = Box::new([mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)]); - - // Make `#![]` - let mut token_trees = Vec::with_capacity(3); - token_trees.push(mk_punct('#')); - if let ast::CommentPlacement::Inner = doc { - token_trees.push(mk_punct('!')); - } - token_trees.push(tt::TokenTree::from(tt::Subtree { - delimiter: tt::Delimiter { open: span, close: span, kind: tt::DelimiterKind::Bracket }, - token_trees: meta_tkns, - })); - - Some(token_trees) -} +// fn convert_doc_comment( +// token: &syntax::SyntaxToken, +// span: S, +// mode: DocCommentDesugarMode, +// ) -> Option>> { +// let comment = ast::Comment::cast(token.clone())?; +// let doc = comment.kind().doc?; + +// let mk_ident = |s: &str| { +// tt::TokenTree::from(tt::Leaf::from(tt::Ident { +// sym: Symbol::intern(s), +// span, +// is_raw: tt::IdentIsRaw::No, +// })) +// }; + +// let mk_punct = |c: char| { +// tt::TokenTree::from(tt::Leaf::from(tt::Punct { +// char: c, +// spacing: tt::Spacing::Alone, +// span, +// })) +// }; + +// let mk_doc_literal = |comment: &ast::Comment| { +// let prefix_len = comment.prefix().len(); +// let mut text = &comment.text()[prefix_len..]; + +// // Remove ending "*/" +// if comment.kind().shape == ast::CommentShape::Block { +// text = &text[0..text.len() - 2]; +// } +// let (text, kind) = desugar_doc_comment_text(text, mode); +// let lit = tt::Literal { symbol: text, span, kind, suffix: None }; + +// tt::TokenTree::from(tt::Leaf::from(lit)) +// }; + +// // Make `doc="\" Comments\"" +// let meta_tkns = Box::new([mk_ident("doc"), mk_punct('='), mk_doc_literal(&comment)]); + +// // Make `#![]` +// let mut token_trees = Vec::with_capacity(3); +// token_trees.push(mk_punct('#')); +// if let ast::CommentPlacement::Inner = doc { +// token_trees.push(mk_punct('!')); +// } +// token_trees.push(tt::TokenTree::from(tt::Subtree { +// delimiter: tt::Delimiter { open: span, close: span, kind: tt::DelimiterKind::Bracket }, +// token_trees: meta_tkns, +// })); + +// Some(token_trees) +// } /// A raw token (straight from lexer) converter struct RawConverter<'a, Ctx> { @@ -503,76 +494,32 @@ struct StaticRawConverter<'a, S> { mode: DocCommentDesugarMode, } -trait SrcToken { - fn kind(&self, ctx: &Ctx) -> SyntaxKind; - - fn to_char(&self, ctx: &Ctx) -> Option; - - fn to_text(&self, ctx: &Ctx) -> SmolStr; - - fn as_leaf(&self) -> Option<&tt::Leaf> { - None - } -} - trait TokenConverter: Sized { - type Token: SrcToken; + // fn convert_doc_comment(&self, token: &Self::Token, span: S) -> Option>>; - fn convert_doc_comment(&self, token: &Self::Token, span: S) -> Option>>; + fn bump(&mut self) -> Option<(SyntaxKind, TextRange, &str)>; - fn bump(&mut self) -> Option<(Self::Token, TextRange)>; - - fn peek(&self) -> Option; + fn peek(&self) -> Option; fn span_for(&self, range: TextRange) -> S; fn call_site(&self) -> S; } -impl SrcToken, S> for usize { - fn kind(&self, ctx: &RawConverter<'_, Ctx>) -> SyntaxKind { - ctx.lexed.kind(*self) - } - - fn to_char(&self, ctx: &RawConverter<'_, Ctx>) -> Option { - ctx.lexed.text(*self).chars().next() - } - - fn to_text(&self, ctx: &RawConverter<'_, Ctx>) -> SmolStr { - ctx.lexed.text(*self).into() - } -} - -impl SrcToken, S> for usize { - fn kind(&self, ctx: &StaticRawConverter<'_, S>) -> SyntaxKind { - ctx.lexed.kind(*self) - } - - fn to_char(&self, ctx: &StaticRawConverter<'_, S>) -> Option { - ctx.lexed.text(*self).chars().next() - } - - fn to_text(&self, ctx: &StaticRawConverter<'_, S>) -> SmolStr { - ctx.lexed.text(*self).into() - } -} - impl TokenConverter> for RawConverter<'_, Ctx> where SpanData: Copy, { - type Token = usize; - - fn convert_doc_comment( - &self, - &token: &usize, - span: SpanData, - ) -> Option>>> { - let text = self.lexed.text(token); - convert_doc_comment(&doc_comment(text), span, self.mode) - } - - fn bump(&mut self) -> Option<(Self::Token, TextRange)> { + // fn convert_doc_comment( + // &self, + // &token: &usize, + // span: SpanData, + // ) -> Option>>> { + // let text = self.lexed.text(token); + // convert_doc_comment(&doc_comment(text), span, self.mode) + // } + + fn bump(&mut self) -> Option<(SyntaxKind, TextRange, &str)> { if self.pos == self.lexed.len() { return None; } @@ -581,14 +528,14 @@ where let range = self.lexed.text_range(token); let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?); - Some((token, range)) + Some((self.lexed.kind(token), range, self.lexed.text(token))) } - fn peek(&self) -> Option { + fn peek(&self) -> Option { if self.pos == self.lexed.len() { return None; } - Some(self.pos) + Some(self.lexed.kind(self.pos)) } fn span_for(&self, range: TextRange) -> SpanData { @@ -604,14 +551,12 @@ impl TokenConverter for StaticRawConverter<'_, S> where S: Copy, { - type Token = usize; + // fn convert_doc_comment(&self, &token: &usize, span: S) -> Option>> { + // let text = self.lexed.text(token); + // convert_doc_comment(&doc_comment(text), span, self.mode) + // } - fn convert_doc_comment(&self, &token: &usize, span: S) -> Option>> { - let text = self.lexed.text(token); - convert_doc_comment(&doc_comment(text), span, self.mode) - } - - fn bump(&mut self) -> Option<(Self::Token, TextRange)> { + fn bump(&mut self) -> Option<(SyntaxKind, TextRange, &str)> { if self.pos == self.lexed.len() { return None; } @@ -620,14 +565,14 @@ where let range = self.lexed.text_range(token); let range = TextRange::new(range.start.try_into().ok()?, range.end.try_into().ok()?); - Some((token, range)) + Some((self.lexed.kind(self.pos), range, self.lexed.text(self.pos))) } - fn peek(&self) -> Option { + fn peek(&self) -> Option { if self.pos == self.lexed.len() { return None; } - Some(self.pos) + Some(self.lexed.kind(self.pos)) } fn span_for(&self, _: TextRange) -> S { @@ -641,13 +586,13 @@ where struct Converter { current: Option, - current_leaves: Vec>, + current_leaves: Vec>, preorder: PreorderWithTokens, range: TextRange, punct_offset: Option<(SyntaxToken, TextSize)>, /// Used to make the emitted text ranges in the spans relative to the span anchor. map: SpanMap, - append: FxHashMap>>, + append: FxHashMap>>, remove: FxHashSet, call_site: S, mode: DocCommentDesugarMode, @@ -657,7 +602,7 @@ impl Converter { fn new( node: &SyntaxNode, map: SpanMap, - append: FxHashMap>>, + append: FxHashMap>>, remove: FxHashSet, call_site: S, mode: DocCommentDesugarMode, @@ -714,70 +659,16 @@ impl Converter { } } -#[derive(Debug)] -enum SynToken { - Ordinary(SyntaxToken), - Punct { token: SyntaxToken, offset: usize }, - Leaf(tt::Leaf), -} - -impl SynToken { - fn token(&self) -> &SyntaxToken { - match self { - SynToken::Ordinary(it) | SynToken::Punct { token: it, offset: _ } => it, - SynToken::Leaf(_) => unreachable!(), - } - } -} - -impl SrcToken, S> for SynToken { - fn kind(&self, _ctx: &Converter) -> SyntaxKind { - match self { - SynToken::Ordinary(token) => token.kind(), - SynToken::Punct { token, offset: i } => { - SyntaxKind::from_char(token.text().chars().nth(*i).unwrap()).unwrap() - } - SynToken::Leaf(_) => { - never!(); - SyntaxKind::ERROR - } - } - } - fn to_char(&self, _ctx: &Converter) -> Option { - match self { - SynToken::Ordinary(_) => None, - SynToken::Punct { token: it, offset: i } => it.text().chars().nth(*i), - SynToken::Leaf(_) => None, - } - } - fn to_text(&self, _ctx: &Converter) -> SmolStr { - match self { - SynToken::Ordinary(token) | SynToken::Punct { token, offset: _ } => token.text().into(), - SynToken::Leaf(_) => { - never!(); - "".into() - } - } - } - fn as_leaf(&self) -> Option<&tt::Leaf> { - match self { - SynToken::Ordinary(_) | SynToken::Punct { .. } => None, - SynToken::Leaf(it) => Some(it), - } - } -} - impl TokenConverter for Converter where S: Copy, SpanMap: SpanMapper, { - type Token = SynToken; - fn convert_doc_comment(&self, token: &Self::Token, span: S) -> Option>> { - convert_doc_comment(token.token(), span, self.mode) - } + // fn convert_doc_comment(&self, token: &Self::Token, span: S) -> Option>> { + // convert_doc_comment(token.token(), span, self.mode) + // } - fn bump(&mut self) -> Option<(Self::Token, TextRange)> { + fn bump(&mut self) -> Option<(SyntaxKind, TextRange, &str)> { if let Some((punct, offset)) = self.punct_offset.clone() { if usize::from(offset) + 1 < punct.text().len() { let offset = offset + TextSize::of('.'); @@ -785,18 +676,21 @@ where self.punct_offset = Some((punct.clone(), offset)); let range = TextRange::at(range.start() + offset, TextSize::of('.')); return Some(( - SynToken::Punct { token: punct, offset: u32::from(offset) as usize }, + SyntaxKind::from_char(punct.text().chars().nth(offset.into()).unwrap()) + .unwrap(), range, + "", )); } } - if let Some(leaf) = self.current_leaves.pop() { - if self.current_leaves.is_empty() { - self.current = self.next_token(); - } - return Some((SynToken::Leaf(leaf), TextRange::empty(TextSize::new(0)))); - } + // FIXME bring this back + // if let Some(leaf) = self.current_leaves.pop() { + // if self.current_leaves.is_empty() { + // self.current = self.next_token(); + // } + // return Some((SynToken::Leaf(leaf), TextRange::empty(TextSize::new(0)))); + // } let curr = self.current.clone()?; if !self.range.contains_range(curr.text_range()) { @@ -808,21 +702,29 @@ where self.punct_offset = Some((curr.clone(), 0.into())); let range = curr.text_range(); let range = TextRange::at(range.start(), TextSize::of('.')); - (SynToken::Punct { token: curr, offset: 0_usize }, range) + (SyntaxKind::from_char(curr.text().chars().next().unwrap()).unwrap(), range, "") } else { self.punct_offset = None; let range = curr.text_range(); - (SynToken::Ordinary(curr), range) + ( + curr.kind(), + range, + // FIXME: lifetimes begone + unsafe { std::mem::transmute::<&str, &str>(curr.text()) }, + ) }; Some(token) } - fn peek(&self) -> Option { + fn peek(&self) -> Option { if let Some((punct, mut offset)) = self.punct_offset.clone() { offset += TextSize::of('.'); if usize::from(offset) < punct.text().len() { - return Some(SynToken::Punct { token: punct, offset: usize::from(offset) }); + return Some( + SyntaxKind::from_char(punct.text().chars().nth(offset.into()).unwrap()) + .unwrap(), + ); } } @@ -832,9 +734,9 @@ where } let token = if curr.kind().is_punct() { - SynToken::Punct { token: curr, offset: 0_usize } + SyntaxKind::from_char(curr.text().chars().next().unwrap()).unwrap() } else { - SynToken::Ordinary(curr) + curr.kind() }; Some(token) } @@ -852,23 +754,25 @@ where SpanData: Copy, { buf: String, - cursor: Cursor<'a, SpanData>, + cursor: tt::RefTokenCursor<'a, SpanData>, text_pos: TextSize, inner: SyntaxTreeBuilder, token_map: SpanMap, + needs_spacing_if_punct: bool, } impl<'a, Ctx> TtTreeSink<'a, Ctx> where SpanData: Copy, { - fn new(cursor: Cursor<'a, SpanData>) -> Self { + fn new(cursor: tt::RefTokenCursor<'a, SpanData>) -> Self { TtTreeSink { buf: String::new(), cursor, text_pos: 0.into(), inner: SyntaxTreeBuilder::default(), token_map: SpanMap::empty(), + needs_spacing_if_punct: false, } } @@ -878,12 +782,12 @@ where } } -fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> { +fn delim_to_str(d: tt::Delimiter, closing: bool) -> Option<&'static str> { let texts = match d { - tt::DelimiterKind::Parenthesis => "()", - tt::DelimiterKind::Brace => "{}", - tt::DelimiterKind::Bracket => "[]", - tt::DelimiterKind::Invisible => return None, + tt::Delimiter::Parenthesis => "()", + tt::Delimiter::Brace => "{}", + tt::Delimiter::Bracket => "[]", + tt::Delimiter::Invisible => return None, }; let idx = closing as usize; @@ -895,126 +799,149 @@ where SpanData: Copy + fmt::Debug, Ctx: PartialEq, { - /// Parses a float literal as if it was a one to two name ref nodes with a dot inbetween. - /// This occurs when a float literal is used as a field access. - fn float_split(&mut self, has_pseudo_dot: bool) { - let (text, span) = match self.cursor.token_tree() { - Some(tt::buffer::TokenTreeRef::Leaf( - tt::Leaf::Literal(tt::Literal { - symbol: text, - span, - kind: tt::LitKind::Float, - suffix: _, - }), - _, - )) => (text.as_str(), *span), - tt => unreachable!("{tt:?}"), - }; - // FIXME: Span splitting - match text.split_once('.') { - Some((left, right)) => { - assert!(!left.is_empty()); - - self.inner.start_node(SyntaxKind::NAME_REF); - self.inner.token(SyntaxKind::INT_NUMBER, left); - self.inner.finish_node(); - self.token_map.push(self.text_pos + TextSize::of(left), span); - - // here we move the exit up, the original exit has been deleted in process - self.inner.finish_node(); - - self.inner.token(SyntaxKind::DOT, "."); - self.token_map.push(self.text_pos + TextSize::of(left) + TextSize::of("."), span); - - if has_pseudo_dot { - assert!(right.is_empty(), "{left}.{right}"); - } else { - assert!(!right.is_empty(), "{left}.{right}"); - self.inner.start_node(SyntaxKind::NAME_REF); - self.inner.token(SyntaxKind::INT_NUMBER, right); - self.token_map.push(self.text_pos + TextSize::of(text), span); - self.inner.finish_node(); - - // the parser creates an unbalanced start node, we are required to close it here - self.inner.finish_node(); - } - self.text_pos += TextSize::of(text); - } - None => unreachable!(), - } - self.cursor = self.cursor.bump(); - } + // /// Parses a float literal as if it was a one to two name ref nodes with a dot inbetween. + // /// This occurs when a float literal is used as a field access. + // fn float_split(&mut self, has_pseudo_dot: bool) { + // let (text, span) = match self.cursor.token_tree() { + // Some(tt::buffer::TokenTreeRef::Leaf( + // tt::Leaf::Literal(tt::Literal { + // symbol: text, + // span, + // kind: tt::LitKind::Float, + // suffix: _, + // }), + // _, + // )) => (text.as_str(), *span), + // tt => unreachable!("{tt:?}"), + // }; + // // FIXME: Span splitting + // match text.split_once('.') { + // Some((left, right)) => { + // assert!(!left.is_empty()); + + // self.inner.start_node(SyntaxKind::NAME_REF); + // self.inner.token(SyntaxKind::INT_NUMBER, left); + // self.inner.finish_node(); + // self.token_map.push(self.text_pos + TextSize::of(left), span); + + // // here we move the exit up, the original exit has been deleted in process + // self.inner.finish_node(); + + // self.inner.token(SyntaxKind::DOT, "."); + // self.token_map.push(self.text_pos + TextSize::of(left) + TextSize::of("."), span); + + // if has_pseudo_dot { + // assert!(right.is_empty(), "{left}.{right}"); + // } else { + // assert!(!right.is_empty(), "{left}.{right}"); + // self.inner.start_node(SyntaxKind::NAME_REF); + // self.inner.token(SyntaxKind::INT_NUMBER, right); + // self.token_map.push(self.text_pos + TextSize::of(text), span); + // self.inner.finish_node(); + + // // the parser creates an unbalanced start node, we are required to close it here + // self.inner.finish_node(); + // } + // self.text_pos += TextSize::of(text); + // } + // None => unreachable!(), + // } + // self.cursor = self.cursor.bump(); + // } fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) { if kind == LIFETIME_IDENT { n_tokens = 2; } - let mut last = self.cursor; let mut combined_span = None; + let mut last_spacing = Spacing::Joint; 'tokens: for _ in 0..n_tokens { - let tmp: u8; - if self.cursor.eof() { - break; - } - last = self.cursor; - let (text, span) = loop { - break match self.cursor.token_tree() { - Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => match leaf { - tt::Leaf::Ident(ident) => { - if ident.is_raw.yes() { - self.buf.push_str("r#"); - self.text_pos += TextSize::of("r#"); - } - let r = (ident.sym.as_str(), ident.span); - self.cursor = self.cursor.bump(); - r - } - tt::Leaf::Punct(punct) => { - assert!(punct.char.is_ascii()); - tmp = punct.char as u8; - let r = ( - std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(), - punct.span, - ); - self.cursor = self.cursor.bump(); - r - } - tt::Leaf::Literal(lit) => { - let buf_l = self.buf.len(); - format_to!(self.buf, "{lit}"); - debug_assert_ne!(self.buf.len() - buf_l, 0); - self.text_pos += TextSize::new((self.buf.len() - buf_l) as u32); - combined_span = match combined_span { - None => Some(lit.span), - Some(prev_span) => Some(Self::merge_spans(prev_span, lit.span)), - }; - self.cursor = self.cursor.bump(); - continue 'tokens; - } - }, - Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => { - self.cursor = self.cursor.subtree().unwrap(); - match delim_to_str(subtree.delimiter.kind, false) { - Some(it) => (it, subtree.delimiter.open), - None => continue, - } - } - None => { - let parent = self.cursor.end().unwrap(); - self.cursor = self.cursor.bump(); - match delim_to_str(parent.delimiter.kind, true) { - Some(it) => (it, parent.delimiter.close), - None => continue, - } + let Some((t, spacing)) = self.cursor.next() else { break }; + last_spacing = spacing; + let text = match t.kind { + TokenKind::Eq => "=", + TokenKind::Lt => "<", + TokenKind::Le => "<=", + TokenKind::EqEq => "==", + TokenKind::Ne => "!=", + TokenKind::Ge => ">=", + TokenKind::Gt => ">", + TokenKind::AndAnd => "&&", + TokenKind::OrOr => "||", + TokenKind::Not => "!", + TokenKind::Tilde => "~", + TokenKind::BinOp(b) => match b { + tt::BinOpToken::Plus => "+", + tt::BinOpToken::Minus => "-", + tt::BinOpToken::Star => "*", + tt::BinOpToken::Slash => "/", + tt::BinOpToken::Percent => "%", + tt::BinOpToken::Caret => "^", + tt::BinOpToken::And => "&", + tt::BinOpToken::Or => "|", + tt::BinOpToken::Shl => "<<", + tt::BinOpToken::Shr => ">>", + }, + TokenKind::BinOpEq(b) => match b { + tt::BinOpToken::Plus => "+=", + tt::BinOpToken::Minus => "-=", + tt::BinOpToken::Star => "*=", + tt::BinOpToken::Slash => "/=", + tt::BinOpToken::Percent => "%=", + tt::BinOpToken::Caret => "^=", + tt::BinOpToken::And => "&=", + tt::BinOpToken::Or => "|=", + tt::BinOpToken::Shl => "<<=", + tt::BinOpToken::Shr => ">>=", + }, + TokenKind::At => "@", + TokenKind::Dot => ".", + TokenKind::DotDot => "..", + TokenKind::DotDotDot => "...", + TokenKind::DotDotEq => "..=", + TokenKind::Comma => ",", + TokenKind::Semi => ";", + TokenKind::Colon => ":", + TokenKind::PathSep => "::", + TokenKind::RArrow => "->", + TokenKind::LArrow => "<-", + TokenKind::FatArrow => "=>", + TokenKind::Pound => "#", + TokenKind::Dollar => "$", + TokenKind::Question => "?", + TokenKind::SingleQuote => "'", + TokenKind::OpenDelim(d) => match delim_to_str(d, false) { + Some(it) => it, + None => continue, + }, + TokenKind::CloseDelim(d) => match delim_to_str(d, true) { + Some(it) => it, + None => continue, + }, + TokenKind::Literal(lit) => { + let buf_l = self.buf.len(); + format_to!(self.buf, "{lit}"); + debug_assert_ne!(self.buf.len() - buf_l, 0); + self.text_pos += TextSize::new((self.buf.len() - buf_l) as u32); + "" + } + TokenKind::Ident(ref ident, raw) => { + if raw.yes() { + self.buf.push_str("r#"); + self.text_pos += TextSize::of("r#"); } - }; + ident.as_str() + } + TokenKind::Lifetime(_) => todo!(), + TokenKind::DocComment(_, _, _) => todo!(), + TokenKind::Eof => todo!(), }; self.buf += text; self.text_pos += TextSize::of(text); combined_span = match combined_span { - None => Some(span), - Some(prev_span) => Some(Self::merge_spans(prev_span, span)), + None => Some(t.span), + Some(prev_span) => Some(Self::merge_spans(prev_span, t.span)), } } @@ -1023,23 +950,30 @@ where self.buf.clear(); // FIXME: Emitting whitespace for this is really just a hack, we should get rid of it. // Add whitespace between adjoint puncts - let next = last.bump(); - if let ( - Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(curr), _)), - Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(next), _)), - ) = (last.token_tree(), next.token_tree()) - { - // Note: We always assume the semi-colon would be the last token in - // other parts of RA such that we don't add whitespace here. - // - // When `next` is a `Punct` of `'`, that's a part of a lifetime identifier so we don't - // need to add whitespace either. - if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' { - self.inner.token(WHITESPACE, " "); - self.text_pos += TextSize::of(' '); - self.token_map.push(self.text_pos, curr.span); - } - } + // let next = last.bump(); + // if let ( + // Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(curr), _)), + // Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(next), _)), + // ) = (last.token_tree(), next.token_tree()) + // { + // // Note: We always assume the semi-colon would be the last token in + // // other parts of RA such that we don't add whitespace here. + // // + // // When `next` is a `Punct` of `'`, that's a part of a lifetime identifier so we don't + // // need to add whitespace either. + // if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' { + // self.inner.token(WHITESPACE, " "); + // self.text_pos += TextSize::of(' '); + // self.token_map.push(self.text_pos, curr.span); + // } + // } + // let is_punct = SyntaxKind::is_punct(kind); + // if self.needs_spacing_if_punct && is_punct && { + + // } + // self.needs_spacing_if_punct = last_spacing == Spacing::Alone + // && is_punct + // && kind != SyntaxKind::SEMICOLON; } fn start_node(&mut self, kind: SyntaxKind) { diff --git a/crates/syntax-bridge/src/tests.rs b/crates/syntax-bridge/src/tests.rs index 7b8e3f2b49c2..7261ebb4b376 100644 --- a/crates/syntax-bridge/src/tests.rs +++ b/crates/syntax-bridge/src/tests.rs @@ -1,11 +1,7 @@ use rustc_hash::FxHashMap; -use span::Span; use syntax::{ast, AstNode}; use test_utils::extract_annotations; -use tt::{ - buffer::{TokenBuffer, TokenTreeRef}, - Leaf, Punct, Spacing, -}; +use tt::{Spacing, TokenCursor}; use crate::{ dummy_test_span_utils::{DummyTestSpanMap, DUMMY}, @@ -14,7 +10,7 @@ use crate::{ fn check_punct_spacing(fixture: &str) { let source_file = ast::SourceFile::parse(fixture, span::Edition::CURRENT).ok().unwrap(); - let subtree = syntax_node_to_token_tree( + let stream = syntax_node_to_token_tree( source_file.syntax(), DummyTestSpanMap, DUMMY, @@ -32,22 +28,11 @@ fn check_punct_spacing(fixture: &str) { }) .collect(); - let buf = TokenBuffer::from_subtree(&subtree); - let mut cursor = buf.begin(); - while !cursor.eof() { - while let Some(token_tree) = cursor.token_tree() { - if let TokenTreeRef::Leaf( - Leaf::Punct(Punct { spacing, span: Span { range, .. }, .. }), - _, - ) = token_tree - { - if let Some(expected) = annotations.remove(range) { - assert_eq!(expected, *spacing); - } - } - cursor = cursor.bump_subtree(); + let mut current = TokenCursor::new(stream); + while let Some((token, spacing)) = current.next() { + if let Some(expected) = annotations.remove(&token.span.range) { + assert_eq!(expected, spacing); } - cursor = cursor.bump(); } assert!(annotations.is_empty(), "unchecked annotations: {annotations:?}"); @@ -90,7 +75,6 @@ fn main() { // ^ Alone struct Struct<'a> {}; // ^ Joint - // ^ Joint Struct::<0>; // ^ Alone Struct::<{0}>; diff --git a/crates/syntax-bridge/src/to_parser_input.rs b/crates/syntax-bridge/src/to_parser_input.rs index 14216e309328..8b5274bf2be9 100644 --- a/crates/syntax-bridge/src/to_parser_input.rs +++ b/crates/syntax-bridge/src/to_parser_input.rs @@ -5,112 +5,131 @@ use std::fmt; use span::Edition; use syntax::{SyntaxKind, SyntaxKind::*, T}; - -use tt::buffer::TokenBuffer; +use tt::{RefTokenCursor, TokenCursor, TokenStream}; pub fn to_parser_input( edition: Edition, - buffer: &TokenBuffer<'_, S>, + mut cursor: RefTokenCursor, ) -> parser::Input { let mut res = parser::Input::default(); - let mut current = buffer.begin(); - - while !current.eof() { - let cursor = current; - let tt = cursor.token_tree(); - - // Check if it is lifetime - if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt { - if punct.char == '\'' { - let next = cursor.bump(); - match next.token_tree() { - Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(_ident), _)) => { - res.push(LIFETIME_IDENT); - current = next.bump(); - continue; + while let Some((t, spacing)) = cursor.next() { + let kind = match t.kind { + tt::TokenKind::Eq => SyntaxKind::EQ, + tt::TokenKind::Lt => SyntaxKind::L_ANGLE, + tt::TokenKind::Le => SyntaxKind::LTEQ, + tt::TokenKind::EqEq => SyntaxKind::EQ2, + tt::TokenKind::Ne => SyntaxKind::NEQ, + tt::TokenKind::Ge => SyntaxKind::GTEQ, + tt::TokenKind::Gt => SyntaxKind::R_ANGLE, + tt::TokenKind::AndAnd => SyntaxKind::AMP2, + tt::TokenKind::OrOr => SyntaxKind::PIPE2, + tt::TokenKind::Not => SyntaxKind::BANG, + tt::TokenKind::Tilde => SyntaxKind::TILDE, + tt::TokenKind::BinOp(binop) => match binop { + tt::BinOpToken::Plus => SyntaxKind::PLUS, + tt::BinOpToken::Minus => SyntaxKind::MINUS, + tt::BinOpToken::Star => SyntaxKind::STAR, + tt::BinOpToken::Slash => SyntaxKind::SLASH, + tt::BinOpToken::Percent => SyntaxKind::PERCENT, + tt::BinOpToken::Caret => SyntaxKind::CARET, + tt::BinOpToken::And => SyntaxKind::AMP, + tt::BinOpToken::Or => SyntaxKind::PIPE, + tt::BinOpToken::Shl => SyntaxKind::SHL, + tt::BinOpToken::Shr => SyntaxKind::SHR, + }, + tt::TokenKind::BinOpEq(binop) => match binop { + tt::BinOpToken::Plus => SyntaxKind::PLUSEQ, + tt::BinOpToken::Minus => SyntaxKind::MINUSEQ, + tt::BinOpToken::Star => SyntaxKind::STAREQ, + tt::BinOpToken::Slash => SyntaxKind::SLASHEQ, + tt::BinOpToken::Percent => SyntaxKind::PERCENTEQ, + tt::BinOpToken::Caret => SyntaxKind::CARETEQ, + tt::BinOpToken::And => SyntaxKind::AMPEQ, + tt::BinOpToken::Or => SyntaxKind::PIPEEQ, + tt::BinOpToken::Shl => SyntaxKind::SHLEQ, + tt::BinOpToken::Shr => SyntaxKind::SHREQ, + }, + tt::TokenKind::At => SyntaxKind::AT, + tt::TokenKind::Dot => SyntaxKind::DOT, + tt::TokenKind::DotDot => SyntaxKind::DOT2, + tt::TokenKind::DotDotDot => SyntaxKind::DOT3, + tt::TokenKind::DotDotEq => SyntaxKind::DOT2EQ, + tt::TokenKind::Comma => SyntaxKind::COMMA, + tt::TokenKind::Semi => SyntaxKind::SEMICOLON, + tt::TokenKind::Colon => SyntaxKind::COLON, + tt::TokenKind::PathSep => SyntaxKind::COLON2, + tt::TokenKind::RArrow => SyntaxKind::THIN_ARROW, + tt::TokenKind::LArrow => todo!(), + tt::TokenKind::FatArrow => SyntaxKind::FAT_ARROW, + tt::TokenKind::Pound => SyntaxKind::POUND, + tt::TokenKind::Dollar => SyntaxKind::DOLLAR, + tt::TokenKind::Question => SyntaxKind::QUESTION, + tt::TokenKind::SingleQuote => { + assert!(matches!( + cursor.next(), + Some((tt::Token { kind: tt::TokenKind::Ident(..), .. }, _)) + )); + SyntaxKind::LIFETIME + } + tt::TokenKind::OpenDelim(delim) => match delim { + tt::Delimiter::Parenthesis => SyntaxKind::L_PAREN, + tt::Delimiter::Brace => SyntaxKind::L_CURLY, + tt::Delimiter::Bracket => SyntaxKind::L_BRACK, + tt::Delimiter::Invisible => continue, + }, + tt::TokenKind::CloseDelim(delim) => match delim { + tt::Delimiter::Parenthesis => SyntaxKind::R_PAREN, + tt::Delimiter::Brace => SyntaxKind::R_CURLY, + tt::Delimiter::Bracket => SyntaxKind::R_BRACK, + tt::Delimiter::Invisible => continue, + }, + tt::TokenKind::Literal(lit) => match lit.kind { + tt::LitKind::Byte => SyntaxKind::BYTE, + tt::LitKind::Char => SyntaxKind::CHAR, + tt::LitKind::Integer => SyntaxKind::INT_NUMBER, + tt::LitKind::Float => { + res.push(SyntaxKind::FLOAT_NUMBER); + if lit.suffix.is_none() && !lit.symbol.as_str().ends_with('.') { + // Tag the token as joint if it is float with a fractional part + // we use this jointness to inform the parser about what token split + // event to emit when we encounter a float literal in a field access + res.was_joint(); } - _ => panic!("Next token must be ident : {:#?}", next.token_tree()), + continue; } - } - } - - current = match tt { - Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => { - match leaf { - tt::Leaf::Literal(lit) => { - let kind = match lit.kind { - tt::LitKind::Byte => SyntaxKind::BYTE, - tt::LitKind::Char => SyntaxKind::CHAR, - tt::LitKind::Integer => SyntaxKind::INT_NUMBER, - tt::LitKind::Float => SyntaxKind::FLOAT_NUMBER, - tt::LitKind::Str | tt::LitKind::StrRaw(_) => SyntaxKind::STRING, - tt::LitKind::ByteStr | tt::LitKind::ByteStrRaw(_) => { - SyntaxKind::BYTE_STRING - } - tt::LitKind::CStr | tt::LitKind::CStrRaw(_) => SyntaxKind::C_STRING, - tt::LitKind::Err(_) => SyntaxKind::ERROR, - }; - res.push(kind); - - if kind == FLOAT_NUMBER && !lit.symbol.as_str().ends_with('.') { - // Tag the token as joint if it is float with a fractional part - // we use this jointness to inform the parser about what token split - // event to emit when we encounter a float literal in a field access - res.was_joint(); + tt::LitKind::Str => SyntaxKind::STRING, + tt::LitKind::StrRaw(_) => SyntaxKind::STRING, + tt::LitKind::ByteStr => SyntaxKind::BYTE_STRING, + tt::LitKind::ByteStrRaw(_) => SyntaxKind::BYTE_STRING, + tt::LitKind::CStr => SyntaxKind::C_STRING, + tt::LitKind::CStrRaw(_) => SyntaxKind::C_STRING, + tt::LitKind::Err(_) => SyntaxKind::ERROR, + }, + tt::TokenKind::Ident(sym, raw) => match sym.as_str() { + _ if raw.yes() => IDENT, + "_" => T![_], + // is this right? + i if i.starts_with('\'') => LIFETIME_IDENT, + text => match SyntaxKind::from_keyword(text, edition) { + Some(kind) => kind, + None => match SyntaxKind::from_contextual_keyword(text, edition) { + Some(contextual_keyword) => { + res.push_ident(contextual_keyword); + continue; } - } - tt::Leaf::Ident(ident) => match ident.sym.as_str() { - "_" => res.push(T![_]), - i if i.starts_with('\'') => res.push(LIFETIME_IDENT), - _ if ident.is_raw.yes() => res.push(IDENT), - text => match SyntaxKind::from_keyword(text, edition) { - Some(kind) => res.push(kind), - None => { - let contextual_keyword = - SyntaxKind::from_contextual_keyword(text, edition) - .unwrap_or(SyntaxKind::IDENT); - res.push_ident(contextual_keyword); - } - }, + None => SyntaxKind::IDENT, }, - tt::Leaf::Punct(punct) => { - let kind = SyntaxKind::from_char(punct.char) - .unwrap_or_else(|| panic!("{punct:#?} is not a valid punct")); - res.push(kind); - if punct.spacing == tt::Spacing::Joint { - res.was_joint(); - } - } - } - cursor.bump() - } - Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => { - if let Some(kind) = match subtree.delimiter.kind { - tt::DelimiterKind::Parenthesis => Some(T!['(']), - tt::DelimiterKind::Brace => Some(T!['{']), - tt::DelimiterKind::Bracket => Some(T!['[']), - tt::DelimiterKind::Invisible => None, - } { - res.push(kind); - } - cursor.subtree().unwrap() - } - None => match cursor.end() { - Some(subtree) => { - if let Some(kind) = match subtree.delimiter.kind { - tt::DelimiterKind::Parenthesis => Some(T![')']), - tt::DelimiterKind::Brace => Some(T!['}']), - tt::DelimiterKind::Bracket => Some(T![']']), - tt::DelimiterKind::Invisible => None, - } { - res.push(kind); - } - cursor.bump() - } - None => continue, + }, }, + tt::TokenKind::Lifetime(_) => SyntaxKind::LIFETIME, + tt::TokenKind::DocComment(_, _, _) => todo!(), + tt::TokenKind::Eof => break, }; + res.push(kind); + if spacing == tt::Spacing::Joint { + res.was_joint(); + } } res diff --git a/crates/tt/src/lib.rs b/crates/tt/src/lib.rs index 8d915d0a51e3..7d746432fe92 100644 --- a/crates/tt/src/lib.rs +++ b/crates/tt/src/lib.rs @@ -9,21 +9,86 @@ extern crate ra_ap_rustc_lexer as rustc_lexer; #[cfg(feature = "in-rust-tree")] extern crate rustc_lexer; -pub mod buffer; -pub mod iter; +// pub mod buffer; +// pub mod iter; -use std::fmt; +use std::{fmt, mem}; use intern::Symbol; -use stdx::{impl_from, itertools::Itertools as _}; +use stdx::itertools::Itertools as _; pub use text_size::{TextRange, TextSize}; -#[derive(Clone, PartialEq, Debug)] -pub struct Lit { - pub kind: LitKind, - pub symbol: Symbol, - pub suffix: Option, +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Token { + pub kind: TokenKind, + pub span: Span, +} + +impl Token { + pub fn new(kind: TokenKind, span: Span) -> Self { + Token { kind, span } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum CommentKind { + Line, + Block, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum BinOpToken { + Plus, + Minus, + Star, + Slash, + Percent, + Caret, + And, + Or, + Shl, + Shr, +} + +/// Describes how a sequence of token trees is delimited. +/// Cannot use `proc_macro::Delimiter` directly because this +/// structure should implement some additional traits. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum Delimiter { + /// `( ... )` + Parenthesis, + /// `{ ... }` + Brace, + /// `[ ... ]` + Bracket, + /// `∅ ... ∅` + /// An invisible delimiter, that may, for example, appear around tokens coming from a + /// "macro variable" `$var`. It is important to preserve operator priorities in cases like + /// `$var * 3` where `$var` is `1 + 2`. + /// Invisible delimiters might not survive roundtrip of a token stream through a string. + Invisible, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum LitKind { + Byte, + Char, + Integer, // e.g. `1`, `1u8`, `1f32` + Float, // e.g. `1.`, `1.0`, `1e3f32` + Str, + StrRaw(u8), // raw string delimited by `n` hash symbols + ByteStr, + ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols + CStr, + CStrRaw(u8), + Err(()), +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum AttrStyle { + Outer, + Inner, } #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] @@ -31,6 +96,7 @@ pub enum IdentIsRaw { No, Yes, } + impl IdentIsRaw { pub fn yes(self) -> bool { matches!(self, IdentIsRaw::Yes) @@ -53,107 +119,141 @@ impl IdentIsRaw { } } -#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] -pub enum LitKind { - Byte, - Char, - Integer, // e.g. `1`, `1u8`, `1f32` - Float, // e.g. `1.`, `1.0`, `1e3f32` - Str, - StrRaw(u8), // raw string delimited by `n` hash symbols - ByteStr, - ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols - CStr, - CStrRaw(u8), - Err(()), -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum TokenTree { - Leaf(Leaf), - Subtree(Subtree), -} -impl_from!(Leaf, Subtree for TokenTree); -impl TokenTree { - pub fn empty(span: S) -> Self { - Self::Subtree(Subtree { - delimiter: Delimiter::invisible_spanned(span), - token_trees: Box::new([]), - }) - } - - pub fn subtree_or_wrap(self, span: DelimSpan) -> Subtree { - match self { - TokenTree::Leaf(_) => Subtree { - delimiter: Delimiter::invisible_delim_spanned(span), - token_trees: Box::new([self]), - }, - TokenTree::Subtree(s) => s, - } - } - - pub fn first_span(&self) -> S { - match self { - TokenTree::Leaf(l) => *l.span(), - TokenTree::Subtree(s) => s.delimiter.open, +impl From for IdentIsRaw { + fn from(b: bool) -> Self { + match b { + true => Self::Yes, + false => Self::No, } } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum Leaf { - Literal(Literal), - Punct(Punct), - Ident(Ident), -} - -impl Leaf { - pub fn span(&self) -> &S { - match self { - Leaf::Literal(it) => &it.span, - Leaf::Punct(it) => &it.span, - Leaf::Ident(it) => &it.span, - } - } +pub enum TokenKind { + /* Expression-operator symbols. */ + /// `=` + Eq, + /// `<` + Lt, + /// `<=` + Le, + /// `==` + EqEq, + /// `!=` + Ne, + /// `>=` + Ge, + /// `>` + Gt, + /// `&&` + AndAnd, + /// `||` + OrOr, + /// `!` + Not, + /// `~` + Tilde, + BinOp(BinOpToken), + BinOpEq(BinOpToken), + + /* Structural symbols */ + /// `@` + At, + /// `.` + Dot, + /// `..` + DotDot, + /// `...` + DotDotDot, + /// `..=` + DotDotEq, + /// `,` + Comma, + /// `;` + Semi, + /// `:` + Colon, + /// `::` + PathSep, + /// `->` + RArrow, + /// `<-` + LArrow, + /// `=>` + FatArrow, + /// `#` + Pound, + /// `$` + Dollar, + /// `?` + Question, + /// Used by proc macros for representing lifetimes, not generated by lexer right now. + SingleQuote, + /// An opening delimiter (e.g., `{`). + OpenDelim(Delimiter), + /// A closing delimiter (e.g., `}`). + CloseDelim(Delimiter), + + /* Literals */ + // The box shrinks this enum by 8 bytes + Literal(Box), + + /// Identifier token. + Ident(Symbol, IdentIsRaw), + + /// Lifetime identifier token. + Lifetime(Symbol), + + /// A doc comment token. + /// `Symbol` is the doc comment's data excluding its "quotes" (`///`, `/**`, etc) + /// similarly to symbols in string literal tokens. + DocComment(CommentKind, AttrStyle, Symbol), + + /// End Of File + Eof, } -impl_from!(Literal, Punct, Ident for Leaf); -#[derive(Clone, PartialEq, Eq, Hash)] -pub struct Subtree { - pub delimiter: Delimiter, - pub token_trees: Box<[TokenTree]>, -} +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct TokenStream(pub Box<[TokenTree]>); -impl Subtree { - pub fn empty(span: DelimSpan) -> Self { - Subtree { delimiter: Delimiter::invisible_delim_spanned(span), token_trees: Box::new([]) } +impl TokenStream { + pub fn trees(&self) -> RefTokenTreeCursor<'_, S> { + RefTokenTreeCursor::new(self) } - /// This is slow, and should be avoided, as it will always reallocate! - pub fn push(&mut self, subtree: TokenTree) { - let mut mutable_trees = std::mem::take(&mut self.token_trees).into_vec(); - - // Reserve exactly space for one element, to avoid `into_boxed_slice` having to reallocate again. - mutable_trees.reserve_exact(1); - mutable_trees.push(subtree); - - self.token_trees = mutable_trees.into_boxed_slice(); + pub fn into_trees(self) -> TokenTreeCursor { + TokenTreeCursor::new(self) } } -#[derive(Clone, PartialEq, Eq, Hash)] -pub struct SubtreeBuilder { - pub delimiter: Delimiter, - pub token_trees: Vec>, +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum TokenTree { + /// A single token. Should never be `OpenDelim` or `CloseDelim`, because + /// delimiters are implicitly represented by `Delimited`. + Token(Token, Spacing), + /// A delimited sequence of token trees. + Delimited(DelimSpan, DelimSpacing, Delimiter, TokenStream), } -impl SubtreeBuilder { - pub fn build(self) -> Subtree { - Subtree { delimiter: self.delimiter, token_trees: self.token_trees.into_boxed_slice() } +impl TokenTree { + pub fn empty(span: S) -> Self { + Self::Delimited( + DelimSpan::from_single(span), + DelimSpacing { open: Spacing::Alone, close: Spacing::Alone }, + Delimiter::Invisible, + TokenStream(Box::new([])), + ) + } + + pub fn first_span(&self) -> S { + match self { + TokenTree::Token(t, _) => t.span, + TokenTree::Delimited(s, ..) => s.open, + } } } -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct DelimSpan { pub open: S, pub close: S, @@ -167,49 +267,28 @@ impl DelimSpan { pub fn from_pair(open: Span, close: Span) -> Self { DelimSpan { open, close } } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub struct Delimiter { - pub open: S, - pub close: S, - pub kind: DelimiterKind, -} - -impl Delimiter { - pub const fn invisible_spanned(span: S) -> Self { - Delimiter { open: span, close: span, kind: DelimiterKind::Invisible } - } - - pub const fn invisible_delim_spanned(span: DelimSpan) -> Self { - Delimiter { open: span.open, close: span.close, kind: DelimiterKind::Invisible } - } - pub fn delim_span(&self) -> DelimSpan { - DelimSpan { open: self.open, close: self.close } + pub fn entire(self) -> Span + where + Span: SpanOps, + { + self.open.up_to(self.close) } } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -pub enum DelimiterKind { - Parenthesis, - Brace, - Bracket, - Invisible, +pub trait SpanOps { + fn up_to(self, other: Self) -> Self; } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Literal { +pub struct Literal { // escaped pub symbol: Symbol, - pub span: S, pub kind: LitKind, pub suffix: Option, } -pub fn token_to_literal(text: &str, span: S) -> Literal -where - S: Copy, -{ +pub fn token_to_literal(text: &str) -> Literal { use rustc_lexer::LiteralKind; let token = rustc_lexer::tokenize(text).next_tuple(); @@ -218,12 +297,7 @@ where .. },)) = token else { - return Literal { - span, - symbol: Symbol::intern(text), - kind: LitKind::Err(()), - suffix: None, - }; + return Literal { symbol: Symbol::intern(text), kind: LitKind::Err(()), suffix: None }; }; let (kind, start_offset, end_offset) = match kind { @@ -258,14 +332,7 @@ where suffix => Some(Symbol::intern(suffix)), }; - Literal { span, symbol: Symbol::intern(lit), kind, suffix } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct Punct { - pub char: char, - pub spacing: Spacing, - pub span: S, + Literal { symbol: Symbol::intern(lit), kind, suffix } } /// Indicates whether a token can join with the following token to form a @@ -328,164 +395,19 @@ pub enum Spacing { JointHidden, } -/// Identifier or keyword. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Ident { - pub sym: Symbol, - pub span: S, - pub is_raw: IdentIsRaw, -} - -impl Ident { - pub fn new(text: &str, span: S) -> Self { - // let raw_stripped = IdentIsRaw::split_from_symbol(text.as_ref()); - let (is_raw, text) = IdentIsRaw::split_from_symbol(text); - Ident { sym: Symbol::intern(text), span, is_raw } - } -} - -fn print_debug_subtree( - f: &mut fmt::Formatter<'_>, - subtree: &Subtree, - level: usize, -) -> fmt::Result { - let align = " ".repeat(level); - - let Delimiter { kind, open, close } = &subtree.delimiter; - let delim = match kind { - DelimiterKind::Invisible => "$$", - DelimiterKind::Parenthesis => "()", - DelimiterKind::Brace => "{}", - DelimiterKind::Bracket => "[]", - }; - - write!(f, "{align}SUBTREE {delim} ",)?; - fmt::Debug::fmt(&open, f)?; - write!(f, " ")?; - fmt::Debug::fmt(&close, f)?; - if !subtree.token_trees.is_empty() { - writeln!(f)?; - for (idx, child) in subtree.token_trees.iter().enumerate() { - print_debug_token(f, child, level + 1)?; - if idx != subtree.token_trees.len() - 1 { - writeln!(f)?; - } - } - } - - Ok(()) -} - -fn print_debug_token( - f: &mut fmt::Formatter<'_>, - tkn: &TokenTree, - level: usize, -) -> fmt::Result { - let align = " ".repeat(level); - - match tkn { - TokenTree::Leaf(leaf) => match leaf { - Leaf::Literal(lit) => { - write!( - f, - "{}LITERAL {:?} {}{} {:#?}", - align, - lit.kind, - lit.symbol, - lit.suffix.as_ref().map(|it| it.as_str()).unwrap_or(""), - lit.span - )?; - } - Leaf::Punct(punct) => { - write!( - f, - "{}PUNCH {} [{}] {:#?}", - align, - punct.char, - if punct.spacing == Spacing::Alone { "alone" } else { "joint" }, - punct.span - )?; - } - Leaf::Ident(ident) => { - write!( - f, - "{}IDENT {}{} {:#?}", - align, - ident.is_raw.as_str(), - ident.sym, - ident.span - )?; - } - }, - TokenTree::Subtree(subtree) => { - print_debug_subtree(f, subtree, level)?; - } - } - - Ok(()) -} - -impl fmt::Debug for Subtree { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - print_debug_subtree(f, self, 0) - } -} - -impl fmt::Display for TokenTree { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - TokenTree::Leaf(it) => fmt::Display::fmt(it, f), - TokenTree::Subtree(it) => fmt::Display::fmt(it, f), - } - } -} - -impl fmt::Display for Subtree { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let (l, r) = match self.delimiter.kind { - DelimiterKind::Parenthesis => ("(", ")"), - DelimiterKind::Brace => ("{", "}"), - DelimiterKind::Bracket => ("[", "]"), - DelimiterKind::Invisible => ("", ""), - }; - f.write_str(l)?; - let mut needs_space = false; - for tt in self.token_trees.iter() { - if needs_space { - f.write_str(" ")?; - } - needs_space = true; - match tt { - TokenTree::Leaf(Leaf::Punct(p)) => { - needs_space = p.spacing == Spacing::Alone; - fmt::Display::fmt(p, f)?; - } - tt => fmt::Display::fmt(tt, f)?, - } - } - f.write_str(r)?; - Ok(()) - } -} - -impl fmt::Display for Leaf { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Leaf::Ident(it) => fmt::Display::fmt(it, f), - Leaf::Literal(it) => fmt::Display::fmt(it, f), - Leaf::Punct(it) => fmt::Display::fmt(it, f), - } - } +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct DelimSpacing { + pub open: Spacing, + pub close: Spacing, } -impl fmt::Display for Ident { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.is_raw.as_str(), f)?; - fmt::Display::fmt(&self.sym, f) +impl DelimSpacing { + pub fn new(open: Spacing, close: Spacing) -> DelimSpacing { + DelimSpacing { open, close } } } -impl fmt::Display for Literal { +impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.kind { LitKind::Byte => write!(f, "b'{}'", self.symbol), @@ -528,106 +450,413 @@ impl fmt::Display for Literal { Ok(()) } } +// fn print_debug_subtree( +// f: &mut fmt::Formatter<'_>, +// subtree: &Subtree, +// level: usize, +// ) -> fmt::Result { +// let align = " ".repeat(level); + +// let Delimiter { kind, open, close } = &subtree.delimiter; +// let delim = match kind { +// DelimiterKind::Invisible => "$$", +// DelimiterKind::Parenthesis => "()", +// DelimiterKind::Brace => "{}", +// DelimiterKind::Bracket => "[]", +// }; + +// write!(f, "{align}SUBTREE {delim} ",)?; +// fmt::Debug::fmt(&open, f)?; +// write!(f, " ")?; +// fmt::Debug::fmt(&close, f)?; +// if !subtree.token_trees.is_empty() { +// writeln!(f)?; +// for (idx, child) in subtree.token_trees.iter().enumerate() { +// print_debug_token(f, child, level + 1)?; +// if idx != subtree.token_trees.len() - 1 { +// writeln!(f)?; +// } +// } +// } + +// Ok(()) +// } + +// fn print_debug_token( +// f: &mut fmt::Formatter<'_>, +// tkn: &TokenTree, +// level: usize, +// ) -> fmt::Result { +// let align = " ".repeat(level); + +// match tkn { +// TokenTree::Leaf(leaf) => match leaf { +// Leaf::Literal(lit) => { +// write!( +// f, +// "{}LITERAL {:?} {}{} {:#?}", +// align, +// lit.kind, +// lit.symbol, +// lit.suffix.as_ref().map(|it| it.as_str()).unwrap_or(""), +// lit.span +// )?; +// } +// Leaf::Punct(punct) => { +// write!( +// f, +// "{}PUNCH {} [{}] {:#?}", +// align, +// punct.char, +// if punct.spacing == Spacing::Alone { "alone" } else { "joint" }, +// punct.span +// )?; +// } +// Leaf::Ident(ident) => { +// write!( +// f, +// "{}IDENT {}{} {:#?}", +// align, +// ident.is_raw.as_str(), +// ident.sym, +// ident.span +// )?; +// } +// }, +// TokenTree::Subtree(subtree) => { +// print_debug_subtree(f, subtree, level)?; +// } +// } + +// Ok(()) +// } + +// impl fmt::Display for Subtree { +// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +// let (l, r) = match self.delimiter.kind { +// DelimiterKind::Parenthesis => ("(", ")"), +// DelimiterKind::Brace => ("{", "}"), +// DelimiterKind::Bracket => ("[", "]"), +// DelimiterKind::Invisible => ("", ""), +// }; +// f.write_str(l)?; +// let mut needs_space = false; +// for tt in self.token_trees.iter() { +// if needs_space { +// f.write_str(" ")?; +// } +// needs_space = true; +// match tt { +// TokenTree::Leaf(Leaf::Punct(p)) => { +// needs_space = p.spacing == Spacing::Alone; +// fmt::Display::fmt(p, f)?; +// } +// tt => fmt::Display::fmt(tt, f)?, +// } +// } +// f.write_str(r)?; +// Ok(()) +// } +// } + +// impl Subtree { +// /// Count the number of tokens recursively +// pub fn count(&self) -> usize { +// let children_count = self +// .token_trees +// .iter() +// .map(|c| match c { +// TokenTree::Subtree(c) => c.count(), +// TokenTree::Leaf(_) => 0, +// }) +// .sum::(); + +// self.token_trees.len() + children_count +// } +// } + +// impl Subtree { +// /// A simple line string used for debugging +// pub fn as_debug_string(&self) -> String { +// let delim = match self.delimiter.kind { +// DelimiterKind::Brace => ("{", "}"), +// DelimiterKind::Bracket => ("[", "]"), +// DelimiterKind::Parenthesis => ("(", ")"), +// DelimiterKind::Invisible => ("$", "$"), +// }; + +// let mut res = String::new(); +// res.push_str(delim.0); +// let mut last = None; +// for child in self.token_trees.iter() { +// let s = match child { +// TokenTree::Leaf(it) => { +// let s = match it { +// Leaf::Literal(it) => it.symbol.to_string(), +// Leaf::Punct(it) => it.char.to_string(), +// Leaf::Ident(it) => format!("{}{}", it.is_raw.as_str(), it.sym), +// }; +// match (it, last) { +// (Leaf::Ident(_), Some(&TokenTree::Leaf(Leaf::Ident(_)))) => { +// " ".to_owned() + &s +// } +// (Leaf::Punct(_), Some(TokenTree::Leaf(Leaf::Punct(punct)))) => { +// if punct.spacing == Spacing::Alone { +// " ".to_owned() + &s +// } else { +// s +// } +// } +// _ => s, +// } +// } +// TokenTree::Subtree(it) => it.as_debug_string(), +// }; +// res.push_str(&s); +// last = Some(child); +// } + +// res.push_str(delim.1); +// res +// } +// } + +// pub fn pretty(tkns: &[TokenTree]) -> String { +// fn tokentree_to_text(tkn: &TokenTree) -> String { +// match tkn { +// TokenTree::Leaf(Leaf::Ident(ident)) => { +// format!("{}{}", ident.is_raw.as_str(), ident.sym) +// } +// TokenTree::Leaf(Leaf::Literal(literal)) => format!("{literal}"), +// TokenTree::Leaf(Leaf::Punct(punct)) => format!("{}", punct.char), +// TokenTree::Subtree(subtree) => { +// let content = pretty(&subtree.token_trees); +// let (open, close) = match subtree.delimiter.kind { +// DelimiterKind::Brace => ("{", "}"), +// DelimiterKind::Bracket => ("[", "]"), +// DelimiterKind::Parenthesis => ("(", ")"), +// DelimiterKind::Invisible => ("", ""), +// }; +// format!("{open}{content}{close}") +// } +// } +// } + +// tkns.iter() +// .fold((String::new(), true), |(last, last_to_joint), tkn| { +// let s = [last, tokentree_to_text(tkn)].join(if last_to_joint { "" } else { " " }); +// let mut is_joint = false; +// if let TokenTree::Leaf(Leaf::Punct(punct)) = tkn { +// if punct.spacing == Spacing::Joint { +// is_joint = true; +// } +// } +// (s, is_joint) +// }) +// .0 +// } +/// By-reference iterator over a [`TokenStream`], that produces `&TokenTree` +/// items. +#[derive(Clone, Debug)] +pub struct RefTokenTreeCursor<'t, S> { + stream: &'t TokenStream, + index: usize, +} + +impl<'t, S> RefTokenTreeCursor<'t, S> { + fn new(stream: &'t TokenStream) -> Self { + RefTokenTreeCursor { stream, index: 0 } + } -impl fmt::Display for Punct { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.char, f) + pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> { + self.stream.0.get(self.index + n) } } -impl Subtree { - /// Count the number of tokens recursively - pub fn count(&self) -> usize { - let children_count = self - .token_trees - .iter() - .map(|c| match c { - TokenTree::Subtree(c) => c.count(), - TokenTree::Leaf(_) => 0, - }) - .sum::(); +impl<'t, S> Iterator for RefTokenTreeCursor<'t, S> { + type Item = &'t TokenTree; - self.token_trees.len() + children_count + fn next(&mut self) -> Option<&'t TokenTree> { + self.stream.0.get(self.index).inspect(|_| self.index += 1) } } -impl Subtree { - /// A simple line string used for debugging - pub fn as_debug_string(&self) -> String { - let delim = match self.delimiter.kind { - DelimiterKind::Brace => ("{", "}"), - DelimiterKind::Bracket => ("[", "]"), - DelimiterKind::Parenthesis => ("(", ")"), - DelimiterKind::Invisible => ("$", "$"), - }; - - let mut res = String::new(); - res.push_str(delim.0); - let mut last = None; - for child in self.token_trees.iter() { - let s = match child { - TokenTree::Leaf(it) => { - let s = match it { - Leaf::Literal(it) => it.symbol.to_string(), - Leaf::Punct(it) => it.char.to_string(), - Leaf::Ident(it) => format!("{}{}", it.is_raw.as_str(), it.sym), - }; - match (it, last) { - (Leaf::Ident(_), Some(&TokenTree::Leaf(Leaf::Ident(_)))) => { - " ".to_owned() + &s - } - (Leaf::Punct(_), Some(TokenTree::Leaf(Leaf::Punct(punct)))) => { - if punct.spacing == Spacing::Alone { - " ".to_owned() + &s - } else { - s - } - } - _ => s, - } - } - TokenTree::Subtree(it) => it.as_debug_string(), - }; - res.push_str(&s); - last = Some(child); - } +/// Owning by-value iterator over a [`TokenStream`], that produces `&TokenTree` +/// items. +/// +/// Doesn't impl `Iterator` because Rust doesn't permit an owning iterator to +/// return `&T` from `next`; the need for an explicit lifetime in the `Item` +/// associated type gets in the way. Instead, use `next_ref` (which doesn't +/// involve associated types) for getting individual elements, or +/// `RefTokenTreeCursor` if you really want an `Iterator`, e.g. in a `for` +/// loop. +#[derive(Clone, Debug)] +pub struct TokenTreeCursor { + pub stream: TokenStream, + index: usize, +} + +impl TokenTreeCursor { + fn new(stream: TokenStream) -> Self { + TokenTreeCursor { stream, index: 0 } + } + + #[inline] + pub fn next_ref(&mut self) -> Option<&TokenTree> { + self.stream.0.get(self.index).inspect(|_| self.index += 1) + } - res.push_str(delim.1); - res + pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> { + self.stream.0.get(self.index + n) } } -pub fn pretty(tkns: &[TokenTree]) -> String { - fn tokentree_to_text(tkn: &TokenTree) -> String { - match tkn { - TokenTree::Leaf(Leaf::Ident(ident)) => { - format!("{}{}", ident.is_raw.as_str(), ident.sym) - } - TokenTree::Leaf(Leaf::Literal(literal)) => format!("{literal}"), - TokenTree::Leaf(Leaf::Punct(punct)) => format!("{}", punct.char), - TokenTree::Subtree(subtree) => { - let content = pretty(&subtree.token_trees); - let (open, close) = match subtree.delimiter.kind { - DelimiterKind::Brace => ("{", "}"), - DelimiterKind::Bracket => ("[", "]"), - DelimiterKind::Parenthesis => ("(", ")"), - DelimiterKind::Invisible => ("", ""), +#[derive(Clone, Debug)] +pub struct TokenCursor { + // Cursor for the current (innermost) token stream. The delimiters for this + // token stream are found in `self.stack.last()`; when that is `None` then + // we are in the outermost token stream which never has delimiters. + tree_cursor: TokenTreeCursor, + + // Token streams surrounding the current one. The delimiters for stack[n]'s + // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters + // because it's the outermost token stream which never has delimiters. + stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>, +} + +impl TokenCursor { + pub fn new(stream: TokenStream) -> Self { + TokenCursor { tree_cursor: TokenTreeCursor::new(stream), stack: Vec::new() } + } + + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Option<(Token, Spacing)> { + self.inlined_next() + } + + /// This always-inlined version should only be used on hot code paths. + #[inline(always)] + pub fn inlined_next(&mut self) -> Option<(Token, Spacing)> { + loop { + // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix + // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions + // below can be removed. + if let Some(tree) = self.tree_cursor.next_ref() { + match *tree { + TokenTree::Token(ref token, spacing) => { + debug_assert!(!matches!( + token.kind, + TokenKind::OpenDelim(_) | TokenKind::CloseDelim(_) + )); + return Some((token.clone(), spacing)); + } + TokenTree::Delimited(sp, spacing, delim, ref tts) => { + let trees = tts.clone().into_trees(); + self.stack.push(( + mem::replace(&mut self.tree_cursor, trees), + sp, + spacing, + delim, + )); + if delim != Delimiter::Invisible { + return Some(( + Token::new(TokenKind::OpenDelim(delim), sp.open), + spacing.open, + )); + } + // No open delimiter to return; continue on to the next iteration. + } }; - format!("{open}{content}{close}") + } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() { + // We have exhausted this token stream. Move back to its parent token stream. + self.tree_cursor = tree_cursor; + if delim != Delimiter::Invisible { + return Some(( + Token::new(TokenKind::CloseDelim(delim), span.close), + spacing.close, + )); + } + // No close delimiter to return; continue on to the next iteration. + } else { + return None; } } } +} + +#[derive(Clone, Debug)] +pub struct RefTokenCursor<'a, S> { + // Cursor for the current (innermost) token stream. The delimiters for this + // token stream are found in `self.stack.last()`; when that is `None` then + // we are in the outermost token stream which never has delimiters. + tree_cursor: RefTokenTreeCursor<'a, S>, - tkns.iter() - .fold((String::new(), true), |(last, last_to_joint), tkn| { - let s = [last, tokentree_to_text(tkn)].join(if last_to_joint { "" } else { " " }); - let mut is_joint = false; - if let TokenTree::Leaf(Leaf::Punct(punct)) = tkn { - if punct.spacing == Spacing::Joint { - is_joint = true; + // Token streams surrounding the current one. The delimiters for stack[n]'s + // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters + // because it's the outermost token stream which never has delimiters. + stack: Vec<(RefTokenTreeCursor<'a, S>, DelimSpan, DelimSpacing, Delimiter)>, +} + +impl<'a, S: Copy> RefTokenCursor<'a, S> { + pub fn new(stream: &'a TokenStream) -> Self { + RefTokenCursor { tree_cursor: RefTokenTreeCursor::new(stream), stack: Vec::new() } + } + + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Option<(Token, Spacing)> { + self.inlined_next() + } + + pub fn at_root(&self) -> bool { + self.stack.is_empty() + } + + /// This always-inlined version should only be used on hot code paths. + #[inline(always)] + pub fn inlined_next(&mut self) -> Option<(Token, Spacing)> { + loop { + // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix + // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions + // below can be removed. + if let Some(tree) = self.tree_cursor.next() { + match *tree { + TokenTree::Token(ref token, spacing) => { + debug_assert!(!matches!( + token.kind, + TokenKind::OpenDelim(_) | TokenKind::CloseDelim(_) + )); + return Some((token.clone(), spacing)); + } + TokenTree::Delimited(sp, spacing, delim, ref tts) => { + let trees = tts.trees(); + self.stack.push(( + mem::replace(&mut self.tree_cursor, trees), + sp, + spacing, + delim, + )); + if delim != Delimiter::Invisible { + return Some(( + Token::new(TokenKind::OpenDelim(delim), sp.open), + spacing.open, + )); + } + // No open delimiter to return; continue on to the next iteration. + } + }; + } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() { + // We have exhausted this token stream. Move back to its parent token stream. + self.tree_cursor = tree_cursor; + if delim != Delimiter::Invisible { + return Some(( + Token::new(TokenKind::CloseDelim(delim), span.close), + spacing.close, + )); } + // No close delimiter to return; continue on to the next iteration. + } else { + return None; } - (s, is_joint) - }) - .0 + } + } }