Skip to content

Commit

Permalink
Speed up Parser::expected_token_types.
Browse files Browse the repository at this point in the history
The parser pushes a `TokenType` to `Parser::expected_token_types` on
every call to the various `check`/`eat` methods, and clears it on every
call to `bump`. Some of those `TokenType` values are full tokens that
require cloning and dropping. This is a *lot* of work for something
that is only used in error messages, and it accounts for a significant
fraction of parsing execution time.

This commit overhauls `TokenType` so that `Parser::expected_token_types`
can be implemented as a bitset. This requires changing `TokenType` to a
C-style parameterless enum, and adding `TokenTypeSet` which uses a
`u128` for the bits. (The new `TokenType` has 105 variants.)

The new types `ExpTokenPair` and `ExpKeywordPair` are now arguments to
the `check`/`eat` methods. This is for maximum speed. The elements in
the pairs are always statically known; e.g. a
`token::BinOp(token::Star)` is always paired with a `TokenType::Star`.
So we now compute `TokenType`s in advance and pass them in to
`check`/`eat`, rather than the previous approach of constructing them on
insertion into `expected_token_types`.

Values of these pair types can be produced by the new `exp!` macro,
which is used at every `check`/`eat` call site. The macro is for
convenience, allowing any pair to be generated from a single identifier.

The ident/keyword filtering in `expected_one_of_not_found` is no longer
necessary. It was there to account for some sloppiness in
`TokenKind`/`TokenType` comparisons.

The existing `TokenType` is moved to a new file `token_type.rs`, and all
its new infrastructure is added to that file. There is more boilerplate
code than I would like, but I can't see how to make it shorter.
  • Loading branch information
nnethercote committed Dec 10, 2024
1 parent fc0016c commit 9d3b77a
Show file tree
Hide file tree
Showing 22 changed files with 1,352 additions and 789 deletions.
112 changes: 56 additions & 56 deletions compiler/rustc_builtin_macros/src/asm.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
use ast::token::IdentIsRaw;
use lint::BuiltinLintDiag;
use rustc_ast::AsmMacro;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter};
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::{AsmMacro, token};
use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
use rustc_errors::PResult;
use rustc_expand::base::*;
use rustc_index::bit_set::GrowableBitSet;
use rustc_parse::parser::Parser;
use rustc_parse::exp;
use rustc_parse::parser::{ExpKeywordPair, Parser};
use rustc_session::lint;
use rustc_span::symbol::{Ident, Symbol, kw, sym};
use rustc_span::symbol::{Ident, Symbol, kw};
use rustc_span::{ErrorGuaranteed, InnerSpan, Span};
use rustc_target::asm::InlineAsmArch;
use smallvec::smallvec;
Expand Down Expand Up @@ -39,16 +39,16 @@ pub struct AsmArgs {
/// - `Err(_)` if the current token matches the keyword, but was not expected
fn eat_operand_keyword<'a>(
p: &mut Parser<'a>,
symbol: Symbol,
exp: ExpKeywordPair,
asm_macro: AsmMacro,
) -> PResult<'a, bool> {
if matches!(asm_macro, AsmMacro::Asm) {
Ok(p.eat_keyword(symbol))
Ok(p.eat_keyword(exp))
} else {
let span = p.token.span;
if p.eat_keyword_noexpect(symbol) {
if p.eat_keyword_noexpect(exp.kw) {
// in gets printed as `r#in` otherwise
let symbol = if symbol == kw::In { "in" } else { symbol.as_str() };
let symbol = if exp.kw == kw::In { "in" } else { exp.kw.as_str() };
Err(p.dcx().create_err(errors::AsmUnsupportedOperand {
span,
symbol,
Expand Down Expand Up @@ -96,28 +96,28 @@ pub fn parse_asm_args<'a>(

let mut allow_templates = true;
while p.token != token::Eof {
if !p.eat(&token::Comma) {
if !p.eat(exp!(Comma)) {
if allow_templates {
// After a template string, we always expect *only* a comma...
return Err(dcx.create_err(errors::AsmExpectedComma { span: p.token.span }));
} else {
// ...after that delegate to `expect` to also include the other expected tokens.
return Err(p.expect(&token::Comma).err().unwrap());
return Err(p.expect(exp!(Comma)).err().unwrap());
}
}
if p.token == token::Eof {
break;
} // accept trailing commas

// Parse clobber_abi
if p.eat_keyword(sym::clobber_abi) {
if p.eat_keyword(exp!(ClobberAbi)) {
parse_clobber_abi(p, &mut args)?;
allow_templates = false;
continue;
}

// Parse options
if p.eat_keyword(sym::options) {
if p.eat_keyword(exp!(Options)) {
parse_options(p, &mut args, asm_macro)?;
allow_templates = false;
continue;
Expand All @@ -129,65 +129,65 @@ pub fn parse_asm_args<'a>(
let name = if p.token.is_ident() && p.look_ahead(1, |t| *t == token::Eq) {
let (ident, _) = p.token.ident().unwrap();
p.bump();
p.expect(&token::Eq)?;
p.expect(exp!(Eq))?;
allow_templates = false;
Some(ident.name)
} else {
None
};

let mut explicit_reg = false;
let op = if eat_operand_keyword(p, kw::In, asm_macro)? {
let op = if eat_operand_keyword(p, exp!(In), asm_macro)? {
let reg = parse_reg(p, &mut explicit_reg)?;
if p.eat_keyword(kw::Underscore) {
if p.eat_keyword(exp!(Underscore)) {
let err = dcx.create_err(errors::AsmUnderscoreInput { span: p.token.span });
return Err(err);
}
let expr = p.parse_expr()?;
ast::InlineAsmOperand::In { reg, expr }
} else if eat_operand_keyword(p, sym::out, asm_macro)? {
} else if eat_operand_keyword(p, exp!(Out), asm_macro)? {
let reg = parse_reg(p, &mut explicit_reg)?;
let expr = if p.eat_keyword(kw::Underscore) { None } else { Some(p.parse_expr()?) };
let expr = if p.eat_keyword(exp!(Underscore)) { None } else { Some(p.parse_expr()?) };
ast::InlineAsmOperand::Out { reg, expr, late: false }
} else if eat_operand_keyword(p, sym::lateout, asm_macro)? {
} else if eat_operand_keyword(p, exp!(Lateout), asm_macro)? {
let reg = parse_reg(p, &mut explicit_reg)?;
let expr = if p.eat_keyword(kw::Underscore) { None } else { Some(p.parse_expr()?) };
let expr = if p.eat_keyword(exp!(Underscore)) { None } else { Some(p.parse_expr()?) };
ast::InlineAsmOperand::Out { reg, expr, late: true }
} else if eat_operand_keyword(p, sym::inout, asm_macro)? {
} else if eat_operand_keyword(p, exp!(Inout), asm_macro)? {
let reg = parse_reg(p, &mut explicit_reg)?;
if p.eat_keyword(kw::Underscore) {
if p.eat_keyword(exp!(Underscore)) {
let err = dcx.create_err(errors::AsmUnderscoreInput { span: p.token.span });
return Err(err);
}
let expr = p.parse_expr()?;
if p.eat(&token::FatArrow) {
if p.eat(exp!(FatArrow)) {
let out_expr =
if p.eat_keyword(kw::Underscore) { None } else { Some(p.parse_expr()?) };
if p.eat_keyword(exp!(Underscore)) { None } else { Some(p.parse_expr()?) };
ast::InlineAsmOperand::SplitInOut { reg, in_expr: expr, out_expr, late: false }
} else {
ast::InlineAsmOperand::InOut { reg, expr, late: false }
}
} else if eat_operand_keyword(p, sym::inlateout, asm_macro)? {
} else if eat_operand_keyword(p, exp!(Inlateout), asm_macro)? {
let reg = parse_reg(p, &mut explicit_reg)?;
if p.eat_keyword(kw::Underscore) {
if p.eat_keyword(exp!(Underscore)) {
let err = dcx.create_err(errors::AsmUnderscoreInput { span: p.token.span });
return Err(err);
}
let expr = p.parse_expr()?;
if p.eat(&token::FatArrow) {
if p.eat(exp!(FatArrow)) {
let out_expr =
if p.eat_keyword(kw::Underscore) { None } else { Some(p.parse_expr()?) };
if p.eat_keyword(exp!(Underscore)) { None } else { Some(p.parse_expr()?) };
ast::InlineAsmOperand::SplitInOut { reg, in_expr: expr, out_expr, late: true }
} else {
ast::InlineAsmOperand::InOut { reg, expr, late: true }
}
} else if eat_operand_keyword(p, sym::label, asm_macro)? {
} else if eat_operand_keyword(p, exp!(Label), asm_macro)? {
let block = p.parse_block()?;
ast::InlineAsmOperand::Label { block }
} else if p.eat_keyword(kw::Const) {
} else if p.eat_keyword(exp!(Const)) {
let anon_const = p.parse_expr_anon_const()?;
ast::InlineAsmOperand::Const { anon_const }
} else if p.eat_keyword(sym::sym) {
} else if p.eat_keyword(exp!(Sym)) {
let expr = p.parse_expr()?;
let ast::ExprKind::Path(qself, path) = &expr.kind else {
let err = dcx.create_err(errors::AsmSymNoPath { span: expr.span });
Expand Down Expand Up @@ -390,31 +390,31 @@ fn parse_options<'a>(
) -> PResult<'a, ()> {
let span_start = p.prev_token.span;

p.expect(&token::OpenDelim(Delimiter::Parenthesis))?;

while !p.eat(&token::CloseDelim(Delimiter::Parenthesis)) {
const OPTIONS: [(Symbol, ast::InlineAsmOptions); ast::InlineAsmOptions::COUNT] = [
(sym::pure, ast::InlineAsmOptions::PURE),
(sym::nomem, ast::InlineAsmOptions::NOMEM),
(sym::readonly, ast::InlineAsmOptions::READONLY),
(sym::preserves_flags, ast::InlineAsmOptions::PRESERVES_FLAGS),
(sym::noreturn, ast::InlineAsmOptions::NORETURN),
(sym::nostack, ast::InlineAsmOptions::NOSTACK),
(sym::may_unwind, ast::InlineAsmOptions::MAY_UNWIND),
(sym::att_syntax, ast::InlineAsmOptions::ATT_SYNTAX),
(kw::Raw, ast::InlineAsmOptions::RAW),
p.expect(exp!(OpenParen))?;

while !p.eat(exp!(CloseParen)) {
const OPTIONS: [(ExpKeywordPair, ast::InlineAsmOptions); ast::InlineAsmOptions::COUNT] = [
(exp!(Pure), ast::InlineAsmOptions::PURE),
(exp!(Nomem), ast::InlineAsmOptions::NOMEM),
(exp!(Readonly), ast::InlineAsmOptions::READONLY),
(exp!(PreservesFlags), ast::InlineAsmOptions::PRESERVES_FLAGS),
(exp!(Noreturn), ast::InlineAsmOptions::NORETURN),
(exp!(Nostack), ast::InlineAsmOptions::NOSTACK),
(exp!(MayUnwind), ast::InlineAsmOptions::MAY_UNWIND),
(exp!(AttSyntax), ast::InlineAsmOptions::ATT_SYNTAX),
(exp!(Raw), ast::InlineAsmOptions::RAW),
];

'blk: {
for (symbol, option) in OPTIONS {
for (exp, option) in OPTIONS {
let kw_matched = if asm_macro.is_supported_option(option) {
p.eat_keyword(symbol)
p.eat_keyword(exp)
} else {
p.eat_keyword_noexpect(symbol)
p.eat_keyword_noexpect(exp.kw)
};

if kw_matched {
try_set_option(p, args, asm_macro, symbol, option);
try_set_option(p, args, asm_macro, exp.kw, option);
break 'blk;
}
}
Expand All @@ -423,10 +423,10 @@ fn parse_options<'a>(
}

// Allow trailing commas
if p.eat(&token::CloseDelim(Delimiter::Parenthesis)) {
if p.eat(exp!(CloseParen)) {
break;
}
p.expect(&token::Comma)?;
p.expect(exp!(Comma))?;
}

let new_span = span_start.to(p.prev_token.span);
Expand All @@ -438,14 +438,14 @@ fn parse_options<'a>(
fn parse_clobber_abi<'a>(p: &mut Parser<'a>, args: &mut AsmArgs) -> PResult<'a, ()> {
let span_start = p.prev_token.span;

p.expect(&token::OpenDelim(Delimiter::Parenthesis))?;
p.expect(exp!(OpenParen))?;

if p.eat(&token::CloseDelim(Delimiter::Parenthesis)) {
if p.eat(exp!(CloseParen)) {
return Err(p.dcx().create_err(errors::NonABI { span: p.token.span }));
}

let mut new_abis = Vec::new();
while !p.eat(&token::CloseDelim(Delimiter::Parenthesis)) {
while !p.eat(exp!(CloseParen)) {
match p.parse_str_lit() {
Ok(str_lit) => {
new_abis.push((str_lit.symbol_unescaped, str_lit.span));
Expand All @@ -457,10 +457,10 @@ fn parse_clobber_abi<'a>(p: &mut Parser<'a>, args: &mut AsmArgs) -> PResult<'a,
};

// Allow trailing commas
if p.eat(&token::CloseDelim(Delimiter::Parenthesis)) {
if p.eat(exp!(CloseParen)) {
break;
}
p.expect(&token::Comma)?;
p.expect(exp!(Comma))?;
}

let full_span = span_start.to(p.prev_token.span);
Expand All @@ -483,7 +483,7 @@ fn parse_reg<'a>(
p: &mut Parser<'a>,
explicit_reg: &mut bool,
) -> PResult<'a, ast::InlineAsmRegOrRegClass> {
p.expect(&token::OpenDelim(Delimiter::Parenthesis))?;
p.expect(exp!(OpenParen))?;
let result = match p.token.uninterpolate().kind {
token::Ident(name, IdentIsRaw::No) => ast::InlineAsmRegOrRegClass::RegClass(name),
token::Literal(token::Lit { kind: token::LitKind::Str, symbol, suffix: _ }) => {
Expand All @@ -497,7 +497,7 @@ fn parse_reg<'a>(
}
};
p.bump();
p.expect(&token::CloseDelim(Delimiter::Parenthesis))?;
p.expect(exp!(CloseParen))?;
Ok(result)
}

Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_builtin_macros/src/assert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use rustc_ast::{DelimArgs, Expr, ExprKind, MacCall, Path, PathSegment, UnOp, tok
use rustc_ast_pretty::pprust;
use rustc_errors::PResult;
use rustc_expand::base::{DummyResult, ExpandResult, ExtCtxt, MacEager, MacroExpanderResult};
use rustc_parse::exp;
use rustc_parse::parser::Parser;
use rustc_span::symbol::{Ident, Symbol, sym};
use rustc_span::{DUMMY_SP, Span};
Expand Down Expand Up @@ -144,7 +145,7 @@ fn parse_assert<'a>(cx: &ExtCtxt<'a>, sp: Span, stream: TokenStream) -> PResult<
cx.dcx().emit_err(errors::AssertMissingComma { span: parser.token.span, comma });

parse_custom_message(&mut parser)
} else if parser.eat(&token::Comma) {
} else if parser.eat(exp!(Comma)) {
parse_custom_message(&mut parser)
} else {
None
Expand Down
5 changes: 3 additions & 2 deletions compiler/rustc_builtin_macros/src/cfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use rustc_ast::token;
use rustc_ast::tokenstream::TokenStream;
use rustc_errors::PResult;
use rustc_expand::base::{DummyResult, ExpandResult, ExtCtxt, MacEager, MacroExpanderResult};
use rustc_parse::exp;
use rustc_span::Span;
use {rustc_ast as ast, rustc_attr as attr};

Expand Down Expand Up @@ -48,9 +49,9 @@ fn parse_cfg<'a>(

let cfg = p.parse_meta_item_inner()?;

let _ = p.eat(&token::Comma);
let _ = p.eat(exp!(Comma));

if !p.eat(&token::Eof) {
if !p.eat(exp!(Eof)) {
return Err(cx.dcx().create_err(errors::OneCfgPattern { span }));
}

Expand Down
7 changes: 4 additions & 3 deletions compiler/rustc_builtin_macros/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use rustc_errors::{Applicability, Diag, MultiSpan, PResult, SingleLabelManySpans
use rustc_expand::base::*;
use rustc_lint_defs::builtin::NAMED_ARGUMENTS_USED_POSITIONALLY;
use rustc_lint_defs::{BufferedEarlyLint, BuiltinLintDiag, LintId};
use rustc_parse::exp;
use rustc_parse_format as parse;
use rustc_span::symbol::{Ident, Symbol};
use rustc_span::{BytePos, ErrorGuaranteed, InnerSpan, Span};
Expand Down Expand Up @@ -94,12 +95,12 @@ fn parse_args<'a>(ecx: &ExtCtxt<'a>, sp: Span, tts: TokenStream) -> PResult<'a,
let mut first = true;

while p.token != token::Eof {
if !p.eat(&token::Comma) {
if !p.eat(exp!(Comma)) {
if first {
p.clear_expected_token_types();
}

match p.expect(&token::Comma) {
match p.expect(exp!(Comma)) {
Err(err) => {
match token::TokenKind::Comma.similar_tokens() {
Some(tks) if tks.contains(&p.token.kind) => {
Expand All @@ -123,7 +124,7 @@ fn parse_args<'a>(ecx: &ExtCtxt<'a>, sp: Span, tts: TokenStream) -> PResult<'a,
match p.token.ident() {
Some((ident, _)) if p.look_ahead(1, |t| *t == token::Eq) => {
p.bump();
p.expect(&token::Eq)?;
p.expect(exp!(Eq))?;
let expr = p.parse_expr()?;
if let Some((_, prev)) = args.by_name(ident.name) {
ecx.dcx().emit_err(errors::FormatDuplicateArg {
Expand Down
5 changes: 3 additions & 2 deletions compiler/rustc_builtin_macros/src/pattern_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use rustc_ast::tokenstream::TokenStream;
use rustc_ast::{Pat, Ty, ast};
use rustc_errors::PResult;
use rustc_expand::base::{self, DummyResult, ExpandResult, ExtCtxt, MacroExpanderResult};
use rustc_span::{Span, sym};
use rustc_parse::exp;
use rustc_span::Span;

pub(crate) fn expand<'cx>(
cx: &'cx mut ExtCtxt<'_>,
Expand All @@ -24,7 +25,7 @@ fn parse_pat_ty<'a>(cx: &mut ExtCtxt<'a>, stream: TokenStream) -> PResult<'a, (P
let mut parser = cx.new_parser_from_tts(stream);

let ty = parser.parse_ty()?;
parser.expect_keyword(sym::is)?;
parser.expect_keyword(exp!(Is))?;
let pat = parser.parse_pat_no_top_alt(None, None)?;

Ok((ty, pat))
Expand Down
6 changes: 3 additions & 3 deletions compiler/rustc_builtin_macros/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use rustc_expand::expand::AstFragment;
use rustc_feature::AttributeTemplate;
use rustc_lint_defs::BuiltinLintDiag;
use rustc_lint_defs::builtin::DUPLICATE_MACRO_ATTRIBUTES;
use rustc_parse::{parser, validate_attr};
use rustc_parse::{exp, parser, validate_attr};
use rustc_session::errors::report_lit_error;
use rustc_span::{BytePos, Span, Symbol};

Expand Down Expand Up @@ -204,7 +204,7 @@ pub(crate) fn get_single_expr_from_tts(
Ok(ret) => ret,
Err(guar) => return ExpandResult::Ready(Err(guar)),
};
let _ = p.eat(&token::Comma);
let _ = p.eat(exp!(Comma));

if p.token != token::Eof {
cx.dcx().emit_err(errors::OnlyOneArgument { span, name });
Expand Down Expand Up @@ -237,7 +237,7 @@ pub(crate) fn get_exprs_from_tts(
let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr();

es.push(expr);
if p.eat(&token::Comma) {
if p.eat(exp!(Comma)) {
continue;
}
if p.token != token::Eof {
Expand Down
Loading

0 comments on commit 9d3b77a

Please sign in to comment.