From 609238554028352b53cd6c0ecaf43c6f5f9be570 Mon Sep 17 00:00:00 2001 From: Alisdair Meredith Date: Tue, 9 Jul 2024 12:18:59 -0400 Subject: [PATCH 01/10] [lex] Reorganize contents to follow grammar and phases of translation --- source/lex.tex | 140 ++++++++++++++++++++++++------------------------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/source/lex.tex b/source/lex.tex index 8ac924c910..9592f45982 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -465,30 +465,48 @@ for the respective Unicode encoding form. \indextext{character set|)} +\rSec1[lex.comment]{Comments} + +\pnum +\indextext{comment|(}% +\indextext{comment!\tcode{/*} \tcode{*/}}% +\indextext{comment!\tcode{//}}% +The characters \tcode{/*} start a comment, which terminates with the +characters \tcode{*/}. These comments do not nest. +\indextext{comment!\tcode{//}}% +The characters \tcode{//} start a comment, which terminates immediately before the +next new-line character. If there is a form-feed or a vertical-tab +character in such a comment, only whitespace characters shall appear +between it and the new-line that terminates the comment; no diagnostic +is required. +\begin{note} +The comment characters \tcode{//}, \tcode{/*}, +and \tcode{*/} have no special meaning within a \tcode{//} comment and +are treated just like other characters. Similarly, the comment +characters \tcode{//} and \tcode{/*} have no special meaning within a +\tcode{/*} comment. 
+\end{note} +\indextext{comment|)} + \rSec1[lex.pptoken]{Preprocessing tokens} \indextext{token!preprocessing|(}% \begin{bnf} \nontermdef{preprocessing-token}\br header-name\br + pp-number\br + identifier\br import-keyword\br module-keyword\br export-keyword\br - identifier\br - pp-number\br + preprocessing-op-or-punc\br character-literal\br user-defined-character-literal\br string-literal\br user-defined-string-literal\br - preprocessing-op-or-punc\br \textnormal{each non-whitespace character that cannot be one of the above} \end{bnf} -\pnum -Each preprocessing token that is converted to a token\iref{lex.token} -shall have the lexical form of a keyword, an identifier, a literal, -or an operator or punctuator. - \pnum A preprocessing token is the minimal lexical element of the language in translation phases 3 through 6. @@ -523,6 +541,11 @@ between the quotation characters in a character literal or string literal. +\pnum +Each preprocessing token that is converted to a token\iref{lex.token} +shall have the lexical form of a keyword, an identifier, a literal, +or an operator or punctuator. + \pnum If the input stream has been parsed into preprocessing tokens up to a given character: @@ -569,17 +592,6 @@ \end{codeblock} \end{example} -\pnum -The \grammarterm{import-keyword} is produced -by processing an \keyword{import} directive\iref{cpp.import}, -the \grammarterm{module-keyword} is produced -by preprocessing a \keyword{module} directive\iref{cpp.module}, and -the \grammarterm{export-keyword} is produced -by preprocessing either of the previous two directives. -\begin{note} -None has any observable spelling. 
-\end{note} - \pnum \begin{example} The program fragment \tcode{0xe+foo} is parsed as a @@ -651,57 +663,6 @@ \end{tokentable}% \indextext{token!alternative|)} -\rSec1[lex.token]{Tokens} - -\indextext{token|(}% -\begin{bnf} -\nontermdef{token}\br - identifier\br - keyword\br - literal\br - operator-or-punctuator -\end{bnf} - -\pnum -\indextext{\idxgram{token}}% -There are five kinds of tokens: identifiers, keywords, literals,% -\begin{footnote} -Literals include strings and character and numeric literals. -\end{footnote} -operators, and other separators. -\indextext{whitespace}% -Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments -(collectively, ``whitespace''), as described below, are ignored except -as they serve to separate tokens. -\begin{note} -Whitespace can separate otherwise adjacent identifiers, keywords, numeric -literals, and alternative tokens containing alphabetic characters. -\end{note} -\indextext{token|)} - -\rSec1[lex.comment]{Comments} - -\pnum -\indextext{comment|(}% -\indextext{comment!\tcode{/*} \tcode{*/}}% -\indextext{comment!\tcode{//}}% -The characters \tcode{/*} start a comment, which terminates with the -characters \tcode{*/}. These comments do not nest. -\indextext{comment!\tcode{//}}% -The characters \tcode{//} start a comment, which terminates immediately before the -next new-line character. If there is a form-feed or a vertical-tab -character in such a comment, only whitespace characters shall appear -between it and the new-line that terminates the comment; no diagnostic -is required. -\begin{note} -The comment characters \tcode{//}, \tcode{/*}, -and \tcode{*/} have no special meaning within a \tcode{//} comment and -are treated just like other characters. Similarly, the comment -characters \tcode{//} and \tcode{/*} have no special meaning within a -\tcode{/*} comment. 
-\end{note} -\indextext{comment|)} - \rSec1[lex.header]{Header names} \indextext{header!name|(}% @@ -791,6 +752,34 @@ a \grammarterm{floating-point-literal} token.% \indextext{number!preprocessing|)} +\rSec1[lex.token]{Tokens} + +\indextext{token|(}% +\begin{bnf} +\nontermdef{token}\br + identifier\br + keyword\br + literal\br + operator-or-punctuator +\end{bnf} + +\pnum +\indextext{\idxgram{token}}% +There are five kinds of tokens: identifiers, keywords, literals,% +\begin{footnote} +Literals include strings and character and numeric literals. +\end{footnote} +operators, and other separators. +\indextext{whitespace}% +Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments +(collectively, ``whitespace''), as described below, are ignored except +as they serve to separate tokens. +\begin{note} +Whitespace can separate otherwise adjacent identifiers, keywords, numeric +literals, and alternative tokens containing alphabetic characters. +\end{note} +\indextext{token|)} + \rSec1[lex.name]{Identifiers} \indextext{identifier|(}% @@ -920,6 +909,17 @@ \grammarterm{export-keyword} \end{bnf} +\pnum +The \grammarterm{import-keyword} is produced +by processing an \keyword{import} directive\iref{cpp.import}, +the \grammarterm{module-keyword} is produced +by preprocessing a \keyword{module} directive\iref{cpp.module}, and +the \grammarterm{export-keyword} is produced +by preprocessing either of the previous two directives. +\begin{note} +None has any observable spelling. +\end{note} + \pnum \indextext{keyword|(}% The identifiers shown in \tref{lex.key} are reserved for use From b7c25b90be4e7338affa78f80ee6ba3eef1b0f01 Mon Sep 17 00:00:00 2001 From: Alisdair Meredith Date: Mon, 17 Jun 2024 10:19:40 +0700 Subject: [PATCH 02/10] [lex.charset] Extract universal-character-name grammar to new subclause The grammar for universal-character-name is oddly sandwiched into the middle of the subclause talking about the different character sets used by the standard. 
To improve the flow, extract that grammar into its own subclause. In the extraction, I make two other clarifying changes. First, describe this new subclause as 'a way to name any element in the translation character set using just the basic character set' rather than simply 'a way to name other characters'. Secondly, remove the 'one of' in the grammar where there is only one option to choose. --- source/lex.tex | 128 +++++++++++++++++++++++++------------------------ 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/source/lex.tex b/source/lex.tex index 9592f45982..aa1d4017da 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -326,11 +326,69 @@ \end{floattable} \pnum -The \grammarterm{universal-character-name} construct provides a way to name -other characters. +The \defnadj{basic literal}{character set} consists of +all characters of the basic character set, +plus the control characters specified in \tref{lex.charset.literal}. + +\begin{floattable}{Additional control characters in the basic literal character set}{lex.charset.literal}{ll} +\topline +\ohdrx{2}{character} \\ \capsep +\ucode{0000} & \uname{null} \\ +\ucode{0007} & \uname{alert} \\ +\ucode{0008} & \uname{backspace} \\ +\ucode{000d} & \uname{carriage return} \\ +\end{floattable} + +\pnum +A \defn{code unit} is an integer value +of character type\iref{basic.fundamental}. +Characters in a \grammarterm{character-literal} +other than a multicharacter or non-encodable character literal or +in a \grammarterm{string-literal} are encoded as +a sequence of one or more code units, as determined +by the \grammarterm{encoding-prefix}\iref{lex.ccon,lex.string}; +this is termed the respective \defnadj{literal}{encoding}. +The \defnadj{ordinary literal}{encoding} is +the encoding applied to an ordinary character or string literal. +The \defnadj{wide literal}{encoding} is the encoding applied +to a wide character or string literal. 
+ +\pnum +A literal encoding or a locale-specific encoding of one of +the execution character sets\iref{character.seq} +encodes each element of the basic literal character set as +a single code unit with non-negative value, +distinct from the code unit for any other such element. +\begin{note} +A character not in the basic literal character set +can be encoded with more than one code unit; +the value of such a code unit can be the same as +that of a code unit for an element of the basic literal character set. +\end{note} +\indextext{character!null}% +\indextext{wide-character!null}% +The \unicode{0000}{null} character is encoded as the value \tcode{0}. +No other element of the translation character set +is encoded with a code unit of value \tcode{0}. +The code unit value of each decimal digit character after the digit \tcode{0} (\ucode{0030}) +shall be one greater than the value of the previous. +The ordinary and wide literal encodings are otherwise +\impldef{ordinary and wide literal encodings}. +\indextext{UTF-8}% +\indextext{UTF-16}% +\indextext{UTF-32}% +For a UTF-8, UTF-16, or UTF-32 literal, +the implementation shall encode +the Unicode scalar value +corresponding to each character of the translation character set +as specified in the Unicode Standard +for the respective Unicode encoding form. +\indextext{character set|)} + +\rSec1[lex.universal.char]{Universal Character Names} \begin{bnf} -\nontermdef{n-char} \textnormal{one of}\br +\nontermdef{n-char}\br \textnormal{any member of the translation character set except the \unicode{007d}{right curly bracket} or new-line character} \end{bnf} @@ -364,6 +422,10 @@ named-universal-character \end{bnf} +\pnum +The \grammarterm{universal-character-name} construct provides a way to name +any element in the translation character set using just the basic character set. 
+ \pnum A \grammarterm{universal-character-name} of the form \tcode{\textbackslash u} \grammarterm{hex-quad}, @@ -405,66 +467,6 @@ \grammarterm{universal-character-name}. \end{note} -\pnum -The \defnadj{basic literal}{character set} consists of -all characters of the basic character set, -plus the control characters specified in \tref{lex.charset.literal}. - -\begin{floattable}{Additional control characters in the basic literal character set}{lex.charset.literal}{ll} -\topline -\ohdrx{2}{character} \\ \capsep -\ucode{0000} & \uname{null} \\ -\ucode{0007} & \uname{alert} \\ -\ucode{0008} & \uname{backspace} \\ -\ucode{000d} & \uname{carriage return} \\ -\end{floattable} - -\pnum -A \defn{code unit} is an integer value -of character type\iref{basic.fundamental}. -Characters in a \grammarterm{character-literal} -other than a multicharacter or non-encodable character literal or -in a \grammarterm{string-literal} are encoded as -a sequence of one or more code units, as determined -by the \grammarterm{encoding-prefix}\iref{lex.ccon,lex.string}; -this is termed the respective \defnadj{literal}{encoding}. -The \defnadj{ordinary literal}{encoding} is -the encoding applied to an ordinary character or string literal. -The \defnadj{wide literal}{encoding} is the encoding applied -to a wide character or string literal. - -\pnum -A literal encoding or a locale-specific encoding of one of -the execution character sets\iref{character.seq} -encodes each element of the basic literal character set as -a single code unit with non-negative value, -distinct from the code unit for any other such element. -\begin{note} -A character not in the basic literal character set -can be encoded with more than one code unit; -the value of such a code unit can be the same as -that of a code unit for an element of the basic literal character set. -\end{note} -\indextext{character!null}% -\indextext{wide-character!null}% -The \unicode{0000}{null} character is encoded as the value \tcode{0}. 
-No other element of the translation character set -is encoded with a code unit of value \tcode{0}. -The code unit value of each decimal digit character after the digit \tcode{0} (\ucode{0030}) -shall be one greater than the value of the previous. -The ordinary and wide literal encodings are otherwise -\impldef{ordinary and wide literal encodings}. -\indextext{UTF-8}% -\indextext{UTF-16}% -\indextext{UTF-32}% -For a UTF-8, UTF-16, or UTF-32 literal, -the implementation shall encode -the Unicode scalar value -corresponding to each character of the translation character set -as specified in the Unicode Standard -for the respective Unicode encoding form. -\indextext{character set|)} - \rSec1[lex.comment]{Comments} \pnum From 3d060e558bd2b3cc161f0d18b56c24294d159e73 Mon Sep 17 00:00:00 2001 From: Alisdair Meredith Date: Mon, 17 Jun 2024 23:48:09 +0700 Subject: [PATCH 03/10] [lex.charset] Introduced parent [lex.char] clause for character sets and UCNs --- source/lex.tex | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/lex.tex b/source/lex.tex index aa1d4017da..ee6342b7c8 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -249,7 +249,9 @@ \indextext{translation!phases|)} \end{enumerate} -\rSec1[lex.charset]{Character sets} +\rSec1[lex.char]{Characters}% + +\rSec2[lex.charset]{Character sets} \pnum \indextext{character set|(}% @@ -385,7 +387,7 @@ for the respective Unicode encoding form. \indextext{character set|)} -\rSec1[lex.universal.char]{Universal Character Names} +\rSec2[lex.universal.char]{Universal character names} \begin{bnf} \nontermdef{n-char}\br From 98cb2e5ec5979c031aee6d524fe08f6585bac530 Mon Sep 17 00:00:00 2001 From: Alisdair Meredith Date: Mon, 8 Jul 2024 12:33:25 -0400 Subject: [PATCH 04/10] [basic.pre] Defragment specification of names and entities The current contents of [basic.pre] jump between specifying different things. 
This PR moves all the specification of names to the front, followed by the specification of entities. There are two main benefits: (1) the specification for when two names are the same is a list of 4 rules that correspond to the 4 things that can form a name --- the connection is much clearer when the paragraphs are adjacent and the list is sorted to the same order; (2) in this form, even though all the words are the same, the reordered and merged paragraphs fit on a single page. The very last paragraph was forced over a page-break in the original layout. --- source/basic.tex | 54 ++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/source/basic.tex b/source/basic.tex index 24342733c4..de2e129033 100644 --- a/source/basic.tex +++ b/source/basic.tex @@ -32,18 +32,22 @@ \indextext{storage class}% \indextext{scope}% \indextext{linkage}% -An \defn{entity} is a value, object, reference, -structured binding, -function, enumerator, type, -class member, bit-field, template, template specialization, namespace, or -pack. - -\pnum A \defn{name} is an \grammarterm{identifier}\iref{lex.name}, \grammarterm{operator-function-id}\iref{over.oper}, \grammarterm{literal-operator-id}\iref{over.literal}, or \grammarterm{conversion-function-id}\iref{class.conv.fct}. +\pnum +Two names are \defnx{the same}{name!same} if +\begin{itemize} +\item they are \grammarterm{identifier}{s} composed of the same character sequence, or +\item they are \grammarterm{operator-function-id}{s} formed with the same operator, or +\item they are \grammarterm{literal-operator-id}{s} formed with the same +literal suffix identifier, or +\item they are \grammarterm{conversion-function-id}{s} formed with +equivalent\iref{temp.over.link} types. 
+\end{itemize} + \pnum Every name is introduced by a \defn{declaration}, which is a \begin{itemize} @@ -85,9 +89,13 @@ The interpretation of a \grammarterm{for-range-declaration} produces one or more of the above\iref{stmt.ranged}. \end{note} -An entity $E$ is denoted by the name (if any) -that is introduced by a declaration of $E$ or -by a \grammarterm{typedef-name} introduced by a declaration specifying $E$. + +\pnum +Some names denote types or templates. In general, whenever a name is +encountered it is necessary to determine whether that name denotes one of these +entities before continuing to parse the program that contains it. The process +that determines this is called +\defnx{name lookup}{lookup!name}\iref{basic.lookup}. \pnum A \defn{variable} is introduced by the @@ -95,6 +103,13 @@ a reference other than a non-static data member or of an object. The variable's name, if any, denotes the reference or object. +\pnum +An \defn{entity} is a value, object, reference, structured binding, function, +enumerator, type, class member, bit-field, template, template specialization, +namespace, or pack. An entity $E$ is denoted by the name (if any) that is +introduced by a declaration of $E$ or by a \grammarterm{typedef-name} +introduced by a declaration specifying $E$. + \pnum A \defnadj{local}{entity} is a variable with automatic storage duration\iref{basic.stc.auto}, @@ -102,25 +117,6 @@ whose corresponding variable is such an entity, or the \tcode{*\keyword{this}} object\iref{expr.prim.this}. -\pnum -Some names denote types or templates. In general, -whenever a name is encountered it is necessary to determine whether that name denotes -one of these entities before continuing to parse the program that contains it. The -process that determines this is called -\defnx{name lookup}{lookup!name}\iref{basic.lookup}. 
- -\pnum -Two names are \defnx{the same}{name!same} if -\begin{itemize} -\item they are \grammarterm{identifier}{s} composed of the same character sequence, or -\item they are \grammarterm{operator-function-id}{s} formed with -the same operator, or -\item they are \grammarterm{conversion-function-id}{s} formed -with equivalent\iref{temp.over.link} types, or -\item they are \grammarterm{literal-operator-id}{s}\iref{over.literal} formed with -the same literal suffix identifier. -\end{itemize} - \pnum \indextext{translation unit!name and}% \indextext{linkage}% From 52aedd575a33de80300dc2c0d3aab859a3ee59a7 Mon Sep 17 00:00:00 2001 From: Alisdair Meredith Date: Mon, 8 Jul 2024 13:36:46 -0400 Subject: [PATCH 05/10] [std.tex] Reorder preprocessor and modules before basic This change puts all the specification for assembling and transforming the source of a program ([lex], [cpp], and [modules]) ahead of the basic core specification of how to interpret that source. --- source/config.tex | 4 ++++ source/intro.tex | 5 +++-- source/std.tex | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/source/config.tex b/source/config.tex index be1743a556..304f32893b 100644 --- a/source/config.tex +++ b/source/config.tex @@ -8,6 +8,10 @@ %% Release date \newcommand{\reldate}{\today} +%% Core chapters +\newcommand{\firstcorechapter}{lex} +\newcommand{\lastcorechapter}{except} + %% Library chapters \newcommand{\firstlibchapter}{support} \newcommand{\lastlibchapter}{thread} diff --git a/source/intro.tex b/source/intro.tex index ca6009f3c9..024b871948 100644 --- a/source/intro.tex +++ b/source/intro.tex @@ -722,7 +722,8 @@ \begin{itemize} \item If a program contains no violations of the rules in -\ref{lex} through \ref{\lastlibchapter} as well as those specified in \ref{depr}, +\ref{\firstcorechapter} through \ref{\lastlibchapter} as well as those +specified in \ref{depr}, a conforming implementation shall, in accordance with the resource limits specified in 
\ref{implimits}, accept and correctly execute @@ -952,7 +953,7 @@ \pnum \indextext{standard!structure of|(}% \indextext{standard!structure of}% -\ref{lex} through \ref{cpp} describe the \Cpp{} programming +\ref{\firstcorechapter} through \ref{\lastcorechapter} describe the \Cpp{} programming language. That description includes detailed syntactic specifications in a form described in~\ref{syntax}. For convenience, \ref{gram} repeats all such syntactic specifications. diff --git a/source/std.tex b/source/std.tex index de1f338cc2..c33511f613 100644 --- a/source/std.tex +++ b/source/std.tex @@ -106,16 +106,16 @@ \include{intro} \include{lex} +\include{preprocessor} +\include{modules} \include{basic} \include{expressions} \include{statements} \include{declarations} -\include{modules} \include{classes} \include{overloading} \include{templates} \include{exceptions} -\include{preprocessor} \include{lib-intro} \include{support} \include{concepts} From 7a60384c0f8a225ff5cac3ece5428ae60e353fe8 Mon Sep 17 00:00:00 2001 From: Alisdair Meredith Date: Tue, 9 Jul 2024 16:02:04 -0400 Subject: [PATCH 06/10] [std] Reorganize the initial Core clauses This PR colocates [lex], [cpp], and modules to put all the parts that talk about assembling and translating a program together. In doing so, it rearranges the subclauses in [lex] and introduces subclauses that can be cross-referenced for each phase of translation. Metadata is introduced to identify the first and last core clause when cross-references want the first/last property rather than the specific clause itself. The subclause on comments, [lex.comment], is merged into the new [lex.phase.3] as comments feature only during translation phase three. 
--- source/compatibility.tex | 36 +++++++-------- source/expressions.tex | 2 +- source/intro.tex | 6 +-- source/lex.tex | 95 ++++++++++++++++++++++++---------------- source/xrefdelta.tex | 3 ++ 5 files changed, 82 insertions(+), 60 deletions(-) diff --git a/source/compatibility.tex b/source/compatibility.tex index 46fb8d7557..8bdb2b42be 100644 --- a/source/compatibility.tex +++ b/source/compatibility.tex @@ -2584,6 +2584,24 @@ Programs that have a legitimate reason to treat string literal objects as potentially modifiable memory are probably rare. +\rSec2[diff.cpp]{\ref{cpp}: preprocessing directives} + +\diffref{cpp.predefined} +\change +Whether \mname{STDC} is defined and if so, what its value is, are +\impldef{definition and meaning of \mname{STDC}}. +\rationale +\Cpp{} is not identical to C\@. +Mandating that \mname{STDC} +be defined would require that translators make an incorrect claim. +\effect +Change to semantics of well-defined feature. +\difficulty +Semantic transformation. +\howwide +Programs and headers that reference \mname{STDC} are +quite common. + \rSec2[diff.basic]{\ref{basic}: basics} \diffref{basic.def} @@ -3370,24 +3388,6 @@ \howwide Seldom. -\rSec2[diff.cpp]{\ref{cpp}: preprocessing directives} - -\diffref{cpp.predefined} -\change -Whether \mname{STDC} is defined and if so, what its value is, are -\impldef{definition and meaning of \mname{STDC}}. -\rationale -\Cpp{} is not identical to C\@. -Mandating that \mname{STDC} -be defined would require that translators make an incorrect claim. -\effect -Change to semantics of well-defined feature. -\difficulty -Semantic transformation. -\howwide -Programs and headers that reference \mname{STDC} are -quite common. 
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \rSec1[diff.library]{C standard library} diff --git a/source/expressions.tex b/source/expressions.tex index f259fa68d3..4a6630e613 100644 --- a/source/expressions.tex +++ b/source/expressions.tex @@ -7524,7 +7524,7 @@ \item an operation that would have undefined or erroneous behavior -as specified in \ref{intro} through \ref{cpp}, +as specified in \ref{intro} through \ref{\lastcorechapter}, excluding \ref{dcl.attr.assume} and \ref{dcl.attr.noreturn}; \begin{footnote} This includes, diff --git a/source/intro.tex b/source/intro.tex index 024b871948..27b3fc767f 100644 --- a/source/intro.tex +++ b/source/intro.tex @@ -256,7 +256,7 @@ Implementations are allowed, but not required, to diagnose it\iref{intro.compliance.general}. Evaluation of a constant expression\iref{expr.const} -never exhibits behavior specified as erroneous in \ref{intro} through \ref{cpp}. +never exhibits behavior specified as erroneous in \ref{intro} through \ref{\lastcorechapter}. \end{defnote} \definition{expression-equivalent}{defns.expression.equivalent} @@ -644,7 +644,7 @@ issuance of a diagnostic message). Many incorrect program constructs do not engender undefined behavior; they are required to be diagnosed. Evaluation of a constant expression\iref{expr.const} never exhibits behavior explicitly -specified as undefined in \ref{intro} through \ref{cpp}. +specified as undefined in \ref{intro} through \ref{\lastcorechapter}. \end{defnote} \indexdefn{behavior!unspecified}% @@ -810,7 +810,7 @@ supports all the facilities described in this document, while a freestanding implementation supports the entire \Cpp{} language -described in \ref{lex} through \ref{cpp} and +described in \ref{\firstcorechapter} through \ref{\lastcorechapter} and the subset of the library facilities described in \ref{compliance}. 
\pnum diff --git a/source/lex.tex b/source/lex.tex index ee6342b7c8..8a15c81252 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -66,8 +66,9 @@ occur, although in practice different phases can be folded together. \end{footnote} -\begin{enumerate} -\item +\rSec2[lex.phase.1]{Translation phase 1}% + +\pnum \indextext{character!source file}% An implementation shall support input files that are a sequence of UTF-8 code units (UTF-8 files). @@ -106,7 +107,8 @@ to a sequence of translation character set elements\iref{lex.charset}, representing end-of-line indicators as new-line characters. -\item +\rSec2[lex.phase.2]{Translation phase 2}% +\pnum \indextext{line splicing}% If the first translation character is \unicode{feff}{byte order mark}, it is deleted. @@ -126,7 +128,9 @@ shall be processed as if an additional new-line character were appended to the file. -\item The source file is decomposed into preprocessing +\rSec2[lex.phase.3]{Translation phase 3}% +\pnum +The source file is decomposed into preprocessing tokens\iref{lex.pptoken} and sequences of whitespace characters (including comments). A source file shall not end in a partial preprocessing token or in a partial comment. @@ -140,6 +144,29 @@ would arise from a source file ending with an unclosed \tcode{/*} comment. \end{footnote} + +\pnum +\indextext{comment|(}% +\indextext{comment!\tcode{/*} \tcode{*/}}% +\indextext{comment!\tcode{//}}% +The characters \tcode{/*} start a comment, which terminates with the +characters \tcode{*/}. These comments do not nest. +\indextext{comment!\tcode{//}}% +The characters \tcode{//} start a comment, which terminates immediately before the +next new-line character. If there is a form-feed or a vertical-tab +character in such a comment, only whitespace characters shall appear +between it and the new-line that terminates the comment; no diagnostic +is required. 
+\begin{note} +The comment characters \tcode{//}, \tcode{/*}, +and \tcode{*/} have no special meaning within a \tcode{//} comment and +are treated just like other characters. Similarly, the comment +characters \tcode{//} and \tcode{/*} have no special meaning within a +\tcode{/*} comment. +\end{note} +\indextext{comment|)} + +\pnum Each comment is replaced by one space character. New-line characters are retained. Whether each nonempty sequence of whitespace characters other than new-line is retained or replaced by one space character is @@ -162,23 +189,33 @@ directive\iref{cpp.include}. \end{example} -\item Preprocessing directives are executed, macro invocations are +\rSec2[lex.phase.4]{Translation phase 4}% + +\pnum +Preprocessing directives are executed, macro invocations are expanded, and \tcode{_Pragma} unary operator expressions are executed. A \tcode{\#include} preprocessing directive causes the named header or source file to be processed from phase 1 through phase 4, recursively. All preprocessing directives are then deleted. -\item +\rSec2[lex.phase.5]{Translation phase 5}% + +\pnum For a sequence of two or more adjacent \grammarterm{string-literal} tokens, a common \grammarterm{encoding-prefix} is determined as specified in \ref{lex.string}. Each such \grammarterm{string-literal} token is then considered to have that common \grammarterm{encoding-prefix}. -\item +\rSec2[lex.phase.6]{Translation phase 6}% + +\pnum Adjacent \grammarterm{string-literal} tokens are concatenated\iref{lex.string}. -\item Whitespace characters separating tokens are no longer +\rSec2[lex.phase.7]{Translation phase 7}% + +\pnum +Whitespace characters separating tokens are no longer significant. Each preprocessing token is converted into a token\iref{lex.token}. The resulting tokens constitute a \defn{translation unit} and @@ -207,7 +244,10 @@ only, and does not specify any particular implementation. 
\end{note} -\item Translated translation units and instantiation units are combined +\rSec2[lex.phase.8]{Translation phase 8}% + +\pnum +Translated translation units and instantiation units are combined as follows: \begin{note} Some or all of these can be supplied from a @@ -241,13 +281,15 @@ The program is ill-formed if any instantiation fails. -\item All external entity references are resolved. Library +\rSec2[lex.phase.9]{Translation phase 9}% + +\pnum +All external entity references are resolved. Library components are linked to satisfy external references to entities not defined in the current translation. All such translator output is collected into a program image which contains information needed for execution in its execution environment.% \indextext{translation!phases|)} -\end{enumerate} \rSec1[lex.char]{Characters}% @@ -469,29 +511,6 @@ \grammarterm{universal-character-name}. \end{note} -\rSec1[lex.comment]{Comments} - -\pnum -\indextext{comment|(}% -\indextext{comment!\tcode{/*} \tcode{*/}}% -\indextext{comment!\tcode{//}}% -The characters \tcode{/*} start a comment, which terminates with the -characters \tcode{*/}. These comments do not nest. -\indextext{comment!\tcode{//}}% -The characters \tcode{//} start a comment, which terminates immediately before the -next new-line character. If there is a form-feed or a vertical-tab -character in such a comment, only whitespace characters shall appear -between it and the new-line that terminates the comment; no diagnostic -is required. -\begin{note} -The comment characters \tcode{//}, \tcode{/*}, -and \tcode{*/} have no special meaning within a \tcode{//} comment and -are treated just like other characters. Similarly, the comment -characters \tcode{//} and \tcode{/*} have no special meaning within a -\tcode{/*} comment. 
-\end{note} -\indextext{comment|)} - \rSec1[lex.pptoken]{Preprocessing tokens} \indextext{token!preprocessing|(}% @@ -532,7 +551,7 @@ \indextext{whitespace}% whitespace; \indextext{comment}% -this consists of comments\iref{lex.comment}, or whitespace characters +this consists of comments\iref{lex.phase.3}, or whitespace characters (\unicode{0020}{space}, \unicode{0009}{character tabulation}, new-line, @@ -1075,7 +1094,7 @@ \end{bnf} Each \grammarterm{operator-or-punctuator} is converted to a single token -in translation phase 7\iref{lex.phases}.% +in translation phase 7\iref{lex.phase.7}.% \indextext{punctuator|)}% \indextext{operator|)} @@ -1919,7 +1938,7 @@ \end{note} \pnum -In translation phase 6\iref{lex.phases}, +In translation phase 6\iref{lex.phase.6}, adjacent \grammarterm{string-literal}s are concatenated. The lexical structure and grouping of the contents of the individual \grammarterm{string-literal}s is retained. @@ -2268,7 +2287,7 @@ \end{example} \pnum -In translation phase 6\iref{lex.phases}, adjacent \grammarterm{string-literal}s are concatenated and +In translation phase 6\iref{lex.phase.6}, adjacent \grammarterm{string-literal}s are concatenated and \grammarterm{user-defined-string-literal}{s} are considered \grammarterm{string-literal}s for that purpose. During concatenation, \grammarterm{ud-suffix}{es} are removed and ignored and the concatenation process occurs as described in~\ref{lex.string}. 
At the end of phase diff --git a/source/xrefdelta.tex b/source/xrefdelta.tex index cd814e0a13..c45b5d2199 100644 --- a/source/xrefdelta.tex +++ b/source/xrefdelta.tex @@ -82,6 +82,9 @@ % P2875 Undeprecate polymorphic_allocator::destroy \movedxref{depr.mem.poly.allocator.mem}{mem.poly.allocator.mem} +%%% Refactoring core clauses +\movedxref{lex.comment}{lex.phase.3} + % https://github.com/cplusplus/draft/pull/6653 \movedxref{mismatch}{alg.mismatch} From 5f88effb237ed88158e3a1e5266a90ae96ee98b8 Mon Sep 17 00:00:00 2001 From: Alisdair Meredith Date: Fri, 12 Jul 2024 09:11:44 -0400 Subject: [PATCH 07/10] [cpp] Merge preprocessor directives into lex --- source/lex.tex | 3791 ++++++++++++++++++++++++++++++--------- source/preprocessor.tex | 2009 --------------------- source/std.tex | 1 - 3 files changed, 2909 insertions(+), 2892 deletions(-) delete mode 100644 source/preprocessor.tex diff --git a/source/lex.tex b/source/lex.tex index 8a15c81252..355fa13f70 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -66,7 +66,7 @@ occur, although in practice different phases can be folded together. \end{footnote} -\rSec2[lex.phase.1]{Translation phase 1}% +\rSec2[lex.phase.1]{Mapping to translation characters}% \pnum \indextext{character!source file}% @@ -82,6 +82,8 @@ In other words, recognizing the \unicode{feff}{byte order mark} is not sufficient. \end{note} + +\pnum If an input file is determined to be a UTF-8 file, then it shall be a well-formed UTF-8 code unit sequence and it is decoded to produce a sequence of Unicode @@ -101,13 +103,14 @@ \unicode{000d}{carriage return} not immediately followed by a \unicode{000a}{line feed}, is replaced by a single new-line character. 
+\pnum For any other kind of input file supported by the implementation, characters are mapped, in an \impldef{mapping physical source file characters to translation character set} manner, to a sequence of translation character set elements\iref{lex.charset}, representing end-of-line indicators as new-line characters. -\rSec2[lex.phase.2]{Translation phase 2}% +\rSec2[lex.phase.2]{Line splicing}% \pnum \indextext{line splicing}% If the first translation character is \unicode{feff}{byte order mark}, @@ -123,12 +126,14 @@ Line splicing can form a \grammarterm{universal-character-name}\iref{lex.charset}. \end{note} + +\pnum A source file that is not empty and that (after splicing) does not end in a new-line character shall be processed as if an additional new-line character were appended to the file. -\rSec2[lex.phase.3]{Translation phase 3}% +\rSec2[lex.phase.3]{Preprocessor tokenization}% \pnum The source file is decomposed into preprocessing tokens\iref{lex.pptoken} and sequences of whitespace characters @@ -189,7 +194,7 @@ directive\iref{cpp.include}. \end{example} -\rSec2[lex.phase.4]{Translation phase 4}% +\rSec2[lex.phase.4]{Preprocessing directives}% \pnum Preprocessing directives are executed, macro invocations are @@ -198,7 +203,7 @@ source file to be processed from phase 1 through phase 4, recursively. All preprocessing directives are then deleted. -\rSec2[lex.phase.5]{Translation phase 5}% +\rSec2[lex.phase.5]{String literal encoding}% \pnum For a sequence of two or more adjacent \grammarterm{string-literal} tokens, @@ -207,12 +212,12 @@ Each such \grammarterm{string-literal} token is then considered to have that common \grammarterm{encoding-prefix}. -\rSec2[lex.phase.6]{Translation phase 6}% +\rSec2[lex.phase.6]{String literal concatenation}% \pnum Adjacent \grammarterm{string-literal} tokens are concatenated\iref{lex.string}. 
-\rSec2[lex.phase.7]{Translation phase 7}% +\rSec2[lex.phase.7]{Syntactic and semantic analysis}% \pnum Whitespace characters separating tokens are no longer @@ -226,6 +231,8 @@ result in one token being replaced by a sequence of other tokens\iref{temp.names}. \end{note} + +\pnum It is \impldef{whether the sources for module units and header units @@ -244,7 +251,7 @@ only, and does not specify any particular implementation. \end{note} -\rSec2[lex.phase.8]{Translation phase 8}% +\rSec2[lex.phase.8]{Template instantiation}% \pnum Translated translation units and instantiation units are combined @@ -253,6 +260,8 @@ Some or all of these can be supplied from a library. \end{note} + +\pnum Each translated translation unit is examined to produce a list of required instantiations. \begin{note} @@ -260,6 +269,8 @@ instantiations which have been explicitly requested\iref{temp.explicit}. \end{note} + +\pnum The definitions of the required templates are located. It is \impldef{whether source of translation units must be available to locate template definitions} whether the @@ -270,6 +281,8 @@ information into the translated translation unit so as to ensure the source is not required here. \end{note} + +\pnum All the required instantiations are performed to produce \defn{instantiation units}. @@ -278,10 +291,12 @@ to translated translation units, but contain no references to uninstantiated templates and no template definitions. \end{note} + +\pnum The program is ill-formed if any instantiation fails. -\rSec2[lex.phase.9]{Translation phase 9}% +\rSec2[lex.phase.9]{Linking}% \pnum All external entity references are resolved. Library @@ -467,8 +482,20 @@ \end{bnf} \pnum -The \grammarterm{universal-character-name} construct provides a way to name -any element in the translation character set using just the basic character set. 
+The \grammarterm{universal-character-name} construct provides a way to name any +element in the translation character set using just the basic character set. +If a \grammarterm{universal-character-name} outside +the \grammarterm{c-char-sequence}, \grammarterm{s-char-sequence}, or +\grammarterm{r-char-sequence} of +a \grammarterm{character-literal} or \grammarterm{string-literal} +(in either case, including within a \grammarterm{user-defined-literal}) +corresponds to a control character or +to a character in the basic character set, the program is ill-formed. +\begin{note} +A sequence of characters resembling a \grammarterm{universal-character-name} in an +\grammarterm{r-char-sequence}\iref{lex.string} does not form a +\grammarterm{universal-character-name}. +\end{note} \pnum A \grammarterm{universal-character-name} @@ -497,32 +524,20 @@ None of these names or aliases have leading or trailing spaces. \end{note} -\pnum -If a \grammarterm{universal-character-name} outside -the \grammarterm{c-char-sequence}, \grammarterm{s-char-sequence}, or -\grammarterm{r-char-sequence} of -a \grammarterm{character-literal} or \grammarterm{string-literal} -(in either case, including within a \grammarterm{user-defined-literal}) -corresponds to a control character or -to a character in the basic character set, the program is ill-formed. -\begin{note} -A sequence of characters resembling a \grammarterm{universal-character-name} in an -\grammarterm{r-char-sequence}\iref{lex.string} does not form a -\grammarterm{universal-character-name}. 
-\end{note} - \rSec1[lex.pptoken]{Preprocessing tokens} +\rSec2[lex.ppbasic]{Basic tokens} + \indextext{token!preprocessing|(}% \begin{bnf} \nontermdef{preprocessing-token}\br header-name\br pp-number\br + preprocessing-op-or-punc\br identifier\br import-keyword\br module-keyword\br export-keyword\br - preprocessing-op-or-punc\br character-literal\br user-defined-character-literal\br string-literal\br @@ -637,7 +652,7 @@ \end{example} \indextext{token!preprocessing|)} -\rSec1[lex.digraph]{Alternative tokens} +\rSec2[lex.digraph]{Alternative tokens} \pnum \indextext{token!alternative|(}% @@ -686,7 +701,7 @@ \end{tokentable}% \indextext{token!alternative|)} -\rSec1[lex.header]{Header names} +\rSec2[lex.header]{Header names} \indextext{header!name|(}% \begin{bnf} @@ -746,7 +761,7 @@ \end{footnote} \indextext{header!name|)} -\rSec1[lex.ppnumber]{Preprocessing numbers} +\rSec2[lex.ppnumber]{Preprocessing numbers} \indextext{number!preprocessing|(}% \begin{bnf} @@ -775,171 +790,2780 @@ a \grammarterm{floating-point-literal} token.% \indextext{number!preprocessing|)} -\rSec1[lex.token]{Tokens} +\rSec2[lex.operators]{Operators and punctuators} + +\pnum +\indextext{operator|(}% +\indextext{punctuator|(}% +The lexical representation of \Cpp{} programs includes a number of +preprocessing tokens that are used in the syntax of the preprocessor or +are converted into tokens for operators and punctuators: -\indextext{token|(}% \begin{bnf} -\nontermdef{token}\br - identifier\br - keyword\br - literal\br +\nontermdef{preprocessing-op-or-punc}\br + preprocessing-operator\br operator-or-punctuator \end{bnf} -\pnum -\indextext{\idxgram{token}}% -There are five kinds of tokens: identifiers, keywords, literals,% -\begin{footnote} -Literals include strings and character and numeric literals. -\end{footnote} -operators, and other separators. 
-\indextext{whitespace}% -Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments -(collectively, ``whitespace''), as described below, are ignored except -as they serve to separate tokens. -\begin{note} -Whitespace can separate otherwise adjacent identifiers, keywords, numeric -literals, and alternative tokens containing alphabetic characters. -\end{note} -\indextext{token|)} +\begin{bnf} +%% Ed. note: character protrusion would misalign various operators. +\microtypesetup{protrusion=false}\obeyspaces +\nontermdef{preprocessing-operator} \textnormal{one of}\br + \terminal{\# \#\# \%: \%:\%:} +\end{bnf} -\rSec1[lex.name]{Identifiers} +\begin{bnf} +\microtypesetup{protrusion=false}\obeyspaces +\nontermdef{operator-or-punctuator} \textnormal{one of}\br + \terminal{\{ \} [ ] ( )}\br + \terminal{<: :> <\% \%> ; : ...}\br + \terminal{? :: . .* -> ->* \~}\br + \terminal{! + - * / \% \caret{} \& |}\br + \terminal{= += -= *= /= \%= \caret{}= \&= |=}\br + \terminal{== != < > <= >= <=> \&\& ||}\br + \terminal{<< >> <<= >>= ++ -- ,}\br + \terminal{\keyword{and} \keyword{or} \keyword{xor} \keyword{not} \keyword{bitand} \keyword{bitor} \keyword{compl}}\br + \terminal{\keyword{and_eq} \keyword{or_eq} \keyword{xor_eq} \keyword{not_eq}} +\end{bnf} -\indextext{identifier|(}% +Each \grammarterm{operator-or-punctuator} is converted to a single token +in translation phase 7\iref{lex.phase.7}.% +\indextext{punctuator|)}% +\indextext{operator|)} + +\rSec2[lex.ppliteral]{Literals} + +\rSec3[lex.ccon]{Character literals} + +\indextext{literal!character}% \begin{bnf} -\nontermdef{identifier}\br - identifier-start\br - identifier identifier-continue +\nontermdef{character-literal}\br + \opt{encoding-prefix} \terminal{'} c-char-sequence \terminal{'} \end{bnf} \begin{bnf} -\nontermdef{identifier-start}\br - nondigit\br - \textnormal{an element of the translation character set with the Unicode property XID_Start} +\nontermdef{encoding-prefix} \textnormal{one of}\br + 
\terminal{u8}\quad\terminal{u}\quad\terminal{U}\quad\terminal{L} \end{bnf} \begin{bnf} -\nontermdef{identifier-continue}\br - digit\br - nondigit\br - \textnormal{an element of the translation character set with the Unicode property XID_Continue} +\nontermdef{c-char-sequence}\br + c-char\br + c-char-sequence c-char \end{bnf} \begin{bnf} -\nontermdef{nondigit} \textnormal{one of}\br - \terminal{a b c d e f g h i j k l m}\br - \terminal{n o p q r s t u v w x y z}\br - \terminal{A B C D E F G H I J K L M}\br - \terminal{N O P Q R S T U V W X Y Z _} +\nontermdef{c-char}\br + basic-c-char\br + escape-sequence\br + universal-character-name \end{bnf} \begin{bnf} -\nontermdef{digit} \textnormal{one of}\br - \terminal{0 1 2 3 4 5 6 7 8 9} +\nontermdef{basic-c-char}\br + \textnormal{any member of the translation character set except the \unicode{0027}{apostrophe},}\br + \bnfindent\textnormal{\unicode{005c}{reverse solidus}, or new-line character} \end{bnf} -\pnum -\indextext{name!length of}% -\indextext{name}% -\begin{note} -The character properties XID_Start and XID_Continue are Derived Core Properties -as described by \UAX{44} of the Unicode Standard. -\begin{footnote} -On systems in which linkers cannot accept extended -characters, an encoding of the \grammarterm{universal-character-name} can be used in -forming valid external identifiers. For example, some otherwise unused -character or sequence of characters can be used to encode the -\tcode{\textbackslash u} in a \grammarterm{universal-character-name}. Extended -characters can produce a long external identifier, but \Cpp{} does not -place a translation limit on significant characters for external -identifiers. -\end{footnote} -\end{note} -The program is ill-formed -if an \grammarterm{identifier} does not conform to -Normalization Form C as specified in the Unicode Standard. -\begin{note} -Identifiers are case-sensitive. 
-\end{note} -\begin{note} -\ref{uaxid} compares the requirements of \UAX{31} of the Unicode Standard -with the \Cpp{} rules for identifiers. -\end{note} -\begin{note} -In translation phase 4, -\grammarterm{identifier} also includes -those \grammarterm{preprocessing-token}s\iref{lex.pptoken} -differentiated as keywords\iref{lex.key} -in the later translation phase 7\iref{lex.token}. -\end{note} +\begin{bnf} +\nontermdef{escape-sequence}\br + simple-escape-sequence\br + numeric-escape-sequence\br + conditional-escape-sequence +\end{bnf} -\pnum -\indextext{\idxcode{import}}% -\indextext{\idxcode{final}}% -\indextext{\idxcode{module}}% -\indextext{\idxcode{override}}% -The identifiers in \tref{lex.name.special} have a special meaning when -appearing in a certain context. When referred to in the grammar, these identifiers -are used explicitly rather than using the \grammarterm{identifier} grammar production. -Unless otherwise specified, any ambiguity as to whether a given -\grammarterm{identifier} has a special meaning is resolved to interpret the -token as a regular \grammarterm{identifier}. +\begin{bnf} +\nontermdef{simple-escape-sequence}\br + \terminal{\textbackslash} simple-escape-sequence-char +\end{bnf} -\begin{multicolfloattable}{Identifiers with special meaning}{lex.name.special} -{llll} -\keyword{final} \\ -\columnbreak -\keyword{import} \\ -\columnbreak -\keyword{module} \\ -\columnbreak -\keyword{override} \\ -\end{multicolfloattable} +\begin{bnf} +\nontermdef{simple-escape-sequence-char} \textnormal{one of}\br + \terminal{' " ? \textbackslash{} a b f n r t v} +\end{bnf} -\pnum -\indextext{\idxcode{_}|see{character, underscore}}% -\indextext{character!underscore!in identifier}% -\indextext{reserved identifier}% -In addition, some identifiers -appearing as a \grammarterm{token} or \grammarterm{preprocessing-token} -are reserved for use by \Cpp{} -implementations and shall -not be used otherwise; no diagnostic is required. 
-\begin{itemize} -\item -Each identifier that contains a double underscore -\tcode{\unun} -\indextext{character!underscore}% -or begins with an underscore followed by -an uppercase letter -\indextext{uppercase}% -is reserved to the implementation for any use. -\item -Each identifier that begins with an underscore is -\indextext{character!underscore}% -reserved to the implementation for use as a name in the global namespace.% -\indextext{namespace!global} -\end{itemize}% -\indextext{identifier|)} +\begin{bnf} +\nontermdef{numeric-escape-sequence}\br + octal-escape-sequence\br + hexadecimal-escape-sequence +\end{bnf} -\rSec1[lex.key]{Keywords} +\begin{bnf} +\nontermdef{simple-octal-digit-sequence}\br + octal-digit\br + simple-octal-digit-sequence octal-digit +\end{bnf} \begin{bnf} -\nontermdef{keyword}\br - \textnormal{any identifier listed in \tref{lex.key}}\br - \grammarterm{import-keyword}\br - \grammarterm{module-keyword}\br - \grammarterm{export-keyword} +\nontermdef{octal-escape-sequence}\br + \terminal{\textbackslash} octal-digit\br + \terminal{\textbackslash} octal-digit octal-digit\br + \terminal{\textbackslash} octal-digit octal-digit octal-digit\br + \terminal{\textbackslash o\{} simple-octal-digit-sequence \terminal{\}} \end{bnf} -\pnum -The \grammarterm{import-keyword} is produced -by processing an \keyword{import} directive\iref{cpp.import}, -the \grammarterm{module-keyword} is produced -by preprocessing a \keyword{module} directive\iref{cpp.module}, and -the \grammarterm{export-keyword} is produced -by preprocessing either of the previous two directives. 
-\begin{note} +\begin{bnf} +\nontermdef{hexadecimal-escape-sequence}\br + \terminal{\textbackslash x} simple-hexadecimal-digit-sequence\br + \terminal{\textbackslash x\{} simple-hexadecimal-digit-sequence \terminal{\}} +\end{bnf} + +\begin{bnf} +\nontermdef{conditional-escape-sequence}\br + \terminal{\textbackslash} conditional-escape-sequence-char +\end{bnf} + +\begin{bnf} +\nontermdef{conditional-escape-sequence-char}\br + \textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{N}, \terminal{o}, \terminal{u}, \terminal{U}, or \terminal{x}} +\end{bnf} + +\pnum +\indextext{literal!character}% +\indextext{literal!\idxcode{char8_t}}% +\indextext{literal!\idxcode{char16_t}}% +\indextext{literal!\idxcode{char32_t}}% +\indextext{literal!type of character}% +\indextext{type!\idxcode{char8_t}}% +\indextext{type!\idxcode{char16_t}}% +\indextext{type!\idxcode{char32_t}}% +\indextext{wide-character}% +\indextext{type!\idxcode{wchar_t}}% +A \defnadj{multicharacter}{literal} is a \grammarterm{character-literal} +whose \grammarterm{c-char-sequence} consists of +more than one \grammarterm{c-char}. +A multicharacter literal shall not have an \grammarterm{encoding-prefix}. +If a multicharacter literal contains a \grammarterm{c-char} +that is not encodable as a single code unit in the ordinary literal encoding, +the program is ill-formed. +Multicharacter literals are conditionally-supported. + +\pnum +The kind of a \grammarterm{character-literal}, +its type, and its associated character encoding\iref{lex.charset} +are determined by +its \grammarterm{encoding-prefix} and its \grammarterm{c-char-sequence} +as defined by \tref{lex.ccon.literal}. 
+ +\begin{floattable}{Character literals}{lex.ccon.literal} +{l|l|l|l|l} +\topline +\lhdr{Encoding} & \chdr{Kind} & \chdr{Type} & \chdr{Associated char-} & \rhdr{Example} \\ +\lhdr{prefix} & \chdr{} & \chdr{} & \chdr{acter encoding} & \\ +\capsep +none & +\defnx{ordinary character literal}{literal!character!ordinary} & +\keyword{char} & +ordinary literal & +\tcode{'v'} \\ \cline{2-3}\cline{5-5} + & +multicharacter literal & +\keyword{int} & +encoding & +\tcode{'abcd'} \\ \hline +\tcode{L} & +\defnx{wide character literal}{literal!character!wide} & +\keyword{wchar_t} & +wide literal & +\tcode{L'w'} \\ + & & & encoding & \\ \hline +\tcode{u8} & +\defnx{UTF-8 character literal}{literal!character!UTF-8} & +\keyword{char8_t} & +UTF-8 & +\tcode{u8'x'} \\ \hline +\tcode{u} & +\defnx{UTF-16 character literal}{literal!character!UTF-16} & +\keyword{char16_t} & +UTF-16 & +\tcode{u'y'} \\ \hline +\tcode{U} & +\defnx{UTF-32 character literal}{literal!character!UTF-32} & +\keyword{char32_t} & +UTF-32 & +\tcode{U'z'} \\ +\end{floattable} + +\pnum +In translation phase 4, +the value of a \grammarterm{character-literal} is determined +using the range of representable values +of the \grammarterm{character-literal}'s type in translation phase 7. +A multicharacter literal has an +\impldef{value of non-encodable character literal or multicharacter literal} +value. +The value of any other kind of \grammarterm{character-literal} +is determined as follows: +\begin{itemize} +\item +A \grammarterm{character-literal} with +a \grammarterm{c-char-sequence} consisting of a single +\grammarterm{basic-c-char}, +\grammarterm{simple-escape-sequence}, or +\grammarterm{universal-character-name} +is the code unit value of the specified character +as encoded in the literal's associated character encoding. +If the specified character lacks +representation in the literal's associated character encoding or +if it cannot be encoded as a single code unit, +then the program is ill-formed. 
+\item +A \grammarterm{character-literal} with +a \grammarterm{c-char-sequence} consisting of +a single \grammarterm{numeric-escape-sequence} +has a value as follows: +\begin{itemize} +\item +Let $v$ be the integer value represented by +the octal number comprising +the sequence of \grammarterm{octal-digit}{s} in +an \grammarterm{octal-escape-sequence} or by +the hexadecimal number comprising +the sequence of \grammarterm{hexadecimal-digit}{s} in +a \grammarterm{hexadecimal-escape-sequence}. +\item +If $v$ does not exceed +the range of representable values of the \grammarterm{character-literal}'s type, +then the value is $v$. +\item +Otherwise, +if the \grammarterm{character-literal}'s \grammarterm{encoding-prefix} +is absent or \tcode{L}, and +$v$ does not exceed the range of representable values of the corresponding unsigned type for the underlying type of the \grammarterm{character-literal}'s type, +then the value is the unique value of the \grammarterm{character-literal}'s type \tcode{T} that is congruent to $v$ modulo $2^N$, where $N$ is the width of \tcode{T}. +\item +Otherwise, the program is ill-formed. +\end{itemize} +\item +A \grammarterm{character-literal} with +a \grammarterm{c-char-sequence} consisting of +a single \grammarterm{conditional-escape-sequence} +is conditionally-supported and +has an \impldef{value of \grammarterm{conditional-escape-sequence}} value. +\end{itemize} + +\pnum +\indextext{backslash character}% +\indextext{\idxcode{\textbackslash}|see{backslash character}}% +\indextext{escape character|see{backslash character}}% +The character specified by a \grammarterm{simple-escape-sequence} +is specified in \tref{lex.ccon.esc}. +\begin{note} +Using an escape sequence for a question mark +is supported for compatibility with \CppXIV{} and C. 
+\end{note} + +\begin{floattable}{Simple escape sequences}{lex.ccon.esc} +{lll} +\topline +\lhdrx{2}{character} & \rhdr{\grammarterm{simple-escape-sequence}} \\ \capsep +\ucode{000a} & \uname{line feed} & \tcode{\textbackslash n} \\ +\ucode{0009} & \uname{character tabulation} & \tcode{\textbackslash t} \\ +\ucode{000b} & \uname{line tabulation} & \tcode{\textbackslash v} \\ +\ucode{0008} & \uname{backspace} & \tcode{\textbackslash b} \\ +\ucode{000d} & \uname{carriage return} & \tcode{\textbackslash r} \\ +\ucode{000c} & \uname{form feed} & \tcode{\textbackslash f} \\ +\ucode{0007} & \uname{alert} & \tcode{\textbackslash a} \\ +\ucode{005c} & \uname{reverse solidus} & \tcode{\textbackslash\textbackslash} \\ +\ucode{003f} & \uname{question mark} & \tcode{\textbackslash ?} \\ +\ucode{0027} & \uname{apostrophe} & \tcode{\textbackslash '} \\ +\ucode{0022} & \uname{quotation mark} & \tcode{\textbackslash "} \\ +\end{floattable} + +\rSec3[lex.string]{String literals} + +\indextext{literal!string}% +\begin{bnf} +\nontermdef{string-literal}\br + \opt{encoding-prefix} \terminal{"} \opt{s-char-sequence} \terminal{"}\br + \opt{encoding-prefix} \terminal{R} raw-string +\end{bnf} + +\begin{bnf} +\nontermdef{s-char-sequence}\br + s-char\br + s-char-sequence s-char +\end{bnf} + +\begin{bnf} +\nontermdef{s-char}\br + basic-s-char\br + escape-sequence\br + universal-character-name +\end{bnf} + +\begin{bnf} +\nontermdef{basic-s-char}\br + \textnormal{any member of the translation character set except the \unicode{0022}{quotation mark},}\br + \bnfindent\textnormal{\unicode{005c}{reverse solidus}, or new-line character} +\end{bnf} + +\begin{bnf} +\nontermdef{raw-string}\br + \terminal{"} \opt{d-char-sequence} \terminal{(} \opt{r-char-sequence} \terminal{)} \opt{d-char-sequence} \terminal{"} +\end{bnf} + +\begin{bnf} +\nontermdef{r-char-sequence}\br + r-char\br + r-char-sequence r-char +\end{bnf} + +\begin{bnf} +\nontermdef{r-char}\br + \textnormal{any member of the translation 
character set, except a \unicode{0029}{right parenthesis} followed by}\br + \bnfindent\textnormal{the initial \grammarterm{d-char-sequence} (which may be empty) followed by a \unicode{0022}{quotation mark}} +\end{bnf} + +\begin{bnf} +\nontermdef{d-char-sequence}\br + d-char\br + d-char-sequence d-char +\end{bnf} + +\begin{bnf} +\nontermdef{d-char}\br + \textnormal{any member of the basic character set except:}\br + \bnfindent\textnormal{\unicode{0020}{space}, \unicode{0028}{left parenthesis}, \unicode{0029}{right parenthesis}, \unicode{005c}{reverse solidus},}\br + \bnfindent\textnormal{\unicode{0009}{character tabulation}, \unicode{000b}{line tabulation}, \unicode{000c}{form feed}, and new-line} +\end{bnf} + +\pnum +\indextext{literal!string}% +\indextext{character string}% +\indextext{string!type of}% +\indextext{type!\idxcode{wchar_t}}% +\indextext{prefix!\idxcode{L}}% +\indextext{literal!string!\idxcode{char16_t}}% +\indextext{type!\idxcode{char16_t}}% +\indextext{literal!string!\idxcode{char32_t}}% +\indextext{type!\idxcode{char32_t}}% +The kind of a \grammarterm{string-literal}, +its type, and +its associated character encoding\iref{lex.charset} +are determined by its encoding prefix and sequence of +\grammarterm{s-char}s or \grammarterm{r-char}s +as defined by \tref{lex.string.literal} +where $n$ is the number of encoded code units as described below. 
+ +\begin{floattable}{String literals}{lex.string.literal} +{llp{2.6cm}p{2.3cm}p{4.7cm}} +\topline +\lhdr{Enco-} & \chdr{Kind} & \chdr{Type} & \chdr{Associated} & \rhdr{Examples} \\ +\lhdr{ding} & \chdr{} & \chdr{} & \chdr{character} & \rhdr{} \\ +\lhdr{prefix} & \chdr{} & \chdr{} & \chdr{encoding} & \rhdr{} \\ +\capsep +none & +\defnx{ordinary string literal}{literal!string!ordinary} & +array of $n$\newline \tcode{\keyword{const} \keyword{char}} & +ordinary literal encoding & +\tcode{"ordinary string"}\newline +\tcode{R"(ordinary raw string)"} \\ +\tcode{L} & +\defnx{wide string literal}{literal!string!wide} & +array of $n$\newline \tcode{\keyword{const} \keyword{wchar_t}} & +wide literal\newline encoding & +\tcode{L"wide string"}\newline +\tcode{LR"w(wide raw string)w"} \\ +\tcode{u8} & +\defnx{UTF-8 string literal}{literal!string!UTF-8} & +array of $n$\newline \tcode{\keyword{const} \keyword{char8_t}} & +UTF-8 & +\tcode{u8"UTF-8 string"}\newline +\tcode{u8R"x(UTF-8 raw string)x"} \\ +\tcode{u} & +\defnx{UTF-16 string literal}{literal!string!UTF-16} & +array of $n$\newline \tcode{\keyword{const} \keyword{char16_t}} & +UTF-16 & +\tcode{u"UTF-16 string"}\newline +\tcode{uR"y(UTF-16 raw string)y"} \\ +\tcode{U} & +\defnx{UTF-32 string literal}{literal!string!UTF-32} & +array of $n$\newline \tcode{\keyword{const} \keyword{char32_t}} & +UTF-32 & +\tcode{U"UTF-32 string"}\newline +\tcode{UR"z(UTF-32 raw string)z"} \\ +\end{floattable} + +\pnum +\indextext{literal!string!raw}% +A \grammarterm{string-literal} that has an \tcode{R} +\indextext{prefix!\idxcode{R}}% +in the prefix is a \defn{raw string literal}. The +\grammarterm{d-char-sequence} serves as a delimiter. The terminating +\grammarterm{d-char-sequence} of a \grammarterm{raw-string} is the same sequence of +characters as the initial \grammarterm{d-char-sequence}. A \grammarterm{d-char-sequence} +shall consist of at most 16 characters. 
+ +\pnum +\begin{note} +The characters \tcode{'('} and \tcode{')'} can appear in a +\grammarterm{raw-string}. Thus, \tcode{R"delimiter((a|b))delimiter"} is equivalent to +\tcode{"(a|b)"}. +\end{note} + +\pnum +\begin{note} +A source-file new-line in a raw string literal results in a new-line in the +resulting execution string literal. Assuming no +whitespace at the beginning of lines in the following example, the assert will succeed: +\begin{codeblock} +const char* p = R"(a\ +b +c)"; +assert(std::strcmp(p, "a\\\nb\nc") == 0); +\end{codeblock} +\end{note} + +\pnum +\begin{example} +The raw string +\begin{codeblock} +R"a( +)\ +a" +)a" +\end{codeblock} +is equivalent to \tcode{"\textbackslash n)\textbackslash \textbackslash \textbackslash na\textbackslash"\textbackslash n"}. The raw string +\begin{codeblock} +R"(x = "\"y\"")" +\end{codeblock} +is equivalent to \tcode{"x = \textbackslash "\textbackslash\textbackslash\textbackslash "y\textbackslash\textbackslash\textbackslash "\textbackslash ""}. +\end{example} + +\pnum +\indextext{literal!narrow-character}% +Ordinary string literals and UTF-8 string literals are +also referred to as \defnx{narrow string literals}{literal!string!narrow}. + +\pnum +\indextext{concatenation!string}% +The common \grammarterm{encoding-prefix} +for a sequence of adjacent \grammarterm{string-literal}s +is determined pairwise as follows. +If two \grammarterm{string-literal}{s} have +the same \grammarterm{encoding-prefix}, +the common \grammarterm{encoding-prefix} is that \grammarterm{encoding-prefix}. +If one \grammarterm{string-literal} has no \grammarterm{encoding-prefix}, +the common \grammarterm{encoding-prefix} is that +of the other \grammarterm{string-literal}. +Any other combinations are ill-formed. +\begin{note} +A \grammarterm{string-literal}'s rawness has +no effect on the determination of the common \grammarterm{encoding-prefix}. 
+\end{note} + +\pnum +In translation phase 6\iref{lex.phase.6}, +adjacent \grammarterm{string-literal}s are concatenated. +The lexical structure and grouping of +the contents of the individual \grammarterm{string-literal}s is retained. +\begin{example} +\begin{codeblock} +"\xA" "B" +\end{codeblock} +represents +the code unit \tcode{'\textbackslash xA'} and the character \tcode{'B'} +after concatenation +(and not the single code unit \tcode{'\textbackslash xAB'}). +Similarly, +\begin{codeblock} +R"(\u00)" "41" +\end{codeblock} +represents six characters, +starting with a backslash and ending with the digit \tcode{1} +(and not the single character \tcode{'A'} +specified by a \grammarterm{universal-character-name}). + +\tref{lex.string.concat} has some examples of valid concatenations. +\end{example} + +\begin{floattable}{String literal concatenations}{lex.string.concat} +{lll|lll|lll} +\topline +\multicolumn{2}{|c}{Source} & +Means & +\multicolumn{2}{c}{Source} & +Means & +\multicolumn{2}{c}{Source} & +Means \\ +\tcode{u"a"} & \tcode{u"b"} & \tcode{u"ab"} & +\tcode{U"a"} & \tcode{U"b"} & \tcode{U"ab"} & +\tcode{L"a"} & \tcode{L"b"} & \tcode{L"ab"} \\ +\tcode{u"a"} & \tcode{"b"} & \tcode{u"ab"} & +\tcode{U"a"} & \tcode{"b"} & \tcode{U"ab"} & +\tcode{L"a"} & \tcode{"b"} & \tcode{L"ab"} \\ +\tcode{"a"} & \tcode{u"b"} & \tcode{u"ab"} & +\tcode{"a"} & \tcode{U"b"} & \tcode{U"ab"} & +\tcode{"a"} & \tcode{L"b"} & \tcode{L"ab"} \\ +\end{floattable} + +\pnum +Evaluating a \grammarterm{string-literal} results in a string literal object +with static storage duration\iref{basic.stc}. +\begin{note} +String literal objects are potentially non-unique\iref{intro.object}. +Whether successive evaluations of a +\grammarterm{string-literal} yield the same or a different object is +unspecified. +\end{note} +\begin{note} +\indextext{literal!string!undefined change to}% +The effect of attempting to modify a string literal object is undefined. 
+\end{note} + +\pnum +\indextext{\idxcode{0}!string terminator}% +\indextext{\idxcode{0}!null character|see {character, null}}% +String literal objects are initialized with +the sequence of code unit values +corresponding to the \grammarterm{string-literal}'s sequence of +\grammarterm{s-char}s (originally from non-raw string literals) and +\grammarterm{r-char}s (originally from raw string literals), +plus a terminating \unicode{0000}{null} character, +in order as follows: +\begin{itemize} +\item +The sequence of characters denoted by each contiguous sequence of +\grammarterm{basic-s-char}s, +\grammarterm{r-char}s, +\grammarterm{simple-escape-sequence}s\iref{lex.ccon}, and +\grammarterm{universal-character-name}s\iref{lex.charset} +is encoded to a code unit sequence +using the \grammarterm{string-literal}'s associated character encoding. +If a character lacks representation in the associated character encoding, +then the program is ill-formed. +\begin{note} +No character lacks representation in any Unicode encoding form. +\end{note} +When encoding a stateful character encoding, +implementations should encode the first such sequence +beginning with the initial encoding state and +encode subsequent sequences +beginning with the final encoding state of the prior sequence. +\begin{note} +The encoded code unit sequence can differ from +the sequence of code units that would be obtained by +encoding each character independently. +\end{note} +\item +Each \grammarterm{numeric-escape-sequence}\iref{lex.ccon} +contributes a single code unit with a value as follows: +\begin{itemize} +\item +Let $v$ be the integer value represented by +the octal number comprising +the sequence of \grammarterm{octal-digit}{s} in +an \grammarterm{octal-escape-sequence} or by +the hexadecimal number comprising +the sequence of \grammarterm{hexadecimal-digit}{s} in +a \grammarterm{hexadecimal-escape-sequence}. 
+\item +If $v$ does not exceed the range of representable values of +the \grammarterm{string-literal}'s array element type, +then the value is $v$. +\item +Otherwise, +if the \grammarterm{string-literal}'s \grammarterm{encoding-prefix} +is absent or \tcode{L}, and +$v$ does not exceed the range of representable values of +the corresponding unsigned type for the underlying type of +the \grammarterm{string-literal}'s array element type, +then the value is the unique value of +the \grammarterm{string-literal}'s array element type \tcode{T} +that is congruent to $v$ modulo $2^N$, where $N$ is the width of \tcode{T}. +\item +Otherwise, the program is ill-formed. +\end{itemize} +When encoding a stateful character encoding, +these sequences should have no effect on encoding state. +\item +Each \grammarterm{conditional-escape-sequence}\iref{lex.ccon} +contributes an +\impldef{code unit sequence for \grammarterm{conditional-escape-sequence}} +code unit sequence. +When encoding a stateful character encoding, +it is +\impldef{effect of \grammarterm{conditional-escape-sequence} on encoding state} +what effect these sequences have on encoding state. 
+\end{itemize} + +\rSec2[lex.name]{Identifiers} + +\indextext{identifier|(}% +\begin{bnf} +\nontermdef{identifier}\br + identifier-start\br + identifier identifier-continue +\end{bnf} + +\begin{bnf} +\nontermdef{identifier-start}\br + nondigit\br + \textnormal{an element of the translation character set with the Unicode property XID_Start} +\end{bnf} + +\begin{bnf} +\nontermdef{identifier-continue}\br + digit\br + nondigit\br + \textnormal{an element of the translation character set with the Unicode property XID_Continue} +\end{bnf} + +\begin{bnf} +\nontermdef{nondigit} \textnormal{one of}\br + \terminal{a b c d e f g h i j k l m}\br + \terminal{n o p q r s t u v w x y z}\br + \terminal{A B C D E F G H I J K L M}\br + \terminal{N O P Q R S T U V W X Y Z _} +\end{bnf} + +\begin{bnf} +\nontermdef{digit} \textnormal{one of}\br + \terminal{0 1 2 3 4 5 6 7 8 9} +\end{bnf} + +\pnum +\indextext{name!length of}% +\indextext{name}% +\begin{note} +The character properties XID_Start and XID_Continue are Derived Core Properties +as described by \UAX{44} of the Unicode Standard. +\begin{footnote} +On systems in which linkers cannot accept extended +characters, an encoding of the \grammarterm{universal-character-name} can be used in +forming valid external identifiers. For example, some otherwise unused +character or sequence of characters can be used to encode the +\tcode{\textbackslash u} in a \grammarterm{universal-character-name}. Extended +characters can produce a long external identifier, but \Cpp{} does not +place a translation limit on significant characters for external +identifiers. +\end{footnote} +\end{note} +The program is ill-formed +if an \grammarterm{identifier} does not conform to +Normalization Form C as specified in the Unicode Standard. +\begin{note} +Identifiers are case-sensitive. +\end{note} +\begin{note} +\ref{uaxid} compares the requirements of \UAX{31} of the Unicode Standard +with the \Cpp{} rules for identifiers. 
+\end{note} +\begin{note} +In translation phase 4, +\grammarterm{identifier} also includes +those \grammarterm{preprocessing-token}s\iref{lex.pptoken} +differentiated as keywords\iref{lex.key} +in the later translation phase 7\iref{lex.token}. +\end{note} + +\pnum +\indextext{\idxcode{import}}% +\indextext{\idxcode{final}}% +\indextext{\idxcode{module}}% +\indextext{\idxcode{override}}% +The identifiers in \tref{lex.name.special} have a special meaning when +appearing in a certain context. When referred to in the grammar, these identifiers +are used explicitly rather than using the \grammarterm{identifier} grammar production. +Unless otherwise specified, any ambiguity as to whether a given +\grammarterm{identifier} has a special meaning is resolved to interpret the +token as a regular \grammarterm{identifier}. + +\begin{multicolfloattable}{Identifiers with special meaning}{lex.name.special} +{llll} +\keyword{final} \\ +\columnbreak +\keyword{import} \\ +\columnbreak +\keyword{module} \\ +\columnbreak +\keyword{override} \\ +\end{multicolfloattable} + +\pnum +\indextext{\idxcode{_}|see{character, underscore}}% +\indextext{character!underscore!in identifier}% +\indextext{reserved identifier}% +In addition, some identifiers +appearing as a \grammarterm{token} or \grammarterm{preprocessing-token} +are reserved for use by \Cpp{} +implementations and shall +not be used otherwise; no diagnostic is required. +\begin{itemize} +\item +Each identifier that contains a double underscore +\tcode{\unun} +\indextext{character!underscore}% +or begins with an underscore followed by +an uppercase letter +\indextext{uppercase}% +is reserved to the implementation for any use. 
+\item +Each identifier that begins with an underscore is +\indextext{character!underscore}% +reserved to the implementation for use as a name in the global namespace.% +\indextext{namespace!global} +\end{itemize}% +\indextext{identifier|)} + +\rSec1[cpp]{Preprocessing directives}% +\indextext{preprocessing directive|(} + +\indextext{compiler control line|see{preprocessing directive}}% +\indextext{control line|see{preprocessing directive}}% +\indextext{directive, preprocessing|see{preprocessing directive}} + +\gramSec[gram.cpp]{Preprocessing directives} + +\rSec2[cpp.pre]{Preamble} + +\begin{bnf} +\nontermdef{preprocessing-file}\br + \opt{group}\br + module-file +\end{bnf} + +\begin{bnf} +\nontermdef{module-file}\br + \opt{pp-global-module-fragment} pp-module \opt{group} \opt{pp-private-module-fragment} +\end{bnf} + +\begin{bnf} +\nontermdef{pp-global-module-fragment}\br + \keyword{module} \terminal{;} new-line \opt{group} +\end{bnf} + +\begin{bnf} +\nontermdef{pp-private-module-fragment}\br + \keyword{module} \terminal{:} \keyword{private} \terminal{;} new-line \opt{group} +\end{bnf} + +\begin{bnf} +\nontermdef{group}\br + group-part\br + group group-part +\end{bnf} + +\begin{bnf} +\nontermdef{group-part}\br + control-line\br + if-section\br + text-line\br + \terminal{\#} conditionally-supported-directive +\end{bnf} + +\begin{bnf}\obeyspaces +\nontermdef{control-line}\br + \terminal{\# include} pp-tokens new-line\br + pp-import\br + \terminal{\# define } identifier replacement-list new-line\br + \terminal{\# define } identifier lparen \opt{identifier-list} \terminal{)} replacement-list new-line\br + \terminal{\# define } identifier lparen \terminal{... )} replacement-list new-line\br + \terminal{\# define } identifier lparen identifier-list \terminal{, ... 
)} replacement-list new-line\br + \terminal{\# undef } identifier new-line\br + \terminal{\# line } pp-tokens new-line\br + \terminal{\# error } \opt{pp-tokens} new-line\br + \terminal{\# warning} \opt{pp-tokens} new-line\br + \terminal{\# pragma } \opt{pp-tokens} new-line\br + \terminal{\# }new-line +\end{bnf} + +\begin{bnf} +\nontermdef{if-section}\br + if-group \opt{elif-groups} \opt{else-group} endif-line +\end{bnf} + +\begin{bnf}\obeyspaces +\nontermdef{if-group}\br + \terminal{\# if } constant-expression new-line \opt{group}\br + \terminal{\# ifdef } identifier new-line \opt{group}\br + \terminal{\# ifndef } identifier new-line \opt{group} +\end{bnf} + +\begin{bnf} +\nontermdef{elif-groups}\br + elif-group\br + elif-groups elif-group +\end{bnf} + +\begin{bnf}\obeyspaces +\nontermdef{elif-group}\br + \terminal{\# elif } constant-expression new-line \opt{group}\br + \terminal{\# elifdef } identifier new-line \opt{group}\br + \terminal{\# elifndef} identifier new-line \opt{group} +\end{bnf} + +\begin{bnf}\obeyspaces +\nontermdef{else-group}\br + \terminal{\# else } new-line \opt{group} +\end{bnf} + +\begin{bnf}\obeyspaces +\nontermdef{endif-line}\br + \terminal{\# endif } new-line +\end{bnf} + +\begin{bnf} +\nontermdef{text-line}\br + \opt{pp-tokens} new-line +\end{bnf} + +\begin{bnf} +\nontermdef{conditionally-supported-directive}\br + pp-tokens new-line +\end{bnf} + +\begin{bnf} +\nontermdef{lparen}\br + \descr{a \terminal{(} character not immediately preceded by whitespace} +\end{bnf} + +\begin{bnf} +\nontermdef{identifier-list}\br + identifier\br + identifier-list \terminal{,} identifier +\end{bnf} + +\begin{bnf} +\nontermdef{replacement-list}\br + \opt{pp-tokens} +\end{bnf} + +\begin{bnf} +\nontermdef{pp-tokens}\br + preprocessing-token\br + pp-tokens preprocessing-token +\end{bnf} + +\begin{bnf} +\nontermdef{new-line}\br + \descr{the new-line character} +\end{bnf} + +\pnum +A \defn{preprocessing directive} consists of a sequence of preprocessing tokens 
+that satisfies the following constraints: +At the start of translation phase 4, +the first token in the sequence, +referred to as a \defnadj{directive-introducing}{token}, +begins with the first character in the source file +(optionally after whitespace containing no new-line characters) or +follows whitespace containing at least one new-line character, +and is + +\begin{itemize} +\item +a \tcode{\#} preprocessing token, or + +\item +an \keyword{import} preprocessing token +immediately followed on the same logical line by a +\grammarterm{header-name}, +\tcode{<}, +\grammarterm{identifier}, +\grammarterm{string-literal}, or +\tcode{:} +preprocessing token, or + +\item +a \keyword{module} preprocessing token +immediately followed on the same logical line by an +\grammarterm{identifier}, +\tcode{:}, or +\tcode{;} +preprocessing token, or + +\item +an \keyword{export} preprocessing token +immediately followed on the same logical line by +one of the two preceding forms. +\end{itemize} + +The last token in the sequence is the first token within the sequence that +is immediately followed by whitespace containing a new-line character. +\begin{footnote} +Thus, +preprocessing directives are commonly called ``lines''. +These ``lines'' have no other syntactic significance, +as all whitespace is equivalent except in certain situations +during preprocessing (see the +\tcode{\#} +character string literal creation operator in~\ref{cpp.stringize}, for example). +\end{footnote} +\begin{note} +A new-line character ends the preprocessing directive even if it occurs +within what would otherwise be an invocation of a function-like macro. 
+\end{note} + +\begin{example} +\begin{codeblock} +# // preprocessing directive +module ; // preprocessing directive +export module leftpad; // preprocessing directive +import <string>; // preprocessing directive +export import "squee"; // preprocessing directive +import rightpad; // preprocessing directive +import :part; // preprocessing directive + +module // not a preprocessing directive +; // not a preprocessing directive + +export // not a preprocessing directive +import // not a preprocessing directive +foo; // not a preprocessing directive + +export // not a preprocessing directive +import foo; // preprocessing directive (ill-formed at phase 7) + +import :: // not a preprocessing directive +import -> // not a preprocessing directive +\end{codeblock} +\end{example} + +\pnum +A sequence of preprocessing tokens is only a \grammarterm{text-line} +if it does not begin with a directive-introducing token. +A sequence of preprocessing tokens is only a \grammarterm{conditionally-supported-directive} +if it does not begin with any of the directive names +appearing after a \tcode{\#} in the syntax. +A \grammarterm{conditionally-supported-directive} is +conditionally-supported with +\impldef{additional supported forms of preprocessing directive} +semantics. + +\pnum +At the start of phase 4 of translation, +the \grammarterm{group} of a \grammarterm{pp-global-module-fragment} shall +contain neither a \grammarterm{text-line} nor a \grammarterm{pp-import}. + +\pnum +When in a group that is skipped\iref{cpp.cond}, the directive +syntax is relaxed to allow any sequence of preprocessing tokens to occur between +the directive name and the following new-line character. 
+ +\pnum +The only whitespace characters that shall appear +between preprocessing tokens +within a preprocessing directive +(from just after the directive-introducing token +through just before the terminating new-line character) +are space and horizontal-tab +(including spaces that have replaced comments +or possibly other whitespace characters +in translation phase 3). + +\pnum +The implementation can +process and skip sections of source files conditionally, +include other source files, +import macros from header units, +and replace macros. +These capabilities are called +\defn{preprocessing}, +because conceptually they occur +before translation of the resulting translation unit. + +\pnum +The preprocessing tokens within a preprocessing directive +are not subject to macro expansion unless otherwise stated. + +\begin{example} +In: +\begin{codeblock} +#define EMPTY +EMPTY # include <file.h> +\end{codeblock} +the sequence of preprocessing tokens on the second line is \textit{not} +a preprocessing directive, because it does not begin with a \tcode{\#} at the start of +translation phase 4, even though it will do so after the macro \tcode{EMPTY} +has been replaced. +\end{example} + +\rSec2[cpp.module]{Module directive} +\indextext{preprocessing directive!module}% + +\begin{bnf} +\nontermdef{pp-module}\br + \opt{\keyword{export}} \keyword{module} \opt{pp-tokens} \terminal{;} new-line +\end{bnf} + +\pnum +A \grammarterm{pp-module} shall not +appear in a context where \tcode{module} +or (if it is the first token of the \grammarterm{pp-module}) \tcode{export} +is an identifier defined as an object-like macro. 
+ +\pnum +The \grammarterm{pp-tokens}, if any, of a \grammarterm{pp-module} +shall be of the form: +\begin{ncsimplebnf} +pp-module-name \opt{pp-module-partition} \opt{pp-tokens} +\end{ncsimplebnf} +where the \grammarterm{pp-tokens} (if any) shall not begin with +a \tcode{(} preprocessing token and +the grammar non-terminals are defined as: +\begin{ncbnf} +\nontermdef{pp-module-name}\br + \opt{pp-module-name-qualifier} identifier +\end{ncbnf} +\begin{ncbnf} +\nontermdef{pp-module-partition}\br + \terminal{:} \opt{pp-module-name-qualifier} identifier +\end{ncbnf} +\begin{ncbnf} +\nontermdef{pp-module-name-qualifier}\br + identifier \terminal{.}\br + pp-module-name-qualifier identifier \terminal{.} +\end{ncbnf} +No \grammarterm{identifier} in +the \grammarterm{pp-module-name} or \grammarterm{pp-module-partition} +shall currently be defined as an object-like macro. + +\pnum +Any preprocessing tokens after the \tcode{module} preprocessing token +in the \tcode{module} directive are processed just as in normal text. +\begin{note} +Each identifier currently defined as a macro name +is replaced by its replacement list of preprocessing tokens. +\end{note} + +\pnum +The \tcode{module} and \tcode{export} (if it exists) preprocessing tokens +are replaced by the \grammarterm{module-keyword} and +\grammarterm{export-keyword} preprocessing tokens respectively. +\begin{note} +This makes the line no longer a directive +so it is not removed at the end of phase 4. +\end{note} + +\rSec2[cpp.null]{Null directive}% +\indextext{preprocessing directive!null} + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\#} new-line +\end{ncsimplebnf} +has no effect. 
+ +\rSec2[cpp.cond]{Conditional inclusion}% +\indextext{preprocessing directive!conditional inclusion}% +\indextext{inclusion!conditional|see{preprocessing directive, conditional inclusion}} + +\indextext{\idxcode{defined}}% +\begin{bnf} +\nontermdef{defined-macro-expression}\br + \terminal{defined} identifier\br + \terminal{defined (} identifier \terminal{)} +\end{bnf} + +\begin{bnf} +\nontermdef{h-preprocessing-token}\br + \textnormal{any \grammarterm{preprocessing-token} other than \terminal{>}} +\end{bnf} + +\begin{bnf} +\nontermdef{h-pp-tokens}\br + h-preprocessing-token\br + h-pp-tokens h-preprocessing-token +\end{bnf} + +\begin{bnf} +\nontermdef{header-name-tokens}\br + string-literal\br + \terminal{<} h-pp-tokens \terminal{>} +\end{bnf} + +\indextext{\idxxname{has_include}}% +\begin{bnf} +\nontermdef{has-include-expression}\br + \terminal{\xname{has_include}} \terminal{(} header-name \terminal{)}\br + \terminal{\xname{has_include}} \terminal{(} header-name-tokens \terminal{)} +\end{bnf} + +\indextext{\idxxname{has_cpp_attribute}}% +\begin{bnf} +\nontermdef{has-attribute-expression}\br + \terminal{\xname{has_cpp_attribute} (} pp-tokens \terminal{)} +\end{bnf} + +\pnum +The expression that controls conditional inclusion +shall be an integral constant expression except that +identifiers +(including those lexically identical to keywords) +are interpreted as described below +\begin{footnote} +Because the controlling constant expression is evaluated +during translation phase 4, +all identifiers either are or are not macro names --- +there simply are no keywords, enumeration constants, etc. +\end{footnote} +and it may contain zero or more \grammarterm{defined-macro-expression}{s} and/or +\grammarterm{has-include-expression}{s} and/or +\grammarterm{has-attribute-expression}{s} as unary operator expressions. 
+ +\pnum +A \grammarterm{defined-macro-expression} evaluates to \tcode{1} +if the identifier is currently defined +as a macro name +(that is, if it is predefined +or if it has one or more active macro definitions\iref{cpp.import}, +for example because +it has been the subject of a +\tcode{\#define} +preprocessing directive +without an intervening +\tcode{\#undef} +directive with the same subject identifier), \tcode{0} if it is not. + +\pnum +The second form of \grammarterm{has-include-expression} +is considered only if the first form does not match, +in which case the preprocessing tokens are processed just as in normal text. + +\pnum +The header or source file identified by +the parenthesized preprocessing token sequence +in each contained \grammarterm{has-include-expression} +is searched for as if that preprocessing token sequence +were the \grammarterm{pp-tokens} in a \tcode{\#include} directive, +except that no further macro expansion is performed. +If such a directive would not satisfy the syntactic requirements +of a \tcode{\#include} directive, the program is ill-formed. +The \grammarterm{has-include-expression} evaluates +to \tcode{1} if the search for the source file succeeds, and +to \tcode{0} if the search fails. + +\pnum +Each \grammarterm{has-attribute-expression} is replaced by +a non-zero \grammarterm{pp-number} +matching the form of an \grammarterm{integer-literal} +if the implementation supports an attribute +with the name specified by interpreting +the \grammarterm{pp-tokens}, after macro expansion, +as an \grammarterm{attribute-token}, +and by \tcode{0} otherwise. +The program is ill-formed if the \grammarterm{pp-tokens} +do not match the form of an \grammarterm{attribute-token}. + +\pnum +For an attribute specified in this document, +it is \impldef{value of \grammarterm{has-attribute-expression} +for standard attributes} +whether the value of the \grammarterm{has-attribute-expression} +is \tcode{0} or is given by \tref{cpp.cond.ha}. 
+For other attributes recognized by the implementation, +the value is +\impldef{value of \grammarterm{has-attribute-expression} +for non-standard attributes}. +\begin{note} +It is expected +that the availability of an attribute can be detected by any non-zero result. +\end{note} + +\begin{floattable}{\xname{has_cpp_attribute} values}{cpp.cond.ha} +{ll} +\topline +\lhdr{Attribute} & \rhdr{Value} \\ \rowsep +\tcode{assume} & \tcode{202207L} \\ +\tcode{carries_dependency} & \tcode{200809L} \\ +\tcode{deprecated} & \tcode{201309L} \\ +\tcode{fallthrough} & \tcode{201603L} \\ +\tcode{likely} & \tcode{201803L} \\ +\tcode{maybe_unused} & \tcode{201603L} \\ +\tcode{no_unique_address} & \tcode{201803L} \\ +\tcode{nodiscard} & \tcode{201907L} \\ +\tcode{noreturn} & \tcode{200809L} \\ +\tcode{unlikely} & \tcode{201803L} \\ +\end{floattable} + +\pnum +The +\tcode{\#ifdef}, \tcode{\#ifndef}, \tcode{\#elifdef}, and \tcode{\#elifndef} +directives, and +the \tcode{defined} conditional inclusion operator, +shall treat \xname{has_include} and \xname{has_cpp_attribute} +as if they were the names of defined macros. +The identifiers \xname{has_include} and \xname{has_cpp_attribute} +shall not appear in any context not mentioned in this subclause. + +\pnum +Each preprocessing token that remains (in the list of preprocessing tokens that +will become the controlling expression) +after all macro replacements have occurred +shall be in the lexical form of a token\iref{lex.token}. + +\pnum +Preprocessing directives of the forms +\begin{ncsimplebnf}\obeyspaces +\indextext{\idxcode{\#if}}% +\terminal{\# if } constant-expression new-line \opt{group}\br +\indextext{\idxcode{\#elif}}% +\terminal{\# elif } constant-expression new-line \opt{group} +\end{ncsimplebnf} +check whether the controlling constant expression evaluates to nonzero. 
+ +\pnum +Prior to evaluation, +macro invocations in the list of preprocessing tokens +that will become the controlling constant expression +are replaced +(except for those macro names modified by the +\tcode{defined} +unary operator), +just as in normal text. +If the token +\tcode{defined} +is generated as a result of this replacement process +or use of the +\tcode{defined} +unary operator does not match one of the two specified forms +prior to macro replacement, +the behavior is undefined. + +\pnum +After all replacements due to macro expansion and +evaluations of +\grammarterm{defined-macro-expression}s, +\grammarterm{has-include-expression}s, and +\grammarterm{has-attribute-expression}s +have been performed, +all remaining identifiers and keywords, +except for +\tcode{true} +and +\tcode{false}, +are replaced with the \grammarterm{pp-number} +\tcode{0}, +and then each preprocessing token is converted into a token. +\begin{note} +An alternative +token\iref{lex.digraph} is not an identifier, +even when its spelling consists entirely of letters and underscores. +Therefore it is not subject to this replacement. +\end{note} + +\pnum +The resulting tokens comprise the controlling constant expression +which is evaluated according to the rules of~\ref{expr.const} +using arithmetic that has at least the ranges specified +in~\ref{support.limits}. For the purposes of this token conversion and evaluation +all signed and unsigned integer types +act as if they have the same representation as, respectively, +\tcode{intmax_t} or \tcode{uintmax_t}\iref{cstdint.syn}. +\begin{note} +Thus on an +implementation where \tcode{std::numeric_limits<int>::max()} is \tcode{0x7FFF} +and \tcode{std::numeric_limits<unsigned int>::max()} is \tcode{0xFFFF}, +the integer literal \tcode{0x8000} is signed and positive within a \tcode{\#if} +expression even though it is unsigned in translation phase +7\iref{lex.phases}. 
+\end{note} +This includes interpreting \grammarterm{character-literal}s +according to the rules in \ref{lex.ccon}. +\begin{note} +The associated character encodings of literals are the same +in \tcode{\#if} and \tcode{\#elif} directives and in any expression. +\end{note} +Each subexpression with type +\tcode{bool} +is subjected to integral promotion before processing continues. + +\pnum +Preprocessing directives of the forms +\begin{ncsimplebnf}\obeyspaces +\terminal{\# ifdef } identifier new-line \opt{group}\br +\indextext{\idxcode{\#ifdef}}% +\terminal{\# ifndef } identifier new-line \opt{group}\br +\indextext{\idxcode{\#ifndef}}% +\terminal{\# elifdef } identifier new-line \opt{group}\br +\indextext{\idxcode{\#elifdef}}% +\terminal{\# elifndef} identifier new-line \opt{group} +\indextext{\idxcode{\#elifndef}}% +\end{ncsimplebnf} +check whether the identifier is or is not currently defined as a macro name. +Their conditions are equivalent to +\tcode{\#if} \tcode{defined} \grammarterm{identifier}, +\tcode{\#if} \tcode{!defined} \grammarterm{identifier}, +\tcode{\#elif} \tcode{defined} \grammarterm{identifier}, and +\tcode{\#elif} \tcode{!defined} \grammarterm{identifier}, +respectively. + +\pnum +Each directive's condition is checked in order. +If it evaluates to false (zero), +the group that it controls is skipped: +directives are processed only through the name that determines +the directive in order to keep track of the level +of nested conditionals; +the rest of the directives' preprocessing tokens are ignored, +as are the other preprocessing tokens in the group. +Only the first group +whose control condition evaluates to true (nonzero) is processed; +any following groups are skipped and their controlling directives +are processed as if they were in a group that is skipped. 
+If none of the conditions evaluates to true, +and there is a +\tcode{\#else} +\indextext{\idxcode{\#else}}% +directive, +the group controlled by the +\tcode{\#else} +is processed; lacking a +\tcode{\#else} +directive, all the groups until the +\tcode{\#endif} +\indextext{\idxcode{\#endif}}% +are skipped.% +\begin{footnote} +As indicated by the syntax, +a preprocessing token cannot follow a +\tcode{\#else} +or +\tcode{\#endif} +directive before the terminating new-line character. +However, +comments can appear anywhere in a source file, +including within a preprocessing directive. +\end{footnote} + +\pnum +\begin{example} +This demonstrates a way to include a library \tcode{optional} facility +only if it is available: + +\begin{codeblock} +#if __has_include(<optional>) +# include <optional> +# if __cpp_lib_optional >= 201603 +# define have_optional 1 +# endif +#elif __has_include(<experimental/optional>) +# include <experimental/optional> +# if __cpp_lib_experimental_optional >= 201411 +# define have_optional 1 +# define experimental_optional 1 +# endif +#endif +#ifndef have_optional +# define have_optional 0 +#endif +\end{codeblock} +\end{example} + +\pnum +\begin{example} +This demonstrates a way to use the attribute \tcode{[[acme::deprecated]]} +only if it is available. 
+\begin{codeblock} +#if __has_cpp_attribute(acme::deprecated) +# define ATTR_DEPRECATED(msg) [[acme::deprecated(msg)]] +#else +# define ATTR_DEPRECATED(msg) [[deprecated(msg)]] +#endif +ATTR_DEPRECATED("This function is deprecated") void anvil(); +\end{codeblock} +\end{example} + +\rSec2[cpp.include]{Source file inclusion} +\indextext{preprocessing directive!header inclusion} +\indextext{preprocessing directive!source-file inclusion} +\indextext{inclusion!source file|see{preprocessing directive, source-file inclusion}}% +\indextext{\idxcode{\#include}}% + +\pnum +A +\tcode{\#include} +directive shall identify a header or source file +that can be processed by the implementation. 
+ +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# include <} h-char-sequence \terminal{>} new-line +\end{ncsimplebnf} +searches a sequence of +\impldef{sequence of places searched for a header} +places +for a header identified uniquely by the specified sequence +between the +\tcode{<} +and +\tcode{>} +delimiters, +and causes the replacement of that +directive by the entire contents of the header. +How the places are specified +or the header identified +is \impldef{search locations for \tcode{<>} header}. + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# include "} q-char-sequence \terminal{"} new-line +\end{ncsimplebnf} +causes the replacement of that +directive by the entire contents of the +source file identified by the specified sequence between the +\tcode{"} +delimiters. +The named source file is searched for in an +\impldef{manner of search for included source file} +manner. +If this search is not supported, +or if the search fails, +the directive is reprocessed as if it read +\begin{ncsimplebnf} +\terminal{\# include <} h-char-sequence \terminal{>} new-line +\end{ncsimplebnf} +with the identical contained sequence (including +\tcode{>} +characters, if any) from the original directive. + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# include} pp-tokens new-line +\end{ncsimplebnf} +(that does not match one of the two previous forms) is permitted. +The preprocessing tokens after +\tcode{include} +in the directive are processed just as in normal text +(i.e., each identifier currently defined as a macro name is replaced by its +replacement list of preprocessing tokens). +If the directive resulting after all replacements does not match +one of the two previous forms, the behavior is +undefined. 
+\begin{footnote} +Note that adjacent \grammarterm{string-literal}s are not concatenated into +a single \grammarterm{string-literal} +(see the translation phases in~\ref{lex.phases}); +thus, an expansion that results in two \grammarterm{string-literal}s is an +invalid directive. +\end{footnote} +The method by which a sequence of preprocessing tokens between a +\tcode{<} +and a +\tcode{>} +preprocessing token pair or a pair of +\tcode{"} +characters is combined into a single header name +preprocessing token is \impldef{search locations for \tcode{""""} header}. + +\pnum +The implementation shall provide unique mappings for +sequences consisting of one or more +\grammarterm{nondigit}{s} or \grammarterm{digit}{s}\iref{lex.name} +followed by a period +(\tcode{.}) +and a single +\grammarterm{nondigit}. +The first character shall not be a \grammarterm{digit}. +The implementation may ignore distinctions of alphabetical case. + +\pnum +A +\tcode{\#include} +preprocessing directive may appear +in a source file that has been read because of a +\tcode{\#include} +directive in another file, +up to an \impldef{nesting limit for \tcode{\#include} directives} nesting limit. + +\pnum +If the header identified by the \grammarterm{header-name} +denotes an importable header\iref{module.import}, +it is +\impldef{whether source file inclusion of importable header +is replaced with \tcode{import} directive} +whether the \tcode{\#include} preprocessing directive +is instead replaced by an \tcode{import} directive\iref{cpp.import} of the form +\begin{ncbnf} +\terminal{import} header-name \terminal{;} new-line +\end{ncbnf} + +\pnum +\begin{note} +An implementation can provide a mechanism for making arbitrary +source files available to the \tcode{< >} search. +However, using the \tcode{< >} form for headers provided +with the implementation and the \tcode{" "} form for sources +outside the control of the implementation +achieves wider portability. 
For instance: + +\begin{codeblock} +#include <stdio.h> +#include <unistd.h> +#include "usefullib.h" +#include "myprog.h" +\end{codeblock} + +\end{note} + +\pnum +\begin{example} +This illustrates macro-replaced +\tcode{\#include} +directives: + +\begin{codeblock} +#if VERSION == 1 + #define INCFILE "vers1.h" +#elif VERSION == 2 + #define INCFILE "vers2.h" // and so on +#else + #define INCFILE "versN.h" +#endif +#include INCFILE +\end{codeblock} +\end{example} + +\rSec2[cpp.import]{Header unit importation} +\indextext{header unit!preprocessing}% +\indextext{preprocessing directive!import}% +\indextext{macro!import|(}% + +\begin{bnf} +\nontermdef{pp-import}\br + \opt{\keyword{export}} \keyword{import} header-name \opt{pp-tokens} \terminal{;} new-line\br + \opt{\keyword{export}} \keyword{import} header-name-tokens \opt{pp-tokens} \terminal{;} new-line\br + \opt{\keyword{export}} \keyword{import} pp-tokens \terminal{;} new-line +\end{bnf} + +\pnum +A \grammarterm{pp-import} shall not +appear in a context where \tcode{import} +or (if it is the first token of the \grammarterm{pp-import}) \tcode{export} +is an identifier defined as an object-like macro. + +\pnum +The preprocessing tokens after the \tcode{import} preprocessing token +in the \tcode{import} \grammarterm{control-line} +are processed just as in normal text +(i.e., each identifier currently defined as a macro name +is replaced by its replacement list of preprocessing tokens). +\begin{note} +An \tcode{import} directive +matching the first two forms of a \grammarterm{pp-import} +instructs the preprocessor to import macros +from the header unit\iref{module.import} +denoted by the \grammarterm{header-name}, +as described below. 
+\end{note} +\indextext{point of!macro import|see{macro, point of import}}% +The \defnx{point of macro import}{macro!point of import} for the +first two forms of \grammarterm{pp-import} is +immediately after the \grammarterm{new-line} terminating +the \grammarterm{pp-import}. 
+The last form of \grammarterm{pp-import} is only considered +if the first two forms did not match, and +does not have a point of macro import. + +\pnum +If a \grammarterm{pp-import} is produced by source file inclusion +(including by the rewrite produced +when a \tcode{\#include} directive names an importable header) +while processing the \grammarterm{group} of a \grammarterm{module-file}, +the program is ill-formed. + +\pnum +In all three forms of \grammarterm{pp-import}, +the \tcode{import} and \tcode{export} (if it exists) preprocessing tokens +are replaced by the \grammarterm{import-keyword} and +\grammarterm{export-keyword} preprocessing tokens respectively. +\begin{note} +This makes the line no longer a directive +so it is not removed at the end of phase 4. +\end{note} +Additionally, in the second form of \grammarterm{pp-import}, +a \grammarterm{header-name} token is formed as if +the \grammarterm{header-name-tokens} +were the \grammarterm{pp-tokens} of a \tcode{\#include} directive. +The \grammarterm{header-name-tokens} are replaced by +the \grammarterm{header-name} token. +\begin{note} +This ensures that imports are treated consistently by +the preprocessor and later phases of translation. +\end{note} + +\pnum +Each \tcode{\#define} directive encountered when preprocessing +each translation unit in a program results in a distinct +\defnx{macro definition}{macro!definition}. +\begin{note} +A predefined macro name\iref{cpp.predefined} +is not introduced by a \tcode{\#define} directive. +Implementations providing mechanisms to predefine additional macros +are encouraged to not treat them +as being introduced by a \tcode{\#define} directive. 
+\end{note} +Each macro definition has at most one point of definition in +each translation unit and at most one point of undefinition, as follows: +\begin{itemize} +\item +\indextext{point of!macro definition|see{macro, point of definition}}% +The \defnx{point of definition}{macro!point of definition} +of a macro definition within a translation unit $T$ is +\begin{itemize} +\item +if the \tcode{\#define} directive of the macro definition occurs within $T$, +the point at which that directive occurs, or otherwise, +\item +if the macro name is not lexically identical to a keyword\iref{lex.key} +or to the \grammarterm{identifier}{s} \tcode{module} or \tcode{import}, +the first point of macro import in $T$ of a header unit +containing a point of definition for the macro definition, if any. +\end{itemize} +In the latter case, the macro is said +to be \defnx{imported}{macro!import} from the header unit. + +\item +\indextext{point of!macro undefinition|see{macro, point of undefinition}}% +The \defnx{point of undefinition}{macro!point of undefinition} +of a macro definition within a translation unit +is the first point at which a \tcode{\#undef} directive naming the macro occurs +after its point of definition, or the first point +of macro import of a header unit containing a point of undefinition for the +macro definition, whichever (if any) occurs first. +\end{itemize} + +\pnum +\indextext{active macro directive|see{macro, active}}% +A macro directive is \defnx{active}{macro!active} at a source location +if it has a point of definition in that translation unit preceding the location, +and does not have a point of undefinition in that translation unit preceding +the location. + +\pnum +If a macro would be replaced or redefined, and multiple macro definitions +are active for that macro name, the active macro definitions shall all be +valid redefinitions of the same macro\iref{cpp.replace}. 
+\begin{note} +The relative order of \grammarterm{pp-import}{s} has no bearing on whether a +particular macro definition is active. +\end{note} + +\pnum +\begin{example} +\begin{codeblocktu}{Importable header \tcode{"a.h"}} +#define X 123 // \#1 +#define Y 45 // \#2 +#define Z a // \#3 +#undef X // point of undefinition of \#1 in \tcode{"a.h"} +\end{codeblocktu} + +\begin{codeblocktu}{Importable header \tcode{"b.h"}} +import "a.h"; // point of definition of \#1, \#2, and \#3, point of undefinition of \#1 in \tcode{"b.h"} +#define X 456 // OK, \#1 is not active +#define Y 6 // error: \#2 is active +\end{codeblocktu} + +\begin{codeblocktu}{Importable header \tcode{"c.h"}} +#define Y 45 // \#4 +#define Z c // \#5 +\end{codeblocktu} + +\begin{codeblocktu}{Importable header \tcode{"d.h"}} +import "c.h"; // point of definition of \#4 and \#5 in \tcode{"d.h"} +\end{codeblocktu} + +\begin{codeblocktu}{Importable header \tcode{"e.h"}} +import "a.h"; // point of definition of \#1, \#2, and \#3, point of undefinition of \#1 in \tcode{"e.h"} +import "d.h"; // point of definition of \#4 and \#5 in \tcode{"e.h"} +int a = Y; // OK, active macro definitions \#2 and \#4 are valid redefinitions +int c = Z; // error: active macro definitions \#3 and \#5 are not valid redefinitions of \tcode{Z} +\end{codeblocktu} + +\begin{codeblocktu}{Module unit \tcode{f}} +export module f; +export import "a.h"; + +int a = Y; // OK +\end{codeblocktu} + +\begin{codeblocktu}{Translation unit \tcode{\#1}} +import f; +int x = Y; // error: \tcode{Y} is neither a defined macro nor a declared name +\end{codeblocktu} +\end{example} +\indextext{macro!import|)} + +\rSec2[cpp.replace]{Macro replacement}% + +\rSec3[cpp.replace.general]{General}% +\indextext{macro!replacement|(}% +\indextext{replacement!macro|see{macro, replacement}}% +\indextext{preprocessing directive!macro replacement|see{macro, replacement}} + +\pnum +\indextext{macro!replacement list}% +Two replacement lists are identical if and only if 
+the preprocessing tokens in both have +the same number, ordering, spelling, and whitespace separation, +where all whitespace separations are considered identical. + +\pnum +An identifier currently defined as an +\indextext{macro!object-like}% +object-like macro (see below) may be redefined by another +\tcode{\#define} +preprocessing directive provided that the second definition is an +object-like macro definition and the two replacement lists +are identical, otherwise the program is ill-formed. +Likewise, an identifier currently defined as a +\indextext{macro!function-like}% +function-like macro (see below) may be redefined by another +\tcode{\#define} +preprocessing directive provided that the second definition is a +function-like macro definition that has the same number and spelling +of parameters, +and the two replacement lists are identical, +otherwise the program is ill-formed. + +\pnum +\begin{example} +The following sequence is valid: +\begin{codeblock} +#define OBJ_LIKE (1-1) +#define OBJ_LIKE @\tcode{/* whitespace */ (1-1) /* other */}@ +#define FUNC_LIKE(a) ( a ) +#define FUNC_LIKE( a )( @\tcode{/* note the whitespace */ \textbackslash}@ + a @\tcode{/* other stuff on this line}@ + @\tcode{*/}@ ) +\end{codeblock} +But the following redefinitions are invalid: +\begin{codeblock} +#define OBJ_LIKE (0) // different token sequence +#define OBJ_LIKE (1 - 1) // different whitespace +#define FUNC_LIKE(b) ( a ) // different parameter usage +#define FUNC_LIKE(b) ( b ) // different parameter spelling +\end{codeblock} +\end{example} + +\pnum +\indextext{macro!replacement list}% +There shall be whitespace between the identifier and the replacement list +in the definition of an object-like macro. 
+ +\pnum +If the \grammarterm{identifier-list} in the macro definition does not end with +an ellipsis, the number of arguments (including those arguments consisting +of no preprocessing tokens) +in an invocation of a function-like macro shall +equal the number of parameters in the macro definition. +Otherwise, there shall be at least as many arguments in the invocation as there are +parameters in the macro definition (excluding the \tcode{...}). There +shall exist a +\tcode{)} +preprocessing token that terminates the invocation. + +\pnum +\indextext{__va_args__@\mname{VA_ARGS}}% +\indextext{__va_opt__@\mname{VA_OPT}}% +The identifiers \mname{VA_ARGS} and \mname{VA_OPT} +shall occur only in the \grammarterm{replacement-list} +of a function-like macro that uses the ellipsis notation in the parameters. + +\pnum +A parameter identifier in a function-like macro +shall be uniquely declared within its scope. + +\pnum +The identifier immediately following the +\tcode{define} +is called the +\indextext{name!macro|see{macro, name}}% +\defnx{macro name}{macro!name}. +There is one name space for macro names. +Any whitespace characters preceding or following the +replacement list of preprocessing tokens are not considered +part of the replacement list for either form of macro. + +\pnum +If a +\indextext{\#\#0 operator@\tcode{\#} operator} +\tcode{\#} +preprocessing token, +followed by an identifier, +occurs lexically +at the point at which a preprocessing directive can begin, +the identifier is not subject to macro replacement. 
+ +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# define} identifier replacement-list new-line +\indextext{\idxcode{\#define}}% +\end{ncsimplebnf} +defines an +\defnadj{object-like}{macro} that +causes each subsequent instance of the macro name +\begin{footnote} +Since, by macro-replacement time, +all \grammarterm{character-literal}s and \grammarterm{string-literal}s are preprocessing tokens, +not sequences possibly containing identifier-like subsequences +(see \ref{lex.phases}, translation phases), +they are never scanned for macro names or parameters. +\end{footnote} +to be replaced by the replacement list of preprocessing tokens +that constitute the remainder of the directive. +\begin{footnote} +An alternative token\iref{lex.digraph} is not an identifier, +even when its spelling consists entirely of letters and underscores. +Therefore it is not possible to define a macro +whose name is the same as that of an alternative token. +\end{footnote} +The replacement list is then rescanned for more macro names as +specified below. + +\pnum +\begin{example} +The simplest use of this facility is to define a ``manifest constant'', +as in +\begin{codeblock} +#define TABSIZE 100 +int table[TABSIZE]; +\end{codeblock} +\end{example} + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# define} identifier lparen \opt{identifier-list} \terminal{)} replacement-list new-line\br +\terminal{\# define} identifier lparen \terminal{...} \terminal{)} replacement-list new-line\br +\terminal{\# define} identifier lparen identifier-list \terminal{, ...} \terminal{)} replacement-list new-line +\end{ncsimplebnf} +defines a \defnadj{function-like}{macro} +with parameters, whose use is +similar syntactically to a function call. +The parameters +\indextext{parameter!macro}% +are specified by the optional list of identifiers. 
+Each subsequent instance of the function-like macro name followed by a +\tcode{(} +as the next preprocessing token +introduces the sequence of preprocessing tokens that is replaced +by the replacement list in the definition +(an invocation of the macro). +\indextext{invocation!macro}% +The replaced sequence of preprocessing tokens is terminated by the matching +\tcode{)} +preprocessing token, skipping intervening matched pairs of left and +right parenthesis preprocessing tokens. +Within the sequence of preprocessing tokens making up an invocation +of a function-like macro, +new-line is considered a normal whitespace character. + +\pnum +\indextext{macro!function-like!arguments}% +The sequence of preprocessing tokens +bounded by the outside-most matching parentheses +forms the list of arguments for the function-like macro. +The individual arguments within the list +are separated by comma preprocessing tokens, +but comma preprocessing tokens between matching +inner parentheses do not separate arguments. +If there are sequences of preprocessing tokens within the list of +arguments that would otherwise act as preprocessing directives, +\begin{footnote} +A \grammarterm{conditionally-supported-directive} is a preprocessing directive regardless of whether the implementation supports it. +\end{footnote} +the behavior is undefined. + +\pnum +\begin{example} +The following defines a function-like +macro whose value is the maximum of its arguments. +It has the disadvantages of evaluating one or the other of its arguments +a second time +(including +\indextext{side effects}% +side effects) +and generating more code than a function if invoked several times. +It also cannot have its address taken, +as it has none. + +\begin{codeblock} +#define max(a, b) ((a) > (b) ? (a) : (b)) +\end{codeblock} + +The parentheses ensure that the arguments and +the resulting expression are bound properly. 
+\end{example} + +\pnum +\indextext{macro!function-like!arguments}% +If there is a \tcode{...} immediately preceding the \tcode{)} in the +function-like macro +definition, then the trailing arguments (if any), including any separating comma preprocessing +tokens, are merged to form a single item: the \defn{variable arguments}. The number of +arguments so combined is such that, following merger, the number of arguments is +either equal to or +one more than the number of parameters in the macro definition (excluding the +\tcode{...}). + +\rSec3[cpp.subst]{Argument substitution}% +\indextext{macro!argument substitution}% +\indextext{argument substitution|see{macro, argument substitution}}% + +\indextext{__va_opt__@\mname{VA_OPT}}% +\begin{bnf} +\nontermdef{va-opt-replacement}\br + \terminal{\mname{VA_OPT} (} \opt{pp-tokens} \terminal{)} +\end{bnf} + +\pnum +After the arguments for the invocation of a function-like macro have +been identified, argument substitution takes place. +For each parameter in the replacement list that is neither +preceded by a \tcode{\#} or \tcode{\#\#} preprocessing token nor +followed by a \tcode{\#\#} preprocessing token, the preprocessing tokens +naming the parameter are replaced by a token sequence determined as follows: +\begin{itemize} +\item + If the parameter is of the form \grammarterm{va-opt-replacement}, + the replacement preprocessing tokens are the + preprocessing token sequence for the corresponding argument, + as specified below. +\item + Otherwise, the replacement preprocessing tokens are the + preprocessing tokens of the corresponding argument after all + macros contained therein have been expanded. The argument's + preprocessing tokens are completely macro replaced before + being substituted as if they formed the rest of the preprocessing + file with no other preprocessing tokens being available. +\end{itemize} +\begin{example} +\begin{codeblock} +#define LPAREN() ( +#define G(Q) 42 +#define F(R, X, ...) 
__VA_OPT__(G R X) ) +int x = F(LPAREN(), 0, <:-); // replaced by \tcode{int x = 42;} +\end{codeblock} +\end{example} + +\pnum +\indextext{__va_args__@\mname{VA_ARGS}}% +An identifier \mname{VA_ARGS} that occurs in the replacement list +shall be treated as if it were a parameter, and the variable arguments shall form +the preprocessing tokens used to replace it. + +\pnum +\begin{example} +\begin{codeblock} +#define debug(...) fprintf(stderr, @\mname{VA_ARGS}@) +#define showlist(...) puts(#@\mname{VA_ARGS}@) +#define report(test, ...) ((test) ? puts(#test) : printf(@\mname{VA_ARGS}@)) +debug("Flag"); +debug("X = %d\n", x); +showlist(The first, second, and third items.); +report(x>y, "x is %d but y is %d", x, y); +\end{codeblock} +results in +\begin{codeblock} +fprintf(stderr, "Flag"); +fprintf(stderr, "X = %d\n", x); +puts("The first, second, and third items."); +((x>y) ? puts("x>y") : printf("x is %d but y is %d", x, y)); +\end{codeblock} +\end{example} + +\pnum +\indextext{__va_opt__@\mname{VA_OPT}}% +The identifier \mname{VA_OPT} +shall always occur as part of the preprocessing token sequence +\grammarterm{va-opt-replacement}; +its closing \tcode{)} is determined by skipping +intervening pairs of matching left and right parentheses +in its \grammarterm{pp-tokens}. +The \grammarterm{pp-tokens} of a \grammarterm{va-opt-replacement} +shall not contain \mname{VA_OPT}. +If the \grammarterm{pp-tokens} would be ill-formed +as the replacement list of the current function-like macro, +the program is ill-formed. +A \grammarterm{va-opt-replacement} is treated as if it were a parameter, +and the preprocessing token sequence for the corresponding +argument is defined as follows. +If the substitution of \mname{VA_ARGS} as neither an operand +of \tcode{\#} nor \tcode{\#\#} consists of no preprocessing tokens, +the argument consists of +a single placemarker preprocessing token\iref{cpp.concat,cpp.rescan}. 
+Otherwise, the argument consists of +the results of the expansion of the contained \grammarterm{pp-tokens} +as the replacement list of the current function-like macro +before removal of placemarker tokens, rescanning, and further replacement. +\begin{note} +The placemarker tokens are removed before stringization\iref{cpp.stringize}, +and can be removed by rescanning and further replacement\iref{cpp.rescan}. +\end{note} +\begin{example} +\begin{codeblock} +#define F(...) f(0 __VA_OPT__(,) __VA_ARGS__) +#define G(X, ...) f(0, X __VA_OPT__(,) __VA_ARGS__) +#define SDEF(sname, ...) S sname __VA_OPT__(= { __VA_ARGS__ }) +#define EMP + +F(a, b, c) // replaced by \tcode{f(0, a, b, c)} +F() // replaced by \tcode{f(0)} +F(EMP) // replaced by \tcode{f(0)} + +G(a, b, c) // replaced by \tcode{f(0, a, b, c)} +G(a, ) // replaced by \tcode{f(0, a)} +G(a) // replaced by \tcode{f(0, a)} + +SDEF(foo); // replaced by \tcode{S foo;} +SDEF(bar, 1, 2); // replaced by \tcode{S bar = \{ 1, 2 \};} + +#define H1(X, ...) X __VA_OPT__(##) __VA_ARGS__ // error: \tcode{\#\#} may not appear at + // the beginning of a replacement list\iref{cpp.concat} + +#define H2(X, Y, ...) __VA_OPT__(X ## Y,) __VA_ARGS__ +H2(a, b, c, d) // replaced by \tcode{ab, c, d} + +#define H3(X, ...) #__VA_OPT__(X##X X##X) +H3(, 0) // replaced by \tcode{""} + +#define H4(X, ...) __VA_OPT__(a X ## X) ## b +H4(, 1) // replaced by \tcode{a b} + +#define H5A(...) __VA_OPT__()@\tcode{/**/}@__VA_OPT__() +#define H5B(X) a ## X ## b +#define H5C(X) H5B(X) +H5C(H5A()) // replaced by \tcode{ab} +\end{codeblock} +\end{example} + +\rSec3[cpp.stringize]{The \tcode{\#} operator}% +\indextext{\#\#0 operator@\tcode{\#} operator}% +\indextext{stringize|see{\tcode{\#} operator}} + +\pnum +Each +\tcode{\#} +preprocessing token in the replacement list for a function-like +macro shall be followed by a parameter as the next preprocessing +token in the replacement list. 
+ +\pnum +A \defn{character string literal} is a \grammarterm{string-literal} with no prefix. +If, in the replacement list, a parameter is immediately +preceded by a +\tcode{\#} +preprocessing token, +both are replaced by a single character string literal preprocessing token that +contains the spelling of the preprocessing token sequence for the +corresponding argument (excluding placemarker tokens). +Let the \defn{stringizing argument} be the preprocessing token sequence +for the corresponding argument with placemarker tokens removed. +Each occurrence of whitespace between the stringizing argument's preprocessing +tokens becomes a single space character in the character string literal. +Whitespace before the first preprocessing token and after the last +preprocessing token comprising the stringizing argument is deleted. +Otherwise, the original spelling of each preprocessing token in the +stringizing argument is retained in the character string literal, +except for special handling for producing the spelling of +\grammarterm{string-literal}s and \grammarterm{character-literal}s: +a +\tcode{\textbackslash} +character is inserted before each +\tcode{"} +and +\tcode{\textbackslash} +character of a \grammarterm{character-literal} or \grammarterm{string-literal} +(including the delimiting +\tcode{"} +characters). +If the replacement that results is not a valid character string literal, +the behavior is undefined. The character string literal corresponding to +an empty stringizing argument is \tcode{""}. +The order of evaluation of +\tcode{\#} +and +\tcode{\#\#} +operators is unspecified. + +\rSec3[cpp.concat]{The \tcode{\#\#} operator}% +\indextext{\#\#1 operator@\tcode{\#\#} operator}% +\indextext{concatenation!macro argument|see{\tcode{\#\#} operator}} + +\pnum +A +\tcode{\#\#} +preprocessing token shall not occur at the beginning or +at the end of a replacement list for either form +of macro definition. 
+ +\pnum +If, in the replacement list of a function-like macro, a parameter is +immediately preceded or followed by a +\tcode{\#\#} +preprocessing token, the parameter is replaced by the +corresponding argument's preprocessing token sequence; however, if an argument consists of no preprocessing tokens, the parameter is +replaced by a placemarker preprocessing token instead. +\begin{footnote} +Placemarker preprocessing tokens do not appear in the syntax +because they are temporary entities that exist only within translation phase 4. +\end{footnote} + +\pnum +For both object-like and function-like macro invocations, before the +replacement list is reexamined for more macro names to replace, +each instance of a +\tcode{\#\#} +preprocessing token in the replacement list +(not from an argument) is deleted and the +preceding preprocessing token is concatenated +with the following preprocessing token. +Placemarker preprocessing tokens are handled specially: concatenation +of two placemarkers results in a single placemarker preprocessing token, and +concatenation of a placemarker with a non-placemarker preprocessing token results +in the non-placemarker preprocessing token. +\begin{note} +Concatenation can form +a \grammarterm{universal-character-name}\iref{lex.charset}. +\end{note} +If the result is not a valid preprocessing token, +the behavior is undefined. +The resulting token is available for further macro replacement. +The order of evaluation of +\tcode{\#\#} +operators is unspecified. 
+ +\pnum +\begin{example} +The sequence +\begin{codeblock} +#define str(s) # s +#define xstr(s) str(s) +#define debug(s, t) printf("x" # s "= %d, x" # t "= %s", @\textbackslash@ + x ## s, x ## t) +#define INCFILE(n) vers ## n +#define glue(a, b) a ## b +#define xglue(a, b) glue(a, b) +#define HIGHLOW "hello" +#define LOW LOW ", world" + +debug(1, 2); +fputs(str(strncmp("abc@\textbackslash@0d", "abc", '@\textbackslash@4') // this goes away + == 0) str(: @\atsign\textbackslash@n), s); +#include xstr(INCFILE(2).h) +glue(HIGH, LOW); +xglue(HIGH, LOW) +\end{codeblock} +results in +\begin{codeblock} +printf("x" "1" "= %d, x" "2" "= %s", x1, x2); +fputs("strncmp(@\textbackslash@"abc@\textbackslash\textbackslash@0d@\textbackslash@", @\textbackslash@"abc@\textbackslash@", '@\textbackslash\textbackslash@4') == 0" ": @\atsign\textbackslash@n", s); +#include "vers2.h" @\textrm{(\textit{after macro replacement, before file access})}@ +"hello"; +"hello" ", world" +\end{codeblock} +or, after concatenation of the character string literals, +\begin{codeblock} +printf("x1= %d, x2= %s", x1, x2); +fputs("strncmp(@\textbackslash@"abc@\textbackslash\textbackslash@0d@\textbackslash@", @\textbackslash@"abc@\textbackslash@", '@\textbackslash\textbackslash@4') == 0: @\atsign\textbackslash@n", s); +#include "vers2.h" @\textrm{(\textit{after macro replacement, before file access})}@ +"hello"; +"hello, world" +\end{codeblock} + +Space around the \tcode{\#} and \tcode{\#\#} tokens in the macro definition +is optional. 
+\end{example} + +\pnum +\begin{example} +In the following fragment: + +\begin{codeblock} +#define hash_hash # ## # +#define mkstr(a) # a +#define in_between(a) mkstr(a) +#define join(c, d) in_between(c hash_hash d) +char p[] = join(x, y); // equivalent to \tcode{char p[] = "x \#\# y";} +\end{codeblock} + +The expansion produces, at various stages: + +\begin{codeblock} +join(x, y) +in_between(x hash_hash y) +in_between(x ## y) +mkstr(x ## y) +"x ## y" +\end{codeblock} + +In other words, expanding \tcode{hash_hash} produces a new token, +consisting of two adjacent sharp signs, but this new token is not the +\tcode{\#\#} operator. +\end{example} + +\pnum +\begin{example} +To illustrate the rules for placemarker preprocessing tokens, the sequence +\begin{codeblock} +#define t(x,y,z) x ## y ## z +int j[] = { t(1,2,3), t(,4,5), t(6,,7), t(8,9,), + t(10,,), t(,11,), t(,,12), t(,,) }; +\end{codeblock} +results in +\begin{codeblock} +int j[] = { 123, 45, 67, 89, + 10, 11, 12, }; +\end{codeblock} +\end{example} + +\rSec3[cpp.rescan]{Rescanning and further replacement}% +\indextext{macro!rescanning and replacement}% +\indextext{rescanning and replacement|see{macro, rescanning and replacement}} + +\pnum +After all parameters in the replacement list have been substituted and \tcode{\#} and \tcode{\#\#} processing has taken +place, all placemarker preprocessing tokens are removed. Then +the resulting preprocessing token sequence is rescanned, along with all +subsequent preprocessing tokens of the source file, for more macro names +to replace. 
+ +\pnum +\begin{example} +The sequence +\begin{codeblock} +#define x 3 +#define f(a) f(x * (a)) +#undef x +#define x 2 +#define g f +#define z z[0] +#define h g(~ +#define m(a) a(w) +#define w 0,1 +#define t(a) a +#define p() int +#define q(x) x +#define r(x,y) x ## y +#define str(x) # x + +f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); +g(x+(3,4)-w) | h 5) & m + (f)^m(m); +p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) }; +char c[2][6] = { str(hello), str() }; +\end{codeblock} +results in +\begin{codeblock} +f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1); +f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1); +int i[] = { 1, 23, 4, 5, }; +char c[2][6] = { "hello", "" }; +\end{codeblock} +\end{example} + +\pnum +If the name of the macro being replaced is found during this scan of +the replacement list +(not including the rest of the source file's preprocessing tokens), +it is not replaced. +Furthermore, +if any nested replacements encounter the name of the macro being replaced, +it is not replaced. +These nonreplaced macro name preprocessing tokens are no longer available +for further replacement even if they are later (re)examined in contexts +in which that macro name preprocessing token would otherwise have been +replaced. + +\pnum +The resulting completely macro-replaced preprocessing token sequence +is not processed as a preprocessing directive even if it resembles one, +but all pragma unary operator expressions within it are then processed as +specified in~\ref{cpp.pragma.op} below. + +\rSec3[cpp.scope]{Scope of macro definitions}% +\indextext{macro!scope of definition}% +\indextext{scope!macro definition|see{macro, scope of definition}} + +\pnum +A macro definition lasts +(independent of block structure) +until a corresponding +\tcode{\#undef} +directive is encountered or +(if none is encountered) +until the end of the translation unit. +Macro definitions have no significance after translation phase 4. 
+ +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# undef} identifier new-line +\indextext{\idxcode{\#undef}}% +\end{ncsimplebnf} +causes the specified identifier no longer to be defined as a macro name. +It is ignored if the specified identifier is not currently defined as +a macro name. + +\indextext{macro!replacement|)} + +\rSec2[cpp.line]{Line control}% +\indextext{preprocessing directive!line control}% +\indextext{\idxcode{\#line}|see{preprocessing directive, line control}} + +\pnum +The \grammarterm{string-literal} of a +\tcode{\#line} +directive, if present, +shall be a character string literal. + +\pnum +The +\defn{line number} +of the current source line is one greater than +the number of new-line characters read or introduced +in translation phase 1\iref{lex.phases} +while processing the source file to the current token. + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# line} digit-sequence new-line +\end{ncsimplebnf} +causes the implementation to behave as if +the following sequence of source lines begins with a +source line that has a line number as specified +by the digit sequence (interpreted as a decimal integer). +If the digit sequence specifies zero +or a number greater than 2147483647, +the behavior is undefined. + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# line} digit-sequence \terminal{"} \opt{s-char-sequence} \terminal{"} new-line +\end{ncsimplebnf} +sets the presumed line number similarly and changes the +presumed name of the source file to be the contents +of the character string literal. + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# line} pp-tokens new-line +\end{ncsimplebnf} +(that does not match one of the two previous forms) +is permitted. 
+The preprocessing tokens after +\tcode{line} +on the directive are processed just as in normal text +(each identifier currently defined as a macro name is replaced by its +replacement list of preprocessing tokens). +If the directive resulting after all replacements does not match +one of the two previous forms, the behavior is undefined; +otherwise, the result is processed as appropriate. + +\rSec2[cpp.error]{Diagnostic directives}% +\indextext{preprocessing directive!error}% +\indextext{preprocessing directive!diagnostic}% +\indextext{preprocessing directive!warning}% +\indextext{\idxcode{\#error}|see{preprocessing directive, error}} + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# error} \opt{pp-tokens} new-line +\end{ncsimplebnf} +renders the program ill-formed. +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# warning} \opt{pp-tokens} new-line +\end{ncsimplebnf} +requires the implementation to produce at least one diagnostic message +for the preprocessing translation unit\iref{intro.compliance.general}. +\recommended +Any diagnostic message caused by either of these directives +should include the specified sequence of preprocessing tokens. + +\rSec2[cpp.pragmas]{Pragmas}% + +\rSec3[cpp.pragma]{Pragma directive}% +\indextext{preprocessing directive!pragma}% +\indextext{\idxcode{\#pragma}|see{preprocessing directive, pragma}} + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# pragma} \opt{pp-tokens} new-line +\end{ncsimplebnf} +causes the implementation to behave +in an \impldef{\tcode{\#pragma}} manner. +The behavior may cause translation to fail or cause the translator or +the resulting program to behave in a non-conforming manner. +Any pragma that is not recognized by the implementation is ignored. 
+ +\rSec3[cpp.pragma.op]{Pragma operator}% +\indextext{macro!pragma operator}% +\indextext{operator!pragma|see{macro, pragma operator}} + +\pnum +A unary operator expression of the form: +\begin{ncbnf} +\terminal{_Pragma} \terminal{(} string-literal \terminal{)} +\end{ncbnf} +is processed as follows: The \grammarterm{string-literal} is \defnx{destringized}{destringization} +by deleting the \tcode{L} prefix, if present, deleting the leading and trailing +double-quotes, replacing each escape sequence \tcode{\textbackslash"} by a double-quote, and +replacing each escape sequence \tcode{\textbackslash\textbackslash} by a single +backslash. The resulting sequence of characters is processed through translation phase 3 +to produce preprocessing tokens that are executed as if they were the +\grammarterm{pp-tokens} in a pragma directive. The original four preprocessing +tokens in the unary operator expression are removed. + +\pnum +\begin{example} +\begin{codeblock} +#pragma listing on "..\listing.dir" +\end{codeblock} +can also be expressed as: +\begin{codeblock} +_Pragma ( "listing on \"..\\listing.dir\"" ) +\end{codeblock} +The latter form is processed in the same way whether it appears literally +as shown, or results from macro replacement, as in: +\begin{codeblock} +#define LISTING(x) PRAGMA(listing on #x) +#define PRAGMA(x) _Pragma(#x) + +LISTING( ..\listing.dir ) +\end{codeblock} +\end{example} + +\rSec2[cpp.predefined]{Predefined macro names} +\indextext{macro!predefined}% +\indextext{name!predefined macro|see{macro, predefined}} + +\pnum +The following macro names shall be defined by the implementation: + +\begin{description} + +\item +\indextext{\idxxname{cplusplus}}% +\xname{cplusplus}\\ +The integer literal \tcode{\cppver}. +\begin{note} +Future revisions of this document will +replace the value of this macro with a greater value. 
+\end{note} + +\item The names listed in \tref{cpp.predefined.ft}.\\ +The macros defined in \tref{cpp.predefined.ft} shall be defined to +the corresponding integer literal. +\begin{note} +Future revisions of this document might replace +the values of these macros with greater values. +\end{note} + +\item +\indextext{__date__@\mname{DATE}}% +\mname{DATE}\\ +The date of translation of the source file: +a character string literal of the form +\tcode{"Mmm~dd~yyyy"}, +where the names of the months are the same as those generated +by the +\tcode{asctime} +function, +and the first character of +\tcode{dd} +is a space character if the value is less than 10. +If the date of translation is not available, +an \impldef{text of \mname{DATE} when date of translation is not available} valid date +shall be supplied. + +\item +\indextext{__file__@\mname{FILE}}% +\mname{FILE}\\ +The presumed name of the current source file (a character string +literal). +\begin{footnote} +The presumed source file name can be changed by the \tcode{\#line} directive. +\end{footnote} + +\item +\indextext{__line__@\mname{LINE}}% +\mname{LINE}\\ +The presumed line number (within the current source file) of the current source line +(an integer literal). +\begin{footnote} +The presumed line number can be changed by the \tcode{\#line} directive. +\end{footnote} + +\item +\indextext{__stdc_hosted__@\mname{STDC_HOSTED}}% +\indextext{implementation!hosted}% +\indextext{implementation!freestanding}% +\mname{STDC_HOSTED}\\ +The integer literal \tcode{1} +if the implementation is a hosted implementation or +the integer literal \tcode{0} +if it is a freestanding implementation\iref{intro.compliance}. + +\item +\indextext{__stdcpp_default_new_alignment__@\mname{STDCPP_DEFAULT_NEW_ALIGNMENT}}% +\mname{STDCPP_DEFAULT_NEW_ALIGNMENT}\\ +An integer literal of type \tcode{std::size_t} +whose value is the alignment guaranteed +by a call to \tcode{operator new(std::size_t)} +or \tcode{operator new[](std::size_t)}. 
+\begin{note} +Larger alignments will be passed to +\tcode{operator new(std::size_t, std::align_val_t)}, etc.\iref{expr.new}. +\end{note} + +\item +\indextext{__stdcpp_float16_t__@\mname{STDCPP_FLOAT16_T}}% +\mname{STDCPP_FLOAT16_T}\\ +Defined as the integer literal \tcode{1} +if and only if the implementation supports +the ISO/IEC/IEEE 60559 floating-point interchange format binary16 +as an extended floating-point type\iref{basic.extended.fp}. + +\item +\indextext{__stdcpp_float32_t__@\mname{STDCPP_FLOAT32_T}}% +\mname{STDCPP_FLOAT32_T}\\ +Defined as the integer literal \tcode{1} +if and only if the implementation supports +the ISO/IEC/IEEE 60559 floating-point interchange format binary32 +as an extended floating-point type. + +\item +\indextext{__stdcpp_float64_t__@\mname{STDCPP_FLOAT64_T}}% +\mname{STDCPP_FLOAT64_T}\\ +Defined as the integer literal \tcode{1} +if and only if the implementation supports +the ISO/IEC/IEEE 60559 floating-point interchange format binary64 +as an extended floating-point type. + +\item +\indextext{__stdcpp_float128_t__@\mname{STDCPP_FLOAT128_T}}% +\mname{STDCPP_FLOAT128_T}\\ +Defined as the integer literal \tcode{1} +if and only if the implementation supports +the ISO/IEC/IEEE 60559 floating-point interchange format binary128 +as an extended floating-point type. + +\item +\indextext{__stdcpp_bfloat16_t__@\mname{STDCPP_BFLOAT16_T}}% +\mname{STDCPP_BFLOAT16_T}\\ +Defined as the integer literal \tcode{1} +if and only if the implementation supports an extended floating-point type +with the properties of the \grammarterm{typedef-name} \tcode{std::bfloat16_t} +as described in \ref{basic.extended.fp}. + +\item +\indextext{__time__@\mname{TIME}}% +\mname{TIME}\\ +The time of translation of the source file: +a character string literal of the form +\tcode{"hh:mm:ss"} +as in the time generated by the +\tcode{asctime} +function. 
+If the time of translation is not available, +an \impldef{text of \mname{TIME} when time of translation is not available} valid time shall be supplied. +\end{description} + +\indextext{macro!feature-test}% +\indextext{feature-test macro|see{macro, feature-test}}% +\begin{LongTable}{Feature-test macros}{cpp.predefined.ft}{ll} +\\ \topline +\lhdr{Macro name} & \rhdr{Value} \\ \capsep +\endfirsthead +\continuedcaption \\ +\hline +\lhdr{Macro name} & \rhdr{Value} \\ \capsep +\endhead +\defnxname{cpp_aggregate_bases} & \tcode{201603L} \\ \rowsep +\defnxname{cpp_aggregate_nsdmi} & \tcode{201304L} \\ \rowsep +\defnxname{cpp_aggregate_paren_init} & \tcode{201902L} \\ \rowsep +\defnxname{cpp_alias_templates} & \tcode{200704L} \\ \rowsep +\defnxname{cpp_aligned_new} & \tcode{201606L} \\ \rowsep +\defnxname{cpp_attributes} & \tcode{200809L} \\ \rowsep +\defnxname{cpp_auto_cast} & \tcode{202110L} \\ \rowsep +\defnxname{cpp_binary_literals} & \tcode{201304L} \\ \rowsep +\defnxname{cpp_capture_star_this} & \tcode{201603L} \\ \rowsep +\defnxname{cpp_char8_t} & \tcode{202207L} \\ \rowsep +\defnxname{cpp_concepts} & \tcode{202002L} \\ \rowsep +\defnxname{cpp_conditional_explicit} & \tcode{201806L} \\ \rowsep +\defnxname{cpp_constexpr} & \tcode{202306L} \\ \rowsep +\defnxname{cpp_constexpr_dynamic_alloc} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_constexpr_in_decltype} & \tcode{201711L} \\ \rowsep +\defnxname{cpp_consteval} & \tcode{202211L} \\ \rowsep +\defnxname{cpp_constinit} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_decltype} & \tcode{200707L} \\ \rowsep +\defnxname{cpp_decltype_auto} & \tcode{201304L} \\ \rowsep +\defnxname{cpp_deduction_guides} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_delegating_constructors} & \tcode{200604L} \\ \rowsep +\defnxname{cpp_deleted_function} & \tcode{202403L} \\ \rowsep +\defnxname{cpp_designated_initializers} & \tcode{201707L} \\ \rowsep +\defnxname{cpp_enumerator_attributes} & \tcode{201411L} \\ \rowsep 
+\defnxname{cpp_explicit_this_parameter} & \tcode{202110L} \\ \rowsep +\defnxname{cpp_fold_expressions} & \tcode{201603L} \\ \rowsep +\defnxname{cpp_generic_lambdas} & \tcode{201707L} \\ \rowsep +\defnxname{cpp_guaranteed_copy_elision} & \tcode{201606L} \\ \rowsep +\defnxname{cpp_hex_float} & \tcode{201603L} \\ \rowsep +\defnxname{cpp_if_consteval} & \tcode{202106L} \\ \rowsep +\defnxname{cpp_if_constexpr} & \tcode{201606L} \\ \rowsep +\defnxname{cpp_impl_coroutine} & \tcode{201902L} \\ \rowsep +\defnxname{cpp_impl_destroying_delete} & \tcode{201806L} \\ \rowsep +\defnxname{cpp_impl_three_way_comparison} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_implicit_move} & \tcode{202207L} \\ \rowsep +\defnxname{cpp_inheriting_constructors} & \tcode{201511L} \\ \rowsep +\defnxname{cpp_init_captures} & \tcode{201803L} \\ \rowsep +\defnxname{cpp_initializer_lists} & \tcode{200806L} \\ \rowsep +\defnxname{cpp_inline_variables} & \tcode{201606L} \\ \rowsep +\defnxname{cpp_lambdas} & \tcode{200907L} \\ \rowsep +\defnxname{cpp_modules} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_multidimensional_subscript} & \tcode{202211L} \\ \rowsep +\defnxname{cpp_named_character_escapes} & \tcode{202207L} \\ \rowsep +\defnxname{cpp_namespace_attributes} & \tcode{201411L} \\ \rowsep +\defnxname{cpp_noexcept_function_type} & \tcode{201510L} \\ \rowsep +\defnxname{cpp_nontype_template_args} & \tcode{201911L} \\ \rowsep +\defnxname{cpp_nontype_template_parameter_auto} & \tcode{201606L} \\ \rowsep +\defnxname{cpp_nsdmi} & \tcode{200809L} \\ \rowsep +\defnxname{cpp_pack_indexing} & \tcode{202311L} \\ \rowsep +\defnxname{cpp_placeholder_variables} & \tcode{202306L} \\ \rowsep +\defnxname{cpp_range_based_for} & \tcode{202211L} \\ \rowsep +\defnxname{cpp_raw_strings} & \tcode{200710L} \\ \rowsep +\defnxname{cpp_ref_qualifiers} & \tcode{200710L} \\ \rowsep +\defnxname{cpp_return_type_deduction} & \tcode{201304L} \\ \rowsep +\defnxname{cpp_rvalue_references} & \tcode{200610L} \\ \rowsep 
+\defnxname{cpp_size_t_suffix} & \tcode{202011L} \\ \rowsep +\defnxname{cpp_sized_deallocation} & \tcode{201309L} \\ \rowsep +\defnxname{cpp_static_assert} & \tcode{202306L} \\ \rowsep +\defnxname{cpp_static_call_operator} & \tcode{202207L} \\ \rowsep +\defnxname{cpp_structured_bindings} & \tcode{202403L} \\ \rowsep +\defnxname{cpp_template_template_args} & \tcode{201611L} \\ \rowsep +\defnxname{cpp_threadsafe_static_init} & \tcode{200806L} \\ \rowsep +\defnxname{cpp_unicode_characters} & \tcode{200704L} \\ \rowsep +\defnxname{cpp_unicode_literals} & \tcode{200710L} \\ \rowsep +\defnxname{cpp_user_defined_literals} & \tcode{200809L} \\ \rowsep +\defnxname{cpp_using_enum} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_variable_templates} & \tcode{201304L} \\ \rowsep +\defnxname{cpp_variadic_friend} & \tcode{202403L} \\ \rowsep +\defnxname{cpp_variadic_templates} & \tcode{200704L} \\ \rowsep +\defnxname{cpp_variadic_using} & \tcode{201611L} \\ +\end{LongTable} + +\pnum +The following macro names are conditionally defined by the implementation: + +\begin{description} +\item +\indextext{__stdc__@\mname{STDC}}% +\mname{STDC}\\ +Whether \mname{STDC} is predefined and if so, what its value is, +are \impldef{definition and meaning of \mname{STDC}}. + +\item +\indextext{__stdc_mb_might_neq_wc__@\mname{STDC_MB_MIGHT_NEQ_WC}}% +\mname{STDC_MB_MIGHT_NEQ_WC}\\ +The integer literal \tcode{1}, intended to indicate that, in the encoding for +\keyword{wchar_t}, a member of the basic character set need not have a code value equal to +its value when used as the lone character in an ordinary character literal. + +\item +\indextext{__stdc_version__@\mname{STDC_VERSION}}% +\mname{STDC_VERSION}\\ +Whether \mname{STDC_VERSION} is predefined and if so, what its value is, +are \impldef{definition and meaning of \mname{STDC_VERSION}}. 
+ +\item +\indextext{__stdc_iso_10646__@\mname{STDC_ISO_10646}}% +\mname{STDC_ISO_10646}\\ +An integer literal of the form \tcode{yyyymmL} +(for example, \tcode{199712L}). +Whether \mname{STDC_ISO_10646} is predefined and +if so, what its value is, +are \impldef{presence and value of \mname{STDC_ISO_10646}}. + +\item +\indextext{__stdcpp_threads__@\mname{STDCPP_THREADS}}% +\mname{STDCPP_THREADS}\\ +Defined as the integer literal \tcode{1} if and only if a program +can have more than one thread of execution\iref{intro.multithread}. + +\end{description} + +\pnum +The values of the predefined macros +(except for +\mname{FILE} +and +\mname{LINE}) +remain constant throughout the translation unit. + +\pnum +If any of the predefined macro names in this subclause, +or the identifier +\tcode{defined}, +is the subject of a +\tcode{\#define} +or a +\tcode{\#undef} +preprocessing directive, +the behavior is undefined. +Any other predefined macro names shall begin with a +leading underscore followed by an uppercase letter or a second +underscore. +\indextext{preprocessing directive|)} + +\rSec1[lex.token]{Tokens} + +\indextext{token|(}% +\begin{bnf} +\nontermdef{token}\br + identifier\br + keyword\br + literal\br + operator-or-punctuator +\end{bnf} + +\pnum +\indextext{\idxgram{token}}% +There are five kinds of tokens: identifiers, keywords, literals,% +\begin{footnote} +Literals include strings and character and numeric literals. +\end{footnote} +operators, and other separators. +\indextext{whitespace}% +Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments +(collectively, ``whitespace''), as described below, are ignored except +as they serve to separate tokens. +\begin{note} +Whitespace can separate otherwise adjacent identifiers, keywords, numeric +literals, and alternative tokens containing alphabetic characters. 
+\end{note} +\indextext{token|)} + +\rSec2[lex.key]{Keywords} + +\begin{bnf} +\nontermdef{keyword}\br + \textnormal{any identifier listed in \tref{lex.key}}\br + \grammarterm{import-keyword}\br + \grammarterm{module-keyword}\br + \grammarterm{export-keyword} +\end{bnf} + +\pnum +The \grammarterm{import-keyword} is produced +by processing an \keyword{import} directive\iref{cpp.import}, +the \grammarterm{module-keyword} is produced +by preprocessing a \keyword{module} directive\iref{cpp.module}, and +the \grammarterm{export-keyword} is produced +by preprocessing either of the previous two directives. +\begin{note} None has any observable spelling. \end{note} @@ -1039,69 +3663,27 @@ \keyword{void} \\ \keyword{volatile} \\ \keyword{wchar_t} \\ -\keyword{while} \\ -\end{multicolfloattable} - -\pnum -Furthermore, the alternative representations shown in -\tref{lex.key.digraph} for certain operators and -punctuators\iref{lex.digraph} are reserved and shall not be used -otherwise. - -\begin{floattable}{Alternative representations}{lex.key.digraph} -{llllll} -\topline -\keyword{and} & \keyword{and_eq} & \keyword{bitand} & \keyword{bitor} & \keyword{compl} & \keyword{not} \\ -\keyword{not_eq} & \keyword{or} & \keyword{or_eq} & \keyword{xor} & \keyword{xor_eq} & \\ -\end{floattable}% -\indextext{keyword|)}% - - -\rSec1[lex.operators]{Operators and punctuators} - -\pnum -\indextext{operator|(}% -\indextext{punctuator|(}% -The lexical representation of \Cpp{} programs includes a number of -preprocessing tokens that are used in the syntax of the preprocessor or -are converted into tokens for operators and punctuators: - -\begin{bnf} -\nontermdef{preprocessing-op-or-punc}\br - preprocessing-operator\br - operator-or-punctuator -\end{bnf} - -\begin{bnf} -%% Ed. note: character protrusion would misalign various operators. 
-\microtypesetup{protrusion=false}\obeyspaces -\nontermdef{preprocessing-operator} \textnormal{one of}\br - \terminal{\# \#\# \%: \%:\%:} -\end{bnf} - -\begin{bnf} -\microtypesetup{protrusion=false}\obeyspaces -\nontermdef{operator-or-punctuator} \textnormal{one of}\br - \terminal{\{ \} [ ] ( )}\br - \terminal{<: :> <\% \%> ; : ...}\br - \terminal{? :: . .* -> ->* \~}\br - \terminal{! + - * / \% \caret{} \& |}\br - \terminal{= += -= *= /= \%= \caret{}= \&= |=}\br - \terminal{== != < > <= >= <=> \&\& ||}\br - \terminal{<< >> <<= >>= ++ -- ,}\br - \terminal{\keyword{and} \keyword{or} \keyword{xor} \keyword{not} \keyword{bitand} \keyword{bitor} \keyword{compl}}\br - \terminal{\keyword{and_eq} \keyword{or_eq} \keyword{xor_eq} \keyword{not_eq}} -\end{bnf} +\keyword{while} \\ +\end{multicolfloattable} -Each \grammarterm{operator-or-punctuator} is converted to a single token -in translation phase 7\iref{lex.phase.7}.% -\indextext{punctuator|)}% -\indextext{operator|)} +\pnum +Furthermore, the alternative representations shown in +\tref{lex.key.digraph} for certain operators and +punctuators\iref{lex.digraph} are reserved and shall not be used +otherwise. + +\begin{floattable}{Alternative representations}{lex.key.digraph} +{llllll} +\topline +\keyword{and} & \keyword{and_eq} & \keyword{bitand} & \keyword{bitor} & \keyword{compl} & \keyword{not} \\ +\keyword{not_eq} & \keyword{or} & \keyword{or_eq} & \keyword{xor} & \keyword{xor_eq} & \\ +\end{floattable}% +\indextext{keyword|)}% -\rSec1[lex.literal]{Literals}% +\rSec2[lex.literal]{Literals}% \indextext{literal|(} -\rSec2[lex.literal.kinds]{Kinds of literals} +\rSec3[lex.literal.kinds]{Kinds of literals} \pnum \indextext{constant}% @@ -1127,7 +3709,7 @@ a literal has a type and a value category\iref{expr.prim.literal}. 
\end{note} -\rSec2[lex.icon]{Integer literals} +\rSec3[lex.icon]{Integer literals} \indextext{literal!integer}% \begin{bnf} @@ -1377,246 +3959,9 @@ \begin{note} An \grammarterm{integer-literal} with a \tcode{z} or \tcode{Z} suffix is ill-formed if it cannot be represented by \tcode{std::size_t}. -\end{note} - -\rSec2[lex.ccon]{Character literals} - -\indextext{literal!character}% -\begin{bnf} -\nontermdef{character-literal}\br - \opt{encoding-prefix} \terminal{'} c-char-sequence \terminal{'} -\end{bnf} - -\begin{bnf} -\nontermdef{encoding-prefix} \textnormal{one of}\br - \terminal{u8}\quad\terminal{u}\quad\terminal{U}\quad\terminal{L} -\end{bnf} - -\begin{bnf} -\nontermdef{c-char-sequence}\br - c-char\br - c-char-sequence c-char -\end{bnf} - -\begin{bnf} -\nontermdef{c-char}\br - basic-c-char\br - escape-sequence\br - universal-character-name -\end{bnf} - -\begin{bnf} -\nontermdef{basic-c-char}\br - \textnormal{any member of the translation character set except the \unicode{0027}{apostrophe},}\br - \bnfindent\textnormal{\unicode{005c}{reverse solidus}, or new-line character} -\end{bnf} - -\begin{bnf} -\nontermdef{escape-sequence}\br - simple-escape-sequence\br - numeric-escape-sequence\br - conditional-escape-sequence -\end{bnf} - -\begin{bnf} -\nontermdef{simple-escape-sequence}\br - \terminal{\textbackslash} simple-escape-sequence-char -\end{bnf} - -\begin{bnf} -\nontermdef{simple-escape-sequence-char} \textnormal{one of}\br - \terminal{' " ? 
\textbackslash{} a b f n r t v} -\end{bnf} - -\begin{bnf} -\nontermdef{numeric-escape-sequence}\br - octal-escape-sequence\br - hexadecimal-escape-sequence -\end{bnf} - -\begin{bnf} -\nontermdef{simple-octal-digit-sequence}\br - octal-digit\br - simple-octal-digit-sequence octal-digit -\end{bnf} - -\begin{bnf} -\nontermdef{octal-escape-sequence}\br - \terminal{\textbackslash} octal-digit\br - \terminal{\textbackslash} octal-digit octal-digit\br - \terminal{\textbackslash} octal-digit octal-digit octal-digit\br - \terminal{\textbackslash o\{} simple-octal-digit-sequence \terminal{\}}\br -\end{bnf} - -\begin{bnf} -\nontermdef{hexadecimal-escape-sequence}\br - \terminal{\textbackslash x} simple-hexadecimal-digit-sequence\br - \terminal{\textbackslash x\{} simple-hexadecimal-digit-sequence \terminal{\}} -\end{bnf} - -\begin{bnf} -\nontermdef{conditional-escape-sequence}\br - \terminal{\textbackslash} conditional-escape-sequence-char -\end{bnf} - -\begin{bnf} -\nontermdef{conditional-escape-sequence-char}\br - \textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{N}, \terminal{o}, \terminal{u}, \terminal{U}, or \terminal{x}} -\end{bnf} - -\pnum -\indextext{literal!character}% -\indextext{literal!\idxcode{char8_t}}% -\indextext{literal!\idxcode{char16_t}}% -\indextext{literal!\idxcode{char32_t}}% -\indextext{literal!type of character}% -\indextext{type!\idxcode{char8_t}}% -\indextext{type!\idxcode{char16_t}}% -\indextext{type!\idxcode{char32_t}}% -\indextext{wide-character}% -\indextext{type!\idxcode{wchar_t}}% -A \defnadj{multicharacter}{literal} is a \grammarterm{character-literal} -whose \grammarterm{c-char-sequence} consists of -more than one \grammarterm{c-char}. -A multicharacter literal shall not have an \grammarterm{encoding-prefix}. 
-If a multicharacter literal contains a \grammarterm{c-char} -that is not encodable as a single code unit in the ordinary literal encoding, -the program is ill-formed. -Multicharacter literals are conditionally-supported. - -\pnum -The kind of a \grammarterm{character-literal}, -its type, and its associated character encoding\iref{lex.charset} -are determined by -its \grammarterm{encoding-prefix} and its \grammarterm{c-char-sequence} -as defined by \tref{lex.ccon.literal}. - -\begin{floattable}{Character literals}{lex.ccon.literal} -{l|l|l|l|l} -\topline -\lhdr{Encoding} & \chdr{Kind} & \chdr{Type} & \chdr{Associated char-} & \rhdr{Example} \\ -\lhdr{prefix} & \chdr{} & \chdr{} & \chdr{acter encoding} & \\ -\capsep -none & -\defnx{ordinary character literal}{literal!character!ordinary} & -\keyword{char} & -ordinary literal & -\tcode{'v'} \\ \cline{2-3}\cline{5-5} - & -multicharacter literal & -\keyword{int} & -encoding & -\tcode{'abcd'} \\ \hline -\tcode{L} & -\defnx{wide character literal}{literal!character!wide} & -\keyword{wchar_t} & -wide literal & -\tcode{L'w'} \\ - & & & encoding & \\ \hline -\tcode{u8} & -\defnx{UTF-8 character literal}{literal!character!UTF-8} & -\keyword{char8_t} & -UTF-8 & -\tcode{u8'x'} \\ \hline -\tcode{u} & -\defnx{UTF-16 character literal}{literal!character!UTF-16} & -\keyword{char16_t} & -UTF-16 & -\tcode{u'y'} \\ \hline -\tcode{U} & -\defnx{UTF-32 character literal}{literal!character!UTF-32} & -\keyword{char32_t} & -UTF-32 & -\tcode{U'z'} \\ -\end{floattable} - -\pnum -In translation phase 4, -the value of a \grammarterm{character-literal} is determined -using the range of representable values -of the \grammarterm{character-literal}'s type in translation phase 7. -A multicharacter literal has an -\impldef{value of non-encodable character literal or multicharacter literal} -value. 
-The value of any other kind of \grammarterm{character-literal} -is determined as follows: -\begin{itemize} -\item -A \grammarterm{character-literal} with -a \grammarterm{c-char-sequence} consisting of a single -\grammarterm{basic-c-char}, -\grammarterm{simple-escape-sequence}, or -\grammarterm{universal-character-name} -is the code unit value of the specified character -as encoded in the literal's associated character encoding. -If the specified character lacks -representation in the literal's associated character encoding or -if it cannot be encoded as a single code unit, -then the program is ill-formed. -\item -A \grammarterm{character-literal} with -a \grammarterm{c-char-sequence} consisting of -a single \grammarterm{numeric-escape-sequence} -has a value as follows: -\begin{itemize} -\item -Let $v$ be the integer value represented by -the octal number comprising -the sequence of \grammarterm{octal-digit}{s} in -an \grammarterm{octal-escape-sequence} or by -the hexadecimal number comprising -the sequence of \grammarterm{hexadecimal-digit}{s} in -a \grammarterm{hexadecimal-escape-sequence}. -\item -If $v$ does not exceed -the range of representable values of the \grammarterm{character-literal}'s type, -then the value is $v$. -\item -Otherwise, -if the \grammarterm{character-literal}'s \grammarterm{encoding-prefix} -is absent or \tcode{L}, and -$v$ does not exceed the range of representable values of the corresponding unsigned type for the underlying type of the \grammarterm{character-literal}'s type, -then the value is the unique value of the \grammarterm{character-literal}'s type \tcode{T} that is congruent to $v$ modulo $2^N$, where $N$ is the width of \tcode{T}. -\item -Otherwise, the program is ill-formed. 
-\end{itemize} -\item -A \grammarterm{character-literal} with -a \grammarterm{c-char-sequence} consisting of -a single \grammarterm{conditional-escape-sequence} -is conditionally-supported and -has an \impldef{value of \grammarterm{conditional-escape-sequence}} value. -\end{itemize} - -\pnum -\indextext{backslash character}% -\indextext{\idxcode{\textbackslash}|see{backslash character}}% -\indextext{escape character|see{backslash character}}% -The character specified by a \grammarterm{simple-escape-sequence} -is specified in \tref{lex.ccon.esc}. -\begin{note} -Using an escape sequence for a question mark -is supported for compatibility with \CppXIV{} and C. -\end{note} - -\begin{floattable}{Simple escape sequences}{lex.ccon.esc} -{lll} -\topline -\lhdrx{2}{character} & \rhdr{\grammarterm{simple-escape-sequence}} \\ \capsep -\ucode{000a} & \uname{line feed} & \tcode{\textbackslash n} \\ -\ucode{0009} & \uname{character tabulation} & \tcode{\textbackslash t} \\ -\ucode{000b} & \uname{line tabulation} & \tcode{\textbackslash v} \\ -\ucode{0008} & \uname{backspace} & \tcode{\textbackslash b} \\ -\ucode{000d} & \uname{carriage return} & \tcode{\textbackslash r} \\ -\ucode{000c} & \uname{form feed} & \tcode{\textbackslash f} \\ -\ucode{0007} & \uname{alert} & \tcode{\textbackslash a} \\ -\ucode{005c} & \uname{reverse solidus} & \tcode{\textbackslash\textbackslash} \\ -\ucode{003f} & \uname{question mark} & \tcode{\textbackslash ?} \\ -\ucode{0027} & \uname{apostrophe} & \tcode{\textbackslash '} \\ -\ucode{0022} & \uname{quotation mark} & \tcode{\textbackslash "} \\ -\end{floattable} +\end{note} -\rSec2[lex.fcon]{Floating-point literals} +\rSec3[lex.fcon]{Floating-point literals} \indextext{literal!floating-point}% \begin{bnf} @@ -1643,436 +3988,118 @@ digit-sequence \terminal{.} \end{bnf} -\begin{bnf} -\nontermdef{hexadecimal-fractional-constant}\br - \opt{hexadecimal-digit-sequence} \terminal{.} hexadecimal-digit-sequence\br - hexadecimal-digit-sequence \terminal{.} 
-\end{bnf} - -\begin{bnf} -\nontermdef{exponent-part}\br - \terminal{e} \opt{sign} digit-sequence\br - \terminal{E} \opt{sign} digit-sequence -\end{bnf} - -\begin{bnf} -\nontermdef{binary-exponent-part}\br - \terminal{p} \opt{sign} digit-sequence\br - \terminal{P} \opt{sign} digit-sequence -\end{bnf} - -\begin{bnf} -\nontermdef{sign} \textnormal{one of}\br - \terminal{+ -} -\end{bnf} - -\begin{bnf} -\nontermdef{digit-sequence}\br - digit\br - digit-sequence \opt{\terminal{'}} digit -\end{bnf} - -\begin{bnf} -\nontermdef{floating-point-suffix} \textnormal{one of}\br - \terminal{f l f16 f32 f64 f128 bf16 F L F16 F32 F64 F128 BF16} -\end{bnf} - -\pnum -\indextext{literal!type of floating-point}% -\indextext{literal!\idxcode{float}}% -\indextext{suffix!\idxcode{F}}% -\indextext{suffix!\idxcode{f}}% -\indextext{suffix!\idxcode{L}}% -\indextext{suffix!\idxcode{l}}% -\indextext{literal!\idxcode{long double}}% -The type of -a \grammarterm{floating-point-literal}\iref{basic.fundamental,basic.extended.fp} -is determined by -its \grammarterm{floating-point-suffix} as specified in \tref{lex.fcon.type}. -\begin{note} -The floating-point suffixes -\tcode{f16}, \tcode{f32}, \tcode{f64}, \tcode{f128}, \tcode{bf16}, -\tcode{F16}, \tcode{F32}, \tcode{F64}, \tcode{F128}, and \tcode{BF16} -are conditionally-supported. See \ref{basic.extended.fp}. 
-\end{note} -\begin{simpletypetable} -{Types of \grammarterm{floating-point-literal}{s}} -{lex.fcon.type} -{ll} -\topline -\lhdr{\grammarterm{floating-point-suffix}} & \rhdr{type} \\ \capsep -none & \keyword{double} \\ -\tcode{f} or \tcode{F} & \keyword {float} \\ -\tcode{l} or \tcode{L} & \keyword{long} \keyword{double} \\ -\tcode{f16} or \tcode{F16} & \tcode{std::float16_t} \\ -\tcode{f32} or \tcode{F32} & \tcode{std::float32_t} \\ -\tcode{f64} or \tcode{F64} & \tcode{std::float64_t} \\ -\tcode{f128} or \tcode{F128} & \tcode{std::float128_t} \\ -\tcode{bf16} or \tcode{BF16} & \tcode{std::bfloat16_t} \\ -\end{simpletypetable} - -\pnum -\indextext{literal!floating-point}% -The \defn{significand} of a \grammarterm{floating-point-literal} -is the \grammarterm{fractional-constant} or \grammarterm{digit-sequence} -of a \grammarterm{decimal-floating-point-literal} -or the \grammarterm{hexadecimal-fractional-constant} -or \grammarterm{hexadecimal-digit-sequence} -of a \grammarterm{hexadecimal-floating-point-literal}. -In the significand, -the sequence of \grammarterm{digit}s or \grammarterm{hexadecimal-digit}s -and optional period are interpreted as a base $N$ real number $s$, -where $N$ is 10 for a \grammarterm{decimal-floating-point-literal} and -16 for a \grammarterm{hexadecimal-floating-point-literal}. -\begin{note} -Any optional separating single quotes are ignored when determining the value. -\end{note} -If an \grammarterm{exponent-part} or \grammarterm{binary-exponent-part} -is present, -the exponent $e$ of the \grammarterm{floating-point-literal} -is the result of interpreting -the sequence of an optional \grammarterm{sign} and the \grammarterm{digit}s -as a base 10 integer. -Otherwise, the exponent $e$ is 0. -The scaled value of the literal is -$s \times 10^e$ for a \grammarterm{decimal-floating-point-literal} and -$s \times 2^e$ for a \grammarterm{hexadecimal-floating-point-literal}. 
-\begin{example} -The \grammarterm{floating-point-literal}{s} -\tcode{49.625} and \tcode{0xC.68p+2} have the same value. -The \grammarterm{floating-point-literal}{s} -\tcode{1.602'176'565e-19} and \tcode{1.602176565e-19} -have the same value. -\end{example} - -\pnum -If the scaled value is not in the range of representable -values for its type, the program is ill-formed. -Otherwise, the value of a \grammarterm{floating-point-literal} -is the scaled value if representable, -else the larger or smaller representable value nearest the scaled value, -chosen in an \impldef{choice of larger or smaller value of -\grammarterm{floating-point-literal}} manner. - -\rSec2[lex.string]{String literals} - -\indextext{literal!string}% -\begin{bnf} -\nontermdef{string-literal}\br - \opt{encoding-prefix} \terminal{"} \opt{s-char-sequence} \terminal{"}\br - \opt{encoding-prefix} \terminal{R} raw-string -\end{bnf} - -\begin{bnf} -\nontermdef{s-char-sequence}\br - s-char\br - s-char-sequence s-char -\end{bnf} - -\begin{bnf} -\nontermdef{s-char}\br - basic-s-char\br - escape-sequence\br - universal-character-name -\end{bnf} - -\begin{bnf} -\nontermdef{basic-s-char}\br - \textnormal{any member of the translation character set except the \unicode{0022}{quotation mark},}\br - \bnfindent\textnormal{\unicode{005c}{reverse solidus}, or new-line character} -\end{bnf} - -\begin{bnf} -\nontermdef{raw-string}\br - \terminal{"} \opt{d-char-sequence} \terminal{(} \opt{r-char-sequence} \terminal{)} \opt{d-char-sequence} \terminal{"} -\end{bnf} - -\begin{bnf} -\nontermdef{r-char-sequence}\br - r-char\br - r-char-sequence r-char -\end{bnf} - -\begin{bnf} -\nontermdef{r-char}\br - \textnormal{any member of the translation character set, except a \unicode{0029}{right parenthesis} followed by}\br - \bnfindent\textnormal{the initial \grammarterm{d-char-sequence} (which may be empty) followed by a \unicode{0022}{quotation mark}} -\end{bnf} - -\begin{bnf} -\nontermdef{d-char-sequence}\br - d-char\br - 
d-char-sequence d-char -\end{bnf} - -\begin{bnf} -\nontermdef{d-char}\br - \textnormal{any member of the basic character set except:}\br - \bnfindent\textnormal{\unicode{0020}{space}, \unicode{0028}{left parenthesis}, \unicode{0029}{right parenthesis}, \unicode{005c}{reverse solidus},}\br - \bnfindent\textnormal{\unicode{0009}{character tabulation}, \unicode{000b}{line tabulation}, \unicode{000c}{form feed}, and new-line} -\end{bnf} - -\pnum -\indextext{literal!string}% -\indextext{character string}% -\indextext{string!type of}% -\indextext{type!\idxcode{wchar_t}}% -\indextext{prefix!\idxcode{L}}% -\indextext{literal!string!\idxcode{char16_t}}% -\indextext{type!\idxcode{char16_t}}% -\indextext{literal!string!\idxcode{char32_t}}% -\indextext{type!\idxcode{char32_t}}% -The kind of a \grammarterm{string-literal}, -its type, and -its associated character encoding\iref{lex.charset} -are determined by its encoding prefix and sequence of -\grammarterm{s-char}s or \grammarterm{r-char}s -as defined by \tref{lex.string.literal} -where $n$ is the number of encoded code units as described below. 
- -\begin{floattable}{String literals}{lex.string.literal} -{llp{2.6cm}p{2.3cm}p{4.7cm}} -\topline -\lhdr{Enco-} & \chdr{Kind} & \chdr{Type} & \chdr{Associated} & \rhdr{Examples} \\ -\lhdr{ding} & \chdr{} & \chdr{} & \chdr{character} & \rhdr{} \\ -\lhdr{prefix} & \chdr{} & \chdr{} & \chdr{encoding} & \rhdr{} \\ -\capsep -none & -\defnx{ordinary string literal}{literal!string!ordinary} & -array of $n$\newline \tcode{\keyword{const} \keyword{char}} & -ordinary literal encoding & -\tcode{"ordinary string"}\newline -\tcode{R"(ordinary raw string)"} \\ -\tcode{L} & -\defnx{wide string literal}{literal!string!wide} & -array of $n$\newline \tcode{\keyword{const} \keyword{wchar_t}} & -wide literal\newline encoding & -\tcode{L"wide string"}\newline -\tcode{LR"w(wide raw string)w"} \\ -\tcode{u8} & -\defnx{UTF-8 string literal}{literal!string!UTF-8} & -array of $n$\newline \tcode{\keyword{const} \keyword{char8_t}} & -UTF-8 & -\tcode{u8"UTF-8 string"}\newline -\tcode{u8R"x(UTF-8 raw string)x"} \\ -\tcode{u} & -\defnx{UTF-16 string literal}{literal!string!UTF-16} & -array of $n$\newline \tcode{\keyword{const} \keyword{char16_t}} & -UTF-16 & -\tcode{u"UTF-16 string"}\newline -\tcode{uR"y(UTF-16 raw string)y"} \\ -\tcode{U} & -\defnx{UTF-32 string literal}{literal!string!UTF-32} & -array of $n$\newline \tcode{\keyword{const} \keyword{char32_t}} & -UTF-32 & -\tcode{U"UTF-32 string"}\newline -\tcode{UR"z(UTF-32 raw string)z"} \\ -\end{floattable} - -\pnum -\indextext{literal!string!raw}% -A \grammarterm{string-literal} that has an \tcode{R} -\indextext{prefix!\idxcode{R}}% -in the prefix is a \defn{raw string literal}. The -\grammarterm{d-char-sequence} serves as a delimiter. The terminating -\grammarterm{d-char-sequence} of a \grammarterm{raw-string} is the same sequence of -characters as the initial \grammarterm{d-char-sequence}. A \grammarterm{d-char-sequence} -shall consist of at most 16 characters. 
- -\pnum -\begin{note} -The characters \tcode{'('} and \tcode{')'} can appear in a -\grammarterm{raw-string}. Thus, \tcode{R"delimiter((a|b))delimiter"} is equivalent to -\tcode{"(a|b)"}. -\end{note} - -\pnum -\begin{note} -A source-file new-line in a raw string literal results in a new-line in the -resulting execution string literal. Assuming no -whitespace at the beginning of lines in the following example, the assert will succeed: -\begin{codeblock} -const char* p = R"(a\ -b -c)"; -assert(std::strcmp(p, "a\\\nb\nc") == 0); -\end{codeblock} -\end{note} - -\pnum -\begin{example} -The raw string -\begin{codeblock} -R"a( -)\ -a" -)a" -\end{codeblock} -is equivalent to \tcode{"\textbackslash n)\textbackslash \textbackslash \textbackslash na\textbackslash"\textbackslash n"}. The raw string -\begin{codeblock} -R"(x = "\"y\"")" -\end{codeblock} -is equivalent to \tcode{"x = \textbackslash "\textbackslash\textbackslash\textbackslash "y\textbackslash\textbackslash\textbackslash "\textbackslash ""}. -\end{example} +\begin{bnf} +\nontermdef{hexadecimal-fractional-constant}\br + \opt{hexadecimal-digit-sequence} \terminal{.} hexadecimal-digit-sequence\br + hexadecimal-digit-sequence \terminal{.} +\end{bnf} -\pnum -\indextext{literal!narrow-character}% -Ordinary string literals and UTF-8 string literals are -also referred to as \defnx{narrow string literals}{literal!string!narrow}. +\begin{bnf} +\nontermdef{exponent-part}\br + \terminal{e} \opt{sign} digit-sequence\br + \terminal{E} \opt{sign} digit-sequence +\end{bnf} -\pnum -\indextext{concatenation!string}% -The common \grammarterm{encoding-prefix} -for a sequence of adjacent \grammarterm{string-literal}s -is determined pairwise as follows. -If two \grammarterm{string-literal}{s} have -the same \grammarterm{encoding-prefix}, -the common \grammarterm{encoding-prefix} is that \grammarterm{encoding-prefix}. 
-If one \grammarterm{string-literal} has no \grammarterm{encoding-prefix}, -the common \grammarterm{encoding-prefix} is that -of the other \grammarterm{string-literal}. -Any other combinations are ill-formed. -\begin{note} -A \grammarterm{string-literal}'s rawness has -no effect on the determination of the common \grammarterm{encoding-prefix}. -\end{note} +\begin{bnf} +\nontermdef{binary-exponent-part}\br + \terminal{p} \opt{sign} digit-sequence\br + \terminal{P} \opt{sign} digit-sequence +\end{bnf} -\pnum -In translation phase 6\iref{lex.phase.6}, -adjacent \grammarterm{string-literal}s are concatenated. -The lexical structure and grouping of -the contents of the individual \grammarterm{string-literal}s is retained. -\begin{example} -\begin{codeblock} -"\xA" "B" -\end{codeblock} -represents -the code unit \tcode{'\textbackslash xA'} and the character \tcode{'B'} -after concatenation -(and not the single code unit \tcode{'\textbackslash xAB'}). -Similarly, -\begin{codeblock} -R"(\u00)" "41" -\end{codeblock} -represents six characters, -starting with a backslash and ending with the digit \tcode{1} -(and not the single character \tcode{'A'} -specified by a \grammarterm{universal-character-name}). +\begin{bnf} +\nontermdef{sign} \textnormal{one of}\br + \terminal{+ -} +\end{bnf} -\tref{lex.string.concat} has some examples of valid concatenations. 
-\end{example} +\begin{bnf} +\nontermdef{digit-sequence}\br + digit\br + digit-sequence \opt{\terminal{'}} digit +\end{bnf} -\begin{floattable}{String literal concatenations}{lex.string.concat} -{lll|lll|lll} -\topline -\multicolumn{2}{|c}{Source} & -Means & -\multicolumn{2}{c}{Source} & -Means & -\multicolumn{2}{c}{Source} & -Means \\ -\tcode{u"a"} & \tcode{u"b"} & \tcode{u"ab"} & -\tcode{U"a"} & \tcode{U"b"} & \tcode{U"ab"} & -\tcode{L"a"} & \tcode{L"b"} & \tcode{L"ab"} \\ -\tcode{u"a"} & \tcode{"b"} & \tcode{u"ab"} & -\tcode{U"a"} & \tcode{"b"} & \tcode{U"ab"} & -\tcode{L"a"} & \tcode{"b"} & \tcode{L"ab"} \\ -\tcode{"a"} & \tcode{u"b"} & \tcode{u"ab"} & -\tcode{"a"} & \tcode{U"b"} & \tcode{U"ab"} & -\tcode{"a"} & \tcode{L"b"} & \tcode{L"ab"} \\ -\end{floattable} +\begin{bnf} +\nontermdef{floating-point-suffix} \textnormal{one of}\br + \terminal{f l f16 f32 f64 f128 bf16 F L F16 F32 F64 F128 BF16} +\end{bnf} \pnum -Evaluating a \grammarterm{string-literal} results in a string literal object -with static storage duration\iref{basic.stc}. -\begin{note} -String literal objects are potentially non-unique\iref{intro.object}. -Whether successive evaluations of a -\grammarterm{string-literal} yield the same or a different object is -unspecified. -\end{note} +\indextext{literal!type of floating-point}% +\indextext{literal!\idxcode{float}}% +\indextext{suffix!\idxcode{F}}% +\indextext{suffix!\idxcode{f}}% +\indextext{suffix!\idxcode{L}}% +\indextext{suffix!\idxcode{l}}% +\indextext{literal!\idxcode{long double}}% +The type of +a \grammarterm{floating-point-literal}\iref{basic.fundamental,basic.extended.fp} +is determined by +its \grammarterm{floating-point-suffix} as specified in \tref{lex.fcon.type}. \begin{note} -\indextext{literal!string!undefined change to}% -The effect of attempting to modify a string literal object is undefined. 
+The floating-point suffixes +\tcode{f16}, \tcode{f32}, \tcode{f64}, \tcode{f128}, \tcode{bf16}, +\tcode{F16}, \tcode{F32}, \tcode{F64}, \tcode{F128}, and \tcode{BF16} +are conditionally-supported. See \ref{basic.extended.fp}. \end{note} +\begin{simpletypetable} +{Types of \grammarterm{floating-point-literal}{s}} +{lex.fcon.type} +{ll} +\topline +\lhdr{\grammarterm{floating-point-suffix}} & \rhdr{type} \\ \capsep +none & \keyword{double} \\ +\tcode{f} or \tcode{F} & \keyword {float} \\ +\tcode{l} or \tcode{L} & \keyword{long} \keyword{double} \\ +\tcode{f16} or \tcode{F16} & \tcode{std::float16_t} \\ +\tcode{f32} or \tcode{F32} & \tcode{std::float32_t} \\ +\tcode{f64} or \tcode{F64} & \tcode{std::float64_t} \\ +\tcode{f128} or \tcode{F128} & \tcode{std::float128_t} \\ +\tcode{bf16} or \tcode{BF16} & \tcode{std::bfloat16_t} \\ +\end{simpletypetable} \pnum -\indextext{\idxcode{0}!string terminator}% -\indextext{\idxcode{0}!null character|see {character, null}}% -String literal objects are initialized with -the sequence of code unit values -corresponding to the \grammarterm{string-literal}'s sequence of -\grammarterm{s-char}s (originally from non-raw string literals) and -\grammarterm{r-char}s (originally from raw string literals), -plus a terminating \unicode{0000}{null} character, -in order as follows: -\begin{itemize} -\item -The sequence of characters denoted by each contiguous sequence of -\grammarterm{basic-s-char}s, -\grammarterm{r-char}s, -\grammarterm{simple-escape-sequence}s\iref{lex.ccon}, and -\grammarterm{universal-character-name}s\iref{lex.charset} -is encoded to a code unit sequence -using the \grammarterm{string-literal}'s associated character encoding. -If a character lacks representation in the associated character encoding, -then the program is ill-formed. -\begin{note} -No character lacks representation in any Unicode encoding form. 
-\end{note} -When encoding a stateful character encoding, -implementations should encode the first such sequence -beginning with the initial encoding state and -encode subsequent sequences -beginning with the final encoding state of the prior sequence. +\indextext{literal!floating-point}% +The \defn{significand} of a \grammarterm{floating-point-literal} +is the \grammarterm{fractional-constant} or \grammarterm{digit-sequence} +of a \grammarterm{decimal-floating-point-literal} +or the \grammarterm{hexadecimal-fractional-constant} +or \grammarterm{hexadecimal-digit-sequence} +of a \grammarterm{hexadecimal-floating-point-literal}. +In the significand, +the sequence of \grammarterm{digit}s or \grammarterm{hexadecimal-digit}s +and optional period are interpreted as a base $N$ real number $s$, +where $N$ is 10 for a \grammarterm{decimal-floating-point-literal} and +16 for a \grammarterm{hexadecimal-floating-point-literal}. \begin{note} -The encoded code unit sequence can differ from -the sequence of code units that would be obtained by -encoding each character independently. +Any optional separating single quotes are ignored when determining the value. \end{note} -\item -Each \grammarterm{numeric-escape-sequence}\iref{lex.ccon} -contributes a single code unit with a value as follows: -\begin{itemize} -\item -Let $v$ be the integer value represented by -the octal number comprising -the sequence of \grammarterm{octal-digit}{s} in -an \grammarterm{octal-escape-sequence} or by -the hexadecimal number comprising -the sequence of \grammarterm{hexadecimal-digit}{s} in -a \grammarterm{hexadecimal-escape-sequence}. -\item -If $v$ does not exceed the range of representable values of -the \grammarterm{string-literal}'s array element type, -then the value is $v$. 
-\item -Otherwise, -if the \grammarterm{string-literal}'s \grammarterm{encoding-prefix} -is absent or \tcode{L}, and -$v$ does not exceed the range of representable values of -the corresponding unsigned type for the underlying type of -the \grammarterm{string-literal}'s array element type, -then the value is the unique value of -the \grammarterm{string-literal}'s array element type \tcode{T} -that is congruent to $v$ modulo $2^N$, where $N$ is the width of \tcode{T}. -\item -Otherwise, the program is ill-formed. -\end{itemize} -When encoding a stateful character encoding, -these sequences should have no effect on encoding state. -\item -Each \grammarterm{conditional-escape-sequence}\iref{lex.ccon} -contributes an -\impldef{code unit sequence for \grammarterm{conditional-escape-sequence}} -code unit sequence. -When encoding a stateful character encoding, -it is -\impldef{effect of \grammarterm{conditional-escape-sequence} on encoding state} -what effect these sequences have on encoding state. -\end{itemize} +If an \grammarterm{exponent-part} or \grammarterm{binary-exponent-part} +is present, +the exponent $e$ of the \grammarterm{floating-point-literal} +is the result of interpreting +the sequence of an optional \grammarterm{sign} and the \grammarterm{digit}s +as a base 10 integer. +Otherwise, the exponent $e$ is 0. +The scaled value of the literal is +$s \times 10^e$ for a \grammarterm{decimal-floating-point-literal} and +$s \times 2^e$ for a \grammarterm{hexadecimal-floating-point-literal}. +\begin{example} +The \grammarterm{floating-point-literal}{s} +\tcode{49.625} and \tcode{0xC.68p+2} have the same value. +The \grammarterm{floating-point-literal}{s} +\tcode{1.602'176'565e-19} and \tcode{1.602176565e-19} +have the same value. +\end{example} + +\pnum +If the scaled value is not in the range of representable +values for its type, the program is ill-formed. 
+Otherwise, the value of a \grammarterm{floating-point-literal} +is the scaled value if representable, +else the larger or smaller representable value nearest the scaled value, +chosen in an \impldef{choice of larger or smaller value of +\grammarterm{floating-point-literal}} manner. -\rSec2[lex.string.uneval]{Unevaluated strings} +\rSec3[lex.string.uneval]{Unevaluated strings} \begin{bnf} \nontermdef{unevaluated-string}\br @@ -2094,7 +4121,7 @@ An \grammarterm{unevaluated-string} is never evaluated and its interpretation depends on the context in which it appears. -\rSec2[lex.bool]{Boolean literals} +\rSec3[lex.bool]{Boolean literals} \indextext{literal!boolean}% \begin{bnf} @@ -2108,7 +4135,7 @@ The Boolean literals are the keywords \tcode{false} and \tcode{true}. Such literals have type \tcode{bool}. -\rSec2[lex.nullptr]{Pointer literals} +\rSec3[lex.nullptr]{Pointer literals} \indextext{literal!pointer}% \begin{bnf} @@ -2126,7 +4153,7 @@ and~\ref{conv.mem}. \end{note} -\rSec2[lex.ext]{User-defined literals} +\rSec3[lex.ext]{User-defined literals} \indextext{literal!user-defined}% \begin{bnf} diff --git a/source/preprocessor.tex b/source/preprocessor.tex deleted file mode 100644 index db4bce19d1..0000000000 --- a/source/preprocessor.tex +++ /dev/null @@ -1,2009 +0,0 @@ -%!TEX root = std.tex -\rSec0[cpp]{Preprocessing directives}% -\indextext{preprocessing directive|(} - -\indextext{compiler control line|see{preprocessing directive}}% -\indextext{control line|see{preprocessing directive}}% -\indextext{directive, preprocessing|see{preprocessing directive}} - -\gramSec[gram.cpp]{Preprocessing directives} - -\rSec1[cpp.pre]{Preamble} - -\begin{bnf} -\nontermdef{preprocessing-file}\br - \opt{group}\br - module-file -\end{bnf} - -\begin{bnf} -\nontermdef{module-file}\br - \opt{pp-global-module-fragment} pp-module \opt{group} \opt{pp-private-module-fragment} -\end{bnf} - -\begin{bnf} -\nontermdef{pp-global-module-fragment}\br - \keyword{module} \terminal{;} new-line 
\opt{group} -\end{bnf} - -\begin{bnf} -\nontermdef{pp-private-module-fragment}\br - \keyword{module} \terminal{:} \keyword{private} \terminal{;} new-line \opt{group} -\end{bnf} - -\begin{bnf} -\nontermdef{group}\br - group-part\br - group group-part -\end{bnf} - -\begin{bnf} -\nontermdef{group-part}\br - control-line\br - if-section\br - text-line\br - \terminal{\#} conditionally-supported-directive -\end{bnf} - -\begin{bnf}\obeyspaces -\nontermdef{control-line}\br - \terminal{\# include} pp-tokens new-line\br - pp-import\br - \terminal{\# define } identifier replacement-list new-line\br - \terminal{\# define } identifier lparen \opt{identifier-list} \terminal{)} replacement-list new-line\br - \terminal{\# define } identifier lparen \terminal{... )} replacement-list new-line\br - \terminal{\# define } identifier lparen identifier-list \terminal{, ... )} replacement-list new-line\br - \terminal{\# undef } identifier new-line\br - \terminal{\# line } pp-tokens new-line\br - \terminal{\# error } \opt{pp-tokens} new-line\br - \terminal{\# warning} \opt{pp-tokens} new-line\br - \terminal{\# pragma } \opt{pp-tokens} new-line\br - \terminal{\# }new-line -\end{bnf} - -\begin{bnf} -\nontermdef{if-section}\br - if-group \opt{elif-groups} \opt{else-group} endif-line -\end{bnf} - -\begin{bnf}\obeyspaces -\nontermdef{if-group}\br - \terminal{\# if } constant-expression new-line \opt{group}\br - \terminal{\# ifdef } identifier new-line \opt{group}\br - \terminal{\# ifndef } identifier new-line \opt{group} -\end{bnf} - -\begin{bnf} -\nontermdef{elif-groups}\br - elif-group\br - elif-groups elif-group -\end{bnf} - -\begin{bnf}\obeyspaces -\nontermdef{elif-group}\br - \terminal{\# elif } constant-expression new-line \opt{group}\br - \terminal{\# elifdef } identifier new-line \opt{group}\br - \terminal{\# elifndef} identifier new-line \opt{group} -\end{bnf} - -\begin{bnf}\obeyspaces -\nontermdef{else-group}\br - \terminal{\# else } new-line \opt{group} -\end{bnf} - 
-\begin{bnf}\obeyspaces -\nontermdef{endif-line}\br - \terminal{\# endif } new-line -\end{bnf} - -\begin{bnf} -\nontermdef{text-line}\br - \opt{pp-tokens} new-line -\end{bnf} - -\begin{bnf} -\nontermdef{conditionally-supported-directive}\br - pp-tokens new-line -\end{bnf} - -\begin{bnf} -\nontermdef{lparen}\br - \descr{a \terminal{(} character not immediately preceded by whitespace} -\end{bnf} - -\begin{bnf} -\nontermdef{identifier-list}\br - identifier\br - identifier-list \terminal{,} identifier -\end{bnf} - -\begin{bnf} -\nontermdef{replacement-list}\br - \opt{pp-tokens} -\end{bnf} - -\begin{bnf} -\nontermdef{pp-tokens}\br - preprocessing-token\br - pp-tokens preprocessing-token -\end{bnf} - -\begin{bnf} -\nontermdef{new-line}\br - \descr{the new-line character} -\end{bnf} - -\pnum -A \defn{preprocessing directive} consists of a sequence of preprocessing tokens -that satisfies the following constraints: -At the start of translation phase 4, -the first token in the sequence, -referred to as a \defnadj{directive-introducing}{token}, -begins with the first character in the source file -(optionally after whitespace containing no new-line characters) or -follows whitespace containing at least one new-line character, -and is - -\begin{itemize} -\item -a \tcode{\#} preprocessing token, or - -\item -an \keyword{import} preprocessing token -immediately followed on the same logical line by a -\grammarterm{header-name}, -\tcode{<}, -\grammarterm{identifier}, -\grammarterm{string-literal}, or -\tcode{:} -preprocessing token, or - -\item -a \keyword{module} preprocessing token -immediately followed on the same logical line by an -\grammarterm{identifier}, -\tcode{:}, or -\tcode{;} -preprocessing token, or - -\item -an \keyword{export} preprocessing token -immediately followed on the same logical line by -one of the two preceding forms. 
-\end{itemize} - -The last token in the sequence is the first token within the sequence that -is immediately followed by whitespace containing a new-line character. -\begin{footnote} -Thus, -preprocessing directives are commonly called ``lines''. -These ``lines'' have no other syntactic significance, -as all whitespace is equivalent except in certain situations -during preprocessing (see the -\tcode{\#} -character string literal creation operator in~\ref{cpp.stringize}, for example). -\end{footnote} -\begin{note} -A new-line character ends the preprocessing directive even if it occurs -within what would otherwise be an invocation of a function-like macro. -\end{note} - -\begin{example} -\begin{codeblock} -# // preprocessing directive -module ; // preprocessing directive -export module leftpad; // preprocessing directive -import <string>; // preprocessing directive -export import "squee"; // preprocessing directive -import rightpad; // preprocessing directive -import :part; // preprocessing directive - -module // not a preprocessing directive -; // not a preprocessing directive - -export // not a preprocessing directive -import // not a preprocessing directive -foo; // not a preprocessing directive - -export // not a preprocessing directive -import foo; // preprocessing directive (ill-formed at phase 7) - -import :: // not a preprocessing directive -import -> // not a preprocessing directive -\end{codeblock} -\end{example} - -\pnum -A sequence of preprocessing tokens is only a \grammarterm{text-line} -if it does not begin with a directive-introducing token. -A sequence of preprocessing tokens is only a \grammarterm{conditionally-supported-directive} -if it does not begin with any of the directive names -appearing after a \tcode{\#} in the syntax. -A \grammarterm{conditionally-supported-directive} is -conditionally-supported with -\impldef{additional supported forms of preprocessing directive} -semantics. 
- -\pnum -At the start of phase 4 of translation, -the \grammarterm{group} of a \grammarterm{pp-global-module-fragment} shall -contain neither a \grammarterm{text-line} nor a \grammarterm{pp-import}. - -\pnum -When in a group that is skipped\iref{cpp.cond}, the directive -syntax is relaxed to allow any sequence of preprocessing tokens to occur between -the directive name and the following new-line character. - -\pnum -The only whitespace characters that shall appear -between preprocessing tokens -within a preprocessing directive -(from just after the directive-introducing token -through just before the terminating new-line character) -are space and horizontal-tab -(including spaces that have replaced comments -or possibly other whitespace characters -in translation phase 3). - -\pnum -The implementation can -process and skip sections of source files conditionally, -include other source files, -import macros from header units, -and replace macros. -These capabilities are called -\defn{preprocessing}, -because conceptually they occur -before translation of the resulting translation unit. - -\pnum -The preprocessing tokens within a preprocessing directive -are not subject to macro expansion unless otherwise stated. - -\begin{example} -In: -\begin{codeblock} -#define EMPTY -EMPTY # include <file.h> -\end{codeblock} -the sequence of preprocessing tokens on the second line is \textit{not} -a preprocessing directive, because it does not begin with a \tcode{\#} at the start of -translation phase 4, even though it will do so after the macro \tcode{EMPTY} -has been replaced. 
-\end{example} - -\rSec1[cpp.cond]{Conditional inclusion}% -\indextext{preprocessing directive!conditional inclusion}% -\indextext{inclusion!conditional|see{preprocessing directive, conditional inclusion}} - -\indextext{\idxcode{defined}}% -\begin{bnf} -\nontermdef{defined-macro-expression}\br - \terminal{defined} identifier\br - \terminal{defined (} identifier \terminal{)} -\end{bnf} - -\begin{bnf} -\nontermdef{h-preprocessing-token}\br - \textnormal{any \grammarterm{preprocessing-token} other than \terminal{>}} -\end{bnf} - -\begin{bnf} -\nontermdef{h-pp-tokens}\br - h-preprocessing-token\br - h-pp-tokens h-preprocessing-token -\end{bnf} - -\begin{bnf} -\nontermdef{header-name-tokens}\br - string-literal\br - \terminal{<} h-pp-tokens \terminal{>} -\end{bnf} - -\indextext{\idxxname{has_include}}% -\begin{bnf} -\nontermdef{has-include-expression}\br - \terminal{\xname{has_include}} \terminal{(} header-name \terminal{)}\br - \terminal{\xname{has_include}} \terminal{(} header-name-tokens \terminal{)} -\end{bnf} - -\indextext{\idxxname{has_cpp_attribute}}% -\begin{bnf} -\nontermdef{has-attribute-expression}\br - \terminal{\xname{has_cpp_attribute} (} pp-tokens \terminal{)} -\end{bnf} - -\pnum -The expression that controls conditional inclusion -shall be an integral constant expression except that -identifiers -(including those lexically identical to keywords) -are interpreted as described below -\begin{footnote} -Because the controlling constant expression is evaluated -during translation phase 4, -all identifiers either are or are not macro names --- -there simply are no keywords, enumeration constants, etc. -\end{footnote} -and it may contain zero or more \grammarterm{defined-macro-expression}{s} and/or -\grammarterm{has-include-expression}{s} and/or -\grammarterm{has-attribute-expression}{s} as unary operator expressions. 
- -\pnum -A \grammarterm{defined-macro-expression} evaluates to \tcode{1} -if the identifier is currently defined -as a macro name -(that is, if it is predefined -or if it has one or more active macro definitions\iref{cpp.import}, -for example because -it has been the subject of a -\tcode{\#define} -preprocessing directive -without an intervening -\tcode{\#undef} -directive with the same subject identifier), \tcode{0} if it is not. - -\pnum -The second form of \grammarterm{has-include-expression} -is considered only if the first form does not match, -in which case the preprocessing tokens are processed just as in normal text. - -\pnum -The header or source file identified by -the parenthesized preprocessing token sequence -in each contained \grammarterm{has-include-expression} -is searched for as if that preprocessing token sequence -were the \grammarterm{pp-tokens} in a \tcode{\#include} directive, -except that no further macro expansion is performed. -If such a directive would not satisfy the syntactic requirements -of a \tcode{\#include} directive, the program is ill-formed. -The \grammarterm{has-include-expression} evaluates -to \tcode{1} if the search for the source file succeeds, and -to \tcode{0} if the search fails. - -\pnum -Each \grammarterm{has-attribute-expression} is replaced by -a non-zero \grammarterm{pp-number} -matching the form of an \grammarterm{integer-literal} -if the implementation supports an attribute -with the name specified by interpreting -the \grammarterm{pp-tokens}, after macro expansion, -as an \grammarterm{attribute-token}, -and by \tcode{0} otherwise. -The program is ill-formed if the \grammarterm{pp-tokens} -do not match the form of an \grammarterm{attribute-token}. - -\pnum -For an attribute specified in this document, -it is \impldef{value of \grammarterm{has-attribute-expression} -for standard attributes} -whether the value of the \grammarterm{has-attribute-expression} -is \tcode{0} or is given by \tref{cpp.cond.ha}. 
-For other attributes recognized by the implementation, -the value is -\impldef{value of \grammarterm{has-attribute-expression} -for non-standard attributes}. -\begin{note} -It is expected -that the availability of an attribute can be detected by any non-zero result. -\end{note} - -\begin{floattable}{\xname{has_cpp_attribute} values}{cpp.cond.ha} -{ll} -\topline -\lhdr{Attribute} & \rhdr{Value} \\ \rowsep -\tcode{assume} & \tcode{202207L} \\ -\tcode{carries_dependency} & \tcode{200809L} \\ -\tcode{deprecated} & \tcode{201309L} \\ -\tcode{fallthrough} & \tcode{201603L} \\ -\tcode{likely} & \tcode{201803L} \\ -\tcode{maybe_unused} & \tcode{201603L} \\ -\tcode{no_unique_address} & \tcode{201803L} \\ -\tcode{nodiscard} & \tcode{201907L} \\ -\tcode{noreturn} & \tcode{200809L} \\ -\tcode{unlikely} & \tcode{201803L} \\ -\end{floattable} - -\pnum -The -\tcode{\#ifdef}, \tcode{\#ifndef}, \tcode{\#elifdef}, and \tcode{\#elifndef} -directives, and -the \tcode{defined} conditional inclusion operator, -shall treat \xname{has_include} and \xname{has_cpp_attribute} -as if they were the names of defined macros. -The identifiers \xname{has_include} and \xname{has_cpp_attribute} -shall not appear in any context not mentioned in this subclause. - -\pnum -Each preprocessing token that remains (in the list of preprocessing tokens that -will become the controlling expression) -after all macro replacements have occurred -shall be in the lexical form of a token\iref{lex.token}. - -\pnum -Preprocessing directives of the forms -\begin{ncsimplebnf}\obeyspaces -\indextext{\idxcode{\#if}}% -\terminal{\# if } constant-expression new-line \opt{group}\br -\indextext{\idxcode{\#elif}}% -\terminal{\# elif } constant-expression new-line \opt{group} -\end{ncsimplebnf} -check whether the controlling constant expression evaluates to nonzero. 
- -\pnum -Prior to evaluation, -macro invocations in the list of preprocessing tokens -that will become the controlling constant expression -are replaced -(except for those macro names modified by the -\tcode{defined} -unary operator), -just as in normal text. -If the token -\tcode{defined} -is generated as a result of this replacement process -or use of the -\tcode{defined} -unary operator does not match one of the two specified forms -prior to macro replacement, -the behavior is undefined. - -\pnum -After all replacements due to macro expansion and -evaluations of -\grammarterm{defined-macro-expression}s, -\grammarterm{has-include-expression}s, and -\grammarterm{has-attribute-expression}s -have been performed, -all remaining identifiers and keywords, -except for -\tcode{true} -and -\tcode{false}, -are replaced with the \grammarterm{pp-number} -\tcode{0}, -and then each preprocessing token is converted into a token. -\begin{note} -An alternative -token\iref{lex.digraph} is not an identifier, -even when its spelling consists entirely of letters and underscores. -Therefore it is not subject to this replacement. -\end{note} - -\pnum -The resulting tokens comprise the controlling constant expression -which is evaluated according to the rules of~\ref{expr.const} -using arithmetic that has at least the ranges specified -in~\ref{support.limits}. For the purposes of this token conversion and evaluation -all signed and unsigned integer types -act as if they have the same representation as, respectively, -\tcode{intmax_t} or \tcode{uintmax_t}\iref{cstdint.syn}. -\begin{note} -Thus on an -implementation where \tcode{std::numeric_limits<int>::max()} is \tcode{0x7FFF} -and \tcode{std::numeric_limits<unsigned int>::max()} is \tcode{0xFFFF}, -the integer literal \tcode{0x8000} is signed and positive within a \tcode{\#if} -expression even though it is unsigned in translation phase -7\iref{lex.phases}. 
-\end{note} -This includes interpreting \grammarterm{character-literal}s -according to the rules in \ref{lex.ccon}. -\begin{note} -The associated character encodings of literals are the same -in \tcode{\#if} and \tcode{\#elif} directives and in any expression. -\end{note} -Each subexpression with type -\tcode{bool} -is subjected to integral promotion before processing continues. - -\pnum -Preprocessing directives of the forms -\begin{ncsimplebnf}\obeyspaces -\terminal{\# ifdef } identifier new-line \opt{group}\br -\indextext{\idxcode{\#ifdef}}% -\terminal{\# ifndef } identifier new-line \opt{group}\br -\indextext{\idxcode{\#ifndef}}% -\terminal{\# elifdef } identifier new-line \opt{group}\br -\indextext{\idxcode{\#elifdef}}% -\terminal{\# elifndef} identifier new-line \opt{group} -\indextext{\idxcode{\#elifndef}}% -\end{ncsimplebnf} -check whether the identifier is or is not currently defined as a macro name. -Their conditions are equivalent to -\tcode{\#if} \tcode{defined} \grammarterm{identifier}, -\tcode{\#if} \tcode{!defined} \grammarterm{identifier}, -\tcode{\#elif} \tcode{defined} \grammarterm{identifier}, and -\tcode{\#elif} \tcode{!defined} \grammarterm{identifier}, -respectively. - -\pnum -Each directive's condition is checked in order. -If it evaluates to false (zero), -the group that it controls is skipped: -directives are processed only through the name that determines -the directive in order to keep track of the level -of nested conditionals; -the rest of the directives' preprocessing tokens are ignored, -as are the other preprocessing tokens in the group. -Only the first group -whose control condition evaluates to true (nonzero) is processed; -any following groups are skipped and their controlling directives -are processed as if they were in a group that is skipped. 
-If none of the conditions evaluates to true, -and there is a -\tcode{\#else} -\indextext{\idxcode{\#else}}% -directive, -the group controlled by the -\tcode{\#else} -is processed; lacking a -\tcode{\#else} -directive, all the groups until the -\tcode{\#endif} -\indextext{\idxcode{\#endif}}% -are skipped.% -\begin{footnote} -As indicated by the syntax, -a preprocessing token cannot follow a -\tcode{\#else} -or -\tcode{\#endif} -directive before the terminating new-line character. -However, -comments can appear anywhere in a source file, -including within a preprocessing directive. -\end{footnote} - -\pnum -\begin{example} -This demonstrates a way to include a library \tcode{optional} facility -only if it is available: - -\begin{codeblock} -#if __has_include(<optional>) -# include <optional> -# if __cpp_lib_optional >= 201603 -# define have_optional 1 -# endif -#elif __has_include(<experimental/optional>) -# include <experimental/optional> -# if __cpp_lib_experimental_optional >= 201411 -# define have_optional 1 -# define experimental_optional 1 -# endif -#endif -#ifndef have_optional -# define have_optional 0 -#endif -\end{codeblock} -\end{example} - -\pnum -\begin{example} -This demonstrates a way to use the attribute \tcode{[[acme::deprecated]]} -only if it is available. -\begin{codeblock} -#if __has_cpp_attribute(acme::deprecated) -# define ATTR_DEPRECATED(msg) [[acme::deprecated(msg)]] -#else -# define ATTR_DEPRECATED(msg) [[deprecated(msg)]] -#endif -ATTR_DEPRECATED("This function is deprecated") void anvil(); -\end{codeblock} -\end{example} - -\rSec1[cpp.include]{Source file inclusion} -\indextext{preprocessing directive!header inclusion} -\indextext{preprocessing directive!source-file inclusion} -\indextext{inclusion!source file|see{preprocessing directive, source-file inclusion}}% -\indextext{\idxcode{\#include}}% - -\pnum -A -\tcode{\#include} -directive shall identify a header or source file -that can be processed by the implementation. 
- -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# include <} h-char-sequence \terminal{>} new-line -\end{ncsimplebnf} -searches a sequence of -\impldef{sequence of places searched for a header} -places -for a header identified uniquely by the specified sequence -between the -\tcode{<} -and -\tcode{>} -delimiters, -and causes the replacement of that -directive by the entire contents of the header. -How the places are specified -or the header identified -is \impldef{search locations for \tcode{<>} header}. - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# include "} q-char-sequence \terminal{"} new-line -\end{ncsimplebnf} -causes the replacement of that -directive by the entire contents of the -source file identified by the specified sequence between the -\tcode{"} -delimiters. -The named source file is searched for in an -\impldef{manner of search for included source file} -manner. -If this search is not supported, -or if the search fails, -the directive is reprocessed as if it read -\begin{ncsimplebnf} -\terminal{\# include <} h-char-sequence \terminal{>} new-line -\end{ncsimplebnf} -with the identical contained sequence (including -\tcode{>} -characters, if any) from the original directive. - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# include} pp-tokens new-line -\end{ncsimplebnf} -(that does not match one of the two previous forms) is permitted. -The preprocessing tokens after -\tcode{include} -in the directive are processed just as in normal text -(i.e., each identifier currently defined as a macro name is replaced by its -replacement list of preprocessing tokens). -If the directive resulting after all replacements does not match -one of the two previous forms, the behavior is -undefined. 
-\begin{footnote} -Note that adjacent \grammarterm{string-literal}s are not concatenated into -a single \grammarterm{string-literal} -(see the translation phases in~\ref{lex.phases}); -thus, an expansion that results in two \grammarterm{string-literal}s is an -invalid directive. -\end{footnote} -The method by which a sequence of preprocessing tokens between a -\tcode{<} -and a -\tcode{>} -preprocessing token pair or a pair of -\tcode{"} -characters is combined into a single header name -preprocessing token is \impldef{search locations for \tcode{""""} header}. - -\pnum -The implementation shall provide unique mappings for -sequences consisting of one or more -\grammarterm{nondigit}{s} or \grammarterm{digit}{s}\iref{lex.name} -followed by a period -(\tcode{.}) -and a single -\grammarterm{nondigit}. -The first character shall not be a \grammarterm{digit}. -The implementation may ignore distinctions of alphabetical case. - -\pnum -A -\tcode{\#include} -preprocessing directive may appear -in a source file that has been read because of a -\tcode{\#include} -directive in another file, -up to an \impldef{nesting limit for \tcode{\#include} directives} nesting limit. - -\pnum -If the header identified by the \grammarterm{header-name} -denotes an importable header\iref{module.import}, -it is -\impldef{whether source file inclusion of importable header -is replaced with \tcode{import} directive} -whether the \tcode{\#include} preprocessing directive -is instead replaced by an \tcode{import} directive\iref{cpp.import} of the form -\begin{ncbnf} -\terminal{import} header-name \terminal{;} new-line -\end{ncbnf} - -\pnum -\begin{note} -An implementation can provide a mechanism for making arbitrary -source files available to the \tcode{< >} search. -However, using the \tcode{< >} form for headers provided -with the implementation and the \tcode{" "} form for sources -outside the control of the implementation -achieves wider portability. 
For instance: - -\begin{codeblock} -#include <stdio.h> -#include <unistd.h> -#include "usefullib.h" -#include "myprog.h" -\end{codeblock} - -\end{note} - -\pnum -\begin{example} -This illustrates macro-replaced -\tcode{\#include} -directives: - -\begin{codeblock} -#if VERSION == 1 - #define INCFILE "vers1.h" -#elif VERSION == 2 - #define INCFILE "vers2.h" // and so on -#else - #define INCFILE "versN.h" -#endif -#include INCFILE -\end{codeblock} -\end{example} - -\rSec1[cpp.module]{Module directive} -\indextext{preprocessing directive!module}% - -\begin{bnf} -\nontermdef{pp-module}\br - \opt{\keyword{export}} \keyword{module} \opt{pp-tokens} \terminal{;} new-line -\end{bnf} - -\pnum -A \grammarterm{pp-module} shall not -appear in a context where \tcode{module} -or (if it is the first token of the \grammarterm{pp-module}) \tcode{export} -is an identifier defined as an object-like macro. - -\pnum -The \grammarterm{pp-tokens}, if any, of a \grammarterm{pp-module} -shall be of the form: -\begin{ncsimplebnf} -pp-module-name \opt{pp-module-partition} \opt{pp-tokens} -\end{ncsimplebnf} -where the \grammarterm{pp-tokens} (if any) shall not begin with -a \tcode{(} preprocessing token and -the grammar non-terminals are defined as: -\begin{ncbnf} -\nontermdef{pp-module-name}\br - \opt{pp-module-name-qualifier} identifier -\end{ncbnf} -\begin{ncbnf} -\nontermdef{pp-module-partition}\br - \terminal{:} \opt{pp-module-name-qualifier} identifier -\end{ncbnf} -\begin{ncbnf} -\nontermdef{pp-module-name-qualifier}\br - identifier \terminal{.}\br - pp-module-name-qualifier identifier \terminal{.} -\end{ncbnf} -No \grammarterm{identifier} in -the \grammarterm{pp-module-name} or \grammarterm{pp-module-partition} -shall currently be defined as an object-like macro. - -\pnum -Any preprocessing tokens after the \tcode{module} preprocessing token -in the \tcode{module} directive are processed just as in normal text.
-\begin{note} -Each identifier currently defined as a macro name -is replaced by its replacement list of preprocessing tokens. -\end{note} - -\pnum -The \tcode{module} and \tcode{export} (if it exists) preprocessing tokens -are replaced by the \grammarterm{module-keyword} and -\grammarterm{export-keyword} preprocessing tokens respectively. -\begin{note} -This makes the line no longer a directive -so it is not removed at the end of phase 4. -\end{note} - -\rSec1[cpp.import]{Header unit importation} -\indextext{header unit!preprocessing}% -\indextext{preprocessing directive!import}% -\indextext{macro!import|(}% - -\begin{bnf} -\nontermdef{pp-import}\br - \opt{\keyword{export}} \keyword{import} header-name \opt{pp-tokens} \terminal{;} new-line\br - \opt{\keyword{export}} \keyword{import} header-name-tokens \opt{pp-tokens} \terminal{;} new-line\br - \opt{\keyword{export}} \keyword{import} pp-tokens \terminal{;} new-line -\end{bnf} - -\pnum -A \grammarterm{pp-import} shall not -appear in a context where \tcode{import} -or (if it is the first token of the \grammarterm{pp-import}) \tcode{export} -is an identifier defined as an object-like macro. - -\pnum -The preprocessing tokens after the \tcode{import} preprocessing token -in the \tcode{import} \grammarterm{control-line} -are processed just as in normal text -(i.e., each identifier currently defined as a macro name -is replaced by its replacement list of preprocessing tokens). -\begin{note} -An \tcode{import} directive -matching the first two forms of a \grammarterm{pp-import} -instructs the preprocessor to import macros -from the header unit\iref{module.import} -denoted by the \grammarterm{header-name}, -as described below. -\end{note} -\indextext{point of!macro import|see{macro, point of import}}% -The \defnx{point of macro import}{macro!point of import} for the -first two forms of \grammarterm{pp-import} is -immediately after the \grammarterm{new-line} terminating -the \grammarterm{pp-import}. 
-The last form of \grammarterm{pp-import} is only considered -if the first two forms did not match, and -does not have a point of macro import. - -\pnum -If a \grammarterm{pp-import} is produced by source file inclusion -(including by the rewrite produced -when a \tcode{\#include} directive names an importable header) -while processing the \grammarterm{group} of a \grammarterm{module-file}, -the program is ill-formed. - -\pnum -In all three forms of \grammarterm{pp-import}, -the \tcode{import} and \tcode{export} (if it exists) preprocessing tokens -are replaced by the \grammarterm{import-keyword} and -\grammarterm{export-keyword} preprocessing tokens respectively. -\begin{note} -This makes the line no longer a directive -so it is not removed at the end of phase 4. -\end{note} -Additionally, in the second form of \grammarterm{pp-import}, -a \grammarterm{header-name} token is formed as if -the \grammarterm{header-name-tokens} -were the \grammarterm{pp-tokens} of a \tcode{\#include} directive. -The \grammarterm{header-name-tokens} are replaced by -the \grammarterm{header-name} token. -\begin{note} -This ensures that imports are treated consistently by -the preprocessor and later phases of translation. -\end{note} - -\pnum -Each \tcode{\#define} directive encountered when preprocessing -each translation unit in a program results in a distinct -\defnx{macro definition}{macro!definition}. -\begin{note} -A predefined macro name\iref{cpp.predefined} -is not introduced by a \tcode{\#define} directive. -Implementations providing mechanisms to predefine additional macros -are encouraged to not treat them -as being introduced by a \tcode{\#define} directive. 
-\end{note} -Each macro definition has at most one point of definition in -each translation unit and at most one point of undefinition, as follows: -\begin{itemize} -\item -\indextext{point of!macro definition|see{macro, point of definition}}% -The \defnx{point of definition}{macro!point of definition} -of a macro definition within a translation unit $T$ is -\begin{itemize} -\item -if the \tcode{\#define} directive of the macro definition occurs within $T$, -the point at which that directive occurs, or otherwise, -\item -if the macro name is not lexically identical to a keyword\iref{lex.key} -or to the \grammarterm{identifier}{s} \tcode{module} or \tcode{import}, -the first point of macro import in $T$ of a header unit -containing a point of definition for the macro definition, if any. -\end{itemize} -In the latter case, the macro is said -to be \defnx{imported}{macro!import} from the header unit. - -\item -\indextext{point of!macro undefinition|see{macro, point of undefinition}}% -The \defnx{point of undefinition}{macro!point of undefinition} -of a macro definition within a translation unit -is the first point at which a \tcode{\#undef} directive naming the macro occurs -after its point of definition, or the first point -of macro import of a header unit containing a point of undefinition for the -macro definition, whichever (if any) occurs first. -\end{itemize} - -\pnum -\indextext{active macro directive|see{macro, active}}% -A macro directive is \defnx{active}{macro!active} at a source location -if it has a point of definition in that translation unit preceding the location, -and does not have a point of undefinition in that translation unit preceding -the location. - -\pnum -If a macro would be replaced or redefined, and multiple macro definitions -are active for that macro name, the active macro definitions shall all be -valid redefinitions of the same macro\iref{cpp.replace}. 
-\begin{note} -The relative order of \grammarterm{pp-import}{s} has no bearing on whether a -particular macro definition is active. -\end{note} - -\pnum -\begin{example} -\begin{codeblocktu}{Importable header \tcode{"a.h"}} -#define X 123 // \#1 -#define Y 45 // \#2 -#define Z a // \#3 -#undef X // point of undefinition of \#1 in \tcode{"a.h"} -\end{codeblocktu} - -\begin{codeblocktu}{Importable header \tcode{"b.h"}} -import "a.h"; // point of definition of \#1, \#2, and \#3, point of undefinition of \#1 in \tcode{"b.h"} -#define X 456 // OK, \#1 is not active -#define Y 6 // error: \#2 is active -\end{codeblocktu} - -\begin{codeblocktu}{Importable header \tcode{"c.h"}} -#define Y 45 // \#4 -#define Z c // \#5 -\end{codeblocktu} - -\begin{codeblocktu}{Importable header \tcode{"d.h"}} -import "c.h"; // point of definition of \#4 and \#5 in \tcode{"d.h"} -\end{codeblocktu} - -\begin{codeblocktu}{Importable header \tcode{"e.h"}} -import "a.h"; // point of definition of \#1, \#2, and \#3, point of undefinition of \#1 in \tcode{"e.h"} -import "d.h"; // point of definition of \#4 and \#5 in \tcode{"e.h"} -int a = Y; // OK, active macro definitions \#2 and \#4 are valid redefinitions -int c = Z; // error: active macro definitions \#3 and \#5 are not valid redefinitions of \tcode{Z} -\end{codeblocktu} - -\begin{codeblocktu}{Module unit \tcode{f}} -export module f; -export import "a.h"; - -int a = Y; // OK -\end{codeblocktu} - -\begin{codeblocktu}{Translation unit \tcode{\#1}} -import f; -int x = Y; // error: \tcode{Y} is neither a defined macro nor a declared name -\end{codeblocktu} -\end{example} -\indextext{macro!import|)} - -\rSec1[cpp.replace]{Macro replacement}% - -\rSec2[cpp.replace.general]{General}% -\indextext{macro!replacement|(}% -\indextext{replacement!macro|see{macro, replacement}}% -\indextext{preprocessing directive!macro replacement|see{macro, replacement}} - -\pnum -\indextext{macro!replacement list}% -Two replacement lists are identical if and only if 
-the preprocessing tokens in both have -the same number, ordering, spelling, and whitespace separation, -where all whitespace separations are considered identical. - -\pnum -An identifier currently defined as an -\indextext{macro!object-like}% -object-like macro (see below) may be redefined by another -\tcode{\#define} -preprocessing directive provided that the second definition is an -object-like macro definition and the two replacement lists -are identical, otherwise the program is ill-formed. -Likewise, an identifier currently defined as a -\indextext{macro!function-like}% -function-like macro (see below) may be redefined by another -\tcode{\#define} -preprocessing directive provided that the second definition is a -function-like macro definition that has the same number and spelling -of parameters, -and the two replacement lists are identical, -otherwise the program is ill-formed. - -\pnum -\begin{example} -The following sequence is valid: -\begin{codeblock} -#define OBJ_LIKE (1-1) -#define OBJ_LIKE @\tcode{/* whitespace */ (1-1) /* other */}@ -#define FUNC_LIKE(a) ( a ) -#define FUNC_LIKE( a )( @\tcode{/* note the whitespace */ \textbackslash}@ - a @\tcode{/* other stuff on this line}@ - @\tcode{*/}@ ) -\end{codeblock} -But the following redefinitions are invalid: -\begin{codeblock} -#define OBJ_LIKE (0) // different token sequence -#define OBJ_LIKE (1 - 1) // different whitespace -#define FUNC_LIKE(b) ( a ) // different parameter usage -#define FUNC_LIKE(b) ( b ) // different parameter spelling -\end{codeblock} -\end{example} - -\pnum -\indextext{macro!replacement list}% -There shall be whitespace between the identifier and the replacement list -in the definition of an object-like macro. 
- -\pnum -If the \grammarterm{identifier-list} in the macro definition does not end with -an ellipsis, the number of arguments (including those arguments consisting -of no preprocessing tokens) -in an invocation of a function-like macro shall -equal the number of parameters in the macro definition. -Otherwise, there shall be at least as many arguments in the invocation as there are -parameters in the macro definition (excluding the \tcode{...}). There -shall exist a -\tcode{)} -preprocessing token that terminates the invocation. - -\pnum -\indextext{__va_args__@\mname{VA_ARGS}}% -\indextext{__va_opt__@\mname{VA_OPT}}% -The identifiers \mname{VA_ARGS} and \mname{VA_OPT} -shall occur only in the \grammarterm{replacement-list} -of a function-like macro that uses the ellipsis notation in the parameters. - -\pnum -A parameter identifier in a function-like macro -shall be uniquely declared within its scope. - -\pnum -The identifier immediately following the -\tcode{define} -is called the -\indextext{name!macro|see{macro, name}}% -\defnx{macro name}{macro!name}. -There is one name space for macro names. -Any whitespace characters preceding or following the -replacement list of preprocessing tokens are not considered -part of the replacement list for either form of macro. - -\pnum -If a -\indextext{\#\#0 operator@\tcode{\#} operator} -\tcode{\#} -preprocessing token, -followed by an identifier, -occurs lexically -at the point at which a preprocessing directive can begin, -the identifier is not subject to macro replacement. 
- -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# define} identifier replacement-list new-line -\indextext{\idxcode{\#define}}% -\end{ncsimplebnf} -defines an -\defnadj{object-like}{macro} that -causes each subsequent instance of the macro name -\begin{footnote} -Since, by macro-replacement time, -all \grammarterm{character-literal}s and \grammarterm{string-literal}s are preprocessing tokens, -not sequences possibly containing identifier-like subsequences -(see \ref{lex.phases}, translation phases), -they are never scanned for macro names or parameters. -\end{footnote} -to be replaced by the replacement list of preprocessing tokens -that constitute the remainder of the directive. -\begin{footnote} -An alternative token\iref{lex.digraph} is not an identifier, -even when its spelling consists entirely of letters and underscores. -Therefore it is not possible to define a macro -whose name is the same as that of an alternative token. -\end{footnote} -The replacement list is then rescanned for more macro names as -specified below. - -\pnum -\begin{example} -The simplest use of this facility is to define a ``manifest constant'', -as in -\begin{codeblock} -#define TABSIZE 100 -int table[TABSIZE]; -\end{codeblock} -\end{example} - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# define} identifier lparen \opt{identifier-list} \terminal{)} replacement-list new-line\br -\terminal{\# define} identifier lparen \terminal{...} \terminal{)} replacement-list new-line\br -\terminal{\# define} identifier lparen identifier-list \terminal{, ...} \terminal{)} replacement-list new-line -\end{ncsimplebnf} -defines a \defnadj{function-like}{macro} -with parameters, whose use is -similar syntactically to a function call. -The parameters -\indextext{parameter!macro}% -are specified by the optional list of identifiers. 
-Each subsequent instance of the function-like macro name followed by a -\tcode{(} -as the next preprocessing token -introduces the sequence of preprocessing tokens that is replaced -by the replacement list in the definition -(an invocation of the macro). -\indextext{invocation!macro}% -The replaced sequence of preprocessing tokens is terminated by the matching -\tcode{)} -preprocessing token, skipping intervening matched pairs of left and -right parenthesis preprocessing tokens. -Within the sequence of preprocessing tokens making up an invocation -of a function-like macro, -new-line is considered a normal whitespace character. - -\pnum -\indextext{macro!function-like!arguments}% -The sequence of preprocessing tokens -bounded by the outside-most matching parentheses -forms the list of arguments for the function-like macro. -The individual arguments within the list -are separated by comma preprocessing tokens, -but comma preprocessing tokens between matching -inner parentheses do not separate arguments. -If there are sequences of preprocessing tokens within the list of -arguments that would otherwise act as preprocessing directives, -\begin{footnote} -A \grammarterm{conditionally-supported-directive} is a preprocessing directive regardless of whether the implementation supports it. -\end{footnote} -the behavior is undefined. - -\pnum -\begin{example} -The following defines a function-like -macro whose value is the maximum of its arguments. -It has the disadvantages of evaluating one or the other of its arguments -a second time -(including -\indextext{side effects}% -side effects) -and generating more code than a function if invoked several times. -It also cannot have its address taken, -as it has none. - -\begin{codeblock} -#define max(a, b) ((a) > (b) ? (a) : (b)) -\end{codeblock} - -The parentheses ensure that the arguments and -the resulting expression are bound properly. 
-\end{example} - -\pnum -\indextext{macro!function-like!arguments}% -If there is a \tcode{...} immediately preceding the \tcode{)} in the -function-like macro -definition, then the trailing arguments (if any), including any separating comma preprocessing -tokens, are merged to form a single item: the \defn{variable arguments}. The number of -arguments so combined is such that, following merger, the number of arguments is -either equal to or -one more than the number of parameters in the macro definition (excluding the -\tcode{...}). - -\rSec2[cpp.subst]{Argument substitution}% -\indextext{macro!argument substitution}% -\indextext{argument substitution|see{macro, argument substitution}}% - -\indextext{__va_opt__@\mname{VA_OPT}}% -\begin{bnf} -\nontermdef{va-opt-replacement}\br - \terminal{\mname{VA_OPT} (} \opt{pp-tokens} \terminal{)} -\end{bnf} - -\pnum -After the arguments for the invocation of a function-like macro have -been identified, argument substitution takes place. -For each parameter in the replacement list that is neither -preceded by a \tcode{\#} or \tcode{\#\#} preprocessing token nor -followed by a \tcode{\#\#} preprocessing token, the preprocessing tokens -naming the parameter are replaced by a token sequence determined as follows: -\begin{itemize} -\item - If the parameter is of the form \grammarterm{va-opt-replacement}, - the replacement preprocessing tokens are the - preprocessing token sequence for the corresponding argument, - as specified below. -\item - Otherwise, the replacement preprocessing tokens are the - preprocessing tokens of corresponding argument after all - macros contained therein have been expanded. The argument's - preprocessing tokens are completely macro replaced before - being substituted as if they formed the rest of the preprocessing - file with no other preprocessing tokens being available. -\end{itemize} -\begin{example} -\begin{codeblock} -#define LPAREN() ( -#define G(Q) 42 -#define F(R, X, ...) 
__VA_OPT__(G R X) ) -int x = F(LPAREN(), 0, <:-); // replaced by \tcode{int x = 42;} -\end{codeblock} -\end{example} - -\pnum -\indextext{__va_args__@\mname{VA_ARGS}}% -An identifier \mname{VA_ARGS} that occurs in the replacement list -shall be treated as if it were a parameter, and the variable arguments shall form -the preprocessing tokens used to replace it. - -\pnum -\begin{example} -\begin{codeblock} -#define debug(...) fprintf(stderr, @\mname{VA_ARGS}@) -#define showlist(...) puts(#@\mname{VA_ARGS}@) -#define report(test, ...) ((test) ? puts(#test) : printf(@\mname{VA_ARGS}@)) -debug("Flag"); -debug("X = %d\n", x); -showlist(The first, second, and third items.); -report(x>y, "x is %d but y is %d", x, y); -\end{codeblock} -results in -\begin{codeblock} -fprintf(stderr, "Flag"); -fprintf(stderr, "X = %d\n", x); -puts("The first, second, and third items."); -((x>y) ? puts("x>y") : printf("x is %d but y is %d", x, y)); -\end{codeblock} -\end{example} - -\pnum -\indextext{__va_opt__@\mname{VA_OPT}}% -The identifier \mname{VA_OPT} -shall always occur as part of the preprocessing token sequence -\grammarterm{va-opt-replacement}; -its closing \tcode{)} is determined by skipping -intervening pairs of matching left and right parentheses -in its \grammarterm{pp-tokens}. -The \grammarterm{pp-tokens} of a \grammarterm{va-opt-replacement} -shall not contain \mname{VA_OPT}. -If the \grammarterm{pp-tokens} would be ill-formed -as the replacement list of the current function-like macro, -the program is ill-formed. -A \grammarterm{va-opt-replacement} is treated as if it were a parameter, -and the preprocessing token sequence for the corresponding -argument is defined as follows. -If the substitution of \mname{VA_ARGS} as neither an operand -of \tcode{\#} nor \tcode{\#\#} consists of no preprocessing tokens, -the argument consists of -a single placemarker preprocessing token\iref{cpp.concat,cpp.rescan}. 
-Otherwise, the argument consists of -the results of the expansion of the contained \grammarterm{pp-tokens} -as the replacement list of the current function-like macro -before removal of placemarker tokens, rescanning, and further replacement. -\begin{note} -The placemarker tokens are removed before stringization\iref{cpp.stringize}, -and can be removed by rescanning and further replacement\iref{cpp.rescan}. -\end{note} -\begin{example} -\begin{codeblock} -#define F(...) f(0 __VA_OPT__(,) __VA_ARGS__) -#define G(X, ...) f(0, X __VA_OPT__(,) __VA_ARGS__) -#define SDEF(sname, ...) S sname __VA_OPT__(= { __VA_ARGS__ }) -#define EMP - -F(a, b, c) // replaced by \tcode{f(0, a, b, c)} -F() // replaced by \tcode{f(0)} -F(EMP) // replaced by \tcode{f(0)} - -G(a, b, c) // replaced by \tcode{f(0, a, b, c)} -G(a, ) // replaced by \tcode{f(0, a)} -G(a) // replaced by \tcode{f(0, a)} - -SDEF(foo); // replaced by \tcode{S foo;} -SDEF(bar, 1, 2); // replaced by \tcode{S bar = \{ 1, 2 \};} - -#define H1(X, ...) X __VA_OPT__(##) __VA_ARGS__ // error: \tcode{\#\#} may not appear at - // the beginning of a replacement list\iref{cpp.concat} - -#define H2(X, Y, ...) __VA_OPT__(X ## Y,) __VA_ARGS__ -H2(a, b, c, d) // replaced by \tcode{ab, c, d} - -#define H3(X, ...) #__VA_OPT__(X##X X##X) -H3(, 0) // replaced by \tcode{""} - -#define H4(X, ...) __VA_OPT__(a X ## X) ## b -H4(, 1) // replaced by \tcode{a b} - -#define H5A(...) __VA_OPT__()@\tcode{/**/}@__VA_OPT__() -#define H5B(X) a ## X ## b -#define H5C(X) H5B(X) -H5C(H5A()) // replaced by \tcode{ab} -\end{codeblock} -\end{example} - -\rSec2[cpp.stringize]{The \tcode{\#} operator}% -\indextext{\#\#0 operator@\tcode{\#} operator}% -\indextext{stringize|see{\tcode{\#} operator}} - -\pnum -Each -\tcode{\#} -preprocessing token in the replacement list for a function-like -macro shall be followed by a parameter as the next preprocessing -token in the replacement list. 
- -\pnum -A \defn{character string literal} is a \grammarterm{string-literal} with no prefix. -If, in the replacement list, a parameter is immediately -preceded by a -\tcode{\#} -preprocessing token, -both are replaced by a single character string literal preprocessing token that -contains the spelling of the preprocessing token sequence for the -corresponding argument (excluding placemarker tokens). -Let the \defn{stringizing argument} be the preprocessing token sequence -for the corresponding argument with placemarker tokens removed. -Each occurrence of whitespace between the stringizing argument's preprocessing -tokens becomes a single space character in the character string literal. -Whitespace before the first preprocessing token and after the last -preprocessing token comprising the stringizing argument is deleted. -Otherwise, the original spelling of each preprocessing token in the -stringizing argument is retained in the character string literal, -except for special handling for producing the spelling of -\grammarterm{string-literal}s and \grammarterm{character-literal}s: -a -\tcode{\textbackslash} -character is inserted before each -\tcode{"} -and -\tcode{\textbackslash} -character of a \grammarterm{character-literal} or \grammarterm{string-literal} -(including the delimiting -\tcode{"} -characters). -If the replacement that results is not a valid character string literal, -the behavior is undefined. The character string literal corresponding to -an empty stringizing argument is \tcode{""}. -The order of evaluation of -\tcode{\#} -and -\tcode{\#\#} -operators is unspecified. - -\rSec2[cpp.concat]{The \tcode{\#\#} operator}% -\indextext{\#\#1 operator@\tcode{\#\#} operator}% -\indextext{concatenation!macro argument|see{\tcode{\#\#} operator}} - -\pnum -A -\tcode{\#\#} -preprocessing token shall not occur at the beginning or -at the end of a replacement list for either form -of macro definition. 
- -\pnum -If, in the replacement list of a function-like macro, a parameter is -immediately preceded or followed by a -\tcode{\#\#} -preprocessing token, the parameter is replaced by the -corresponding argument's preprocessing token sequence; however, if an argument consists of no preprocessing tokens, the parameter is -replaced by a placemarker preprocessing token instead. -\begin{footnote} -Placemarker preprocessing tokens do not appear in the syntax -because they are temporary entities that exist only within translation phase 4. -\end{footnote} - -\pnum -For both object-like and function-like macro invocations, before the -replacement list is reexamined for more macro names to replace, -each instance of a -\tcode{\#\#} -preprocessing token in the replacement list -(not from an argument) is deleted and the -preceding preprocessing token is concatenated -with the following preprocessing token. -Placemarker preprocessing tokens are handled specially: concatenation -of two placemarkers results in a single placemarker preprocessing token, and -concatenation of a placemarker with a non-placemarker preprocessing token results -in the non-placemarker preprocessing token. -\begin{note} -Concatenation can form -a \grammarterm{universal-character-name}\iref{lex.charset}. -\end{note} -If the result is not a valid preprocessing token, -the behavior is undefined. -The resulting token is available for further macro replacement. -The order of evaluation of -\tcode{\#\#} -operators is unspecified. 
- -\pnum -\begin{example} -The sequence -\begin{codeblock} -#define str(s) # s -#define xstr(s) str(s) -#define debug(s, t) printf("x" # s "= %d, x" # t "= %s", @\textbackslash@ - x ## s, x ## t) -#define INCFILE(n) vers ## n -#define glue(a, b) a ## b -#define xglue(a, b) glue(a, b) -#define HIGHLOW "hello" -#define LOW LOW ", world" - -debug(1, 2); -fputs(str(strncmp("abc@\textbackslash@0d", "abc", '@\textbackslash@4') // this goes away - == 0) str(: @\atsign\textbackslash@n), s); -#include xstr(INCFILE(2).h) -glue(HIGH, LOW); -xglue(HIGH, LOW) -\end{codeblock} -results in -\begin{codeblock} -printf("x" "1" "= %d, x" "2" "= %s", x1, x2); -fputs("strncmp(@\textbackslash@"abc@\textbackslash\textbackslash@0d@\textbackslash@", @\textbackslash@"abc@\textbackslash@", '@\textbackslash\textbackslash@4') == 0" ": @\atsign\textbackslash@n", s); -#include "vers2.h" @\textrm{(\textit{after macro replacement, before file access})}@ -"hello"; -"hello" ", world" -\end{codeblock} -or, after concatenation of the character string literals, -\begin{codeblock} -printf("x1= %d, x2= %s", x1, x2); -fputs("strncmp(@\textbackslash@"abc@\textbackslash\textbackslash@0d@\textbackslash@", @\textbackslash@"abc@\textbackslash@", '@\textbackslash\textbackslash@4') == 0: @\atsign\textbackslash@n", s); -#include "vers2.h" @\textrm{(\textit{after macro replacement, before file access})}@ -"hello"; -"hello, world" -\end{codeblock} - -Space around the \tcode{\#} and \tcode{\#\#} tokens in the macro definition -is optional. 
-\end{example} - -\pnum -\begin{example} -In the following fragment: - -\begin{codeblock} -#define hash_hash # ## # -#define mkstr(a) # a -#define in_between(a) mkstr(a) -#define join(c, d) in_between(c hash_hash d) -char p[] = join(x, y); // equivalent to \tcode{char p[] = "x \#\# y";} -\end{codeblock} - -The expansion produces, at various stages: - -\begin{codeblock} -join(x, y) -in_between(x hash_hash y) -in_between(x ## y) -mkstr(x ## y) -"x ## y" -\end{codeblock} - -In other words, expanding \tcode{hash_hash} produces a new token, -consisting of two adjacent sharp signs, but this new token is not the -\tcode{\#\#} operator. -\end{example} - -\pnum -\begin{example} -To illustrate the rules for placemarker preprocessing tokens, the sequence -\begin{codeblock} -#define t(x,y,z) x ## y ## z -int j[] = { t(1,2,3), t(,4,5), t(6,,7), t(8,9,), - t(10,,), t(,11,), t(,,12), t(,,) }; -\end{codeblock} -results in -\begin{codeblock} -int j[] = { 123, 45, 67, 89, - 10, 11, 12, }; -\end{codeblock} -\end{example} - -\rSec2[cpp.rescan]{Rescanning and further replacement}% -\indextext{macro!rescanning and replacement}% -\indextext{rescanning and replacement|see{macro, rescanning and replacement}} - -\pnum -After all parameters in the replacement list have been substituted and \tcode{\#} and \tcode{\#\#} processing has taken -place, all placemarker preprocessing tokens are removed. Then -the resulting preprocessing token sequence is rescanned, along with all -subsequent preprocessing tokens of the source file, for more macro names -to replace. 
- -\pnum -\begin{example} -The sequence -\begin{codeblock} -#define x 3 -#define f(a) f(x * (a)) -#undef x -#define x 2 -#define g f -#define z z[0] -#define h g(~ -#define m(a) a(w) -#define w 0,1 -#define t(a) a -#define p() int -#define q(x) x -#define r(x,y) x ## y -#define str(x) # x - -f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); -g(x+(3,4)-w) | h 5) & m - (f)^m(m); -p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) }; -char c[2][6] = { str(hello), str() }; -\end{codeblock} -results in -\begin{codeblock} -f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1); -f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1); -int i[] = { 1, 23, 4, 5, }; -char c[2][6] = { "hello", "" }; -\end{codeblock} -\end{example} - -\pnum -If the name of the macro being replaced is found during this scan of -the replacement list -(not including the rest of the source file's preprocessing tokens), -it is not replaced. -Furthermore, -if any nested replacements encounter the name of the macro being replaced, -it is not replaced. -These nonreplaced macro name preprocessing tokens are no longer available -for further replacement even if they are later (re)examined in contexts -in which that macro name preprocessing token would otherwise have been -replaced. - -\pnum -The resulting completely macro-replaced preprocessing token sequence -is not processed as a preprocessing directive even if it resembles one, -but all pragma unary operator expressions within it are then processed as -specified in~\ref{cpp.pragma.op} below. - -\rSec2[cpp.scope]{Scope of macro definitions}% -\indextext{macro!scope of definition}% -\indextext{scope!macro definition|see{macro, scope of definition}} - -\pnum -A macro definition lasts -(independent of block structure) -until a corresponding -\tcode{\#undef} -directive is encountered or -(if none is encountered) -until the end of the translation unit. -Macro definitions have no significance after translation phase 4. 
- -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# undef} identifier new-line -\indextext{\idxcode{\#undef}}% -\end{ncsimplebnf} -causes the specified identifier no longer to be defined as a macro name. -It is ignored if the specified identifier is not currently defined as -a macro name. - -\indextext{macro!replacement|)} - -\rSec1[cpp.line]{Line control}% -\indextext{preprocessing directive!line control}% -\indextext{\idxcode{\#line}|see{preprocessing directive, line control}} - -\pnum -The \grammarterm{string-literal} of a -\tcode{\#line} -directive, if present, -shall be a character string literal. - -\pnum -The -\defn{line number} -of the current source line is one greater than -the number of new-line characters read or introduced -in translation phase 1\iref{lex.phases} -while processing the source file to the current token. - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# line} digit-sequence new-line -\end{ncsimplebnf} -causes the implementation to behave as if -the following sequence of source lines begins with a -source line that has a line number as specified -by the digit sequence (interpreted as a decimal integer). -If the digit sequence specifies zero -or a number greater than 2147483647, -the behavior is undefined. - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# line} digit-sequence \terminal{"} \opt{s-char-sequence} \terminal{"} new-line -\end{ncsimplebnf} -sets the presumed line number similarly and changes the -presumed name of the source file to be the contents -of the character string literal. - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# line} pp-tokens new-line -\end{ncsimplebnf} -(that does not match one of the two previous forms) -is permitted. 
-The preprocessing tokens after -\tcode{line} -on the directive are processed just as in normal text -(each identifier currently defined as a macro name is replaced by its -replacement list of preprocessing tokens). -If the directive resulting after all replacements does not match -one of the two previous forms, the behavior is undefined; -otherwise, the result is processed as appropriate. - -\rSec1[cpp.error]{Diagnostic directives}% -\indextext{preprocessing directive!error}% -\indextext{preprocessing directive!diagnostic}% -\indextext{preprocessing directive!warning}% -\indextext{\idxcode{\#error}|see{preprocessing directive, error}} - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# error} \opt{pp-tokens} new-line -\end{ncsimplebnf} -renders the program ill-formed. -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# warning} \opt{pp-tokens} new-line -\end{ncsimplebnf} -requires the implementation to produce at least one diagnostic message -for the preprocessing translation unit\iref{intro.compliance.general}. -\recommended -Any diagnostic message caused by either of these directives -should include the specified sequence of preprocessing tokens. - -\rSec1[cpp.pragma]{Pragma directive}% -\indextext{preprocessing directive!pragma}% -\indextext{\idxcode{\#pragma}|see{preprocessing directive, pragma}} - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# pragma} \opt{pp-tokens} new-line -\end{ncsimplebnf} -causes the implementation to behave -in an \impldef{\tcode{\#pragma}} manner. -The behavior may cause translation to fail or cause the translator or -the resulting program to behave in a non-conforming manner. -Any pragma that is not recognized by the implementation is ignored. - -\rSec1[cpp.null]{Null directive}% -\indextext{preprocessing directive!null} - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\#} new-line -\end{ncsimplebnf} -has no effect. 
- -\rSec1[cpp.predefined]{Predefined macro names} -\indextext{macro!predefined}% -\indextext{name!predefined macro|see{macro, predefined}} - -\pnum -The following macro names shall be defined by the implementation: - -\begin{description} - -\item -\indextext{\idxxname{cplusplus}}% -\xname{cplusplus}\\ -The integer literal \tcode{\cppver}. -\begin{note} -Future revisions of this document will -replace the value of this macro with a greater value. -\end{note} - -\item The names listed in \tref{cpp.predefined.ft}.\\ -The macros defined in \tref{cpp.predefined.ft} shall be defined to -the corresponding integer literal. -\begin{note} -Future revisions of this document might replace -the values of these macros with greater values. -\end{note} - -\item -\indextext{__date__@\mname{DATE}}% -\mname{DATE}\\ -The date of translation of the source file: -a character string literal of the form -\tcode{"Mmm~dd~yyyy"}, -where the names of the months are the same as those generated -by the -\tcode{asctime} -function, -and the first character of -\tcode{dd} -is a space character if the value is less than 10. -If the date of translation is not available, -an \impldef{text of \mname{DATE} when date of translation is not available} valid date -shall be supplied. - -\item -\indextext{__file__@\mname{FILE}}% -\mname{FILE}\\ -The presumed name of the current source file (a character string -literal). -\begin{footnote} -The presumed source file name can be changed by the \tcode{\#line} directive. -\end{footnote} - -\item -\indextext{__line__@\mname{LINE}}% -\mname{LINE}\\ -The presumed line number (within the current source file) of the current source line -(an integer literal). -\begin{footnote} -The presumed line number can be changed by the \tcode{\#line} directive. 
-\end{footnote} - -\item -\indextext{__stdc_hosted__@\mname{STDC_HOSTED}}% -\indextext{implementation!hosted}% -\indextext{implementation!freestanding}% -\mname{STDC_HOSTED}\\ -The integer literal \tcode{1} -if the implementation is a hosted implementation or -the integer literal \tcode{0} -if it is a freestanding implementation\iref{intro.compliance}. - -\item -\indextext{__stdcpp_default_new_alignment__@\mname{STDCPP_DEFAULT_NEW_ALIGNMENT}}% -\mname{STDCPP_DEFAULT_NEW_ALIGNMENT}\\ -An integer literal of type \tcode{std::size_t} -whose value is the alignment guaranteed -by a call to \tcode{operator new(std::size_t)} -or \tcode{operator new[](std::size_t)}. -\begin{note} -Larger alignments will be passed to -\tcode{operator new(std::size_t, std::align_val_t)}, etc.\iref{expr.new}. -\end{note} - -\item -\indextext{__stdcpp_float16_t__@\mname{STDCPP_FLOAT16_T}}% -\mname{STDCPP_FLOAT16_T}\\ -Defined as the integer literal \tcode{1} -if and only if the implementation supports -the ISO/IEC/IEEE 60559 floating-point interchange format binary16 -as an extended floating-point type\iref{basic.extended.fp}. - -\item -\indextext{__stdcpp_float32_t__@\mname{STDCPP_FLOAT32_T}}% -\mname{STDCPP_FLOAT32_T}\\ -Defined as the integer literal \tcode{1} -if and only if the implementation supports -the ISO/IEC/IEEE 60559 floating-point interchange format binary32 -as an extended floating-point type. - -\item -\indextext{__stdcpp_float64_t__@\mname{STDCPP_FLOAT64_T}}% -\mname{STDCPP_FLOAT64_T}\\ -Defined as the integer literal \tcode{1} -if and only if the implementation supports -the ISO/IEC/IEEE 60559 floating-point interchange format binary64 -as an extended floating-point type. - -\item -\indextext{__stdcpp_float128_t__@\mname{STDCPP_FLOAT128_T}}% -\mname{STDCPP_FLOAT128_T}\\ -Defined as the integer literal \tcode{1} -if and only if the implementation supports -the ISO/IEC/IEEE 60559 floating-point interchange format binary128 -as an extended floating-point type. 
- -\item -\indextext{__stdcpp_bfloat16_t__@\mname{STDCPP_BFLOAT16_T}}% -\mname{STDCPP_BFLOAT16_T}\\ -Defined as the integer literal \tcode{1} -if and only if the implementation supports an extended floating-point type -with the properties of the \grammarterm{typedef-name} \tcode{std::bfloat16_t} -as described in \ref{basic.extended.fp}. - -\item -\indextext{__time__@\mname{TIME}}% -\mname{TIME}\\ -The time of translation of the source file: -a character string literal of the form -\tcode{"hh:mm:ss"} -as in the time generated by the -\tcode{asctime} -function. -If the time of translation is not available, -an \impldef{text of \mname{TIME} when time of translation is not available} valid time shall be supplied. -\end{description} - -\indextext{macro!feature-test}% -\indextext{feature-test macro|see{macro, feature-test}}% -\begin{LongTable}{Feature-test macros}{cpp.predefined.ft}{ll} -\\ \topline -\lhdr{Macro name} & \rhdr{Value} \\ \capsep -\endfirsthead -\continuedcaption \\ -\hline -\lhdr{Name} & \rhdr{Value} \\ \capsep -\endhead -\defnxname{cpp_aggregate_bases} & \tcode{201603L} \\ \rowsep -\defnxname{cpp_aggregate_nsdmi} & \tcode{201304L} \\ \rowsep -\defnxname{cpp_aggregate_paren_init} & \tcode{201902L} \\ \rowsep -\defnxname{cpp_alias_templates} & \tcode{200704L} \\ \rowsep -\defnxname{cpp_aligned_new} & \tcode{201606L} \\ \rowsep -\defnxname{cpp_attributes} & \tcode{200809L} \\ \rowsep -\defnxname{cpp_auto_cast} & \tcode{202110L} \\ \rowsep -\defnxname{cpp_binary_literals} & \tcode{201304L} \\ \rowsep -\defnxname{cpp_capture_star_this} & \tcode{201603L} \\ \rowsep -\defnxname{cpp_char8_t} & \tcode{202207L} \\ \rowsep -\defnxname{cpp_concepts} & \tcode{202002L} \\ \rowsep -\defnxname{cpp_conditional_explicit} & \tcode{201806L} \\ \rowsep -\defnxname{cpp_constexpr} & \tcode{202306L} \\ \rowsep -\defnxname{cpp_constexpr_dynamic_alloc} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_constexpr_in_decltype} & \tcode{201711L} \\ \rowsep -\defnxname{cpp_consteval} & 
\tcode{202211L} \\ \rowsep -\defnxname{cpp_constinit} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_decltype} & \tcode{200707L} \\ \rowsep -\defnxname{cpp_decltype_auto} & \tcode{201304L} \\ \rowsep -\defnxname{cpp_deduction_guides} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_delegating_constructors} & \tcode{200604L} \\ \rowsep -\defnxname{cpp_deleted_function} & \tcode{202403L} \\ \rowsep -\defnxname{cpp_designated_initializers} & \tcode{201707L} \\ \rowsep -\defnxname{cpp_enumerator_attributes} & \tcode{201411L} \\ \rowsep -\defnxname{cpp_explicit_this_parameter} & \tcode{202110L} \\ \rowsep -\defnxname{cpp_fold_expressions} & \tcode{201603L} \\ \rowsep -\defnxname{cpp_generic_lambdas} & \tcode{201707L} \\ \rowsep -\defnxname{cpp_guaranteed_copy_elision} & \tcode{201606L} \\ \rowsep -\defnxname{cpp_hex_float} & \tcode{201603L} \\ \rowsep -\defnxname{cpp_if_consteval} & \tcode{202106L} \\ \rowsep -\defnxname{cpp_if_constexpr} & \tcode{201606L} \\ \rowsep -\defnxname{cpp_impl_coroutine} & \tcode{201902L} \\ \rowsep -\defnxname{cpp_impl_destroying_delete} & \tcode{201806L} \\ \rowsep -\defnxname{cpp_impl_three_way_comparison} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_implicit_move} & \tcode{202207L} \\ \rowsep -\defnxname{cpp_inheriting_constructors} & \tcode{201511L} \\ \rowsep -\defnxname{cpp_init_captures} & \tcode{201803L} \\ \rowsep -\defnxname{cpp_initializer_lists} & \tcode{200806L} \\ \rowsep -\defnxname{cpp_inline_variables} & \tcode{201606L} \\ \rowsep -\defnxname{cpp_lambdas} & \tcode{200907L} \\ \rowsep -\defnxname{cpp_modules} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_multidimensional_subscript} & \tcode{202211L} \\ \rowsep -\defnxname{cpp_named_character_escapes} & \tcode{202207L} \\ \rowsep -\defnxname{cpp_namespace_attributes} & \tcode{201411L} \\ \rowsep -\defnxname{cpp_noexcept_function_type} & \tcode{201510L} \\ \rowsep -\defnxname{cpp_nontype_template_args} & \tcode{201911L} \\ \rowsep -\defnxname{cpp_nontype_template_parameter_auto} & 
\tcode{201606L} \\ \rowsep -\defnxname{cpp_nsdmi} & \tcode{200809L} \\ \rowsep -\defnxname{cpp_pack_indexing} & \tcode{202311L} \\ \rowsep -\defnxname{cpp_placeholder_variables} & \tcode{202306L} \\ \rowsep -\defnxname{cpp_range_based_for} & \tcode{202211L} \\ \rowsep -\defnxname{cpp_raw_strings} & \tcode{200710L} \\ \rowsep -\defnxname{cpp_ref_qualifiers} & \tcode{200710L} \\ \rowsep -\defnxname{cpp_return_type_deduction} & \tcode{201304L} \\ \rowsep -\defnxname{cpp_rvalue_references} & \tcode{200610L} \\ \rowsep -\defnxname{cpp_size_t_suffix} & \tcode{202011L} \\ \rowsep -\defnxname{cpp_sized_deallocation} & \tcode{201309L} \\ \rowsep -\defnxname{cpp_static_assert} & \tcode{202306L} \\ \rowsep -\defnxname{cpp_static_call_operator} & \tcode{202207L} \\ \rowsep -\defnxname{cpp_structured_bindings} & \tcode{202403L} \\ \rowsep -\defnxname{cpp_template_template_args} & \tcode{201611L} \\ \rowsep -\defnxname{cpp_threadsafe_static_init} & \tcode{200806L} \\ \rowsep -\defnxname{cpp_unicode_characters} & \tcode{200704L} \\ \rowsep -\defnxname{cpp_unicode_literals} & \tcode{200710L} \\ \rowsep -\defnxname{cpp_user_defined_literals} & \tcode{200809L} \\ \rowsep -\defnxname{cpp_using_enum} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_variable_templates} & \tcode{201304L} \\ \rowsep -\defnxname{cpp_variadic_friend} & \tcode{202403L} \\ \rowsep -\defnxname{cpp_variadic_templates} & \tcode{200704L} \\ \rowsep -\defnxname{cpp_variadic_using} & \tcode{201611L} \\ -\end{LongTable} - -\pnum -The following macro names are conditionally defined by the implementation: - -\begin{description} -\item -\indextext{__stdc__@\mname{STDC}}% -\mname{STDC}\\ -Whether \mname{STDC} is predefined and if so, what its value is, -are \impldef{definition and meaning of \mname{STDC}}. 
- -\item -\indextext{__stdc_mb_might_neq_wc__@\mname{STDC_MB_MIGHT_NEQ_WC}}% -\mname{STDC_MB_MIGHT_NEQ_WC}\\ -The integer literal \tcode{1}, intended to indicate that, in the encoding for -\keyword{wchar_t}, a member of the basic character set need not have a code value equal to -its value when used as the lone character in an ordinary character literal. - -\item -\indextext{__stdc_version__@\mname{STDC_VERSION}}% -\mname{STDC_VERSION}\\ -Whether \mname{STDC_VERSION} is predefined and if so, what its value is, -are \impldef{definition and meaning of \mname{STDC_VERSION}}. - -\item -\indextext{__stdc_iso_10646__@\mname{STDC_ISO_10646}}% -\mname{STDC_ISO_10646}\\ -An integer literal of the form \tcode{yyyymmL} -(for example, \tcode{199712L}). -Whether \mname{STDC_ISO_10646} is predefined and -if so, what its value is, -are \impldef{presence and value of \mname{STDC_ISO_10646}}. - -\item -\indextext{__stdcpp_threads__@\mname{STDCPP_THREADS}}% -\mname{STDCPP_THREADS}\\ -Defined, and has the value integer literal 1, if and only if a program -can have more than one thread of execution\iref{intro.multithread}. - -\end{description} - -\pnum -The values of the predefined macros -(except for -\mname{FILE} -and -\mname{LINE}) -remain constant throughout the translation unit. - -\pnum -If any of the pre-defined macro names in this subclause, -or the identifier -\tcode{defined}, -is the subject of a -\tcode{\#define} -or a -\tcode{\#undef} -preprocessing directive, -the behavior is undefined. -Any other predefined macro names shall begin with a -leading underscore followed by an uppercase letter or a second -underscore. 
- -\rSec1[cpp.pragma.op]{Pragma operator}% -\indextext{macro!pragma operator}% -\indextext{operator!pragma|see{macro, pragma operator}} - -\pnum -A unary operator expression of the form: -\begin{ncbnf} -\terminal{_Pragma} \terminal{(} string-literal \terminal{)} -\end{ncbnf} -is processed as follows: The \grammarterm{string-literal} is \defnx{destringized}{destringization} -by deleting the \tcode{L} prefix, if present, deleting the leading and trailing -double-quotes, replacing each escape sequence \tcode{\textbackslash"} by a double-quote, and -replacing each escape sequence \tcode{\textbackslash\textbackslash} by a single -backslash. The resulting sequence of characters is processed through translation phase 3 -to produce preprocessing tokens that are executed as if they were the -\grammarterm{pp-tokens} in a pragma directive. The original four preprocessing -tokens in the unary operator expression are removed. - -\pnum -\begin{example} -\begin{codeblock} -#pragma listing on "..\listing.dir" -\end{codeblock} -can also be expressed as: -\begin{codeblock} -_Pragma ( "listing on \"..\\listing.dir\"" ) -\end{codeblock} -The latter form is processed in the same way whether it appears literally -as shown, or results from macro replacement, as in: -\begin{codeblock} -#define LISTING(x) PRAGMA(listing on #x) -#define PRAGMA(x) _Pragma(#x) - -LISTING( ..\listing.dir ) -\end{codeblock} -\end{example} -\indextext{preprocessing directive|)} diff --git a/source/std.tex b/source/std.tex index c33511f613..b16cd4b97c 100644 --- a/source/std.tex +++ b/source/std.tex @@ -106,7 +106,6 @@ \include{intro} \include{lex} -\include{preprocessor} \include{modules} \include{basic} \include{expressions} From a149f603247d5b14d63c148cec6c21d8f0849080 Mon Sep 17 00:00:00 2001 From: Alisdair Meredith Date: Tue, 16 Jul 2024 09:58:00 -0400 Subject: [PATCH 08/10] Further work cleaning up preprocessor tokens --- source/lex.tex | 188 +++++++++++++++++++++++++------------------------ 1 file changed, 
97 insertions(+), 91 deletions(-) diff --git a/source/lex.tex b/source/lex.tex index 355fa13f70..9f26e9fb04 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -35,8 +35,9 @@ conditional inclusion\iref{cpp.cond} preprocessing directives, as modified by the implementation-defined behavior of any conditionally-supported-directives\iref{cpp.pre} and pragmas\iref{cpp.pragma}, -if any, is -called a \defnadj{preprocessing}{translation unit}. +if any, is called a \defnadj{preprocessing}{translation unit}. + +\pnum \begin{note} A \Cpp{} program need not all be translated at the same time. \end{note} @@ -45,8 +46,7 @@ \begin{note} Previously translated translation units and instantiation units can be preserved individually or in libraries. The separate -translation units of a program communicate\iref{basic.link} by (for -example) +translation units of a program communicate\iref{basic.link} by (for example) calls to functions whose identifiers have external or module linkage, manipulation of objects whose identifiers have external or module linkage, or manipulation of data files. Translation units can be separately @@ -55,36 +55,40 @@ \end{note} \indextext{compilation!separate|)} -\rSec1[lex.phases]{Phases of translation}% - \pnum \indextext{translation!phases|(}% The precedence among the syntax rules of translation is specified by the -following phases. -\begin{footnote} +following phases of translation\iref{lex.phases}. + +\pnum +\begin{note} Implementations behave as if these separate phases occur, although in practice different phases can be folded together. -\end{footnote} +\end{note} + +\rSec1[lex.phases]{Phases of translation}% \rSec2[lex.phase.1]{Mapping to translation characters}% \pnum \indextext{character!source file}% -An implementation shall support input files +An implementation shall support source files that are a sequence of UTF-8 code units (UTF-8 files). 
It may also support -an \impldef{supported input files} set of other kinds of input files, and, -if so, the kind of an input file is determined in -an \impldef{determination of kind of input file} manner -that includes a means of designating input files as UTF-8 files, +an \impldef{supported source files} set of other kinds of source files, and, +if so, the kind of a source file is determined in +an \impldef{determination of kind of source file} manner +that includes a means of designating source files as UTF-8 files, independent of their content. + +\pnum \begin{note} In other words, recognizing the \unicode{feff}{byte order mark} is not sufficient. \end{note} \pnum -If an input file is determined to be a UTF-8 file, +If a source file is determined to be a UTF-8 file, then it shall be a well-formed UTF-8 code unit sequence and it is decoded to produce a sequence of Unicode \begin{footnote} @@ -104,7 +108,7 @@ is replaced by a single new-line character. \pnum -For any other kind of input file supported by the implementation, +For any other kind of source file supported by the implementation, characters are mapped, in an \impldef{mapping physical source file characters to translation character set} manner, to a sequence of translation character set elements\iref{lex.charset}, @@ -122,6 +126,8 @@ physical source lines to form logical source lines. Only the last backslash on any physical source line shall be eligible for being part of such a splice. + +\pnum \begin{note} Line splicing can form a \grammarterm{universal-character-name}\iref{lex.charset}. @@ -162,6 +168,8 @@ character in such a comment, only whitespace characters shall appear between it and the new-line that terminates the comment; no diagnostic is required. + +\pnum \begin{note} The comment characters \tcode{//}, \tcode{/*}, and \tcode{*/} have no special meaning within a \tcode{//} comment and @@ -224,8 +232,7 @@ significant. Each preprocessing token is converted into a token\iref{lex.token}. 
The resulting tokens constitute a \defn{translation unit} and -are syntactically and -semantically analyzed and translated. +are syntactically and semantically analyzed and translated. \begin{note} The process of analyzing and translating the tokens can occasionally result in one token being replaced by a sequence of other @@ -233,13 +240,10 @@ \end{note} \pnum -It is -\impldef{whether the sources for -module units and header units +It is \impldef{whether the sources for module units and header units on which the current translation unit has an interface dependency are required to be available during translation} -whether the sources for -module units and header units +whether the sources for module units and header units on which the current translation unit has an interface dependency\iref{module.unit,module.import} are required to be available. @@ -271,11 +275,11 @@ \end{note} \pnum -The definitions of the -required templates are located. It is \impldef{whether source of translation units must +The definitions of the required templates are located. +It is \impldef{whether source of translation units must be available to locate template definitions} whether the -source of the translation units containing these definitions is required -to be available. +source of the translation units containing these definitions +is required to be available. \begin{note} An implementation can choose to encode sufficient information into the translated translation unit so as to ensure the @@ -283,18 +287,15 @@ \end{note} \pnum -All the required instantiations -are performed to produce -\defn{instantiation units}. +All the required instantiations are performed +to produce \defn{instantiation units}. \begin{note} -These are similar -to translated translation units, but contain no references to +These are similar to translated translation units, but contain no references to uninstantiated templates and no template definitions. 
\end{note} \pnum -The -program is ill-formed if any instantiation fails. +The program is ill-formed if any instantiation fails. \rSec2[lex.phase.9]{Linking}% @@ -533,15 +534,15 @@ \nontermdef{preprocessing-token}\br header-name\br pp-number\br + character-literal\br + user-defined-character-literal\br + string-literal\br + user-defined-string-literal\br preprocessing-op-or-punc\br identifier\br import-keyword\br module-keyword\br export-keyword\br - character-literal\br - user-defined-character-literal\br - string-literal\br - user-defined-string-literal\br \textnormal{each non-whitespace character that cannot be one of the above} \end{bnf} @@ -551,17 +552,20 @@ In this document, glyphs are used to identify elements of the basic character set\iref{lex.charset}. -The categories of preprocessing token are: header names, -placeholder tokens produced by preprocessing \tcode{import} and \tcode{module} directives -(\grammarterm{import-keyword}, \grammarterm{module-keyword}, and \grammarterm{export-keyword}), -identifiers, preprocessing numbers, character literals (including user-defined character -literals), string literals (including user-defined string literals), preprocessing -operators and punctuators, and single non-whitespace characters that do not lexically -match the other preprocessing token categories. +The categories of preprocessing token are: header names, preprocessing numbers, +character literals (including user-defined character literals), string literals +(including user-defined string literals), preprocessing operators and punctuators, +identifiers, placeholder tokens produced by preprocessing \tcode{import} and +\tcode{module} directives (\grammarterm{import-keyword}, +\grammarterm{module-keyword}, and \grammarterm{export-keyword}), +and single non-whitespace characters that do not lexically match the other +preprocessing token categories. 
If a \unicode{0027}{apostrophe} or a \unicode{0022}{quotation mark} character matches the last category, the program is ill-formed. If any character not in the basic character set matches the last category, the program is ill-formed. + +\pnum Preprocessing tokens can be separated by \indextext{whitespace}% whitespace; @@ -572,6 +576,8 @@ new-line, \unicode{000b}{line tabulation}, and \unicode{000c}{form feed}), or both. + +\pnum As described in \ref{cpp}, in certain circumstances during translation phase 4, whitespace (or the absence thereof) serves as more than preprocessing token separation. Whitespace @@ -579,11 +585,6 @@ between the quotation characters in a character literal or string literal. -\pnum -Each preprocessing token that is converted to a token\iref{lex.token} -shall have the lexical form of a keyword, an identifier, a literal, -or an operator or punctuator. - \pnum If the input stream has been parsed into preprocessing tokens up to a given character: @@ -790,47 +791,6 @@ a \grammarterm{floating-point-literal} token.% \indextext{number!preprocessing|)} -\rSec2[lex.operators]{Operators and punctuators} - -\pnum -\indextext{operator|(}% -\indextext{punctuator|(}% -The lexical representation of \Cpp{} programs includes a number of -preprocessing tokens that are used in the syntax of the preprocessor or -are converted into tokens for operators and punctuators: - -\begin{bnf} -\nontermdef{preprocessing-op-or-punc}\br - preprocessing-operator\br - operator-or-punctuator -\end{bnf} - -\begin{bnf} -%% Ed. note: character protrusion would misalign various operators. -\microtypesetup{protrusion=false}\obeyspaces -\nontermdef{preprocessing-operator} \textnormal{one of}\br - \terminal{\# \#\# \%: \%:\%:} -\end{bnf} - -\begin{bnf} -\microtypesetup{protrusion=false}\obeyspaces -\nontermdef{operator-or-punctuator} \textnormal{one of}\br - \terminal{\{ \} [ ] ( )}\br - \terminal{<: :> <\% \%> ; : ...}\br - \terminal{? :: . .* -> ->* \~}\br - \terminal{! 
+ - * / \% \caret{} \& |}\br - \terminal{= += -= *= /= \%= \caret{}= \&= |=}\br - \terminal{== != < > <= >= <=> \&\& ||}\br - \terminal{<< >> <<= >>= ++ -- ,}\br - \terminal{\keyword{and} \keyword{or} \keyword{xor} \keyword{not} \keyword{bitand} \keyword{bitor} \keyword{compl}}\br - \terminal{\keyword{and_eq} \keyword{or_eq} \keyword{xor_eq} \keyword{not_eq}} -\end{bnf} - -Each \grammarterm{operator-or-punctuator} is converted to a single token -in translation phase 7\iref{lex.phase.7}.% -\indextext{punctuator|)}% -\indextext{operator|)} - \rSec2[lex.ppliteral]{Literals} \rSec3[lex.ccon]{Character literals} @@ -1388,6 +1348,47 @@ what effect these sequences have on encoding state. \end{itemize} +\rSec2[lex.operators]{Operators and punctuators} + +\pnum +\indextext{operator|(}% +\indextext{punctuator|(}% +The lexical representation of \Cpp{} programs includes a number of +preprocessing tokens that are used in the syntax of the preprocessor or +are converted into tokens for operators and punctuators: + +\begin{bnf} +\nontermdef{preprocessing-op-or-punc}\br + preprocessing-operator\br + operator-or-punctuator +\end{bnf} + +\begin{bnf} +%% Ed. note: character protrusion would misalign various operators. +\microtypesetup{protrusion=false}\obeyspaces +\nontermdef{preprocessing-operator} \textnormal{one of}\br + \terminal{\# \#\# \%: \%:\%:} +\end{bnf} + +\begin{bnf} +\microtypesetup{protrusion=false}\obeyspaces +\nontermdef{operator-or-punctuator} \textnormal{one of}\br + \terminal{\{ \} [ ] ( )}\br + \terminal{<: :> <\% \%> ; : ...}\br + \terminal{? :: . .* -> ->* \~}\br + \terminal{! 
+ - * / \% \caret{} \& |}\br + \terminal{= += -= *= /= \%= \caret{}= \&= |=}\br + \terminal{== != < > <= >= <=> \&\& ||}\br + \terminal{<< >> <<= >>= ++ -- ,}\br + \terminal{\keyword{and} \keyword{or} \keyword{xor} \keyword{not} \keyword{bitand} \keyword{bitor} \keyword{compl}}\br + \terminal{\keyword{and_eq} \keyword{or_eq} \keyword{xor_eq} \keyword{not_eq}} +\end{bnf} + +Each \grammarterm{operator-or-punctuator} is converted to a single token +in translation phase 7\iref{lex.phase.7}.% +\indextext{punctuator|)}% +\indextext{operator|)} + \rSec2[lex.name]{Identifiers} \indextext{identifier|(}% @@ -3529,6 +3530,11 @@ operator-or-punctuator \end{bnf} +\pnum +Each preprocessing token that is converted to a token\iref{lex.token} +shall have the lexical form of a keyword, an identifier, a literal, +or an operator or punctuator. + \pnum \indextext{\idxgram{token}}% There are five kinds of tokens: identifiers, keywords, literals,% From 4f52901a74b6c2ff5661d4a3a88a72a518c62e04 Mon Sep 17 00:00:00 2001 From: Alisdair Meredith Date: Tue, 16 Jul 2024 15:03:23 -0400 Subject: [PATCH 09/10] [lex.separate][module.unit] move definitions of program and translation unit The definition of program at the top of [basic.link] should move to the front of [lex.separate] so that it is defined before its first usage, and also clarifies that the phases of translation produce. Similarly, move the definition of the grammar production translation-unit to the top of the first clause to actually use it, [module.unit]. Finally, retitle [basic.link] as just Linkage, rather than programs and linkage. 
--- source/basic.tex | 49 ++++++++++++---------------------------------- source/lex.tex | 5 +++++ source/modules.tex | 8 ++++++++ 3 files changed, 26 insertions(+), 36 deletions(-) diff --git a/source/basic.tex b/source/basic.tex index de2e129033..32d904ea72 100644 --- a/source/basic.tex +++ b/source/basic.tex @@ -2643,45 +2643,22 @@ only namespace names are considered.% \indextext{lookup!name|)}% -\rSec1[basic.link]{Program and linkage}% +\rSec1[basic.link]{Linkage}% \indextext{linkage|(} \pnum -\indextext{program}% -A \defn{program} consists of one or more translation units\iref{lex.separate} -linked together. A translation unit consists -of a sequence of declarations. - -\begin{bnf} -\nontermdef{translation-unit}\br - \opt{declaration-seq}\br - \opt{global-module-fragment} module-declaration \opt{declaration-seq} \opt{private-module-fragment} -\end{bnf} - -\pnum -\indextext{translation unit}% -A name is said to have \defn{linkage} when it can denote the same -object, reference, function, type, template, namespace or value as a -name introduced by a declaration in another scope: -\begin{itemize} -\item When a name has \defnadj{external}{linkage}, -the entity it denotes -can be referred to by names from scopes of other translation units or -from other scopes of the same translation unit. - -\item When a name has \defnx{module linkage}{linkage!module}, -the entity it denotes -can be referred to by names from other scopes of the same module unit\iref{module.unit} or -from scopes of other module units of that same module. - -\item When a name has \defnadj{internal}{linkage}, -the entity it denotes -can be referred to by names from other scopes in the same translation -unit. - -\item When a name has \indextext{linkage!no}\defn{no linkage}, the entity it denotes -cannot be referred to by names from other scopes. 
-\end{itemize} +A name can have +\defnadj{external}{linkage}, +\defnadj{module}{linkage}, +\defnadj{internal}{linkage}, or +\defnadj{no}{linkage}, +as determined by the rules below. +\begin{note} +All declarations of an entity with a name with internal linkage +appear in the same translation unit. +All declarations of an entity with module linkage +are attached to the same module. +\end{note} \pnum \indextext{linkage!\idxcode{static} and}% diff --git a/source/lex.tex b/source/lex.tex index 9f26e9fb04..d378e63e97 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -24,6 +24,11 @@ \rSec1[lex.separate]{Separate translation} +\pnum +\indextext{program}% +A \defn{program} consists of one or more translation units\iref{lex.phases,module.unit} +linked together. + \pnum \indextext{conventions!lexical|(}% \indextext{compilation!separate|(}% diff --git a/source/modules.tex b/source/modules.tex index 2ed009a3b6..9298ba4dee 100644 --- a/source/modules.tex +++ b/source/modules.tex @@ -6,6 +6,10 @@ \rSec1[module.unit]{Module units and purviews} \begin{bnf} +\nontermdef{translation-unit}\br + \opt{declaration-seq}\br + \opt{global-module-fragment} module-declaration \opt{declaration-seq} \opt{private-module-fragment} + \nontermdef{module-declaration}\br \opt{export-keyword} module-keyword module-name \opt{module-partition} \opt{attribute-specifier-seq} \terminal{;} \end{bnf} @@ -26,6 +30,10 @@ module-name-qualifier identifier \terminal{.} \end{bnf} +\pnum +\indextext{translation unit}% +A translation unit consists of a sequence of declarations. + \pnum A \defn{module unit} is a translation unit that contains a \grammarterm{module-declaration}. 
From a467f30f09a345ce3cd6750e9704491a04fa8623 Mon Sep 17 00:00:00 2001 From: Alisdair Meredith Date: Tue, 23 Jul 2024 06:58:47 -0400 Subject: [PATCH 10/10] Links to appropriate phase of translation --- source/intro.tex | 4 ++-- source/lex.tex | 10 +++++----- source/lib-intro.tex | 4 ++-- source/limits.tex | 4 ++-- source/modules.tex | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/source/intro.tex b/source/intro.tex index 27b3fc767f..39b72a650c 100644 --- a/source/intro.tex +++ b/source/intro.tex @@ -788,7 +788,7 @@ consistent with the descriptions in the library Clauses. \pnum -A \Cpp{} translation unit\iref{lex.phases} +A \Cpp{} translation unit\iref{lex.phase.7,module.unit} obtains access to the names defined in the library by including the appropriate standard library header or importing the appropriate standard library named header unit\iref{using.headers}. @@ -797,7 +797,7 @@ The templates, classes, functions, and objects in the library have external linkage\iref{basic.link}. The implementation provides definitions for standard library entities, as necessary, while combining -translation units to form a complete \Cpp{} program\iref{lex.phases}.% +translation units to form a complete \Cpp{} program\iref{lex.phase.9}.% \indextext{conformance requirements!library|)} \pnum diff --git a/source/lex.tex b/source/lex.tex index d378e63e97..8b015ac351 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -26,7 +26,7 @@ \pnum \indextext{program}% -A \defn{program} consists of one or more translation units\iref{lex.phases,module.unit} +A \defn{program} consists of one or more translation units\iref{lex.phase.7,module.unit} linked together. \pnum @@ -2065,7 +2065,7 @@ and \tcode{std::numeric_limits<unsigned int>::max()} is \tcode{0xFFFF}, the integer literal \tcode{0x8000} is signed and positive within a \tcode{\#if} expression even though it is unsigned in translation phase -7\iref{lex.phases}. +7\iref{lex.phase.7}.
\end{note} This includes interpreting \grammarterm{character-literal}s according to the rules in \ref{lex.ccon}. @@ -2245,7 +2245,7 @@ \begin{footnote} Note that adjacent \grammarterm{string-literal}s are not concatenated into a single \grammarterm{string-literal} -(see the translation phases in~\ref{lex.phases}); +(see the translation phases in~\ref{lex.phase.6}); thus, an expansion that results in two \grammarterm{string-literal}s is an invalid directive. \end{footnote} @@ -2610,7 +2610,7 @@ Since, by macro-replacement time, all \grammarterm{character-literal}s and \grammarterm{string-literal}s are preprocessing tokens, not sequences possibly containing identifier-like subsequences -(see \ref{lex.phases}, translation phases), +(see \ref{lex.phase.3}, translation phases), they are never scanned for macro names or parameters. \end{footnote} to be replaced by the replacement list of preprocessing tokens @@ -3126,7 +3126,7 @@ \defn{line number} of the current source line is one greater than the number of new-line characters read or introduced -in translation phase 1\iref{lex.phases} +in translation phase 1\iref{lex.phase.1} while processing the source file to the current token. \pnum diff --git a/source/lib-intro.tex b/source/lib-intro.tex index fb0a13a5df..08901e4001 100644 --- a/source/lib-intro.tex +++ b/source/lib-intro.tex @@ -1584,8 +1584,8 @@ \pnum Subclause \ref{using} describes how a \Cpp{} program gains access to the facilities of the \Cpp{} standard library. \ref{using.headers} describes effects during translation -phase 4, while~\ref{using.linkage} describes effects during phase -8\iref{lex.phases}. +phase 4\iref{lex.phase.4}, while~\ref{using.linkage} describes effects during phase +9\iref{lex.phase.9}. \rSec3[using.headers]{Headers} diff --git a/source/limits.tex b/source/limits.tex index ca3953634b..610b5f6d98 100644 --- a/source/limits.tex +++ b/source/limits.tex @@ -59,10 +59,10 @@ \item% Arguments in one macro invocation\iref{cpp.replace} [256].
\item% -Characters in one logical source line\iref{lex.phases} [65\,536]. +Characters in one logical source line\iref{lex.phase.2} [65\,536]. \item% Characters in a \grammarterm{string-literal}\iref{lex.string} -(after concatenation\iref{lex.phases}) [65\,536]. +(after concatenation\iref{lex.phase.6}) [65\,536]. \item% Size of an object\iref{intro.object} [262\,144]. \item% diff --git a/source/modules.tex b/source/modules.tex index 9298ba4dee..91ad6b8c2b 100644 --- a/source/modules.tex +++ b/source/modules.tex @@ -526,7 +526,7 @@ \begin{note} Such indirect importation does not make macros available, because a translation unit is -a sequence of tokens in translation phase 7\iref{lex.phases}. +a sequence of tokens in translation phase 7\iref{lex.phase.7}. Macros can be made available by directly importing header units as described in \ref{cpp.import}. \end{note}