diff --git a/source/basic.tex b/source/basic.tex index 24342733c4..32d904ea72 100644 --- a/source/basic.tex +++ b/source/basic.tex @@ -32,18 +32,22 @@ \indextext{storage class}% \indextext{scope}% \indextext{linkage}% -An \defn{entity} is a value, object, reference, -structured binding, -function, enumerator, type, -class member, bit-field, template, template specialization, namespace, or -pack. - -\pnum A \defn{name} is an \grammarterm{identifier}\iref{lex.name}, \grammarterm{operator-function-id}\iref{over.oper}, \grammarterm{literal-operator-id}\iref{over.literal}, or \grammarterm{conversion-function-id}\iref{class.conv.fct}. +\pnum +Two names are \defnx{the same}{name!same} if +\begin{itemize} +\item they are \grammarterm{identifier}{s} composed of the same character sequence, or +\item they are \grammarterm{operator-function-id}{s} formed with the same operator, or +\item they are \grammarterm{literal-operator-id}{s} formed with the same +literal suffix identifier, or +\item they are \grammarterm{conversion-function-id}{s} formed with +equivalent\iref{temp.over.link} types. +\end{itemize} + \pnum Every name is introduced by a \defn{declaration}, which is a \begin{itemize} @@ -85,9 +89,13 @@ The interpretation of a \grammarterm{for-range-declaration} produces one or more of the above\iref{stmt.ranged}. \end{note} -An entity $E$ is denoted by the name (if any) -that is introduced by a declaration of $E$ or -by a \grammarterm{typedef-name} introduced by a declaration specifying $E$. + +\pnum +Some names denote types or templates. In general, whenever a name is +encountered it is necessary to determine whether that name denotes one of these +entities before continuing to parse the program that contains it. The process +that determines this is called +\defnx{name lookup}{lookup!name}\iref{basic.lookup}. \pnum A \defn{variable} is introduced by the @@ -95,6 +103,13 @@ a reference other than a non-static data member or of an object. 
The variable's name, if any, denotes the reference or object. +\pnum +An \defn{entity} is a value, object, reference, structured binding, function, +enumerator, type, class member, bit-field, template, template specialization, +namespace, or pack. An entity $E$ is denoted by the name (if any) that is +introduced by a declaration of $E$ or by a \grammarterm{typedef-name} +introduced by a declaration specifying $E$. + \pnum A \defnadj{local}{entity} is a variable with automatic storage duration\iref{basic.stc.auto}, @@ -102,25 +117,6 @@ whose corresponding variable is such an entity, or the \tcode{*\keyword{this}} object\iref{expr.prim.this}. -\pnum -Some names denote types or templates. In general, -whenever a name is encountered it is necessary to determine whether that name denotes -one of these entities before continuing to parse the program that contains it. The -process that determines this is called -\defnx{name lookup}{lookup!name}\iref{basic.lookup}. - -\pnum -Two names are \defnx{the same}{name!same} if -\begin{itemize} -\item they are \grammarterm{identifier}{s} composed of the same character sequence, or -\item they are \grammarterm{operator-function-id}{s} formed with -the same operator, or -\item they are \grammarterm{conversion-function-id}{s} formed -with equivalent\iref{temp.over.link} types, or -\item they are \grammarterm{literal-operator-id}{s}\iref{over.literal} formed with -the same literal suffix identifier. -\end{itemize} - \pnum \indextext{translation unit!name and}% \indextext{linkage}% @@ -2647,45 +2643,22 @@ only namespace names are considered.% \indextext{lookup!name|)}% -\rSec1[basic.link]{Program and linkage}% +\rSec1[basic.link]{Linkage}% \indextext{linkage|(} \pnum -\indextext{program}% -A \defn{program} consists of one or more translation units\iref{lex.separate} -linked together. A translation unit consists -of a sequence of declarations. 
- -\begin{bnf} -\nontermdef{translation-unit}\br - \opt{declaration-seq}\br - \opt{global-module-fragment} module-declaration \opt{declaration-seq} \opt{private-module-fragment} -\end{bnf} - -\pnum -\indextext{translation unit}% -A name is said to have \defn{linkage} when it can denote the same -object, reference, function, type, template, namespace or value as a -name introduced by a declaration in another scope: -\begin{itemize} -\item When a name has \defnadj{external}{linkage}, -the entity it denotes -can be referred to by names from scopes of other translation units or -from other scopes of the same translation unit. - -\item When a name has \defnx{module linkage}{linkage!module}, -the entity it denotes -can be referred to by names from other scopes of the same module unit\iref{module.unit} or -from scopes of other module units of that same module. - -\item When a name has \defnadj{internal}{linkage}, -the entity it denotes -can be referred to by names from other scopes in the same translation -unit. - -\item When a name has \indextext{linkage!no}\defn{no linkage}, the entity it denotes -cannot be referred to by names from other scopes. -\end{itemize} +A name can have +\defnadj{external}{linkage}, +\defnadj{module}{linkage}, +\defnadj{internal}{linkage}, or +\defnadj{no}{linkage}, +as determined by the rules below. +\begin{note} +All declarations of an entity with a name with internal linkage +appear in the same translation unit. +All declarations of an entity with module linkage +are attached to the same module. +\end{note} \pnum \indextext{linkage!\idxcode{static} and}% diff --git a/source/compatibility.tex b/source/compatibility.tex index 46fb8d7557..8bdb2b42be 100644 --- a/source/compatibility.tex +++ b/source/compatibility.tex @@ -2584,6 +2584,24 @@ Programs that have a legitimate reason to treat string literal objects as potentially modifiable memory are probably rare. 
+\rSec2[diff.cpp]{\ref{cpp}: preprocessing directives} + +\diffref{cpp.predefined} +\change +Whether \mname{STDC} is defined and if so, what its value is, are +\impldef{definition and meaning of \mname{STDC}}. +\rationale +\Cpp{} is not identical to C\@. +Mandating that \mname{STDC} +be defined would require that translators make an incorrect claim. +\effect +Change to semantics of well-defined feature. +\difficulty +Semantic transformation. +\howwide +Programs and headers that reference \mname{STDC} are +quite common. + \rSec2[diff.basic]{\ref{basic}: basics} \diffref{basic.def} @@ -3370,24 +3388,6 @@ \howwide Seldom. -\rSec2[diff.cpp]{\ref{cpp}: preprocessing directives} - -\diffref{cpp.predefined} -\change -Whether \mname{STDC} is defined and if so, what its value is, are -\impldef{definition and meaning of \mname{STDC}}. -\rationale -\Cpp{} is not identical to C\@. -Mandating that \mname{STDC} -be defined would require that translators make an incorrect claim. -\effect -Change to semantics of well-defined feature. -\difficulty -Semantic transformation. -\howwide -Programs and headers that reference \mname{STDC} are -quite common. 
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \rSec1[diff.library]{C standard library} diff --git a/source/config.tex b/source/config.tex index be1743a556..304f32893b 100644 --- a/source/config.tex +++ b/source/config.tex @@ -8,6 +8,10 @@ %% Release date \newcommand{\reldate}{\today} +%% Core chapters +\newcommand{\firstcorechapter}{lex} +\newcommand{\lastcorechapter}{except} + %% Library chapters \newcommand{\firstlibchapter}{support} \newcommand{\lastlibchapter}{thread} diff --git a/source/expressions.tex b/source/expressions.tex index f259fa68d3..4a6630e613 100644 --- a/source/expressions.tex +++ b/source/expressions.tex @@ -7524,7 +7524,7 @@ \item an operation that would have undefined or erroneous behavior -as specified in \ref{intro} through \ref{cpp}, +as specified in \ref{intro} through \ref{\lastcorechapter}, excluding \ref{dcl.attr.assume} and \ref{dcl.attr.noreturn}; \begin{footnote} This includes, diff --git a/source/intro.tex b/source/intro.tex index ca6009f3c9..39b72a650c 100644 --- a/source/intro.tex +++ b/source/intro.tex @@ -256,7 +256,7 @@ Implementations are allowed, but not required, to diagnose it\iref{intro.compliance.general}. Evaluation of a constant expression\iref{expr.const} -never exhibits behavior specified as erroneous in \ref{intro} through \ref{cpp}. +never exhibits behavior specified as erroneous in \ref{intro} through \ref{\lastcorechapter}. \end{defnote} \definition{expression-equivalent}{defns.expression.equivalent} @@ -644,7 +644,7 @@ issuance of a diagnostic message). Many incorrect program constructs do not engender undefined behavior; they are required to be diagnosed. Evaluation of a constant expression\iref{expr.const} never exhibits behavior explicitly -specified as undefined in \ref{intro} through \ref{cpp}. +specified as undefined in \ref{intro} through \ref{\lastcorechapter}. 
\end{defnote} \indexdefn{behavior!unspecified}% @@ -722,7 +722,8 @@ \begin{itemize} \item If a program contains no violations of the rules in -\ref{lex} through \ref{\lastlibchapter} as well as those specified in \ref{depr}, +\ref{\firstcorechapter} through \ref{\lastlibchapter} as well as those +specified in \ref{depr}, a conforming implementation shall, in accordance with the resource limits specified in \ref{implimits}, accept and correctly execute @@ -787,7 +788,7 @@ consistent with the descriptions in the library Clauses. \pnum -A \Cpp{} translation unit\iref{lex.phases} +A \Cpp{} translation unit\iref{lex.phase.7,module.unit} obtains access to the names defined in the library by including the appropriate standard library header or importing the appropriate standard library named header unit\iref{using.headers}. @@ -796,7 +797,7 @@ The templates, classes, functions, and objects in the library have external linkage\iref{basic.link}. The implementation provides definitions for standard library entities, as necessary, while combining -translation units to form a complete \Cpp{} program\iref{lex.phases}.% +translation units to form a complete \Cpp{} program\iref{lex.phase.9}.% \indextext{conformance requirements!library|)} \pnum @@ -809,7 +810,7 @@ supports all the facilities described in this document, while a freestanding implementation supports the entire \Cpp{} language -described in \ref{lex} through \ref{cpp} and +described in \ref{\firstcorechapter} through \ref{\lastcorechapter} and the subset of the library facilities described in \ref{compliance}. \pnum @@ -952,7 +953,7 @@ \pnum \indextext{standard!structure of|(}% \indextext{standard!structure of}% -\ref{lex} through \ref{cpp} describe the \Cpp{} programming +\ref{\firstcorechapter} through \ref{\lastcorechapter} describe the \Cpp{} programming language. That description includes detailed syntactic specifications in a form described in~\ref{syntax}.
For convenience, \ref{gram} repeats all such syntactic specifications. diff --git a/source/lex.tex b/source/lex.tex index 8ac924c910..8b015ac351 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -24,6 +24,11 @@ \rSec1[lex.separate]{Separate translation} +\pnum +\indextext{program}% +A \defn{program} consists of one or more translation units\iref{lex.phase.7,module.unit} +linked together. + \pnum \indextext{conventions!lexical|(}% \indextext{compilation!separate|(}% @@ -35,8 +40,9 @@ conditional inclusion\iref{cpp.cond} preprocessing directives, as modified by the implementation-defined behavior of any conditionally-supported-directives\iref{cpp.pre} and pragmas\iref{cpp.pragma}, -if any, is -called a \defnadj{preprocessing}{translation unit}. +if any, is called a \defnadj{preprocessing}{translation unit}. + +\pnum \begin{note} A \Cpp{} program need not all be translated at the same time. \end{note} @@ -45,8 +51,7 @@ \begin{note} Previously translated translation units and instantiation units can be preserved individually or in libraries. The separate -translation units of a program communicate\iref{basic.link} by (for -example) +translation units of a program communicate\iref{basic.link} by (for example) calls to functions whose identifiers have external or module linkage, manipulation of objects whose identifiers have external or module linkage, or manipulation of data files. Translation units can be separately @@ -55,33 +60,40 @@ \end{note} \indextext{compilation!separate|)} -\rSec1[lex.phases]{Phases of translation}% - \pnum \indextext{translation!phases|(}% The precedence among the syntax rules of translation is specified by the -following phases. -\begin{footnote} +following phases of translation\iref{lex.phases}. + +\pnum +\begin{note} Implementations behave as if these separate phases occur, although in practice different phases can be folded together.
-\end{footnote} +\end{note} -\begin{enumerate} -\item +\rSec1[lex.phases]{Phases of translation}% + +\rSec2[lex.phase.1]{Mapping to translation characters}% + +\pnum \indextext{character!source file}% -An implementation shall support input files +An implementation shall support source files that are a sequence of UTF-8 code units (UTF-8 files). It may also support -an \impldef{supported input files} set of other kinds of input files, and, -if so, the kind of an input file is determined in -an \impldef{determination of kind of input file} manner -that includes a means of designating input files as UTF-8 files, +an \impldef{supported source files} set of other kinds of source files, and, +if so, the kind of a source file is determined in +an \impldef{determination of kind of source file} manner +that includes a means of designating source files as UTF-8 files, independent of their content. + +\pnum \begin{note} In other words, recognizing the \unicode{feff}{byte order mark} is not sufficient. \end{note} -If an input file is determined to be a UTF-8 file, + +\pnum +If a source file is determined to be a UTF-8 file, then it shall be a well-formed UTF-8 code unit sequence and it is decoded to produce a sequence of Unicode \begin{footnote} @@ -100,13 +112,15 @@ \unicode{000d}{carriage return} not immediately followed by a \unicode{000a}{line feed}, is replaced by a single new-line character. -For any other kind of input file supported by the implementation, +\pnum +For any other kind of source file supported by the implementation, characters are mapped, in an \impldef{mapping physical source file characters to translation character set} manner, to a sequence of translation character set elements\iref{lex.charset}, representing end-of-line indicators as new-line characters. -\item +\rSec2[lex.phase.2]{Line splicing}% +\pnum \indextext{line splicing}% If the first translation character is \unicode{feff}{byte order mark}, it is deleted.
@@ -117,16 +131,22 @@ physical source lines to form logical source lines. Only the last backslash on any physical source line shall be eligible for being part of such a splice. + +\pnum \begin{note} Line splicing can form a \grammarterm{universal-character-name}\iref{lex.charset}. \end{note} + +\pnum A source file that is not empty and that (after splicing) does not end in a new-line character shall be processed as if an additional new-line character were appended to the file. -\item The source file is decomposed into preprocessing +\rSec2[lex.phase.3]{Preprocessor tokenization}% +\pnum +The source file is decomposed into preprocessing tokens\iref{lex.pptoken} and sequences of whitespace characters (including comments). A source file shall not end in a partial preprocessing token or in a partial comment. @@ -140,6 +160,31 @@ would arise from a source file ending with an unclosed \tcode{/*} comment. \end{footnote} + +\pnum +\indextext{comment|(}% +\indextext{comment!\tcode{/*} \tcode{*/}}% +\indextext{comment!\tcode{//}}% +The characters \tcode{/*} start a comment, which terminates with the +characters \tcode{*/}. These comments do not nest. +\indextext{comment!\tcode{//}}% +The characters \tcode{//} start a comment, which terminates immediately before the +next new-line character. If there is a form-feed or a vertical-tab +character in such a comment, only whitespace characters shall appear +between it and the new-line that terminates the comment; no diagnostic +is required. + +\pnum +\begin{note} +The comment characters \tcode{//}, \tcode{/*}, +and \tcode{*/} have no special meaning within a \tcode{//} comment and +are treated just like other characters. Similarly, the comment +characters \tcode{//} and \tcode{/*} have no special meaning within a +\tcode{/*} comment. +\end{note} +\indextext{comment|)} + +\pnum Each comment is replaced by one space character. New-line characters are retained. 
Whether each nonempty sequence of whitespace characters other than new-line is retained or replaced by one space character is @@ -162,40 +207,48 @@ directive\iref{cpp.include}. \end{example} -\item Preprocessing directives are executed, macro invocations are +\rSec2[lex.phase.4]{Preprocessing directives}% + +\pnum +Preprocessing directives are executed, macro invocations are expanded, and \tcode{_Pragma} unary operator expressions are executed. A \tcode{\#include} preprocessing directive causes the named header or source file to be processed from phase 1 through phase 4, recursively. All preprocessing directives are then deleted. -\item +\rSec2[lex.phase.5]{String literal encoding}% + +\pnum For a sequence of two or more adjacent \grammarterm{string-literal} tokens, a common \grammarterm{encoding-prefix} is determined as specified in \ref{lex.string}. Each such \grammarterm{string-literal} token is then considered to have that common \grammarterm{encoding-prefix}. -\item +\rSec2[lex.phase.6]{String literal concatenation}% + +\pnum Adjacent \grammarterm{string-literal} tokens are concatenated\iref{lex.string}. -\item Whitespace characters separating tokens are no longer +\rSec2[lex.phase.7]{Syntactic and semantic analysis}% + +\pnum +Whitespace characters separating tokens are no longer significant. Each preprocessing token is converted into a token\iref{lex.token}. The resulting tokens constitute a \defn{translation unit} and -are syntactically and -semantically analyzed and translated. +are syntactically and semantically analyzed and translated. \begin{note} The process of analyzing and translating the tokens can occasionally result in one token being replaced by a sequence of other tokens\iref{temp.names}. 
\end{note} -It is -\impldef{whether the sources for -module units and header units + +\pnum +It is \impldef{whether the sources for module units and header units on which the current translation unit has an interface dependency are required to be available during translation} -whether the sources for -module units and header units +whether the sources for module units and header units on which the current translation unit has an interface dependency\iref{module.unit,module.import} are required to be available. @@ -207,12 +260,17 @@ only, and does not specify any particular implementation. \end{note} -\item Translated translation units and instantiation units are combined +\rSec2[lex.phase.8]{Template instantiation}% + +\pnum +Translated translation units and instantiation units are combined as follows: \begin{note} Some or all of these can be supplied from a library. \end{note} + +\pnum Each translated translation unit is examined to produce a list of required instantiations. \begin{note} @@ -220,36 +278,43 @@ instantiations which have been explicitly requested\iref{temp.explicit}. \end{note} -The definitions of the -required templates are located. It is \impldef{whether source of translation units must + +\pnum +The definitions of the required templates are located. +It is \impldef{whether source of translation units must be available to locate template definitions} whether the -source of the translation units containing these definitions is required -to be available. +source of the translation units containing these definitions +is required to be available. \begin{note} An implementation can choose to encode sufficient information into the translated translation unit so as to ensure the source is not required here. \end{note} -All the required instantiations -are performed to produce -\defn{instantiation units}. + +\pnum +All the required instantiations are performed +to produce \defn{instantiation units}. 
\begin{note} -These are similar -to translated translation units, but contain no references to +These are similar to translated translation units, but contain no references to uninstantiated templates and no template definitions. \end{note} -The -program is ill-formed if any instantiation fails. -\item All external entity references are resolved. Library +\pnum +The program is ill-formed if any instantiation fails. + +\rSec2[lex.phase.9]{Linking}% + +\pnum +All external entity references are resolved. Library components are linked to satisfy external references to entities not defined in the current translation. All such translator output is collected into a program image which contains information needed for execution in its execution environment.% \indextext{translation!phases|)} -\end{enumerate} -\rSec1[lex.charset]{Character sets} +\rSec1[lex.char]{Characters}% + +\rSec2[lex.charset]{Character sets} \pnum \indextext{character set|(}% @@ -326,11 +391,69 @@ \end{floattable} \pnum -The \grammarterm{universal-character-name} construct provides a way to name -other characters. +The \defnadj{basic literal}{character set} consists of +all characters of the basic character set, +plus the control characters specified in \tref{lex.charset.literal}. + +\begin{floattable}{Additional control characters in the basic literal character set}{lex.charset.literal}{ll} +\topline +\ohdrx{2}{character} \\ \capsep +\ucode{0000} & \uname{null} \\ +\ucode{0007} & \uname{alert} \\ +\ucode{0008} & \uname{backspace} \\ +\ucode{000d} & \uname{carriage return} \\ +\end{floattable} + +\pnum +A \defn{code unit} is an integer value +of character type\iref{basic.fundamental}. 
+Characters in a \grammarterm{character-literal} +other than a multicharacter or non-encodable character literal or +in a \grammarterm{string-literal} are encoded as +a sequence of one or more code units, as determined +by the \grammarterm{encoding-prefix}\iref{lex.ccon,lex.string}; +this is termed the respective \defnadj{literal}{encoding}. +The \defnadj{ordinary literal}{encoding} is +the encoding applied to an ordinary character or string literal. +The \defnadj{wide literal}{encoding} is the encoding applied +to a wide character or string literal. + +\pnum +A literal encoding or a locale-specific encoding of one of +the execution character sets\iref{character.seq} +encodes each element of the basic literal character set as +a single code unit with non-negative value, +distinct from the code unit for any other such element. +\begin{note} +A character not in the basic literal character set +can be encoded with more than one code unit; +the value of such a code unit can be the same as +that of a code unit for an element of the basic literal character set. +\end{note} +\indextext{character!null}% +\indextext{wide-character!null}% +The \unicode{0000}{null} character is encoded as the value \tcode{0}. +No other element of the translation character set +is encoded with a code unit of value \tcode{0}. +The code unit value of each decimal digit character after the digit \tcode{0} (\ucode{0030}) +shall be one greater than the value of the previous. +The ordinary and wide literal encodings are otherwise +\impldef{ordinary and wide literal encodings}. +\indextext{UTF-8}% +\indextext{UTF-16}% +\indextext{UTF-32}% +For a UTF-8, UTF-16, or UTF-32 literal, +the implementation shall encode +the Unicode scalar value +corresponding to each character of the translation character set +as specified in the Unicode Standard +for the respective Unicode encoding form. 
+\indextext{character set|)} + +\rSec2[lex.universal.char]{Universal character names} \begin{bnf} -\nontermdef{n-char} \textnormal{one of}\br +\nontermdef{n-char}\br \textnormal{any member of the translation character set except the \unicode{007d}{right curly bracket} or new-line character} \end{bnf} @@ -364,6 +487,22 @@ named-universal-character \end{bnf} +\pnum +The \grammarterm{universal-character-name} construct provides a way to name any +element in the translation character set using just the basic character set. +If a \grammarterm{universal-character-name} outside +the \grammarterm{c-char-sequence}, \grammarterm{s-char-sequence}, or +\grammarterm{r-char-sequence} of +a \grammarterm{character-literal} or \grammarterm{string-literal} +(in either case, including within a \grammarterm{user-defined-literal}) +corresponds to a control character or +to a character in the basic character set, the program is ill-formed. +\begin{note} +A sequence of characters resembling a \grammarterm{universal-character-name} in an +\grammarterm{r-char-sequence}\iref{lex.string} does not form a +\grammarterm{universal-character-name}. +\end{note} + \pnum A \grammarterm{universal-character-name} of the form \tcode{\textbackslash u} \grammarterm{hex-quad}, @@ -391,131 +530,59 @@ None of these names or aliases have leading or trailing spaces. \end{note} -\pnum -If a \grammarterm{universal-character-name} outside -the \grammarterm{c-char-sequence}, \grammarterm{s-char-sequence}, or -\grammarterm{r-char-sequence} of -a \grammarterm{character-literal} or \grammarterm{string-literal} -(in either case, including within a \grammarterm{user-defined-literal}) -corresponds to a control character or -to a character in the basic character set, the program is ill-formed. -\begin{note} -A sequence of characters resembling a \grammarterm{universal-character-name} in an -\grammarterm{r-char-sequence}\iref{lex.string} does not form a -\grammarterm{universal-character-name}. 
-\end{note} - -\pnum -The \defnadj{basic literal}{character set} consists of -all characters of the basic character set, -plus the control characters specified in \tref{lex.charset.literal}. - -\begin{floattable}{Additional control characters in the basic literal character set}{lex.charset.literal}{ll} -\topline -\ohdrx{2}{character} \\ \capsep -\ucode{0000} & \uname{null} \\ -\ucode{0007} & \uname{alert} \\ -\ucode{0008} & \uname{backspace} \\ -\ucode{000d} & \uname{carriage return} \\ -\end{floattable} - -\pnum -A \defn{code unit} is an integer value -of character type\iref{basic.fundamental}. -Characters in a \grammarterm{character-literal} -other than a multicharacter or non-encodable character literal or -in a \grammarterm{string-literal} are encoded as -a sequence of one or more code units, as determined -by the \grammarterm{encoding-prefix}\iref{lex.ccon,lex.string}; -this is termed the respective \defnadj{literal}{encoding}. -The \defnadj{ordinary literal}{encoding} is -the encoding applied to an ordinary character or string literal. -The \defnadj{wide literal}{encoding} is the encoding applied -to a wide character or string literal. - -\pnum -A literal encoding or a locale-specific encoding of one of -the execution character sets\iref{character.seq} -encodes each element of the basic literal character set as -a single code unit with non-negative value, -distinct from the code unit for any other such element. -\begin{note} -A character not in the basic literal character set -can be encoded with more than one code unit; -the value of such a code unit can be the same as -that of a code unit for an element of the basic literal character set. -\end{note} -\indextext{character!null}% -\indextext{wide-character!null}% -The \unicode{0000}{null} character is encoded as the value \tcode{0}. -No other element of the translation character set -is encoded with a code unit of value \tcode{0}. 
-The code unit value of each decimal digit character after the digit \tcode{0} (\ucode{0030}) -shall be one greater than the value of the previous. -The ordinary and wide literal encodings are otherwise -\impldef{ordinary and wide literal encodings}. -\indextext{UTF-8}% -\indextext{UTF-16}% -\indextext{UTF-32}% -For a UTF-8, UTF-16, or UTF-32 literal, -the implementation shall encode -the Unicode scalar value -corresponding to each character of the translation character set -as specified in the Unicode Standard -for the respective Unicode encoding form. -\indextext{character set|)} - \rSec1[lex.pptoken]{Preprocessing tokens} +\rSec2[lex.ppbasic]{Basic tokens} + \indextext{token!preprocessing|(}% \begin{bnf} \nontermdef{preprocessing-token}\br header-name\br - import-keyword\br - module-keyword\br - export-keyword\br - identifier\br pp-number\br character-literal\br user-defined-character-literal\br string-literal\br user-defined-string-literal\br preprocessing-op-or-punc\br + identifier\br + import-keyword\br + module-keyword\br + export-keyword\br \textnormal{each non-whitespace character that cannot be one of the above} \end{bnf} -\pnum -Each preprocessing token that is converted to a token\iref{lex.token} -shall have the lexical form of a keyword, an identifier, a literal, -or an operator or punctuator. - \pnum A preprocessing token is the minimal lexical element of the language in translation phases 3 through 6. In this document, glyphs are used to identify elements of the basic character set\iref{lex.charset}. 
-The categories of preprocessing token are: header names, -placeholder tokens produced by preprocessing \tcode{import} and \tcode{module} directives -(\grammarterm{import-keyword}, \grammarterm{module-keyword}, and \grammarterm{export-keyword}), -identifiers, preprocessing numbers, character literals (including user-defined character -literals), string literals (including user-defined string literals), preprocessing -operators and punctuators, and single non-whitespace characters that do not lexically -match the other preprocessing token categories. +The categories of preprocessing token are: header names, preprocessing numbers, +character literals (including user-defined character literals), string literals +(including user-defined string literals), preprocessing operators and punctuators, +identifiers, placeholder tokens produced by preprocessing \tcode{import} and +\tcode{module} directives (\grammarterm{import-keyword}, +\grammarterm{module-keyword}, and \grammarterm{export-keyword}), +and single non-whitespace characters that do not lexically match the other +preprocessing token categories. If a \unicode{0027}{apostrophe} or a \unicode{0022}{quotation mark} character matches the last category, the program is ill-formed. If any character not in the basic character set matches the last category, the program is ill-formed. + +\pnum Preprocessing tokens can be separated by \indextext{whitespace}% whitespace; \indextext{comment}% -this consists of comments\iref{lex.comment}, or whitespace characters +this consists of comments\iref{lex.phase.3}, or whitespace characters (\unicode{0020}{space}, \unicode{0009}{character tabulation}, new-line, \unicode{000b}{line tabulation}, and \unicode{000c}{form feed}), or both. + +\pnum As described in \ref{cpp}, in certain circumstances during translation phase 4, whitespace (or the absence thereof) serves as more than preprocessing token separation. 
Whitespace @@ -569,17 +636,6 @@ \end{codeblock} \end{example} -\pnum -The \grammarterm{import-keyword} is produced -by processing an \keyword{import} directive\iref{cpp.import}, -the \grammarterm{module-keyword} is produced -by preprocessing a \keyword{module} directive\iref{cpp.module}, and -the \grammarterm{export-keyword} is produced -by preprocessing either of the previous two directives. -\begin{note} -None has any observable spelling. -\end{note} - \pnum \begin{example} The program fragment \tcode{0xe+foo} is parsed as a @@ -602,7 +658,7 @@ \end{example} \indextext{token!preprocessing|)} -\rSec1[lex.digraph]{Alternative tokens} +\rSec2[lex.digraph]{Alternative tokens} \pnum \indextext{token!alternative|(}% @@ -651,58 +707,7 @@ \end{tokentable}% \indextext{token!alternative|)} -\rSec1[lex.token]{Tokens} - -\indextext{token|(}% -\begin{bnf} -\nontermdef{token}\br - identifier\br - keyword\br - literal\br - operator-or-punctuator -\end{bnf} - -\pnum -\indextext{\idxgram{token}}% -There are five kinds of tokens: identifiers, keywords, literals,% -\begin{footnote} -Literals include strings and character and numeric literals. -\end{footnote} -operators, and other separators. -\indextext{whitespace}% -Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments -(collectively, ``whitespace''), as described below, are ignored except -as they serve to separate tokens. -\begin{note} -Whitespace can separate otherwise adjacent identifiers, keywords, numeric -literals, and alternative tokens containing alphabetic characters. -\end{note} -\indextext{token|)} - -\rSec1[lex.comment]{Comments} - -\pnum -\indextext{comment|(}% -\indextext{comment!\tcode{/*} \tcode{*/}}% -\indextext{comment!\tcode{//}}% -The characters \tcode{/*} start a comment, which terminates with the -characters \tcode{*/}. These comments do not nest. -\indextext{comment!\tcode{//}}% -The characters \tcode{//} start a comment, which terminates immediately before the -next new-line character. 
If there is a form-feed or a vertical-tab -character in such a comment, only whitespace characters shall appear -between it and the new-line that terminates the comment; no diagnostic -is required. -\begin{note} -The comment characters \tcode{//}, \tcode{/*}, -and \tcode{*/} have no special meaning within a \tcode{//} comment and -are treated just like other characters. Similarly, the comment -characters \tcode{//} and \tcode{/*} have no special meaning within a -\tcode{/*} comment. -\end{note} -\indextext{comment|)} - -\rSec1[lex.header]{Header names} +\rSec2[lex.header]{Header names} \indextext{header!name|(}% \begin{bnf} @@ -762,7 +767,7 @@ \end{footnote} \indextext{header!name|)} -\rSec1[lex.ppnumber]{Preprocessing numbers} +\rSec2[lex.ppnumber]{Preprocessing numbers} \indextext{number!preprocessing|(}% \begin{bnf} @@ -791,473 +796,3084 @@ a \grammarterm{floating-point-literal} token.% \indextext{number!preprocessing|)} -\rSec1[lex.name]{Identifiers} +\rSec2[lex.ppliteral]{Literals} -\indextext{identifier|(}% +\rSec3[lex.ccon]{Character literals} + +\indextext{literal!character}% \begin{bnf} -\nontermdef{identifier}\br - identifier-start\br - identifier identifier-continue +\nontermdef{character-literal}\br + \opt{encoding-prefix} \terminal{'} c-char-sequence \terminal{'} \end{bnf} \begin{bnf} -\nontermdef{identifier-start}\br - nondigit\br - \textnormal{an element of the translation character set with the Unicode property XID_Start} +\nontermdef{encoding-prefix} \textnormal{one of}\br + \terminal{u8}\quad\terminal{u}\quad\terminal{U}\quad\terminal{L} \end{bnf} \begin{bnf} -\nontermdef{identifier-continue}\br - digit\br - nondigit\br - \textnormal{an element of the translation character set with the Unicode property XID_Continue} +\nontermdef{c-char-sequence}\br + c-char\br + c-char-sequence c-char \end{bnf} \begin{bnf} -\nontermdef{nondigit} \textnormal{one of}\br - \terminal{a b c d e f g h i j k l m}\br - \terminal{n o p q r s t u v w x y z}\br - \terminal{A 
B C D E F G H I J K L M}\br - \terminal{N O P Q R S T U V W X Y Z _} +\nontermdef{c-char}\br + basic-c-char\br + escape-sequence\br + universal-character-name \end{bnf} \begin{bnf} -\nontermdef{digit} \textnormal{one of}\br - \terminal{0 1 2 3 4 5 6 7 8 9} +\nontermdef{basic-c-char}\br + \textnormal{any member of the translation character set except the \unicode{0027}{apostrophe},}\br + \bnfindent\textnormal{\unicode{005c}{reverse solidus}, or new-line character} \end{bnf} -\pnum -\indextext{name!length of}% -\indextext{name}% -\begin{note} -The character properties XID_Start and XID_Continue are Derived Core Properties -as described by \UAX{44} of the Unicode Standard. -\begin{footnote} -On systems in which linkers cannot accept extended -characters, an encoding of the \grammarterm{universal-character-name} can be used in -forming valid external identifiers. For example, some otherwise unused -character or sequence of characters can be used to encode the -\tcode{\textbackslash u} in a \grammarterm{universal-character-name}. Extended -characters can produce a long external identifier, but \Cpp{} does not -place a translation limit on significant characters for external -identifiers. -\end{footnote} -\end{note} -The program is ill-formed -if an \grammarterm{identifier} does not conform to -Normalization Form C as specified in the Unicode Standard. -\begin{note} -Identifiers are case-sensitive. -\end{note} -\begin{note} -\ref{uaxid} compares the requirements of \UAX{31} of the Unicode Standard -with the \Cpp{} rules for identifiers. -\end{note} -\begin{note} -In translation phase 4, -\grammarterm{identifier} also includes -those \grammarterm{preprocessing-token}s\iref{lex.pptoken} -differentiated as keywords\iref{lex.key} -in the later translation phase 7\iref{lex.token}. 
-\end{note} +\begin{bnf} +\nontermdef{escape-sequence}\br + simple-escape-sequence\br + numeric-escape-sequence\br + conditional-escape-sequence +\end{bnf} -\pnum -\indextext{\idxcode{import}}% -\indextext{\idxcode{final}}% -\indextext{\idxcode{module}}% -\indextext{\idxcode{override}}% -The identifiers in \tref{lex.name.special} have a special meaning when -appearing in a certain context. When referred to in the grammar, these identifiers -are used explicitly rather than using the \grammarterm{identifier} grammar production. -Unless otherwise specified, any ambiguity as to whether a given -\grammarterm{identifier} has a special meaning is resolved to interpret the -token as a regular \grammarterm{identifier}. +\begin{bnf} +\nontermdef{simple-escape-sequence}\br + \terminal{\textbackslash} simple-escape-sequence-char +\end{bnf} -\begin{multicolfloattable}{Identifiers with special meaning}{lex.name.special} -{llll} -\keyword{final} \\ -\columnbreak -\keyword{import} \\ -\columnbreak -\keyword{module} \\ -\columnbreak -\keyword{override} \\ -\end{multicolfloattable} +\begin{bnf} +\nontermdef{simple-escape-sequence-char} \textnormal{one of}\br + \terminal{' " ? \textbackslash{} a b f n r t v} +\end{bnf} -\pnum -\indextext{\idxcode{_}|see{character, underscore}}% -\indextext{character!underscore!in identifier}% -\indextext{reserved identifier}% -In addition, some identifiers -appearing as a \grammarterm{token} or \grammarterm{preprocessing-token} -are reserved for use by \Cpp{} -implementations and shall -not be used otherwise; no diagnostic is required. -\begin{itemize} -\item -Each identifier that contains a double underscore -\tcode{\unun} -\indextext{character!underscore}% -or begins with an underscore followed by -an uppercase letter -\indextext{uppercase}% -is reserved to the implementation for any use. 
-\item -Each identifier that begins with an underscore is -\indextext{character!underscore}% -reserved to the implementation for use as a name in the global namespace.% -\indextext{namespace!global} -\end{itemize}% -\indextext{identifier|)} +\begin{bnf} +\nontermdef{numeric-escape-sequence}\br + octal-escape-sequence\br + hexadecimal-escape-sequence +\end{bnf} -\rSec1[lex.key]{Keywords} +\begin{bnf} +\nontermdef{simple-octal-digit-sequence}\br + octal-digit\br + simple-octal-digit-sequence octal-digit +\end{bnf} \begin{bnf} -\nontermdef{keyword}\br - \textnormal{any identifier listed in \tref{lex.key}}\br - \grammarterm{import-keyword}\br - \grammarterm{module-keyword}\br - \grammarterm{export-keyword} +\nontermdef{octal-escape-sequence}\br + \terminal{\textbackslash} octal-digit\br + \terminal{\textbackslash} octal-digit octal-digit\br + \terminal{\textbackslash} octal-digit octal-digit octal-digit\br + \terminal{\textbackslash o\{} simple-octal-digit-sequence \terminal{\}} +\end{bnf} + +\begin{bnf} +\nontermdef{hexadecimal-escape-sequence}\br + \terminal{\textbackslash x} simple-hexadecimal-digit-sequence\br + \terminal{\textbackslash x\{} simple-hexadecimal-digit-sequence \terminal{\}} +\end{bnf} + +\begin{bnf} +\nontermdef{conditional-escape-sequence}\br + \terminal{\textbackslash} conditional-escape-sequence-char +\end{bnf} + +\begin{bnf} +\nontermdef{conditional-escape-sequence-char}\br + \textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{N}, \terminal{o}, \terminal{u}, \terminal{U}, or \terminal{x}} \end{bnf} \pnum -\indextext{keyword|(}% -The identifiers shown in \tref{lex.key} are reserved for use -as keywords (that is, they are unconditionally treated as keywords in -phase 7) except in an \grammarterm{attribute-token}\iref{dcl.attr.grammar}. -\begin{note} -The \keyword{register} keyword is unused but -is reserved for future use.
-\end{note} - -\begin{multicolfloattable}{Keywords}{lex.key} -{lllll} -\keyword{alignas} \\ -\keyword{alignof} \\ -\keyword{asm} \\ -\keyword{auto} \\ -\keyword{bool} \\ -\keyword{break} \\ -\keyword{case} \\ -\keyword{catch} \\ -\keyword{char} \\ -\keyword{char8_t} \\ -\keyword{char16_t} \\ -\keyword{char32_t} \\ -\keyword{class} \\ -\keyword{concept} \\ -\keyword{const} \\ -\keyword{consteval} \\ -\keyword{constexpr} \\ -\columnbreak -\keyword{constinit} \\ -\keyword{const_cast} \\ -\keyword{continue} \\ -\keyword{co_await} \\ -\keyword{co_return} \\ -\keyword{co_yield} \\ -\keyword{decltype} \\ -\keyword{default} \\ -\keyword{delete} \\ -\keyword{do} \\ -\keyword{double} \\ -\keyword{dynamic_cast} \\ -\keyword{else} \\ -\keyword{enum} \\ -\keyword{explicit} \\ -\keyword{export} \\ -\keyword{extern} \\ -\columnbreak -\keyword{false} \\ -\keyword{float} \\ -\keyword{for} \\ -\keyword{friend} \\ -\keyword{goto} \\ -\keyword{if} \\ -\keyword{inline} \\ -\keyword{int} \\ -\keyword{long} \\ -\keyword{mutable} \\ -\keyword{namespace} \\ -\keyword{new} \\ -\keyword{noexcept} \\ -\keyword{nullptr} \\ -\keyword{operator} \\ -\keyword{private} \\ -\keyword{protected} \\ -\columnbreak -\keyword{public} \\ -\keyword{register} \\ -\keyword{reinterpret_cast} \\ -\keyword{requires} \\ -\keyword{return} \\ -\keyword{short} \\ -\keyword{signed} \\ -\keyword{sizeof} \\ -\keyword{static} \\ -\keyword{static_assert} \\ -\keyword{static_cast} \\ -\keyword{struct} \\ -\keyword{switch} \\ -\keyword{template} \\ -\keyword{this} \\ -\keyword{thread_local} \\ -\keyword{throw} \\ -\columnbreak -\keyword{true} \\ -\keyword{try} \\ -\keyword{typedef} \\ -\keyword{typeid} \\ -\keyword{typename} \\ -\keyword{union} \\ -\keyword{unsigned} \\ -\keyword{using} \\ -\keyword{virtual} \\ -\keyword{void} \\ -\keyword{volatile} \\ -\keyword{wchar_t} \\ -\keyword{while} \\ -\end{multicolfloattable} +\indextext{literal!character}% +\indextext{literal!\idxcode{char8_t}}% 
+\indextext{literal!\idxcode{char16_t}}% +\indextext{literal!\idxcode{char32_t}}% +\indextext{literal!type of character}% +\indextext{type!\idxcode{char8_t}}% +\indextext{type!\idxcode{char16_t}}% +\indextext{type!\idxcode{char32_t}}% +\indextext{wide-character}% +\indextext{type!\idxcode{wchar_t}}% +A \defnadj{multicharacter}{literal} is a \grammarterm{character-literal} +whose \grammarterm{c-char-sequence} consists of +more than one \grammarterm{c-char}. +A multicharacter literal shall not have an \grammarterm{encoding-prefix}. +If a multicharacter literal contains a \grammarterm{c-char} +that is not encodable as a single code unit in the ordinary literal encoding, +the program is ill-formed. +Multicharacter literals are conditionally-supported. \pnum -Furthermore, the alternative representations shown in -\tref{lex.key.digraph} for certain operators and -punctuators\iref{lex.digraph} are reserved and shall not be used -otherwise. +The kind of a \grammarterm{character-literal}, +its type, and its associated character encoding\iref{lex.charset} +are determined by +its \grammarterm{encoding-prefix} and its \grammarterm{c-char-sequence} +as defined by \tref{lex.ccon.literal}. 
-\begin{floattable}{Alternative representations}{lex.key.digraph} -{llllll} +\begin{floattable}{Character literals}{lex.ccon.literal} +{l|l|l|l|l} \topline -\keyword{and} & \keyword{and_eq} & \keyword{bitand} & \keyword{bitor} & \keyword{compl} & \keyword{not} \\ -\keyword{not_eq} & \keyword{or} & \keyword{or_eq} & \keyword{xor} & \keyword{xor_eq} & \\ -\end{floattable}% -\indextext{keyword|)}% - +\lhdr{Encoding} & \chdr{Kind} & \chdr{Type} & \chdr{Associated char-} & \rhdr{Example} \\ +\lhdr{prefix} & \chdr{} & \chdr{} & \chdr{acter encoding} & \\ +\capsep +none & +\defnx{ordinary character literal}{literal!character!ordinary} & +\keyword{char} & +ordinary literal & +\tcode{'v'} \\ \cline{2-3}\cline{5-5} + & +multicharacter literal & +\keyword{int} & +encoding & +\tcode{'abcd'} \\ \hline +\tcode{L} & +\defnx{wide character literal}{literal!character!wide} & +\keyword{wchar_t} & +wide literal & +\tcode{L'w'} \\ + & & & encoding & \\ \hline +\tcode{u8} & +\defnx{UTF-8 character literal}{literal!character!UTF-8} & +\keyword{char8_t} & +UTF-8 & +\tcode{u8'x'} \\ \hline +\tcode{u} & +\defnx{UTF-16 character literal}{literal!character!UTF-16} & +\keyword{char16_t} & +UTF-16 & +\tcode{u'y'} \\ \hline +\tcode{U} & +\defnx{UTF-32 character literal}{literal!character!UTF-32} & +\keyword{char32_t} & +UTF-32 & +\tcode{U'z'} \\ +\end{floattable} -\rSec1[lex.operators]{Operators and punctuators} +\pnum +In translation phase 4, +the value of a \grammarterm{character-literal} is determined +using the range of representable values +of the \grammarterm{character-literal}'s type in translation phase 7. +A multicharacter literal has an +\impldef{value of non-encodable character literal or multicharacter literal} +value. 
+The value of any other kind of \grammarterm{character-literal} +is determined as follows: +\begin{itemize} +\item +A \grammarterm{character-literal} with +a \grammarterm{c-char-sequence} consisting of a single +\grammarterm{basic-c-char}, +\grammarterm{simple-escape-sequence}, or +\grammarterm{universal-character-name} +is the code unit value of the specified character +as encoded in the literal's associated character encoding. +If the specified character lacks +representation in the literal's associated character encoding or +if it cannot be encoded as a single code unit, +then the program is ill-formed. +\item +A \grammarterm{character-literal} with +a \grammarterm{c-char-sequence} consisting of +a single \grammarterm{numeric-escape-sequence} +has a value as follows: +\begin{itemize} +\item +Let $v$ be the integer value represented by +the octal number comprising +the sequence of \grammarterm{octal-digit}{s} in +an \grammarterm{octal-escape-sequence} or by +the hexadecimal number comprising +the sequence of \grammarterm{hexadecimal-digit}{s} in +a \grammarterm{hexadecimal-escape-sequence}. +\item +If $v$ does not exceed +the range of representable values of the \grammarterm{character-literal}'s type, +then the value is $v$. +\item +Otherwise, +if the \grammarterm{character-literal}'s \grammarterm{encoding-prefix} +is absent or \tcode{L}, and +$v$ does not exceed the range of representable values of the corresponding unsigned type for the underlying type of the \grammarterm{character-literal}'s type, +then the value is the unique value of the \grammarterm{character-literal}'s type \tcode{T} that is congruent to $v$ modulo $2^N$, where $N$ is the width of \tcode{T}. +\item +Otherwise, the program is ill-formed. 
+\end{itemize} +\item +A \grammarterm{character-literal} with +a \grammarterm{c-char-sequence} consisting of +a single \grammarterm{conditional-escape-sequence} +is conditionally-supported and +has an \impldef{value of \grammarterm{conditional-escape-sequence}} value. +\end{itemize} \pnum -\indextext{operator|(}% -\indextext{punctuator|(}% -The lexical representation of \Cpp{} programs includes a number of -preprocessing tokens that are used in the syntax of the preprocessor or -are converted into tokens for operators and punctuators: +\indextext{backslash character}% +\indextext{\idxcode{\textbackslash}|see{backslash character}}% +\indextext{escape character|see{backslash character}}% +The character specified by a \grammarterm{simple-escape-sequence} +is specified in \tref{lex.ccon.esc}. +\begin{note} +Using an escape sequence for a question mark +is supported for compatibility with \CppXIV{} and C. +\end{note} + +\begin{floattable}{Simple escape sequences}{lex.ccon.esc} +{lll} +\topline +\lhdrx{2}{character} & \rhdr{\grammarterm{simple-escape-sequence}} \\ \capsep +\ucode{000a} & \uname{line feed} & \tcode{\textbackslash n} \\ +\ucode{0009} & \uname{character tabulation} & \tcode{\textbackslash t} \\ +\ucode{000b} & \uname{line tabulation} & \tcode{\textbackslash v} \\ +\ucode{0008} & \uname{backspace} & \tcode{\textbackslash b} \\ +\ucode{000d} & \uname{carriage return} & \tcode{\textbackslash r} \\ +\ucode{000c} & \uname{form feed} & \tcode{\textbackslash f} \\ +\ucode{0007} & \uname{alert} & \tcode{\textbackslash a} \\ +\ucode{005c} & \uname{reverse solidus} & \tcode{\textbackslash\textbackslash} \\ +\ucode{003f} & \uname{question mark} & \tcode{\textbackslash ?} \\ +\ucode{0027} & \uname{apostrophe} & \tcode{\textbackslash '} \\ +\ucode{0022} & \uname{quotation mark} & \tcode{\textbackslash "} \\ +\end{floattable} +\rSec3[lex.string]{String literals} + +\indextext{literal!string}% \begin{bnf} -\nontermdef{preprocessing-op-or-punc}\br - 
preprocessing-operator\br - operator-or-punctuator +\nontermdef{string-literal}\br + \opt{encoding-prefix} \terminal{"} \opt{s-char-sequence} \terminal{"}\br + \opt{encoding-prefix} \terminal{R} raw-string \end{bnf} \begin{bnf} -%% Ed. note: character protrusion would misalign various operators. -\microtypesetup{protrusion=false}\obeyspaces -\nontermdef{preprocessing-operator} \textnormal{one of}\br - \terminal{\# \#\# \%: \%:\%:} +\nontermdef{s-char-sequence}\br + s-char\br + s-char-sequence s-char \end{bnf} \begin{bnf} -\microtypesetup{protrusion=false}\obeyspaces -\nontermdef{operator-or-punctuator} \textnormal{one of}\br - \terminal{\{ \} [ ] ( )}\br - \terminal{<: :> <\% \%> ; : ...}\br - \terminal{? :: . .* -> ->* \~}\br - \terminal{! + - * / \% \caret{} \& |}\br - \terminal{= += -= *= /= \%= \caret{}= \&= |=}\br - \terminal{== != < > <= >= <=> \&\& ||}\br - \terminal{<< >> <<= >>= ++ -- ,}\br - \terminal{\keyword{and} \keyword{or} \keyword{xor} \keyword{not} \keyword{bitand} \keyword{bitor} \keyword{compl}}\br - \terminal{\keyword{and_eq} \keyword{or_eq} \keyword{xor_eq} \keyword{not_eq}} +\nontermdef{s-char}\br + basic-s-char\br + escape-sequence\br + universal-character-name \end{bnf} -Each \grammarterm{operator-or-punctuator} is converted to a single token -in translation phase 7\iref{lex.phases}.% -\indextext{punctuator|)}% -\indextext{operator|)} - -\rSec1[lex.literal]{Literals}% -\indextext{literal|(} - -\rSec2[lex.literal.kinds]{Kinds of literals} - -\pnum -\indextext{constant}% -\indextext{literal!constant}% -There are several kinds of literals. -\begin{footnote} -The term ``literal'' generally designates, in this -document, those tokens that are called ``constants'' in C. 
-\end{footnote} - -\begin{bnf} -\nontermdef{literal}\br - integer-literal\br - character-literal\br - floating-point-literal\br - string-literal\br - boolean-literal\br - pointer-literal\br - user-defined-literal -\end{bnf} -\begin{note} -When appearing as an \grammarterm{expression}, -a literal has a type and a value category\iref{expr.prim.literal}. -\end{note} - -\rSec2[lex.icon]{Integer literals} - -\indextext{literal!integer}% -\begin{bnf} -\nontermdef{integer-literal}\br - binary-literal \opt{integer-suffix}\br - octal-literal \opt{integer-suffix}\br - decimal-literal \opt{integer-suffix}\br - hexadecimal-literal \opt{integer-suffix} -\end{bnf} - -\begin{bnf} -\nontermdef{binary-literal}\br - \terminal{0b} binary-digit\br - \terminal{0B} binary-digit\br - binary-literal \opt{\terminal{'}} binary-digit -\end{bnf} - -\begin{bnf} -\nontermdef{octal-literal}\br - \terminal{0}\br - octal-literal \opt{\terminal{'}} octal-digit -\end{bnf} - -\begin{bnf} -\nontermdef{decimal-literal}\br - nonzero-digit\br - decimal-literal \opt{\terminal{'}} digit -\end{bnf} - -\begin{bnf} -\nontermdef{hexadecimal-literal}\br - hexadecimal-prefix hexadecimal-digit-sequence -\end{bnf} - -\begin{bnf} -\nontermdef{binary-digit} \textnormal{one of}\br - \terminal{0 1} -\end{bnf} - -\begin{bnf} -\nontermdef{octal-digit} \textnormal{one of}\br - \terminal{0 1 2 3 4 5 6 7} -\end{bnf} - -\begin{bnf} -\nontermdef{nonzero-digit} \textnormal{one of}\br - \terminal{1 2 3 4 5 6 7 8 9} -\end{bnf} +\begin{bnf} +\nontermdef{basic-s-char}\br + \textnormal{any member of the translation character set except the \unicode{0022}{quotation mark},}\br + \bnfindent\textnormal{\unicode{005c}{reverse solidus}, or new-line character} +\end{bnf} \begin{bnf} -\nontermdef{hexadecimal-prefix} \textnormal{one of}\br - \terminal{0x 0X} +\nontermdef{raw-string}\br + \terminal{"} \opt{d-char-sequence} \terminal{(} \opt{r-char-sequence} \terminal{)} \opt{d-char-sequence} \terminal{"} \end{bnf} \begin{bnf} 
-\nontermdef{hexadecimal-digit-sequence}\br - hexadecimal-digit\br - hexadecimal-digit-sequence \opt{\terminal{'}} hexadecimal-digit +\nontermdef{r-char-sequence}\br + r-char\br + r-char-sequence r-char \end{bnf} \begin{bnf} -\nontermdef{hexadecimal-digit} \textnormal{one of}\br - \terminal{0 1 2 3 4 5 6 7 8 9}\br - \terminal{a b c d e f}\br - \terminal{A B C D E F} +\nontermdef{r-char}\br + \textnormal{any member of the translation character set, except a \unicode{0029}{right parenthesis} followed by}\br + \bnfindent\textnormal{the initial \grammarterm{d-char-sequence} (which may be empty) followed by a \unicode{0022}{quotation mark}} \end{bnf} \begin{bnf} -\nontermdef{integer-suffix}\br - unsigned-suffix \opt{long-suffix} \br - unsigned-suffix \opt{long-long-suffix} \br - unsigned-suffix \opt{size-suffix} \br - long-suffix \opt{unsigned-suffix} \br - long-long-suffix \opt{unsigned-suffix} \br - size-suffix \opt{unsigned-suffix} +\nontermdef{d-char-sequence}\br + d-char\br + d-char-sequence d-char \end{bnf} \begin{bnf} -\nontermdef{unsigned-suffix} \textnormal{one of}\br - \terminal{u U} +\nontermdef{d-char}\br + \textnormal{any member of the basic character set except:}\br + \bnfindent\textnormal{\unicode{0020}{space}, \unicode{0028}{left parenthesis}, \unicode{0029}{right parenthesis}, \unicode{005c}{reverse solidus},}\br + \bnfindent\textnormal{\unicode{0009}{character tabulation}, \unicode{000b}{line tabulation}, \unicode{000c}{form feed}, and new-line} \end{bnf} -\begin{bnf} -\nontermdef{long-suffix} \textnormal{one of}\br - \terminal{l L} -\end{bnf} +\pnum +\indextext{literal!string}% +\indextext{character string}% +\indextext{string!type of}% +\indextext{type!\idxcode{wchar_t}}% +\indextext{prefix!\idxcode{L}}% +\indextext{literal!string!\idxcode{char16_t}}% +\indextext{type!\idxcode{char16_t}}% +\indextext{literal!string!\idxcode{char32_t}}% +\indextext{type!\idxcode{char32_t}}% +The kind of a \grammarterm{string-literal}, +its type, and +its associated 
character encoding\iref{lex.charset} +are determined by its encoding prefix and sequence of +\grammarterm{s-char}s or \grammarterm{r-char}s +as defined by \tref{lex.string.literal} +where $n$ is the number of encoded code units as described below. -\begin{bnf} -\nontermdef{long-long-suffix} \textnormal{one of}\br - \terminal{ll LL} -\end{bnf} +\begin{floattable}{String literals}{lex.string.literal} +{llp{2.6cm}p{2.3cm}p{4.7cm}} +\topline +\lhdr{Enco-} & \chdr{Kind} & \chdr{Type} & \chdr{Associated} & \rhdr{Examples} \\ +\lhdr{ding} & \chdr{} & \chdr{} & \chdr{character} & \rhdr{} \\ +\lhdr{prefix} & \chdr{} & \chdr{} & \chdr{encoding} & \rhdr{} \\ +\capsep +none & +\defnx{ordinary string literal}{literal!string!ordinary} & +array of $n$\newline \tcode{\keyword{const} \keyword{char}} & +ordinary literal encoding & +\tcode{"ordinary string"}\newline +\tcode{R"(ordinary raw string)"} \\ +\tcode{L} & +\defnx{wide string literal}{literal!string!wide} & +array of $n$\newline \tcode{\keyword{const} \keyword{wchar_t}} & +wide literal\newline encoding & +\tcode{L"wide string"}\newline +\tcode{LR"w(wide raw string)w"} \\ +\tcode{u8} & +\defnx{UTF-8 string literal}{literal!string!UTF-8} & +array of $n$\newline \tcode{\keyword{const} \keyword{char8_t}} & +UTF-8 & +\tcode{u8"UTF-8 string"}\newline +\tcode{u8R"x(UTF-8 raw string)x"} \\ +\tcode{u} & +\defnx{UTF-16 string literal}{literal!string!UTF-16} & +array of $n$\newline \tcode{\keyword{const} \keyword{char16_t}} & +UTF-16 & +\tcode{u"UTF-16 string"}\newline +\tcode{uR"y(UTF-16 raw string)y"} \\ +\tcode{U} & +\defnx{UTF-32 string literal}{literal!string!UTF-32} & +array of $n$\newline \tcode{\keyword{const} \keyword{char32_t}} & +UTF-32 & +\tcode{U"UTF-32 string"}\newline +\tcode{UR"z(UTF-32 raw string)z"} \\ +\end{floattable} -\begin{bnf} -\nontermdef{size-suffix} \textnormal{one of}\br - \terminal{z Z} -\end{bnf} +\pnum +\indextext{literal!string!raw}% +A \grammarterm{string-literal} that has an \tcode{R} 
+\indextext{prefix!\idxcode{R}}% +in the prefix is a \defn{raw string literal}. The +\grammarterm{d-char-sequence} serves as a delimiter. The terminating +\grammarterm{d-char-sequence} of a \grammarterm{raw-string} is the same sequence of +characters as the initial \grammarterm{d-char-sequence}. A \grammarterm{d-char-sequence} +shall consist of at most 16 characters. \pnum -\indextext{literal!\idxcode{unsigned}}% -\indextext{literal!\idxcode{long}}% -\indextext{literal!base of integer}% -In an \grammarterm{integer-literal}, -the sequence of -\grammarterm{binary-digit}s, -\grammarterm{octal-digit}s, -\grammarterm{digit}s, or -\grammarterm{hexadecimal-digit}s -is interpreted as a base $N$ integer as shown in table \tref{lex.icon.base}; -the lexically first digit of the sequence of digits is the most significant. \begin{note} -The prefix and any optional separating single quotes are ignored -when determining the value. +The characters \tcode{'('} and \tcode{')'} can appear in a +\grammarterm{raw-string}. Thus, \tcode{R"delimiter((a|b))delimiter"} is equivalent to +\tcode{"(a|b)"}. \end{note} -\begin{simpletypetable} -{Base of \grammarterm{integer-literal}{s}} -{lex.icon.base} -{lr} -\topline -\lhdr{Kind of \grammarterm{integer-literal}} & \rhdr{base $N$} \\ \capsep -\grammarterm{binary-literal} & 2 \\ -\grammarterm{octal-literal} & 8 \\ -\grammarterm{decimal-literal} & 10 \\ -\grammarterm{hexadecimal-literal} & 16 \\ -\end{simpletypetable} +\pnum +\begin{note} +A source-file new-line in a raw string literal results in a new-line in the +resulting execution string literal. Assuming no +whitespace at the beginning of lines in the following example, the assert will succeed: +\begin{codeblock} +const char* p = R"(a\ +b +c)"; +assert(std::strcmp(p, "a\\\nb\nc") == 0); +\end{codeblock} +\end{note} \pnum -The \grammarterm{hexadecimal-digit}s -\tcode{a} through \tcode{f} and \tcode{A} through \tcode{F} -have decimal values ten through fifteen. 
\begin{example} -The number twelve can be written \tcode{12}, \tcode{014}, -\tcode{0XC}, or \tcode{0b1100}. The \grammarterm{integer-literal}s \tcode{1048576}, -\tcode{1'048'576}, \tcode{0X100000}, \tcode{0x10'0000}, and -\tcode{0'004'000'000} all have the same value. +The raw string +\begin{codeblock} +R"a( +)\ +a" +)a" +\end{codeblock} +is equivalent to \tcode{"\textbackslash n)\textbackslash \textbackslash \textbackslash na\textbackslash"\textbackslash n"}. The raw string +\begin{codeblock} +R"(x = "\"y\"")" +\end{codeblock} +is equivalent to \tcode{"x = \textbackslash "\textbackslash\textbackslash\textbackslash "y\textbackslash\textbackslash\textbackslash "\textbackslash ""}. \end{example} \pnum -\indextext{literal!\idxcode{long}}% -\indextext{literal!\idxcode{unsigned}}% -\indextext{literal!integer}% -\indextext{literal!type of integer}% -\indextext{suffix!\idxcode{L}}% -\indextext{suffix!\idxcode{U}}% -\indextext{suffix!\idxcode{l}}% -\indextext{suffix!\idxcode{u}}% -The type of an \grammarterm{integer-literal} is -the first type in the list in \tref{lex.icon.type} -corresponding to its optional \grammarterm{integer-suffix} -in which its value can be represented. +\indextext{literal!narrow-character}% +Ordinary string literals and UTF-8 string literals are +also referred to as \defnx{narrow string literals}{literal!string!narrow}. -\begin{floattable}{Types of \grammarterm{integer-literal}s}{lex.icon.type}{l|l|l} -\topline +\pnum +\indextext{concatenation!string}% +The common \grammarterm{encoding-prefix} +for a sequence of adjacent \grammarterm{string-literal}s +is determined pairwise as follows. +If two \grammarterm{string-literal}{s} have +the same \grammarterm{encoding-prefix}, +the common \grammarterm{encoding-prefix} is that \grammarterm{encoding-prefix}. +If one \grammarterm{string-literal} has no \grammarterm{encoding-prefix}, +the common \grammarterm{encoding-prefix} is that +of the other \grammarterm{string-literal}. 
+Any other combinations are ill-formed. +\begin{note} +A \grammarterm{string-literal}'s rawness has +no effect on the determination of the common \grammarterm{encoding-prefix}. +\end{note} + +\pnum +In translation phase 6\iref{lex.phase.6}, +adjacent \grammarterm{string-literal}s are concatenated. +The lexical structure and grouping of +the contents of the individual \grammarterm{string-literal}s is retained. +\begin{example} +\begin{codeblock} +"\xA" "B" +\end{codeblock} +represents +the code unit \tcode{'\textbackslash xA'} and the character \tcode{'B'} +after concatenation +(and not the single code unit \tcode{'\textbackslash xAB'}). +Similarly, +\begin{codeblock} +R"(\u00)" "41" +\end{codeblock} +represents six characters, +starting with a backslash and ending with the digit \tcode{1} +(and not the single character \tcode{'A'} +specified by a \grammarterm{universal-character-name}). + +\tref{lex.string.concat} has some examples of valid concatenations. +\end{example} + +\begin{floattable}{String literal concatenations}{lex.string.concat} +{lll|lll|lll} +\topline +\multicolumn{2}{|c}{Source} & +Means & +\multicolumn{2}{c}{Source} & +Means & +\multicolumn{2}{c}{Source} & +Means \\ +\tcode{u"a"} & \tcode{u"b"} & \tcode{u"ab"} & +\tcode{U"a"} & \tcode{U"b"} & \tcode{U"ab"} & +\tcode{L"a"} & \tcode{L"b"} & \tcode{L"ab"} \\ +\tcode{u"a"} & \tcode{"b"} & \tcode{u"ab"} & +\tcode{U"a"} & \tcode{"b"} & \tcode{U"ab"} & +\tcode{L"a"} & \tcode{"b"} & \tcode{L"ab"} \\ +\tcode{"a"} & \tcode{u"b"} & \tcode{u"ab"} & +\tcode{"a"} & \tcode{U"b"} & \tcode{U"ab"} & +\tcode{"a"} & \tcode{L"b"} & \tcode{L"ab"} \\ +\end{floattable} + +\pnum +Evaluating a \grammarterm{string-literal} results in a string literal object +with static storage duration\iref{basic.stc}. +\begin{note} +String literal objects are potentially non-unique\iref{intro.object}. +Whether successive evaluations of a +\grammarterm{string-literal} yield the same or a different object is +unspecified. 
+\end{note} +\begin{note} +\indextext{literal!string!undefined change to}% +The effect of attempting to modify a string literal object is undefined. +\end{note} + +\pnum +\indextext{\idxcode{0}!string terminator}% +\indextext{\idxcode{0}!null character|see {character, null}}% +String literal objects are initialized with +the sequence of code unit values +corresponding to the \grammarterm{string-literal}'s sequence of +\grammarterm{s-char}s (originally from non-raw string literals) and +\grammarterm{r-char}s (originally from raw string literals), +plus a terminating \unicode{0000}{null} character, +in order as follows: +\begin{itemize} +\item +The sequence of characters denoted by each contiguous sequence of +\grammarterm{basic-s-char}s, +\grammarterm{r-char}s, +\grammarterm{simple-escape-sequence}s\iref{lex.ccon}, and +\grammarterm{universal-character-name}s\iref{lex.charset} +is encoded to a code unit sequence +using the \grammarterm{string-literal}'s associated character encoding. +If a character lacks representation in the associated character encoding, +then the program is ill-formed. +\begin{note} +No character lacks representation in any Unicode encoding form. +\end{note} +When encoding a stateful character encoding, +implementations should encode the first such sequence +beginning with the initial encoding state and +encode subsequent sequences +beginning with the final encoding state of the prior sequence. +\begin{note} +The encoded code unit sequence can differ from +the sequence of code units that would be obtained by +encoding each character independently. 
+\end{note} +\item +Each \grammarterm{numeric-escape-sequence}\iref{lex.ccon} +contributes a single code unit with a value as follows: +\begin{itemize} +\item +Let $v$ be the integer value represented by +the octal number comprising +the sequence of \grammarterm{octal-digit}{s} in +an \grammarterm{octal-escape-sequence} or by +the hexadecimal number comprising +the sequence of \grammarterm{hexadecimal-digit}{s} in +a \grammarterm{hexadecimal-escape-sequence}. +\item +If $v$ does not exceed the range of representable values of +the \grammarterm{string-literal}'s array element type, +then the value is $v$. +\item +Otherwise, +if the \grammarterm{string-literal}'s \grammarterm{encoding-prefix} +is absent or \tcode{L}, and +$v$ does not exceed the range of representable values of +the corresponding unsigned type for the underlying type of +the \grammarterm{string-literal}'s array element type, +then the value is the unique value of +the \grammarterm{string-literal}'s array element type \tcode{T} +that is congruent to $v$ modulo $2^N$, where $N$ is the width of \tcode{T}. +\item +Otherwise, the program is ill-formed. +\end{itemize} +When encoding a stateful character encoding, +these sequences should have no effect on encoding state. +\item +Each \grammarterm{conditional-escape-sequence}\iref{lex.ccon} +contributes an +\impldef{code unit sequence for \grammarterm{conditional-escape-sequence}} +code unit sequence. +When encoding a stateful character encoding, +it is +\impldef{effect of \grammarterm{conditional-escape-sequence} on encoding state} +what effect these sequences have on encoding state. 
+\end{itemize} + +\rSec2[lex.operators]{Operators and punctuators} + +\pnum +\indextext{operator|(}% +\indextext{punctuator|(}% +The lexical representation of \Cpp{} programs includes a number of +preprocessing tokens that are used in the syntax of the preprocessor or +are converted into tokens for operators and punctuators: + +\begin{bnf} +\nontermdef{preprocessing-op-or-punc}\br + preprocessing-operator\br + operator-or-punctuator +\end{bnf} + +\begin{bnf} +%% Ed. note: character protrusion would misalign various operators. +\microtypesetup{protrusion=false}\obeyspaces +\nontermdef{preprocessing-operator} \textnormal{one of}\br + \terminal{\# \#\# \%: \%:\%:} +\end{bnf} + +\begin{bnf} +\microtypesetup{protrusion=false}\obeyspaces +\nontermdef{operator-or-punctuator} \textnormal{one of}\br + \terminal{\{ \} [ ] ( )}\br + \terminal{<: :> <\% \%> ; : ...}\br + \terminal{? :: . .* -> ->* \~}\br + \terminal{! + - * / \% \caret{} \& |}\br + \terminal{= += -= *= /= \%= \caret{}= \&= |=}\br + \terminal{== != < > <= >= <=> \&\& ||}\br + \terminal{<< >> <<= >>= ++ -- ,}\br + \terminal{\keyword{and} \keyword{or} \keyword{xor} \keyword{not} \keyword{bitand} \keyword{bitor} \keyword{compl}}\br + \terminal{\keyword{and_eq} \keyword{or_eq} \keyword{xor_eq} \keyword{not_eq}} +\end{bnf} + +Each \grammarterm{operator-or-punctuator} is converted to a single token +in translation phase 7\iref{lex.phase.7}.% +\indextext{punctuator|)}% +\indextext{operator|)} + +\rSec2[lex.name]{Identifiers} + +\indextext{identifier|(}% +\begin{bnf} +\nontermdef{identifier}\br + identifier-start\br + identifier identifier-continue +\end{bnf} + +\begin{bnf} +\nontermdef{identifier-start}\br + nondigit\br + \textnormal{an element of the translation character set with the Unicode property XID_Start} +\end{bnf} + +\begin{bnf} +\nontermdef{identifier-continue}\br + digit\br + nondigit\br + \textnormal{an element of the translation character set with the Unicode property XID_Continue} +\end{bnf} + 
+\begin{bnf} +\nontermdef{nondigit} \textnormal{one of}\br + \terminal{a b c d e f g h i j k l m}\br + \terminal{n o p q r s t u v w x y z}\br + \terminal{A B C D E F G H I J K L M}\br + \terminal{N O P Q R S T U V W X Y Z _} +\end{bnf} + +\begin{bnf} +\nontermdef{digit} \textnormal{one of}\br + \terminal{0 1 2 3 4 5 6 7 8 9} +\end{bnf} + +\pnum +\indextext{name!length of}% +\indextext{name}% +\begin{note} +The character properties XID_Start and XID_Continue are Derived Core Properties +as described by \UAX{44} of the Unicode Standard. +\begin{footnote} +On systems in which linkers cannot accept extended +characters, an encoding of the \grammarterm{universal-character-name} can be used in +forming valid external identifiers. For example, some otherwise unused +character or sequence of characters can be used to encode the +\tcode{\textbackslash u} in a \grammarterm{universal-character-name}. Extended +characters can produce a long external identifier, but \Cpp{} does not +place a translation limit on significant characters for external +identifiers. +\end{footnote} +\end{note} +The program is ill-formed +if an \grammarterm{identifier} does not conform to +Normalization Form C as specified in the Unicode Standard. +\begin{note} +Identifiers are case-sensitive. +\end{note} +\begin{note} +\ref{uaxid} compares the requirements of \UAX{31} of the Unicode Standard +with the \Cpp{} rules for identifiers. +\end{note} +\begin{note} +In translation phase 4, +\grammarterm{identifier} also includes +those \grammarterm{preprocessing-token}s\iref{lex.pptoken} +differentiated as keywords\iref{lex.key} +in the later translation phase 7\iref{lex.token}. +\end{note} + +\pnum +\indextext{\idxcode{import}}% +\indextext{\idxcode{final}}% +\indextext{\idxcode{module}}% +\indextext{\idxcode{override}}% +The identifiers in \tref{lex.name.special} have a special meaning when +appearing in a certain context. 
When referred to in the grammar, these identifiers +are used explicitly rather than using the \grammarterm{identifier} grammar production. +Unless otherwise specified, any ambiguity as to whether a given +\grammarterm{identifier} has a special meaning is resolved to interpret the +token as a regular \grammarterm{identifier}. + +\begin{multicolfloattable}{Identifiers with special meaning}{lex.name.special} +{llll} +\keyword{final} \\ +\columnbreak +\keyword{import} \\ +\columnbreak +\keyword{module} \\ +\columnbreak +\keyword{override} \\ +\end{multicolfloattable} + +\pnum +\indextext{\idxcode{_}|see{character, underscore}}% +\indextext{character!underscore!in identifier}% +\indextext{reserved identifier}% +In addition, some identifiers +appearing as a \grammarterm{token} or \grammarterm{preprocessing-token} +are reserved for use by \Cpp{} +implementations and shall +not be used otherwise; no diagnostic is required. +\begin{itemize} +\item +Each identifier that contains a double underscore +\tcode{\unun} +\indextext{character!underscore}% +or begins with an underscore followed by +an uppercase letter +\indextext{uppercase}% +is reserved to the implementation for any use. 
+\item +Each identifier that begins with an underscore is +\indextext{character!underscore}% +reserved to the implementation for use as a name in the global namespace.% +\indextext{namespace!global} +\end{itemize}% +\indextext{identifier|)} + +\rSec1[cpp]{Preprocessing directives}% +\indextext{preprocessing directive|(} + +\indextext{compiler control line|see{preprocessing directive}}% +\indextext{control line|see{preprocessing directive}}% +\indextext{directive, preprocessing|see{preprocessing directive}} + +\gramSec[gram.cpp]{Preprocessing directives} + +\rSec2[cpp.pre]{Preamble} + +\begin{bnf} +\nontermdef{preprocessing-file}\br + \opt{group}\br + module-file +\end{bnf} + +\begin{bnf} +\nontermdef{module-file}\br + \opt{pp-global-module-fragment} pp-module \opt{group} \opt{pp-private-module-fragment} +\end{bnf} + +\begin{bnf} +\nontermdef{pp-global-module-fragment}\br + \keyword{module} \terminal{;} new-line \opt{group} +\end{bnf} + +\begin{bnf} +\nontermdef{pp-private-module-fragment}\br + \keyword{module} \terminal{:} \keyword{private} \terminal{;} new-line \opt{group} +\end{bnf} + +\begin{bnf} +\nontermdef{group}\br + group-part\br + group group-part +\end{bnf} + +\begin{bnf} +\nontermdef{group-part}\br + control-line\br + if-section\br + text-line\br + \terminal{\#} conditionally-supported-directive +\end{bnf} + +\begin{bnf}\obeyspaces +\nontermdef{control-line}\br + \terminal{\# include} pp-tokens new-line\br + pp-import\br + \terminal{\# define } identifier replacement-list new-line\br + \terminal{\# define } identifier lparen \opt{identifier-list} \terminal{)} replacement-list new-line\br + \terminal{\# define } identifier lparen \terminal{... )} replacement-list new-line\br + \terminal{\# define } identifier lparen identifier-list \terminal{, ... 
)} replacement-list new-line\br + \terminal{\# undef } identifier new-line\br + \terminal{\# line } pp-tokens new-line\br + \terminal{\# error } \opt{pp-tokens} new-line\br + \terminal{\# warning} \opt{pp-tokens} new-line\br + \terminal{\# pragma } \opt{pp-tokens} new-line\br + \terminal{\# }new-line +\end{bnf} + +\begin{bnf} +\nontermdef{if-section}\br + if-group \opt{elif-groups} \opt{else-group} endif-line +\end{bnf} + +\begin{bnf}\obeyspaces +\nontermdef{if-group}\br + \terminal{\# if } constant-expression new-line \opt{group}\br + \terminal{\# ifdef } identifier new-line \opt{group}\br + \terminal{\# ifndef } identifier new-line \opt{group} +\end{bnf} + +\begin{bnf} +\nontermdef{elif-groups}\br + elif-group\br + elif-groups elif-group +\end{bnf} + +\begin{bnf}\obeyspaces +\nontermdef{elif-group}\br + \terminal{\# elif } constant-expression new-line \opt{group}\br + \terminal{\# elifdef } identifier new-line \opt{group}\br + \terminal{\# elifndef} identifier new-line \opt{group} +\end{bnf} + +\begin{bnf}\obeyspaces +\nontermdef{else-group}\br + \terminal{\# else } new-line \opt{group} +\end{bnf} + +\begin{bnf}\obeyspaces +\nontermdef{endif-line}\br + \terminal{\# endif } new-line +\end{bnf} + +\begin{bnf} +\nontermdef{text-line}\br + \opt{pp-tokens} new-line +\end{bnf} + +\begin{bnf} +\nontermdef{conditionally-supported-directive}\br + pp-tokens new-line +\end{bnf} + +\begin{bnf} +\nontermdef{lparen}\br + \descr{a \terminal{(} character not immediately preceded by whitespace} +\end{bnf} + +\begin{bnf} +\nontermdef{identifier-list}\br + identifier\br + identifier-list \terminal{,} identifier +\end{bnf} + +\begin{bnf} +\nontermdef{replacement-list}\br + \opt{pp-tokens} +\end{bnf} + +\begin{bnf} +\nontermdef{pp-tokens}\br + preprocessing-token\br + pp-tokens preprocessing-token +\end{bnf} + +\begin{bnf} +\nontermdef{new-line}\br + \descr{the new-line character} +\end{bnf} + +\pnum +A \defn{preprocessing directive} consists of a sequence of preprocessing tokens 
+that satisfies the following constraints: +At the start of translation phase 4, +the first token in the sequence, +referred to as a \defnadj{directive-introducing}{token}, +begins with the first character in the source file +(optionally after whitespace containing no new-line characters) or +follows whitespace containing at least one new-line character, +and is + +\begin{itemize} +\item +a \tcode{\#} preprocessing token, or + +\item +an \keyword{import} preprocessing token +immediately followed on the same logical line by a +\grammarterm{header-name}, +\tcode{<}, +\grammarterm{identifier}, +\grammarterm{string-literal}, or +\tcode{:} +preprocessing token, or + +\item +a \keyword{module} preprocessing token +immediately followed on the same logical line by an +\grammarterm{identifier}, +\tcode{:}, or +\tcode{;} +preprocessing token, or + +\item +an \keyword{export} preprocessing token +immediately followed on the same logical line by +one of the two preceding forms. +\end{itemize} + +The last token in the sequence is the first token within the sequence that +is immediately followed by whitespace containing a new-line character. +\begin{footnote} +Thus, +preprocessing directives are commonly called ``lines''. +These ``lines'' have no other syntactic significance, +as all whitespace is equivalent except in certain situations +during preprocessing (see the +\tcode{\#} +character string literal creation operator in~\ref{cpp.stringize}, for example). +\end{footnote} +\begin{note} +A new-line character ends the preprocessing directive even if it occurs +within what would otherwise be an invocation of a function-like macro. 
+\end{note} + +\begin{example} +\begin{codeblock} +# // preprocessing directive +module ; // preprocessing directive +export module leftpad; // preprocessing directive +import <string>; // preprocessing directive +export import "squee"; // preprocessing directive +import rightpad; // preprocessing directive +import :part; // preprocessing directive + +module // not a preprocessing directive +; // not a preprocessing directive + +export // not a preprocessing directive +import // not a preprocessing directive +foo; // not a preprocessing directive + +export // not a preprocessing directive +import foo; // preprocessing directive (ill-formed at phase 7) + +import :: // not a preprocessing directive +import -> // not a preprocessing directive +\end{codeblock} +\end{example} + +\pnum +A sequence of preprocessing tokens is only a \grammarterm{text-line} +if it does not begin with a directive-introducing token. +A sequence of preprocessing tokens is only a \grammarterm{conditionally-supported-directive} +if it does not begin with any of the directive names +appearing after a \tcode{\#} in the syntax. +A \grammarterm{conditionally-supported-directive} is +conditionally-supported with +\impldef{additional supported forms of preprocessing directive} +semantics. + +\pnum +At the start of phase 4 of translation, +the \grammarterm{group} of a \grammarterm{pp-global-module-fragment} shall +contain neither a \grammarterm{text-line} nor a \grammarterm{pp-import}. + +\pnum +When in a group that is skipped\iref{cpp.cond}, the directive +syntax is relaxed to allow any sequence of preprocessing tokens to occur between +the directive name and the following new-line character.
+ +\pnum +The only whitespace characters that shall appear +between preprocessing tokens +within a preprocessing directive +(from just after the directive-introducing token +through just before the terminating new-line character) +are space and horizontal-tab +(including spaces that have replaced comments +or possibly other whitespace characters +in translation phase 3). + +\pnum +The implementation can +process and skip sections of source files conditionally, +include other source files, +import macros from header units, +and replace macros. +These capabilities are called +\defn{preprocessing}, +because conceptually they occur +before translation of the resulting translation unit. + +\pnum +The preprocessing tokens within a preprocessing directive +are not subject to macro expansion unless otherwise stated. + +\begin{example} +In: +\begin{codeblock} +#define EMPTY +EMPTY # include <file.h> +\end{codeblock} +the sequence of preprocessing tokens on the second line is \textit{not} +a preprocessing directive, because it does not begin with a \tcode{\#} at the start of +translation phase 4, even though it will do so after the macro \tcode{EMPTY} +has been replaced. +\end{example} + +\rSec2[cpp.module]{Module directive} +\indextext{preprocessing directive!module}% + +\begin{bnf} +\nontermdef{pp-module}\br + \opt{\keyword{export}} \keyword{module} \opt{pp-tokens} \terminal{;} new-line +\end{bnf} + +\pnum +A \grammarterm{pp-module} shall not +appear in a context where \tcode{module} +or (if it is the first token of the \grammarterm{pp-module}) \tcode{export} +is an identifier defined as an object-like macro.
+ +\pnum +The \grammarterm{pp-tokens}, if any, of a \grammarterm{pp-module} +shall be of the form: +\begin{ncsimplebnf} +pp-module-name \opt{pp-module-partition} \opt{pp-tokens} +\end{ncsimplebnf} +where the \grammarterm{pp-tokens} (if any) shall not begin with +a \tcode{(} preprocessing token and +the grammar non-terminals are defined as: +\begin{ncbnf} +\nontermdef{pp-module-name}\br + \opt{pp-module-name-qualifier} identifier +\end{ncbnf} +\begin{ncbnf} +\nontermdef{pp-module-partition}\br + \terminal{:} \opt{pp-module-name-qualifier} identifier +\end{ncbnf} +\begin{ncbnf} +\nontermdef{pp-module-name-qualifier}\br + identifier \terminal{.}\br + pp-module-name-qualifier identifier \terminal{.} +\end{ncbnf} +No \grammarterm{identifier} in +the \grammarterm{pp-module-name} or \grammarterm{pp-module-partition} +shall currently be defined as an object-like macro. + +\pnum +Any preprocessing tokens after the \tcode{module} preprocessing token +in the \tcode{module} directive are processed just as in normal text. +\begin{note} +Each identifier currently defined as a macro name +is replaced by its replacement list of preprocessing tokens. +\end{note} + +\pnum +The \tcode{module} and \tcode{export} (if it exists) preprocessing tokens +are replaced by the \grammarterm{module-keyword} and +\grammarterm{export-keyword} preprocessing tokens respectively. +\begin{note} +This makes the line no longer a directive +so it is not removed at the end of phase 4. +\end{note} + +\rSec2[cpp.null]{Null directive}% +\indextext{preprocessing directive!null} + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\#} new-line +\end{ncsimplebnf} +has no effect. 
+ +\rSec2[cpp.cond]{Conditional inclusion}% +\indextext{preprocessing directive!conditional inclusion}% +\indextext{inclusion!conditional|see{preprocessing directive, conditional inclusion}} + +\indextext{\idxcode{defined}}% +\begin{bnf} +\nontermdef{defined-macro-expression}\br + \terminal{defined} identifier\br + \terminal{defined (} identifier \terminal{)} +\end{bnf} + +\begin{bnf} +\nontermdef{h-preprocessing-token}\br + \textnormal{any \grammarterm{preprocessing-token} other than \terminal{>}} +\end{bnf} + +\begin{bnf} +\nontermdef{h-pp-tokens}\br + h-preprocessing-token\br + h-pp-tokens h-preprocessing-token +\end{bnf} + +\begin{bnf} +\nontermdef{header-name-tokens}\br + string-literal\br + \terminal{<} h-pp-tokens \terminal{>} +\end{bnf} + +\indextext{\idxxname{has_include}}% +\begin{bnf} +\nontermdef{has-include-expression}\br + \terminal{\xname{has_include}} \terminal{(} header-name \terminal{)}\br + \terminal{\xname{has_include}} \terminal{(} header-name-tokens \terminal{)} +\end{bnf} + +\indextext{\idxxname{has_cpp_attribute}}% +\begin{bnf} +\nontermdef{has-attribute-expression}\br + \terminal{\xname{has_cpp_attribute} (} pp-tokens \terminal{)} +\end{bnf} + +\pnum +The expression that controls conditional inclusion +shall be an integral constant expression except that +identifiers +(including those lexically identical to keywords) +are interpreted as described below +\begin{footnote} +Because the controlling constant expression is evaluated +during translation phase 4, +all identifiers either are or are not macro names --- +there simply are no keywords, enumeration constants, etc. +\end{footnote} +and it may contain zero or more \grammarterm{defined-macro-expression}{s} and/or +\grammarterm{has-include-expression}{s} and/or +\grammarterm{has-attribute-expression}{s} as unary operator expressions. 
+ +\pnum +A \grammarterm{defined-macro-expression} evaluates to \tcode{1} +if the identifier is currently defined +as a macro name +(that is, if it is predefined +or if it has one or more active macro definitions\iref{cpp.import}, +for example because +it has been the subject of a +\tcode{\#define} +preprocessing directive +without an intervening +\tcode{\#undef} +directive with the same subject identifier), \tcode{0} if it is not. + +\pnum +The second form of \grammarterm{has-include-expression} +is considered only if the first form does not match, +in which case the preprocessing tokens are processed just as in normal text. + +\pnum +The header or source file identified by +the parenthesized preprocessing token sequence +in each contained \grammarterm{has-include-expression} +is searched for as if that preprocessing token sequence +were the \grammarterm{pp-tokens} in a \tcode{\#include} directive, +except that no further macro expansion is performed. +If such a directive would not satisfy the syntactic requirements +of a \tcode{\#include} directive, the program is ill-formed. +The \grammarterm{has-include-expression} evaluates +to \tcode{1} if the search for the source file succeeds, and +to \tcode{0} if the search fails. + +\pnum +Each \grammarterm{has-attribute-expression} is replaced by +a non-zero \grammarterm{pp-number} +matching the form of an \grammarterm{integer-literal} +if the implementation supports an attribute +with the name specified by interpreting +the \grammarterm{pp-tokens}, after macro expansion, +as an \grammarterm{attribute-token}, +and by \tcode{0} otherwise. +The program is ill-formed if the \grammarterm{pp-tokens} +do not match the form of an \grammarterm{attribute-token}. + +\pnum +For an attribute specified in this document, +it is \impldef{value of \grammarterm{has-attribute-expression} +for standard attributes} +whether the value of the \grammarterm{has-attribute-expression} +is \tcode{0} or is given by \tref{cpp.cond.ha}. 
+For other attributes recognized by the implementation, +the value is +\impldef{value of \grammarterm{has-attribute-expression} +for non-standard attributes}. +\begin{note} +It is expected +that the availability of an attribute can be detected by any non-zero result. +\end{note} + +\begin{floattable}{\xname{has_cpp_attribute} values}{cpp.cond.ha} +{ll} +\topline +\lhdr{Attribute} & \rhdr{Value} \\ \rowsep +\tcode{assume} & \tcode{202207L} \\ +\tcode{carries_dependency} & \tcode{200809L} \\ +\tcode{deprecated} & \tcode{201309L} \\ +\tcode{fallthrough} & \tcode{201603L} \\ +\tcode{likely} & \tcode{201803L} \\ +\tcode{maybe_unused} & \tcode{201603L} \\ +\tcode{no_unique_address} & \tcode{201803L} \\ +\tcode{nodiscard} & \tcode{201907L} \\ +\tcode{noreturn} & \tcode{200809L} \\ +\tcode{unlikely} & \tcode{201803L} \\ +\end{floattable} + +\pnum +The +\tcode{\#ifdef}, \tcode{\#ifndef}, \tcode{\#elifdef}, and \tcode{\#elifndef} +directives, and +the \tcode{defined} conditional inclusion operator, +shall treat \xname{has_include} and \xname{has_cpp_attribute} +as if they were the names of defined macros. +The identifiers \xname{has_include} and \xname{has_cpp_attribute} +shall not appear in any context not mentioned in this subclause. + +\pnum +Each preprocessing token that remains (in the list of preprocessing tokens that +will become the controlling expression) +after all macro replacements have occurred +shall be in the lexical form of a token\iref{lex.token}. + +\pnum +Preprocessing directives of the forms +\begin{ncsimplebnf}\obeyspaces +\indextext{\idxcode{\#if}}% +\terminal{\# if } constant-expression new-line \opt{group}\br +\indextext{\idxcode{\#elif}}% +\terminal{\# elif } constant-expression new-line \opt{group} +\end{ncsimplebnf} +check whether the controlling constant expression evaluates to nonzero. 
+ +\pnum +Prior to evaluation, +macro invocations in the list of preprocessing tokens +that will become the controlling constant expression +are replaced +(except for those macro names modified by the +\tcode{defined} +unary operator), +just as in normal text. +If the token +\tcode{defined} +is generated as a result of this replacement process +or use of the +\tcode{defined} +unary operator does not match one of the two specified forms +prior to macro replacement, +the behavior is undefined. + +\pnum +After all replacements due to macro expansion and +evaluations of +\grammarterm{defined-macro-expression}s, +\grammarterm{has-include-expression}s, and +\grammarterm{has-attribute-expression}s +have been performed, +all remaining identifiers and keywords, +except for +\tcode{true} +and +\tcode{false}, +are replaced with the \grammarterm{pp-number} +\tcode{0}, +and then each preprocessing token is converted into a token. +\begin{note} +An alternative +token\iref{lex.digraph} is not an identifier, +even when its spelling consists entirely of letters and underscores. +Therefore it is not subject to this replacement. +\end{note} + +\pnum +The resulting tokens comprise the controlling constant expression +which is evaluated according to the rules of~\ref{expr.const} +using arithmetic that has at least the ranges specified +in~\ref{support.limits}. For the purposes of this token conversion and evaluation +all signed and unsigned integer types +act as if they have the same representation as, respectively, +\tcode{intmax_t} or \tcode{uintmax_t}\iref{cstdint.syn}. +\begin{note} +Thus on an +implementation where \tcode{std::numeric_limits<int>::max()} is \tcode{0x7FFF} +and \tcode{std::numeric_limits<unsigned int>::max()} is \tcode{0xFFFF}, +the integer literal \tcode{0x8000} is signed and positive within a \tcode{\#if} +expression even though it is unsigned in translation phase +7\iref{lex.phase.7}.
+\end{note} +This includes interpreting \grammarterm{character-literal}s +according to the rules in \ref{lex.ccon}. +\begin{note} +The associated character encodings of literals are the same +in \tcode{\#if} and \tcode{\#elif} directives and in any expression. +\end{note} +Each subexpression with type +\tcode{bool} +is subjected to integral promotion before processing continues. + +\pnum +Preprocessing directives of the forms +\begin{ncsimplebnf}\obeyspaces +\terminal{\# ifdef } identifier new-line \opt{group}\br +\indextext{\idxcode{\#ifdef}}% +\terminal{\# ifndef } identifier new-line \opt{group}\br +\indextext{\idxcode{\#ifndef}}% +\terminal{\# elifdef } identifier new-line \opt{group}\br +\indextext{\idxcode{\#elifdef}}% +\terminal{\# elifndef} identifier new-line \opt{group} +\indextext{\idxcode{\#elifndef}}% +\end{ncsimplebnf} +check whether the identifier is or is not currently defined as a macro name. +Their conditions are equivalent to +\tcode{\#if} \tcode{defined} \grammarterm{identifier}, +\tcode{\#if} \tcode{!defined} \grammarterm{identifier}, +\tcode{\#elif} \tcode{defined} \grammarterm{identifier}, and +\tcode{\#elif} \tcode{!defined} \grammarterm{identifier}, +respectively. + +\pnum +Each directive's condition is checked in order. +If it evaluates to false (zero), +the group that it controls is skipped: +directives are processed only through the name that determines +the directive in order to keep track of the level +of nested conditionals; +the rest of the directives' preprocessing tokens are ignored, +as are the other preprocessing tokens in the group. +Only the first group +whose control condition evaluates to true (nonzero) is processed; +any following groups are skipped and their controlling directives +are processed as if they were in a group that is skipped. 
+If none of the conditions evaluates to true, +and there is a +\tcode{\#else} +\indextext{\idxcode{\#else}}% +directive, +the group controlled by the +\tcode{\#else} +is processed; lacking a +\tcode{\#else} +directive, all the groups until the +\tcode{\#endif} +\indextext{\idxcode{\#endif}}% +are skipped.% +\begin{footnote} +As indicated by the syntax, +a preprocessing token cannot follow a +\tcode{\#else} +or +\tcode{\#endif} +directive before the terminating new-line character. +However, +comments can appear anywhere in a source file, +including within a preprocessing directive. +\end{footnote} + +\pnum +\begin{example} +This demonstrates a way to include a library \tcode{optional} facility +only if it is available: + +\begin{codeblock} +#if __has_include(<optional>) +# include <optional> +# if __cpp_lib_optional >= 201603 +# define have_optional 1 +# endif +#elif __has_include(<experimental/optional>) +# include <experimental/optional> +# if __cpp_lib_experimental_optional >= 201411 +# define have_optional 1 +# define experimental_optional 1 +# endif +#endif +#ifndef have_optional +# define have_optional 0 +#endif +\end{codeblock} +\end{example} + +\pnum +\begin{example} +This demonstrates a way to use the attribute \tcode{[[acme::deprecated]]} +only if it is available. +\begin{codeblock} +#if __has_cpp_attribute(acme::deprecated) +# define ATTR_DEPRECATED(msg) [[acme::deprecated(msg)]] +#else +# define ATTR_DEPRECATED(msg) [[deprecated(msg)]] +#endif +ATTR_DEPRECATED("This function is deprecated") void anvil(); +\end{codeblock} +\end{example} + +\rSec2[cpp.include]{Source file inclusion} +\indextext{preprocessing directive!header inclusion} +\indextext{preprocessing directive!source-file inclusion} +\indextext{inclusion!source file|see{preprocessing directive, source-file inclusion}}% +\indextext{\idxcode{\#include}}% + +\pnum +A +\tcode{\#include} +directive shall identify a header or source file +that can be processed by the implementation.
+ +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# include <} h-char-sequence \terminal{>} new-line +\end{ncsimplebnf} +searches a sequence of +\impldef{sequence of places searched for a header} +places +for a header identified uniquely by the specified sequence +between the +\tcode{<} +and +\tcode{>} +delimiters, +and causes the replacement of that +directive by the entire contents of the header. +How the places are specified +or the header identified +is \impldef{search locations for \tcode{<>} header}. + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# include "} q-char-sequence \terminal{"} new-line +\end{ncsimplebnf} +causes the replacement of that +directive by the entire contents of the +source file identified by the specified sequence between the +\tcode{"} +delimiters. +The named source file is searched for in an +\impldef{manner of search for included source file} +manner. +If this search is not supported, +or if the search fails, +the directive is reprocessed as if it read +\begin{ncsimplebnf} +\terminal{\# include <} h-char-sequence \terminal{>} new-line +\end{ncsimplebnf} +with the identical contained sequence (including +\tcode{>} +characters, if any) from the original directive. + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# include} pp-tokens new-line +\end{ncsimplebnf} +(that does not match one of the two previous forms) is permitted. +The preprocessing tokens after +\tcode{include} +in the directive are processed just as in normal text +(i.e., each identifier currently defined as a macro name is replaced by its +replacement list of preprocessing tokens). +If the directive resulting after all replacements does not match +one of the two previous forms, the behavior is +undefined. 
+\begin{footnote} +Note that adjacent \grammarterm{string-literal}s are not concatenated into +a single \grammarterm{string-literal} +(see the translation phases in~\ref{lex.phase.6}); +thus, an expansion that results in two \grammarterm{string-literal}s is an +invalid directive. +\end{footnote} +The method by which a sequence of preprocessing tokens between a +\tcode{<} +and a +\tcode{>} +preprocessing token pair or a pair of +\tcode{"} +characters is combined into a single header name +preprocessing token is \impldef{search locations for \tcode{""""} header}. + +\pnum +The implementation shall provide unique mappings for +sequences consisting of one or more +\grammarterm{nondigit}{s} or \grammarterm{digit}{s}\iref{lex.name} +followed by a period +(\tcode{.}) +and a single +\grammarterm{nondigit}. +The first character shall not be a \grammarterm{digit}. +The implementation may ignore distinctions of alphabetical case. + +\pnum +A +\tcode{\#include} +preprocessing directive may appear +in a source file that has been read because of a +\tcode{\#include} +directive in another file, +up to an \impldef{nesting limit for \tcode{\#include} directives} nesting limit. + +\pnum +If the header identified by the \grammarterm{header-name} +denotes an importable header\iref{module.import}, +it is +\impldef{whether source file inclusion of importable header +is replaced with \tcode{import} directive} +whether the \tcode{\#include} preprocessing directive +is instead replaced by an \tcode{import} directive\iref{cpp.import} of the form +\begin{ncbnf} +\terminal{import} header-name \terminal{;} new-line +\end{ncbnf} + +\pnum +\begin{note} +An implementation can provide a mechanism for making arbitrary +source files available to the \tcode{< >} search. +However, using the \tcode{< >} form for headers provided +with the implementation and the \tcode{" "} form for sources +outside the control of the implementation +achieves wider portability. 
For instance: + +\begin{codeblock} +#include <stdio.h> +#include <unistd.h> +#include "usefullib.h" +#include "myprog.h" +\end{codeblock} + +\end{note} + +\pnum +\begin{example} +This illustrates macro-replaced +\tcode{\#include} +directives: + +\begin{codeblock} +#if VERSION == 1 + #define INCFILE "vers1.h" +#elif VERSION == 2 + #define INCFILE "vers2.h" // and so on +#else + #define INCFILE "versN.h" +#endif +#include INCFILE +\end{codeblock} +\end{example} + +\rSec2[cpp.import]{Header unit importation} +\indextext{header unit!preprocessing}% +\indextext{preprocessing directive!import}% +\indextext{macro!import|(}% + +\begin{bnf} +\nontermdef{pp-import}\br + \opt{\keyword{export}} \keyword{import} header-name \opt{pp-tokens} \terminal{;} new-line\br + \opt{\keyword{export}} \keyword{import} header-name-tokens \opt{pp-tokens} \terminal{;} new-line\br + \opt{\keyword{export}} \keyword{import} pp-tokens \terminal{;} new-line +\end{bnf} + +\pnum +A \grammarterm{pp-import} shall not +appear in a context where \tcode{import} +or (if it is the first token of the \grammarterm{pp-import}) \tcode{export} +is an identifier defined as an object-like macro. + +\pnum +The preprocessing tokens after the \tcode{import} preprocessing token +in the \tcode{import} \grammarterm{control-line} +are processed just as in normal text +(i.e., each identifier currently defined as a macro name +is replaced by its replacement list of preprocessing tokens). +\begin{note} +An \tcode{import} directive +matching the first two forms of a \grammarterm{pp-import} +instructs the preprocessor to import macros +from the header unit\iref{module.import} +denoted by the \grammarterm{header-name}, +as described below. +\end{note} +\indextext{point of!macro import|see{macro, point of import}}% +The \defnx{point of macro import}{macro!point of import} for the +first two forms of \grammarterm{pp-import} is +immediately after the \grammarterm{new-line} terminating +the \grammarterm{pp-import}.
+The last form of \grammarterm{pp-import} is only considered +if the first two forms did not match, and +does not have a point of macro import. + +\pnum +If a \grammarterm{pp-import} is produced by source file inclusion +(including by the rewrite produced +when a \tcode{\#include} directive names an importable header) +while processing the \grammarterm{group} of a \grammarterm{module-file}, +the program is ill-formed. + +\pnum +In all three forms of \grammarterm{pp-import}, +the \tcode{import} and \tcode{export} (if it exists) preprocessing tokens +are replaced by the \grammarterm{import-keyword} and +\grammarterm{export-keyword} preprocessing tokens respectively. +\begin{note} +This makes the line no longer a directive +so it is not removed at the end of phase 4. +\end{note} +Additionally, in the second form of \grammarterm{pp-import}, +a \grammarterm{header-name} token is formed as if +the \grammarterm{header-name-tokens} +were the \grammarterm{pp-tokens} of a \tcode{\#include} directive. +The \grammarterm{header-name-tokens} are replaced by +the \grammarterm{header-name} token. +\begin{note} +This ensures that imports are treated consistently by +the preprocessor and later phases of translation. +\end{note} + +\pnum +Each \tcode{\#define} directive encountered when preprocessing +each translation unit in a program results in a distinct +\defnx{macro definition}{macro!definition}. +\begin{note} +A predefined macro name\iref{cpp.predefined} +is not introduced by a \tcode{\#define} directive. +Implementations providing mechanisms to predefine additional macros +are encouraged to not treat them +as being introduced by a \tcode{\#define} directive. 
+\end{note} +Each macro definition has at most one point of definition in +each translation unit and at most one point of undefinition, as follows: +\begin{itemize} +\item +\indextext{point of!macro definition|see{macro, point of definition}}% +The \defnx{point of definition}{macro!point of definition} +of a macro definition within a translation unit $T$ is +\begin{itemize} +\item +if the \tcode{\#define} directive of the macro definition occurs within $T$, +the point at which that directive occurs, or otherwise, +\item +if the macro name is not lexically identical to a keyword\iref{lex.key} +or to the \grammarterm{identifier}{s} \tcode{module} or \tcode{import}, +the first point of macro import in $T$ of a header unit +containing a point of definition for the macro definition, if any. +\end{itemize} +In the latter case, the macro is said +to be \defnx{imported}{macro!import} from the header unit. + +\item +\indextext{point of!macro undefinition|see{macro, point of undefinition}}% +The \defnx{point of undefinition}{macro!point of undefinition} +of a macro definition within a translation unit +is the first point at which a \tcode{\#undef} directive naming the macro occurs +after its point of definition, or the first point +of macro import of a header unit containing a point of undefinition for the +macro definition, whichever (if any) occurs first. +\end{itemize} + +\pnum +\indextext{active macro definition|see{macro, active}}% +A macro definition is \defnx{active}{macro!active} at a source location +if it has a point of definition in that translation unit preceding the location, +and does not have a point of undefinition in that translation unit preceding +the location. + +\pnum +If a macro would be replaced or redefined, and multiple macro definitions +are active for that macro name, the active macro definitions shall all be +valid redefinitions of the same macro\iref{cpp.replace}. 
+\begin{note} +The relative order of \grammarterm{pp-import}{s} has no bearing on whether a +particular macro definition is active. +\end{note} + +\pnum +\begin{example} +\begin{codeblocktu}{Importable header \tcode{"a.h"}} +#define X 123 // \#1 +#define Y 45 // \#2 +#define Z a // \#3 +#undef X // point of undefinition of \#1 in \tcode{"a.h"} +\end{codeblocktu} + +\begin{codeblocktu}{Importable header \tcode{"b.h"}} +import "a.h"; // point of definition of \#1, \#2, and \#3, point of undefinition of \#1 in \tcode{"b.h"} +#define X 456 // OK, \#1 is not active +#define Y 6 // error: \#2 is active +\end{codeblocktu} + +\begin{codeblocktu}{Importable header \tcode{"c.h"}} +#define Y 45 // \#4 +#define Z c // \#5 +\end{codeblocktu} + +\begin{codeblocktu}{Importable header \tcode{"d.h"}} +import "c.h"; // point of definition of \#4 and \#5 in \tcode{"d.h"} +\end{codeblocktu} + +\begin{codeblocktu}{Importable header \tcode{"e.h"}} +import "a.h"; // point of definition of \#1, \#2, and \#3, point of undefinition of \#1 in \tcode{"e.h"} +import "d.h"; // point of definition of \#4 and \#5 in \tcode{"e.h"} +int a = Y; // OK, active macro definitions \#2 and \#4 are valid redefinitions +int c = Z; // error: active macro definitions \#3 and \#5 are not valid redefinitions of \tcode{Z} +\end{codeblocktu} + +\begin{codeblocktu}{Module unit \tcode{f}} +export module f; +export import "a.h"; + +int a = Y; // OK +\end{codeblocktu} + +\begin{codeblocktu}{Translation unit \tcode{\#1}} +import f; +int x = Y; // error: \tcode{Y} is neither a defined macro nor a declared name +\end{codeblocktu} +\end{example} +\indextext{macro!import|)} + +\rSec2[cpp.replace]{Macro replacement}% + +\rSec3[cpp.replace.general]{General}% +\indextext{macro!replacement|(}% +\indextext{replacement!macro|see{macro, replacement}}% +\indextext{preprocessing directive!macro replacement|see{macro, replacement}} + +\pnum +\indextext{macro!replacement list}% +Two replacement lists are identical if and only if 
+the preprocessing tokens in both have +the same number, ordering, spelling, and whitespace separation, +where all whitespace separations are considered identical. + +\pnum +An identifier currently defined as an +\indextext{macro!object-like}% +object-like macro (see below) may be redefined by another +\tcode{\#define} +preprocessing directive provided that the second definition is an +object-like macro definition and the two replacement lists +are identical, otherwise the program is ill-formed. +Likewise, an identifier currently defined as a +\indextext{macro!function-like}% +function-like macro (see below) may be redefined by another +\tcode{\#define} +preprocessing directive provided that the second definition is a +function-like macro definition that has the same number and spelling +of parameters, +and the two replacement lists are identical, +otherwise the program is ill-formed. + +\pnum +\begin{example} +The following sequence is valid: +\begin{codeblock} +#define OBJ_LIKE (1-1) +#define OBJ_LIKE @\tcode{/* whitespace */ (1-1) /* other */}@ +#define FUNC_LIKE(a) ( a ) +#define FUNC_LIKE( a )( @\tcode{/* note the whitespace */ \textbackslash}@ + a @\tcode{/* other stuff on this line}@ + @\tcode{*/}@ ) +\end{codeblock} +But the following redefinitions are invalid: +\begin{codeblock} +#define OBJ_LIKE (0) // different token sequence +#define OBJ_LIKE (1 - 1) // different whitespace +#define FUNC_LIKE(b) ( a ) // different parameter usage +#define FUNC_LIKE(b) ( b ) // different parameter spelling +\end{codeblock} +\end{example} + +\pnum +\indextext{macro!replacement list}% +There shall be whitespace between the identifier and the replacement list +in the definition of an object-like macro. 
+ +\pnum +If the \grammarterm{identifier-list} in the macro definition does not end with +an ellipsis, the number of arguments (including those arguments consisting +of no preprocessing tokens) +in an invocation of a function-like macro shall +equal the number of parameters in the macro definition. +Otherwise, there shall be at least as many arguments in the invocation as there are +parameters in the macro definition (excluding the \tcode{...}). There +shall exist a +\tcode{)} +preprocessing token that terminates the invocation. + +\pnum +\indextext{__va_args__@\mname{VA_ARGS}}% +\indextext{__va_opt__@\mname{VA_OPT}}% +The identifiers \mname{VA_ARGS} and \mname{VA_OPT} +shall occur only in the \grammarterm{replacement-list} +of a function-like macro that uses the ellipsis notation in the parameters. + +\pnum +A parameter identifier in a function-like macro +shall be uniquely declared within its scope. + +\pnum +The identifier immediately following the +\tcode{define} +is called the +\indextext{name!macro|see{macro, name}}% +\defnx{macro name}{macro!name}. +There is one name space for macro names. +Any whitespace characters preceding or following the +replacement list of preprocessing tokens are not considered +part of the replacement list for either form of macro. + +\pnum +If a +\indextext{\#\#0 operator@\tcode{\#} operator} +\tcode{\#} +preprocessing token, +followed by an identifier, +occurs lexically +at the point at which a preprocessing directive can begin, +the identifier is not subject to macro replacement. 
+ +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# define} identifier replacement-list new-line +\indextext{\idxcode{\#define}}% +\end{ncsimplebnf} +defines an +\defnadj{object-like}{macro} that +causes each subsequent instance of the macro name +\begin{footnote} +Since, by macro-replacement time, +all \grammarterm{character-literal}s and \grammarterm{string-literal}s are preprocessing tokens, +not sequences possibly containing identifier-like subsequences +(see~\ref{lex.phase.3}, translation phases), +they are never scanned for macro names or parameters. +\end{footnote} +to be replaced by the replacement list of preprocessing tokens +that constitute the remainder of the directive. +\begin{footnote} +An alternative token\iref{lex.digraph} is not an identifier, +even when its spelling consists entirely of letters and underscores. +Therefore it is not possible to define a macro +whose name is the same as that of an alternative token. +\end{footnote} +The replacement list is then rescanned for more macro names as +specified below. + +\pnum +\begin{example} +The simplest use of this facility is to define a ``manifest constant'', +as in +\begin{codeblock} +#define TABSIZE 100 +int table[TABSIZE]; +\end{codeblock} +\end{example} + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# define} identifier lparen \opt{identifier-list} \terminal{)} replacement-list new-line\br +\terminal{\# define} identifier lparen \terminal{...} \terminal{)} replacement-list new-line\br +\terminal{\# define} identifier lparen identifier-list \terminal{, ...} \terminal{)} replacement-list new-line +\end{ncsimplebnf} +defines a \defnadj{function-like}{macro} +with parameters, whose use is +similar syntactically to a function call. +The parameters +\indextext{parameter!macro}% +are specified by the optional list of identifiers. 
+Each subsequent instance of the function-like macro name followed by a +\tcode{(} +as the next preprocessing token +introduces the sequence of preprocessing tokens that is replaced +by the replacement list in the definition +(an invocation of the macro). +\indextext{invocation!macro}% +The replaced sequence of preprocessing tokens is terminated by the matching +\tcode{)} +preprocessing token, skipping intervening matched pairs of left and +right parenthesis preprocessing tokens. +Within the sequence of preprocessing tokens making up an invocation +of a function-like macro, +new-line is considered a normal whitespace character. + +\pnum +\indextext{macro!function-like!arguments}% +The sequence of preprocessing tokens +bounded by the outside-most matching parentheses +forms the list of arguments for the function-like macro. +The individual arguments within the list +are separated by comma preprocessing tokens, +but comma preprocessing tokens between matching +inner parentheses do not separate arguments. +If there are sequences of preprocessing tokens within the list of +arguments that would otherwise act as preprocessing directives, +\begin{footnote} +A \grammarterm{conditionally-supported-directive} is a preprocessing directive regardless of whether the implementation supports it. +\end{footnote} +the behavior is undefined. + +\pnum +\begin{example} +The following defines a function-like +macro whose value is the maximum of its arguments. +It has the disadvantages of evaluating one or the other of its arguments +a second time +(including +\indextext{side effects}% +side effects) +and generating more code than a function if invoked several times. +It also cannot have its address taken, +as it has none. + +\begin{codeblock} +#define max(a, b) ((a) > (b) ? (a) : (b)) +\end{codeblock} + +The parentheses ensure that the arguments and +the resulting expression are bound properly. 
+\end{example} + +\pnum +\indextext{macro!function-like!arguments}% +If there is a \tcode{...} immediately preceding the \tcode{)} in the +function-like macro +definition, then the trailing arguments (if any), including any separating comma preprocessing +tokens, are merged to form a single item: the \defn{variable arguments}. The number of +arguments so combined is such that, following merger, the number of arguments is +either equal to or +one more than the number of parameters in the macro definition (excluding the +\tcode{...}). + +\rSec3[cpp.subst]{Argument substitution}% +\indextext{macro!argument substitution}% +\indextext{argument substitution|see{macro, argument substitution}}% + +\indextext{__va_opt__@\mname{VA_OPT}}% +\begin{bnf} +\nontermdef{va-opt-replacement}\br + \terminal{\mname{VA_OPT} (} \opt{pp-tokens} \terminal{)} +\end{bnf} + +\pnum +After the arguments for the invocation of a function-like macro have +been identified, argument substitution takes place. +For each parameter in the replacement list that is neither +preceded by a \tcode{\#} or \tcode{\#\#} preprocessing token nor +followed by a \tcode{\#\#} preprocessing token, the preprocessing tokens +naming the parameter are replaced by a token sequence determined as follows: +\begin{itemize} +\item + If the parameter is of the form \grammarterm{va-opt-replacement}, + the replacement preprocessing tokens are the + preprocessing token sequence for the corresponding argument, + as specified below. +\item + Otherwise, the replacement preprocessing tokens are the + preprocessing tokens of the corresponding argument after all + macros contained therein have been expanded. The argument's + preprocessing tokens are completely macro replaced before + being substituted as if they formed the rest of the preprocessing + file with no other preprocessing tokens being available. +\end{itemize} +\begin{example} +\begin{codeblock} +#define LPAREN() ( +#define G(Q) 42 +#define F(R, X, ...) 
__VA_OPT__(G R X) ) +int x = F(LPAREN(), 0, <:-); // replaced by \tcode{int x = 42;} +\end{codeblock} +\end{example} + +\pnum +\indextext{__va_args__@\mname{VA_ARGS}}% +An identifier \mname{VA_ARGS} that occurs in the replacement list +shall be treated as if it were a parameter, and the variable arguments shall form +the preprocessing tokens used to replace it. + +\pnum +\begin{example} +\begin{codeblock} +#define debug(...) fprintf(stderr, @\mname{VA_ARGS}@) +#define showlist(...) puts(#@\mname{VA_ARGS}@) +#define report(test, ...) ((test) ? puts(#test) : printf(@\mname{VA_ARGS}@)) +debug("Flag"); +debug("X = %d\n", x); +showlist(The first, second, and third items.); +report(x>y, "x is %d but y is %d", x, y); +\end{codeblock} +results in +\begin{codeblock} +fprintf(stderr, "Flag"); +fprintf(stderr, "X = %d\n", x); +puts("The first, second, and third items."); +((x>y) ? puts("x>y") : printf("x is %d but y is %d", x, y)); +\end{codeblock} +\end{example} + +\pnum +\indextext{__va_opt__@\mname{VA_OPT}}% +The identifier \mname{VA_OPT} +shall always occur as part of the preprocessing token sequence +\grammarterm{va-opt-replacement}; +its closing \tcode{)} is determined by skipping +intervening pairs of matching left and right parentheses +in its \grammarterm{pp-tokens}. +The \grammarterm{pp-tokens} of a \grammarterm{va-opt-replacement} +shall not contain \mname{VA_OPT}. +If the \grammarterm{pp-tokens} would be ill-formed +as the replacement list of the current function-like macro, +the program is ill-formed. +A \grammarterm{va-opt-replacement} is treated as if it were a parameter, +and the preprocessing token sequence for the corresponding +argument is defined as follows. +If the substitution of \mname{VA_ARGS} as neither an operand +of \tcode{\#} nor \tcode{\#\#} consists of no preprocessing tokens, +the argument consists of +a single placemarker preprocessing token\iref{cpp.concat,cpp.rescan}. 
+Otherwise, the argument consists of +the results of the expansion of the contained \grammarterm{pp-tokens} +as the replacement list of the current function-like macro +before removal of placemarker tokens, rescanning, and further replacement. +\begin{note} +The placemarker tokens are removed before stringization\iref{cpp.stringize}, +and can be removed by rescanning and further replacement\iref{cpp.rescan}. +\end{note} +\begin{example} +\begin{codeblock} +#define F(...) f(0 __VA_OPT__(,) __VA_ARGS__) +#define G(X, ...) f(0, X __VA_OPT__(,) __VA_ARGS__) +#define SDEF(sname, ...) S sname __VA_OPT__(= { __VA_ARGS__ }) +#define EMP + +F(a, b, c) // replaced by \tcode{f(0, a, b, c)} +F() // replaced by \tcode{f(0)} +F(EMP) // replaced by \tcode{f(0)} + +G(a, b, c) // replaced by \tcode{f(0, a, b, c)} +G(a, ) // replaced by \tcode{f(0, a)} +G(a) // replaced by \tcode{f(0, a)} + +SDEF(foo); // replaced by \tcode{S foo;} +SDEF(bar, 1, 2); // replaced by \tcode{S bar = \{ 1, 2 \};} + +#define H1(X, ...) X __VA_OPT__(##) __VA_ARGS__ // error: \tcode{\#\#} may not appear at + // the beginning of a replacement list\iref{cpp.concat} + +#define H2(X, Y, ...) __VA_OPT__(X ## Y,) __VA_ARGS__ +H2(a, b, c, d) // replaced by \tcode{ab, c, d} + +#define H3(X, ...) #__VA_OPT__(X##X X##X) +H3(, 0) // replaced by \tcode{""} + +#define H4(X, ...) __VA_OPT__(a X ## X) ## b +H4(, 1) // replaced by \tcode{a b} + +#define H5A(...) __VA_OPT__()@\tcode{/**/}@__VA_OPT__() +#define H5B(X) a ## X ## b +#define H5C(X) H5B(X) +H5C(H5A()) // replaced by \tcode{ab} +\end{codeblock} +\end{example} + +\rSec3[cpp.stringize]{The \tcode{\#} operator}% +\indextext{\#\#0 operator@\tcode{\#} operator}% +\indextext{stringize|see{\tcode{\#} operator}} + +\pnum +Each +\tcode{\#} +preprocessing token in the replacement list for a function-like +macro shall be followed by a parameter as the next preprocessing +token in the replacement list. 
+ +\pnum +A \defn{character string literal} is a \grammarterm{string-literal} with no prefix. +If, in the replacement list, a parameter is immediately +preceded by a +\tcode{\#} +preprocessing token, +both are replaced by a single character string literal preprocessing token that +contains the spelling of the preprocessing token sequence for the +corresponding argument (excluding placemarker tokens). +Let the \defn{stringizing argument} be the preprocessing token sequence +for the corresponding argument with placemarker tokens removed. +Each occurrence of whitespace between the stringizing argument's preprocessing +tokens becomes a single space character in the character string literal. +Whitespace before the first preprocessing token and after the last +preprocessing token comprising the stringizing argument is deleted. +Otherwise, the original spelling of each preprocessing token in the +stringizing argument is retained in the character string literal, +except for special handling for producing the spelling of +\grammarterm{string-literal}s and \grammarterm{character-literal}s: +a +\tcode{\textbackslash} +character is inserted before each +\tcode{"} +and +\tcode{\textbackslash} +character of a \grammarterm{character-literal} or \grammarterm{string-literal} +(including the delimiting +\tcode{"} +characters). +If the replacement that results is not a valid character string literal, +the behavior is undefined. The character string literal corresponding to +an empty stringizing argument is \tcode{""}. +The order of evaluation of +\tcode{\#} +and +\tcode{\#\#} +operators is unspecified. + +\rSec3[cpp.concat]{The \tcode{\#\#} operator}% +\indextext{\#\#1 operator@\tcode{\#\#} operator}% +\indextext{concatenation!macro argument|see{\tcode{\#\#} operator}} + +\pnum +A +\tcode{\#\#} +preprocessing token shall not occur at the beginning or +at the end of a replacement list for either form +of macro definition. 
+ +\pnum +If, in the replacement list of a function-like macro, a parameter is +immediately preceded or followed by a +\tcode{\#\#} +preprocessing token, the parameter is replaced by the +corresponding argument's preprocessing token sequence; however, if an argument consists of no preprocessing tokens, the parameter is +replaced by a placemarker preprocessing token instead. +\begin{footnote} +Placemarker preprocessing tokens do not appear in the syntax +because they are temporary entities that exist only within translation phase 4. +\end{footnote} + +\pnum +For both object-like and function-like macro invocations, before the +replacement list is reexamined for more macro names to replace, +each instance of a +\tcode{\#\#} +preprocessing token in the replacement list +(not from an argument) is deleted and the +preceding preprocessing token is concatenated +with the following preprocessing token. +Placemarker preprocessing tokens are handled specially: concatenation +of two placemarkers results in a single placemarker preprocessing token, and +concatenation of a placemarker with a non-placemarker preprocessing token results +in the non-placemarker preprocessing token. +\begin{note} +Concatenation can form +a \grammarterm{universal-character-name}\iref{lex.charset}. +\end{note} +If the result is not a valid preprocessing token, +the behavior is undefined. +The resulting token is available for further macro replacement. +The order of evaluation of +\tcode{\#\#} +operators is unspecified. 
+ +\pnum +\begin{example} +The sequence +\begin{codeblock} +#define str(s) # s +#define xstr(s) str(s) +#define debug(s, t) printf("x" # s "= %d, x" # t "= %s", @\textbackslash@ + x ## s, x ## t) +#define INCFILE(n) vers ## n +#define glue(a, b) a ## b +#define xglue(a, b) glue(a, b) +#define HIGHLOW "hello" +#define LOW LOW ", world" + +debug(1, 2); +fputs(str(strncmp("abc@\textbackslash@0d", "abc", '@\textbackslash@4') // this goes away + == 0) str(: @\atsign\textbackslash@n), s); +#include xstr(INCFILE(2).h) +glue(HIGH, LOW); +xglue(HIGH, LOW) +\end{codeblock} +results in +\begin{codeblock} +printf("x" "1" "= %d, x" "2" "= %s", x1, x2); +fputs("strncmp(@\textbackslash@"abc@\textbackslash\textbackslash@0d@\textbackslash@", @\textbackslash@"abc@\textbackslash@", '@\textbackslash\textbackslash@4') == 0" ": @\atsign\textbackslash@n", s); +#include "vers2.h" @\textrm{(\textit{after macro replacement, before file access})}@ +"hello"; +"hello" ", world" +\end{codeblock} +or, after concatenation of the character string literals, +\begin{codeblock} +printf("x1= %d, x2= %s", x1, x2); +fputs("strncmp(@\textbackslash@"abc@\textbackslash\textbackslash@0d@\textbackslash@", @\textbackslash@"abc@\textbackslash@", '@\textbackslash\textbackslash@4') == 0: @\atsign\textbackslash@n", s); +#include "vers2.h" @\textrm{(\textit{after macro replacement, before file access})}@ +"hello"; +"hello, world" +\end{codeblock} + +Space around the \tcode{\#} and \tcode{\#\#} tokens in the macro definition +is optional. 
+\end{example} + +\pnum +\begin{example} +In the following fragment: + +\begin{codeblock} +#define hash_hash # ## # +#define mkstr(a) # a +#define in_between(a) mkstr(a) +#define join(c, d) in_between(c hash_hash d) +char p[] = join(x, y); // equivalent to \tcode{char p[] = "x \#\# y";} +\end{codeblock} + +The expansion produces, at various stages: + +\begin{codeblock} +join(x, y) +in_between(x hash_hash y) +in_between(x ## y) +mkstr(x ## y) +"x ## y" +\end{codeblock} + +In other words, expanding \tcode{hash_hash} produces a new token, +consisting of two adjacent sharp signs, but this new token is not the +\tcode{\#\#} operator. +\end{example} + +\pnum +\begin{example} +To illustrate the rules for placemarker preprocessing tokens, the sequence +\begin{codeblock} +#define t(x,y,z) x ## y ## z +int j[] = { t(1,2,3), t(,4,5), t(6,,7), t(8,9,), + t(10,,), t(,11,), t(,,12), t(,,) }; +\end{codeblock} +results in +\begin{codeblock} +int j[] = { 123, 45, 67, 89, + 10, 11, 12, }; +\end{codeblock} +\end{example} + +\rSec3[cpp.rescan]{Rescanning and further replacement}% +\indextext{macro!rescanning and replacement}% +\indextext{rescanning and replacement|see{macro, rescanning and replacement}} + +\pnum +After all parameters in the replacement list have been substituted and \tcode{\#} and \tcode{\#\#} processing has taken +place, all placemarker preprocessing tokens are removed. Then +the resulting preprocessing token sequence is rescanned, along with all +subsequent preprocessing tokens of the source file, for more macro names +to replace. 
+ +\pnum +\begin{example} +The sequence +\begin{codeblock} +#define x 3 +#define f(a) f(x * (a)) +#undef x +#define x 2 +#define g f +#define z z[0] +#define h g(~ +#define m(a) a(w) +#define w 0,1 +#define t(a) a +#define p() int +#define q(x) x +#define r(x,y) x ## y +#define str(x) # x + +f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); +g(x+(3,4)-w) | h 5) & m + (f)^m(m); +p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) }; +char c[2][6] = { str(hello), str() }; +\end{codeblock} +results in +\begin{codeblock} +f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1); +f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1); +int i[] = { 1, 23, 4, 5, }; +char c[2][6] = { "hello", "" }; +\end{codeblock} +\end{example} + +\pnum +If the name of the macro being replaced is found during this scan of +the replacement list +(not including the rest of the source file's preprocessing tokens), +it is not replaced. +Furthermore, +if any nested replacements encounter the name of the macro being replaced, +it is not replaced. +These nonreplaced macro name preprocessing tokens are no longer available +for further replacement even if they are later (re)examined in contexts +in which that macro name preprocessing token would otherwise have been +replaced. + +\pnum +The resulting completely macro-replaced preprocessing token sequence +is not processed as a preprocessing directive even if it resembles one, +but all pragma unary operator expressions within it are then processed as +specified in~\ref{cpp.pragma.op} below. + +\rSec3[cpp.scope]{Scope of macro definitions}% +\indextext{macro!scope of definition}% +\indextext{scope!macro definition|see{macro, scope of definition}} + +\pnum +A macro definition lasts +(independent of block structure) +until a corresponding +\tcode{\#undef} +directive is encountered or +(if none is encountered) +until the end of the translation unit. +Macro definitions have no significance after translation phase 4. 
+ +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# undef} identifier new-line +\indextext{\idxcode{\#undef}}% +\end{ncsimplebnf} +causes the specified identifier no longer to be defined as a macro name. +It is ignored if the specified identifier is not currently defined as +a macro name. + +\indextext{macro!replacement|)} + +\rSec2[cpp.line]{Line control}% +\indextext{preprocessing directive!line control}% +\indextext{\idxcode{\#line}|see{preprocessing directive, line control}} + +\pnum +The \grammarterm{string-literal} of a +\tcode{\#line} +directive, if present, +shall be a character string literal. + +\pnum +The +\defn{line number} +of the current source line is one greater than +the number of new-line characters read or introduced +in translation phase 1\iref{lex.phase.1} +while processing the source file to the current token. + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# line} digit-sequence new-line +\end{ncsimplebnf} +causes the implementation to behave as if +the following sequence of source lines begins with a +source line that has a line number as specified +by the digit sequence (interpreted as a decimal integer). +If the digit sequence specifies zero +or a number greater than 2147483647, +the behavior is undefined. + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# line} digit-sequence \terminal{"} \opt{s-char-sequence} \terminal{"} new-line +\end{ncsimplebnf} +sets the presumed line number similarly and changes the +presumed name of the source file to be the contents +of the character string literal. + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# line} pp-tokens new-line +\end{ncsimplebnf} +(that does not match one of the two previous forms) +is permitted. 
+The preprocessing tokens after +\tcode{line} +on the directive are processed just as in normal text +(each identifier currently defined as a macro name is replaced by its +replacement list of preprocessing tokens). +If the directive resulting after all replacements does not match +one of the two previous forms, the behavior is undefined; +otherwise, the result is processed as appropriate. + +\rSec2[cpp.error]{Diagnostic directives}% +\indextext{preprocessing directive!error}% +\indextext{preprocessing directive!diagnostic}% +\indextext{preprocessing directive!warning}% +\indextext{\idxcode{\#error}|see{preprocessing directive, error}} + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# error} \opt{pp-tokens} new-line +\end{ncsimplebnf} +renders the program ill-formed. +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# warning} \opt{pp-tokens} new-line +\end{ncsimplebnf} +requires the implementation to produce at least one diagnostic message +for the preprocessing translation unit\iref{intro.compliance.general}. +\recommended +Any diagnostic message caused by either of these directives +should include the specified sequence of preprocessing tokens. + +\rSec2[cpp.pragmas]{Pragmas}% + +\rSec3[cpp.pragma]{Pragma directive}% +\indextext{preprocessing directive!pragma}% +\indextext{\idxcode{\#pragma}|see{preprocessing directive, pragma}} + +\pnum +A preprocessing directive of the form +\begin{ncsimplebnf} +\terminal{\# pragma} \opt{pp-tokens} new-line +\end{ncsimplebnf} +causes the implementation to behave +in an \impldef{\tcode{\#pragma}} manner. +The behavior may cause translation to fail or cause the translator or +the resulting program to behave in a non-conforming manner. +Any pragma that is not recognized by the implementation is ignored. 
+ +\rSec3[cpp.pragma.op]{Pragma operator}% +\indextext{macro!pragma operator}% +\indextext{operator!pragma|see{macro, pragma operator}} + +\pnum +A unary operator expression of the form: +\begin{ncbnf} +\terminal{_Pragma} \terminal{(} string-literal \terminal{)} +\end{ncbnf} +is processed as follows: The \grammarterm{string-literal} is \defnx{destringized}{destringization} +by deleting the \tcode{L} prefix, if present, deleting the leading and trailing +double-quotes, replacing each escape sequence \tcode{\textbackslash"} by a double-quote, and +replacing each escape sequence \tcode{\textbackslash\textbackslash} by a single +backslash. The resulting sequence of characters is processed through translation phase 3 +to produce preprocessing tokens that are executed as if they were the +\grammarterm{pp-tokens} in a pragma directive. The original four preprocessing +tokens in the unary operator expression are removed. + +\pnum +\begin{example} +\begin{codeblock} +#pragma listing on "..\listing.dir" +\end{codeblock} +can also be expressed as: +\begin{codeblock} +_Pragma ( "listing on \"..\\listing.dir\"" ) +\end{codeblock} +The latter form is processed in the same way whether it appears literally +as shown, or results from macro replacement, as in: +\begin{codeblock} +#define LISTING(x) PRAGMA(listing on #x) +#define PRAGMA(x) _Pragma(#x) + +LISTING( ..\listing.dir ) +\end{codeblock} +\end{example} + +\rSec2[cpp.predefined]{Predefined macro names} +\indextext{macro!predefined}% +\indextext{name!predefined macro|see{macro, predefined}} + +\pnum +The following macro names shall be defined by the implementation: + +\begin{description} + +\item +\indextext{\idxxname{cplusplus}}% +\xname{cplusplus}\\ +The integer literal \tcode{\cppver}. +\begin{note} +Future revisions of this document will +replace the value of this macro with a greater value. 
+\end{note} + +\item The names listed in \tref{cpp.predefined.ft}.\\ +The macros defined in \tref{cpp.predefined.ft} shall be defined to +the corresponding integer literal. +\begin{note} +Future revisions of this document might replace +the values of these macros with greater values. +\end{note} + +\item +\indextext{__date__@\mname{DATE}}% +\mname{DATE}\\ +The date of translation of the source file: +a character string literal of the form +\tcode{"Mmm~dd~yyyy"}, +where the names of the months are the same as those generated +by the +\tcode{asctime} +function, +and the first character of +\tcode{dd} +is a space character if the value is less than 10. +If the date of translation is not available, +an \impldef{text of \mname{DATE} when date of translation is not available} valid date +shall be supplied. + +\item +\indextext{__file__@\mname{FILE}}% +\mname{FILE}\\ +The presumed name of the current source file (a character string +literal). +\begin{footnote} +The presumed source file name can be changed by the \tcode{\#line} directive. +\end{footnote} + +\item +\indextext{__line__@\mname{LINE}}% +\mname{LINE}\\ +The presumed line number (within the current source file) of the current source line +(an integer literal). +\begin{footnote} +The presumed line number can be changed by the \tcode{\#line} directive. +\end{footnote} + +\item +\indextext{__stdc_hosted__@\mname{STDC_HOSTED}}% +\indextext{implementation!hosted}% +\indextext{implementation!freestanding}% +\mname{STDC_HOSTED}\\ +The integer literal \tcode{1} +if the implementation is a hosted implementation or +the integer literal \tcode{0} +if it is a freestanding implementation\iref{intro.compliance}. + +\item +\indextext{__stdcpp_default_new_alignment__@\mname{STDCPP_DEFAULT_NEW_ALIGNMENT}}% +\mname{STDCPP_DEFAULT_NEW_ALIGNMENT}\\ +An integer literal of type \tcode{std::size_t} +whose value is the alignment guaranteed +by a call to \tcode{operator new(std::size_t)} +or \tcode{operator new[](std::size_t)}. 
+\begin{note} +Larger alignments will be passed to +\tcode{operator new(std::size_t, std::align_val_t)}, etc.\iref{expr.new}. +\end{note} + +\item +\indextext{__stdcpp_float16_t__@\mname{STDCPP_FLOAT16_T}}% +\mname{STDCPP_FLOAT16_T}\\ +Defined as the integer literal \tcode{1} +if and only if the implementation supports +the ISO/IEC/IEEE 60559 floating-point interchange format binary16 +as an extended floating-point type\iref{basic.extended.fp}. + +\item +\indextext{__stdcpp_float32_t__@\mname{STDCPP_FLOAT32_T}}% +\mname{STDCPP_FLOAT32_T}\\ +Defined as the integer literal \tcode{1} +if and only if the implementation supports +the ISO/IEC/IEEE 60559 floating-point interchange format binary32 +as an extended floating-point type. + +\item +\indextext{__stdcpp_float64_t__@\mname{STDCPP_FLOAT64_T}}% +\mname{STDCPP_FLOAT64_T}\\ +Defined as the integer literal \tcode{1} +if and only if the implementation supports +the ISO/IEC/IEEE 60559 floating-point interchange format binary64 +as an extended floating-point type. + +\item +\indextext{__stdcpp_float128_t__@\mname{STDCPP_FLOAT128_T}}% +\mname{STDCPP_FLOAT128_T}\\ +Defined as the integer literal \tcode{1} +if and only if the implementation supports +the ISO/IEC/IEEE 60559 floating-point interchange format binary128 +as an extended floating-point type. + +\item +\indextext{__stdcpp_bfloat16_t__@\mname{STDCPP_BFLOAT16_T}}% +\mname{STDCPP_BFLOAT16_T}\\ +Defined as the integer literal \tcode{1} +if and only if the implementation supports an extended floating-point type +with the properties of the \grammarterm{typedef-name} \tcode{std::bfloat16_t} +as described in \ref{basic.extended.fp}. + +\item +\indextext{__time__@\mname{TIME}}% +\mname{TIME}\\ +The time of translation of the source file: +a character string literal of the form +\tcode{"hh:mm:ss"} +as in the time generated by the +\tcode{asctime} +function. 
+If the time of translation is not available, +an \impldef{text of \mname{TIME} when time of translation is not available} valid time shall be supplied. +\end{description} + +\indextext{macro!feature-test}% +\indextext{feature-test macro|see{macro, feature-test}}% +\begin{LongTable}{Feature-test macros}{cpp.predefined.ft}{ll} +\\ \topline +\lhdr{Macro name} & \rhdr{Value} \\ \capsep +\endfirsthead +\continuedcaption \\ +\hline +\lhdr{Name} & \rhdr{Value} \\ \capsep +\endhead +\defnxname{cpp_aggregate_bases} & \tcode{201603L} \\ \rowsep +\defnxname{cpp_aggregate_nsdmi} & \tcode{201304L} \\ \rowsep +\defnxname{cpp_aggregate_paren_init} & \tcode{201902L} \\ \rowsep +\defnxname{cpp_alias_templates} & \tcode{200704L} \\ \rowsep +\defnxname{cpp_aligned_new} & \tcode{201606L} \\ \rowsep +\defnxname{cpp_attributes} & \tcode{200809L} \\ \rowsep +\defnxname{cpp_auto_cast} & \tcode{202110L} \\ \rowsep +\defnxname{cpp_binary_literals} & \tcode{201304L} \\ \rowsep +\defnxname{cpp_capture_star_this} & \tcode{201603L} \\ \rowsep +\defnxname{cpp_char8_t} & \tcode{202207L} \\ \rowsep +\defnxname{cpp_concepts} & \tcode{202002L} \\ \rowsep +\defnxname{cpp_conditional_explicit} & \tcode{201806L} \\ \rowsep +\defnxname{cpp_constexpr} & \tcode{202306L} \\ \rowsep +\defnxname{cpp_constexpr_dynamic_alloc} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_constexpr_in_decltype} & \tcode{201711L} \\ \rowsep +\defnxname{cpp_consteval} & \tcode{202211L} \\ \rowsep +\defnxname{cpp_constinit} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_decltype} & \tcode{200707L} \\ \rowsep +\defnxname{cpp_decltype_auto} & \tcode{201304L} \\ \rowsep +\defnxname{cpp_deduction_guides} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_delegating_constructors} & \tcode{200604L} \\ \rowsep +\defnxname{cpp_deleted_function} & \tcode{202403L} \\ \rowsep +\defnxname{cpp_designated_initializers} & \tcode{201707L} \\ \rowsep +\defnxname{cpp_enumerator_attributes} & \tcode{201411L} \\ \rowsep 
+\defnxname{cpp_explicit_this_parameter} & \tcode{202110L} \\ \rowsep +\defnxname{cpp_fold_expressions} & \tcode{201603L} \\ \rowsep +\defnxname{cpp_generic_lambdas} & \tcode{201707L} \\ \rowsep +\defnxname{cpp_guaranteed_copy_elision} & \tcode{201606L} \\ \rowsep +\defnxname{cpp_hex_float} & \tcode{201603L} \\ \rowsep +\defnxname{cpp_if_consteval} & \tcode{202106L} \\ \rowsep +\defnxname{cpp_if_constexpr} & \tcode{201606L} \\ \rowsep +\defnxname{cpp_impl_coroutine} & \tcode{201902L} \\ \rowsep +\defnxname{cpp_impl_destroying_delete} & \tcode{201806L} \\ \rowsep +\defnxname{cpp_impl_three_way_comparison} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_implicit_move} & \tcode{202207L} \\ \rowsep +\defnxname{cpp_inheriting_constructors} & \tcode{201511L} \\ \rowsep +\defnxname{cpp_init_captures} & \tcode{201803L} \\ \rowsep +\defnxname{cpp_initializer_lists} & \tcode{200806L} \\ \rowsep +\defnxname{cpp_inline_variables} & \tcode{201606L} \\ \rowsep +\defnxname{cpp_lambdas} & \tcode{200907L} \\ \rowsep +\defnxname{cpp_modules} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_multidimensional_subscript} & \tcode{202211L} \\ \rowsep +\defnxname{cpp_named_character_escapes} & \tcode{202207L} \\ \rowsep +\defnxname{cpp_namespace_attributes} & \tcode{201411L} \\ \rowsep +\defnxname{cpp_noexcept_function_type} & \tcode{201510L} \\ \rowsep +\defnxname{cpp_nontype_template_args} & \tcode{201911L} \\ \rowsep +\defnxname{cpp_nontype_template_parameter_auto} & \tcode{201606L} \\ \rowsep +\defnxname{cpp_nsdmi} & \tcode{200809L} \\ \rowsep +\defnxname{cpp_pack_indexing} & \tcode{202311L} \\ \rowsep +\defnxname{cpp_placeholder_variables} & \tcode{202306L} \\ \rowsep +\defnxname{cpp_range_based_for} & \tcode{202211L} \\ \rowsep +\defnxname{cpp_raw_strings} & \tcode{200710L} \\ \rowsep +\defnxname{cpp_ref_qualifiers} & \tcode{200710L} \\ \rowsep +\defnxname{cpp_return_type_deduction} & \tcode{201304L} \\ \rowsep +\defnxname{cpp_rvalue_references} & \tcode{200610L} \\ \rowsep 
+\defnxname{cpp_size_t_suffix} & \tcode{202011L} \\ \rowsep +\defnxname{cpp_sized_deallocation} & \tcode{201309L} \\ \rowsep +\defnxname{cpp_static_assert} & \tcode{202306L} \\ \rowsep +\defnxname{cpp_static_call_operator} & \tcode{202207L} \\ \rowsep +\defnxname{cpp_structured_bindings} & \tcode{202403L} \\ \rowsep +\defnxname{cpp_template_template_args} & \tcode{201611L} \\ \rowsep +\defnxname{cpp_threadsafe_static_init} & \tcode{200806L} \\ \rowsep +\defnxname{cpp_unicode_characters} & \tcode{200704L} \\ \rowsep +\defnxname{cpp_unicode_literals} & \tcode{200710L} \\ \rowsep +\defnxname{cpp_user_defined_literals} & \tcode{200809L} \\ \rowsep +\defnxname{cpp_using_enum} & \tcode{201907L} \\ \rowsep +\defnxname{cpp_variable_templates} & \tcode{201304L} \\ \rowsep +\defnxname{cpp_variadic_friend} & \tcode{202403L} \\ \rowsep +\defnxname{cpp_variadic_templates} & \tcode{200704L} \\ \rowsep +\defnxname{cpp_variadic_using} & \tcode{201611L} \\ +\end{LongTable} + +\pnum +The following macro names are conditionally defined by the implementation: + +\begin{description} +\item +\indextext{__stdc__@\mname{STDC}}% +\mname{STDC}\\ +Whether \mname{STDC} is predefined and if so, what its value is, +are \impldef{definition and meaning of \mname{STDC}}. + +\item +\indextext{__stdc_mb_might_neq_wc__@\mname{STDC_MB_MIGHT_NEQ_WC}}% +\mname{STDC_MB_MIGHT_NEQ_WC}\\ +The integer literal \tcode{1}, intended to indicate that, in the encoding for +\keyword{wchar_t}, a member of the basic character set need not have a code value equal to +its value when used as the lone character in an ordinary character literal. + +\item +\indextext{__stdc_version__@\mname{STDC_VERSION}}% +\mname{STDC_VERSION}\\ +Whether \mname{STDC_VERSION} is predefined and if so, what its value is, +are \impldef{definition and meaning of \mname{STDC_VERSION}}. 
+ +\item +\indextext{__stdc_iso_10646__@\mname{STDC_ISO_10646}}% +\mname{STDC_ISO_10646}\\ +An integer literal of the form \tcode{yyyymmL} +(for example, \tcode{199712L}). +Whether \mname{STDC_ISO_10646} is predefined and +if so, what its value is, +are \impldef{presence and value of \mname{STDC_ISO_10646}}. + +\item +\indextext{__stdcpp_threads__@\mname{STDCPP_THREADS}}% +\mname{STDCPP_THREADS}\\ +Defined, and has the value integer literal 1, if and only if a program +can have more than one thread of execution\iref{intro.multithread}. + +\end{description} + +\pnum +The values of the predefined macros +(except for +\mname{FILE} +and +\mname{LINE}) +remain constant throughout the translation unit. + +\pnum +If any of the predefined macro names in this subclause, +or the identifier +\tcode{defined}, +is the subject of a +\tcode{\#define} +or a +\tcode{\#undef} +preprocessing directive, +the behavior is undefined. +Any other predefined macro names shall begin with a +leading underscore followed by an uppercase letter or a second +underscore. +\indextext{preprocessing directive|)} + +\rSec1[lex.token]{Tokens} + +\indextext{token|(}% +\begin{bnf} +\nontermdef{token}\br + identifier\br + keyword\br + literal\br + operator-or-punctuator +\end{bnf} + +\pnum +Each preprocessing token that is converted to a token\iref{lex.token} +shall have the lexical form of a keyword, an identifier, a literal, +or an operator or punctuator. + +\pnum +\indextext{\idxgram{token}}% +There are five kinds of tokens: identifiers, keywords, literals,% +\begin{footnote} +Literals include strings and character and numeric literals. +\end{footnote} +operators, and other separators. +\indextext{whitespace}% +Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments +(collectively, ``whitespace''), as described below, are ignored except +as they serve to separate tokens.
+\begin{note} +Whitespace can separate otherwise adjacent identifiers, keywords, numeric +literals, and alternative tokens containing alphabetic characters. +\end{note} +\indextext{token|)} + +\rSec2[lex.key]{Keywords} + +\begin{bnf} +\nontermdef{keyword}\br + \textnormal{any identifier listed in \tref{lex.key}}\br + \grammarterm{import-keyword}\br + \grammarterm{module-keyword}\br + \grammarterm{export-keyword} +\end{bnf} + +\pnum +The \grammarterm{import-keyword} is produced +by processing an \keyword{import} directive\iref{cpp.import}, +the \grammarterm{module-keyword} is produced +by preprocessing a \keyword{module} directive\iref{cpp.module}, and +the \grammarterm{export-keyword} is produced +by preprocessing either of the previous two directives. +\begin{note} +None has any observable spelling. +\end{note} + +\pnum +\indextext{keyword|(}% +The identifiers shown in \tref{lex.key} are reserved for use +as keywords (that is, they are unconditionally treated as keywords in +phase 7) except in an \grammarterm{attribute-token}\iref{dcl.attr.grammar}. +\begin{note} +The \keyword{register} keyword is unused but +is reserved for future use. 
+\end{note} + +\begin{multicolfloattable}{Keywords}{lex.key} +{lllll} +\keyword{alignas} \\ +\keyword{alignof} \\ +\keyword{asm} \\ +\keyword{auto} \\ +\keyword{bool} \\ +\keyword{break} \\ +\keyword{case} \\ +\keyword{catch} \\ +\keyword{char} \\ +\keyword{char8_t} \\ +\keyword{char16_t} \\ +\keyword{char32_t} \\ +\keyword{class} \\ +\keyword{concept} \\ +\keyword{const} \\ +\keyword{consteval} \\ +\keyword{constexpr} \\ +\columnbreak +\keyword{constinit} \\ +\keyword{const_cast} \\ +\keyword{continue} \\ +\keyword{co_await} \\ +\keyword{co_return} \\ +\keyword{co_yield} \\ +\keyword{decltype} \\ +\keyword{default} \\ +\keyword{delete} \\ +\keyword{do} \\ +\keyword{double} \\ +\keyword{dynamic_cast} \\ +\keyword{else} \\ +\keyword{enum} \\ +\keyword{explicit} \\ +\keyword{export} \\ +\keyword{extern} \\ +\columnbreak +\keyword{false} \\ +\keyword{float} \\ +\keyword{for} \\ +\keyword{friend} \\ +\keyword{goto} \\ +\keyword{if} \\ +\keyword{inline} \\ +\keyword{int} \\ +\keyword{long} \\ +\keyword{mutable} \\ +\keyword{namespace} \\ +\keyword{new} \\ +\keyword{noexcept} \\ +\keyword{nullptr} \\ +\keyword{operator} \\ +\keyword{private} \\ +\keyword{protected} \\ +\columnbreak +\keyword{public} \\ +\keyword{register} \\ +\keyword{reinterpret_cast} \\ +\keyword{requires} \\ +\keyword{return} \\ +\keyword{short} \\ +\keyword{signed} \\ +\keyword{sizeof} \\ +\keyword{static} \\ +\keyword{static_assert} \\ +\keyword{static_cast} \\ +\keyword{struct} \\ +\keyword{switch} \\ +\keyword{template} \\ +\keyword{this} \\ +\keyword{thread_local} \\ +\keyword{throw} \\ +\columnbreak +\keyword{true} \\ +\keyword{try} \\ +\keyword{typedef} \\ +\keyword{typeid} \\ +\keyword{typename} \\ +\keyword{union} \\ +\keyword{unsigned} \\ +\keyword{using} \\ +\keyword{virtual} \\ +\keyword{void} \\ +\keyword{volatile} \\ +\keyword{wchar_t} \\ +\keyword{while} \\ +\end{multicolfloattable} + +\pnum +Furthermore, the alternative representations shown in +\tref{lex.key.digraph} for certain 
operators and +punctuators\iref{lex.digraph} are reserved and shall not be used +otherwise. + +\begin{floattable}{Alternative representations}{lex.key.digraph} +{llllll} +\topline +\keyword{and} & \keyword{and_eq} & \keyword{bitand} & \keyword{bitor} & \keyword{compl} & \keyword{not} \\ +\keyword{not_eq} & \keyword{or} & \keyword{or_eq} & \keyword{xor} & \keyword{xor_eq} & \\ +\end{floattable}% +\indextext{keyword|)}% + +\rSec2[lex.literal]{Literals}% +\indextext{literal|(} + +\rSec3[lex.literal.kinds]{Kinds of literals} + +\pnum +\indextext{constant}% +\indextext{literal!constant}% +There are several kinds of literals. +\begin{footnote} +The term ``literal'' generally designates, in this +document, those tokens that are called ``constants'' in C. +\end{footnote} + +\begin{bnf} +\nontermdef{literal}\br + integer-literal\br + character-literal\br + floating-point-literal\br + string-literal\br + boolean-literal\br + pointer-literal\br + user-defined-literal +\end{bnf} +\begin{note} +When appearing as an \grammarterm{expression}, +a literal has a type and a value category\iref{expr.prim.literal}. 
+\end{note} + +\rSec3[lex.icon]{Integer literals} + +\indextext{literal!integer}% +\begin{bnf} +\nontermdef{integer-literal}\br + binary-literal \opt{integer-suffix}\br + octal-literal \opt{integer-suffix}\br + decimal-literal \opt{integer-suffix}\br + hexadecimal-literal \opt{integer-suffix} +\end{bnf} + +\begin{bnf} +\nontermdef{binary-literal}\br + \terminal{0b} binary-digit\br + \terminal{0B} binary-digit\br + binary-literal \opt{\terminal{'}} binary-digit +\end{bnf} + +\begin{bnf} +\nontermdef{octal-literal}\br + \terminal{0}\br + octal-literal \opt{\terminal{'}} octal-digit +\end{bnf} + +\begin{bnf} +\nontermdef{decimal-literal}\br + nonzero-digit\br + decimal-literal \opt{\terminal{'}} digit +\end{bnf} + +\begin{bnf} +\nontermdef{hexadecimal-literal}\br + hexadecimal-prefix hexadecimal-digit-sequence +\end{bnf} + +\begin{bnf} +\nontermdef{binary-digit} \textnormal{one of}\br + \terminal{0 1} +\end{bnf} + +\begin{bnf} +\nontermdef{octal-digit} \textnormal{one of}\br + \terminal{0 1 2 3 4 5 6 7} +\end{bnf} + +\begin{bnf} +\nontermdef{nonzero-digit} \textnormal{one of}\br + \terminal{1 2 3 4 5 6 7 8 9} +\end{bnf} + +\begin{bnf} +\nontermdef{hexadecimal-prefix} \textnormal{one of}\br + \terminal{0x 0X} +\end{bnf} + +\begin{bnf} +\nontermdef{hexadecimal-digit-sequence}\br + hexadecimal-digit\br + hexadecimal-digit-sequence \opt{\terminal{'}} hexadecimal-digit +\end{bnf} + +\begin{bnf} +\nontermdef{hexadecimal-digit} \textnormal{one of}\br + \terminal{0 1 2 3 4 5 6 7 8 9}\br + \terminal{a b c d e f}\br + \terminal{A B C D E F} +\end{bnf} + +\begin{bnf} +\nontermdef{integer-suffix}\br + unsigned-suffix \opt{long-suffix} \br + unsigned-suffix \opt{long-long-suffix} \br + unsigned-suffix \opt{size-suffix} \br + long-suffix \opt{unsigned-suffix} \br + long-long-suffix \opt{unsigned-suffix} \br + size-suffix \opt{unsigned-suffix} +\end{bnf} + +\begin{bnf} +\nontermdef{unsigned-suffix} \textnormal{one of}\br + \terminal{u U} +\end{bnf} + +\begin{bnf} 
+\nontermdef{long-suffix} \textnormal{one of}\br + \terminal{l L} +\end{bnf} + +\begin{bnf} +\nontermdef{long-long-suffix} \textnormal{one of}\br + \terminal{ll LL} +\end{bnf} + +\begin{bnf} +\nontermdef{size-suffix} \textnormal{one of}\br + \terminal{z Z} +\end{bnf} + +\pnum +\indextext{literal!\idxcode{unsigned}}% +\indextext{literal!\idxcode{long}}% +\indextext{literal!base of integer}% +In an \grammarterm{integer-literal}, +the sequence of +\grammarterm{binary-digit}s, +\grammarterm{octal-digit}s, +\grammarterm{digit}s, or +\grammarterm{hexadecimal-digit}s +is interpreted as a base $N$ integer as shown in \tref{lex.icon.base}; +the lexically first digit of the sequence of digits is the most significant. +\begin{note} +The prefix and any optional separating single quotes are ignored +when determining the value. +\end{note} + +\begin{simpletypetable} +{Base of \grammarterm{integer-literal}{s}} +{lex.icon.base} +{lr} +\topline +\lhdr{Kind of \grammarterm{integer-literal}} & \rhdr{base $N$} \\ \capsep +\grammarterm{binary-literal} & 2 \\ +\grammarterm{octal-literal} & 8 \\ +\grammarterm{decimal-literal} & 10 \\ +\grammarterm{hexadecimal-literal} & 16 \\ +\end{simpletypetable} + +\pnum +The \grammarterm{hexadecimal-digit}s +\tcode{a} through \tcode{f} and \tcode{A} through \tcode{F} +have decimal values ten through fifteen. +\begin{example} +The number twelve can be written \tcode{12}, \tcode{014}, +\tcode{0XC}, or \tcode{0b1100}. The \grammarterm{integer-literal}s \tcode{1048576}, +\tcode{1'048'576}, \tcode{0X100000}, \tcode{0x10'0000}, and +\tcode{0'004'000'000} all have the same value.
+\end{example} + +\pnum +\indextext{literal!\idxcode{long}}% +\indextext{literal!\idxcode{unsigned}}% +\indextext{literal!integer}% +\indextext{literal!type of integer}% +\indextext{suffix!\idxcode{L}}% +\indextext{suffix!\idxcode{U}}% +\indextext{suffix!\idxcode{l}}% +\indextext{suffix!\idxcode{u}}% +The type of an \grammarterm{integer-literal} is +the first type in the list in \tref{lex.icon.type} +corresponding to its optional \grammarterm{integer-suffix} +in which its value can be represented. + +\begin{floattable}{Types of \grammarterm{integer-literal}s}{lex.icon.type}{l|l|l} +\topline \lhdr{\grammarterm{integer-suffix}} & \chdr{\grammarterm{decimal-literal}} & \rhdr{\grammarterm{integer-literal} other than \grammarterm{decimal-literal}} \\ \capsep none & \tcode{int} & @@ -1356,244 +3972,7 @@ is ill-formed if it cannot be represented by \tcode{std::size_t}. \end{note} -\rSec2[lex.ccon]{Character literals} - -\indextext{literal!character}% -\begin{bnf} -\nontermdef{character-literal}\br - \opt{encoding-prefix} \terminal{'} c-char-sequence \terminal{'} -\end{bnf} - -\begin{bnf} -\nontermdef{encoding-prefix} \textnormal{one of}\br - \terminal{u8}\quad\terminal{u}\quad\terminal{U}\quad\terminal{L} -\end{bnf} - -\begin{bnf} -\nontermdef{c-char-sequence}\br - c-char\br - c-char-sequence c-char -\end{bnf} - -\begin{bnf} -\nontermdef{c-char}\br - basic-c-char\br - escape-sequence\br - universal-character-name -\end{bnf} - -\begin{bnf} -\nontermdef{basic-c-char}\br - \textnormal{any member of the translation character set except the \unicode{0027}{apostrophe},}\br - \bnfindent\textnormal{\unicode{005c}{reverse solidus}, or new-line character} -\end{bnf} - -\begin{bnf} -\nontermdef{escape-sequence}\br - simple-escape-sequence\br - numeric-escape-sequence\br - conditional-escape-sequence -\end{bnf} - -\begin{bnf} -\nontermdef{simple-escape-sequence}\br - \terminal{\textbackslash} simple-escape-sequence-char -\end{bnf} - -\begin{bnf} 
-\nontermdef{simple-escape-sequence-char} \textnormal{one of}\br - \terminal{' " ? \textbackslash{} a b f n r t v} -\end{bnf} - -\begin{bnf} -\nontermdef{numeric-escape-sequence}\br - octal-escape-sequence\br - hexadecimal-escape-sequence -\end{bnf} - -\begin{bnf} -\nontermdef{simple-octal-digit-sequence}\br - octal-digit\br - simple-octal-digit-sequence octal-digit -\end{bnf} - -\begin{bnf} -\nontermdef{octal-escape-sequence}\br - \terminal{\textbackslash} octal-digit\br - \terminal{\textbackslash} octal-digit octal-digit\br - \terminal{\textbackslash} octal-digit octal-digit octal-digit\br - \terminal{\textbackslash o\{} simple-octal-digit-sequence \terminal{\}}\br -\end{bnf} - -\begin{bnf} -\nontermdef{hexadecimal-escape-sequence}\br - \terminal{\textbackslash x} simple-hexadecimal-digit-sequence\br - \terminal{\textbackslash x\{} simple-hexadecimal-digit-sequence \terminal{\}} -\end{bnf} - -\begin{bnf} -\nontermdef{conditional-escape-sequence}\br - \terminal{\textbackslash} conditional-escape-sequence-char -\end{bnf} - -\begin{bnf} -\nontermdef{conditional-escape-sequence-char}\br - \textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{N}, \terminal{o}, \terminal{u}, \terminal{U}, or \terminal{x}} -\end{bnf} - -\pnum -\indextext{literal!character}% -\indextext{literal!\idxcode{char8_t}}% -\indextext{literal!\idxcode{char16_t}}% -\indextext{literal!\idxcode{char32_t}}% -\indextext{literal!type of character}% -\indextext{type!\idxcode{char8_t}}% -\indextext{type!\idxcode{char16_t}}% -\indextext{type!\idxcode{char32_t}}% -\indextext{wide-character}% -\indextext{type!\idxcode{wchar_t}}% -A \defnadj{multicharacter}{literal} is a \grammarterm{character-literal} -whose \grammarterm{c-char-sequence} consists of -more than one \grammarterm{c-char}. -A multicharacter literal shall not have an \grammarterm{encoding-prefix}. 
-If a multicharacter literal contains a \grammarterm{c-char} -that is not encodable as a single code unit in the ordinary literal encoding, -the program is ill-formed. -Multicharacter literals are conditionally-supported. - -\pnum -The kind of a \grammarterm{character-literal}, -its type, and its associated character encoding\iref{lex.charset} -are determined by -its \grammarterm{encoding-prefix} and its \grammarterm{c-char-sequence} -as defined by \tref{lex.ccon.literal}. - -\begin{floattable}{Character literals}{lex.ccon.literal} -{l|l|l|l|l} -\topline -\lhdr{Encoding} & \chdr{Kind} & \chdr{Type} & \chdr{Associated char-} & \rhdr{Example} \\ -\lhdr{prefix} & \chdr{} & \chdr{} & \chdr{acter encoding} & \\ -\capsep -none & -\defnx{ordinary character literal}{literal!character!ordinary} & -\keyword{char} & -ordinary literal & -\tcode{'v'} \\ \cline{2-3}\cline{5-5} - & -multicharacter literal & -\keyword{int} & -encoding & -\tcode{'abcd'} \\ \hline -\tcode{L} & -\defnx{wide character literal}{literal!character!wide} & -\keyword{wchar_t} & -wide literal & -\tcode{L'w'} \\ - & & & encoding & \\ \hline -\tcode{u8} & -\defnx{UTF-8 character literal}{literal!character!UTF-8} & -\keyword{char8_t} & -UTF-8 & -\tcode{u8'x'} \\ \hline -\tcode{u} & -\defnx{UTF-16 character literal}{literal!character!UTF-16} & -\keyword{char16_t} & -UTF-16 & -\tcode{u'y'} \\ \hline -\tcode{U} & -\defnx{UTF-32 character literal}{literal!character!UTF-32} & -\keyword{char32_t} & -UTF-32 & -\tcode{U'z'} \\ -\end{floattable} - -\pnum -In translation phase 4, -the value of a \grammarterm{character-literal} is determined -using the range of representable values -of the \grammarterm{character-literal}'s type in translation phase 7. -A multicharacter literal has an -\impldef{value of non-encodable character literal or multicharacter literal} -value. 
-The value of any other kind of \grammarterm{character-literal} -is determined as follows: -\begin{itemize} -\item -A \grammarterm{character-literal} with -a \grammarterm{c-char-sequence} consisting of a single -\grammarterm{basic-c-char}, -\grammarterm{simple-escape-sequence}, or -\grammarterm{universal-character-name} -is the code unit value of the specified character -as encoded in the literal's associated character encoding. -If the specified character lacks -representation in the literal's associated character encoding or -if it cannot be encoded as a single code unit, -then the program is ill-formed. -\item -A \grammarterm{character-literal} with -a \grammarterm{c-char-sequence} consisting of -a single \grammarterm{numeric-escape-sequence} -has a value as follows: -\begin{itemize} -\item -Let $v$ be the integer value represented by -the octal number comprising -the sequence of \grammarterm{octal-digit}{s} in -an \grammarterm{octal-escape-sequence} or by -the hexadecimal number comprising -the sequence of \grammarterm{hexadecimal-digit}{s} in -a \grammarterm{hexadecimal-escape-sequence}. -\item -If $v$ does not exceed -the range of representable values of the \grammarterm{character-literal}'s type, -then the value is $v$. -\item -Otherwise, -if the \grammarterm{character-literal}'s \grammarterm{encoding-prefix} -is absent or \tcode{L}, and -$v$ does not exceed the range of representable values of the corresponding unsigned type for the underlying type of the \grammarterm{character-literal}'s type, -then the value is the unique value of the \grammarterm{character-literal}'s type \tcode{T} that is congruent to $v$ modulo $2^N$, where $N$ is the width of \tcode{T}. -\item -Otherwise, the program is ill-formed. 
-\end{itemize} -\item -A \grammarterm{character-literal} with -a \grammarterm{c-char-sequence} consisting of -a single \grammarterm{conditional-escape-sequence} -is conditionally-supported and -has an \impldef{value of \grammarterm{conditional-escape-sequence}} value. -\end{itemize} - -\pnum -\indextext{backslash character}% -\indextext{\idxcode{\textbackslash}|see{backslash character}}% -\indextext{escape character|see{backslash character}}% -The character specified by a \grammarterm{simple-escape-sequence} -is specified in \tref{lex.ccon.esc}. -\begin{note} -Using an escape sequence for a question mark -is supported for compatibility with \CppXIV{} and C. -\end{note} - -\begin{floattable}{Simple escape sequences}{lex.ccon.esc} -{lll} -\topline -\lhdrx{2}{character} & \rhdr{\grammarterm{simple-escape-sequence}} \\ \capsep -\ucode{000a} & \uname{line feed} & \tcode{\textbackslash n} \\ -\ucode{0009} & \uname{character tabulation} & \tcode{\textbackslash t} \\ -\ucode{000b} & \uname{line tabulation} & \tcode{\textbackslash v} \\ -\ucode{0008} & \uname{backspace} & \tcode{\textbackslash b} \\ -\ucode{000d} & \uname{carriage return} & \tcode{\textbackslash r} \\ -\ucode{000c} & \uname{form feed} & \tcode{\textbackslash f} \\ -\ucode{0007} & \uname{alert} & \tcode{\textbackslash a} \\ -\ucode{005c} & \uname{reverse solidus} & \tcode{\textbackslash\textbackslash} \\ -\ucode{003f} & \uname{question mark} & \tcode{\textbackslash ?} \\ -\ucode{0027} & \uname{apostrophe} & \tcode{\textbackslash '} \\ -\ucode{0022} & \uname{quotation mark} & \tcode{\textbackslash "} \\ -\end{floattable} - -\rSec2[lex.fcon]{Floating-point literals} +\rSec3[lex.fcon]{Floating-point literals} \indextext{literal!floating-point}% \begin{bnf} @@ -1618,438 +3997,120 @@ \nontermdef{fractional-constant}\br \opt{digit-sequence} \terminal{.} digit-sequence\br digit-sequence \terminal{.} -\end{bnf} - -\begin{bnf} -\nontermdef{hexadecimal-fractional-constant}\br - \opt{hexadecimal-digit-sequence} 
\terminal{.} hexadecimal-digit-sequence\br - hexadecimal-digit-sequence \terminal{.} -\end{bnf} - -\begin{bnf} -\nontermdef{exponent-part}\br - \terminal{e} \opt{sign} digit-sequence\br - \terminal{E} \opt{sign} digit-sequence -\end{bnf} - -\begin{bnf} -\nontermdef{binary-exponent-part}\br - \terminal{p} \opt{sign} digit-sequence\br - \terminal{P} \opt{sign} digit-sequence -\end{bnf} - -\begin{bnf} -\nontermdef{sign} \textnormal{one of}\br - \terminal{+ -} -\end{bnf} - -\begin{bnf} -\nontermdef{digit-sequence}\br - digit\br - digit-sequence \opt{\terminal{'}} digit -\end{bnf} - -\begin{bnf} -\nontermdef{floating-point-suffix} \textnormal{one of}\br - \terminal{f l f16 f32 f64 f128 bf16 F L F16 F32 F64 F128 BF16} -\end{bnf} - -\pnum -\indextext{literal!type of floating-point}% -\indextext{literal!\idxcode{float}}% -\indextext{suffix!\idxcode{F}}% -\indextext{suffix!\idxcode{f}}% -\indextext{suffix!\idxcode{L}}% -\indextext{suffix!\idxcode{l}}% -\indextext{literal!\idxcode{long double}}% -The type of -a \grammarterm{floating-point-literal}\iref{basic.fundamental,basic.extended.fp} -is determined by -its \grammarterm{floating-point-suffix} as specified in \tref{lex.fcon.type}. -\begin{note} -The floating-point suffixes -\tcode{f16}, \tcode{f32}, \tcode{f64}, \tcode{f128}, \tcode{bf16}, -\tcode{F16}, \tcode{F32}, \tcode{F64}, \tcode{F128}, and \tcode{BF16} -are conditionally-supported. See \ref{basic.extended.fp}. 
-\end{note} -\begin{simpletypetable} -{Types of \grammarterm{floating-point-literal}{s}} -{lex.fcon.type} -{ll} -\topline -\lhdr{\grammarterm{floating-point-suffix}} & \rhdr{type} \\ \capsep -none & \keyword{double} \\ -\tcode{f} or \tcode{F} & \keyword {float} \\ -\tcode{l} or \tcode{L} & \keyword{long} \keyword{double} \\ -\tcode{f16} or \tcode{F16} & \tcode{std::float16_t} \\ -\tcode{f32} or \tcode{F32} & \tcode{std::float32_t} \\ -\tcode{f64} or \tcode{F64} & \tcode{std::float64_t} \\ -\tcode{f128} or \tcode{F128} & \tcode{std::float128_t} \\ -\tcode{bf16} or \tcode{BF16} & \tcode{std::bfloat16_t} \\ -\end{simpletypetable} - -\pnum -\indextext{literal!floating-point}% -The \defn{significand} of a \grammarterm{floating-point-literal} -is the \grammarterm{fractional-constant} or \grammarterm{digit-sequence} -of a \grammarterm{decimal-floating-point-literal} -or the \grammarterm{hexadecimal-fractional-constant} -or \grammarterm{hexadecimal-digit-sequence} -of a \grammarterm{hexadecimal-floating-point-literal}. -In the significand, -the sequence of \grammarterm{digit}s or \grammarterm{hexadecimal-digit}s -and optional period are interpreted as a base $N$ real number $s$, -where $N$ is 10 for a \grammarterm{decimal-floating-point-literal} and -16 for a \grammarterm{hexadecimal-floating-point-literal}. -\begin{note} -Any optional separating single quotes are ignored when determining the value. -\end{note} -If an \grammarterm{exponent-part} or \grammarterm{binary-exponent-part} -is present, -the exponent $e$ of the \grammarterm{floating-point-literal} -is the result of interpreting -the sequence of an optional \grammarterm{sign} and the \grammarterm{digit}s -as a base 10 integer. -Otherwise, the exponent $e$ is 0. -The scaled value of the literal is -$s \times 10^e$ for a \grammarterm{decimal-floating-point-literal} and -$s \times 2^e$ for a \grammarterm{hexadecimal-floating-point-literal}. 
-\begin{example} -The \grammarterm{floating-point-literal}{s} -\tcode{49.625} and \tcode{0xC.68p+2} have the same value. -The \grammarterm{floating-point-literal}{s} -\tcode{1.602'176'565e-19} and \tcode{1.602176565e-19} -have the same value. -\end{example} - -\pnum -If the scaled value is not in the range of representable -values for its type, the program is ill-formed. -Otherwise, the value of a \grammarterm{floating-point-literal} -is the scaled value if representable, -else the larger or smaller representable value nearest the scaled value, -chosen in an \impldef{choice of larger or smaller value of -\grammarterm{floating-point-literal}} manner. - -\rSec2[lex.string]{String literals} - -\indextext{literal!string}% -\begin{bnf} -\nontermdef{string-literal}\br - \opt{encoding-prefix} \terminal{"} \opt{s-char-sequence} \terminal{"}\br - \opt{encoding-prefix} \terminal{R} raw-string -\end{bnf} - -\begin{bnf} -\nontermdef{s-char-sequence}\br - s-char\br - s-char-sequence s-char -\end{bnf} - -\begin{bnf} -\nontermdef{s-char}\br - basic-s-char\br - escape-sequence\br - universal-character-name -\end{bnf} - -\begin{bnf} -\nontermdef{basic-s-char}\br - \textnormal{any member of the translation character set except the \unicode{0022}{quotation mark},}\br - \bnfindent\textnormal{\unicode{005c}{reverse solidus}, or new-line character} -\end{bnf} - -\begin{bnf} -\nontermdef{raw-string}\br - \terminal{"} \opt{d-char-sequence} \terminal{(} \opt{r-char-sequence} \terminal{)} \opt{d-char-sequence} \terminal{"} -\end{bnf} - -\begin{bnf} -\nontermdef{r-char-sequence}\br - r-char\br - r-char-sequence r-char -\end{bnf} - -\begin{bnf} -\nontermdef{r-char}\br - \textnormal{any member of the translation character set, except a \unicode{0029}{right parenthesis} followed by}\br - \bnfindent\textnormal{the initial \grammarterm{d-char-sequence} (which may be empty) followed by a \unicode{0022}{quotation mark}} -\end{bnf} - -\begin{bnf} -\nontermdef{d-char-sequence}\br - d-char\br - 
d-char-sequence d-char -\end{bnf} - -\begin{bnf} -\nontermdef{d-char}\br - \textnormal{any member of the basic character set except:}\br - \bnfindent\textnormal{\unicode{0020}{space}, \unicode{0028}{left parenthesis}, \unicode{0029}{right parenthesis}, \unicode{005c}{reverse solidus},}\br - \bnfindent\textnormal{\unicode{0009}{character tabulation}, \unicode{000b}{line tabulation}, \unicode{000c}{form feed}, and new-line} -\end{bnf} - -\pnum -\indextext{literal!string}% -\indextext{character string}% -\indextext{string!type of}% -\indextext{type!\idxcode{wchar_t}}% -\indextext{prefix!\idxcode{L}}% -\indextext{literal!string!\idxcode{char16_t}}% -\indextext{type!\idxcode{char16_t}}% -\indextext{literal!string!\idxcode{char32_t}}% -\indextext{type!\idxcode{char32_t}}% -The kind of a \grammarterm{string-literal}, -its type, and -its associated character encoding\iref{lex.charset} -are determined by its encoding prefix and sequence of -\grammarterm{s-char}s or \grammarterm{r-char}s -as defined by \tref{lex.string.literal} -where $n$ is the number of encoded code units as described below. 
- -\begin{floattable}{String literals}{lex.string.literal} -{llp{2.6cm}p{2.3cm}p{4.7cm}} -\topline -\lhdr{Enco-} & \chdr{Kind} & \chdr{Type} & \chdr{Associated} & \rhdr{Examples} \\ -\lhdr{ding} & \chdr{} & \chdr{} & \chdr{character} & \rhdr{} \\ -\lhdr{prefix} & \chdr{} & \chdr{} & \chdr{encoding} & \rhdr{} \\ -\capsep -none & -\defnx{ordinary string literal}{literal!string!ordinary} & -array of $n$\newline \tcode{\keyword{const} \keyword{char}} & -ordinary literal encoding & -\tcode{"ordinary string"}\newline -\tcode{R"(ordinary raw string)"} \\ -\tcode{L} & -\defnx{wide string literal}{literal!string!wide} & -array of $n$\newline \tcode{\keyword{const} \keyword{wchar_t}} & -wide literal\newline encoding & -\tcode{L"wide string"}\newline -\tcode{LR"w(wide raw string)w"} \\ -\tcode{u8} & -\defnx{UTF-8 string literal}{literal!string!UTF-8} & -array of $n$\newline \tcode{\keyword{const} \keyword{char8_t}} & -UTF-8 & -\tcode{u8"UTF-8 string"}\newline -\tcode{u8R"x(UTF-8 raw string)x"} \\ -\tcode{u} & -\defnx{UTF-16 string literal}{literal!string!UTF-16} & -array of $n$\newline \tcode{\keyword{const} \keyword{char16_t}} & -UTF-16 & -\tcode{u"UTF-16 string"}\newline -\tcode{uR"y(UTF-16 raw string)y"} \\ -\tcode{U} & -\defnx{UTF-32 string literal}{literal!string!UTF-32} & -array of $n$\newline \tcode{\keyword{const} \keyword{char32_t}} & -UTF-32 & -\tcode{U"UTF-32 string"}\newline -\tcode{UR"z(UTF-32 raw string)z"} \\ -\end{floattable} - -\pnum -\indextext{literal!string!raw}% -A \grammarterm{string-literal} that has an \tcode{R} -\indextext{prefix!\idxcode{R}}% -in the prefix is a \defn{raw string literal}. The -\grammarterm{d-char-sequence} serves as a delimiter. The terminating -\grammarterm{d-char-sequence} of a \grammarterm{raw-string} is the same sequence of -characters as the initial \grammarterm{d-char-sequence}. A \grammarterm{d-char-sequence} -shall consist of at most 16 characters. 
- -\pnum -\begin{note} -The characters \tcode{'('} and \tcode{')'} can appear in a -\grammarterm{raw-string}. Thus, \tcode{R"delimiter((a|b))delimiter"} is equivalent to -\tcode{"(a|b)"}. -\end{note} - -\pnum -\begin{note} -A source-file new-line in a raw string literal results in a new-line in the -resulting execution string literal. Assuming no -whitespace at the beginning of lines in the following example, the assert will succeed: -\begin{codeblock} -const char* p = R"(a\ -b -c)"; -assert(std::strcmp(p, "a\\\nb\nc") == 0); -\end{codeblock} -\end{note} +\end{bnf} -\pnum -\begin{example} -The raw string -\begin{codeblock} -R"a( -)\ -a" -)a" -\end{codeblock} -is equivalent to \tcode{"\textbackslash n)\textbackslash \textbackslash \textbackslash na\textbackslash"\textbackslash n"}. The raw string -\begin{codeblock} -R"(x = "\"y\"")" -\end{codeblock} -is equivalent to \tcode{"x = \textbackslash "\textbackslash\textbackslash\textbackslash "y\textbackslash\textbackslash\textbackslash "\textbackslash ""}. -\end{example} +\begin{bnf} +\nontermdef{hexadecimal-fractional-constant}\br + \opt{hexadecimal-digit-sequence} \terminal{.} hexadecimal-digit-sequence\br + hexadecimal-digit-sequence \terminal{.} +\end{bnf} -\pnum -\indextext{literal!narrow-character}% -Ordinary string literals and UTF-8 string literals are -also referred to as \defnx{narrow string literals}{literal!string!narrow}. +\begin{bnf} +\nontermdef{exponent-part}\br + \terminal{e} \opt{sign} digit-sequence\br + \terminal{E} \opt{sign} digit-sequence +\end{bnf} -\pnum -\indextext{concatenation!string}% -The common \grammarterm{encoding-prefix} -for a sequence of adjacent \grammarterm{string-literal}s -is determined pairwise as follows. -If two \grammarterm{string-literal}{s} have -the same \grammarterm{encoding-prefix}, -the common \grammarterm{encoding-prefix} is that \grammarterm{encoding-prefix}. 
-If one \grammarterm{string-literal} has no \grammarterm{encoding-prefix}, -the common \grammarterm{encoding-prefix} is that -of the other \grammarterm{string-literal}. -Any other combinations are ill-formed. -\begin{note} -A \grammarterm{string-literal}'s rawness has -no effect on the determination of the common \grammarterm{encoding-prefix}. -\end{note} +\begin{bnf} +\nontermdef{binary-exponent-part}\br + \terminal{p} \opt{sign} digit-sequence\br + \terminal{P} \opt{sign} digit-sequence +\end{bnf} -\pnum -In translation phase 6\iref{lex.phases}, -adjacent \grammarterm{string-literal}s are concatenated. -The lexical structure and grouping of -the contents of the individual \grammarterm{string-literal}s is retained. -\begin{example} -\begin{codeblock} -"\xA" "B" -\end{codeblock} -represents -the code unit \tcode{'\textbackslash xA'} and the character \tcode{'B'} -after concatenation -(and not the single code unit \tcode{'\textbackslash xAB'}). -Similarly, -\begin{codeblock} -R"(\u00)" "41" -\end{codeblock} -represents six characters, -starting with a backslash and ending with the digit \tcode{1} -(and not the single character \tcode{'A'} -specified by a \grammarterm{universal-character-name}). +\begin{bnf} +\nontermdef{sign} \textnormal{one of}\br + \terminal{+ -} +\end{bnf} -\tref{lex.string.concat} has some examples of valid concatenations. 
-\end{example} +\begin{bnf} +\nontermdef{digit-sequence}\br + digit\br + digit-sequence \opt{\terminal{'}} digit +\end{bnf} -\begin{floattable}{String literal concatenations}{lex.string.concat} -{lll|lll|lll} -\topline -\multicolumn{2}{|c}{Source} & -Means & -\multicolumn{2}{c}{Source} & -Means & -\multicolumn{2}{c}{Source} & -Means \\ -\tcode{u"a"} & \tcode{u"b"} & \tcode{u"ab"} & -\tcode{U"a"} & \tcode{U"b"} & \tcode{U"ab"} & -\tcode{L"a"} & \tcode{L"b"} & \tcode{L"ab"} \\ -\tcode{u"a"} & \tcode{"b"} & \tcode{u"ab"} & -\tcode{U"a"} & \tcode{"b"} & \tcode{U"ab"} & -\tcode{L"a"} & \tcode{"b"} & \tcode{L"ab"} \\ -\tcode{"a"} & \tcode{u"b"} & \tcode{u"ab"} & -\tcode{"a"} & \tcode{U"b"} & \tcode{U"ab"} & -\tcode{"a"} & \tcode{L"b"} & \tcode{L"ab"} \\ -\end{floattable} +\begin{bnf} +\nontermdef{floating-point-suffix} \textnormal{one of}\br + \terminal{f l f16 f32 f64 f128 bf16 F L F16 F32 F64 F128 BF16} +\end{bnf} \pnum -Evaluating a \grammarterm{string-literal} results in a string literal object -with static storage duration\iref{basic.stc}. -\begin{note} -String literal objects are potentially non-unique\iref{intro.object}. -Whether successive evaluations of a -\grammarterm{string-literal} yield the same or a different object is -unspecified. -\end{note} +\indextext{literal!type of floating-point}% +\indextext{literal!\idxcode{float}}% +\indextext{suffix!\idxcode{F}}% +\indextext{suffix!\idxcode{f}}% +\indextext{suffix!\idxcode{L}}% +\indextext{suffix!\idxcode{l}}% +\indextext{literal!\idxcode{long double}}% +The type of +a \grammarterm{floating-point-literal}\iref{basic.fundamental,basic.extended.fp} +is determined by +its \grammarterm{floating-point-suffix} as specified in \tref{lex.fcon.type}. \begin{note} -\indextext{literal!string!undefined change to}% -The effect of attempting to modify a string literal object is undefined. 
+The floating-point suffixes +\tcode{f16}, \tcode{f32}, \tcode{f64}, \tcode{f128}, \tcode{bf16}, +\tcode{F16}, \tcode{F32}, \tcode{F64}, \tcode{F128}, and \tcode{BF16} +are conditionally-supported. See \ref{basic.extended.fp}. \end{note} +\begin{simpletypetable} +{Types of \grammarterm{floating-point-literal}{s}} +{lex.fcon.type} +{ll} +\topline +\lhdr{\grammarterm{floating-point-suffix}} & \rhdr{type} \\ \capsep +none & \keyword{double} \\ +\tcode{f} or \tcode{F} & \keyword {float} \\ +\tcode{l} or \tcode{L} & \keyword{long} \keyword{double} \\ +\tcode{f16} or \tcode{F16} & \tcode{std::float16_t} \\ +\tcode{f32} or \tcode{F32} & \tcode{std::float32_t} \\ +\tcode{f64} or \tcode{F64} & \tcode{std::float64_t} \\ +\tcode{f128} or \tcode{F128} & \tcode{std::float128_t} \\ +\tcode{bf16} or \tcode{BF16} & \tcode{std::bfloat16_t} \\ +\end{simpletypetable} \pnum -\indextext{\idxcode{0}!string terminator}% -\indextext{\idxcode{0}!null character|see {character, null}}% -String literal objects are initialized with -the sequence of code unit values -corresponding to the \grammarterm{string-literal}'s sequence of -\grammarterm{s-char}s (originally from non-raw string literals) and -\grammarterm{r-char}s (originally from raw string literals), -plus a terminating \unicode{0000}{null} character, -in order as follows: -\begin{itemize} -\item -The sequence of characters denoted by each contiguous sequence of -\grammarterm{basic-s-char}s, -\grammarterm{r-char}s, -\grammarterm{simple-escape-sequence}s\iref{lex.ccon}, and -\grammarterm{universal-character-name}s\iref{lex.charset} -is encoded to a code unit sequence -using the \grammarterm{string-literal}'s associated character encoding. -If a character lacks representation in the associated character encoding, -then the program is ill-formed. -\begin{note} -No character lacks representation in any Unicode encoding form. 
-\end{note} -When encoding a stateful character encoding, -implementations should encode the first such sequence -beginning with the initial encoding state and -encode subsequent sequences -beginning with the final encoding state of the prior sequence. +\indextext{literal!floating-point}% +The \defn{significand} of a \grammarterm{floating-point-literal} +is the \grammarterm{fractional-constant} or \grammarterm{digit-sequence} +of a \grammarterm{decimal-floating-point-literal} +or the \grammarterm{hexadecimal-fractional-constant} +or \grammarterm{hexadecimal-digit-sequence} +of a \grammarterm{hexadecimal-floating-point-literal}. +In the significand, +the sequence of \grammarterm{digit}s or \grammarterm{hexadecimal-digit}s +and optional period are interpreted as a base $N$ real number $s$, +where $N$ is 10 for a \grammarterm{decimal-floating-point-literal} and +16 for a \grammarterm{hexadecimal-floating-point-literal}. \begin{note} -The encoded code unit sequence can differ from -the sequence of code units that would be obtained by -encoding each character independently. +Any optional separating single quotes are ignored when determining the value. \end{note} -\item -Each \grammarterm{numeric-escape-sequence}\iref{lex.ccon} -contributes a single code unit with a value as follows: -\begin{itemize} -\item -Let $v$ be the integer value represented by -the octal number comprising -the sequence of \grammarterm{octal-digit}{s} in -an \grammarterm{octal-escape-sequence} or by -the hexadecimal number comprising -the sequence of \grammarterm{hexadecimal-digit}{s} in -a \grammarterm{hexadecimal-escape-sequence}. -\item -If $v$ does not exceed the range of representable values of -the \grammarterm{string-literal}'s array element type, -then the value is $v$. 
-\item -Otherwise, -if the \grammarterm{string-literal}'s \grammarterm{encoding-prefix} -is absent or \tcode{L}, and -$v$ does not exceed the range of representable values of -the corresponding unsigned type for the underlying type of -the \grammarterm{string-literal}'s array element type, -then the value is the unique value of -the \grammarterm{string-literal}'s array element type \tcode{T} -that is congruent to $v$ modulo $2^N$, where $N$ is the width of \tcode{T}. -\item -Otherwise, the program is ill-formed. -\end{itemize} -When encoding a stateful character encoding, -these sequences should have no effect on encoding state. -\item -Each \grammarterm{conditional-escape-sequence}\iref{lex.ccon} -contributes an -\impldef{code unit sequence for \grammarterm{conditional-escape-sequence}} -code unit sequence. -When encoding a stateful character encoding, -it is -\impldef{effect of \grammarterm{conditional-escape-sequence} on encoding state} -what effect these sequences have on encoding state. -\end{itemize} +If an \grammarterm{exponent-part} or \grammarterm{binary-exponent-part} +is present, +the exponent $e$ of the \grammarterm{floating-point-literal} +is the result of interpreting +the sequence of an optional \grammarterm{sign} and the \grammarterm{digit}s +as a base 10 integer. +Otherwise, the exponent $e$ is 0. +The scaled value of the literal is +$s \times 10^e$ for a \grammarterm{decimal-floating-point-literal} and +$s \times 2^e$ for a \grammarterm{hexadecimal-floating-point-literal}. +\begin{example} +The \grammarterm{floating-point-literal}{s} +\tcode{49.625} and \tcode{0xC.68p+2} have the same value. +The \grammarterm{floating-point-literal}{s} +\tcode{1.602'176'565e-19} and \tcode{1.602176565e-19} +have the same value. +\end{example} + +\pnum +If the scaled value is not in the range of representable +values for its type, the program is ill-formed. 
+Otherwise, the value of a \grammarterm{floating-point-literal} +is the scaled value if representable, +else the larger or smaller representable value nearest the scaled value, +chosen in an \impldef{choice of larger or smaller value of +\grammarterm{floating-point-literal}} manner. -\rSec2[lex.string.uneval]{Unevaluated strings} +\rSec3[lex.string.uneval]{Unevaluated strings} \begin{bnf} \nontermdef{unevaluated-string}\br @@ -2071,7 +4132,7 @@ An \grammarterm{unevaluated-string} is never evaluated and its interpretation depends on the context in which it appears. -\rSec2[lex.bool]{Boolean literals} +\rSec3[lex.bool]{Boolean literals} \indextext{literal!boolean}% \begin{bnf} @@ -2085,7 +4146,7 @@ The Boolean literals are the keywords \tcode{false} and \tcode{true}. Such literals have type \tcode{bool}. -\rSec2[lex.nullptr]{Pointer literals} +\rSec3[lex.nullptr]{Pointer literals} \indextext{literal!pointer}% \begin{bnf} @@ -2103,7 +4164,7 @@ and~\ref{conv.mem}. \end{note} -\rSec2[lex.ext]{User-defined literals} +\rSec3[lex.ext]{User-defined literals} \indextext{literal!user-defined}% \begin{bnf} @@ -2264,7 +4325,7 @@ \end{example} \pnum -In translation phase 6\iref{lex.phases}, adjacent \grammarterm{string-literal}s are concatenated and +In translation phase 6\iref{lex.phase.6}, adjacent \grammarterm{string-literal}s are concatenated and \grammarterm{user-defined-string-literal}{s} are considered \grammarterm{string-literal}s for that purpose. During concatenation, \grammarterm{ud-suffix}{es} are removed and ignored and the concatenation process occurs as described in~\ref{lex.string}. At the end of phase diff --git a/source/lib-intro.tex b/source/lib-intro.tex index fb0a13a5df..08901e4001 100644 --- a/source/lib-intro.tex +++ b/source/lib-intro.tex @@ -1584,8 +1584,8 @@ \pnum Subclause \ref{using} describes how a \Cpp{} program gains access to the facilities of the \Cpp{} standard library. 
\ref{using.headers} describes effects during translation -phase 4, while~\ref{using.linkage} describes effects during phase -8\iref{lex.phases}. +phase 4\iref{lex.phase.4}, while~\ref{using.linkage} describes effects during phase +8\iref{lex.phase.9}. \rSec3[using.headers]{Headers} diff --git a/source/limits.tex b/source/limits.tex index ca3953634b..610b5f6d98 100644 --- a/source/limits.tex +++ b/source/limits.tex @@ -59,10 +59,10 @@ \item% Arguments in one macro invocation\iref{cpp.replace} [256]. \item% -Characters in one logical source line\iref{lex.phases} [65\,536]. +Characters in one logical source line\iref{lex.phase.2} [65\,536]. \item% Characters in a \grammarterm{string-literal}\iref{lex.string} -(after concatenation\iref{lex.phases}) [65\,536]. +(after concatenation\iref{lex.phase.6}) [65\,536]. \item% Size of an object\iref{intro.object} [262\,144]. \item% diff --git a/source/modules.tex b/source/modules.tex index 2ed009a3b6..91ad6b8c2b 100644 --- a/source/modules.tex +++ b/source/modules.tex @@ -6,6 +6,10 @@ \rSec1[module.unit]{Module units and purviews} \begin{bnf} +\nontermdef{translation-unit}\br + \opt{declaration-seq}\br + \opt{global-module-fragment} module-declaration \opt{declaration-seq} \opt{private-module-fragment} + \nontermdef{module-declaration}\br \opt{export-keyword} module-keyword module-name \opt{module-partition} \opt{attribute-specifier-seq} \terminal{;} \end{bnf} @@ -26,6 +30,10 @@ module-name-qualifier identifier \terminal{.} \end{bnf} +\pnum +\indextext{translation unit}% +A translation unit consists of a sequence of declarations. + \pnum A \defn{module unit} is a translation unit that contains a \grammarterm{module-declaration}. @@ -518,7 +526,7 @@ \begin{note} Such indirect importation does not make macros available, because a translation unit is -a sequence of tokens in translation phase 7\iref{lex.phases}. +a sequence of tokens in translation phase 7\iref{lex.phase.7}. 
Macros can be made available by directly importing header units as described in \ref{cpp.import}. \end{note} diff --git a/source/preprocessor.tex b/source/preprocessor.tex deleted file mode 100644 index db4bce19d1..0000000000 --- a/source/preprocessor.tex +++ /dev/null @@ -1,2009 +0,0 @@ -%!TEX root = std.tex -\rSec0[cpp]{Preprocessing directives}% -\indextext{preprocessing directive|(} - -\indextext{compiler control line|see{preprocessing directive}}% -\indextext{control line|see{preprocessing directive}}% -\indextext{directive, preprocessing|see{preprocessing directive}} - -\gramSec[gram.cpp]{Preprocessing directives} - -\rSec1[cpp.pre]{Preamble} - -\begin{bnf} -\nontermdef{preprocessing-file}\br - \opt{group}\br - module-file -\end{bnf} - -\begin{bnf} -\nontermdef{module-file}\br - \opt{pp-global-module-fragment} pp-module \opt{group} \opt{pp-private-module-fragment} -\end{bnf} - -\begin{bnf} -\nontermdef{pp-global-module-fragment}\br - \keyword{module} \terminal{;} new-line \opt{group} -\end{bnf} - -\begin{bnf} -\nontermdef{pp-private-module-fragment}\br - \keyword{module} \terminal{:} \keyword{private} \terminal{;} new-line \opt{group} -\end{bnf} - -\begin{bnf} -\nontermdef{group}\br - group-part\br - group group-part -\end{bnf} - -\begin{bnf} -\nontermdef{group-part}\br - control-line\br - if-section\br - text-line\br - \terminal{\#} conditionally-supported-directive -\end{bnf} - -\begin{bnf}\obeyspaces -\nontermdef{control-line}\br - \terminal{\# include} pp-tokens new-line\br - pp-import\br - \terminal{\# define } identifier replacement-list new-line\br - \terminal{\# define } identifier lparen \opt{identifier-list} \terminal{)} replacement-list new-line\br - \terminal{\# define } identifier lparen \terminal{... )} replacement-list new-line\br - \terminal{\# define } identifier lparen identifier-list \terminal{, ... 
)} replacement-list new-line\br - \terminal{\# undef } identifier new-line\br - \terminal{\# line } pp-tokens new-line\br - \terminal{\# error } \opt{pp-tokens} new-line\br - \terminal{\# warning} \opt{pp-tokens} new-line\br - \terminal{\# pragma } \opt{pp-tokens} new-line\br - \terminal{\# }new-line -\end{bnf} - -\begin{bnf} -\nontermdef{if-section}\br - if-group \opt{elif-groups} \opt{else-group} endif-line -\end{bnf} - -\begin{bnf}\obeyspaces -\nontermdef{if-group}\br - \terminal{\# if } constant-expression new-line \opt{group}\br - \terminal{\# ifdef } identifier new-line \opt{group}\br - \terminal{\# ifndef } identifier new-line \opt{group} -\end{bnf} - -\begin{bnf} -\nontermdef{elif-groups}\br - elif-group\br - elif-groups elif-group -\end{bnf} - -\begin{bnf}\obeyspaces -\nontermdef{elif-group}\br - \terminal{\# elif } constant-expression new-line \opt{group}\br - \terminal{\# elifdef } identifier new-line \opt{group}\br - \terminal{\# elifndef} identifier new-line \opt{group} -\end{bnf} - -\begin{bnf}\obeyspaces -\nontermdef{else-group}\br - \terminal{\# else } new-line \opt{group} -\end{bnf} - -\begin{bnf}\obeyspaces -\nontermdef{endif-line}\br - \terminal{\# endif } new-line -\end{bnf} - -\begin{bnf} -\nontermdef{text-line}\br - \opt{pp-tokens} new-line -\end{bnf} - -\begin{bnf} -\nontermdef{conditionally-supported-directive}\br - pp-tokens new-line -\end{bnf} - -\begin{bnf} -\nontermdef{lparen}\br - \descr{a \terminal{(} character not immediately preceded by whitespace} -\end{bnf} - -\begin{bnf} -\nontermdef{identifier-list}\br - identifier\br - identifier-list \terminal{,} identifier -\end{bnf} - -\begin{bnf} -\nontermdef{replacement-list}\br - \opt{pp-tokens} -\end{bnf} - -\begin{bnf} -\nontermdef{pp-tokens}\br - preprocessing-token\br - pp-tokens preprocessing-token -\end{bnf} - -\begin{bnf} -\nontermdef{new-line}\br - \descr{the new-line character} -\end{bnf} - -\pnum -A \defn{preprocessing directive} consists of a sequence of preprocessing tokens 
-that satisfies the following constraints: -At the start of translation phase 4, -the first token in the sequence, -referred to as a \defnadj{directive-introducing}{token}, -begins with the first character in the source file -(optionally after whitespace containing no new-line characters) or -follows whitespace containing at least one new-line character, -and is - -\begin{itemize} -\item -a \tcode{\#} preprocessing token, or - -\item -an \keyword{import} preprocessing token -immediately followed on the same logical line by a -\grammarterm{header-name}, -\tcode{<}, -\grammarterm{identifier}, -\grammarterm{string-literal}, or -\tcode{:} -preprocessing token, or - -\item -a \keyword{module} preprocessing token -immediately followed on the same logical line by an -\grammarterm{identifier}, -\tcode{:}, or -\tcode{;} -preprocessing token, or - -\item -an \keyword{export} preprocessing token -immediately followed on the same logical line by -one of the two preceding forms. -\end{itemize} - -The last token in the sequence is the first token within the sequence that -is immediately followed by whitespace containing a new-line character. -\begin{footnote} -Thus, -preprocessing directives are commonly called ``lines''. -These ``lines'' have no other syntactic significance, -as all whitespace is equivalent except in certain situations -during preprocessing (see the -\tcode{\#} -character string literal creation operator in~\ref{cpp.stringize}, for example). -\end{footnote} -\begin{note} -A new-line character ends the preprocessing directive even if it occurs -within what would otherwise be an invocation of a function-like macro. 
-\end{note} - -\begin{example} -\begin{codeblock} -# // preprocessing directive -module ; // preprocessing directive -export module leftpad; // preprocessing directive -import <string>; // preprocessing directive -export import "squee"; // preprocessing directive -import rightpad; // preprocessing directive -import :part; // preprocessing directive - -module // not a preprocessing directive -; // not a preprocessing directive - -export // not a preprocessing directive -import // not a preprocessing directive -foo; // not a preprocessing directive - -export // not a preprocessing directive -import foo; // preprocessing directive (ill-formed at phase 7) - -import :: // not a preprocessing directive -import -> // not a preprocessing directive -\end{codeblock} -\end{example} - -\pnum -A sequence of preprocessing tokens is only a \grammarterm{text-line} -if it does not begin with a directive-introducing token. -A sequence of preprocessing tokens is only a \grammarterm{conditionally-supported-directive} -if it does not begin with any of the directive names -appearing after a \tcode{\#} in the syntax. -A \grammarterm{conditionally-supported-directive} is -conditionally-supported with -\impldef{additional supported forms of preprocessing directive} -semantics. - -\pnum -At the start of phase 4 of translation, -the \grammarterm{group} of a \grammarterm{pp-global-module-fragment} shall -contain neither a \grammarterm{text-line} nor a \grammarterm{pp-import}. - -\pnum -When in a group that is skipped\iref{cpp.cond}, the directive -syntax is relaxed to allow any sequence of preprocessing tokens to occur between -the directive name and the following new-line character.
- -\pnum -The only whitespace characters that shall appear -between preprocessing tokens -within a preprocessing directive -(from just after the directive-introducing token -through just before the terminating new-line character) -are space and horizontal-tab -(including spaces that have replaced comments -or possibly other whitespace characters -in translation phase 3). - -\pnum -The implementation can -process and skip sections of source files conditionally, -include other source files, -import macros from header units, -and replace macros. -These capabilities are called -\defn{preprocessing}, -because conceptually they occur -before translation of the resulting translation unit. - -\pnum -The preprocessing tokens within a preprocessing directive -are not subject to macro expansion unless otherwise stated. - -\begin{example} -In: -\begin{codeblock} -#define EMPTY -EMPTY # include <file.h> -\end{codeblock} -the sequence of preprocessing tokens on the second line is \textit{not} -a preprocessing directive, because it does not begin with a \tcode{\#} at the start of -translation phase 4, even though it will do so after the macro \tcode{EMPTY} -has been replaced.
-\end{example} - -\rSec1[cpp.cond]{Conditional inclusion}% -\indextext{preprocessing directive!conditional inclusion}% -\indextext{inclusion!conditional|see{preprocessing directive, conditional inclusion}} - -\indextext{\idxcode{defined}}% -\begin{bnf} -\nontermdef{defined-macro-expression}\br - \terminal{defined} identifier\br - \terminal{defined (} identifier \terminal{)} -\end{bnf} - -\begin{bnf} -\nontermdef{h-preprocessing-token}\br - \textnormal{any \grammarterm{preprocessing-token} other than \terminal{>}} -\end{bnf} - -\begin{bnf} -\nontermdef{h-pp-tokens}\br - h-preprocessing-token\br - h-pp-tokens h-preprocessing-token -\end{bnf} - -\begin{bnf} -\nontermdef{header-name-tokens}\br - string-literal\br - \terminal{<} h-pp-tokens \terminal{>} -\end{bnf} - -\indextext{\idxxname{has_include}}% -\begin{bnf} -\nontermdef{has-include-expression}\br - \terminal{\xname{has_include}} \terminal{(} header-name \terminal{)}\br - \terminal{\xname{has_include}} \terminal{(} header-name-tokens \terminal{)} -\end{bnf} - -\indextext{\idxxname{has_cpp_attribute}}% -\begin{bnf} -\nontermdef{has-attribute-expression}\br - \terminal{\xname{has_cpp_attribute} (} pp-tokens \terminal{)} -\end{bnf} - -\pnum -The expression that controls conditional inclusion -shall be an integral constant expression except that -identifiers -(including those lexically identical to keywords) -are interpreted as described below -\begin{footnote} -Because the controlling constant expression is evaluated -during translation phase 4, -all identifiers either are or are not macro names --- -there simply are no keywords, enumeration constants, etc. -\end{footnote} -and it may contain zero or more \grammarterm{defined-macro-expression}{s} and/or -\grammarterm{has-include-expression}{s} and/or -\grammarterm{has-attribute-expression}{s} as unary operator expressions. 
- -\pnum -A \grammarterm{defined-macro-expression} evaluates to \tcode{1} -if the identifier is currently defined -as a macro name -(that is, if it is predefined -or if it has one or more active macro definitions\iref{cpp.import}, -for example because -it has been the subject of a -\tcode{\#define} -preprocessing directive -without an intervening -\tcode{\#undef} -directive with the same subject identifier), \tcode{0} if it is not. - -\pnum -The second form of \grammarterm{has-include-expression} -is considered only if the first form does not match, -in which case the preprocessing tokens are processed just as in normal text. - -\pnum -The header or source file identified by -the parenthesized preprocessing token sequence -in each contained \grammarterm{has-include-expression} -is searched for as if that preprocessing token sequence -were the \grammarterm{pp-tokens} in a \tcode{\#include} directive, -except that no further macro expansion is performed. -If such a directive would not satisfy the syntactic requirements -of a \tcode{\#include} directive, the program is ill-formed. -The \grammarterm{has-include-expression} evaluates -to \tcode{1} if the search for the source file succeeds, and -to \tcode{0} if the search fails. - -\pnum -Each \grammarterm{has-attribute-expression} is replaced by -a non-zero \grammarterm{pp-number} -matching the form of an \grammarterm{integer-literal} -if the implementation supports an attribute -with the name specified by interpreting -the \grammarterm{pp-tokens}, after macro expansion, -as an \grammarterm{attribute-token}, -and by \tcode{0} otherwise. -The program is ill-formed if the \grammarterm{pp-tokens} -do not match the form of an \grammarterm{attribute-token}. - -\pnum -For an attribute specified in this document, -it is \impldef{value of \grammarterm{has-attribute-expression} -for standard attributes} -whether the value of the \grammarterm{has-attribute-expression} -is \tcode{0} or is given by \tref{cpp.cond.ha}. 
-For other attributes recognized by the implementation, -the value is -\impldef{value of \grammarterm{has-attribute-expression} -for non-standard attributes}. -\begin{note} -It is expected -that the availability of an attribute can be detected by any non-zero result. -\end{note} - -\begin{floattable}{\xname{has_cpp_attribute} values}{cpp.cond.ha} -{ll} -\topline -\lhdr{Attribute} & \rhdr{Value} \\ \rowsep -\tcode{assume} & \tcode{202207L} \\ -\tcode{carries_dependency} & \tcode{200809L} \\ -\tcode{deprecated} & \tcode{201309L} \\ -\tcode{fallthrough} & \tcode{201603L} \\ -\tcode{likely} & \tcode{201803L} \\ -\tcode{maybe_unused} & \tcode{201603L} \\ -\tcode{no_unique_address} & \tcode{201803L} \\ -\tcode{nodiscard} & \tcode{201907L} \\ -\tcode{noreturn} & \tcode{200809L} \\ -\tcode{unlikely} & \tcode{201803L} \\ -\end{floattable} - -\pnum -The -\tcode{\#ifdef}, \tcode{\#ifndef}, \tcode{\#elifdef}, and \tcode{\#elifndef} -directives, and -the \tcode{defined} conditional inclusion operator, -shall treat \xname{has_include} and \xname{has_cpp_attribute} -as if they were the names of defined macros. -The identifiers \xname{has_include} and \xname{has_cpp_attribute} -shall not appear in any context not mentioned in this subclause. - -\pnum -Each preprocessing token that remains (in the list of preprocessing tokens that -will become the controlling expression) -after all macro replacements have occurred -shall be in the lexical form of a token\iref{lex.token}. - -\pnum -Preprocessing directives of the forms -\begin{ncsimplebnf}\obeyspaces -\indextext{\idxcode{\#if}}% -\terminal{\# if } constant-expression new-line \opt{group}\br -\indextext{\idxcode{\#elif}}% -\terminal{\# elif } constant-expression new-line \opt{group} -\end{ncsimplebnf} -check whether the controlling constant expression evaluates to nonzero. 
- -\pnum -Prior to evaluation, -macro invocations in the list of preprocessing tokens -that will become the controlling constant expression -are replaced -(except for those macro names modified by the -\tcode{defined} -unary operator), -just as in normal text. -If the token -\tcode{defined} -is generated as a result of this replacement process -or use of the -\tcode{defined} -unary operator does not match one of the two specified forms -prior to macro replacement, -the behavior is undefined. - -\pnum -After all replacements due to macro expansion and -evaluations of -\grammarterm{defined-macro-expression}s, -\grammarterm{has-include-expression}s, and -\grammarterm{has-attribute-expression}s -have been performed, -all remaining identifiers and keywords, -except for -\tcode{true} -and -\tcode{false}, -are replaced with the \grammarterm{pp-number} -\tcode{0}, -and then each preprocessing token is converted into a token. -\begin{note} -An alternative -token\iref{lex.digraph} is not an identifier, -even when its spelling consists entirely of letters and underscores. -Therefore it is not subject to this replacement. -\end{note} - -\pnum -The resulting tokens comprise the controlling constant expression -which is evaluated according to the rules of~\ref{expr.const} -using arithmetic that has at least the ranges specified -in~\ref{support.limits}. For the purposes of this token conversion and evaluation -all signed and unsigned integer types -act as if they have the same representation as, respectively, -\tcode{intmax_t} or \tcode{uintmax_t}\iref{cstdint.syn}. -\begin{note} -Thus on an -implementation where \tcode{std::numeric_limits<int>::max()} is \tcode{0x7FFF} -and \tcode{std::numeric_limits<unsigned int>::max()} is \tcode{0xFFFF}, -the integer literal \tcode{0x8000} is signed and positive within a \tcode{\#if} -expression even though it is unsigned in translation phase -7\iref{lex.phases}.
-\end{note} -This includes interpreting \grammarterm{character-literal}s -according to the rules in \ref{lex.ccon}. -\begin{note} -The associated character encodings of literals are the same -in \tcode{\#if} and \tcode{\#elif} directives and in any expression. -\end{note} -Each subexpression with type -\tcode{bool} -is subjected to integral promotion before processing continues. - -\pnum -Preprocessing directives of the forms -\begin{ncsimplebnf}\obeyspaces -\terminal{\# ifdef } identifier new-line \opt{group}\br -\indextext{\idxcode{\#ifdef}}% -\terminal{\# ifndef } identifier new-line \opt{group}\br -\indextext{\idxcode{\#ifndef}}% -\terminal{\# elifdef } identifier new-line \opt{group}\br -\indextext{\idxcode{\#elifdef}}% -\terminal{\# elifndef} identifier new-line \opt{group} -\indextext{\idxcode{\#elifndef}}% -\end{ncsimplebnf} -check whether the identifier is or is not currently defined as a macro name. -Their conditions are equivalent to -\tcode{\#if} \tcode{defined} \grammarterm{identifier}, -\tcode{\#if} \tcode{!defined} \grammarterm{identifier}, -\tcode{\#elif} \tcode{defined} \grammarterm{identifier}, and -\tcode{\#elif} \tcode{!defined} \grammarterm{identifier}, -respectively. - -\pnum -Each directive's condition is checked in order. -If it evaluates to false (zero), -the group that it controls is skipped: -directives are processed only through the name that determines -the directive in order to keep track of the level -of nested conditionals; -the rest of the directives' preprocessing tokens are ignored, -as are the other preprocessing tokens in the group. -Only the first group -whose control condition evaluates to true (nonzero) is processed; -any following groups are skipped and their controlling directives -are processed as if they were in a group that is skipped. 
-If none of the conditions evaluates to true, -and there is a -\tcode{\#else} -\indextext{\idxcode{\#else}}% -directive, -the group controlled by the -\tcode{\#else} -is processed; lacking a -\tcode{\#else} -directive, all the groups until the -\tcode{\#endif} -\indextext{\idxcode{\#endif}}% -are skipped.% -\begin{footnote} -As indicated by the syntax, -a preprocessing token cannot follow a -\tcode{\#else} -or -\tcode{\#endif} -directive before the terminating new-line character. -However, -comments can appear anywhere in a source file, -including within a preprocessing directive. -\end{footnote} - -\pnum -\begin{example} -This demonstrates a way to include a library \tcode{optional} facility -only if it is available: - -\begin{codeblock} -#if __has_include(<optional>) -# include <optional> -# if __cpp_lib_optional >= 201603 -# define have_optional 1 -# endif -#elif __has_include(<experimental/optional>) -# include <experimental/optional> -# if __cpp_lib_experimental_optional >= 201411 -# define have_optional 1 -# define experimental_optional 1 -# endif -#endif -#ifndef have_optional -# define have_optional 0 -#endif -\end{codeblock} -\end{example} - -\pnum -\begin{example} -This demonstrates a way to use the attribute \tcode{[[acme::deprecated]]} -only if it is available. -\begin{codeblock} -#if __has_cpp_attribute(acme::deprecated) -# define ATTR_DEPRECATED(msg) [[acme::deprecated(msg)]] -#else -# define ATTR_DEPRECATED(msg) [[deprecated(msg)]] -#endif -ATTR_DEPRECATED("This function is deprecated") void anvil(); -\end{codeblock} -\end{example} - -\rSec1[cpp.include]{Source file inclusion} -\indextext{preprocessing directive!header inclusion} -\indextext{preprocessing directive!source-file inclusion} -\indextext{inclusion!source file|see{preprocessing directive, source-file inclusion}}% -\indextext{\idxcode{\#include}}% - -\pnum -A -\tcode{\#include} -directive shall identify a header or source file -that can be processed by the implementation.
- -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# include <} h-char-sequence \terminal{>} new-line -\end{ncsimplebnf} -searches a sequence of -\impldef{sequence of places searched for a header} -places -for a header identified uniquely by the specified sequence -between the -\tcode{<} -and -\tcode{>} -delimiters, -and causes the replacement of that -directive by the entire contents of the header. -How the places are specified -or the header identified -is \impldef{search locations for \tcode{<>} header}. - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# include "} q-char-sequence \terminal{"} new-line -\end{ncsimplebnf} -causes the replacement of that -directive by the entire contents of the -source file identified by the specified sequence between the -\tcode{"} -delimiters. -The named source file is searched for in an -\impldef{manner of search for included source file} -manner. -If this search is not supported, -or if the search fails, -the directive is reprocessed as if it read -\begin{ncsimplebnf} -\terminal{\# include <} h-char-sequence \terminal{>} new-line -\end{ncsimplebnf} -with the identical contained sequence (including -\tcode{>} -characters, if any) from the original directive. - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# include} pp-tokens new-line -\end{ncsimplebnf} -(that does not match one of the two previous forms) is permitted. -The preprocessing tokens after -\tcode{include} -in the directive are processed just as in normal text -(i.e., each identifier currently defined as a macro name is replaced by its -replacement list of preprocessing tokens). -If the directive resulting after all replacements does not match -one of the two previous forms, the behavior is -undefined. 
-\begin{footnote} -Note that adjacent \grammarterm{string-literal}s are not concatenated into -a single \grammarterm{string-literal} -(see the translation phases in~\ref{lex.phases}); -thus, an expansion that results in two \grammarterm{string-literal}s is an -invalid directive. -\end{footnote} -The method by which a sequence of preprocessing tokens between a -\tcode{<} -and a -\tcode{>} -preprocessing token pair or a pair of -\tcode{"} -characters is combined into a single header name -preprocessing token is \impldef{search locations for \tcode{""""} header}. - -\pnum -The implementation shall provide unique mappings for -sequences consisting of one or more -\grammarterm{nondigit}{s} or \grammarterm{digit}{s}\iref{lex.name} -followed by a period -(\tcode{.}) -and a single -\grammarterm{nondigit}. -The first character shall not be a \grammarterm{digit}. -The implementation may ignore distinctions of alphabetical case. - -\pnum -A -\tcode{\#include} -preprocessing directive may appear -in a source file that has been read because of a -\tcode{\#include} -directive in another file, -up to an \impldef{nesting limit for \tcode{\#include} directives} nesting limit. - -\pnum -If the header identified by the \grammarterm{header-name} -denotes an importable header\iref{module.import}, -it is -\impldef{whether source file inclusion of importable header -is replaced with \tcode{import} directive} -whether the \tcode{\#include} preprocessing directive -is instead replaced by an \tcode{import} directive\iref{cpp.import} of the form -\begin{ncbnf} -\terminal{import} header-name \terminal{;} new-line -\end{ncbnf} - -\pnum -\begin{note} -An implementation can provide a mechanism for making arbitrary -source files available to the \tcode{< >} search. -However, using the \tcode{< >} form for headers provided -with the implementation and the \tcode{" "} form for sources -outside the control of the implementation -achieves wider portability. 
For instance: - -\begin{codeblock} -#include <stdio.h> -#include <unistd.h> -#include "usefullib.h" -#include "myprog.h" -\end{codeblock} - -\end{note} - -\pnum -\begin{example} -This illustrates macro-replaced -\tcode{\#include} -directives: - -\begin{codeblock} -#if VERSION == 1 - #define INCFILE "vers1.h" -#elif VERSION == 2 - #define INCFILE "vers2.h" // and so on -#else - #define INCFILE "versN.h" -#endif -#include INCFILE -\end{codeblock} -\end{example} - -\rSec1[cpp.module]{Module directive} -\indextext{preprocessing directive!module}% - -\begin{bnf} -\nontermdef{pp-module}\br - \opt{\keyword{export}} \keyword{module} \opt{pp-tokens} \terminal{;} new-line -\end{bnf} - -\pnum -A \grammarterm{pp-module} shall not -appear in a context where \tcode{module} -or (if it is the first token of the \grammarterm{pp-module}) \tcode{export} -is an identifier defined as an object-like macro. - -\pnum -The \grammarterm{pp-tokens}, if any, of a \grammarterm{pp-module} -shall be of the form: -\begin{ncsimplebnf} -pp-module-name \opt{pp-module-partition} \opt{pp-tokens} -\end{ncsimplebnf} -where the \grammarterm{pp-tokens} (if any) shall not begin with -a \tcode{(} preprocessing token and -the grammar non-terminals are defined as: -\begin{ncbnf} -\nontermdef{pp-module-name}\br - \opt{pp-module-name-qualifier} identifier -\end{ncbnf} -\begin{ncbnf} -\nontermdef{pp-module-partition}\br - \terminal{:} \opt{pp-module-name-qualifier} identifier -\end{ncbnf} -\begin{ncbnf} -\nontermdef{pp-module-name-qualifier}\br - identifier \terminal{.}\br - pp-module-name-qualifier identifier \terminal{.} -\end{ncbnf} -No \grammarterm{identifier} in -the \grammarterm{pp-module-name} or \grammarterm{pp-module-partition} -shall currently be defined as an object-like macro. - -\pnum -Any preprocessing tokens after the \tcode{module} preprocessing token -in the \tcode{module} directive are processed just as in normal text.
-\begin{note} -Each identifier currently defined as a macro name -is replaced by its replacement list of preprocessing tokens. -\end{note} - -\pnum -The \tcode{module} and \tcode{export} (if it exists) preprocessing tokens -are replaced by the \grammarterm{module-keyword} and -\grammarterm{export-keyword} preprocessing tokens respectively. -\begin{note} -This makes the line no longer a directive -so it is not removed at the end of phase 4. -\end{note} - -\rSec1[cpp.import]{Header unit importation} -\indextext{header unit!preprocessing}% -\indextext{preprocessing directive!import}% -\indextext{macro!import|(}% - -\begin{bnf} -\nontermdef{pp-import}\br - \opt{\keyword{export}} \keyword{import} header-name \opt{pp-tokens} \terminal{;} new-line\br - \opt{\keyword{export}} \keyword{import} header-name-tokens \opt{pp-tokens} \terminal{;} new-line\br - \opt{\keyword{export}} \keyword{import} pp-tokens \terminal{;} new-line -\end{bnf} - -\pnum -A \grammarterm{pp-import} shall not -appear in a context where \tcode{import} -or (if it is the first token of the \grammarterm{pp-import}) \tcode{export} -is an identifier defined as an object-like macro. - -\pnum -The preprocessing tokens after the \tcode{import} preprocessing token -in the \tcode{import} \grammarterm{control-line} -are processed just as in normal text -(i.e., each identifier currently defined as a macro name -is replaced by its replacement list of preprocessing tokens). -\begin{note} -An \tcode{import} directive -matching the first two forms of a \grammarterm{pp-import} -instructs the preprocessor to import macros -from the header unit\iref{module.import} -denoted by the \grammarterm{header-name}, -as described below. -\end{note} -\indextext{point of!macro import|see{macro, point of import}}% -The \defnx{point of macro import}{macro!point of import} for the -first two forms of \grammarterm{pp-import} is -immediately after the \grammarterm{new-line} terminating -the \grammarterm{pp-import}. 
-The last form of \grammarterm{pp-import} is only considered -if the first two forms did not match, and -does not have a point of macro import. - -\pnum -If a \grammarterm{pp-import} is produced by source file inclusion -(including by the rewrite produced -when a \tcode{\#include} directive names an importable header) -while processing the \grammarterm{group} of a \grammarterm{module-file}, -the program is ill-formed. - -\pnum -In all three forms of \grammarterm{pp-import}, -the \tcode{import} and \tcode{export} (if it exists) preprocessing tokens -are replaced by the \grammarterm{import-keyword} and -\grammarterm{export-keyword} preprocessing tokens respectively. -\begin{note} -This makes the line no longer a directive -so it is not removed at the end of phase 4. -\end{note} -Additionally, in the second form of \grammarterm{pp-import}, -a \grammarterm{header-name} token is formed as if -the \grammarterm{header-name-tokens} -were the \grammarterm{pp-tokens} of a \tcode{\#include} directive. -The \grammarterm{header-name-tokens} are replaced by -the \grammarterm{header-name} token. -\begin{note} -This ensures that imports are treated consistently by -the preprocessor and later phases of translation. -\end{note} - -\pnum -Each \tcode{\#define} directive encountered when preprocessing -each translation unit in a program results in a distinct -\defnx{macro definition}{macro!definition}. -\begin{note} -A predefined macro name\iref{cpp.predefined} -is not introduced by a \tcode{\#define} directive. -Implementations providing mechanisms to predefine additional macros -are encouraged to not treat them -as being introduced by a \tcode{\#define} directive. 
-\end{note} -Each macro definition has at most one point of definition in -each translation unit and at most one point of undefinition, as follows: -\begin{itemize} -\item -\indextext{point of!macro definition|see{macro, point of definition}}% -The \defnx{point of definition}{macro!point of definition} -of a macro definition within a translation unit $T$ is -\begin{itemize} -\item -if the \tcode{\#define} directive of the macro definition occurs within $T$, -the point at which that directive occurs, or otherwise, -\item -if the macro name is not lexically identical to a keyword\iref{lex.key} -or to the \grammarterm{identifier}{s} \tcode{module} or \tcode{import}, -the first point of macro import in $T$ of a header unit -containing a point of definition for the macro definition, if any. -\end{itemize} -In the latter case, the macro is said -to be \defnx{imported}{macro!import} from the header unit. - -\item -\indextext{point of!macro undefinition|see{macro, point of undefinition}}% -The \defnx{point of undefinition}{macro!point of undefinition} -of a macro definition within a translation unit -is the first point at which a \tcode{\#undef} directive naming the macro occurs -after its point of definition, or the first point -of macro import of a header unit containing a point of undefinition for the -macro definition, whichever (if any) occurs first. -\end{itemize} - -\pnum -\indextext{active macro directive|see{macro, active}}% -A macro directive is \defnx{active}{macro!active} at a source location -if it has a point of definition in that translation unit preceding the location, -and does not have a point of undefinition in that translation unit preceding -the location. - -\pnum -If a macro would be replaced or redefined, and multiple macro definitions -are active for that macro name, the active macro definitions shall all be -valid redefinitions of the same macro\iref{cpp.replace}. 
-\begin{note} -The relative order of \grammarterm{pp-import}{s} has no bearing on whether a -particular macro definition is active. -\end{note} - -\pnum -\begin{example} -\begin{codeblocktu}{Importable header \tcode{"a.h"}} -#define X 123 // \#1 -#define Y 45 // \#2 -#define Z a // \#3 -#undef X // point of undefinition of \#1 in \tcode{"a.h"} -\end{codeblocktu} - -\begin{codeblocktu}{Importable header \tcode{"b.h"}} -import "a.h"; // point of definition of \#1, \#2, and \#3, point of undefinition of \#1 in \tcode{"b.h"} -#define X 456 // OK, \#1 is not active -#define Y 6 // error: \#2 is active -\end{codeblocktu} - -\begin{codeblocktu}{Importable header \tcode{"c.h"}} -#define Y 45 // \#4 -#define Z c // \#5 -\end{codeblocktu} - -\begin{codeblocktu}{Importable header \tcode{"d.h"}} -import "c.h"; // point of definition of \#4 and \#5 in \tcode{"d.h"} -\end{codeblocktu} - -\begin{codeblocktu}{Importable header \tcode{"e.h"}} -import "a.h"; // point of definition of \#1, \#2, and \#3, point of undefinition of \#1 in \tcode{"e.h"} -import "d.h"; // point of definition of \#4 and \#5 in \tcode{"e.h"} -int a = Y; // OK, active macro definitions \#2 and \#4 are valid redefinitions -int c = Z; // error: active macro definitions \#3 and \#5 are not valid redefinitions of \tcode{Z} -\end{codeblocktu} - -\begin{codeblocktu}{Module unit \tcode{f}} -export module f; -export import "a.h"; - -int a = Y; // OK -\end{codeblocktu} - -\begin{codeblocktu}{Translation unit \tcode{\#1}} -import f; -int x = Y; // error: \tcode{Y} is neither a defined macro nor a declared name -\end{codeblocktu} -\end{example} -\indextext{macro!import|)} - -\rSec1[cpp.replace]{Macro replacement}% - -\rSec2[cpp.replace.general]{General}% -\indextext{macro!replacement|(}% -\indextext{replacement!macro|see{macro, replacement}}% -\indextext{preprocessing directive!macro replacement|see{macro, replacement}} - -\pnum -\indextext{macro!replacement list}% -Two replacement lists are identical if and only if 
-the preprocessing tokens in both have -the same number, ordering, spelling, and whitespace separation, -where all whitespace separations are considered identical. - -\pnum -An identifier currently defined as an -\indextext{macro!object-like}% -object-like macro (see below) may be redefined by another -\tcode{\#define} -preprocessing directive provided that the second definition is an -object-like macro definition and the two replacement lists -are identical, otherwise the program is ill-formed. -Likewise, an identifier currently defined as a -\indextext{macro!function-like}% -function-like macro (see below) may be redefined by another -\tcode{\#define} -preprocessing directive provided that the second definition is a -function-like macro definition that has the same number and spelling -of parameters, -and the two replacement lists are identical, -otherwise the program is ill-formed. - -\pnum -\begin{example} -The following sequence is valid: -\begin{codeblock} -#define OBJ_LIKE (1-1) -#define OBJ_LIKE @\tcode{/* whitespace */ (1-1) /* other */}@ -#define FUNC_LIKE(a) ( a ) -#define FUNC_LIKE( a )( @\tcode{/* note the whitespace */ \textbackslash}@ - a @\tcode{/* other stuff on this line}@ - @\tcode{*/}@ ) -\end{codeblock} -But the following redefinitions are invalid: -\begin{codeblock} -#define OBJ_LIKE (0) // different token sequence -#define OBJ_LIKE (1 - 1) // different whitespace -#define FUNC_LIKE(b) ( a ) // different parameter usage -#define FUNC_LIKE(b) ( b ) // different parameter spelling -\end{codeblock} -\end{example} - -\pnum -\indextext{macro!replacement list}% -There shall be whitespace between the identifier and the replacement list -in the definition of an object-like macro. 
- -\pnum -If the \grammarterm{identifier-list} in the macro definition does not end with -an ellipsis, the number of arguments (including those arguments consisting -of no preprocessing tokens) -in an invocation of a function-like macro shall -equal the number of parameters in the macro definition. -Otherwise, there shall be at least as many arguments in the invocation as there are -parameters in the macro definition (excluding the \tcode{...}). There -shall exist a -\tcode{)} -preprocessing token that terminates the invocation. - -\pnum -\indextext{__va_args__@\mname{VA_ARGS}}% -\indextext{__va_opt__@\mname{VA_OPT}}% -The identifiers \mname{VA_ARGS} and \mname{VA_OPT} -shall occur only in the \grammarterm{replacement-list} -of a function-like macro that uses the ellipsis notation in the parameters. - -\pnum -A parameter identifier in a function-like macro -shall be uniquely declared within its scope. - -\pnum -The identifier immediately following the -\tcode{define} -is called the -\indextext{name!macro|see{macro, name}}% -\defnx{macro name}{macro!name}. -There is one name space for macro names. -Any whitespace characters preceding or following the -replacement list of preprocessing tokens are not considered -part of the replacement list for either form of macro. - -\pnum -If a -\indextext{\#\#0 operator@\tcode{\#} operator} -\tcode{\#} -preprocessing token, -followed by an identifier, -occurs lexically -at the point at which a preprocessing directive can begin, -the identifier is not subject to macro replacement. 
- -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# define} identifier replacement-list new-line -\indextext{\idxcode{\#define}}% -\end{ncsimplebnf} -defines an -\defnadj{object-like}{macro} that -causes each subsequent instance of the macro name -\begin{footnote} -Since, by macro-replacement time, -all \grammarterm{character-literal}s and \grammarterm{string-literal}s are preprocessing tokens, -not sequences possibly containing identifier-like subsequences -(see \ref{lex.phases}, translation phases), -they are never scanned for macro names or parameters. -\end{footnote} -to be replaced by the replacement list of preprocessing tokens -that constitute the remainder of the directive. -\begin{footnote} -An alternative token\iref{lex.digraph} is not an identifier, -even when its spelling consists entirely of letters and underscores. -Therefore it is not possible to define a macro -whose name is the same as that of an alternative token. -\end{footnote} -The replacement list is then rescanned for more macro names as -specified below. - -\pnum -\begin{example} -The simplest use of this facility is to define a ``manifest constant'', -as in -\begin{codeblock} -#define TABSIZE 100 -int table[TABSIZE]; -\end{codeblock} -\end{example} - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# define} identifier lparen \opt{identifier-list} \terminal{)} replacement-list new-line\br -\terminal{\# define} identifier lparen \terminal{...} \terminal{)} replacement-list new-line\br -\terminal{\# define} identifier lparen identifier-list \terminal{, ...} \terminal{)} replacement-list new-line -\end{ncsimplebnf} -defines a \defnadj{function-like}{macro} -with parameters, whose use is -similar syntactically to a function call. -The parameters -\indextext{parameter!macro}% -are specified by the optional list of identifiers. 
-Each subsequent instance of the function-like macro name followed by a -\tcode{(} -as the next preprocessing token -introduces the sequence of preprocessing tokens that is replaced -by the replacement list in the definition -(an invocation of the macro). -\indextext{invocation!macro}% -The replaced sequence of preprocessing tokens is terminated by the matching -\tcode{)} -preprocessing token, skipping intervening matched pairs of left and -right parenthesis preprocessing tokens. -Within the sequence of preprocessing tokens making up an invocation -of a function-like macro, -new-line is considered a normal whitespace character. - -\pnum -\indextext{macro!function-like!arguments}% -The sequence of preprocessing tokens -bounded by the outside-most matching parentheses -forms the list of arguments for the function-like macro. -The individual arguments within the list -are separated by comma preprocessing tokens, -but comma preprocessing tokens between matching -inner parentheses do not separate arguments. -If there are sequences of preprocessing tokens within the list of -arguments that would otherwise act as preprocessing directives, -\begin{footnote} -A \grammarterm{conditionally-supported-directive} is a preprocessing directive regardless of whether the implementation supports it. -\end{footnote} -the behavior is undefined. - -\pnum -\begin{example} -The following defines a function-like -macro whose value is the maximum of its arguments. -It has the disadvantages of evaluating one or the other of its arguments -a second time -(including -\indextext{side effects}% -side effects) -and generating more code than a function if invoked several times. -It also cannot have its address taken, -as it has none. - -\begin{codeblock} -#define max(a, b) ((a) > (b) ? (a) : (b)) -\end{codeblock} - -The parentheses ensure that the arguments and -the resulting expression are bound properly. 
-\end{example} - -\pnum -\indextext{macro!function-like!arguments}% -If there is a \tcode{...} immediately preceding the \tcode{)} in the -function-like macro -definition, then the trailing arguments (if any), including any separating comma preprocessing -tokens, are merged to form a single item: the \defn{variable arguments}. The number of -arguments so combined is such that, following merger, the number of arguments is -either equal to or -one more than the number of parameters in the macro definition (excluding the -\tcode{...}). - -\rSec2[cpp.subst]{Argument substitution}% -\indextext{macro!argument substitution}% -\indextext{argument substitution|see{macro, argument substitution}}% - -\indextext{__va_opt__@\mname{VA_OPT}}% -\begin{bnf} -\nontermdef{va-opt-replacement}\br - \terminal{\mname{VA_OPT} (} \opt{pp-tokens} \terminal{)} -\end{bnf} - -\pnum -After the arguments for the invocation of a function-like macro have -been identified, argument substitution takes place. -For each parameter in the replacement list that is neither -preceded by a \tcode{\#} or \tcode{\#\#} preprocessing token nor -followed by a \tcode{\#\#} preprocessing token, the preprocessing tokens -naming the parameter are replaced by a token sequence determined as follows: -\begin{itemize} -\item - If the parameter is of the form \grammarterm{va-opt-replacement}, - the replacement preprocessing tokens are the - preprocessing token sequence for the corresponding argument, - as specified below. -\item - Otherwise, the replacement preprocessing tokens are the - preprocessing tokens of corresponding argument after all - macros contained therein have been expanded. The argument's - preprocessing tokens are completely macro replaced before - being substituted as if they formed the rest of the preprocessing - file with no other preprocessing tokens being available. -\end{itemize} -\begin{example} -\begin{codeblock} -#define LPAREN() ( -#define G(Q) 42 -#define F(R, X, ...) 
__VA_OPT__(G R X) ) -int x = F(LPAREN(), 0, <:-); // replaced by \tcode{int x = 42;} -\end{codeblock} -\end{example} - -\pnum -\indextext{__va_args__@\mname{VA_ARGS}}% -An identifier \mname{VA_ARGS} that occurs in the replacement list -shall be treated as if it were a parameter, and the variable arguments shall form -the preprocessing tokens used to replace it. - -\pnum -\begin{example} -\begin{codeblock} -#define debug(...) fprintf(stderr, @\mname{VA_ARGS}@) -#define showlist(...) puts(#@\mname{VA_ARGS}@) -#define report(test, ...) ((test) ? puts(#test) : printf(@\mname{VA_ARGS}@)) -debug("Flag"); -debug("X = %d\n", x); -showlist(The first, second, and third items.); -report(x>y, "x is %d but y is %d", x, y); -\end{codeblock} -results in -\begin{codeblock} -fprintf(stderr, "Flag"); -fprintf(stderr, "X = %d\n", x); -puts("The first, second, and third items."); -((x>y) ? puts("x>y") : printf("x is %d but y is %d", x, y)); -\end{codeblock} -\end{example} - -\pnum -\indextext{__va_opt__@\mname{VA_OPT}}% -The identifier \mname{VA_OPT} -shall always occur as part of the preprocessing token sequence -\grammarterm{va-opt-replacement}; -its closing \tcode{)} is determined by skipping -intervening pairs of matching left and right parentheses -in its \grammarterm{pp-tokens}. -The \grammarterm{pp-tokens} of a \grammarterm{va-opt-replacement} -shall not contain \mname{VA_OPT}. -If the \grammarterm{pp-tokens} would be ill-formed -as the replacement list of the current function-like macro, -the program is ill-formed. -A \grammarterm{va-opt-replacement} is treated as if it were a parameter, -and the preprocessing token sequence for the corresponding -argument is defined as follows. -If the substitution of \mname{VA_ARGS} as neither an operand -of \tcode{\#} nor \tcode{\#\#} consists of no preprocessing tokens, -the argument consists of -a single placemarker preprocessing token\iref{cpp.concat,cpp.rescan}. 
-Otherwise, the argument consists of -the results of the expansion of the contained \grammarterm{pp-tokens} -as the replacement list of the current function-like macro -before removal of placemarker tokens, rescanning, and further replacement. -\begin{note} -The placemarker tokens are removed before stringization\iref{cpp.stringize}, -and can be removed by rescanning and further replacement\iref{cpp.rescan}. -\end{note} -\begin{example} -\begin{codeblock} -#define F(...) f(0 __VA_OPT__(,) __VA_ARGS__) -#define G(X, ...) f(0, X __VA_OPT__(,) __VA_ARGS__) -#define SDEF(sname, ...) S sname __VA_OPT__(= { __VA_ARGS__ }) -#define EMP - -F(a, b, c) // replaced by \tcode{f(0, a, b, c)} -F() // replaced by \tcode{f(0)} -F(EMP) // replaced by \tcode{f(0)} - -G(a, b, c) // replaced by \tcode{f(0, a, b, c)} -G(a, ) // replaced by \tcode{f(0, a)} -G(a) // replaced by \tcode{f(0, a)} - -SDEF(foo); // replaced by \tcode{S foo;} -SDEF(bar, 1, 2); // replaced by \tcode{S bar = \{ 1, 2 \};} - -#define H1(X, ...) X __VA_OPT__(##) __VA_ARGS__ // error: \tcode{\#\#} may not appear at - // the beginning of a replacement list\iref{cpp.concat} - -#define H2(X, Y, ...) __VA_OPT__(X ## Y,) __VA_ARGS__ -H2(a, b, c, d) // replaced by \tcode{ab, c, d} - -#define H3(X, ...) #__VA_OPT__(X##X X##X) -H3(, 0) // replaced by \tcode{""} - -#define H4(X, ...) __VA_OPT__(a X ## X) ## b -H4(, 1) // replaced by \tcode{a b} - -#define H5A(...) __VA_OPT__()@\tcode{/**/}@__VA_OPT__() -#define H5B(X) a ## X ## b -#define H5C(X) H5B(X) -H5C(H5A()) // replaced by \tcode{ab} -\end{codeblock} -\end{example} - -\rSec2[cpp.stringize]{The \tcode{\#} operator}% -\indextext{\#\#0 operator@\tcode{\#} operator}% -\indextext{stringize|see{\tcode{\#} operator}} - -\pnum -Each -\tcode{\#} -preprocessing token in the replacement list for a function-like -macro shall be followed by a parameter as the next preprocessing -token in the replacement list. 
- -\pnum -A \defn{character string literal} is a \grammarterm{string-literal} with no prefix. -If, in the replacement list, a parameter is immediately -preceded by a -\tcode{\#} -preprocessing token, -both are replaced by a single character string literal preprocessing token that -contains the spelling of the preprocessing token sequence for the -corresponding argument (excluding placemarker tokens). -Let the \defn{stringizing argument} be the preprocessing token sequence -for the corresponding argument with placemarker tokens removed. -Each occurrence of whitespace between the stringizing argument's preprocessing -tokens becomes a single space character in the character string literal. -Whitespace before the first preprocessing token and after the last -preprocessing token comprising the stringizing argument is deleted. -Otherwise, the original spelling of each preprocessing token in the -stringizing argument is retained in the character string literal, -except for special handling for producing the spelling of -\grammarterm{string-literal}s and \grammarterm{character-literal}s: -a -\tcode{\textbackslash} -character is inserted before each -\tcode{"} -and -\tcode{\textbackslash} -character of a \grammarterm{character-literal} or \grammarterm{string-literal} -(including the delimiting -\tcode{"} -characters). -If the replacement that results is not a valid character string literal, -the behavior is undefined. The character string literal corresponding to -an empty stringizing argument is \tcode{""}. -The order of evaluation of -\tcode{\#} -and -\tcode{\#\#} -operators is unspecified. - -\rSec2[cpp.concat]{The \tcode{\#\#} operator}% -\indextext{\#\#1 operator@\tcode{\#\#} operator}% -\indextext{concatenation!macro argument|see{\tcode{\#\#} operator}} - -\pnum -A -\tcode{\#\#} -preprocessing token shall not occur at the beginning or -at the end of a replacement list for either form -of macro definition. 
- -\pnum -If, in the replacement list of a function-like macro, a parameter is -immediately preceded or followed by a -\tcode{\#\#} -preprocessing token, the parameter is replaced by the -corresponding argument's preprocessing token sequence; however, if an argument consists of no preprocessing tokens, the parameter is -replaced by a placemarker preprocessing token instead. -\begin{footnote} -Placemarker preprocessing tokens do not appear in the syntax -because they are temporary entities that exist only within translation phase 4. -\end{footnote} - -\pnum -For both object-like and function-like macro invocations, before the -replacement list is reexamined for more macro names to replace, -each instance of a -\tcode{\#\#} -preprocessing token in the replacement list -(not from an argument) is deleted and the -preceding preprocessing token is concatenated -with the following preprocessing token. -Placemarker preprocessing tokens are handled specially: concatenation -of two placemarkers results in a single placemarker preprocessing token, and -concatenation of a placemarker with a non-placemarker preprocessing token results -in the non-placemarker preprocessing token. -\begin{note} -Concatenation can form -a \grammarterm{universal-character-name}\iref{lex.charset}. -\end{note} -If the result is not a valid preprocessing token, -the behavior is undefined. -The resulting token is available for further macro replacement. -The order of evaluation of -\tcode{\#\#} -operators is unspecified. 
- -\pnum -\begin{example} -The sequence -\begin{codeblock} -#define str(s) # s -#define xstr(s) str(s) -#define debug(s, t) printf("x" # s "= %d, x" # t "= %s", @\textbackslash@ - x ## s, x ## t) -#define INCFILE(n) vers ## n -#define glue(a, b) a ## b -#define xglue(a, b) glue(a, b) -#define HIGHLOW "hello" -#define LOW LOW ", world" - -debug(1, 2); -fputs(str(strncmp("abc@\textbackslash@0d", "abc", '@\textbackslash@4') // this goes away - == 0) str(: @\atsign\textbackslash@n), s); -#include xstr(INCFILE(2).h) -glue(HIGH, LOW); -xglue(HIGH, LOW) -\end{codeblock} -results in -\begin{codeblock} -printf("x" "1" "= %d, x" "2" "= %s", x1, x2); -fputs("strncmp(@\textbackslash@"abc@\textbackslash\textbackslash@0d@\textbackslash@", @\textbackslash@"abc@\textbackslash@", '@\textbackslash\textbackslash@4') == 0" ": @\atsign\textbackslash@n", s); -#include "vers2.h" @\textrm{(\textit{after macro replacement, before file access})}@ -"hello"; -"hello" ", world" -\end{codeblock} -or, after concatenation of the character string literals, -\begin{codeblock} -printf("x1= %d, x2= %s", x1, x2); -fputs("strncmp(@\textbackslash@"abc@\textbackslash\textbackslash@0d@\textbackslash@", @\textbackslash@"abc@\textbackslash@", '@\textbackslash\textbackslash@4') == 0: @\atsign\textbackslash@n", s); -#include "vers2.h" @\textrm{(\textit{after macro replacement, before file access})}@ -"hello"; -"hello, world" -\end{codeblock} - -Space around the \tcode{\#} and \tcode{\#\#} tokens in the macro definition -is optional. 
-\end{example} - -\pnum -\begin{example} -In the following fragment: - -\begin{codeblock} -#define hash_hash # ## # -#define mkstr(a) # a -#define in_between(a) mkstr(a) -#define join(c, d) in_between(c hash_hash d) -char p[] = join(x, y); // equivalent to \tcode{char p[] = "x \#\# y";} -\end{codeblock} - -The expansion produces, at various stages: - -\begin{codeblock} -join(x, y) -in_between(x hash_hash y) -in_between(x ## y) -mkstr(x ## y) -"x ## y" -\end{codeblock} - -In other words, expanding \tcode{hash_hash} produces a new token, -consisting of two adjacent sharp signs, but this new token is not the -\tcode{\#\#} operator. -\end{example} - -\pnum -\begin{example} -To illustrate the rules for placemarker preprocessing tokens, the sequence -\begin{codeblock} -#define t(x,y,z) x ## y ## z -int j[] = { t(1,2,3), t(,4,5), t(6,,7), t(8,9,), - t(10,,), t(,11,), t(,,12), t(,,) }; -\end{codeblock} -results in -\begin{codeblock} -int j[] = { 123, 45, 67, 89, - 10, 11, 12, }; -\end{codeblock} -\end{example} - -\rSec2[cpp.rescan]{Rescanning and further replacement}% -\indextext{macro!rescanning and replacement}% -\indextext{rescanning and replacement|see{macro, rescanning and replacement}} - -\pnum -After all parameters in the replacement list have been substituted and \tcode{\#} and \tcode{\#\#} processing has taken -place, all placemarker preprocessing tokens are removed. Then -the resulting preprocessing token sequence is rescanned, along with all -subsequent preprocessing tokens of the source file, for more macro names -to replace. 
- -\pnum -\begin{example} -The sequence -\begin{codeblock} -#define x 3 -#define f(a) f(x * (a)) -#undef x -#define x 2 -#define g f -#define z z[0] -#define h g(~ -#define m(a) a(w) -#define w 0,1 -#define t(a) a -#define p() int -#define q(x) x -#define r(x,y) x ## y -#define str(x) # x - -f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); -g(x+(3,4)-w) | h 5) & m - (f)^m(m); -p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) }; -char c[2][6] = { str(hello), str() }; -\end{codeblock} -results in -\begin{codeblock} -f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1); -f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1); -int i[] = { 1, 23, 4, 5, }; -char c[2][6] = { "hello", "" }; -\end{codeblock} -\end{example} - -\pnum -If the name of the macro being replaced is found during this scan of -the replacement list -(not including the rest of the source file's preprocessing tokens), -it is not replaced. -Furthermore, -if any nested replacements encounter the name of the macro being replaced, -it is not replaced. -These nonreplaced macro name preprocessing tokens are no longer available -for further replacement even if they are later (re)examined in contexts -in which that macro name preprocessing token would otherwise have been -replaced. - -\pnum -The resulting completely macro-replaced preprocessing token sequence -is not processed as a preprocessing directive even if it resembles one, -but all pragma unary operator expressions within it are then processed as -specified in~\ref{cpp.pragma.op} below. - -\rSec2[cpp.scope]{Scope of macro definitions}% -\indextext{macro!scope of definition}% -\indextext{scope!macro definition|see{macro, scope of definition}} - -\pnum -A macro definition lasts -(independent of block structure) -until a corresponding -\tcode{\#undef} -directive is encountered or -(if none is encountered) -until the end of the translation unit. -Macro definitions have no significance after translation phase 4. 
- -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# undef} identifier new-line -\indextext{\idxcode{\#undef}}% -\end{ncsimplebnf} -causes the specified identifier no longer to be defined as a macro name. -It is ignored if the specified identifier is not currently defined as -a macro name. - -\indextext{macro!replacement|)} - -\rSec1[cpp.line]{Line control}% -\indextext{preprocessing directive!line control}% -\indextext{\idxcode{\#line}|see{preprocessing directive, line control}} - -\pnum -The \grammarterm{string-literal} of a -\tcode{\#line} -directive, if present, -shall be a character string literal. - -\pnum -The -\defn{line number} -of the current source line is one greater than -the number of new-line characters read or introduced -in translation phase 1\iref{lex.phases} -while processing the source file to the current token. - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# line} digit-sequence new-line -\end{ncsimplebnf} -causes the implementation to behave as if -the following sequence of source lines begins with a -source line that has a line number as specified -by the digit sequence (interpreted as a decimal integer). -If the digit sequence specifies zero -or a number greater than 2147483647, -the behavior is undefined. - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# line} digit-sequence \terminal{"} \opt{s-char-sequence} \terminal{"} new-line -\end{ncsimplebnf} -sets the presumed line number similarly and changes the -presumed name of the source file to be the contents -of the character string literal. - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# line} pp-tokens new-line -\end{ncsimplebnf} -(that does not match one of the two previous forms) -is permitted. 
-The preprocessing tokens after -\tcode{line} -on the directive are processed just as in normal text -(each identifier currently defined as a macro name is replaced by its -replacement list of preprocessing tokens). -If the directive resulting after all replacements does not match -one of the two previous forms, the behavior is undefined; -otherwise, the result is processed as appropriate. - -\rSec1[cpp.error]{Diagnostic directives}% -\indextext{preprocessing directive!error}% -\indextext{preprocessing directive!diagnostic}% -\indextext{preprocessing directive!warning}% -\indextext{\idxcode{\#error}|see{preprocessing directive, error}} - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# error} \opt{pp-tokens} new-line -\end{ncsimplebnf} -renders the program ill-formed. -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# warning} \opt{pp-tokens} new-line -\end{ncsimplebnf} -requires the implementation to produce at least one diagnostic message -for the preprocessing translation unit\iref{intro.compliance.general}. -\recommended -Any diagnostic message caused by either of these directives -should include the specified sequence of preprocessing tokens. - -\rSec1[cpp.pragma]{Pragma directive}% -\indextext{preprocessing directive!pragma}% -\indextext{\idxcode{\#pragma}|see{preprocessing directive, pragma}} - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\# pragma} \opt{pp-tokens} new-line -\end{ncsimplebnf} -causes the implementation to behave -in an \impldef{\tcode{\#pragma}} manner. -The behavior may cause translation to fail or cause the translator or -the resulting program to behave in a non-conforming manner. -Any pragma that is not recognized by the implementation is ignored. - -\rSec1[cpp.null]{Null directive}% -\indextext{preprocessing directive!null} - -\pnum -A preprocessing directive of the form -\begin{ncsimplebnf} -\terminal{\#} new-line -\end{ncsimplebnf} -has no effect. 
- -\rSec1[cpp.predefined]{Predefined macro names} -\indextext{macro!predefined}% -\indextext{name!predefined macro|see{macro, predefined}} - -\pnum -The following macro names shall be defined by the implementation: - -\begin{description} - -\item -\indextext{\idxxname{cplusplus}}% -\xname{cplusplus}\\ -The integer literal \tcode{\cppver}. -\begin{note} -Future revisions of this document will -replace the value of this macro with a greater value. -\end{note} - -\item The names listed in \tref{cpp.predefined.ft}.\\ -The macros defined in \tref{cpp.predefined.ft} shall be defined to -the corresponding integer literal. -\begin{note} -Future revisions of this document might replace -the values of these macros with greater values. -\end{note} - -\item -\indextext{__date__@\mname{DATE}}% -\mname{DATE}\\ -The date of translation of the source file: -a character string literal of the form -\tcode{"Mmm~dd~yyyy"}, -where the names of the months are the same as those generated -by the -\tcode{asctime} -function, -and the first character of -\tcode{dd} -is a space character if the value is less than 10. -If the date of translation is not available, -an \impldef{text of \mname{DATE} when date of translation is not available} valid date -shall be supplied. - -\item -\indextext{__file__@\mname{FILE}}% -\mname{FILE}\\ -The presumed name of the current source file (a character string -literal). -\begin{footnote} -The presumed source file name can be changed by the \tcode{\#line} directive. -\end{footnote} - -\item -\indextext{__line__@\mname{LINE}}% -\mname{LINE}\\ -The presumed line number (within the current source file) of the current source line -(an integer literal). -\begin{footnote} -The presumed line number can be changed by the \tcode{\#line} directive. 
-\end{footnote} - -\item -\indextext{__stdc_hosted__@\mname{STDC_HOSTED}}% -\indextext{implementation!hosted}% -\indextext{implementation!freestanding}% -\mname{STDC_HOSTED}\\ -The integer literal \tcode{1} -if the implementation is a hosted implementation or -the integer literal \tcode{0} -if it is a freestanding implementation\iref{intro.compliance}. - -\item -\indextext{__stdcpp_default_new_alignment__@\mname{STDCPP_DEFAULT_NEW_ALIGNMENT}}% -\mname{STDCPP_DEFAULT_NEW_ALIGNMENT}\\ -An integer literal of type \tcode{std::size_t} -whose value is the alignment guaranteed -by a call to \tcode{operator new(std::size_t)} -or \tcode{operator new[](std::size_t)}. -\begin{note} -Larger alignments will be passed to -\tcode{operator new(std::size_t, std::align_val_t)}, etc.\iref{expr.new}. -\end{note} - -\item -\indextext{__stdcpp_float16_t__@\mname{STDCPP_FLOAT16_T}}% -\mname{STDCPP_FLOAT16_T}\\ -Defined as the integer literal \tcode{1} -if and only if the implementation supports -the ISO/IEC/IEEE 60559 floating-point interchange format binary16 -as an extended floating-point type\iref{basic.extended.fp}. - -\item -\indextext{__stdcpp_float32_t__@\mname{STDCPP_FLOAT32_T}}% -\mname{STDCPP_FLOAT32_T}\\ -Defined as the integer literal \tcode{1} -if and only if the implementation supports -the ISO/IEC/IEEE 60559 floating-point interchange format binary32 -as an extended floating-point type. - -\item -\indextext{__stdcpp_float64_t__@\mname{STDCPP_FLOAT64_T}}% -\mname{STDCPP_FLOAT64_T}\\ -Defined as the integer literal \tcode{1} -if and only if the implementation supports -the ISO/IEC/IEEE 60559 floating-point interchange format binary64 -as an extended floating-point type. - -\item -\indextext{__stdcpp_float128_t__@\mname{STDCPP_FLOAT128_T}}% -\mname{STDCPP_FLOAT128_T}\\ -Defined as the integer literal \tcode{1} -if and only if the implementation supports -the ISO/IEC/IEEE 60559 floating-point interchange format binary128 -as an extended floating-point type. 
- -\item -\indextext{__stdcpp_bfloat16_t__@\mname{STDCPP_BFLOAT16_T}}% -\mname{STDCPP_BFLOAT16_T}\\ -Defined as the integer literal \tcode{1} -if and only if the implementation supports an extended floating-point type -with the properties of the \grammarterm{typedef-name} \tcode{std::bfloat16_t} -as described in~\ref{basic.extended.fp}. - -\item -\indextext{__time__@\mname{TIME}}% -\mname{TIME}\\ -The time of translation of the source file: -a character string literal of the form -\tcode{"hh:mm:ss"} -as in the time generated by the -\tcode{asctime} -function. -If the time of translation is not available, -an \impldef{text of \mname{TIME} when time of translation is not available} valid time shall be supplied. -\end{description} - -\indextext{macro!feature-test}% -\indextext{feature-test macro|see{macro, feature-test}}% -\begin{LongTable}{Feature-test macros}{cpp.predefined.ft}{ll} -\\ \topline -\lhdr{Macro name} & \rhdr{Value} \\ \capsep -\endfirsthead -\continuedcaption \\ -\hline -\lhdr{Macro name} & \rhdr{Value} \\ \capsep -\endhead -\defnxname{cpp_aggregate_bases} & \tcode{201603L} \\ \rowsep -\defnxname{cpp_aggregate_nsdmi} & \tcode{201304L} \\ \rowsep -\defnxname{cpp_aggregate_paren_init} & \tcode{201902L} \\ \rowsep -\defnxname{cpp_alias_templates} & \tcode{200704L} \\ \rowsep -\defnxname{cpp_aligned_new} & \tcode{201606L} \\ \rowsep -\defnxname{cpp_attributes} & \tcode{200809L} \\ \rowsep -\defnxname{cpp_auto_cast} & \tcode{202110L} \\ \rowsep -\defnxname{cpp_binary_literals} & \tcode{201304L} \\ \rowsep -\defnxname{cpp_capture_star_this} & \tcode{201603L} \\ \rowsep -\defnxname{cpp_char8_t} & \tcode{202207L} \\ \rowsep -\defnxname{cpp_concepts} & \tcode{202002L} \\ \rowsep -\defnxname{cpp_conditional_explicit} & \tcode{201806L} \\ \rowsep -\defnxname{cpp_constexpr} & \tcode{202306L} \\ \rowsep -\defnxname{cpp_constexpr_dynamic_alloc} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_constexpr_in_decltype} & \tcode{201711L} \\ \rowsep -\defnxname{cpp_consteval} & 
\tcode{202211L} \\ \rowsep -\defnxname{cpp_constinit} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_decltype} & \tcode{200707L} \\ \rowsep -\defnxname{cpp_decltype_auto} & \tcode{201304L} \\ \rowsep -\defnxname{cpp_deduction_guides} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_delegating_constructors} & \tcode{200604L} \\ \rowsep -\defnxname{cpp_deleted_function} & \tcode{202403L} \\ \rowsep -\defnxname{cpp_designated_initializers} & \tcode{201707L} \\ \rowsep -\defnxname{cpp_enumerator_attributes} & \tcode{201411L} \\ \rowsep -\defnxname{cpp_explicit_this_parameter} & \tcode{202110L} \\ \rowsep -\defnxname{cpp_fold_expressions} & \tcode{201603L} \\ \rowsep -\defnxname{cpp_generic_lambdas} & \tcode{201707L} \\ \rowsep -\defnxname{cpp_guaranteed_copy_elision} & \tcode{201606L} \\ \rowsep -\defnxname{cpp_hex_float} & \tcode{201603L} \\ \rowsep -\defnxname{cpp_if_consteval} & \tcode{202106L} \\ \rowsep -\defnxname{cpp_if_constexpr} & \tcode{201606L} \\ \rowsep -\defnxname{cpp_impl_coroutine} & \tcode{201902L} \\ \rowsep -\defnxname{cpp_impl_destroying_delete} & \tcode{201806L} \\ \rowsep -\defnxname{cpp_impl_three_way_comparison} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_implicit_move} & \tcode{202207L} \\ \rowsep -\defnxname{cpp_inheriting_constructors} & \tcode{201511L} \\ \rowsep -\defnxname{cpp_init_captures} & \tcode{201803L} \\ \rowsep -\defnxname{cpp_initializer_lists} & \tcode{200806L} \\ \rowsep -\defnxname{cpp_inline_variables} & \tcode{201606L} \\ \rowsep -\defnxname{cpp_lambdas} & \tcode{200907L} \\ \rowsep -\defnxname{cpp_modules} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_multidimensional_subscript} & \tcode{202211L} \\ \rowsep -\defnxname{cpp_named_character_escapes} & \tcode{202207L} \\ \rowsep -\defnxname{cpp_namespace_attributes} & \tcode{201411L} \\ \rowsep -\defnxname{cpp_noexcept_function_type} & \tcode{201510L} \\ \rowsep -\defnxname{cpp_nontype_template_args} & \tcode{201911L} \\ \rowsep -\defnxname{cpp_nontype_template_parameter_auto} & 
\tcode{201606L} \\ \rowsep -\defnxname{cpp_nsdmi} & \tcode{200809L} \\ \rowsep -\defnxname{cpp_pack_indexing} & \tcode{202311L} \\ \rowsep -\defnxname{cpp_placeholder_variables} & \tcode{202306L} \\ \rowsep -\defnxname{cpp_range_based_for} & \tcode{202211L} \\ \rowsep -\defnxname{cpp_raw_strings} & \tcode{200710L} \\ \rowsep -\defnxname{cpp_ref_qualifiers} & \tcode{200710L} \\ \rowsep -\defnxname{cpp_return_type_deduction} & \tcode{201304L} \\ \rowsep -\defnxname{cpp_rvalue_references} & \tcode{200610L} \\ \rowsep -\defnxname{cpp_size_t_suffix} & \tcode{202011L} \\ \rowsep -\defnxname{cpp_sized_deallocation} & \tcode{201309L} \\ \rowsep -\defnxname{cpp_static_assert} & \tcode{202306L} \\ \rowsep -\defnxname{cpp_static_call_operator} & \tcode{202207L} \\ \rowsep -\defnxname{cpp_structured_bindings} & \tcode{202403L} \\ \rowsep -\defnxname{cpp_template_template_args} & \tcode{201611L} \\ \rowsep -\defnxname{cpp_threadsafe_static_init} & \tcode{200806L} \\ \rowsep -\defnxname{cpp_unicode_characters} & \tcode{200704L} \\ \rowsep -\defnxname{cpp_unicode_literals} & \tcode{200710L} \\ \rowsep -\defnxname{cpp_user_defined_literals} & \tcode{200809L} \\ \rowsep -\defnxname{cpp_using_enum} & \tcode{201907L} \\ \rowsep -\defnxname{cpp_variable_templates} & \tcode{201304L} \\ \rowsep -\defnxname{cpp_variadic_friend} & \tcode{202403L} \\ \rowsep -\defnxname{cpp_variadic_templates} & \tcode{200704L} \\ \rowsep -\defnxname{cpp_variadic_using} & \tcode{201611L} \\ -\end{LongTable} - -\pnum -The following macro names are conditionally defined by the implementation: - -\begin{description} -\item -\indextext{__stdc__@\mname{STDC}}% -\mname{STDC}\\ -Whether \mname{STDC} is predefined and if so, what its value is, -are \impldef{definition and meaning of \mname{STDC}}. 
- -\item -\indextext{__stdc_mb_might_neq_wc__@\mname{STDC_MB_MIGHT_NEQ_WC}}% -\mname{STDC_MB_MIGHT_NEQ_WC}\\ -The integer literal \tcode{1}, intended to indicate that, in the encoding for -\keyword{wchar_t}, a member of the basic character set need not have a code value equal to -its value when used as the lone character in an ordinary character literal. - -\item -\indextext{__stdc_version__@\mname{STDC_VERSION}}% -\mname{STDC_VERSION}\\ -Whether \mname{STDC_VERSION} is predefined and if so, what its value is, -are \impldef{definition and meaning of \mname{STDC_VERSION}}. - -\item -\indextext{__stdc_iso_10646__@\mname{STDC_ISO_10646}}% -\mname{STDC_ISO_10646}\\ -An integer literal of the form \tcode{yyyymmL} -(for example, \tcode{199712L}). -Whether \mname{STDC_ISO_10646} is predefined and -if so, what its value is, -are \impldef{presence and value of \mname{STDC_ISO_10646}}. - -\item -\indextext{__stdcpp_threads__@\mname{STDCPP_THREADS}}% -\mname{STDCPP_THREADS}\\ -Defined as the integer literal \tcode{1} if and only if a program -can have more than one thread of execution\iref{intro.multithread}. - -\end{description} - -\pnum -The values of the predefined macros -(except for -\mname{FILE} -and -\mname{LINE}) -remain constant throughout the translation unit. - -\pnum -If any of the predefined macro names in this subclause, -or the identifier -\tcode{defined}, -is the subject of a -\tcode{\#define} -or a -\tcode{\#undef} -preprocessing directive, -the behavior is undefined. -Any other predefined macro names shall begin with a -leading underscore followed by an uppercase letter or a second -underscore. 
- -\rSec1[cpp.pragma.op]{Pragma operator}% -\indextext{macro!pragma operator}% -\indextext{operator!pragma|see{macro, pragma operator}} - -\pnum -A unary operator expression of the form: -\begin{ncbnf} -\terminal{_Pragma} \terminal{(} string-literal \terminal{)} -\end{ncbnf} -is processed as follows: The \grammarterm{string-literal} is \defnx{destringized}{destringization} -by deleting the \tcode{L} prefix, if present, deleting the leading and trailing -double-quotes, replacing each escape sequence \tcode{\textbackslash"} by a double-quote, and -replacing each escape sequence \tcode{\textbackslash\textbackslash} by a single -backslash. The resulting sequence of characters is processed through translation phase 3 -to produce preprocessing tokens that are executed as if they were the -\grammarterm{pp-tokens} in a pragma directive. The original four preprocessing -tokens in the unary operator expression are removed. - -\pnum -\begin{example} -\begin{codeblock} -#pragma listing on "..\listing.dir" -\end{codeblock} -can also be expressed as: -\begin{codeblock} -_Pragma ( "listing on \"..\\listing.dir\"" ) -\end{codeblock} -The latter form is processed in the same way whether it appears literally -as shown, or results from macro replacement, as in: -\begin{codeblock} -#define LISTING(x) PRAGMA(listing on #x) -#define PRAGMA(x) _Pragma(#x) - -LISTING( ..\listing.dir ) -\end{codeblock} -\end{example} -\indextext{preprocessing directive|)} diff --git a/source/std.tex b/source/std.tex index de1f338cc2..b16cd4b97c 100644 --- a/source/std.tex +++ b/source/std.tex @@ -106,16 +106,15 @@ \include{intro} \include{lex} +\include{modules} \include{basic} \include{expressions} \include{statements} \include{declarations} -\include{modules} \include{classes} \include{overloading} \include{templates} \include{exceptions} -\include{preprocessor} \include{lib-intro} \include{support} \include{concepts} diff --git a/source/xrefdelta.tex b/source/xrefdelta.tex index cd814e0a13..c45b5d2199 
100644 --- a/source/xrefdelta.tex +++ b/source/xrefdelta.tex @@ -82,6 +82,9 @@ % P2875 Undeprecate polymorphic_allocator::destroy \movedxref{depr.mem.poly.allocator.mem}{mem.poly.allocator.mem} +%%% Refactoring core clauses +\movedxref{lex.comment}{lex.phase.3} + % https://github.com/cplusplus/draft/pull/6653 \movedxref{mismatch}{alg.mismatch}