Skip to content

Commit 739a384

Browse files
committed
P2071R2 Named universal character escapes
Wording for the paper P2071R2 Named universal character escapes, actually an alternate form of universal-character-name, as reviewed and approved by CWG. Not yet approved by plenary. Needs final date for feature test macro.
1 parent 1c162a7 commit 739a384

File tree

2 files changed

+145
-2
lines changed

2 files changed

+145
-2
lines changed

source/lex.tex

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,25 @@
288288
The \grammarterm{universal-character-name} construct provides a way to name
289289
other characters.
290290

291+
\begin{bnf}
292+
\nontermdef{n-char} \textnormal{one of}\br
293+
\terminal{A B C D E F G H I J K L M N O P Q R S T U V W X Y Z}\br
294+
\terminal{0 1 2 3 4 5 6 7 8 9}\br
295+
\textnormal{\unicode{002d}{hyphen-minus}}\br
296+
\textnormal{\unicode{0020}{space}}
297+
\end{bnf}
298+
299+
\begin{bnf}
300+
\nontermdef{n-char-sequence}\br
301+
n-char\br
302+
n-char-sequence n-char
303+
\end{bnf}
304+
305+
\begin{bnf}
306+
\nontermdef{named-universal-character}\br
307+
\terminal{\textbackslash N\{n-char-sequence\}}
308+
\end{bnf}
309+
291310
\begin{bnf}
292311
\nontermdef{hex-quad}\br
293312
hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
@@ -296,15 +315,138 @@
296315
\begin{bnf}
297316
\nontermdef{universal-character-name}\br
298317
\terminal{\textbackslash u} hex-quad\br
299-
\terminal{\textbackslash U} hex-quad hex-quad
318+
\terminal{\textbackslash U} hex-quad hex-quad\br
319+
named-universal-character
300320
\end{bnf}
301321

322+
\pnum
302323
A \grammarterm{universal-character-name}
324+
of the form \tcode{\textbackslash u} \grammarterm{hex-quad} or \tcode{\textbackslash U}
325+
\grammarterm{hex-quad} \grammarterm{hex-quad}
303326
designates the character in the translation character set
304327
whose UCS scalar value is the hexadecimal number represented by
305328
the sequence of \grammarterm{hexadecimal-digit}s
306329
in the \grammarterm{universal-character-name}.
307330
The program is ill-formed if that number is not a UCS scalar value.
331+
332+
\pnum
333+
A \grammarterm{universal-character-name} that is a
334+
\grammarterm{named-universal-character} designates the
335+
character named by its \grammarterm{n-char-sequence}. A character is so named if the
336+
\grammarterm{n-char-sequence} is equal to
337+
\begin{itemize}
338+
\item the associated character name or associated character name alias specified in
339+
ISO/IEC 10646 subclause ``Code charts and lists of character names'' or
340+
\item the control code alias given in \tref{lex.charset.aliases}.
341+
\begin{note}
342+
The aliases in \tref{lex.charset.aliases} are provided for control characters
343+
which otherwise have no associated character name or character name alias.
344+
These names are derived from the Unicode Character Database's \tcode{NameAliases.txt}.
345+
For historical reasons, control characters are formally unnamed.
346+
\end{note}
347+
\end{itemize}
348+
349+
350+
\begin{note}
351+
None of the associated character names, associated character name aliases, or
352+
control code aliases have leading or trailing spaces.
353+
\end{note}
354+
355+
\begin{LongTable}{Control Code Aliases}{lex.charset.aliases}{ll}
356+
\\ \topline
357+
\lhdr{Code point} & \rhdr{Control code alias} \\ \capsep
358+
\endfirsthead
359+
\continuedcaption \\
360+
\hline
361+
\lhdr{Code point} & \rhdr{Control code alias} \\ \capsep
362+
\endhead
363+
\ucode{0000} & \tcode{NULL} \\ \rowsep
364+
\ucode{0001} & \tcode{START OF HEADING} \\ \rowsep
365+
\ucode{0002} & \tcode{START OF TEXT} \\ \rowsep
366+
\ucode{0003} & \tcode{END OF TEXT} \\ \rowsep
367+
\ucode{0004} & \tcode{END OF TRANSMISSION} \\ \rowsep
368+
\ucode{0005} & \tcode{ENQUIRY} \\ \rowsep
369+
\ucode{0006} & \tcode{ACKNOWLEDGE} \\ \rowsep
370+
\ucode{0007} & \tcode{ALERT} \\ \rowsep
371+
\ucode{0008} & \tcode{BACKSPACE} \\ \rowsep
372+
\ucode{0009} & \tcode{CHARACTER TABULATION} \\
373+
& \tcode{HORIZONTAL TABULATION} \\ \rowsep
374+
\ucode{000a} & \tcode{LINE FEED} \\
375+
& \tcode{NEW LINE} \\
376+
& \tcode{END OF LINE} \\ \rowsep
377+
\ucode{000b} & \tcode{LINE TABULATION} \\
378+
& \tcode{VERTICAL TABULATION} \\ \rowsep
379+
\ucode{000c} & \tcode{FORM FEED} \\ \rowsep
380+
\ucode{000d} & \tcode{CARRIAGE RETURN} \\ \rowsep
381+
\ucode{000e} & \tcode{SHIFT OUT} \\
382+
& \tcode{LOCKING-SHIFT ONE} \\ \rowsep
383+
\ucode{000f} & \tcode{SHIFT IN} \\
384+
& \tcode{LOCKING-SHIFT ZERO} \\ \rowsep
385+
\ucode{0010} & \tcode{DATA LINK ESCAPE} \\ \rowsep
386+
\ucode{0011} & \tcode{DEVICE CONTROL ONE} \\ \rowsep
387+
\ucode{0012} & \tcode{DEVICE CONTROL TWO} \\ \rowsep
388+
\ucode{0013} & \tcode{DEVICE CONTROL THREE} \\ \rowsep
389+
\ucode{0014} & \tcode{DEVICE CONTROL FOUR} \\ \rowsep
390+
\ucode{0015} & \tcode{NEGATIVE ACKNOWLEDGE} \\ \rowsep
391+
\ucode{0016} & \tcode{SYNCHRONOUS IDLE} \\ \rowsep
392+
\ucode{0017} & \tcode{END OF TRANSMISSION BLOCK} \\ \rowsep
393+
\ucode{0018} & \tcode{CANCEL} \\ \rowsep
394+
\ucode{0019} & \tcode{END OF MEDIUM} \\ \rowsep
395+
\ucode{001a} & \tcode{SUBSTITUTE} \\ \rowsep
396+
\ucode{001b} & \tcode{ESCAPE} \\ \rowsep
397+
\ucode{001c} & \tcode{INFORMATION SEPARATOR FOUR} \\
398+
& \tcode{FILE SEPARATOR} \\ \rowsep
399+
\ucode{001d} & \tcode{INFORMATION SEPARATOR THREE} \\
400+
& \tcode{GROUP SEPARATOR} \\ \rowsep
401+
\ucode{001e} & \tcode{INFORMATION SEPARATOR TWO} \\
402+
& \tcode{RECORD SEPARATOR} \\ \rowsep
403+
\ucode{001f} & \tcode{INFORMATION SEPARATOR ONE} \\
404+
& \tcode{UNIT SEPARATOR} \\ \rowsep
405+
\ucode{007f} & \tcode{DELETE} \\ \rowsep
406+
\ucode{0082} & \tcode{BREAK PERMITTED HERE} \\ \rowsep
407+
\ucode{0083} & \tcode{NO BREAK HERE} \\ \rowsep
408+
\ucode{0084} & \tcode{INDEX} \\ \rowsep
409+
\ucode{0085} & \tcode{NEXT LINE} \\ \rowsep
410+
\ucode{0086} & \tcode{START OF SELECTED AREA} \\ \rowsep
411+
\ucode{0087} & \tcode{END OF SELECTED AREA} \\ \rowsep
412+
\ucode{0088} & \tcode{CHARACTER TABULATION SET} \\
413+
& \tcode{HORIZONTAL TABULATION SET} \\ \rowsep
414+
\ucode{0089} & \tcode{CHARACTER TABULATION WITH JUSTIFICATION} \\
415+
& \tcode{HORIZONTAL TABULATION WITH JUSTIFICATION} \\ \rowsep
416+
\ucode{008a} & \tcode{LINE TABULATION SET} \\
417+
& \tcode{VERTICAL TABULATION SET} \\ \rowsep
418+
\ucode{008b} & \tcode{PARTIAL LINE FORWARD} \\
419+
& \tcode{PARTIAL LINE DOWN} \\ \rowsep
420+
\ucode{008c} & \tcode{PARTIAL LINE BACKWARD} \\
421+
& \tcode{PARTIAL LINE UP} \\ \rowsep
422+
\ucode{008d} & \tcode{REVERSE LINE FEED} \\
423+
& \tcode{REVERSE INDEX} \\ \rowsep
424+
\ucode{008e} & \tcode{SINGLE SHIFT TWO} \\
425+
& \tcode{SINGLE-SHIFT-2} \\ \rowsep
426+
\ucode{008f} & \tcode{SINGLE SHIFT THREE} \\
427+
& \tcode{SINGLE-SHIFT-3} \\ \rowsep
428+
\ucode{0090} & \tcode{DEVICE CONTROL STRING} \\ \rowsep
429+
\ucode{0091} & \tcode{PRIVATE USE ONE} \\
430+
& \tcode{PRIVATE USE-1} \\ \rowsep
431+
\ucode{0092} & \tcode{PRIVATE USE TWO} \\
432+
& \tcode{PRIVATE USE-2} \\ \rowsep
433+
\ucode{0093} & \tcode{SET TRANSMIT STATE} \\ \rowsep
434+
\ucode{0094} & \tcode{CANCEL CHARACTER} \\ \rowsep
435+
\ucode{0095} & \tcode{MESSAGE WAITING} \\ \rowsep
436+
\ucode{0096} & \tcode{START OF GUARDED AREA} \\
437+
& \tcode{START OF PROTECTED AREA} \\ \rowsep
438+
\ucode{0097} & \tcode{END OF GUARDED AREA} \\
439+
& \tcode{END OF PROTECTED AREA} \\ \rowsep
440+
\ucode{0098} & \tcode{START OF STRING} \\ \rowsep
441+
\ucode{009a} & \tcode{SINGLE CHARACTER INTRODUCER} \\ \rowsep
442+
\ucode{009b} & \tcode{CONTROL SEQUENCE INTRODUCER} \\ \rowsep
443+
\ucode{009c} & \tcode{STRING TERMINATOR} \\ \rowsep
444+
\ucode{009d} & \tcode{OPERATING SYSTEM COMMAND} \\ \rowsep
445+
\ucode{009e} & \tcode{PRIVACY MESSAGE} \\ \rowsep
446+
\ucode{009f} & \tcode{APPLICATION PROGRAM COMMAND} \\
447+
\end{LongTable}
448+
449+
\pnum
308450
If a \grammarterm{universal-character-name} outside
309451
the \grammarterm{c-char-sequence}, \grammarterm{s-char-sequence}, or
310452
\grammarterm{r-char-sequence} of
@@ -1330,7 +1472,7 @@
13301472

13311473
\begin{bnf}
13321474
\nontermdef{conditional-escape-sequence-char}\br
1333-
\textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{u}, \terminal{U}, or \terminal{x}}
1475+
\textnormal{any member of the basic character set that is not an} octal-digit\textnormal{, a} simple-escape-sequence-char\textnormal{, or the characters \terminal{N}, \terminal{u}, \terminal{U}, or \terminal{x}}
13341476
\end{bnf}
13351477

13361478
\pnum

source/preprocessor.tex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1784,6 +1784,7 @@
17841784
\defnxname{cpp_lambdas} & \tcode{200907L} \\ \rowsep
17851785
\defnxname{cpp_modules} & \tcode{201907L} \\ \rowsep
17861786
\defnxname{cpp_multidimensional_subscript} & \tcode{202110L} \\ \rowsep
1787+
\defnxname{cpp_named_character_escapes} & \tcode{XXXXXXL} \\ \rowsep % FIXME IF PLENARY ADOPTS
17871788
\defnxname{cpp_namespace_attributes} & \tcode{201411L} \\ \rowsep
17881789
\defnxname{cpp_noexcept_function_type} & \tcode{201510L} \\ \rowsep
17891790
\defnxname{cpp_nontype_template_args} & \tcode{201911L} \\ \rowsep

0 commit comments

Comments
 (0)