Skip to content

Commit 703e037

Browse files
committed
Upgrade bundled PCRE2 to 10.31
1 parent 1cae6cf commit 703e037

39 files changed

+4890
-3173
lines changed

ext/pcre/config.w32

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
EXTENSION("pcre", "php_pcre.c", false /* never shared */,
55
"-Iext/pcre/pcre2lib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
6-
ADD_SOURCES("ext/pcre/pcre2lib", "pcre2_auto_possess.c pcre2_chartables.c pcre2_compile.c pcre2_config.c pcre2_context.c pcre2_dfa_match.c pcre2_error.c pcre2_jit_compile.c pcre2_maketables.c pcre2_match.c pcre2_match_data.c pcre2_newline.c pcre2_ord2utf.c pcre2_pattern_info.c pcre2_serialize.c pcre2_string_utils.c pcre2_study.c pcre2_substitute.c pcre2_substring.c pcre2_tables.c pcre2_ucd.c pcre2_valid_utf.c pcre2_xclass.c pcre2_find_bracket.c pcre2_convert.c ", "pcre");
6+
ADD_SOURCES("ext/pcre/pcre2lib", "pcre2_auto_possess.c pcre2_chartables.c pcre2_compile.c pcre2_config.c pcre2_context.c pcre2_dfa_match.c pcre2_error.c pcre2_jit_compile.c pcre2_maketables.c pcre2_match.c pcre2_match_data.c pcre2_newline.c pcre2_ord2utf.c pcre2_pattern_info.c pcre2_serialize.c pcre2_string_utils.c pcre2_study.c pcre2_substitute.c pcre2_substring.c pcre2_tables.c pcre2_ucd.c pcre2_valid_utf.c pcre2_xclass.c pcre2_find_bracket.c pcre2_convert.c pcre2_extuni.c", "pcre");
77
ADD_DEF_FILE("ext\\pcre\\php_pcre.def");
88

99
AC_DEFINE('HAVE_BUNDLED_PCRE', 1, 'Using bundled PCRE library');

ext/pcre/config0.m4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ PHP_ARG_WITH(pcre-jit,,[ --with-pcre-jit Enable PCRE JIT functionality
6868
pcre2lib/pcre2_newline.c pcre2lib/pcre2_ord2utf.c pcre2lib/pcre2_pattern_info.c pcre2lib/pcre2_serialize.c \
6969
pcre2lib/pcre2_string_utils.c pcre2lib/pcre2_study.c pcre2lib/pcre2_substitute.c pcre2lib/pcre2_substring.c \
7070
pcre2lib/pcre2_tables.c pcre2lib/pcre2_ucd.c pcre2lib/pcre2_valid_utf.c pcre2lib/pcre2_xclass.c \
71-
pcre2lib/pcre2_find_bracket.c pcre2lib/pcre2_convert.c"
71+
pcre2lib/pcre2_find_bracket.c pcre2lib/pcre2_convert.c pcre2lib/pcre2_extuni.c"
7272
PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcre2lib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"
7373
PHP_NEW_EXTENSION(pcre, $pcrelib_sources php_pcre.c, no,,$PHP_PCRE_CFLAGS)
7474
PHP_ADD_BUILD_DIR($ext_builddir/pcre2lib)

ext/pcre/pcre2lib/pcre2.h

Lines changed: 116 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
4242
/* The current PCRE version information. */
4343

4444
#define PCRE2_MAJOR 10
45-
#define PCRE2_MINOR 30
46-
#define PCRE2_PRERELEASE
47-
#define PCRE2_DATE 2017-08-14
45+
#define PCRE2_MINOR 31
46+
#define PCRE2_PRERELEASE
47+
#define PCRE2_DATE 2018-02-12
4848

4949
/* When an application links to a PCRE DLL in Windows, the symbols that are
5050
imported have to be identified as such. When building PCRE2, the appropriate
@@ -208,7 +208,104 @@ greater than zero. */
208208
#define PCRE2_BSR_UNICODE 1
209209
#define PCRE2_BSR_ANYCRLF 2
210210

211-
/* Error codes: no match and partial match are "expected" errors. */
211+
/* Error codes for pcre2_compile(). Some of these are also used by
212+
pcre2_pattern_convert(). */
213+
214+
#define PCRE2_ERROR_END_BACKSLASH 101
215+
#define PCRE2_ERROR_END_BACKSLASH_C 102
216+
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
217+
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
218+
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
219+
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
220+
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
221+
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
222+
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
223+
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
224+
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
225+
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
226+
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
227+
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
228+
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
229+
#define PCRE2_ERROR_NULL_PATTERN 116
230+
#define PCRE2_ERROR_BAD_OPTIONS 117
231+
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
232+
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
233+
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
234+
#define PCRE2_ERROR_HEAP_FAILED 121
235+
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
236+
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
237+
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
238+
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
239+
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
240+
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
241+
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
242+
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
243+
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
244+
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
245+
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
246+
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
247+
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
248+
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
249+
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
250+
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
251+
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
252+
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
253+
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
254+
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
255+
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
256+
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
257+
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
258+
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
259+
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
260+
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
261+
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
262+
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
263+
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
264+
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
265+
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
266+
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
267+
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
268+
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
269+
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
270+
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
271+
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
272+
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
273+
#define PCRE2_ERROR_VERB_UNKNOWN 160
274+
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
275+
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
276+
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
277+
#define PCRE2_ERROR_INVALID_OCTAL 164
278+
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
279+
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
280+
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
281+
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
282+
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
283+
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
284+
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
285+
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
286+
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
287+
#define PCRE2_ERROR_UTF_IS_DISABLED 174
288+
#define PCRE2_ERROR_UCP_IS_DISABLED 175
289+
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
290+
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
291+
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
292+
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
293+
#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
294+
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
295+
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
296+
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
297+
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
298+
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
299+
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
300+
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
301+
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
302+
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
303+
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
304+
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
305+
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
306+
307+
308+
/* "Expected" matching error codes: no match and partial match. */
212309

213310
#define PCRE2_ERROR_NOMATCH (-1)
214311
#define PCRE2_ERROR_PARTIAL (-2)
@@ -248,10 +345,10 @@ greater than zero. */
248345
#define PCRE2_ERROR_UTF32_ERR1 (-27)
249346
#define PCRE2_ERROR_UTF32_ERR2 (-28)
250347

251-
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, context
252-
functions, and serializing functions. They are in numerical order. Originally
253-
they were in alphabetical order too, but now that PCRE2 is released, the
254-
numbers must not be changed. */
348+
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
349+
functions, context functions, and serializing functions. They are in numerical
350+
order. Originally they were in alphabetical order too, but now that PCRE2 is
351+
released, the numbers must not be changed. */
255352

256353
#define PCRE2_ERROR_BADDATA (-29)
257354
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
@@ -321,6 +418,7 @@ numbers must not be changed. */
321418
#define PCRE2_INFO_HASBACKSLASHC 23
322419
#define PCRE2_INFO_FRAMESIZE 24
323420
#define PCRE2_INFO_HEAPLIMIT 25
421+
#define PCRE2_INFO_EXTRAOPTIONS 26
324422

325423
/* Request types for pcre2_config(). */
326424

@@ -338,6 +436,9 @@ numbers must not be changed. */
338436
#define PCRE2_CONFIG_UNICODE_VERSION 10
339437
#define PCRE2_CONFIG_VERSION 11
340438
#define PCRE2_CONFIG_HEAPLIMIT 12
439+
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
440+
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
441+
341442

342443
/* Types for code units in patterns and subject strings. */
343444

@@ -393,6 +494,11 @@ without changing the API of the function, thereby allowing old clients to work
393494
without modification. Define the generic version in a macro; the width-specific
394495
versions are generated from this macro below. */
395496

497+
/* Flags for the callout_flags field. These are cleared after a callout. */
498+
499+
#define PCRE2_CALLOUT_STARTMATCH 0x00000001u /* Set for each bumpalong */
500+
#define PCRE2_CALLOUT_BACKTRACK 0x00000002u /* Set after a backtrack */
501+
396502
#define PCRE2_STRUCTURE_LIST \
397503
typedef struct pcre2_callout_block { \
398504
uint32_t version; /* Identifies version of block */ \
@@ -412,6 +518,8 @@ typedef struct pcre2_callout_block { \
412518
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
413519
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
414520
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
521+
/* ------------------- Added for Version 2 -------------------------- */ \
522+
uint32_t callout_flags; /* See above for list */ \
415523
/* ------------------------------------------------------------------ */ \
416524
} pcre2_callout_block; \
417525
\

ext/pcre/pcre2lib/pcre2_auto_possess.c

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -558,47 +558,73 @@ for(;;)
558558
continue;
559559
}
560560

561+
/* At the end of a branch, skip to the end of the group. */
562+
561563
if (c == OP_ALT)
562564
{
563565
do code += GET(code, 1); while (*code == OP_ALT);
564566
c = *code;
565567
}
566568

569+
/* Inspect the next opcode. */
570+
567571
switch(c)
568572
{
569-
case OP_END:
570-
case OP_KETRPOS:
571-
/* TRUE only in greedy case. The non-greedy case could be replaced by
572-
an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT
573-
uses more memory, which we cannot get at this stage.) */
573+
/* We can always possessify a greedy iterator at the end of the pattern,
574+
which is reached after skipping over the final OP_KET. A non-greedy
575+
iterator must never be possessified. */
574576

577+
case OP_END:
575578
return base_list[1] != 0;
576579

580+
/* When an iterator is at the end of certain kinds of group we can inspect
581+
what follows the group by skipping over the closing ket. Note that this
582+
does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
583+
iteration is variable (could be another iteration or could be the next
584+
item). As these two opcodes are not listed in the next switch, they will
585+
end up as the next code to inspect, and return FALSE by virtue of being
586+
unsupported. */
587+
577588
case OP_KET:
578-
/* If the bracket is capturing, and referenced by an OP_RECURSE, or
579-
it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
580-
cannot be converted to a possessive form. */
589+
case OP_KETRPOS:
590+
/* The non-greedy case cannot be converted to a possessive form. */
581591

582592
if (base_list[1] == 0) return FALSE;
583593

594+
/* If the bracket is capturing it might be referenced by an OP_RECURSE
595+
so its last iterator can never be possessified if the pattern contains
596+
recursions. (This could be improved by keeping a list of group numbers that
597+
are called by recursion.) */
598+
584599
switch(*(code - GET(code, 1)))
585600
{
601+
case OP_CBRA:
602+
case OP_SCBRA:
603+
case OP_CBRAPOS:
604+
case OP_SCBRAPOS:
605+
if (cb->had_recurse) return FALSE;
606+
break;
607+
608+
/* Atomic sub-patterns and assertions can always auto-possessify their
609+
last iterator. However, if the group was entered as a result of checking
610+
a previous iterator, this is not possible. */
611+
586612
case OP_ASSERT:
587613
case OP_ASSERT_NOT:
588614
case OP_ASSERTBACK:
589615
case OP_ASSERTBACK_NOT:
590616
case OP_ONCE:
591617

592-
/* Atomic sub-patterns and assertions can always auto-possessify their
593-
last iterator. However, if the group was entered as a result of checking
594-
a previous iterator, this is not possible. */
595-
596618
return !entered_a_group;
597619
}
598620

621+
/* Skip over the bracket and inspect what comes next. */
622+
599623
code += PRIV(OP_lengths)[c];
600624
continue;
601625

626+
/* Handle cases where the next item is a group. */
627+
602628
case OP_ONCE:
603629
case OP_BRA:
604630
case OP_CBRA:
@@ -637,11 +663,15 @@ for(;;)
637663
code += PRIV(OP_lengths)[c];
638664
continue;
639665

666+
/* The next opcode does not need special handling; fall through and use it
667+
to see if the base can be possessified. */
668+
640669
default:
641670
break;
642671
}
643672

644-
/* Check for a supported opcode, and load its properties. */
673+
/* We now have the next appropriate opcode to compare with the base. Check
674+
for a supported opcode, and load its properties. */
645675

646676
code = get_chr_property_list(code, utf, cb->fcc, list);
647677
if (code == NULL) return FALSE; /* Unsupported */

ext/pcre/pcre2lib/pcre2_chartables.c

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,17 @@
22
* Perl-Compatible Regular Expressions *
33
*************************************************/
44

5-
/* This file contains character tables that are used when no external tables
6-
are passed to PCRE2 by the application that calls it. The tables are used only
7-
for characters whose code values are less than 256.
8-
9-
This is a default version of the tables that assumes ASCII encoding. A program
10-
called dftables (which is distributed with PCRE2) can be used to build
11-
alternative versions of this file. This is necessary if you are running in an
12-
EBCDIC environment, or if you want to default to a different encoding, for
13-
example ISO-8859-1. When dftables is run, it creates these tables in the
14-
current locale. If PCRE2 is configured with --enable-rebuild-chartables, this
15-
happens automatically.
16-
17-
The following #includes are present because without them gcc 4.x may remove the
18-
array definition from the final binary if PCRE2 is built into a static library
19-
and dead code stripping is activated. This leads to link errors. Pulling in the
20-
header ensures that the array gets flagged as "someone outside this compilation
21-
unit might reference this" and so it will always be supplied to the linker. */
5+
/* This file was automatically written by the dftables auxiliary
6+
program. It contains character tables that are used when no external
7+
tables are passed to PCRE2 by the application that calls it. The tables
8+
are used only for characters whose code values are less than 256. */
9+
10+
/* The following #includes are present because without them gcc 4.x may remove
11+
the array definition from the final binary if PCRE2 is built into a static
12+
library and dead code stripping is activated. This leads to link errors.
13+
Pulling in the header ensures that the array gets flagged as "someone
14+
outside this compilation unit might reference this" and so it will always
15+
be supplied to the linker. */
2216

2317
#ifdef HAVE_CONFIG_H
2418
#include "config.h"
@@ -98,10 +92,11 @@ const uint8_t PRIV(default_tables)[] = {
9892
240,241,242,243,244,245,246,247,
9993
248,249,250,251,252,253,254,255,
10094

101-
/* This table contains bit maps for various character classes. Each map is 32
102-
bytes long and the bits run from the least significant end of each byte. The
103-
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
104-
graph, print, punct, and cntrl. Other classes are built from combinations. */
95+
/* This table contains bit maps for various character classes.
96+
Each map is 32 bytes long and the bits run from the least
97+
significant end of each byte. The classes that have their own
98+
maps are: space, xdigit, digit, upper, lower, word, graph,
99+
print, punct, and cntrl. Other classes are built from combinations. */
105100

106101
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
107102
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0 commit comments

Comments
 (0)