Skip to content

Commit da71157

Browse files
committed
Add some special cases for parser errors
* unexpected token """ -> unexpected double-quote mark
* unexpected quoted string "foo" -> unexpected single-quoted string "foo" / unexpected double-quoted string "foo"
* unexpected illegal character "_" -> unexpected character 0xNN (where _ is almost certainly a control character, and NN is the hexadecimal value of the byte)
1 parent d9e20fd commit da71157

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

Zend/zend_language_parser.y

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,15 @@ static YYSIZE_T zend_yytnamerr(char *yyres, const char *yystr)
14541454
return sizeof("token \"\\\"")-1;
14551455
}
14561456

1457+
/* Avoid unreadable """ */
1458+
/* "'" would theoretically be just as bad, but is never currently parsed as a separate token */
1459+
if (strcmp(toktype, "'\"'") == 0) {
1460+
if (yyres) {
1461+
yystpcpy(yyres, "double-quote mark");
1462+
}
1463+
return sizeof("double-quote mark");
1464+
}
1465+
14571466
/* Strip off the outer quote marks */
14581467
if (toktype_len >= 2 && *toktype == '"') {
14591468
toktype++;
@@ -1475,12 +1484,34 @@ static YYSIZE_T zend_yytnamerr(char *yyres, const char *yystr)
14751484
tokcontent = LANG_SCNG(yy_text);
14761485
tokcontent_len = LANG_SCNG(yy_leng);
14771486

1487+
/* For T_BAD_CHARACTER, the content probably won't be a printable char */
1488+
/* Also, "unexpected invalid character" sounds a bit redundant */
1489+
if (tokcontent_len == 1 && strcmp(yystr, "\"invalid character\"") == 0) {
1490+
if (yyres) {
1491+
snprintf(buffer, sizeof(buffer), "character 0x%02hhX", *tokcontent);
1492+
yystpcpy(yyres, buffer);
1493+
}
1494+
return sizeof("character 0x00")-1;
1495+
}
1496+
14781497
/* Truncate at line end to avoid messing up log formats */
14791498
tokcontent_end = memchr(tokcontent, '\n', LANG_SCNG(yy_leng));
14801499
if (tokcontent_end != NULL) {
14811500
tokcontent_len = (tokcontent_end - tokcontent);
14821501
}
14831502

1503+
/* Try to be helpful about what kind of string was found, before stripping the quotes */
1504+
if (tokcontent_len > 0 && strcmp(yystr, "\"quoted string\"") == 0) {
1505+
if (*tokcontent=='"') {
1506+
toktype="double-quoted string";
1507+
toktype_len=sizeof("double-quoted string")-1;
1508+
}
1509+
else if (*tokcontent=='\'') {
1510+
toktype="single-quoted string";
1511+
toktype_len=sizeof("single-quoted string")-1;
1512+
}
1513+
}
1514+
14841515
/* For quoted strings, strip off another layer of quotes to avoid putting quotes inside quotes */
14851516
if (tokcontent_len > 0 && (*tokcontent=='\'' || *tokcontent=='"')) {
14861517
tokcontent++;

0 commit comments

Comments
 (0)