Skip to content

Commit 7f62461

Browse files
committed
More wrong encoding conversions
1 parent d1828c6 commit 7f62461

21 files changed

+283
-253
lines changed

ext/mbstring/mbstring.c

Lines changed: 60 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ static const sapi_post_entry mbstr_post_entries[] = {
323323
};
324324
/* }}} */
325325

326-
static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name) {
326+
static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, const uint32_t arg_num) {
327327
if (encoding_name) {
328328
const mbfl_encoding *encoding;
329329
zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
@@ -334,7 +334,8 @@ static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name) {
334334

335335
encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
336336
if (!encoding) {
337-
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", ZSTR_VAL(encoding_name));
337+
zend_argument_value_error(arg_num, "must be a valid encoding, encoding \"%s\" is unknown",
338+
ZSTR_VAL(encoding_name));
338339
return NULL;
339340
}
340341

@@ -1936,9 +1937,9 @@ PHP_FUNCTION(mb_str_split)
19361937
string.val = (unsigned char *) ZSTR_VAL(str);
19371938
string.len = ZSTR_LEN(str);
19381939
string.no_language = MBSTRG(language);
1939-
string.encoding = php_mb_get_encoding(encoding);
1940+
string.encoding = php_mb_get_encoding(encoding, 3);
19401941
if (!string.encoding) {
1941-
RETURN_FALSE;
1942+
RETURN_THROWS();
19421943
}
19431944

19441945
p = ZSTR_VAL(str); /* string cursor pointer */
@@ -2066,9 +2067,9 @@ PHP_FUNCTION(mb_strlen)
20662067
string.val = (unsigned char *) str;
20672068
string.len = str_len;
20682069
string.no_language = MBSTRG(language);
2069-
string.encoding = php_mb_get_encoding(enc_name);
2070+
string.encoding = php_mb_get_encoding(enc_name, 2);
20702071
if (!string.encoding) {
2071-
RETURN_FALSE;
2072+
RETURN_THROWS();
20722073
}
20732074

20742075
n = mbfl_strlen(&string);
@@ -2111,9 +2112,9 @@ PHP_FUNCTION(mb_strpos)
21112112
}
21122113

21132114
haystack.no_language = needle.no_language = MBSTRG(language);
2114-
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2115+
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
21152116
if (!haystack.encoding) {
2116-
RETURN_FALSE;
2117+
RETURN_THROWS();
21172118
}
21182119

21192120
n = mbfl_strpos(&haystack, &needle, offset, reverse);
@@ -2139,9 +2140,9 @@ PHP_FUNCTION(mb_strrpos)
21392140
}
21402141

21412142
haystack.no_language = needle.no_language = MBSTRG(language);
2142-
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2143+
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
21432144
if (!haystack.encoding) {
2144-
RETURN_FALSE;
2145+
RETURN_THROWS();
21452146
}
21462147

21472148
n = mbfl_strpos(&haystack, &needle, offset, 1);
@@ -2167,7 +2168,7 @@ PHP_FUNCTION(mb_stripos)
21672168
RETURN_THROWS();
21682169
}
21692170

2170-
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2171+
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding, 4);
21712172

21722173
if (!mbfl_is_error(n)) {
21732174
RETVAL_LONG(n);
@@ -2191,7 +2192,7 @@ PHP_FUNCTION(mb_strripos)
21912192
RETURN_THROWS();
21922193
}
21932194

2194-
n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding);
2195+
n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding, 4);
21952196

21962197
if (!mbfl_is_error(n)) {
21972198
RETVAL_LONG(n);
@@ -2216,9 +2217,9 @@ PHP_FUNCTION(mb_strstr)
22162217
}
22172218

22182219
haystack.no_language = needle.no_language = MBSTRG(language);
2219-
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2220+
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
22202221
if (!haystack.encoding) {
2221-
RETURN_FALSE;
2222+
RETURN_THROWS();
22222223
}
22232224

22242225
n = mbfl_strpos(&haystack, &needle, 0, 0);
@@ -2262,9 +2263,9 @@ PHP_FUNCTION(mb_strrchr)
22622263
}
22632264

22642265
haystack.no_language = needle.no_language = MBSTRG(language);
2265-
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2266+
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
22662267
if (!haystack.encoding) {
2267-
RETURN_FALSE;
2268+
RETURN_THROWS();
22682269
}
22692270

22702271
n = mbfl_strpos(&haystack, &needle, 0, 1);
@@ -2308,12 +2309,12 @@ PHP_FUNCTION(mb_stristr)
23082309
}
23092310

23102311
haystack.no_language = needle.no_language = MBSTRG(language);
2311-
haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2312+
haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding, 4);
23122313
if (!haystack.encoding) {
2313-
RETURN_FALSE;
2314+
RETURN_THROWS();
23142315
}
23152316

2316-
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2317+
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding, 4);
23172318
if (mbfl_is_error(n)) {
23182319
RETURN_FALSE;
23192320
}
@@ -2354,12 +2355,12 @@ PHP_FUNCTION(mb_strrichr)
23542355
}
23552356

23562357
haystack.no_language = needle.no_language = MBSTRG(language);
2357-
haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2358+
haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding, 4);
23582359
if (!haystack.encoding) {
2359-
RETURN_FALSE;
2360+
RETURN_THROWS();
23602361
}
23612362

2362-
n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding);
2363+
n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding, 4);
23632364
if (mbfl_is_error(n)) {
23642365
RETURN_FALSE;
23652366
}
@@ -2399,9 +2400,9 @@ PHP_FUNCTION(mb_substr_count)
23992400
}
24002401

24012402
haystack.no_language = needle.no_language = MBSTRG(language);
2402-
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2403+
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 3);
24032404
if (!haystack.encoding) {
2404-
RETURN_FALSE;
2405+
RETURN_THROWS();
24052406
}
24062407

24072408
if (needle.len == 0) {
@@ -2435,9 +2436,9 @@ PHP_FUNCTION(mb_substr)
24352436
}
24362437

24372438
string.no_language = MBSTRG(language);
2438-
string.encoding = php_mb_get_encoding(encoding);
2439+
string.encoding = php_mb_get_encoding(encoding, 4);
24392440
if (!string.encoding) {
2440-
RETURN_FALSE;
2441+
RETURN_THROWS();
24412442
}
24422443

24432444
string.val = (unsigned char *)str;
@@ -2498,9 +2499,9 @@ PHP_FUNCTION(mb_strcut)
24982499
}
24992500

25002501
string.no_language = MBSTRG(language);
2501-
string.encoding = php_mb_get_encoding(encoding);
2502+
string.encoding = php_mb_get_encoding(encoding, 4);
25022503
if (!string.encoding) {
2503-
RETURN_FALSE;
2504+
RETURN_THROWS();
25042505
}
25052506

25062507
if (len_is_null) {
@@ -2555,12 +2556,9 @@ PHP_FUNCTION(mb_strwidth)
25552556
}
25562557

25572558
string.no_language = MBSTRG(language);
2558-
// TODO CHECK THIS WHAT THE FUCK IT DOES
2559-
string.encoding = php_mb_get_encoding(enc_name);
2559+
string.encoding = php_mb_get_encoding(enc_name, 2);
25602560
if (!string.encoding) {
2561-
//zend_argument_value_error(1, "must be a valid encoding, encoding \"%s\" is unknown", name);
2562-
//RETURN_THROWS();
2563-
RETURN_FALSE;
2561+
RETURN_THROWS();
25642562
}
25652563

25662564
n = mbfl_strwidth(&string);
@@ -2587,9 +2585,9 @@ PHP_FUNCTION(mb_strimwidth)
25872585
}
25882586

25892587
string.no_language = marker.no_language = MBSTRG(language);
2590-
string.encoding = marker.encoding = php_mb_get_encoding(encoding);
2588+
string.encoding = marker.encoding = php_mb_get_encoding(encoding, 5);
25912589
if (!string.encoding) {
2592-
RETURN_FALSE;
2590+
RETURN_THROWS();
25932591
}
25942592

25952593
string.val = (unsigned char *)str;
@@ -2827,9 +2825,9 @@ PHP_FUNCTION(mb_convert_encoding)
28272825
RETURN_THROWS();
28282826
}
28292827

2830-
to_encoding = php_mb_get_encoding(to_encoding_name);
2828+
to_encoding = php_mb_get_encoding(to_encoding_name, 2);
28312829
if (!to_encoding) {
2832-
RETURN_FALSE;
2830+
RETURN_THROWS();
28332831
}
28342832

28352833
if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
@@ -2930,9 +2928,9 @@ PHP_FUNCTION(mb_convert_case)
29302928
RETURN_THROWS();
29312929
}
29322930

2933-
enc = php_mb_get_encoding(from_encoding);
2931+
enc = php_mb_get_encoding(from_encoding, 3);
29342932
if (!enc) {
2935-
return;
2933+
RETURN_THROWS();
29362934
}
29372935

29382936
if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
@@ -2967,9 +2965,9 @@ PHP_FUNCTION(mb_strtoupper)
29672965
RETURN_THROWS();
29682966
}
29692967

2970-
enc = php_mb_get_encoding(from_encoding);
2968+
enc = php_mb_get_encoding(from_encoding, 2);
29712969
if (!enc) {
2972-
RETURN_FALSE;
2970+
RETURN_THROWS();
29732971
}
29742972

29752973
newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
@@ -3001,9 +2999,9 @@ PHP_FUNCTION(mb_strtolower)
30012999
RETURN_THROWS();
30023000
}
30033001

3004-
enc = php_mb_get_encoding(from_encoding);
3002+
enc = php_mb_get_encoding(from_encoding, 2);
30053003
if (!enc) {
3006-
RETURN_FALSE;
3004+
RETURN_THROWS();
30073005
}
30083006

30093007
newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
@@ -3310,9 +3308,9 @@ PHP_FUNCTION(mb_convert_kana)
33103308

33113309
/* encoding */
33123310
string.no_language = MBSTRG(language);
3313-
string.encoding = php_mb_get_encoding(encname);
3311+
string.encoding = php_mb_get_encoding(encname, 3);
33143312
if (!string.encoding) {
3315-
RETURN_FALSE;
3313+
RETURN_THROWS();
33163314
}
33173315

33183316
ret = mbfl_ja_jp_hantozen(&string, &result, opt);
@@ -4431,14 +4429,15 @@ PHP_FUNCTION(mb_check_encoding)
44314429
/* }}} */
44324430

44334431

4434-
static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name)
4432+
static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name,
4433+
const uint32_t enc_name_arg_num)
44354434
{
44364435
const mbfl_encoding *enc;
44374436
enum mbfl_no_encoding no_enc;
44384437

4439-
enc = php_mb_get_encoding(enc_name);
4438+
enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
44404439
if (!enc) {
4441-
return -1;
4440+
return -2;
44424441
}
44434442

44444443
no_enc = enc->no_encoding;
@@ -4497,9 +4496,12 @@ PHP_FUNCTION(mb_ord)
44974496
Z_PARAM_STR(enc)
44984497
ZEND_PARSE_PARAMETERS_END();
44994498

4500-
cp = php_mb_ord(str, str_len, enc);
4499+
cp = php_mb_ord(str, str_len, enc, 2);
45014500

45024501
if (0 > cp) {
4502+
if (cp == -2) {
4503+
RETURN_THROWS();
4504+
}
45034505
RETURN_FALSE;
45044506
}
45054507

@@ -4508,15 +4510,15 @@ PHP_FUNCTION(mb_ord)
45084510
/* }}} */
45094511

45104512

4511-
static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name)
4513+
static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint32_t enc_name_arg_num)
45124514
{
45134515
const mbfl_encoding *enc;
45144516
enum mbfl_no_encoding no_enc;
45154517
zend_string *ret;
45164518
char* buf;
45174519
size_t buf_len;
45184520

4519-
enc = php_mb_get_encoding(enc_name);
4521+
enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
45204522
if (!enc) {
45214523
return NULL;
45224524
}
@@ -4605,7 +4607,7 @@ PHP_FUNCTION(mb_chr)
46054607
Z_PARAM_STR(enc)
46064608
ZEND_PARSE_PARAMETERS_END();
46074609

4608-
ret = php_mb_chr(cp, enc);
4610+
ret = php_mb_chr(cp, enc, 2);
46094611
if (ret == NULL) {
46104612
RETURN_FALSE;
46114613
}
@@ -4637,9 +4639,9 @@ PHP_FUNCTION(mb_scrub)
46374639
Z_PARAM_STR(enc_name)
46384640
ZEND_PARSE_PARAMETERS_END();
46394641

4640-
enc = php_mb_get_encoding(enc_name);
4642+
enc = php_mb_get_encoding(enc_name, 2);
46414643
if (!enc) {
4642-
RETURN_FALSE;
4644+
RETURN_THROWS();
46434645
}
46444646

46454647
ret = php_mb_scrub(str, str_len, enc, &ret_len);
@@ -4764,13 +4766,15 @@ MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nby
47644766

47654767
/* {{{ MBSTRING_API int php_mb_stripos()
47664768
*/
4767-
MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, zend_string *from_encoding)
4769+
MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len,
4770+
const char *old_needle, size_t old_needle_len, zend_long offset,
4771+
zend_string *from_encoding, const uint32_t from_encoding_arg_num)
47684772
{
47694773
size_t n = (size_t) -1;
47704774
mbfl_string haystack, needle;
47714775
const mbfl_encoding *enc;
47724776

4773-
enc = php_mb_get_encoding(from_encoding);
4777+
enc = php_mb_get_encoding(from_encoding, from_encoding_arg_num);
47744778
if (!enc) {
47754779
return (size_t) -1;
47764780
}

ext/mbstring/mbstring.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,9 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length,
125125
MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc);
126126
MBSTRING_API size_t php_mb_mbchar_bytes(const char *s);
127127

128-
MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, zend_string *from_encoding);
128+
MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len,
129+
const char *old_needle, size_t old_needle_len, zend_long offset,
130+
zend_string *from_encoding, const uint32_t from_encoding_arg_num);
129131
MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc);
130132

131133
ZEND_BEGIN_MODULE_GLOBALS(mbstring)

0 commit comments

Comments
 (0)