/* Range check for a candidate substitute code point.
 *
 * As shown here, returns 0 when cp is negative or >= 0x110000 and 1 otherwise.
 * This is a patch hunk: the '-' line is the previous lower bound, which also
 * rejected cp == 0; the '+' line replaces it so that cp == 0 is accepted.
 *
 * NOTE(review): the PHPT expectations in this same patch treat 0xD800 as
 * "not a valid codepoint"; no such check is visible in this hunk, so context
 * lines may have been elided -- confirm against the full function. */
static inline int php_mb_check_code_point(zend_long cp)
{
- if (cp <= 0 || cp >= 0x110000) {
+ if (cp < 0 || cp >= 0x110000) {
/* Out of Unicode range */
return 0;
}
return 1;
}
-/* {{{ proto mixed mb_substitute_character([mixed substchar])
+/* {{{ proto string|int|true mb_substitute_character([string|int|null substitute_character])
Sets the current substitute_character or returns the current substitute_character */
/* mb_substitute_character(): getter/setter for the substitute character state kept in
 * MBSTRG(current_filter_illegal_mode) and MBSTRG(current_filter_illegal_substchar).
 *
 * This is a patch hunk: lines starting with '-' are the implementation being removed,
 * lines starting with '+' are its replacement; the notes below describe the '+' version.
 *
 * - Argument omitted or null: returns "none", "long" or "entity" for the matching mode,
 *   otherwise the stored substitute code point as an integer.
 * - String argument: "none", "long" or "entity" selects that mode and returns true;
 *   any other string raises an argument value error.
 * - Integer argument: must pass php_mb_check_code_point(); on success the mode becomes
 *   MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR, the code point is stored and true is returned;
 *   otherwise an argument value error is raised.
 *
 * The removed code emitted an E_WARNING "Unknown character" and returned false instead,
 * and matched keywords with strncasecmp() bounded by the argument's own length, so a
 * zero-length string compared equal to "none". */
PHP_FUNCTION(mb_substitute_character)
{
- zval *arg1 = NULL;
+ zend_string *substitute_character = NULL;
+ zend_long substitute_codepoint;
+ zend_bool substitute_is_null = 1;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
- RETURN_THROWS();
- }
/* Accept zero or one argument, which may be a string, an integer or null. */
+ ZEND_PARSE_PARAMETERS_START(0, 1)
+ Z_PARAM_OPTIONAL
+ Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null)
+ ZEND_PARSE_PARAMETERS_END();
/* Getter path: nothing (or null) was passed, so report the current setting. */
- if (!arg1) {
+ if (substitute_is_null) {
if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
RETURN_STRING("none");
- } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
+ }
+ if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
RETURN_STRING("long");
- } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
+ }
+ if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
RETURN_STRING("entity");
- } else {
- RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
}
- } else {
- RETVAL_TRUE;
-
- switch (Z_TYPE_P(arg1)) {
- case IS_STRING:
- if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
- } else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
- } else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
- } else {
- convert_to_long_ex(arg1);
+ RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
+ }
- if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
- MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
- } else {
- php_error_docref(NULL, E_WARNING, "Unknown character");
- RETURN_FALSE;
- }
- }
- break;
- default:
- convert_to_long_ex(arg1);
- if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
- MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
- MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
- } else {
- php_error_docref(NULL, E_WARNING, "Unknown character");
- RETURN_FALSE;
- }
- break;
/* Setter path, string argument: only the three mode keywords are accepted, compared
 * case-insensitively (the PHPT expectations in this patch accept 'LoNg' and reject
 * '' and 'b'). Strings are no longer converted to an integer code point. */
+ if (substitute_character != NULL) {
+ if (zend_string_equals_literal_ci(substitute_character, "none")) {
+ MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
+ RETURN_TRUE;
+ }
+ if (zend_string_equals_literal_ci(substitute_character, "long")) {
+ MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
+ RETURN_TRUE;
+ }
+ if (zend_string_equals_literal_ci(substitute_character, "entity")) {
+ MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
+ RETURN_TRUE;
}
+ /* Invalid string value */
+ zend_argument_value_error(1, "must be 'none', 'long', 'entity' or a valid codepoint");
+ RETURN_THROWS();
+ }
+ /* Integer codepoint passed */
+ if (!php_mb_check_code_point(substitute_codepoint)) {
+ zend_argument_value_error(1, "is not a valid codepoint");
+ RETURN_THROWS();
}
+
/* Code point accepted: switch to single-character substitution and remember it. */
+ MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
+ MBSTRG(current_filter_illegal_substchar) = substitute_codepoint;
+ RETURN_TRUE;
}
/* }}} */
function mb_detect_order(array|string $encoding = UNKNOWN): array|bool {}
-/** @param string|int $substchar */
-function mb_substitute_character($substchar = UNKNOWN): string|int|bool {}
+function mb_substitute_character(string|int|null $substitute_character = null): string|int|bool {}
function mb_preferred_mime_name(string $encoding): string|false {}
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_substitute_character, 0, 0, MAY_BE_STRING|MAY_BE_LONG|MAY_BE_BOOL)
- ZEND_ARG_INFO(0, substchar)
+ ZEND_ARG_TYPE_MASK(0, substitute_character, MAY_BE_STRING|MAY_BE_LONG|MAY_BE_NULL, "null")
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_preferred_mime_name, 0, 1, MAY_BE_STRING|MAY_BE_FALSE)
var_dump(mb_substitute_character(0x1F600));
var_dump(bin2hex(mb_scrub("\xff")));
mb_substitute_character(0x3f); // Reset to '?', as the next call will fail
-var_dump(mb_substitute_character(0xD800)); // Surrogate (illegal)
+try {
+ var_dump(mb_substitute_character(0xD800)); // Surrogate (illegal)
+} catch (\ValueError $e) {
+ echo $e->getMessage() . \PHP_EOL;
+}
var_dump(bin2hex(mb_scrub("\xff")));
mb_internal_encoding('EUC-JP-2004');
mb_substitute_character(0x63); // Reset to 'c', as the next call will fail
-mb_substitute_character(0x8fa1ef); // EUC-JP-2004 encoding of U+50AA (illegal)
+try {
+ mb_substitute_character(0x8fa1ef); // EUC-JP-2004 encoding of U+50AA (illegal)
+} catch (\ValueError $e) {
+ echo $e->getMessage() . \PHP_EOL;
+}
var_dump(bin2hex(mb_scrub("\x8d")));
mb_substitute_character(0x50aa);
var_dump(bin2hex(mb_scrub("\x8d")));
?>
---EXPECTF--
+--EXPECT--
bool(true)
string(8) "f09f9880"
-
-Warning: mb_substitute_character(): Unknown character in %s on line %d
-bool(false)
+mb_substitute_character(): Argument #1 ($substitute_character) is not a valid codepoint
string(2) "3f"
-
-Warning: mb_substitute_character(): Unknown character in %s on line %d
+mb_substitute_character(): Argument #1 ($substitute_character) is not a valid codepoint
string(2) "63"
string(6) "8fa1ef"
var_dump(mb_substitute_character());
var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8")));
-var_dump(mb_substitute_character('BAD_NAME'));
+try {
+ var_dump(mb_substitute_character('BAD_NAME'));
+} catch (\ValueError $e) {
+ echo $e->getMessage() . \PHP_EOL;
+}
?>
---EXPECTF--
+--EXPECT--
bool(true)
int(12356)
string(8) "82a282a0"
bool(true)
string(6) "entity"
string(20) "262378323636303b82a0"
-
-Warning: mb_substitute_character(): Unknown character in %s on line %d
-bool(false)
+mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
--SKIPIF--
<?php
extension_loaded('mbstring') or die('skip');
-function_exists('mb_substitute_character') or die("skip mb_substitute_character() is not available in this build");
?>
--FILE--
<?php
var_dump( mb_substitute_character() );
var_dump( mb_substitute_character(1234) );
var_dump( mb_substitute_character() );
-var_dump( mb_substitute_character("none") );
+var_dump( mb_substitute_character('none') );
var_dump( mb_substitute_character() );
-var_dump( mb_substitute_character("b") );
+// Check string case insensitivity
+var_dump( mb_substitute_character('LoNg') );
+var_dump( mb_substitute_character() );
+try {
+ var_dump( mb_substitute_character("b") );
+} catch (\ValueError $e) {
+ echo $e->getMessage() . \PHP_EOL;
+}
?>
---EXPECTF--
+--EXPECT--
*** Testing mb_substitute_character() : basic functionality ***
int(63)
bool(true)
int(1234)
bool(true)
string(4) "none"
-
-Warning: mb_substitute_character(): Unknown character in %s on line %d
-bool(false)
+bool(true)
+string(4) "long"
+mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
--SKIPIF--
<?php
extension_loaded('mbstring') or die('skip');
-function_exists('mb_substitute_character') or die("skip mb_substitute_character() is not available in this build");
?>
--FILE--
<?php
--- /dev/null
+--TEST--
+Test mb_substitute_character() function : usage variation
+--SKIPIF--
+<?php
+extension_loaded('mbstring') or die('skip');
+?>
+--FILE--
+<?php
+declare(strict_types=1);
+/* Prototype : string|int|true mb_substitute_character([string|int|null substitute_character])
+ * Description: Sets the current substitute_character or returns the current substitute_character
+ * Source code: ext/mbstring/mbstring.c
+ * Alias to functions:
+ */
+
+echo "*** Testing mb_substitute_character(): various types in strict typing mode ***\n";
+
+// Initialise function arguments not being substituted (if any)
+
+//get an unset variable
+$unset_var = 10;
+unset ($unset_var);
+
+// define some classes
+class classWithToString
+{
+ public function __toString() {
+ return "Class A object";
+ }
+}
+
+class classWithoutToString
+{
+}
+
+// heredoc string
+$heredoc = <<<EOT
+hello world
+EOT;
+
+// get a resource variable
+$fp = fopen(__FILE__, "r");
+
+// add arrays
+$index_array = array (1, 2, 3);
+$assoc_array = array ('one' => 1, 'two' => 2);
+
+// Values passed one at a time to mb_substitute_character() by the loop below;
+// each key is the label echoed before the corresponding result.
+$inputs = array(
+
+ // int data
+ 'int 0' => 0,
+ 'int 1' => 1,
+ 'int 12345' => 12345,
+ 'int -12345' => -12345,
+
+ // float data
+ 'float 10.5' => 10.5,
+ 'float -10.5' => -10.5,
+ 'float 10.0e19' => 10.0e19, // Cannot be represented as int
+ 'float -10.0e19' => -10.0e19, // Cannot be represented as int
+ 'float .5' => .5,
+
+ // array data
+ 'empty array' => array(),
+ 'int indexed array' => $index_array,
+ 'associative array' => $assoc_array,
+ 'nested arrays' => array('foo', $index_array, $assoc_array),
+
+ // null data
+ 'uppercase NULL' => NULL,
+ 'lowercase null' => null,
+
+ // boolean data
+ 'lowercase true' => true,
+ 'lowercase false' =>false,
+ 'uppercase TRUE' =>TRUE,
+ 'uppercase FALSE' =>FALSE,
+
+ // empty data
+ 'empty string DQ' => "",
+ 'empty string SQ' => '',
+
+ // string data
+ 'string DQ' => "string",
+ 'string SQ' => 'string',
+ 'mixed case string' => "sTrInG",
+ 'heredoc' => $heredoc,
+
+ // object data
+ 'instance of classWithToString' => new classWithToString(),
+ 'instance of classWithoutToString' => new classWithoutToString(),
+
+ // undefined data
+ 'undefined var' => @$undefined_var,
+
+ // unset data
+ 'unset var' => @$unset_var,
+);
+
+// loop through each element of the array for substchar
+
+mb_internal_encoding('utf-8');
+foreach($inputs as $key =>$value) {
+ echo "--$key--\n";
+ try {
+ var_dump( mb_substitute_character($value) );
+ } catch (\ValueError|\TypeError $e) {
+ echo get_class($e) . ': ' . $e->getMessage() . \PHP_EOL;
+ }
+}
+
+fclose($fp);
+
+?>
+--EXPECT--
+*** Testing mb_substitute_character(): various types in strict typing mode ***
+--int 0--
+bool(true)
+--int 1--
+bool(true)
+--int 12345--
+bool(true)
+--int -12345--
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) is not a valid codepoint
+--float 10.5--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, float given
+--float -10.5--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, float given
+--float 10.0e19--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, float given
+--float -10.0e19--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, float given
+--float .5--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, float given
+--empty array--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, array given
+--int indexed array--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, array given
+--associative array--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, array given
+--nested arrays--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, array given
+--uppercase NULL--
+int(12345)
+--lowercase null--
+int(12345)
+--lowercase true--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, bool given
+--lowercase false--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, bool given
+--uppercase TRUE--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, bool given
+--uppercase FALSE--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, bool given
+--empty string DQ--
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
+--empty string SQ--
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
+--string DQ--
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
+--string SQ--
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
+--mixed case string--
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
+--heredoc--
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
+--instance of classWithToString--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, object given
+--instance of classWithoutToString--
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, object given
+--undefined var--
+int(12345)
+--unset var--
+int(12345)
--SKIPIF--
<?php
extension_loaded('mbstring') or die('skip');
-function_exists('mb_substitute_character') or die("skip mb_substitute_character() is not available in this build");
?>
--FILE--
<?php
-/* Prototype : mixed mb_substitute_character([mixed substchar])
+/* Prototype : string|int|true mb_substitute_character([string|int|null substitute_character])
* Description: Sets the current substitute_character or returns the current substitute_character
* Source code: ext/mbstring/mbstring.c
* Alias to functions:
*/
-echo "*** Testing mb_substitute_character() : usage variation ***\n";
-
-// Define error handler
-function test_error_handler($err_no, $err_msg, $filename, $linenum) {
- if (error_reporting() & $err_no) {
- // report non-silenced errors
- echo "Error: $err_no - $err_msg, $filename($linenum)\n";
- }
-}
-set_error_handler('test_error_handler');
+echo "*** Testing mb_substitute_character(): various types in weak typing mode ***\n";
// Initialise function arguments not being substituted (if any)
// float data
'float 10.5' => 10.5,
'float -10.5' => -10.5,
- 'float 12.3456789000e10' => 12.3456789000e10,
- 'float -12.3456789000e10' => -12.3456789000e10,
+ 'float 10.0e19' => 10.0e19, // Cannot be represented as int
+ 'float -10.0e19' => -10.0e19, // Cannot be represented as int
'float .5' => .5,
// array data
mb_internal_encoding('utf-8');
foreach($inputs as $key =>$value) {
- echo "\n--$key--\n";
- var_dump( mb_substitute_character($value) );
-};
+ echo "--$key--\n";
+ try {
+ var_dump( mb_substitute_character($value) );
+ } catch (\ValueError|\TypeError $e) {
+ echo get_class($e) . ': ' . $e->getMessage() . \PHP_EOL;
+ }
+}
fclose($fp);
?>
---EXPECTF--
-*** Testing mb_substitute_character() : usage variation ***
-
+--EXPECT--
+*** Testing mb_substitute_character(): various types in weak typing mode ***
--int 0--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+bool(true)
--int 1--
bool(true)
-
--int 12345--
bool(true)
-
--int -12345--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) is not a valid codepoint
--float 10.5--
bool(true)
-
--float -10.5--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
---float 12.3456789000e10--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
---float -12.3456789000e10--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) is not a valid codepoint
+--float 10.0e19--
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
+--float -10.0e19--
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
--float .5--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+bool(true)
--empty array--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, array given
--int indexed array--
-bool(true)
-
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, array given
--associative array--
-bool(true)
-
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, array given
--nested arrays--
-bool(true)
-
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, array given
--uppercase NULL--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+int(0)
--lowercase null--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+int(0)
--lowercase true--
bool(true)
-
--lowercase false--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+bool(true)
--uppercase TRUE--
bool(true)
-
--uppercase FALSE--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
---empty string DQ--
bool(true)
-
+--empty string DQ--
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
--empty string SQ--
-bool(true)
-
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
--string DQ--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
--string SQ--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
--mixed case string--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
--heredoc--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
--instance of classWithToString--
-Error: 8 - Object of class classWithToString could not be converted to int, %s(%d)
-bool(true)
-
+ValueError: mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint
--instance of classWithoutToString--
-Error: 8 - Object of class classWithoutToString could not be converted to int, %s(%d)
-bool(true)
-
+TypeError: mb_substitute_character(): Argument #1 ($substitute_character) must be of type string|int|null, object given
--undefined var--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
-
+int(0)
--unset var--
-Error: 2 - mb_substitute_character(): Unknown character, %s(%d)
-bool(false)
+int(0)