From d373c11e710b525feb6373629e3d1ebffef2dd5b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 22 Jan 2019 17:47:16 +0100 Subject: [PATCH] Implement new custom object serialization mechanism RFC: https://wiki.php.net/rfc/custom_object_serialization --- UPGRADING | 15 ++ .../tests/serialize/__serialize_001.phpt | 32 +++++ .../tests/serialize/__serialize_002.phpt | 20 +++ .../tests/serialize/__serialize_003.phpt | 56 ++++++++ .../tests/serialize/__serialize_004.phpt | 131 ++++++++++++++++++ .../tests/serialize/__serialize_005.phpt | 56 ++++++++ ext/standard/var.c | 63 +++++++++ ext/standard/var_unserializer.re | 125 +++++++++++------ 8 files changed, 457 insertions(+), 41 deletions(-) create mode 100644 ext/standard/tests/serialize/__serialize_001.phpt create mode 100644 ext/standard/tests/serialize/__serialize_002.phpt create mode 100644 ext/standard/tests/serialize/__serialize_003.phpt create mode 100644 ext/standard/tests/serialize/__serialize_004.phpt create mode 100644 ext/standard/tests/serialize/__serialize_005.phpt diff --git a/UPGRADING b/UPGRADING index fc8fa459ce..3344a73042 100644 --- a/UPGRADING +++ b/UPGRADING @@ -107,6 +107,7 @@ PHP 7.4 UPGRADE NOTES This will enforce that $user->id can only be assigned integer and $user->name can only be assigned strings. For more information see the RFC: https://wiki.php.net/rfc/typed_properties_v2 + . Added support for coalesce assign (??=) operator. For example: $array['key'] ??= computeDefault(); @@ -156,6 +157,20 @@ PHP 7.4 UPGRADE NOTES . strip_tags() now also accepts an array of allowed tags: Instead of strip_tags($str, '

<a><p>') you can now write strip_tags($str, ['a', 'p']). + . A new mechanism for custom object serialization has been added, which + uses two new magic methods: + + // Returns array containing all the necessary state of the object. + public function __serialize(): array; + + // Restores the object state from the given data array. + public function __unserialize(array $data): void; + + The new serialization mechanism supersedes the Serializable interface, + which will be deprecated in the future. + + RFC: https://wiki.php.net/rfc/custom_object_serialization + ======================================== 3. Changes in SAPI modules ======================================== diff --git a/ext/standard/tests/serialize/__serialize_001.phpt b/ext/standard/tests/serialize/__serialize_001.phpt new file mode 100644 index 0000000000..ddfccfd4b6 --- /dev/null +++ b/ext/standard/tests/serialize/__serialize_001.phpt @@ -0,0 +1,32 @@ +--TEST-- +__serialize() mechanism (001): Basics +--FILE-- + $this->prop, 42 => $this->prop2]; + } + public function __unserialize(array $data) { + $this->prop = $data["value"]; + $this->prop2 = $data[42]; + } +} + +$test = new Test; +$test->prop = "foobar"; +$test->prop2 = "barfoo"; +var_dump($s = serialize($test)); +var_dump(unserialize($s)); + +?> +--EXPECT-- +string(58) "O:4:"Test":2:{s:5:"value";s:6:"foobar";i:42;s:6:"barfoo";}" +object(Test)#2 (2) { + ["prop"]=> + string(6) "foobar" + ["prop2"]=> + string(6) "barfoo" +} diff --git a/ext/standard/tests/serialize/__serialize_002.phpt b/ext/standard/tests/serialize/__serialize_002.phpt new file mode 100644 index 0000000000..143ee933ed --- /dev/null +++ b/ext/standard/tests/serialize/__serialize_002.phpt @@ -0,0 +1,20 @@ +--TEST-- +__serialize() mechanism (002): TypeError on invalid return type +--FILE-- +getMessage(), "\n"; +} + +?> +--EXPECT-- +__serialize() must return an array diff --git a/ext/standard/tests/serialize/__serialize_003.phpt b/ext/standard/tests/serialize/__serialize_003.phpt new file mode 
100644 index 0000000000..13a9bf7be0 --- /dev/null +++ b/ext/standard/tests/serialize/__serialize_003.phpt @@ -0,0 +1,56 @@ +--TEST-- +__serialize() mechanism (003): Interoperability of different serialization mechanisms +--FILE-- + "value"]; + } + + public function __unserialize(array $data) { + echo "__unserialize() called\n"; + var_dump($data); + } + + public function serialize() { + echo "serialize() called\n"; + return "payload"; + } + + public function unserialize($payload) { + echo "unserialize() called\n"; + var_dump($payload); + } +} + +$test = new Test; +var_dump($s = serialize($test)); +var_dump(unserialize($s)); + +var_dump(unserialize('C:4:"Test":7:{payload}')); + +?> +--EXPECT-- +__serialize() called +string(37) "O:4:"Test":1:{s:3:"key";s:5:"value";}" +__unserialize() called +array(1) { + ["key"]=> + string(5) "value" +} +object(Test)#2 (0) { +} +unserialize() called +string(7) "payload" +object(Test)#2 (0) { +} diff --git a/ext/standard/tests/serialize/__serialize_004.phpt b/ext/standard/tests/serialize/__serialize_004.phpt new file mode 100644 index 0000000000..cc55d67aa0 --- /dev/null +++ b/ext/standard/tests/serialize/__serialize_004.phpt @@ -0,0 +1,131 @@ +--TEST-- +__serialize() mechanism (004): Delayed __unserialize() calls +--FILE-- +data = $data; + } + public function __wakeup() { + echo "__wakeup() called\n"; + var_dump($this->data); + $this->woken_up = true; + } +} + +class Unserialize { + public $data; + public function __construct(array $data) { + $this->data = $data; + } + public function __serialize() { + return $this->data; + } + public function __unserialize(array $data) { + $this->data = $data; + echo "__unserialize() called\n"; + var_dump($this->data); + $this->unserialized = true; + } +} + +$obj = new Wakeup([new Unserialize([new Wakeup([new Unserialize([])])])]); +var_dump($s = serialize($obj)); +var_dump(unserialize($s)); + +?> +--EXPECT-- +string(126) 
"O:6:"Wakeup":1:{s:4:"data";a:1:{i:0;O:11:"Unserialize":1:{i:0;O:6:"Wakeup":1:{s:4:"data";a:1:{i:0;O:11:"Unserialize":0:{}}}}}}" +__unserialize() called +array(0) { +} +__wakeup() called +array(1) { + [0]=> + object(Unserialize)#8 (2) { + ["data"]=> + array(0) { + } + ["unserialized"]=> + bool(true) + } +} +__unserialize() called +array(1) { + [0]=> + object(Wakeup)#7 (2) { + ["data"]=> + array(1) { + [0]=> + object(Unserialize)#8 (2) { + ["data"]=> + array(0) { + } + ["unserialized"]=> + bool(true) + } + } + ["woken_up"]=> + bool(true) + } +} +__wakeup() called +array(1) { + [0]=> + object(Unserialize)#6 (2) { + ["data"]=> + array(1) { + [0]=> + object(Wakeup)#7 (2) { + ["data"]=> + array(1) { + [0]=> + object(Unserialize)#8 (2) { + ["data"]=> + array(0) { + } + ["unserialized"]=> + bool(true) + } + } + ["woken_up"]=> + bool(true) + } + } + ["unserialized"]=> + bool(true) + } +} +object(Wakeup)#5 (2) { + ["data"]=> + array(1) { + [0]=> + object(Unserialize)#6 (2) { + ["data"]=> + array(1) { + [0]=> + object(Wakeup)#7 (2) { + ["data"]=> + array(1) { + [0]=> + object(Unserialize)#8 (2) { + ["data"]=> + array(0) { + } + ["unserialized"]=> + bool(true) + } + } + ["woken_up"]=> + bool(true) + } + } + ["unserialized"]=> + bool(true) + } + } + ["woken_up"]=> + bool(true) +} diff --git a/ext/standard/tests/serialize/__serialize_005.phpt b/ext/standard/tests/serialize/__serialize_005.phpt new file mode 100644 index 0000000000..3656a034b3 --- /dev/null +++ b/ext/standard/tests/serialize/__serialize_005.phpt @@ -0,0 +1,56 @@ +--TEST-- +__serialize() mechanism (005): parent::__unserialize() is safe +--FILE-- +data = $data; + } + public function __serialize() { + return $this->data; + } + public function __unserialize(array $data) { + $this->data = $data; + } +} + +class B extends A { + private $data2; + public function __construct(array $data, array $data2) { + parent::__construct($data); + $this->data2 = $data2; + } + public function __serialize() { + return [$this->data2, 
parent::__serialize()]; + } + public function __unserialize(array $payload) { + [$data2, $data] = $payload; + parent::__unserialize($data); + $this->data2 = $data2; + } +} + +$common = new stdClass; +$obj = new B([$common], [$common]); +var_dump($s = serialize($obj)); +var_dump(unserialize($s)); + +?> +--EXPECT-- +string(63) "O:1:"B":2:{i:0;a:1:{i:0;O:8:"stdClass":0:{}}i:1;a:1:{i:0;r:3;}}" +object(B)#3 (2) { + ["data2":"B":private]=> + array(1) { + [0]=> + object(stdClass)#4 (0) { + } + } + ["data":"A":private]=> + array(1) { + [0]=> + object(stdClass)#4 (0) { + } + } +} diff --git a/ext/standard/var.c b/ext/standard/var.c index 66150c7274..12834e0fca 100644 --- a/ext/standard/var.c +++ b/ext/standard/var.c @@ -745,6 +745,32 @@ static int php_var_serialize_call_sleep(zval *retval, zval *struc) /* {{{ */ } /* }}} */ +static int php_var_serialize_call_magic_serialize(zval *retval, zval *obj) /* {{{ */ +{ + zval fname; + int res; + + ZVAL_STRINGL(&fname, "__serialize", sizeof("__serialize") - 1); + BG(serialize_lock)++; + res = call_user_function(CG(function_table), obj, &fname, retval, 0, 0); + BG(serialize_lock)--; + zval_ptr_dtor_str(&fname); + + if (res == FAILURE || Z_ISUNDEF_P(retval)) { + zval_ptr_dtor(retval); + return FAILURE; + } + + if (Z_TYPE_P(retval) != IS_ARRAY) { + zval_ptr_dtor(retval); + zend_type_error("__serialize() must return an array"); + return FAILURE; + } + + return SUCCESS; +} +/* }}} */ + static void php_var_serialize_collect_names(HashTable *ht, HashTable *src) /* {{{ */ { zval *val; @@ -915,6 +941,43 @@ again: case IS_OBJECT: { zend_class_entry *ce = Z_OBJCE_P(struc); + if (zend_hash_str_exists(&ce->function_table, "__serialize", sizeof("__serialize")-1)) { + zval retval, obj; + zend_string *key; + zval *data; + zend_ulong index; + + ZVAL_COPY(&obj, struc); + if (php_var_serialize_call_magic_serialize(&retval, &obj) == FAILURE) { + if (!EG(exception)) { + smart_str_appendl(buf, "N;", 2); + } + zval_ptr_dtor(&obj); + return; + } + + 
php_var_serialize_class_name(buf, &obj); + smart_str_append_unsigned(buf, zend_array_count(Z_ARRVAL(retval))); + smart_str_appendl(buf, ":{", 2); + ZEND_HASH_FOREACH_KEY_VAL_IND(Z_ARRVAL(retval), index, key, data) { + if (!key) { + php_var_serialize_long(buf, index); + } else { + php_var_serialize_string(buf, ZSTR_VAL(key), ZSTR_LEN(key)); + } + + if (Z_ISREF_P(data) && Z_REFCOUNT_P(data) == 1) { + data = Z_REFVAL_P(data); + } + php_var_serialize_intern(buf, data, var_hash); + } ZEND_HASH_FOREACH_END(); + smart_str_appendc(buf, '}'); + + zval_ptr_dtor(&obj); + zval_ptr_dtor(&retval); + return; + } + if (ce->serialize != NULL) { /* has custom handler */ unsigned char *serialized_data = NULL; diff --git a/ext/standard/var_unserializer.re b/ext/standard/var_unserializer.re index fde0513e49..afb992cca4 100644 --- a/ext/standard/var_unserializer.re +++ b/ext/standard/var_unserializer.re @@ -26,8 +26,10 @@ #define VAR_DTOR_ENTRIES_MAX 255 /* 256 - offsetof(var_dtor_entries, data) / sizeof(zval) */ #define VAR_ENTRIES_DBG 0 -/* VAR_FLAG used in var_dtor entries to signify an entry on which __wakeup should be called */ +/* VAR_FLAG used in var_dtor entries to signify an entry on which + * __wakeup/__unserialize should be called */ #define VAR_WAKEUP_FLAG 1 +#define VAR_UNSERIALIZE_FLAG 2 typedef struct { zend_long used_slots; @@ -191,9 +193,10 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx) zend_long i; var_entries *var_hash = (*var_hashx)->entries.next; var_dtor_entries *var_dtor_hash = (*var_hashx)->first_dtor; - zend_bool wakeup_failed = 0; - zval wakeup_name; + zend_bool delayed_call_failed = 0; + zval wakeup_name, unserialize_name; ZVAL_UNDEF(&wakeup_name); + ZVAL_UNDEF(&unserialize_name); #if VAR_ENTRIES_DBG fprintf(stderr, "var_destroy(%ld)\n", var_hash?var_hash->used_slots:-1L); @@ -212,9 +215,9 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx) fprintf(stderr, "var_destroy dtor(%p, %ld)\n", var_dtor_hash->data[i], 
Z_REFCOUNT_P(var_dtor_hash->data[i])); #endif - /* Perform delayed __wakeup calls */ if (Z_EXTRA_P(zv) == VAR_WAKEUP_FLAG) { - if (!wakeup_failed) { + /* Perform delayed __wakeup calls */ + if (!delayed_call_failed) { zval retval; if (Z_ISUNDEF(wakeup_name)) { ZVAL_STRINGL(&wakeup_name, "__wakeup", sizeof("__wakeup") - 1); @@ -222,11 +225,33 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx) BG(serialize_lock)++; if (call_user_function(NULL, zv, &wakeup_name, &retval, 0, 0) == FAILURE || Z_ISUNDEF(retval)) { - wakeup_failed = 1; + delayed_call_failed = 1; GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED); } BG(serialize_lock)--; + zval_ptr_dtor(&retval); + } else { + GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED); + } + } else if (Z_EXTRA_P(zv) == VAR_UNSERIALIZE_FLAG) { + /* Perform delayed __unserialize calls */ + if (!delayed_call_failed) { + zval retval, param; + ZVAL_COPY(&param, &var_dtor_hash->data[i + 1]); + + if (Z_ISUNDEF(unserialize_name)) { + ZVAL_STRINGL(&unserialize_name, "__unserialize", sizeof("__unserialize") - 1); + } + + BG(serialize_lock)++; + if (call_user_function(CG(function_table), zv, &unserialize_name, &retval, 1, &param) == FAILURE || Z_ISUNDEF(retval)) { + delayed_call_failed = 1; + GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED); + } + BG(serialize_lock)--; + + zval_ptr_dtor(&param); zval_ptr_dtor(&retval); } else { GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED); @@ -241,6 +266,7 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx) } zval_ptr_dtor_nogc(&wakeup_name); + zval_ptr_dtor_nogc(&unserialize_name); if ((*var_hashx)->ref_props) { zend_hash_destroy((*var_hashx)->ref_props); @@ -601,41 +627,38 @@ static inline int object_custom(UNSERIALIZE_PARAMETER, zend_class_entry *ce) return 1; } -static inline zend_long object_common1(UNSERIALIZE_PARAMETER, zend_class_entry *ce) -{ - zend_long elements; - - if( *p >= max - 2) { - zend_error(E_WARNING, "Bad unserialize data"); - return -1; - } - - elements = 
parse_iv2((*p) + 2, p); - - (*p) += 2; - - if (ce->serialize == NULL) { - object_init_ex(rval, ce); - } else { - /* If this class implements Serializable, it should not land here but in object_custom(). The passed string - obviously doesn't descend from the regular serializer. */ - zend_error(E_WARNING, "Erroneous data format for unserializing '%s'", ZSTR_VAL(ce->name)); - return -1; - } - - return elements; -} - #ifdef PHP_WIN32 # pragma optimize("", off) #endif -static inline int object_common2(UNSERIALIZE_PARAMETER, zend_long elements) +static inline int object_common(UNSERIALIZE_PARAMETER, zend_long elements, zend_bool has_unserialize) { HashTable *ht; zend_bool has_wakeup; - if (Z_TYPE_P(rval) != IS_OBJECT) { - return 0; + if (has_unserialize) { + zval ary, *tmp; + + if (elements >= HT_MAX_SIZE) { + return 0; + } + + array_init_size(&ary, elements); + if (!process_nested_data(UNSERIALIZE_PASSTHRU, Z_ARRVAL(ary), elements, NULL)) { + ZVAL_DEREF(rval); + GC_ADD_FLAGS(Z_OBJ_P(rval), IS_OBJ_DESTRUCTOR_CALLED); + return 0; + } + + /* Delay __unserialize() call until end of unserialization. We use two slots here to + * store both the object and the unserialized data array. 
*/ + ZVAL_DEREF(rval); + tmp = var_tmp_var(var_hash); + ZVAL_COPY(tmp, rval); + Z_EXTRA_P(tmp) = VAR_UNSERIALIZE_FLAG; + tmp = var_tmp_var(var_hash); + ZVAL_COPY_VALUE(tmp, &ary); + + return finish_nested_data(UNSERIALIZE_PASSTHRU); } has_wakeup = Z_OBJCE_P(rval) != PHP_IC_ENTRY @@ -954,9 +977,9 @@ object ":" uiv ":" ["] { char *str; zend_string *class_name; zend_class_entry *ce; - int incomplete_class = 0; - - int custom_object = 0; + zend_bool incomplete_class = 0; + zend_bool custom_object = 0; + zend_bool has_unserialize = 0; zval user_func; zval retval; @@ -1085,19 +1108,39 @@ object ":" uiv ":" ["] { return ret; } - elements = object_common1(UNSERIALIZE_PASSTHRU, ce); + if (*p >= max - 2) { + zend_error(E_WARNING, "Bad unserialize data"); + zend_string_release_ex(class_name, 0); + return 0; + } + elements = parse_iv2(*p + 2, p); if (elements < 0) { - zend_string_release_ex(class_name, 0); - return 0; + zend_string_release_ex(class_name, 0); + return 0; + } + *p += 2; + + has_unserialize = !incomplete_class + && zend_hash_str_exists(&ce->function_table, "__unserialize", sizeof("__unserialize")-1); + + /* If this class implements Serializable, it should not land here but in object_custom(). + * The passed string obviously doesn't descend from the regular serializer. However, if + * there is both Serializable::unserialize() and __unserialize(), then both may be used, + * depending on the serialization format. */ + if (ce->serialize != NULL && !has_unserialize) { + zend_error(E_WARNING, "Erroneous data format for unserializing '%s'", ZSTR_VAL(ce->name)); + zend_string_release_ex(class_name, 0); + return 0; } + object_init_ex(rval, ce); if (incomplete_class) { php_store_class_name(rval, ZSTR_VAL(class_name), len2); } zend_string_release_ex(class_name, 0); - return object_common2(UNSERIALIZE_PASSTHRU, elements); + return object_common(UNSERIALIZE_PASSTHRU, elements, has_unserialize); } "}" { -- 2.40.0