granicus.if.org Git - php/commitdiff
Implement new custom object serialization mechanism
author Nikita Popov <nikita.ppv@gmail.com>
Tue, 22 Jan 2019 16:47:16 +0000 (17:47 +0100)
committer Nikita Popov <nikita.ppv@gmail.com>
Fri, 22 Mar 2019 09:43:06 +0000 (10:43 +0100)
RFC: https://wiki.php.net/rfc/custom_object_serialization

UPGRADING
ext/standard/tests/serialize/__serialize_001.phpt [new file with mode: 0644]
ext/standard/tests/serialize/__serialize_002.phpt [new file with mode: 0644]
ext/standard/tests/serialize/__serialize_003.phpt [new file with mode: 0644]
ext/standard/tests/serialize/__serialize_004.phpt [new file with mode: 0644]
ext/standard/tests/serialize/__serialize_005.phpt [new file with mode: 0644]
ext/standard/var.c
ext/standard/var_unserializer.re

index fc8fa459ce75b49c69bc8c2f0e125ad2a855a374..3344a73042fb490ca33fb2396dbb48ee220a6345 100644 (file)
--- a/UPGRADING
+++ b/UPGRADING
@@ -107,6 +107,7 @@ PHP 7.4 UPGRADE NOTES
     This will enforce that $user->id can only be assigned integer and
     $user->name can only be assigned strings. For more information see the
     RFC: https://wiki.php.net/rfc/typed_properties_v2
+
   . Added support for coalesce assign (??=) operator. For example:
 
         $array['key'] ??= computeDefault();
@@ -156,6 +157,20 @@ PHP 7.4 UPGRADE NOTES
   . strip_tags() now also accepts an array of allowed tags: Instead of
     strip_tags($str, '<a><p>') you can now write strip_tags($str, ['a', 'p']).
 
+  . A new mechanism for custom object serialization has been added, which
+    uses two new magic methods:
+
+        // Returns array containing all the necessary state of the object.
+        public function __serialize(): array;
+
+        // Restores the object state from the given data array.
+        public function __unserialize(array $data): void;
+
+    The new serialization mechanism supersedes the Serializable interface,
+    which will be deprecated in the future.
+
+    RFC: https://wiki.php.net/rfc/custom_object_serialization
+
 ========================================
 3. Changes in SAPI modules
 ========================================
diff --git a/ext/standard/tests/serialize/__serialize_001.phpt b/ext/standard/tests/serialize/__serialize_001.phpt
new file mode 100644 (file)
index 0000000..ddfccfd
--- /dev/null
@@ -0,0 +1,32 @@
+--TEST--
+__serialize() mechanism (001): Basics
+--FILE--
+<?php // Basic round trip: the array returned by __serialize() is handed back to __unserialize().
+
+class Test {
+    public $prop;
+    public $prop2;
+    public function __serialize() { // exports the object state as a plain array
+        return ["value" => $this->prop, 42 => $this->prop2]; // mixes a string key and an integer key
+    }
+    public function __unserialize(array $data) { // restores both properties from the exported array
+        $this->prop = $data["value"];
+        $this->prop2 = $data[42];
+    }
+}
+
+$test = new Test;
+$test->prop = "foobar";
+$test->prop2 = "barfoo";
+var_dump($s = serialize($test)); // expected payload uses the regular O: object format with the two array entries
+var_dump(unserialize($s)); // expected to yield a Test object with both properties restored
+
+?>
+--EXPECT--
+string(58) "O:4:"Test":2:{s:5:"value";s:6:"foobar";i:42;s:6:"barfoo";}"
+object(Test)#2 (2) {
+  ["prop"]=>
+  string(6) "foobar"
+  ["prop2"]=>
+  string(6) "barfoo"
+}
diff --git a/ext/standard/tests/serialize/__serialize_002.phpt b/ext/standard/tests/serialize/__serialize_002.phpt
new file mode 100644 (file)
index 0000000..143ee93
--- /dev/null
@@ -0,0 +1,20 @@
+--TEST--
+__serialize() mechanism (002): TypeError on invalid return type
+--FILE--
+<?php // Error path: __serialize() returning a non-array value.
+
+class Test {
+    public function __serialize() {
+        return $this; // deliberately not an array
+    }
+}
+
+try {
+    serialize(new Test); // expected to throw a TypeError
+} catch (TypeError $e) {
+    echo $e->getMessage(), "\n"; // only the exception message is compared against the expected output
+}
+
+?>
+--EXPECT--
+__serialize() must return an array
diff --git a/ext/standard/tests/serialize/__serialize_003.phpt b/ext/standard/tests/serialize/__serialize_003.phpt
new file mode 100644 (file)
index 0000000..13a9bf7
--- /dev/null
@@ -0,0 +1,56 @@
+--TEST--
+__serialize() mechanism (003): Interoperability of different serialization mechanisms
+--FILE--
+<?php // One class offering __sleep/__wakeup, Serializable and __serialize/__unserialize at the same time.
+
+class Test implements Serializable {
+    public function __sleep() { // per the expected output this hook is never invoked
+        echo "__sleep() called\n";
+    }
+
+    public function __wakeup() { // per the expected output this hook is never invoked
+        echo "__wakeup() called\n";
+    }
+
+    public function __serialize() {
+        echo "__serialize() called\n";
+        return ["key" => "value"];
+    }
+
+    public function __unserialize(array $data) {
+        echo "__unserialize() called\n";
+        var_dump($data); // shows the array that was produced by __serialize()
+    }
+
+    public function serialize() { // Serializable hook; per the expected output serialize($test) below does not use it
+        echo "serialize() called\n";
+        return "payload";
+    }
+
+    public function unserialize($payload) { // Serializable hook; reached only through the C: payload below
+        echo "unserialize() called\n";
+        var_dump($payload);
+    }
+}
+
+$test = new Test;
+var_dump($s = serialize($test)); // expected to go through __serialize() and emit the O: format
+var_dump(unserialize($s)); // expected to go through __unserialize()
+
+var_dump(unserialize('C:4:"Test":7:{payload}')); // hand-written C: format payload; expected to reach Serializable::unserialize()
+
+?>
+--EXPECT--
+__serialize() called
+string(37) "O:4:"Test":1:{s:3:"key";s:5:"value";}"
+__unserialize() called
+array(1) {
+  ["key"]=>
+  string(5) "value"
+}
+object(Test)#2 (0) {
+}
+unserialize() called
+string(7) "payload"
+object(Test)#2 (0) {
+}
diff --git a/ext/standard/tests/serialize/__serialize_004.phpt b/ext/standard/tests/serialize/__serialize_004.phpt
new file mode 100644 (file)
index 0000000..cc55d67
--- /dev/null
@@ -0,0 +1,131 @@
+--TEST--
+__serialize() mechanism (004): Delayed __unserialize() calls
+--FILE--
+<?php // Nested objects alternating between __wakeup() and __unserialize() to check the order of the delayed calls.
+
+class Wakeup { // relies on default property serialization plus __wakeup()
+    public $data;
+    public function __construct(array $data) {
+        $this->data = $data;
+    }
+    public function __wakeup() {
+        echo "__wakeup() called\n";
+        var_dump($this->data); // dumps the nested objects as they look when this hook runs
+        $this->woken_up = true; // dynamic property; makes a completed __wakeup() visible in later dumps
+    }
+}
+
+class Unserialize { // uses the __serialize()/__unserialize() pair
+    public $data;
+    public function __construct(array $data) {
+        $this->data = $data;
+    }
+    public function __serialize() {
+        return $this->data; // the wrapped array itself is the serialized state
+    }
+    public function __unserialize(array $data) {
+        $this->data = $data;
+        echo "__unserialize() called\n";
+        var_dump($this->data);
+        $this->unserialized = true; // dynamic property; makes a completed __unserialize() visible in later dumps
+    }
+}
+
+$obj = new Wakeup([new Unserialize([new Wakeup([new Unserialize([])])])]); // four levels: Wakeup > Unserialize > Wakeup > Unserialize
+var_dump($s = serialize($obj));
+var_dump(unserialize($s)); // per the expected output the hooks fire for the innermost object first
+
+?>
+--EXPECT--
+string(126) "O:6:"Wakeup":1:{s:4:"data";a:1:{i:0;O:11:"Unserialize":1:{i:0;O:6:"Wakeup":1:{s:4:"data";a:1:{i:0;O:11:"Unserialize":0:{}}}}}}"
+__unserialize() called
+array(0) {
+}
+__wakeup() called
+array(1) {
+  [0]=>
+  object(Unserialize)#8 (2) {
+    ["data"]=>
+    array(0) {
+    }
+    ["unserialized"]=>
+    bool(true)
+  }
+}
+__unserialize() called
+array(1) {
+  [0]=>
+  object(Wakeup)#7 (2) {
+    ["data"]=>
+    array(1) {
+      [0]=>
+      object(Unserialize)#8 (2) {
+        ["data"]=>
+        array(0) {
+        }
+        ["unserialized"]=>
+        bool(true)
+      }
+    }
+    ["woken_up"]=>
+    bool(true)
+  }
+}
+__wakeup() called
+array(1) {
+  [0]=>
+  object(Unserialize)#6 (2) {
+    ["data"]=>
+    array(1) {
+      [0]=>
+      object(Wakeup)#7 (2) {
+        ["data"]=>
+        array(1) {
+          [0]=>
+          object(Unserialize)#8 (2) {
+            ["data"]=>
+            array(0) {
+            }
+            ["unserialized"]=>
+            bool(true)
+          }
+        }
+        ["woken_up"]=>
+        bool(true)
+      }
+    }
+    ["unserialized"]=>
+    bool(true)
+  }
+}
+object(Wakeup)#5 (2) {
+  ["data"]=>
+  array(1) {
+    [0]=>
+    object(Unserialize)#6 (2) {
+      ["data"]=>
+      array(1) {
+        [0]=>
+        object(Wakeup)#7 (2) {
+          ["data"]=>
+          array(1) {
+            [0]=>
+            object(Unserialize)#8 (2) {
+              ["data"]=>
+              array(0) {
+              }
+              ["unserialized"]=>
+              bool(true)
+            }
+          }
+          ["woken_up"]=>
+          bool(true)
+        }
+      }
+      ["unserialized"]=>
+      bool(true)
+    }
+  }
+  ["woken_up"]=>
+  bool(true)
+}
diff --git a/ext/standard/tests/serialize/__serialize_005.phpt b/ext/standard/tests/serialize/__serialize_005.phpt
new file mode 100644 (file)
index 0000000..3656a03
--- /dev/null
@@ -0,0 +1,56 @@
+--TEST--
+__serialize() mechanism (005): parent::__unserialize() is safe
+--FILE--
+<?php // A subclass delegating part of its state to parent::__serialize() and parent::__unserialize().
+
+class A {
+    private $data;
+    public function __construct(array $data) {
+        $this->data = $data;
+    }
+    public function __serialize() {
+        return $this->data; // the private array itself is the serialized state
+    }
+    public function __unserialize(array $data) {
+        $this->data = $data;
+    }
+}
+
+class B extends A {
+    private $data2;
+    public function __construct(array $data, array $data2) {
+        parent::__construct($data);
+        $this->data2 = $data2;
+    }
+    public function __serialize() {
+        return [$this->data2, parent::__serialize()]; // own state at index 0, parent state nested at index 1
+    }
+    public function __unserialize(array $payload) {
+        [$data2, $data] = $payload; // same order as produced by __serialize()
+        parent::__unserialize($data);
+        $this->data2 = $data2;
+    }
+}
+
+$common = new stdClass; // a single instance placed in both arrays
+$obj = new B([$common], [$common]);
+var_dump($s = serialize($obj)); // the second occurrence of $common is expected as the back-reference r:3;
+var_dump(unserialize($s)); // both arrays are expected to hold the same stdClass instance again
+
+?>
+--EXPECT--
+string(63) "O:1:"B":2:{i:0;a:1:{i:0;O:8:"stdClass":0:{}}i:1;a:1:{i:0;r:3;}}"
+object(B)#3 (2) {
+  ["data2":"B":private]=>
+  array(1) {
+    [0]=>
+    object(stdClass)#4 (0) {
+    }
+  }
+  ["data":"A":private]=>
+  array(1) {
+    [0]=>
+    object(stdClass)#4 (0) {
+    }
+  }
+}
index 66150c72744d1cfd085a697e0e91f75ff6af9551..12834e0fca5d4365e942486999f652c8a3e69788 100644 (file)
@@ -745,6 +745,32 @@ static int php_var_serialize_call_sleep(zval *retval, zval *struc) /* {{{ */
 }
 /* }}} */
 
+static int php_var_serialize_call_magic_serialize(zval *retval, zval *obj) /* {{{ */
+{ /* Calls __serialize() on obj and stores the result in retval; returns SUCCESS only when an array came back. */
+       zval fname;
+       int res;
+
+       ZVAL_STRINGL(&fname, "__serialize", sizeof("__serialize") - 1);
+       BG(serialize_lock)++; /* held only for the duration of the user call */
+       res = call_user_function(CG(function_table), obj, &fname, retval, 0, 0); /* no arguments are passed */
+       BG(serialize_lock)--;
+       zval_ptr_dtor_str(&fname);
+
+       if (res == FAILURE || Z_ISUNDEF_P(retval)) { /* the call failed or left no return value */
+               zval_ptr_dtor(retval);
+               return FAILURE;
+       }
+
+       if (Z_TYPE_P(retval) != IS_ARRAY) { /* any non-array return value is rejected */
+               zval_ptr_dtor(retval); /* released here, so on FAILURE the caller has nothing to free */
+               zend_type_error("__serialize() must return an array");
+               return FAILURE;
+       }
+
+       return SUCCESS; /* retval now holds the array; it is not released here */
+}
+/* }}} */
+
 static void php_var_serialize_collect_names(HashTable *ht, HashTable *src) /* {{{ */
 {
        zval *val;
@@ -915,6 +941,43 @@ again:
                case IS_OBJECT: {
                                zend_class_entry *ce = Z_OBJCE_P(struc);
 
+                               if (zend_hash_str_exists(&ce->function_table, "__serialize", sizeof("__serialize")-1)) {
+                                       zval retval, obj;
+                                       zend_string *key;
+                                       zval *data;
+                                       zend_ulong index;
+
+                                       ZVAL_COPY(&obj, struc);
+                                       if (php_var_serialize_call_magic_serialize(&retval, &obj) == FAILURE) {
+                                               if (!EG(exception)) {
+                                                       smart_str_appendl(buf, "N;", 2);
+                                               }
+                                               zval_ptr_dtor(&obj);
+                                               return;
+                                       }
+
+                                       php_var_serialize_class_name(buf, &obj);
+                                       smart_str_append_unsigned(buf, zend_array_count(Z_ARRVAL(retval)));
+                                       smart_str_appendl(buf, ":{", 2);
+                                       ZEND_HASH_FOREACH_KEY_VAL_IND(Z_ARRVAL(retval), index, key, data) {
+                                               if (!key) {
+                                                       php_var_serialize_long(buf, index);
+                                               } else {
+                                                       php_var_serialize_string(buf, ZSTR_VAL(key), ZSTR_LEN(key));
+                                               }
+
+                                               if (Z_ISREF_P(data) && Z_REFCOUNT_P(data) == 1) {
+                                                       data = Z_REFVAL_P(data);
+                                               }
+                                               php_var_serialize_intern(buf, data, var_hash);
+                                       } ZEND_HASH_FOREACH_END();
+                                       smart_str_appendc(buf, '}');
+
+                                       zval_ptr_dtor(&obj);
+                                       zval_ptr_dtor(&retval);
+                                       return;
+                               }
+
                                if (ce->serialize != NULL) {
                                        /* has custom handler */
                                        unsigned char *serialized_data = NULL;
index fde0513e49505399094ef375dc450874aac94c62..afb992cca4a5355175d5a20e3e3db2d52cf9aa98 100644 (file)
 #define VAR_DTOR_ENTRIES_MAX 255 /* 256 - offsetof(var_dtor_entries, data) / sizeof(zval) */
 #define VAR_ENTRIES_DBG 0
 
-/* VAR_FLAG used in var_dtor entries to signify an entry on which __wakeup should be called */
+/* VAR_FLAG used in var_dtor entries to signify an entry on which
+ * __wakeup/__unserialize should be called */
 #define VAR_WAKEUP_FLAG 1
+#define VAR_UNSERIALIZE_FLAG 2
 
 typedef struct {
        zend_long used_slots;
@@ -191,9 +193,10 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx)
        zend_long i;
        var_entries *var_hash = (*var_hashx)->entries.next;
        var_dtor_entries *var_dtor_hash = (*var_hashx)->first_dtor;
-       zend_bool wakeup_failed = 0;
-       zval wakeup_name;
+       zend_bool delayed_call_failed = 0;
+       zval wakeup_name, unserialize_name;
        ZVAL_UNDEF(&wakeup_name);
+       ZVAL_UNDEF(&unserialize_name);
 
 #if VAR_ENTRIES_DBG
        fprintf(stderr, "var_destroy(%ld)\n", var_hash?var_hash->used_slots:-1L);
@@ -212,9 +215,9 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx)
                        fprintf(stderr, "var_destroy dtor(%p, %ld)\n", var_dtor_hash->data[i], Z_REFCOUNT_P(var_dtor_hash->data[i]));
 #endif
 
-                       /* Perform delayed __wakeup calls */
                        if (Z_EXTRA_P(zv) == VAR_WAKEUP_FLAG) {
-                               if (!wakeup_failed) {
+                               /* Perform delayed __wakeup calls */
+                               if (!delayed_call_failed) {
                                        zval retval;
                                        if (Z_ISUNDEF(wakeup_name)) {
                                                ZVAL_STRINGL(&wakeup_name, "__wakeup", sizeof("__wakeup") - 1);
@@ -222,11 +225,33 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx)
 
                                        BG(serialize_lock)++;
                                        if (call_user_function(NULL, zv, &wakeup_name, &retval, 0, 0) == FAILURE || Z_ISUNDEF(retval)) {
-                                               wakeup_failed = 1;
+                                               delayed_call_failed = 1;
                                                GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED);
                                        }
                                        BG(serialize_lock)--;
 
+                                       zval_ptr_dtor(&retval);
+                               } else {
+                                       GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED);
+                               }
+                       } else if (Z_EXTRA_P(zv) == VAR_UNSERIALIZE_FLAG) {
+                               /* Perform delayed __unserialize calls */
+                               if (!delayed_call_failed) {
+                                       zval retval, param;
+                                       ZVAL_COPY(&param, &var_dtor_hash->data[i + 1]);
+
+                                       if (Z_ISUNDEF(unserialize_name)) {
+                                               ZVAL_STRINGL(&unserialize_name, "__unserialize", sizeof("__unserialize") - 1);
+                                       }
+
+                                       BG(serialize_lock)++;
+                                       if (call_user_function(CG(function_table), zv, &unserialize_name, &retval, 1, &param) == FAILURE || Z_ISUNDEF(retval)) {
+                                               delayed_call_failed = 1;
+                                               GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED);
+                                       }
+                                       BG(serialize_lock)--;
+
+                                       zval_ptr_dtor(&param);
                                        zval_ptr_dtor(&retval);
                                } else {
                                        GC_ADD_FLAGS(Z_OBJ_P(zv), IS_OBJ_DESTRUCTOR_CALLED);
@@ -241,6 +266,7 @@ PHPAPI void var_destroy(php_unserialize_data_t *var_hashx)
        }
 
        zval_ptr_dtor_nogc(&wakeup_name);
+       zval_ptr_dtor_nogc(&unserialize_name);
 
        if ((*var_hashx)->ref_props) {
                zend_hash_destroy((*var_hashx)->ref_props);
@@ -601,41 +627,38 @@ static inline int object_custom(UNSERIALIZE_PARAMETER, zend_class_entry *ce)
        return 1;
 }
 
-static inline zend_long object_common1(UNSERIALIZE_PARAMETER, zend_class_entry *ce)
-{
-       zend_long elements;
-
-       if( *p >= max - 2) {
-               zend_error(E_WARNING, "Bad unserialize data");
-               return -1;
-       }
-
-       elements = parse_iv2((*p) + 2, p);
-
-       (*p) += 2;
-
-       if (ce->serialize == NULL) {
-               object_init_ex(rval, ce);
-       } else {
-               /* If this class implements Serializable, it should not land here but in object_custom(). The passed string
-               obviously doesn't descend from the regular serializer. */
-               zend_error(E_WARNING, "Erroneous data format for unserializing '%s'", ZSTR_VAL(ce->name));
-               return -1;
-       }
-
-       return elements;
-}
-
 #ifdef PHP_WIN32
 # pragma optimize("", off)
 #endif
-static inline int object_common2(UNSERIALIZE_PARAMETER, zend_long elements)
+static inline int object_common(UNSERIALIZE_PARAMETER, zend_long elements, zend_bool has_unserialize)
 {
        HashTable *ht;
        zend_bool has_wakeup;
 
-       if (Z_TYPE_P(rval) != IS_OBJECT) {
-               return 0;
+       if (has_unserialize) {
+               zval ary, *tmp;
+
+               if (elements >= HT_MAX_SIZE) {
+                       return 0;
+               }
+
+               array_init_size(&ary, elements);
+               if (!process_nested_data(UNSERIALIZE_PASSTHRU, Z_ARRVAL(ary), elements, NULL)) {
+                       ZVAL_DEREF(rval);
+                       GC_ADD_FLAGS(Z_OBJ_P(rval), IS_OBJ_DESTRUCTOR_CALLED);
+                       return 0;
+               }
+
+               /* Delay __unserialize() call until end of unserialization. We use two slots here to
+                * store both the object and the unserialized data array. */
+               ZVAL_DEREF(rval);
+               tmp = var_tmp_var(var_hash);
+               ZVAL_COPY(tmp, rval);
+               Z_EXTRA_P(tmp) = VAR_UNSERIALIZE_FLAG;
+               tmp = var_tmp_var(var_hash);
+               ZVAL_COPY_VALUE(tmp, &ary);
+
+               return finish_nested_data(UNSERIALIZE_PASSTHRU);
        }
 
        has_wakeup = Z_OBJCE_P(rval) != PHP_IC_ENTRY
@@ -954,9 +977,9 @@ object ":" uiv ":" ["]      {
        char *str;
        zend_string *class_name;
        zend_class_entry *ce;
-       int incomplete_class = 0;
-
-       int custom_object = 0;
+       zend_bool incomplete_class = 0;
+       zend_bool custom_object = 0;
+       zend_bool has_unserialize = 0;
 
        zval user_func;
        zval retval;
@@ -1085,19 +1108,39 @@ object ":" uiv ":" ["]  {
                return ret;
        }
 
-       elements = object_common1(UNSERIALIZE_PASSTHRU, ce);
+       if (*p >= max - 2) {
+               zend_error(E_WARNING, "Bad unserialize data");
+               zend_string_release_ex(class_name, 0);
+               return 0;
+       }
 
+       elements = parse_iv2(*p + 2, p);
        if (elements < 0) {
-          zend_string_release_ex(class_name, 0);
-          return 0;
+               zend_string_release_ex(class_name, 0);
+               return 0;
+       }
+       *p += 2;
+
+       has_unserialize = !incomplete_class
+               && zend_hash_str_exists(&ce->function_table, "__unserialize", sizeof("__unserialize")-1);
+
+       /* If this class implements Serializable, it should not land here but in object_custom().
+        * The passed string obviously doesn't descend from the regular serializer. However, if
+        * there is both Serializable::unserialize() and __unserialize(), then both may be used,
+        * depending on the serialization format. */
+       if (ce->serialize != NULL && !has_unserialize) {
+               zend_error(E_WARNING, "Erroneous data format for unserializing '%s'", ZSTR_VAL(ce->name));
+               zend_string_release_ex(class_name, 0);
+               return 0;
        }
 
+       object_init_ex(rval, ce);
        if (incomplete_class) {
                php_store_class_name(rval, ZSTR_VAL(class_name), len2);
        }
        zend_string_release_ex(class_name, 0);
 
-       return object_common2(UNSERIALIZE_PASSTHRU, elements);
+       return object_common(UNSERIALIZE_PASSTHRU, elements, has_unserialize);
 }
 
 "}" {