]> granicus.if.org Git - php/commitdiff
Optimize array_slice for packed arrays with large offsets
authorTyson Andre <tysonandre775@hotmail.com>
Fri, 25 Oct 2019 13:31:06 +0000 (09:31 -0400)
committerNikita Popov <nikita.ppv@gmail.com>
Mon, 28 Oct 2019 10:30:59 +0000 (11:30 +0100)
If the offset is 100000, and there are no gaps in the packed/unpacked array,
then advance the pointer once by 100000,
instead of looping and skipping 100000 times.

Add a new test of array_slice handling unset offsets.

Closes GH-4860.

UPGRADING
ext/standard/array.c
ext/standard/tests/array/array_slice_variation11.phpt [new file with mode: 0644]

index 41cab54f1e5a2a870b6a458208e6ef91a4be5886..f083d6d245e87890f46214fabf04a814b0631c07 100644 (file)
--- a/UPGRADING
+++ b/UPGRADING
@@ -369,3 +369,6 @@ PHP 8.0 UPGRADE NOTES
 14. Performance Improvements
 ========================================
 
+- array_slice() on a packed array will no longer scan the whole array to find
+  the start offset. This may significantly reduce the runtime of the function
+  with large offsets and small lengths.
index 8fe281da264ebbc73f4cd31fbee416a63a3101dc..9b501861672175548edb1b979b1f242d3ba37783 100644 (file)
@@ -3455,6 +3455,32 @@ PHP_FUNCTION(array_splice)
 }
 /* }}} */
 
+/* {{{ find_bucket_at_offset(HashTable* ht, zend_long offset)
+   Finds the bucket at the given valid offset */
+static inline Bucket* find_bucket_at_offset(HashTable* ht, zend_long offset)
+{
+       zend_long pos;
+       Bucket *bucket;
+       ZEND_ASSERT(offset >= 0 && offset <= ht->nNumOfElements);
+       if (HT_IS_WITHOUT_HOLES(ht)) {
+               /* There's no need to iterate over the array to filter out holes if there are no holes */
+               /* This properly handles both packed and unpacked arrays. */
+               return ht->arData + offset;
+       }
+       /* Otherwise, this code has to iterate over the HashTable and skip holes in the array. */
+       pos = 0;
+       ZEND_HASH_FOREACH_BUCKET(ht, bucket) {
+               if (pos >= offset) {
+                       /* This is the bucket of the array element at the requested offset */
+                       return bucket;
+               }
+               ++pos;
+       } ZEND_HASH_FOREACH_END();
+
+       /* Return a pointer to the end of the bucket array. */
+       return ht->arData + ht->nNumUsed;
+}
+
 /* {{{ proto array array_slice(array input, int offset [, int length [, bool preserve_keys]])
    Returns elements specified by offset and length */
 PHP_FUNCTION(array_slice)
@@ -3466,7 +3492,6 @@ PHP_FUNCTION(array_slice)
        zend_bool length_is_null = 1; /* Whether an explicit length has been omitted */
        zend_bool preserve_keys = 0;  /* Whether to preserve keys while copying to the new array */
        uint32_t num_in;              /* Number of elements in the input array */
-       uint32_t pos;                 /* Current position in the array */
        zend_string *string_key;
        zend_ulong num_key;
 
@@ -3507,50 +3532,61 @@ PHP_FUNCTION(array_slice)
        /* Initialize returned array */
        array_init_size(return_value, (uint32_t)length);
 
-       /* Start at the beginning and go until we hit offset */
-       pos = 0;
-       if (HT_IS_PACKED(Z_ARRVAL_P(input)) &&
-           (!preserve_keys ||
-            (offset == 0 && HT_IS_WITHOUT_HOLES(Z_ARRVAL_P(input))))) {
-               zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
-               ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
-                       ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(input), entry) {
-                               pos++;
-                               if (pos <= offset) {
+       // Contains modified variants of ZEND_HASH_FOREACH_VAL
+       {
+               HashTable *ht = Z_ARRVAL_P(input);
+               Bucket *p = find_bucket_at_offset(ht, offset);
+               Bucket *end = ht->arData + ht->nNumUsed;
+
+               /* Start at the beginning and go until we hit offset */
+               if (HT_IS_PACKED(Z_ARRVAL_P(input)) &&
+                       (!preserve_keys ||
+                        (offset == 0 && HT_IS_WITHOUT_HOLES(Z_ARRVAL_P(input))))) {
+
+                       zend_hash_real_init_packed(Z_ARRVAL_P(return_value));
+                       ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
+                               for (; p != end; p++) {
+                                       if (__fill_idx >= length) {
+                                               break;
+                                       }
+                                       entry = &p->val;
+                                       if (UNEXPECTED(Z_TYPE_P(entry) == IS_UNDEF)) {
+                                               continue;
+                                       }
+                                       if (UNEXPECTED(Z_ISREF_P(entry)) &&
+                                               UNEXPECTED(Z_REFCOUNT_P(entry) == 1)) {
+                                               entry = Z_REFVAL_P(entry);
+                                       }
+                                       Z_TRY_ADDREF_P(entry);
+                                       ZEND_HASH_FILL_ADD(entry);
+                               }
+                       } ZEND_HASH_FILL_END();
+               } else {
+                       zend_long n = 0;  /* Current number of elements */
+                       for (; p != end; p++) {
+                               entry = &p->val;
+                               if (UNEXPECTED(Z_TYPE_P(entry) == IS_UNDEF)) {
                                        continue;
                                }
-                               if (pos > offset + length) {
+                               if (n >= length) {
                                        break;
                                }
-                               if (UNEXPECTED(Z_ISREF_P(entry)) &&
-                                       UNEXPECTED(Z_REFCOUNT_P(entry) == 1)) {
-                                       entry = Z_REFVAL_P(entry);
-                               }
-                               Z_TRY_ADDREF_P(entry);
-                               ZEND_HASH_FILL_ADD(entry);
-                       } ZEND_HASH_FOREACH_END();
-               } ZEND_HASH_FILL_END();
-       } else {
-               ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
-                       pos++;
-                       if (pos <= offset) {
-                               continue;
-                       }
-                       if (pos > offset + length) {
-                               break;
-                       }
+                               n++;
+                               num_key = p->h;
+                               string_key = p->key;
 
-                       if (string_key) {
-                               entry = zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, entry);
-                       } else {
-                               if (preserve_keys) {
-                                       entry = zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, entry);
+                               if (string_key) {
+                                       entry = zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, entry);
                                } else {
-                                       entry = zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), entry);
+                                       if (preserve_keys) {
+                                               entry = zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, entry);
+                                       } else {
+                                               entry = zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), entry);
+                                       }
                                }
+                               zval_add_ref(entry);
                        }
-                       zval_add_ref(entry);
-               } ZEND_HASH_FOREACH_END();
+               }
        }
 }
 /* }}} */
diff --git a/ext/standard/tests/array/array_slice_variation11.phpt b/ext/standard/tests/array/array_slice_variation11.phpt
new file mode 100644 (file)
index 0000000..aaffcd4
--- /dev/null
@@ -0,0 +1,105 @@
+--TEST--
+Test array_slice() function : usage variations - array has holes in buckets
+--FILE--
+<?php
+/* Prototype  : array array_slice(array $input, int $offset [, int $length [, bool $preserve_keys]])
+ * Description: Returns elements specified by offset and length
+ * Source code: ext/standard/array.c
+ */
+
+/*
+ * Check that results of array_slice are correct when there are holes in buckets caused by unset()
+ */
+
+echo "*** Testing array_slice() : usage variations ***\n";
+
+function dump_slice(array $input, $offsetToUnset, int $offset, int $length) {
+    unset($input[$offsetToUnset]);
+    var_dump(array_slice($input, $offset, $length));
+}
+
+echo "\n-- Call array_slice() on array with string keys--\n";
+$input = ['one' => 'un', 'two' => 'deux', 23 => 'twenty-three', 'zero'];
+dump_slice($input, 'two', 0, 1);
+dump_slice($input, 'two', 0, 2);
+dump_slice($input, 'two', 0, 3);
+dump_slice($input, 23, 1, 2);
+
+echo "\n-- Call array_slice() on array with packed keys--\n";
+$input = [10, 11, 12, 'thirteen'];
+dump_slice($input, 0, 0, 1);
+dump_slice($input, 1, 0, 1);
+dump_slice($input, 1, 0, 3);
+dump_slice($input, 1, -1, 1);
+dump_slice($input, 1, 0, 3);
+dump_slice($input, 1, -3, 3);
+
+echo "Done";
+?>
+--EXPECT--
+*** Testing array_slice() : usage variations ***
+
+-- Call array_slice() on array with string keys--
+array(1) {
+  ["one"]=>
+  string(2) "un"
+}
+array(2) {
+  ["one"]=>
+  string(2) "un"
+  [0]=>
+  string(12) "twenty-three"
+}
+array(3) {
+  ["one"]=>
+  string(2) "un"
+  [0]=>
+  string(12) "twenty-three"
+  [1]=>
+  string(4) "zero"
+}
+array(2) {
+  ["two"]=>
+  string(4) "deux"
+  [0]=>
+  string(4) "zero"
+}
+
+-- Call array_slice() on array with packed keys--
+array(1) {
+  [0]=>
+  int(11)
+}
+array(1) {
+  [0]=>
+  int(10)
+}
+array(3) {
+  [0]=>
+  int(10)
+  [1]=>
+  int(12)
+  [2]=>
+  string(8) "thirteen"
+}
+array(1) {
+  [0]=>
+  string(8) "thirteen"
+}
+array(3) {
+  [0]=>
+  int(10)
+  [1]=>
+  int(12)
+  [2]=>
+  string(8) "thirteen"
+}
+array(3) {
+  [0]=>
+  int(10)
+  [1]=>
+  int(12)
+  [2]=>
+  string(8) "thirteen"
+}
+Done