]> granicus.if.org Git - php/commitdiff
Speed up array_intersect/array_diff/array_filter
authorTyson Andre <tysonandre775@hotmail.com>
Sat, 7 Dec 2019 15:09:32 +0000 (10:09 -0500)
committerTyson Andre <tysonandre775@hotmail.com>
Wed, 11 Dec 2019 00:29:49 +0000 (19:29 -0500)
Use zend_hash_update instead of zend_hash_add.

These are taking a subset of keys from an array with unique keys,
so the result should also have unique keys.
(this is already done for array_map())

Also, speed up array_intersect and array_diff slightly by
using ZEND_HASH_FOREACH macros.
This way, it doesn't need to load the same buckets and array counts
from memory every time (compiler previously couldn't infer they won't change)

```php
<?php
// $n=10000 now takes 0.095 seconds instead of 0.102
function test_bench(int $n) {
    $values = range(0,1000);
    $other = range(0,1000);
    unset($other[500]);
    unset($values[400]);

    $total = 0;
    for ($i = 0; $i < $n; $i++) {
        $total += count(array_intersect_key($values, $other));
    }
    return $total;
}
```

ext/standard/array.c

index a96d3a47f657c03e7825cd1500d45c8a5b010427..a47aee5ee373a548c87dd147debd7389925b8acb 100644 (file)
@@ -4681,8 +4681,6 @@ static int zval_user_compare(zval *a, zval *b) /* {{{ */
 
 static void php_array_intersect_key(INTERNAL_FUNCTION_PARAMETERS, int data_compare_type) /* {{{ */
 {
-    uint32_t idx;
-       Bucket *p;
        int argc, i;
        zval *args;
        int (*intersect_data_compare_func)(zval *, zval *) = NULL;
@@ -4690,6 +4688,8 @@ static void php_array_intersect_key(INTERNAL_FUNCTION_PARAMETERS, int data_compa
        zval *val, *data;
        int req_args;
        char *param_spec;
+       zend_string *key;
+       zend_ulong h;
 
        /* Get the argument count */
        argc = ZEND_NUM_ARGS();
@@ -4727,21 +4727,15 @@ static void php_array_intersect_key(INTERNAL_FUNCTION_PARAMETERS, int data_compa
 
        array_init(return_value);
 
-       for (idx = 0; idx < Z_ARRVAL(args[0])->nNumUsed; idx++) {
-               p = Z_ARRVAL(args[0])->arData + idx;
-               val = &p->val;
-               if (Z_TYPE_P(val) == IS_UNDEF) continue;
-               if (UNEXPECTED(Z_TYPE_P(val) == IS_INDIRECT)) {
-                       val = Z_INDIRECT_P(val);
-                       if (Z_TYPE_P(val) == IS_UNDEF) continue;
-               }
+       /* Iterate over keys of the first array (handling possibility of indirects such as in $GLOBALS), to compute keys that are in all of the other arrays. */
+       ZEND_HASH_FOREACH_KEY_VAL_IND(Z_ARRVAL(args[0]), h, key, val) {
                if (Z_ISREF_P(val) && Z_REFCOUNT_P(val) == 1) {
                        val = Z_REFVAL_P(val);
                }
-               if (p->key == NULL) {
+               if (key == NULL) {
                        ok = 1;
                        for (i = 1; i < argc; i++) {
-                               if ((data = zend_hash_index_find(Z_ARRVAL(args[i]), p->h)) == NULL ||
+                               if ((data = zend_hash_index_find(Z_ARRVAL(args[i]), h)) == NULL ||
                                        (intersect_data_compare_func &&
                                        intersect_data_compare_func(val, data) != 0)
                                ) {
@@ -4751,12 +4745,12 @@ static void php_array_intersect_key(INTERNAL_FUNCTION_PARAMETERS, int data_compa
                        }
                        if (ok) {
                                Z_TRY_ADDREF_P(val);
-                               zend_hash_index_update(Z_ARRVAL_P(return_value), p->h, val);
+                               zend_hash_index_add_new(Z_ARRVAL_P(return_value), h, val);
                        }
                } else {
                        ok = 1;
                        for (i = 1; i < argc; i++) {
-                               if ((data = zend_hash_find_ex_ind(Z_ARRVAL(args[i]), p->key, 1)) == NULL ||
+                               if ((data = zend_hash_find_ex_ind(Z_ARRVAL(args[i]), key, 1)) == NULL ||
                                        (intersect_data_compare_func &&
                                        intersect_data_compare_func(val, data) != 0)
                                ) {
@@ -4766,10 +4760,10 @@ static void php_array_intersect_key(INTERNAL_FUNCTION_PARAMETERS, int data_compa
                        }
                        if (ok) {
                                Z_TRY_ADDREF_P(val);
-                               zend_hash_update(Z_ARRVAL_P(return_value), p->key, val);
+                               zend_hash_add_new(Z_ARRVAL_P(return_value), key, val);
                        }
                }
-       }
+       } ZEND_HASH_FOREACH_END();
 }
 /* }}} */
 
@@ -5097,13 +5091,13 @@ PHP_FUNCTION(array_uintersect_uassoc)
 
 static void php_array_diff_key(INTERNAL_FUNCTION_PARAMETERS, int data_compare_type) /* {{{ */
 {
-    uint32_t idx;
-       Bucket *p;
        int argc, i;
        zval *args;
        int (*diff_data_compare_func)(zval *, zval *) = NULL;
        zend_bool ok;
        zval *val, *data;
+       zend_string *key;
+       zend_ulong h;
 
        /* Get the argument count */
        argc = ZEND_NUM_ARGS();
@@ -5138,21 +5132,15 @@ static void php_array_diff_key(INTERNAL_FUNCTION_PARAMETERS, int data_compare_ty
 
        array_init(return_value);
 
-       for (idx = 0; idx < Z_ARRVAL(args[0])->nNumUsed; idx++) {
-               p = Z_ARRVAL(args[0])->arData + idx;
-               val = &p->val;
-               if (Z_TYPE_P(val) == IS_UNDEF) continue;
-               if (UNEXPECTED(Z_TYPE_P(val) == IS_INDIRECT)) {
-                       val = Z_INDIRECT_P(val);
-                       if (Z_TYPE_P(val) == IS_UNDEF) continue;
-               }
+       /* Iterate over keys of the first array (handling possibility of indirects such as in $GLOBALS), to compute keys that aren't in the other arrays. */
+       ZEND_HASH_FOREACH_KEY_VAL_IND(Z_ARRVAL(args[0]), h, key, val) {
                if (Z_ISREF_P(val) && Z_REFCOUNT_P(val) == 1) {
                        val = Z_REFVAL_P(val);
                }
-               if (p->key == NULL) {
+               if (key == NULL) {
                        ok = 1;
                        for (i = 1; i < argc; i++) {
-                               if ((data = zend_hash_index_find(Z_ARRVAL(args[i]), p->h)) != NULL &&
+                               if ((data = zend_hash_index_find(Z_ARRVAL(args[i]), h)) != NULL &&
                                        (!diff_data_compare_func ||
                                        diff_data_compare_func(val, data) == 0)
                                ) {
@@ -5162,12 +5150,12 @@ static void php_array_diff_key(INTERNAL_FUNCTION_PARAMETERS, int data_compare_ty
                        }
                        if (ok) {
                                Z_TRY_ADDREF_P(val);
-                               zend_hash_index_update(Z_ARRVAL_P(return_value), p->h, val);
+                               zend_hash_index_add_new(Z_ARRVAL_P(return_value), h, val);
                        }
                } else {
                        ok = 1;
                        for (i = 1; i < argc; i++) {
-                               if ((data = zend_hash_find_ex_ind(Z_ARRVAL(args[i]), p->key, 1)) != NULL &&
+                               if ((data = zend_hash_find_ex_ind(Z_ARRVAL(args[i]), key, 1)) != NULL &&
                                        (!diff_data_compare_func ||
                                        diff_data_compare_func(val, data) == 0)
                                ) {
@@ -5177,10 +5165,10 @@ static void php_array_diff_key(INTERNAL_FUNCTION_PARAMETERS, int data_compare_ty
                        }
                        if (ok) {
                                Z_TRY_ADDREF_P(val);
-                               zend_hash_update(Z_ARRVAL_P(return_value), p->key, val);
+                               zend_hash_add_new(Z_ARRVAL_P(return_value), key, val);
                        }
                }
-       }
+       } ZEND_HASH_FOREACH_END();
 }
 /* }}} */
 
@@ -6103,11 +6091,12 @@ PHP_FUNCTION(array_filter)
                Z_PARAM_LONG(use_type)
        ZEND_PARSE_PARAMETERS_END();
 
-       array_init(return_value);
        if (zend_hash_num_elements(Z_ARRVAL_P(array)) == 0) {
+               RETVAL_EMPTY_ARRAY();
                zend_release_fcall_info_cache(&fci_cache);
                return;
        }
+       array_init(return_value);
 
        if (ZEND_NUM_ARGS() > 1) {
                have_callback = 1;
@@ -6161,9 +6150,9 @@ PHP_FUNCTION(array_filter)
                }
 
                if (string_key) {
-                       operand = zend_hash_update(Z_ARRVAL_P(return_value), string_key, operand);
+                       operand = zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, operand);
                } else {
-                       operand = zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, operand);
+                       operand = zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, operand);
                }
                zval_add_ref(operand);
        } ZEND_HASH_FOREACH_END();
@@ -6402,7 +6391,11 @@ PHP_FUNCTION(array_chunk)
        num_in = zend_hash_num_elements(Z_ARRVAL_P(input));
 
        if (size > num_in) {
-               size = num_in > 0 ? num_in : 1;
+               if (num_in == 0) {
+                       RETVAL_EMPTY_ARRAY();
+                       return;
+               }
+               size = num_in;
        }
 
        array_init_size(return_value, (uint32_t)(((num_in - 1) / size) + 1));
@@ -6418,9 +6411,9 @@ PHP_FUNCTION(array_chunk)
                /* Add entry to the chunk, preserving keys if necessary. */
                if (preserve_keys) {
                        if (str_key) {
-                               entry = zend_hash_update(Z_ARRVAL(chunk), str_key, entry);
+                               entry = zend_hash_add_new(Z_ARRVAL(chunk), str_key, entry);
                        } else {
-                               entry = zend_hash_index_update(Z_ARRVAL(chunk), num_key, entry);
+                               entry = zend_hash_index_add_new(Z_ARRVAL(chunk), num_key, entry);
                        }
                } else {
                        entry = zend_hash_next_index_insert(Z_ARRVAL(chunk), entry);