From 22d3eb3117b1b5c484152c9fec5a74b0e95024ea Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 8 Apr 2014 23:24:52 +0200 Subject: [PATCH] Add zend_hash_splice This implements the original functionality of php_splice, but as an in-place operation, thus avoiding copying the HT. This is much faster (~10x) if the splice removes a small portion of the array and doesn't insert many elements. --- Zend/zend_hash.c | 116 ++++++++++++++++++++------ Zend/zend_hash.h | 4 + ext/standard/array.c | 171 +++++++-------------------------------- ext/standard/php_array.h | 2 +- 4 files changed, 125 insertions(+), 168 deletions(-) diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c index d41ad43969..a33e8726e2 100644 --- a/Zend/zend_hash.c +++ b/Zend/zend_hash.c @@ -29,18 +29,24 @@ (element)->pNext->pLast = (element); \ } -#define CONNECT_TO_GLOBAL_DLLIST(element, ht) \ - (element)->pListLast = (ht)->pListTail; \ - (ht)->pListTail = (element); \ - (element)->pListNext = NULL; \ - if ((element)->pListLast != NULL) { \ - (element)->pListLast->pListNext = (element); \ - } \ - if (!(ht)->pListHead) { \ +#define CONNECT_TO_GLOBAL_DLLIST_EX(element, ht, last, next)\ + (element)->pListLast = (last); \ + (element)->pListNext = (next); \ + if ((last) != NULL) { \ + (last)->pListNext = (element); \ + } else { \ (ht)->pListHead = (element); \ } \ - if ((ht)->pInternalPointer == NULL) { \ - (ht)->pInternalPointer = (element); \ + if ((next) != NULL) { \ + (next)->pListLast = (element); \ + } else { \ + (ht)->pListTail = (element); \ + } \ + +#define CONNECT_TO_GLOBAL_DLLIST(element, ht) \ + CONNECT_TO_GLOBAL_DLLIST_EX(element, ht, (ht)->pListTail, (Bucket *) NULL); \ + if ((ht)->pInternalPointer == NULL) { \ + (ht)->pInternalPointer = (element); \ } #if ZEND_DEBUG @@ -122,13 +128,13 @@ ZEND_API ulong zend_hash_func(const char *arKey, uint nKeyLength) memcpy((p)->pData, pData, nDataSize); \ } -#define INIT_DATA(ht, p, pData, nDataSize); \ +#define INIT_DATA(ht, p, _pData, nDataSize); \ if (nDataSize == sizeof(void*)) { \ - memcpy(&(p)->pDataPtr, pData, sizeof(void *)); \ + memcpy(&(p)->pDataPtr, (_pData), sizeof(void *)); \ (p)->pData = &(p)->pDataPtr; \ } else { \ (p)->pData = (void *) pemalloc_rel(nDataSize, (ht)->persistent);\ - memcpy((p)->pData, pData, nDataSize); \ + memcpy((p)->pData, (_pData), nDataSize); \ (p)->pDataPtr=NULL; \ } @@ -1246,21 +1252,12 @@ ZEND_API int zend_hash_update_current_key_ex(HashTable *ht, int key_type, const q->pData = p->pData; } q->pDataPtr = p->pDataPtr; - q->pListNext = p->pListNext; - q->pListLast = p->pListLast; - if (q->pListNext) { - p->pListNext->pListLast = q; - } else { - ht->pListTail = q; - } - if (q->pListLast) { - p->pListLast->pListNext = q; - } else { - ht->pListHead = q; - } + + CONNECT_TO_GLOBAL_DLLIST_EX(q, ht, p->pListLast, p->pListNext); if (ht->pInternalPointer == p) { ht->pInternalPointer = q; } + if (pos) { *pos = q; } @@ -1291,6 +1288,75 @@ ZEND_API int zend_hash_update_current_key_ex(HashTable *ht, int key_type, const } } +/* Performs an in-place splice operation on a hashtable: + * The elements between offset and offset+length are removed and the elements in list[list_count] + * are inserted in their place. The removed elements can be optionally collected into a hashtable. + * This operation reindexes the hashtable, i.e. integer keys will be zero-based and sequential, + * while string keys stay intact. The same applies to the elements inserted into the removed HT. */ +ZEND_API void _zend_hash_splice(HashTable *ht, uint nDataSize, copy_ctor_func_t pCopyConstructor, uint offset, uint length, void **list, uint list_count, HashTable *removed ZEND_FILE_LINE_DC) /* {{{ */ +{ + int pos; + Bucket *p; + + IS_CONSISTENT(ht); + CHECK_INIT(ht); + + /* Skip all elements until offset */ + for (pos = 0, p = ht->pListHead; pos < offset && p; pos++, p = p->pListNext); + + while (pos < offset + length && p) { + /* Copy removed element into HT, if it was specified */ + if (removed != NULL) { + void *new_entry; + + if (p->nKeyLength == 0) { + zend_hash_next_index_insert(removed, p->pData, sizeof(zval *), &new_entry); + } else { + zend_hash_quick_update(removed, p->arKey, p->nKeyLength, p->h, p->pData, sizeof(zval *), &new_entry); + } + + if (pCopyConstructor) { + pCopyConstructor(new_entry); + } + } + + /* Remove element */ + { + Bucket *p_next = p->pListNext; + zend_hash_bucket_delete(ht, p); + p = p_next; + } + + pos++; + } + + if (list != NULL) { + int i; + for (i = 0; i < list_count; i++) { + /* Add new element only to the global linked list, not the bucket list. + * Also use key 0 for everything, as we'll reindex the hashtable anyways. */ + Bucket *q = pemalloc_rel(sizeof(Bucket), ht->persistent); + q->arKey = NULL; + q->nKeyLength = 0; + q->h = 0; + INIT_DATA(ht, q, list[i], nDataSize); + + CONNECT_TO_GLOBAL_DLLIST_EX(q, ht, p ? p->pListLast : ht->pListTail, p); + + ht->nNumOfElements++; + + if (pCopyConstructor) { + pCopyConstructor(q->pData); + } + } + + ZEND_HASH_IF_FULL_DO_RESIZE(ht); + } + + zend_hash_reindex(ht); +} +/* }}} */ + ZEND_API int zend_hash_sort(HashTable *ht, sort_func_t sort_func, compare_func_t compar, int renumber TSRMLS_DC) { diff --git a/Zend/zend_hash.h b/Zend/zend_hash.h index eb9a53769e..b07efc84ec 100644 --- a/Zend/zend_hash.h +++ b/Zend/zend_hash.h @@ -229,6 +229,10 @@ ZEND_API int zend_hash_num_elements(const HashTable *ht); ZEND_API int zend_hash_rehash(HashTable *ht); ZEND_API void zend_hash_reindex(HashTable *ht); +ZEND_API void _zend_hash_splice(HashTable *ht, uint nDataSize, copy_ctor_func_t pCopyConstructor, uint offset, uint length, void **list, uint list_count, HashTable *removed ZEND_FILE_LINE_DC); +#define zend_hash_splice(ht, nDataSize, pCopyConstructor, offset, length, list, list_count, removed) \ + _zend_hash_splice(ht, nDataSize, pCopyConstructor, offset, length, list, list_count, removed ZEND_FILE_LINE_CC) + /* * DJBX33A (Daniel J. Bernstein, Times 33 with Addition) * diff --git a/ext/standard/array.c b/ext/standard/array.c index 90ef8e47b8..a46b97b93d 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -1815,93 +1815,15 @@ PHP_FUNCTION(shuffle) } /* }}} */ -PHPAPI HashTable* php_splice(HashTable *in_hash, int offset, int length, zval ***list, int list_count, HashTable **removed) /* {{{ */ +PHPAPI void php_splice(HashTable *ht, zend_uint offset, zend_uint length, zval ***list, zend_uint list_count, HashTable *removed TSRMLS_DC) /* {{{ */ { - HashTable *out_hash = NULL; /* Output hashtable */ - int num_in, /* Number of entries in the input hashtable */ - pos, /* Current position in the hashtable */ - i; /* Loop counter */ - Bucket *p; /* Pointer to hash bucket */ - zval *entry; /* Hash entry */ + zend_hash_splice(ht, sizeof(zval *), (copy_ctor_func_t) zval_add_ref, offset, length, (void **) list, list_count, removed); - /* If input hash doesn't exist, we have nothing to do */ - if (!in_hash) { - return NULL; - } - - /* Get number of entries in the input hash */ - num_in = zend_hash_num_elements(in_hash); - - /* Clamp the offset.. */ - if (offset > num_in) { - offset = num_in; - } else if (offset < 0 && (offset = (num_in + offset)) < 0) { - offset = 0; - } - - /* ..and the length */ - if (length < 0) { - length = num_in - offset + length; - } else if (((unsigned)offset + (unsigned)length) > (unsigned)num_in) { - length = num_in - offset; - } - - /* Create and initialize output hash */ - ALLOC_HASHTABLE(out_hash); - zend_hash_init(out_hash, (length > 0 ? num_in - length : 0) + list_count, NULL, ZVAL_PTR_DTOR, 0); - - /* Start at the beginning of the input hash and copy entries to output hash until offset is reached */ - for (pos = 0, p = in_hash->pListHead; pos < offset && p ; pos++, p = p->pListNext) { - /* Get entry and increase reference count */ - entry = *((zval **)p->pData); - Z_ADDREF_P(entry); - - /* Update output hash depending on key type */ - if (p->nKeyLength == 0) { - zend_hash_next_index_insert(out_hash, &entry, sizeof(zval *), NULL); - } else { - zend_hash_quick_update(out_hash, p->arKey, p->nKeyLength, p->h, &entry, sizeof(zval *), NULL); - } - } - - /* If hash for removed entries exists, go until offset+length and copy the entries to it */ - if (removed != NULL) { - for ( ; pos < offset + length && p; pos++, p = p->pListNext) { - entry = *((zval **)p->pData); - Z_ADDREF_P(entry); - if (p->nKeyLength == 0) { - zend_hash_next_index_insert(*removed, &entry, sizeof(zval *), NULL); - } else { - zend_hash_quick_update(*removed, p->arKey, p->nKeyLength, p->h, &entry, sizeof(zval *), NULL); - } - } - } else { /* otherwise just skip those entries */ - for ( ; pos < offset + length && p; pos++, p = p->pListNext); - } + zend_hash_internal_pointer_reset(ht); - /* If there are entries to insert.. */ - if (list != NULL) { - /* ..for each one, create a new zval, copy entry into it and copy it into the output hash */ - for (i = 0; i < list_count; i++) { - entry = *list[i]; - Z_ADDREF_P(entry); - zend_hash_next_index_insert(out_hash, &entry, sizeof(zval *), NULL); - } - } - - /* Copy the remaining input hash entries to the output hash */ - for ( ; p ; p = p->pListNext) { - entry = *((zval **)p->pData); - Z_ADDREF_P(entry); - if (p->nKeyLength == 0) { - zend_hash_next_index_insert(out_hash, &entry, sizeof(zval *), NULL); - } else { - zend_hash_quick_update(out_hash, p->arKey, p->nKeyLength, p->h, &entry, sizeof(zval *), NULL); - } + if (ht == &EG(symbol_table)) { + zend_reset_all_cv(&EG(symbol_table) TSRMLS_CC); } - - zend_hash_internal_pointer_reset(out_hash); - return out_hash; } /* }}} */ @@ -2006,24 +1928,14 @@ PHP_FUNCTION(array_unshift) { zval ***args, /* Function arguments array */ *stack; /* Input stack */ - HashTable *new_hash; /* New hashtable for the stack */ - HashTable old_hash; int argc; /* Number of function arguments */ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "a+", &stack, &args, &argc) == FAILURE) { return; } - /* Use splice to insert the elements at the beginning. Destroy old - * hashtable and replace it with new one */ - new_hash = php_splice(Z_ARRVAL_P(stack), 0, 0, &args[0], argc, NULL); - old_hash = *Z_ARRVAL_P(stack); - if (Z_ARRVAL_P(stack) == &EG(symbol_table)) { - zend_reset_all_cv(&EG(symbol_table) TSRMLS_CC); - } - *Z_ARRVAL_P(stack) = *new_hash; - FREE_HASHTABLE(new_hash); - zend_hash_destroy(&old_hash); + /* Use splice to insert the elements at the beginning. */ + php_splice(Z_ARRVAL_P(stack), 0, 0, args, argc, NULL TSRMLS_CC); /* Clean up and return the number of elements in the stack */ efree(args); @@ -2038,9 +1950,7 @@ PHP_FUNCTION(array_splice) zval *array, /* Input array */ *repl_array = NULL, /* Replacement array */ ***repl = NULL; /* Replacement elements */ - HashTable *new_hash = NULL, /* Output array's hash */ - **rem_hash = NULL; /* Removed elements' hash */ - HashTable old_hash; + HashTable *rem_hash = NULL; /* Removed elements' hash */ Bucket *p; /* Bucket used for traversing hash */ long i, offset, @@ -2058,7 +1968,7 @@ PHP_FUNCTION(array_splice) length = num_in; } - if (ZEND_NUM_ARGS() == 4) { + if (repl_array) { /* Make sure the last argument, if passed, is an array */ convert_to_array(repl_array); @@ -2070,44 +1980,32 @@ PHP_FUNCTION(array_splice) } } + /* Clamp the offset */ + if (offset < 0 && (offset = num_in + offset) < 0) { + offset = 0; + } else if (offset > num_in) { + offset = num_in; + } + + /* Clamp the length */ + if (length < 0 && (length = num_in - offset + length) < 0) { + length = 0; + } else if ((unsigned long) offset + (unsigned long) length > (unsigned) num_in) { + length = num_in - offset; + } + /* Don't create the array of removed elements if it's not going * to be used; e.g. only removing and/or replacing elements */ if (return_value_used) { - int size = length; - - /* Clamp the offset.. */ - if (offset > num_in) { - offset = num_in; - } else if (offset < 0 && (offset = (num_in + offset)) < 0) { - offset = 0; - } - - /* ..and the length */ - if (length < 0) { - size = num_in - offset + length; - } else if (((unsigned long) offset + (unsigned long) length) > (unsigned) num_in) { - size = num_in - offset; - } - - /* Initialize return value */ - array_init_size(return_value, size > 0 ? size : 0); - rem_hash = &Z_ARRVAL_P(return_value); + array_init_size(return_value, length); + rem_hash = Z_ARRVAL_P(return_value); } /* Perform splice */ - new_hash = php_splice(Z_ARRVAL_P(array), offset, length, repl, repl_num, rem_hash); - - /* Replace input array's hashtable with the new one */ - old_hash = *Z_ARRVAL_P(array); - if (Z_ARRVAL_P(array) == &EG(symbol_table)) { - zend_reset_all_cv(&EG(symbol_table) TSRMLS_CC); - } - *Z_ARRVAL_P(array) = *new_hash; - FREE_HASHTABLE(new_hash); - zend_hash_destroy(&old_hash); + php_splice(Z_ARRVAL_P(array), offset, length, repl, repl_num, rem_hash TSRMLS_CC); /* Clean up */ - if (ZEND_NUM_ARGS() == 4) { + if (repl) { efree(repl); } } @@ -2669,8 +2567,6 @@ PHP_FUNCTION(array_pad) zval *input; /* Input array */ zval *pad_value; /* Padding value obviously */ zval ***pads; /* Array to pass to splice */ - HashTable *new_hash;/* Return value from splice */ - HashTable old_hash; long pad_size; /* Size to pad to */ long pad_size_abs; /* Absolute value of pad_size */ int input_size; /* Size of the input array */ @@ -2714,19 +2610,10 @@ PHP_FUNCTION(array_pad) /* Pad on the right or on the left */ if (pad_size > 0) { - new_hash = php_splice(Z_ARRVAL_P(return_value), input_size, 0, pads, num_pads, NULL); + php_splice(Z_ARRVAL_P(return_value), input_size, 0, pads, num_pads, NULL TSRMLS_CC); } else { - new_hash = php_splice(Z_ARRVAL_P(return_value), 0, 0, pads, num_pads, NULL); - } - - /* Copy the result hash into return value */ - old_hash = *Z_ARRVAL_P(return_value); - if (Z_ARRVAL_P(return_value) == &EG(symbol_table)) { - zend_reset_all_cv(&EG(symbol_table) TSRMLS_CC); + php_splice(Z_ARRVAL_P(return_value), 0, 0, pads, num_pads, NULL TSRMLS_CC); } - *Z_ARRVAL_P(return_value) = *new_hash; - FREE_HASHTABLE(new_hash); - zend_hash_destroy(&old_hash); /* Clean up */ efree(pads); diff --git a/ext/standard/php_array.h b/ext/standard/php_array.h index c4389dd92a..deddeb4871 100644 --- a/ext/standard/php_array.h +++ b/ext/standard/php_array.h @@ -103,7 +103,7 @@ PHP_FUNCTION(array_key_exists); PHP_FUNCTION(array_chunk); PHP_FUNCTION(array_combine); -PHPAPI HashTable* php_splice(HashTable *, int, int, zval ***, int, HashTable **); +PHPAPI void php_splice(HashTable *ht, zend_uint offset, zend_uint length, zval ***list, zend_uint list_count, HashTable *removed TSRMLS_DC); PHPAPI int php_array_merge(HashTable *dest, HashTable *src, int recursive TSRMLS_DC); PHPAPI int php_array_replace_recursive(HashTable *dest, HashTable *src TSRMLS_DC); PHPAPI int php_multisort_compare(const void *a, const void *b TSRMLS_DC); -- 2.40.0