From: Leigh Date: Wed, 6 Jul 2016 11:35:13 +0000 (+0100) Subject: Improve array_rand distribution X-Git-Tag: php-7.1.0beta1~43^2~4 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=06607993f69a8e67758c301776b5db9525783498;p=php Improve array_rand distribution --- diff --git a/ext/standard/array.c b/ext/standard/array.c index 2fc3f284ca..2cdb117832 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -79,6 +79,12 @@ #define INTERSECT_COMP_DATA_USER 1 #define INTERSECT_COMP_KEY_INTERNAL 0 #define INTERSECT_COMP_KEY_USER 1 + +// For array_rand even distribution +#define BITFIELD_BYTES(bits) ((bits + 7) >> 3) +#define BITFIELD_SETBIT(field, bit) field[bit >> 3] |= 1 << (bit & 7) +#define BITFIELD_BITSET(field, bit) ((field[bit >> 3] & (1 << (bit & 7))) != 0) + /* }}} */ ZEND_DECLARE_MODULE_GLOBALS(array) @@ -5033,9 +5039,12 @@ PHP_FUNCTION(array_rand) { zval *input; zend_long randval, num_req = 1; - int num_avail; zend_string *string_key; zend_ulong num_key; + int i; + int num_avail; + char *bitfield; + int negative_bitfield = 0; if (zend_parse_parameters(ZEND_NUM_ARGS(), "a|l", &input, &num_req) == FAILURE) { return; @@ -5043,46 +5052,63 @@ PHP_FUNCTION(array_rand) num_avail = zend_hash_num_elements(Z_ARRVAL_P(input)); - if (ZEND_NUM_ARGS() > 1) { - if (num_req <= 0 || num_req > num_avail) { - php_error_docref(NULL, E_WARNING, "Second argument has to be between 1 and the number of elements in the array"); - return; - } - } - - /* Make the return value an array only if we need to pass back more than one result. */ - if (num_req > 1) { - array_init_size(return_value, (uint32_t)num_req); + if (num_avail == 0) { + php_error_docref(NULL, E_WARNING, "Array is empty"); + return; } - /* We can't use zend_hash_index_find() because the array may have string keys or gaps. */ - ZEND_HASH_FOREACH_KEY(Z_ARRVAL_P(input), num_key, string_key) { - if (!num_req) { - break; - } - - randval = php_rand(); + if (num_req == 1) { + randval = php_mt_rand_range(0, num_avail - 1); - if ((double) (randval / PHP_RAND_MAX) <= (double) num_req / (double) num_avail) { - /* If we are returning a single result, just do it. */ - if (Z_TYPE_P(return_value) != IS_ARRAY) { + ZEND_HASH_FOREACH_KEY(Z_ARRVAL_P(input), num_key, string_key) { + if (randval-- == 0) { if (string_key) { RETURN_STR_COPY(string_key); } else { RETURN_LONG(num_key); } + } + } ZEND_HASH_FOREACH_END(); + } + + if (num_req <= 0 || num_req > num_avail) { + php_error_docref(NULL, E_WARNING, "Second argument has to be between 1 and the number of elements in the array"); + return; + } + + /* Make the return value an array only if we need to pass back more than one result. */ + array_init_size(return_value, (uint32_t)num_req); + if (num_req > (num_avail >> 1)) { + negative_bitfield = 1; + num_req = num_avail - num_req; + } + + bitfield = emalloc(BITFIELD_BYTES(num_avail)); + memset(bitfield, 0, BITFIELD_BYTES(num_avail)); + + i = num_req; + while (i) { + randval = php_mt_rand_range(0, num_avail - 1); + if (!BITFIELD_BITSET(bitfield, randval)) { + BITFIELD_SETBIT(bitfield, randval); + i--; + } + } + + /* We can't use zend_hash_index_find() because the array may have string keys or gaps. */ + i = 0; + ZEND_HASH_FOREACH_KEY(Z_ARRVAL_P(input), num_key, string_key) { + if (BITFIELD_BITSET(bitfield, i) ^ negative_bitfield) { + if (string_key) { + add_next_index_str(return_value, zend_string_copy(string_key)); } else { - /* Append the result to the return value. */ - if (string_key) { - add_next_index_str(return_value, zend_string_copy(string_key)); - } else { - add_next_index_long(return_value, num_key); - } + add_next_index_long(return_value, num_key); } - num_req--; } - num_avail--; + i++; } ZEND_HASH_FOREACH_END(); + + efree(bitfield); } /* }}} */ diff --git a/ext/standard/tests/array/array_rand.phpt b/ext/standard/tests/array/array_rand.phpt index 1f495f4b12..c158b0f74b 100644 --- a/ext/standard/tests/array/array_rand.phpt +++ b/ext/standard/tests/array/array_rand.phpt @@ -18,9 +18,11 @@ echo "Done\n"; --EXPECTF-- Warning: array_rand() expects at least 1 parameter, 0 given in %s on line %d NULL + +Warning: array_rand(): Array is empty in %s on line %d NULL -Warning: array_rand(): Second argument has to be between 1 and the number of elements in the array in %s on line %d +Warning: array_rand(): Array is empty in %s on line %d NULL Warning: array_rand() expects parameter 1 to be array, integer given in %s on line %d