From: Alvaro Herrera Date: Mon, 11 Mar 2019 16:17:50 +0000 (-0300) Subject: Move hash_any prototype from access/hash.h to utils/hashutils.h X-Git-Tag: REL_12_BETA1~571 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=af38498d4c9b840e0e454574519459edda3871db;p=postgresql Move hash_any prototype from access/hash.h to utils/hashutils.h ... as well as its implementation from backend/access/hash/hashfunc.c to backend/utils/hash/hashfn.c. access/hash is the place for the hash index AM, not really appropriate for generic facilities, which is what hash_any is; having things the old way meant that anything using hash_any had to include the AM's include file, pointlessly polluting its namespace with unrelated, unnecessary cruft. Also move the HTEqual strategy number to access/stratnum.h from access/hash.h. To avoid breaking third-party extension code, add an #include "utils/hashutils.h" to access/hash.h. (An easily removed line by committers who enjoy their asbestos suits to protect them from angry extension authors.) 
Discussion: https://postgr.es/m/201901251935.ser5e4h6djt2@alvherre.pgsql --- diff --git a/contrib/citext/citext.c b/contrib/citext/citext.c index 24ceeb11fc..a4adafe895 100644 --- a/contrib/citext/citext.c +++ b/contrib/citext/citext.c @@ -3,10 +3,10 @@ */ #include "postgres.h" -#include "access/hash.h" #include "catalog/pg_collation.h" #include "utils/builtins.h" #include "utils/formatting.h" +#include "utils/hashutils.h" #include "utils/varlena.h" PG_MODULE_MAGIC; diff --git a/contrib/hstore/hstore_op.c b/contrib/hstore/hstore_op.c index b852fb5a8a..87f1aef3a3 100644 --- a/contrib/hstore/hstore_op.c +++ b/contrib/hstore/hstore_op.c @@ -3,11 +3,11 @@ */ #include "postgres.h" -#include "access/hash.h" #include "access/htup_details.h" #include "catalog/pg_type.h" #include "funcapi.h" #include "utils/builtins.h" +#include "utils/hashutils.h" #include "utils/memutils.h" #include "hstore.h" diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 7b39283c89..16b8074a00 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -61,7 +61,6 @@ #include #include -#include "access/hash.h" #include "catalog/pg_authid.h" #include "executor/instrument.h" #include "funcapi.h" @@ -78,6 +77,7 @@ #include "tcop/utility.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/hashutils.h" #include "utils/memutils.h" PG_MODULE_MAGIC; diff --git a/contrib/sepgsql/uavc.c b/contrib/sepgsql/uavc.c index fce6e98785..7d94c31e56 100644 --- a/contrib/sepgsql/uavc.c +++ b/contrib/sepgsql/uavc.c @@ -12,11 +12,11 @@ */ #include "postgres.h" -#include "access/hash.h" #include "catalog/pg_proc.h" #include "commands/seclabel.h" #include "storage/ipc.h" #include "utils/guc.h" +#include "utils/hashutils.h" #include "utils/memutils.h" #include "sepgsql.h" diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 
832c3e9af6..0158950a43 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -19,7 +19,6 @@ #include "postgres.h" -#include "access/hash.h" #include "access/htup_details.h" #include "access/tupdesc_details.h" #include "catalog/pg_collation.h" diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 63005ddc4d..e5f3d42e04 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -28,6 +28,7 @@ #include "access/hash.h" #include "utils/builtins.h" +#include "utils/hashutils.h" /* * Datatype-specific hash functions. @@ -307,629 +308,3 @@ hashvarlenaextended(PG_FUNCTION_ARGS) return result; } - -/* - * This hash function was written by Bob Jenkins - * (bob_jenkins@burtleburtle.net), and superficially adapted - * for PostgreSQL by Neil Conway. For more information on this - * hash function, see http://burtleburtle.net/bob/hash/doobs.html, - * or Bob's article in Dr. Dobb's Journal, Sept. 1997. - * - * In the current code, we have adopted Bob's 2006 update of his hash - * function to fetch the data a word at a time when it is suitably aligned. - * This makes for a useful speedup, at the cost of having to maintain - * four code paths (aligned vs unaligned, and little-endian vs big-endian). - * It also uses two separate mixing functions mix() and final(), instead - * of a slower multi-purpose function. - */ - -/* Get a bit mask of the bits set in non-uint32 aligned addresses */ -#define UINT32_ALIGN_MASK (sizeof(uint32) - 1) - -/* Rotate a uint32 value left by k bits - note multiple evaluation! */ -#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) - -/*---------- - * mix -- mix 3 32-bit values reversibly. - * - * This is reversible, so any information in (a,b,c) before mix() is - * still in (a,b,c) after mix(). 
- * - * If four pairs of (a,b,c) inputs are run through mix(), or through - * mix() in reverse, there are at least 32 bits of the output that - * are sometimes the same for one pair and different for another pair. - * This was tested for: - * * pairs that differed by one bit, by two bits, in any combination - * of top bits of (a,b,c), or in any combination of bottom bits of - * (a,b,c). - * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - * is commonly produced by subtraction) look like a single 1-bit - * difference. - * * the base values were pseudorandom, all zero but one bit set, or - * all zero plus a counter that starts at zero. - * - * This does not achieve avalanche. There are input bits of (a,b,c) - * that fail to affect some output bits of (a,b,c), especially of a. The - * most thoroughly mixed value is c, but it doesn't really even achieve - * avalanche in c. - * - * This allows some parallelism. Read-after-writes are good at doubling - * the number of bits affected, so the goal of mixing pulls in the opposite - * direction from the goal of parallelism. I did what I could. Rotates - * seem to cost as much as shifts on every machine I could lay my hands on, - * and rotates are much kinder to the top and bottom bits, so I used rotates. - *---------- - */ -#define mix(a,b,c) \ -{ \ - a -= c; a ^= rot(c, 4); c += b; \ - b -= a; b ^= rot(a, 6); a += c; \ - c -= b; c ^= rot(b, 8); b += a; \ - a -= c; a ^= rot(c,16); c += b; \ - b -= a; b ^= rot(a,19); a += c; \ - c -= b; c ^= rot(b, 4); b += a; \ -} - -/*---------- - * final -- final mixing of 3 32-bit values (a,b,c) into c - * - * Pairs of (a,b,c) values differing in only a few bits will usually - * produce values of c that look totally different. This was tested for - * * pairs that differed by one bit, by two bits, in any combination - * of top bits of (a,b,c), or in any combination of bottom bits of - * (a,b,c). 
- * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - * is commonly produced by subtraction) look like a single 1-bit - * difference. - * * the base values were pseudorandom, all zero but one bit set, or - * all zero plus a counter that starts at zero. - * - * The use of separate functions for mix() and final() allow for a - * substantial performance increase since final() does not need to - * do well in reverse, but is does need to affect all output bits. - * mix(), on the other hand, does not need to affect all output - * bits (affecting 32 bits is enough). The original hash function had - * a single mixing operation that had to satisfy both sets of requirements - * and was slower as a result. - *---------- - */ -#define final(a,b,c) \ -{ \ - c ^= b; c -= rot(b,14); \ - a ^= c; a -= rot(c,11); \ - b ^= a; b -= rot(a,25); \ - c ^= b; c -= rot(b,16); \ - a ^= c; a -= rot(c, 4); \ - b ^= a; b -= rot(a,14); \ - c ^= b; c -= rot(b,24); \ -} - -/* - * hash_any() -- hash a variable-length key into a 32-bit value - * k : the key (the unaligned variable-length array of bytes) - * len : the length of the key, counting by bytes - * - * Returns a uint32 value. Every bit of the key affects every bit of - * the return value. Every 1-bit and 2-bit delta achieves avalanche. - * About 6*len+35 instructions. The best hash table sizes are powers - * of 2. There is no need to do mod a prime (mod is sooo slow!). - * If you need less than 32 bits, use a bitmask. - * - * This procedure must never throw elog(ERROR); the ResourceOwner code - * relies on this not to fail. - * - * Note: we could easily change this function to return a 64-bit hash value - * by using the final values of both b and c. b is perhaps a little less - * well mixed than c, however. 
- */ -Datum -hash_any(register const unsigned char *k, register int keylen) -{ - register uint32 a, - b, - c, - len; - - /* Set up the internal state */ - len = keylen; - a = b = c = 0x9e3779b9 + len + 3923095; - - /* If the source pointer is word-aligned, we use word-wide fetches */ - if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0) - { - /* Code path for aligned source data */ - register const uint32 *ka = (const uint32 *) k; - - /* handle most of the key */ - while (len >= 12) - { - a += ka[0]; - b += ka[1]; - c += ka[2]; - mix(a, b, c); - ka += 3; - len -= 12; - } - - /* handle the last 11 bytes */ - k = (const unsigned char *) ka; -#ifdef WORDS_BIGENDIAN - switch (len) - { - case 11: - c += ((uint32) k[10] << 8); - /* fall through */ - case 10: - c += ((uint32) k[9] << 16); - /* fall through */ - case 9: - c += ((uint32) k[8] << 24); - /* fall through */ - case 8: - /* the lowest byte of c is reserved for the length */ - b += ka[1]; - a += ka[0]; - break; - case 7: - b += ((uint32) k[6] << 8); - /* fall through */ - case 6: - b += ((uint32) k[5] << 16); - /* fall through */ - case 5: - b += ((uint32) k[4] << 24); - /* fall through */ - case 4: - a += ka[0]; - break; - case 3: - a += ((uint32) k[2] << 8); - /* fall through */ - case 2: - a += ((uint32) k[1] << 16); - /* fall through */ - case 1: - a += ((uint32) k[0] << 24); - /* case 0: nothing left to add */ - } -#else /* !WORDS_BIGENDIAN */ - switch (len) - { - case 11: - c += ((uint32) k[10] << 24); - /* fall through */ - case 10: - c += ((uint32) k[9] << 16); - /* fall through */ - case 9: - c += ((uint32) k[8] << 8); - /* fall through */ - case 8: - /* the lowest byte of c is reserved for the length */ - b += ka[1]; - a += ka[0]; - break; - case 7: - b += ((uint32) k[6] << 16); - /* fall through */ - case 6: - b += ((uint32) k[5] << 8); - /* fall through */ - case 5: - b += k[4]; - /* fall through */ - case 4: - a += ka[0]; - break; - case 3: - a += ((uint32) k[2] << 16); - /* fall through */ - case 2: - a 
+= ((uint32) k[1] << 8); - /* fall through */ - case 1: - a += k[0]; - /* case 0: nothing left to add */ - } -#endif /* WORDS_BIGENDIAN */ - } - else - { - /* Code path for non-aligned source data */ - - /* handle most of the key */ - while (len >= 12) - { -#ifdef WORDS_BIGENDIAN - a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24)); - b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24)); - c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24)); -#else /* !WORDS_BIGENDIAN */ - a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); - b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); - c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); -#endif /* WORDS_BIGENDIAN */ - mix(a, b, c); - k += 12; - len -= 12; - } - - /* handle the last 11 bytes */ -#ifdef WORDS_BIGENDIAN - switch (len) - { - case 11: - c += ((uint32) k[10] << 8); - /* fall through */ - case 10: - c += ((uint32) k[9] << 16); - /* fall through */ - case 9: - c += ((uint32) k[8] << 24); - /* fall through */ - case 8: - /* the lowest byte of c is reserved for the length */ - b += k[7]; - /* fall through */ - case 7: - b += ((uint32) k[6] << 8); - /* fall through */ - case 6: - b += ((uint32) k[5] << 16); - /* fall through */ - case 5: - b += ((uint32) k[4] << 24); - /* fall through */ - case 4: - a += k[3]; - /* fall through */ - case 3: - a += ((uint32) k[2] << 8); - /* fall through */ - case 2: - a += ((uint32) k[1] << 16); - /* fall through */ - case 1: - a += ((uint32) k[0] << 24); - /* case 0: nothing left to add */ - } -#else /* !WORDS_BIGENDIAN */ - switch (len) - { - case 11: - c += ((uint32) k[10] << 24); - /* fall through */ - case 10: - c += ((uint32) k[9] << 16); - /* fall through */ - case 9: - c += ((uint32) k[8] << 8); - /* fall through */ - case 8: - /* the lowest byte of c is reserved for the length */ - b 
+= ((uint32) k[7] << 24); - /* fall through */ - case 7: - b += ((uint32) k[6] << 16); - /* fall through */ - case 6: - b += ((uint32) k[5] << 8); - /* fall through */ - case 5: - b += k[4]; - /* fall through */ - case 4: - a += ((uint32) k[3] << 24); - /* fall through */ - case 3: - a += ((uint32) k[2] << 16); - /* fall through */ - case 2: - a += ((uint32) k[1] << 8); - /* fall through */ - case 1: - a += k[0]; - /* case 0: nothing left to add */ - } -#endif /* WORDS_BIGENDIAN */ - } - - final(a, b, c); - - /* report the result */ - return UInt32GetDatum(c); -} - -/* - * hash_any_extended() -- hash into a 64-bit value, using an optional seed - * k : the key (the unaligned variable-length array of bytes) - * len : the length of the key, counting by bytes - * seed : a 64-bit seed (0 means no seed) - * - * Returns a uint64 value. Otherwise similar to hash_any. - */ -Datum -hash_any_extended(register const unsigned char *k, register int keylen, - uint64 seed) -{ - register uint32 a, - b, - c, - len; - - /* Set up the internal state */ - len = keylen; - a = b = c = 0x9e3779b9 + len + 3923095; - - /* If the seed is non-zero, use it to perturb the internal state. */ - if (seed != 0) - { - /* - * In essence, the seed is treated as part of the data being hashed, - * but for simplicity, we pretend that it's padded with four bytes of - * zeroes so that the seed constitutes a 12-byte chunk. 
- */ - a += (uint32) (seed >> 32); - b += (uint32) seed; - mix(a, b, c); - } - - /* If the source pointer is word-aligned, we use word-wide fetches */ - if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0) - { - /* Code path for aligned source data */ - register const uint32 *ka = (const uint32 *) k; - - /* handle most of the key */ - while (len >= 12) - { - a += ka[0]; - b += ka[1]; - c += ka[2]; - mix(a, b, c); - ka += 3; - len -= 12; - } - - /* handle the last 11 bytes */ - k = (const unsigned char *) ka; -#ifdef WORDS_BIGENDIAN - switch (len) - { - case 11: - c += ((uint32) k[10] << 8); - /* fall through */ - case 10: - c += ((uint32) k[9] << 16); - /* fall through */ - case 9: - c += ((uint32) k[8] << 24); - /* fall through */ - case 8: - /* the lowest byte of c is reserved for the length */ - b += ka[1]; - a += ka[0]; - break; - case 7: - b += ((uint32) k[6] << 8); - /* fall through */ - case 6: - b += ((uint32) k[5] << 16); - /* fall through */ - case 5: - b += ((uint32) k[4] << 24); - /* fall through */ - case 4: - a += ka[0]; - break; - case 3: - a += ((uint32) k[2] << 8); - /* fall through */ - case 2: - a += ((uint32) k[1] << 16); - /* fall through */ - case 1: - a += ((uint32) k[0] << 24); - /* case 0: nothing left to add */ - } -#else /* !WORDS_BIGENDIAN */ - switch (len) - { - case 11: - c += ((uint32) k[10] << 24); - /* fall through */ - case 10: - c += ((uint32) k[9] << 16); - /* fall through */ - case 9: - c += ((uint32) k[8] << 8); - /* fall through */ - case 8: - /* the lowest byte of c is reserved for the length */ - b += ka[1]; - a += ka[0]; - break; - case 7: - b += ((uint32) k[6] << 16); - /* fall through */ - case 6: - b += ((uint32) k[5] << 8); - /* fall through */ - case 5: - b += k[4]; - /* fall through */ - case 4: - a += ka[0]; - break; - case 3: - a += ((uint32) k[2] << 16); - /* fall through */ - case 2: - a += ((uint32) k[1] << 8); - /* fall through */ - case 1: - a += k[0]; - /* case 0: nothing left to add */ - } -#endif /* WORDS_BIGENDIAN 
*/ - } - else - { - /* Code path for non-aligned source data */ - - /* handle most of the key */ - while (len >= 12) - { -#ifdef WORDS_BIGENDIAN - a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24)); - b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24)); - c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24)); -#else /* !WORDS_BIGENDIAN */ - a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); - b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); - c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); -#endif /* WORDS_BIGENDIAN */ - mix(a, b, c); - k += 12; - len -= 12; - } - - /* handle the last 11 bytes */ -#ifdef WORDS_BIGENDIAN - switch (len) - { - case 11: - c += ((uint32) k[10] << 8); - /* fall through */ - case 10: - c += ((uint32) k[9] << 16); - /* fall through */ - case 9: - c += ((uint32) k[8] << 24); - /* fall through */ - case 8: - /* the lowest byte of c is reserved for the length */ - b += k[7]; - /* fall through */ - case 7: - b += ((uint32) k[6] << 8); - /* fall through */ - case 6: - b += ((uint32) k[5] << 16); - /* fall through */ - case 5: - b += ((uint32) k[4] << 24); - /* fall through */ - case 4: - a += k[3]; - /* fall through */ - case 3: - a += ((uint32) k[2] << 8); - /* fall through */ - case 2: - a += ((uint32) k[1] << 16); - /* fall through */ - case 1: - a += ((uint32) k[0] << 24); - /* case 0: nothing left to add */ - } -#else /* !WORDS_BIGENDIAN */ - switch (len) - { - case 11: - c += ((uint32) k[10] << 24); - /* fall through */ - case 10: - c += ((uint32) k[9] << 16); - /* fall through */ - case 9: - c += ((uint32) k[8] << 8); - /* fall through */ - case 8: - /* the lowest byte of c is reserved for the length */ - b += ((uint32) k[7] << 24); - /* fall through */ - case 7: - b += ((uint32) k[6] << 16); - /* fall through */ - case 6: - b += ((uint32) 
k[5] << 8); - /* fall through */ - case 5: - b += k[4]; - /* fall through */ - case 4: - a += ((uint32) k[3] << 24); - /* fall through */ - case 3: - a += ((uint32) k[2] << 16); - /* fall through */ - case 2: - a += ((uint32) k[1] << 8); - /* fall through */ - case 1: - a += k[0]; - /* case 0: nothing left to add */ - } -#endif /* WORDS_BIGENDIAN */ - } - - final(a, b, c); - - /* report the result */ - PG_RETURN_UINT64(((uint64) b << 32) | c); -} - -/* - * hash_uint32() -- hash a 32-bit value to a 32-bit value - * - * This has the same result as - * hash_any(&k, sizeof(uint32)) - * but is faster and doesn't force the caller to store k into memory. - */ -Datum -hash_uint32(uint32 k) -{ - register uint32 a, - b, - c; - - a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095; - a += k; - - final(a, b, c); - - /* report the result */ - return UInt32GetDatum(c); -} - -/* - * hash_uint32_extended() -- hash a 32-bit value to a 64-bit value, with a seed - * - * Like hash_uint32, this is a convenience function. 
- */ -Datum -hash_uint32_extended(uint32 k, uint64 seed) -{ - register uint32 a, - b, - c; - - a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095; - - if (seed != 0) - { - a += (uint32) (seed >> 32); - b += (uint32) seed; - mix(a, b, c); - } - - a += k; - - final(a, b, c); - - /* report the result */ - PG_RETURN_UINT64(((uint64) b << 32) | c); -} diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c index 9360b5bbc9..42d373ab83 100644 --- a/src/backend/access/tablesample/bernoulli.c +++ b/src/backend/access/tablesample/bernoulli.c @@ -26,11 +26,11 @@ #include -#include "access/hash.h" #include "access/tsmapi.h" #include "catalog/pg_type.h" #include "optimizer/optimizer.h" #include "utils/builtins.h" +#include "utils/hashutils.h" /* Private state */ diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c index 298e0ab4a0..fb1a563424 100644 --- a/src/backend/access/tablesample/system.c +++ b/src/backend/access/tablesample/system.c @@ -26,13 +26,13 @@ #include -#include "access/hash.h" #include "access/heapam.h" #include "access/relscan.h" #include "access/tsmapi.h" #include "catalog/pg_type.h" #include "optimizer/optimizer.h" #include "utils/builtins.h" +#include "utils/hashutils.h" /* Private state */ diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index 96f9275072..a994d7bb6d 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -18,7 +18,6 @@ #include "miscadmin.h" #include "access/genam.h" -#include "access/hash.h" #include "access/heapam.h" #include "access/htup_details.h" #include "access/xact.h" diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c index eb9a093ea0..4d48be0b92 100644 --- a/src/backend/commands/publicationcmds.c +++ b/src/backend/commands/publicationcmds.c @@ -18,7 +18,6 @@ #include "miscadmin.h" #include "access/genam.h" -#include 
"access/hash.h" #include "access/htup_details.h" #include "access/table.h" #include "access/xact.h" diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index a9d80e692b..417e971ec8 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -18,7 +18,6 @@ */ #include "postgres.h" -#include "access/hash.h" #include "access/parallel.h" #include "executor/executor.h" #include "miscadmin.h" diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c index da4a65fd30..65ad959641 100644 --- a/src/backend/executor/nodeSamplescan.c +++ b/src/backend/executor/nodeSamplescan.c @@ -14,7 +14,6 @@ */ #include "postgres.h" -#include "access/hash.h" #include "access/heapam.h" #include "access/relscan.h" #include "access/tsmapi.h" @@ -22,6 +21,7 @@ #include "executor/nodeSamplescan.h" #include "miscadmin.h" #include "pgstat.h" +#include "storage/bufmgr.h" #include "storage/predicate.h" #include "utils/builtins.h" #include "utils/rel.h" diff --git a/src/backend/lib/bloomfilter.c b/src/backend/lib/bloomfilter.c index e2c1276f21..d82420bd68 100644 --- a/src/backend/lib/bloomfilter.c +++ b/src/backend/lib/bloomfilter.c @@ -35,9 +35,9 @@ #include -#include "access/hash.h" #include "lib/bloomfilter.h" #include "port/pg_bitutils.h" +#include "utils/hashutils.h" #define MAX_HASH_FUNCS 10 diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 54f8567c01..07e6f03a87 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -20,9 +20,10 @@ */ #include "postgres.h" -#include "access/hash.h" +#include "nodes/bitmapset.h" #include "nodes/pg_list.h" #include "port/pg_bitutils.h" +#include "utils/hashutils.h" #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) diff --git a/src/backend/storage/file/sharedfileset.c b/src/backend/storage/file/sharedfileset.c index 9d923027ad..e922431a4a 100644 --- a/src/backend/storage/file/sharedfileset.c +++ 
b/src/backend/storage/file/sharedfileset.c @@ -18,13 +18,15 @@ #include "postgres.h" -#include "access/hash.h" +#include + #include "catalog/pg_tablespace.h" #include "commands/tablespace.h" #include "miscadmin.h" #include "storage/dsm.h" #include "storage/sharedfileset.h" #include "utils/builtins.h" +#include "utils/hashutils.h" static void SharedFileSetOnDetach(dsm_segment *segment, Datum datum); static void SharedFileSetPath(char *path, SharedFileSet *fileset, Oid tablespace); diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c index 28495a1e2d..530f1ccddb 100644 --- a/src/backend/tsearch/ts_typanalyze.c +++ b/src/backend/tsearch/ts_typanalyze.c @@ -13,12 +13,12 @@ */ #include "postgres.h" -#include "access/hash.h" #include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "commands/vacuum.h" #include "tsearch/ts_type.h" #include "utils/builtins.h" +#include "utils/hashutils.h" /* A hash key for lexemes */ diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c index 967b033d20..83c35a4d8b 100644 --- a/src/backend/utils/adt/acl.c +++ b/src/backend/utils/adt/acl.c @@ -16,7 +16,6 @@ #include -#include "access/hash.h" #include "access/htup_details.h" #include "catalog/catalog.h" #include "catalog/namespace.h" @@ -33,6 +32,7 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/catcache.h" +#include "utils/hashutils.h" #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 5b2917d159..a34605ac94 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -17,7 +17,6 @@ #include #include -#include "access/hash.h" #include "access/htup_details.h" #include "catalog/pg_type.h" #include "funcapi.h" diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index cf5a1c6039..1ff3cfea8b 100644 --- a/src/backend/utils/adt/date.c 
+++ b/src/backend/utils/adt/date.c @@ -20,7 +20,6 @@ #include #include -#include "access/hash.h" #include "access/xact.h" #include "libpq/pqformat.h" #include "miscadmin.h" @@ -30,6 +29,7 @@ #include "utils/builtins.h" #include "utils/date.h" #include "utils/datetime.h" +#include "utils/hashutils.h" #include "utils/sortsupport.h" /* diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c index a7f73b6960..bae5287f70 100644 --- a/src/backend/utils/adt/jsonb_gin.c +++ b/src/backend/utils/adt/jsonb_gin.c @@ -14,11 +14,11 @@ #include "postgres.h" #include "access/gin.h" -#include "access/hash.h" #include "access/stratnum.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" #include "utils/builtins.h" +#include "utils/hashutils.h" #include "utils/jsonb.h" #include "utils/varlena.h" diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c index 6695363a4b..84796a11eb 100644 --- a/src/backend/utils/adt/jsonb_util.c +++ b/src/backend/utils/adt/jsonb_util.c @@ -13,10 +13,10 @@ */ #include "postgres.h" -#include "access/hash.h" #include "catalog/pg_collation.h" #include "miscadmin.h" #include "utils/builtins.h" +#include "utils/hashutils.h" #include "utils/jsonb.h" #include "utils/memutils.h" #include "utils/varlena.h" diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c index a65c1129ac..3bfeb75fa2 100644 --- a/src/backend/utils/adt/mac.c +++ b/src/backend/utils/adt/mac.c @@ -13,12 +13,12 @@ #include "postgres.h" -#include "access/hash.h" #include "lib/hyperloglog.h" #include "libpq/pqformat.h" #include "port/pg_bswap.h" #include "utils/builtins.h" #include "utils/guc.h" +#include "utils/hashutils.h" #include "utils/inet.h" #include "utils/sortsupport.h" diff --git a/src/backend/utils/adt/mac8.c b/src/backend/utils/adt/mac8.c index 867e90ca4e..0b1fe4978e 100644 --- a/src/backend/utils/adt/mac8.c +++ b/src/backend/utils/adt/mac8.c @@ -21,9 +21,9 @@ #include "postgres.h" -#include 
"access/hash.h" #include "libpq/pqformat.h" #include "utils/builtins.h" +#include "utils/hashutils.h" #include "utils/inet.h" /* diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c index 7f3ca7f930..db9dfcbb7d 100644 --- a/src/backend/utils/adt/network.c +++ b/src/backend/utils/adt/network.c @@ -12,7 +12,7 @@ #include #include -#include "access/hash.h" +#include "access/stratnum.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_type.h" #include "common/ip.h" @@ -24,6 +24,7 @@ #include "nodes/supportnodes.h" #include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/hashutils.h" #include "utils/inet.h" #include "utils/lsyscache.h" diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 1c9deebc1d..0765f2cdb5 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -26,7 +26,6 @@ #include #include -#include "access/hash.h" #include "catalog/pg_type.h" #include "common/int.h" #include "funcapi.h" @@ -39,6 +38,7 @@ #include "utils/builtins.h" #include "utils/float.h" #include "utils/guc.h" +#include "utils/hashutils.h" #include "utils/int8.h" #include "utils/numeric.h" #include "utils/sortsupport.h" diff --git a/src/backend/utils/adt/pg_lsn.c b/src/backend/utils/adt/pg_lsn.c index 2880e33b65..7242d3cfed 100644 --- a/src/backend/utils/adt/pg_lsn.c +++ b/src/backend/utils/adt/pg_lsn.c @@ -13,7 +13,6 @@ */ #include "postgres.h" -#include "access/hash.h" #include "funcapi.h" #include "libpq/pqformat.h" #include "utils/builtins.h" diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c index c171c7db28..72c450c70e 100644 --- a/src/backend/utils/adt/rangetypes.c +++ b/src/backend/utils/adt/rangetypes.c @@ -30,12 +30,13 @@ */ #include "postgres.h" -#include "access/hash.h" +#include "access/tupmacs.h" #include "lib/stringinfo.h" #include "libpq/pqformat.h" #include "miscadmin.h" #include "utils/builtins.h" #include "utils/date.h" +#include 
"utils/hashutils.h" #include "utils/int8.h" #include "utils/lsyscache.h" #include "utils/rangetypes.h" diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c index f5ffd12cfc..8c62771261 100644 --- a/src/backend/utils/adt/tid.c +++ b/src/backend/utils/adt/tid.c @@ -20,7 +20,6 @@ #include #include -#include "access/hash.h" #include "access/heapam.h" #include "access/sysattr.h" #include "catalog/namespace.h" @@ -30,6 +29,7 @@ #include "parser/parsetree.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/hashutils.h" #include "utils/rel.h" #include "utils/snapmgr.h" #include "utils/varlena.h" diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 1b0effa924..e5ac371fa0 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -20,7 +20,6 @@ #include #include -#include "access/hash.h" #include "access/xact.h" #include "catalog/pg_type.h" #include "common/int128.h" diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c index ca98429d79..09aa6b6daa 100644 --- a/src/backend/utils/adt/uuid.c +++ b/src/backend/utils/adt/uuid.c @@ -13,12 +13,12 @@ #include "postgres.h" -#include "access/hash.h" #include "lib/hyperloglog.h" #include "libpq/pqformat.h" #include "port/pg_bswap.h" #include "utils/builtins.h" #include "utils/guc.h" +#include "utils/hashutils.h" #include "utils/sortsupport.h" #include "utils/uuid.h" diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index c866af022f..440fc8ed66 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -14,8 +14,6 @@ */ #include "postgres.h" - -#include "access/hash.h" #include "access/tuptoaster.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" @@ -24,6 +22,7 @@ #include "nodes/supportnodes.h" #include "utils/array.h" #include "utils/builtins.h" +#include "utils/hashutils.h" #include "utils/varlena.h" #include "mb/pg_wchar.h" diff --git 
a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 693ccc5149..39c394331b 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -17,7 +17,6 @@ #include #include -#include "access/hash.h" #include "access/tuptoaster.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" @@ -30,6 +29,7 @@ #include "regex/regex.h" #include "utils/builtins.h" #include "utils/bytea.h" +#include "utils/hashutils.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/pg_locale.h" diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 78dd5714fa..07e1cd7696 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -15,7 +15,6 @@ #include "postgres.h" #include "access/genam.h" -#include "access/hash.h" #include "access/relscan.h" #include "access/sysattr.h" #include "access/table.h" diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index d9ffb78484..6b28c243bd 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -30,7 +30,6 @@ #include #include -#include "access/hash.h" #include "access/htup_details.h" #include "access/multixact.h" #include "access/nbtree.h" diff --git a/src/backend/utils/hash/hashfn.c b/src/backend/utils/hash/hashfn.c index de754146e5..9f5e2925de 100644 --- a/src/backend/utils/hash/hashfn.c +++ b/src/backend/utils/hash/hashfn.c @@ -1,7 +1,8 @@ /*------------------------------------------------------------------------- * * hashfn.c - * Hash functions for use in dynahash.c hashtables + * Generic hashing functions, and hash functions for use in dynahash.c + * hashtables * * * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group @@ -21,10 +22,637 @@ */ #include "postgres.h" -#include "access/hash.h" +#include "fmgr.h" +#include "nodes/bitmapset.h" +#include "utils/hashutils.h" #include "utils/hsearch.h" +/* + * This hash function was 
written by Bob Jenkins + * (bob_jenkins@burtleburtle.net), and superficially adapted + * for PostgreSQL by Neil Conway. For more information on this + * hash function, see http://burtleburtle.net/bob/hash/doobs.html, + * or Bob's article in Dr. Dobb's Journal, Sept. 1997. + * + * In the current code, we have adopted Bob's 2006 update of his hash + * function to fetch the data a word at a time when it is suitably aligned. + * This makes for a useful speedup, at the cost of having to maintain + * four code paths (aligned vs unaligned, and little-endian vs big-endian). + * It also uses two separate mixing functions mix() and final(), instead + * of a slower multi-purpose function. + */ + +/* Get a bit mask of the bits set in non-uint32 aligned addresses */ +#define UINT32_ALIGN_MASK (sizeof(uint32) - 1) + +/* Rotate a uint32 value left by k bits - note multiple evaluation! */ +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/*---------- + * mix -- mix 3 32-bit values reversibly. + * + * This is reversible, so any information in (a,b,c) before mix() is + * still in (a,b,c) after mix(). + * + * If four pairs of (a,b,c) inputs are run through mix(), or through + * mix() in reverse, there are at least 32 bits of the output that + * are sometimes the same for one pair and different for another pair. + * This was tested for: + * * pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) looks like a single 1-bit + * difference. + * * the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * This does not achieve avalanche. There are input bits of (a,b,c) + * that fail to affect some output bits of (a,b,c), especially of a. 
The + * most thoroughly mixed value is c, but it doesn't really even achieve + * avalanche in c. + * + * This allows some parallelism. Read-after-writes are good at doubling + * the number of bits affected, so the goal of mixing pulls in the opposite + * direction from the goal of parallelism. I did what I could. Rotates + * seem to cost as much as shifts on every machine I could lay my hands on, + * and rotates are much kinder to the top and bottom bits, so I used rotates. + *---------- + */ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/*---------- + * final -- final mixing of 3 32-bit values (a,b,c) into c + * + * Pairs of (a,b,c) values differing in only a few bits will usually + * produce values of c that look totally different. This was tested for + * * pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) looks like a single 1-bit + * difference. + * * the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * The use of separate functions for mix() and final() allows for a + * substantial performance increase since final() does not need to + * do well in reverse, but it does need to affect all output bits. + * mix(), on the other hand, does not need to affect all output + * bits (affecting 32 bits is enough). The original hash function had + * a single mixing operation that had to satisfy both sets of requirements + * and was slower as a result. 
+ *---------- + */ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c, 4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* + * hash_any() -- hash a variable-length key into a 32-bit value + * k : the key (the unaligned variable-length array of bytes) + * keylen : the length of the key, counting by bytes + * + * Returns a uint32 value. Every bit of the key affects every bit of + * the return value. Every 1-bit and 2-bit delta achieves avalanche. + * About 6*len+35 instructions. The best hash table sizes are powers + * of 2. There is no need to do mod a prime (mod is sooo slow!). + * If you need fewer than 32 bits, use a bitmask. + * + * This procedure must never throw elog(ERROR); the ResourceOwner code + * relies on this not to fail. + * + * Note: we could easily change this function to return a 64-bit hash value + * by using the final values of both b and c. b is perhaps a little less + * well mixed than c, however. 
+ */ +Datum +hash_any(register const unsigned char *k, register int keylen) +{ + register uint32 a, + b, + c, + len; + + /* Set up the internal state */ + len = keylen; + a = b = c = 0x9e3779b9 + len + 3923095; + + /* If the source pointer is word-aligned, we use word-wide fetches */ + if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0) + { + /* Code path for aligned source data */ + register const uint32 *ka = (const uint32 *) k; + + /* handle most of the key */ + while (len >= 12) + { + a += ka[0]; + b += ka[1]; + c += ka[2]; + mix(a, b, c); + ka += 3; + len -= 12; + } + + /* handle the last 11 bytes */ + k = (const unsigned char *) ka; +#ifdef WORDS_BIGENDIAN + switch (len) + { + case 11: + c += ((uint32) k[10] << 8); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 24); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 8); + /* fall through */ + case 6: + b += ((uint32) k[5] << 16); + /* fall through */ + case 5: + b += ((uint32) k[4] << 24); + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 8); + /* fall through */ + case 2: + a += ((uint32) k[1] << 16); + /* fall through */ + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) + { + case 11: + c += ((uint32) k[10] << 24); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 8); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 16); + /* fall through */ + case 6: + b += ((uint32) k[5] << 8); + /* fall through */ + case 5: + b += k[4]; + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 16); + /* fall through */ + case 2: + a 
+= ((uint32) k[1] << 8); + /* fall through */ + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + else + { + /* Code path for non-aligned source data */ + + /* handle most of the key */ + while (len >= 12) + { +#ifdef WORDS_BIGENDIAN + a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24)); + b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24)); + c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24)); +#else /* !WORDS_BIGENDIAN */ + a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); + b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); + c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); +#endif /* WORDS_BIGENDIAN */ + mix(a, b, c); + k += 12; + len -= 12; + } + + /* handle the last 11 bytes */ +#ifdef WORDS_BIGENDIAN + switch (len) + { + case 11: + c += ((uint32) k[10] << 8); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 24); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += k[7]; + /* fall through */ + case 7: + b += ((uint32) k[6] << 8); + /* fall through */ + case 6: + b += ((uint32) k[5] << 16); + /* fall through */ + case 5: + b += ((uint32) k[4] << 24); + /* fall through */ + case 4: + a += k[3]; + /* fall through */ + case 3: + a += ((uint32) k[2] << 8); + /* fall through */ + case 2: + a += ((uint32) k[1] << 16); + /* fall through */ + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) + { + case 11: + c += ((uint32) k[10] << 24); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 8); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b 
+= ((uint32) k[7] << 24); + /* fall through */ + case 7: + b += ((uint32) k[6] << 16); + /* fall through */ + case 6: + b += ((uint32) k[5] << 8); + /* fall through */ + case 5: + b += k[4]; + /* fall through */ + case 4: + a += ((uint32) k[3] << 24); + /* fall through */ + case 3: + a += ((uint32) k[2] << 16); + /* fall through */ + case 2: + a += ((uint32) k[1] << 8); + /* fall through */ + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + + final(a, b, c); + + /* report the result */ + return UInt32GetDatum(c); +} + +/* + * hash_any_extended() -- hash into a 64-bit value, using an optional seed + * k : the key (the unaligned variable-length array of bytes) + * len : the length of the key, counting by bytes + * seed : a 64-bit seed (0 means no seed) + * + * Returns a uint64 value. Otherwise similar to hash_any. + */ +Datum +hash_any_extended(register const unsigned char *k, register int keylen, + uint64 seed) +{ + register uint32 a, + b, + c, + len; + + /* Set up the internal state */ + len = keylen; + a = b = c = 0x9e3779b9 + len + 3923095; + + /* If the seed is non-zero, use it to perturb the internal state. */ + if (seed != 0) + { + /* + * In essence, the seed is treated as part of the data being hashed, + * but for simplicity, we pretend that it's padded with four bytes of + * zeroes so that the seed constitutes a 12-byte chunk. 
+ */ + a += (uint32) (seed >> 32); + b += (uint32) seed; + mix(a, b, c); + } + + /* If the source pointer is word-aligned, we use word-wide fetches */ + if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0) + { + /* Code path for aligned source data */ + register const uint32 *ka = (const uint32 *) k; + + /* handle most of the key */ + while (len >= 12) + { + a += ka[0]; + b += ka[1]; + c += ka[2]; + mix(a, b, c); + ka += 3; + len -= 12; + } + + /* handle the last 11 bytes */ + k = (const unsigned char *) ka; +#ifdef WORDS_BIGENDIAN + switch (len) + { + case 11: + c += ((uint32) k[10] << 8); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 24); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 8); + /* fall through */ + case 6: + b += ((uint32) k[5] << 16); + /* fall through */ + case 5: + b += ((uint32) k[4] << 24); + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 8); + /* fall through */ + case 2: + a += ((uint32) k[1] << 16); + /* fall through */ + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) + { + case 11: + c += ((uint32) k[10] << 24); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 8); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ka[1]; + a += ka[0]; + break; + case 7: + b += ((uint32) k[6] << 16); + /* fall through */ + case 6: + b += ((uint32) k[5] << 8); + /* fall through */ + case 5: + b += k[4]; + /* fall through */ + case 4: + a += ka[0]; + break; + case 3: + a += ((uint32) k[2] << 16); + /* fall through */ + case 2: + a += ((uint32) k[1] << 8); + /* fall through */ + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN 
*/ + } + else + { + /* Code path for non-aligned source data */ + + /* handle most of the key */ + while (len >= 12) + { +#ifdef WORDS_BIGENDIAN + a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24)); + b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24)); + c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24)); +#else /* !WORDS_BIGENDIAN */ + a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24)); + b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24)); + c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24)); +#endif /* WORDS_BIGENDIAN */ + mix(a, b, c); + k += 12; + len -= 12; + } + + /* handle the last 11 bytes */ +#ifdef WORDS_BIGENDIAN + switch (len) + { + case 11: + c += ((uint32) k[10] << 8); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 24); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += k[7]; + /* fall through */ + case 7: + b += ((uint32) k[6] << 8); + /* fall through */ + case 6: + b += ((uint32) k[5] << 16); + /* fall through */ + case 5: + b += ((uint32) k[4] << 24); + /* fall through */ + case 4: + a += k[3]; + /* fall through */ + case 3: + a += ((uint32) k[2] << 8); + /* fall through */ + case 2: + a += ((uint32) k[1] << 16); + /* fall through */ + case 1: + a += ((uint32) k[0] << 24); + /* case 0: nothing left to add */ + } +#else /* !WORDS_BIGENDIAN */ + switch (len) + { + case 11: + c += ((uint32) k[10] << 24); + /* fall through */ + case 10: + c += ((uint32) k[9] << 16); + /* fall through */ + case 9: + c += ((uint32) k[8] << 8); + /* fall through */ + case 8: + /* the lowest byte of c is reserved for the length */ + b += ((uint32) k[7] << 24); + /* fall through */ + case 7: + b += ((uint32) k[6] << 16); + /* fall through */ + case 6: + b += ((uint32) 
k[5] << 8); + /* fall through */ + case 5: + b += k[4]; + /* fall through */ + case 4: + a += ((uint32) k[3] << 24); + /* fall through */ + case 3: + a += ((uint32) k[2] << 16); + /* fall through */ + case 2: + a += ((uint32) k[1] << 8); + /* fall through */ + case 1: + a += k[0]; + /* case 0: nothing left to add */ + } +#endif /* WORDS_BIGENDIAN */ + } + + final(a, b, c); + + /* report the result */ + PG_RETURN_UINT64(((uint64) b << 32) | c); +} + +/* + * hash_uint32() -- hash a 32-bit value to a 32-bit value + * + * This has the same result as + * hash_any(&k, sizeof(uint32)) + * but is faster and doesn't force the caller to store k into memory. + */ +Datum +hash_uint32(uint32 k) +{ + register uint32 a, + b, + c; + + a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095; + a += k; + + final(a, b, c); + + /* report the result */ + return UInt32GetDatum(c); +} + +/* + * hash_uint32_extended() -- hash a 32-bit value to a 64-bit value, with a seed + * + * Like hash_uint32, this is a convenience function. + */ +Datum +hash_uint32_extended(uint32 k, uint64 seed) +{ + register uint32 a, + b, + c; + + a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095; + + if (seed != 0) + { + a += (uint32) (seed >> 32); + b += (uint32) seed; + mix(a, b, c); + } + + a += k; + + final(a, b, c); + + /* report the result */ + PG_RETURN_UINT64(((uint64) b << 32) | c); +} /* * string_hash: hash function for keys that are NUL-terminated strings. 
* diff --git a/src/backend/utils/resowner/resowner.c b/src/backend/utils/resowner/resowner.c index 84ddb44a41..f7597b0991 100644 --- a/src/backend/utils/resowner/resowner.c +++ b/src/backend/utils/resowner/resowner.c @@ -20,11 +20,12 @@ */ #include "postgres.h" -#include "access/hash.h" #include "jit/jit.h" +#include "storage/bufmgr.h" #include "storage/ipc.h" #include "storage/predicate.h" #include "storage/proc.h" +#include "utils/hashutils.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/resowner_private.h" diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 7b10fd2974..2946b47b46 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -96,9 +96,9 @@ #include +#include "access/hash.h" #include "access/htup_details.h" #include "access/nbtree.h" -#include "access/hash.h" #include "catalog/index.h" #include "catalog/pg_am.h" #include "commands/tablespace.h" diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 0b8eb64911..a1b0ccc898 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -24,6 +24,7 @@ #include "lib/stringinfo.h" #include "storage/bufmgr.h" #include "storage/lockdefs.h" +#include "utils/hashutils.h" #include "utils/hsearch.h" #include "utils/relcache.h" @@ -38,17 +39,6 @@ typedef uint32 Bucket; #define BUCKET_TO_BLKNO(metap,B) \ ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1) -/* - * Rotate the high 32 bits and the low 32 bits separately. The standard - * hash function sometimes rotates the low 32 bits by one bit when - * combining elements. We want extended hash functions to be compatible with - * that algorithm when the seed is 0, so we can't just do a normal rotation. - * This works, though. - */ -#define ROTATE_HIGH_AND_LOW_32BITS(v) \ - ((((v) << 1) & UINT64CONST(0xfffffffefffffffe)) | \ - (((v) >> 31) & UINT64CONST(0x100000001))) - /* * Special space for hash index pages. 
* @@ -333,12 +323,6 @@ typedef HashMetaPageData *HashMetaPage; #define HASH_WRITE BUFFER_LOCK_EXCLUSIVE #define HASH_NOLOCK (-1) -/* - * Strategy number. There's only one valid strategy for hashing: equality. - */ -#define HTEqualStrategyNumber 1 -#define HTMaxStrategyNumber 1 - /* * When a new operator class is declared, we require that the user supply * us with an amproc function for hashing a key of the new type, returning @@ -380,12 +364,6 @@ extern IndexBulkDeleteResult *hashvacuumcleanup(IndexVacuumInfo *info, extern bytea *hashoptions(Datum reloptions, bool validate); extern bool hashvalidate(Oid opclassoid); -extern Datum hash_any(register const unsigned char *k, register int keylen); -extern Datum hash_any_extended(register const unsigned char *k, - register int keylen, uint64 seed); -extern Datum hash_uint32(uint32 k); -extern Datum hash_uint32_extended(uint32 k, uint64 seed); - /* private routines */ /* hashinsert.c */ diff --git a/src/include/access/stratnum.h b/src/include/access/stratnum.h index 8fdba2884b..65b5223641 100644 --- a/src/include/access/stratnum.h +++ b/src/include/access/stratnum.h @@ -34,6 +34,13 @@ typedef uint16 StrategyNumber; #define BTMaxStrategyNumber 5 +/* + * Strategy numbers for hash indexes. There's only one valid strategy for + * hashing: equality. + */ +#define HTEqualStrategyNumber 1 + +#define HTMaxStrategyNumber 1 /* * Strategy numbers common to (some) GiST, SP-GiST and BRIN opclasses. diff --git a/src/include/utils/hashutils.h b/src/include/utils/hashutils.h index 58bacbf19f..aa90d4195a 100644 --- a/src/include/utils/hashutils.h +++ b/src/include/utils/hashutils.h @@ -7,6 +7,25 @@ #ifndef HASHUTILS_H #define HASHUTILS_H + +/* + * Rotate the high 32 bits and the low 32 bits separately. The standard + * hash function sometimes rotates the low 32 bits by one bit when + * combining elements. We want extended hash functions to be compatible with + * that algorithm when the seed is 0, so we can't just do a normal rotation. 
+ * This works, though. + */ +#define ROTATE_HIGH_AND_LOW_32BITS(v) \ + ((((v) << 1) & UINT64CONST(0xfffffffefffffffe)) | \ + (((v) >> 31) & UINT64CONST(0x100000001))) + + +extern Datum hash_any(register const unsigned char *k, register int keylen); +extern Datum hash_any_extended(register const unsigned char *k, + register int keylen, uint64 seed); +extern Datum hash_uint32(uint32 k); +extern Datum hash_uint32_extended(uint32 k, uint64 seed); + /* * Combine two 32-bit hash values, resulting in another hash value, with * decent bit mixing.