*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.56 2002/03/09 17:35:35 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
char *nulls = (char *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
+#ifdef NOT_USED
+ Relation heapRel = (Relation) PG_GETARG_POINTER(4);
+#endif
InsertIndexResult res;
HashItem hitem;
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.33 2002/03/09 17:35:35 tgl Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
{
float4 key = PG_GETARG_FLOAT4(0);
- return hash_any((char *) &key, sizeof(key));
+ return hash_any((unsigned char *) &key, sizeof(key));
}
Datum
{
float8 key = PG_GETARG_FLOAT8(0);
- return hash_any((char *) &key, sizeof(key));
+ return hash_any((unsigned char *) &key, sizeof(key));
}
Datum
{
Oid *key = (Oid *) PG_GETARG_POINTER(0);
- return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(Oid));
+ return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(Oid));
}
/*
{
int16 *key = (int16 *) PG_GETARG_POINTER(0);
- return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(int16));
+ return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(int16));
}
Datum
hashname(PG_FUNCTION_ARGS)
{
 char *key = NameStr(*PG_GETARG_NAME(0));
+ int keylen = strlen(key); /* bytes before the terminating NUL; computed once and reused below */
- Assert(strlen(key) <= NAMEDATALEN);
+ Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */
- return hash_any(key, strlen(key));
+ return hash_any((unsigned char *) key, keylen);
}
/*
struct varlena *key = PG_GETARG_VARLENA_P(0);
Datum result;
- result = hash_any(VARDATA(key), VARSIZE(key) - VARHDRSZ);
+ result = hash_any((unsigned char *) VARDATA(key),
+ VARSIZE(key) - VARHDRSZ);
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);
return result;
}
-/* This hash function was written by Bob Jenkins
+/*
+ * This hash function was written by Bob Jenkins
* (bob_jenkins@burtleburtle.net), and superficially adapted
* for PostgreSQL by Neil Conway. For more information on this
- * hash function, see http://burtleburtle.net/bob/hash/doobs.html
+ * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
+ * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
*/
-/*
+/*----------
* mix -- mix 3 32-bit values reversibly.
* For every delta with one or two bits set, and the deltas of all three
* high bits or all three low bits, whether the original value of a,b,c
* have at least 1/4 probability of changing.
* - If mix() is run forward, every bit of c will change between 1/3 and
* 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
+ *----------
*/
#define mix(a,b,c) \
{ \
* hash_any() -- hash a variable-length key into a 32-bit value
* k : the key (the unaligned variable-length array of bytes)
* len : the length of the key, counting by bytes
- * Returns a 32-bit value. Every bit of the key affects every bit of
+ *
+ * Returns a uint32 value. Every bit of the key affects every bit of
* the return value. Every 1-bit and 2-bit delta achieves avalanche.
* About 6*len+35 instructions. The best hash table sizes are powers
* of 2. There is no need to do mod a prime (mod is sooo slow!).
* If you need less than 32 bits, use a bitmask.
*/
Datum
-hash_any(register const char *k, register int keylen)
+hash_any(register const unsigned char *k, register int keylen)
{
- register Datum a,b,c,len;
-
- /* Set up the internal state */
- len = keylen;
- a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
- /* Another arbitrary value. If the hash function is called
- * multiple times, this could be the previously generated
- * hash value; however, the interface currently doesn't allow
- * this. AFAIK this isn't a big deal.
- */
- c = 3923095;
-
- /* handle most of the key */
- while (len >= 12)
- {
- a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24));
- b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24));
- c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24));
- mix(a,b,c);
- k += 12; len -= 12;
- }
-
- /* handle the last 11 bytes */
- c += keylen;
- switch(len) /* all the case statements fall through */
- {
- case 11: c+=((Datum)k[10]<<24);
- case 10: c+=((Datum)k[9]<<16);
- case 9 : c+=((Datum)k[8]<<8);
- /* the first byte of c is reserved for the length */
- case 8 : b+=((Datum)k[7]<<24);
- case 7 : b+=((Datum)k[6]<<16);
- case 6 : b+=((Datum)k[5]<<8);
- case 5 : b+=k[4];
- case 4 : a+=((Datum)k[3]<<24);
- case 3 : a+=((Datum)k[2]<<16);
- case 2 : a+=((Datum)k[1]<<8);
- case 1 : a+=k[0];
- /* case 0: nothing left to add */
- }
- mix(a,b,c);
- /* report the result */
- return c;
+ register uint32 a,b,c,len;
+
+ /* Set up the internal state */
+ len = keylen; /* NOTE(review): a negative keylen would wrap to a huge uint32 here - presumably callers never pass one; confirm */
+ a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
+ c = 3923095; /* initialize with an arbitrary value */
+
+ /* handle most of the key: 12 bytes per round, each word assembled low byte first */
+ while (len >= 12)
+ {
+ a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
+ b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
+ c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
+ mix(a,b,c);
+ k += 12; len -= 12;
+ }
+
+ /* handle the remaining 0..11 bytes */
+ c += keylen; /* fold in the original total length, not the remainder left in len */
+ switch (len) /* all the case statements fall through */
+ {
+ case 11: c+=((uint32)k[10]<<24);
+ case 10: c+=((uint32)k[9]<<16);
+ case 9 : c+=((uint32)k[8]<<8);
+ /* the low-order byte of c is reserved for the length added above */
+ case 8 : b+=((uint32)k[7]<<24);
+ case 7 : b+=((uint32)k[6]<<16);
+ case 6 : b+=((uint32)k[5]<<8);
+ case 5 : b+=k[4];
+ case 4 : a+=((uint32)k[3]<<24);
+ case 3 : a+=((uint32)k[2]<<16);
+ case 2 : a+=((uint32)k[1]<<8);
+ case 1 : a+=k[0];
+ /* case 0: nothing left to add */
+ }
+ mix(a,b,c);
+ /* report the result */
+ return UInt32GetDatum(c);
}
* Portions Copyright (c) 1994, Regents of the University of California
*
*
- * $Id: nodeHash.c,v 1.61 2002/03/06 20:49:44 momjian Exp $
+ * $Id: nodeHash.c,v 1.62 2002/03/09 17:35:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include <sys/types.h>
#include <math.h>
+#include "access/hash.h"
#include "executor/execdebug.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "utils/lsyscache.h"
-static int hashFunc(Datum key, int len, bool byVal);
+static uint32 hashFunc(Datum key, int len, bool byVal);
/* ----------------------------------------------------------------
* ExecHash
bucketno = hashFunc(keyval,
(int) hashtable->typLen,
hashtable->typByVal)
- % hashtable->totalbuckets;
+ % (uint32) hashtable->totalbuckets;
}
#ifdef HJDEBUG
/* ----------------------------------------------------------------
* hashFunc
*
- * the hash function, copied from Margo
+ * the hash function for hash joins
*
* XXX this probably ought to be replaced with datatype-specific
* hash functions, such as those already implemented for hash indexes.
* ----------------------------------------------------------------
*/
-static int
+static uint32
hashFunc(Datum key, int len, bool byVal)
{
- unsigned int h = 0;
+ unsigned char *k;
if (byVal)
{
/*
- * If it's a by-value data type, use the 'len' least significant
- * bytes of the Datum value. This should do the right thing on
- * either bigendian or littleendian hardware --- see the Datum
- * access macros in c.h.
+ * If it's a by-value data type, just hash the whole Datum value.
+ * This assumes that datatypes narrower than Datum are consistently
+ * padded (either zero-extended or sign-extended, but not random
+ * bits) to fill Datum; see the XXXGetDatum macros in postgres.h.
+ * NOTE: it would not work to do hash_any(&key, len) since this
+ * would get the wrong bytes on a big-endian machine.
*/
- while (len-- > 0)
- {
- h = (h * PRIME1) ^ (key & 0xFF);
- key >>= 8;
- }
+ k = (unsigned char *) &key;
+ len = sizeof(Datum);
}
else
{
* freeing the detoasted copy; that happens for free when the
* per-tuple memory context is reset in ExecHashGetBucket.)
*/
- unsigned char *k;
-
if (len < 0)
{
struct varlena *vkey = PG_DETOAST_DATUM(key);
}
else
k = (unsigned char *) DatumGetPointer(key);
-
- while (len-- > 0)
- h = (h * PRIME1) ^ (*k++);
}
- return h % PRIME2;
+ return DatumGetUInt32(hash_any(k, len));
}
/* ----------------------------------------------------------------
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.64 2001/11/21 05:57:33 thomas Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.65 2002/03/09 17:35:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* sizeof(TimeTzADT), so that any garbage pad bytes in the structure
* won't be included in the hash!
*/
- return hash_any((char *) key, sizeof(double) + sizeof(int4));
+ return hash_any((unsigned char *) key, sizeof(double) + sizeof(int4));
}
Datum
/*
* PostgreSQL type definitions for MAC addresses.
*
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.21 2001/08/21 21:23:21 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.22 2002/03/09 17:35:35 tgl Exp $
*/
#include "postgres.h"
{
macaddr *key = PG_GETARG_MACADDR_P(0);
- return hash_any((char *) key, sizeof(macaddr));
+ return hash_any((unsigned char *) key, sizeof(macaddr));
}
/*
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.64 2002/03/06 06:10:18 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.65 2002/03/09 17:35:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* sizeof(Interval), so that any garbage pad bytes in the structure
* won't be included in the hash!
*/
- return hash_any((char *) key, sizeof(double) + sizeof(int4));
+ return hash_any((unsigned char *) key, sizeof(double) + sizeof(int4));
}
/* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator.
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.87 2001/11/18 12:07:07 ishii Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.88 2002/03/09 17:35:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
keydata = VARDATA(key);
keylen = bcTruelen(key);
- result = hash_any(keydata, keylen);
+ result = hash_any((unsigned char *) keydata, keylen);
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.41 2002/03/02 21:39:33 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.42 2002/03/09 17:35:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
}
#if HASH_DEBUG
- fprintf(stderr, "%s\n%s%p\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
- "init_htab:",
+ fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n%s%ld\n",
"TABLE POINTER ", hashp,
"DIRECTORY SIZE ", hctl->dsize,
"SEGMENT SIZE ", hctl->ssize,
fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
where, hashp->hctl->accesses, hashp->hctl->collisions);
- fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %d segmentcount %d\n",
+ fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
hashp->hctl->nentries, hashp->hctl->keysize,
hashp->hctl->max_bucket, hashp->hctl->nsegs);
fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
call_hash(HTAB *hashp, void *k)
{
HASHHDR *hctl = hashp->hctl;
- long hash_val,
+ uint32 hash_val,
bucket;
hash_val = hashp->hash(k, (int) hctl->keysize);
if (bucket > hctl->max_bucket)
bucket = bucket & hctl->low_mask;
- return (uint32) bucket;
+ return bucket;
}
/*----------
/* caller is expected to fill the data field on return */
/* Check if it is time to split the segment */
- if (++hctl->nentries / (hctl->max_bucket + 1) > hctl->ffactor)
+ if (++hctl->nentries / (long) (hctl->max_bucket + 1) > hctl->ffactor)
{
/*
* NOTE: failure to expand table is not a fatal error, it
/*
* If we crossed a power of 2, readjust masks.
*/
- if (new_bucket > hctl->high_mask)
+ if ((uint32) new_bucket > hctl->high_mask)
{
hctl->low_mask = hctl->high_mask;
- hctl->high_mask = new_bucket | hctl->low_mask;
+ hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
}
/*
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.15 2001/10/25 05:49:51 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.16 2002/03/09 17:35:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include "access/hash.h"
#include "utils/hsearch.h"
+
/*
* string_hash: hash function for keys that are null-terminated strings.
*
*
* NOTE: this is the default hash function if none is specified.
*/
-long
+uint32
string_hash(void *key, int keysize)
{
- unsigned char *k = (unsigned char *) key;
- long h = 0;
-
- while (*k)
- h = (h * PRIME1) ^ (*k++);
-
- h %= PRIME2;
-
- return h;
+ return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key))); /* keysize is ignored: strlen decides how many bytes are hashed */
}
/*
* tag_hash: hash function for fixed-size tag values
- *
- * NB: we assume that the supplied key is aligned at least on an 'int'
- * boundary, if its size is >= sizeof(int).
*/
-long
+uint32
tag_hash(void *key, int keysize)
{
- int *k = (int *) key;
- long h = 0;
-
- /*
- * Use four byte chunks in a "jump table" to go a little faster.
- *
- * Currently the maximum keysize is 16 (mar 17 1992). I have put in
- * cases for up to 32. Bigger than this will resort to a for loop
- * (see the default case).
- */
- switch (keysize)
- {
- case 8 * sizeof(int):
- h = (h * PRIME1) ^(*k++);
- /* fall through */
-
- case 7 * sizeof(int):
- h = (h * PRIME1) ^(*k++);
- /* fall through */
-
- case 6 * sizeof(int):
- h = (h * PRIME1) ^(*k++);
- /* fall through */
-
- case 5 * sizeof(int):
- h = (h * PRIME1) ^(*k++);
- /* fall through */
-
- case 4 * sizeof(int):
- h = (h * PRIME1) ^(*k++);
- /* fall through */
-
- case 3 * sizeof(int):
- h = (h * PRIME1) ^(*k++);
- /* fall through */
-
- case 2 * sizeof(int):
- h = (h * PRIME1) ^(*k++);
- /* fall through */
-
- case sizeof(int):
- h = (h * PRIME1) ^(*k++);
- break;
-
- default:
- /* Do an int at a time */
- for (; keysize >= (int) sizeof(int); keysize -= sizeof(int))
- h = (h * PRIME1) ^ (*k++);
-
- /* Cope with any partial-int leftover bytes */
- if (keysize > 0)
- {
- unsigned char *keybyte = (unsigned char *) k;
-
- do
- h = (h * PRIME1) ^ (*keybyte++);
- while (--keysize > 0);
- }
- break;
- }
-
- h %= PRIME2;
-
- return h;
+ return DatumGetUInt32(hash_any((unsigned char *) key, keysize)); /* hashes exactly keysize bytes; hash_any reads the key byte by byte, so no int alignment of key is needed */
}
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: hash.h,v 1.44 2002/03/06 20:49:45 momjian Exp $
+ * $Id: hash.h,v 1.45 2002/03/09 17:35:37 tgl Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
* Datatype-specific hash functions in hashfunc.c.
*
* NOTE: some of these are also used by catcache operations, without
- * any direct connection to hash indexes.
+ * any direct connection to hash indexes. The common hash_any
+ * routine is also used by dynahash tables and hash joins.
*/
extern Datum hashchar(PG_FUNCTION_ARGS);
extern Datum hashint2(PG_FUNCTION_ARGS);
extern Datum hashint2vector(PG_FUNCTION_ARGS);
extern Datum hashname(PG_FUNCTION_ARGS);
extern Datum hashvarlena(PG_FUNCTION_ARGS);
-extern Datum hash_any(register const char *k, register int keylen);
+extern Datum hash_any(register const unsigned char *k, register int keylen);
/* private routines */
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: hsearch.h,v 1.25 2001/11/05 17:46:36 momjian Exp $
+ * $Id: hsearch.h,v 1.26 2002/03/09 17:35:37 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#define DEF_DIRSIZE 256
#define DEF_FFACTOR 1 /* default fill factor */
-#define PRIME1 37 /* for the hash function */
-#define PRIME2 1048583
-
/*
* HASHELEMENT is the private part of a hashtable entry. The caller's data
{
long dsize; /* Directory Size */
long ssize; /* Segment Size --- must be power of 2 */
- long sshift; /* Segment shift */
- long max_bucket; /* ID of Maximum bucket in use */
- long high_mask; /* Mask to modulo into entire table */
- long low_mask; /* Mask to modulo into lower half of table */
+ int sshift; /* Segment shift = log2(ssize) */
+ uint32 max_bucket; /* ID of Maximum bucket in use */
+ uint32 high_mask; /* Mask to modulo into entire table */
+ uint32 low_mask; /* Mask to modulo into lower half of table */
long ffactor; /* Fill factor */
long nentries; /* Number of entries in hash table */
long nsegs; /* Number of allocated segments */
{
HASHHDR *hctl; /* shared control information */
HASHSEGMENT *dir; /* directory of segment starts */
- long (*hash) (void *key, int keysize); /* Hash Function */
+ uint32 (*hash) (void *key, int keysize); /* Hash Function */
void *(*alloc) (Size); /* memory allocator */
MemoryContext hcxt; /* memory context if default allocator
* used */
long ssize; /* Segment Size */
long dsize; /* (initial) Directory Size */
long ffactor; /* Fill factor */
- long (*hash) (void *key, int keysize); /* Hash Function */
+ uint32 (*hash) (void *key, int keysize); /* Hash Function */
long keysize; /* hash key length in bytes */
long entrysize; /* total user element size in bytes */
long max_dsize; /* limit to dsize if directory size is
typedef struct
{
 HTAB *hashp;
- long curBucket; /* index of current bucket */
+ uint32 curBucket; /* index of current bucket; uint32 to match the type of max_bucket */
 HASHELEMENT *curEntry; /* current entry in bucket */
} HASH_SEQ_STATUS;
/*
* prototypes for functions in hashfn.c
*/
-extern long string_hash(void *key, int keysize);
-extern long tag_hash(void *key, int keysize);
+extern uint32 string_hash(void *key, int keysize);
+extern uint32 tag_hash(void *key, int keysize);
#endif /* HSEARCH_H */