*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.54 2002/03/02 21:39:16 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $
*
* NOTES
* This file contains only the public interface routines.
char *nulls = (char *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
-#ifdef NOT_USED
- Relation heapRel = (Relation) PG_GETARG_POINTER(4);
-#endif
InsertIndexResult res;
HashItem hitem;
IndexTuple itup;
/*
* hashmarkpos() -- save current scan position
- *
*/
Datum
hashmarkpos(PG_FUNCTION_ARGS)
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.31 2002/02/25 04:06:47 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
{
char *key = NameStr(*PG_GETARG_NAME(0));
+ Assert(strlen(key) <= NAMEDATALEN);
+
return hash_any(key, strlen(key));
}
return result;
}
+/*
+ * This hash function was written by Bob Jenkins
+ * (bob_jenkins@burtleburtle.net), and superficially adapted
+ * for PostgreSQL by Neil Conway. For more information on this
+ * hash function, see http://burtleburtle.net/bob/hash/doobs.html
+ */
/*
- * hash_any --- compute a hash function for any specified chunk of memory
- *
- * This can be used as the underlying hash function for any pass-by-reference
- * data type in which there are no non-significant bits.
- *
- * (Comment from the original db3 hashing code: )
- *
- * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte
- * units. On the first time through the loop we get the 'leftover bytes'
- * (strlen % 8). On every later iteration, we perform 8 HASHC's so we handle
- * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If
- * this routine is heavily used enough, it's worth the ugly coding.
- *
- * "OZ's original sdbm hash"
+ * mix -- mix 3 32-bit values reversibly.
+ * For every delta with one or two bits set, and the deltas of all three
+ * high bits or all three low bits, whether the original value of a,b,c
+ * is almost all zero or is uniformly distributed,
+ * - If mix() is run forward or backward, at least 32 bits in a,b,c
+ * have at least 1/4 probability of changing.
+ * - If mix() is run forward, every bit of c will change between 1/3 and
+ * 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
+ */
+#define mix(a,b,c) \
+{ \
+ a -= b; a -= c; a ^= (c>>13); \
+ b -= c; b -= a; b ^= (a<<8); \
+ c -= a; c -= b; c ^= (b>>13); \
+ a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); \
+ c -= a; c -= b; c ^= (b>>5); \
+ a -= b; a -= c; a ^= (c>>3); \
+ b -= c; b -= a; b ^= (a<<10); \
+ c -= a; c -= b; c ^= (b>>15); \
+}
+
+/*
+ * hash_any() -- hash a variable-length key into a 32-bit value
+ * k : the key (the unaligned variable-length array of bytes)
+ * len : the length of the key, counting by bytes
+ * Returns a 32-bit value. Every bit of the key affects every bit of
+ * the return value. Every 1-bit and 2-bit delta achieves avalanche.
+ * About 6*len+35 instructions. The best hash table sizes are powers
+ * of 2. There is no need to do mod a prime (mod is sooo slow!).
+ * If you need less than 32 bits, use a bitmask.
*/
Datum
-hash_any(const char *keydata, int keylen)
+hash_any(register const char *k, register int keylen)
{
- uint32 n;
- int loop;
-
-#define HASHC n = *keydata++ + 65599 * n
-
- n = 0;
- if (keylen > 0)
- {
- loop = (keylen + 8 - 1) >> 3;
-
- switch (keylen & (8 - 1))
- {
- case 0:
- do
- { /* All fall throughs */
- HASHC;
- case 7:
- HASHC;
- case 6:
- HASHC;
- case 5:
- HASHC;
- case 4:
- HASHC;
- case 3:
- HASHC;
- case 2:
- HASHC;
- case 1:
- HASHC;
- } while (--loop);
- }
- }
-
-#undef HASHC
-
- PG_RETURN_UINT32(n);
+ register Datum a,b,c,len;
+
+ /* Set up the internal state */
+ len = keylen;
+ a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
+ /* Another arbitrary value. If the hash function is called
+ * multiple times, this could be the previously generated
+ * hash value; however, the interface currently doesn't allow
+ * this. In practice this should not matter.
+ */
+ c = 3923095;
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+ a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24));
+ b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24));
+ c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24));
+ mix(a,b,c);
+ k += 12; len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+ c += keylen;
+ switch(len) /* all the case statements fall through */
+ {
+ case 11: c+=((Datum)k[10]<<24);
+ case 10: c+=((Datum)k[9]<<16);
+ case 9 : c+=((Datum)k[8]<<8);
+ /* the first byte of c is reserved for the length */
+ case 8 : b+=((Datum)k[7]<<24);
+ case 7 : b+=((Datum)k[6]<<16);
+ case 6 : b+=((Datum)k[5]<<8);
+ case 5 : b+=k[4];
+ case 4 : a+=((Datum)k[3]<<24);
+ case 3 : a+=((Datum)k[2]<<16);
+ case 2 : a+=((Datum)k[1]<<8);
+ case 1 : a+=k[0];
+ /* case 0: nothing left to add */
+ }
+ mix(a,b,c);
+ /* report the result */
+ return c;
}
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.23 2001/10/25 05:49:21 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.24 2002/03/06 20:49:40 momjian Exp $
*
*-------------------------------------------------------------------------
*/
itup = &(hitem->hash_itup);
if ((natts = rel->rd_rel->relnatts) != 1)
elog(ERROR, "Hash indices valid for only one index key.");
- itup_scankey = _hash_mkscankey(rel, itup, metap);
+ itup_scankey = _hash_mkscankey(rel, itup);
/*
* find the first page in the bucket chain containing this key and
RelationGetRelationName(rel));
/* write the buffer, but hold our lock */
- _hash_wrtnorelbuf(rel, buf);
+ _hash_wrtnorelbuf(buf);
return itup_off;
}
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.31 2001/10/25 05:49:21 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.32 2002/03/06 20:49:41 momjian Exp $
*
* NOTES
* Overflow pages look like ordinary relation pages.
ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
ovflopaque->hasho_oaddr = oaddr;
ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
- _hash_wrtnorelbuf(rel, ovflbuf);
+ _hash_wrtnorelbuf(ovflbuf);
/* logically chain overflow page to previous page */
pageopaque->hasho_nextblkno = ovflblkno;
- _hash_wrtnorelbuf(rel, buf);
+ _hash_wrtnorelbuf(buf);
return ovflbuf;
}
* the "next" ItemId.
*/
PageIndexTupleDelete(rpage, roffnum);
- _hash_wrtnorelbuf(rel, rbuf);
+ _hash_wrtnorelbuf(rbuf);
/*
* if the "read" page is now empty because of the deletion, free
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.34 2002/01/15 22:14:16 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.35 2002/03/06 20:49:42 momjian Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
elog(ERROR, "Problem with _hash_initbitmap.");
/* all done */
- _hash_wrtnorelbuf(rel, metabuf);
+ _hash_wrtnorelbuf(metabuf);
/*
* initialize the first two buckets
* or a reference to the buffer.
*/
void
-_hash_wrtnorelbuf(Relation rel, Buffer buf)
+_hash_wrtnorelbuf(Buffer buf)
{
BlockNumber blkno;
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
PageIndexTupleDelete(page, offno);
- _hash_wrtnorelbuf(rel, buf);
+ _hash_wrtnorelbuf(buf);
if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE))
{
nopaque->hasho_flag = LH_BUCKET_PAGE;
nopaque->hasho_oaddr = InvalidOvflAddress;
nopaque->hasho_bucket = nbucket;
- _hash_wrtnorelbuf(rel, nbuf);
+ _hash_wrtnorelbuf(nbuf);
/*
* make sure the old bucket isn't empty. advance 'opage' and friends
== InvalidOffsetNumber)
elog(ERROR, "_hash_splitpage: failed to add index item to %s",
RelationGetRelationName(rel));
- _hash_wrtnorelbuf(rel, nbuf);
+ _hash_wrtnorelbuf(nbuf);
/*
* now delete the tuple from the old bucket. after this
* instead of calling PageGetMaxOffsetNumber.
*/
PageIndexTupleDelete(opage, ooffnum);
- _hash_wrtnorelbuf(rel, obuf);
+ _hash_wrtnorelbuf(obuf);
omaxoffnum = OffsetNumberPrev(omaxoffnum);
/*
/*-------------------------------------------------------------------------
*
- * btutils.c
- * Utility code for Postgres btree implementation.
+ * hashutil.c
+ * Utility code for Postgres hash implementation.
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.27 2001/10/06 23:21:43 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.28 2002/03/06 20:49:43 momjian Exp $
*
*-------------------------------------------------------------------------
*/
ScanKey
-_hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap)
+_hash_mkscankey(Relation rel, IndexTuple itup)
{
ScanKey skey;
TupleDesc itupdesc;
* Portions Copyright (c) 1994, Regents of the University of California
*
*
- * $Id: nodeHash.c,v 1.60 2001/10/25 05:49:28 momjian Exp $
+ * $Id: nodeHash.c,v 1.61 2002/03/06 20:49:44 momjian Exp $
*
*-------------------------------------------------------------------------
*/
HashState *hashstate;
Plan *outerPlan;
- SO1_printf("ExecInitHash: %s\n",
- "initializing hash node");
+ SO_printf("ExecInitHash: initializing hash node\n");
/*
* assign the node's execution state
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/cache/catcache.c,v 1.91 2002/03/06 06:10:21 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/cache/catcache.c,v 1.92 2002/03/06 20:49:45 momjian Exp $
*
*-------------------------------------------------------------------------
*/
/*
* Constants related to size of the catcache.
*
- * NCCBUCKETS should be prime and must be less than 64K (because
+ * NCCBUCKETS must be a power of two and must be less than 64K (because
* SharedInvalCatcacheMsg crams hash indexes into a uint16 field). In
* practice it should be a lot less, anyway, to avoid chewing up too much
* space on hash bucket headers.
* MAXCCTUPLES could be as small as a few hundred, if per-backend memory
* consumption is at a premium.
*/
-#define NCCBUCKETS 257 /* Hash buckets per CatCache */
+#define NCCBUCKETS 256 /* Hash buckets per CatCache */
#define MAXCCTUPLES 5000 /* Maximum # of tuples in all caches */
+/*
+ * Given a hash value and the size of the hash table, find the bucket
+ * in which the hash value belongs. Since the hash table must contain
+ * a power-of-2 number of elements, this is a simple bitmask.
+ */
+#define HASH_INDEX(h, sz) ((Index) ((h) & ((sz) - 1)))
+
/*
* variables, macros and other stuff
/*
* inspect the proper hash bucket for matches
*/
- hashIndex = (Index) (hashValue % (uint32) ccp->cc_size);
+ hashIndex = HASH_INDEX(hashValue, ccp->cc_nbuckets);
for (elt = DLGetHead(&ccp->cc_bucket[hashIndex]); elt; elt = nextelt)
{
int i;
/* Remove each tuple in this cache, or at least mark it dead */
- for (i = 0; i < cache->cc_size; i++)
+ for (i = 0; i < cache->cc_nbuckets; i++)
{
Dlelem *elt,
*nextelt;
continue; /* nope, leave it alone */
/* Yes, scan the tuples and remove those related to relId */
- for (i = 0; i < cache->cc_size; i++)
+ for (i = 0; i < cache->cc_nbuckets; i++)
{
Dlelem *elt,
*nextelt;
#define InitCatCache_DEBUG1 \
do { \
elog(DEBUG1, "InitCatCache: rel=%s id=%d nkeys=%d size=%d\n", \
- cp->cc_relname, cp->id, cp->cc_nkeys, cp->cc_size); \
+ cp->cc_relname, cp->id, cp->cc_nkeys, cp->cc_nbuckets); \
} while(0)
#else
cp->cc_tupdesc = (TupleDesc) NULL;
cp->cc_reloidattr = reloidattr;
cp->cc_ntup = 0;
- cp->cc_size = NCCBUCKETS;
+ cp->cc_nbuckets = NCCBUCKETS;
cp->cc_nkeys = nkeys;
for (i = 0; i < nkeys; ++i)
cp->cc_key[i] = key[i];
* find the hash bucket in which to look for the tuple
*/
hashValue = CatalogCacheComputeHashValue(cache, cur_skey);
- hashIndex = (Index) (hashValue % (uint32) cache->cc_size);
+ hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets);
/*
* scan the hash bucket until we find a match or exhaust our tuples
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: hash.h,v 1.43 2002/02/25 04:06:52 momjian Exp $
+ * $Id: hash.h,v 1.44 2002/03/06 20:49:45 momjian Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
extern Datum hashint2vector(PG_FUNCTION_ARGS);
extern Datum hashname(PG_FUNCTION_ARGS);
extern Datum hashvarlena(PG_FUNCTION_ARGS);
-extern Datum hash_any(const char *keydata, int keylen);
-
+extern Datum hash_any(register const char *k, register int keylen);
/* private routines */
extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access);
extern void _hash_relbuf(Relation rel, Buffer buf, int access);
extern void _hash_wrtbuf(Relation rel, Buffer buf);
-extern void _hash_wrtnorelbuf(Relation rel, Buffer buf);
+extern void _hash_wrtnorelbuf(Buffer buf);
extern Page _hash_chgbufaccess(Relation rel, Buffer *bufp, int from_access,
int to_access);
extern void _hash_pageinit(Page page, Size size);
/* hashutil.c */
-extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup,
- HashMetaPage metap);
+extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup);
extern void _hash_freeskey(ScanKey skey);
extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup);
extern HashItem _hash_formitem(IndexTuple itup);
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: catcache.h,v 1.39 2002/03/03 17:47:56 tgl Exp $
+ * $Id: catcache.h,v 1.40 2002/03/06 20:49:46 momjian Exp $
*
*-------------------------------------------------------------------------
*/
TupleDesc cc_tupdesc; /* tuple descriptor (copied from reldesc) */
int cc_reloidattr; /* AttrNumber of relation OID attr, or 0 */
int cc_ntup; /* # of tuples currently in this cache */
- int cc_size; /* # of hash buckets in this cache */
- int cc_nkeys; /* number of keys (1..4) */
+ int cc_nbuckets; /* # of hash buckets in this cache */
+ int cc_nkeys; /* # of keys (1..4) */
int cc_key[4]; /* AttrNumber of each key */
PGFunction cc_hashfunc[4]; /* hash function to use for each key */
ScanKeyData cc_skey[4]; /* precomputed key info for heap scans */