1 /*-------------------------------------------------------------------------
7 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
12 * $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.41 2002/03/02 21:39:33 momjian Exp $
14 *-------------------------------------------------------------------------
18 * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
19 * Coded into C, with minor code improvements, and with hsearch(3) interface,
20 * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
21 * also, hcreate/hdestroy routines added to simulate hsearch(3).
23 * These routines simulate hsearch(3) and family, with the important
24 * difference that the hash table is dynamic - can grow indefinitely
25 * beyond its original size (as supplied to hcreate()).
27 * Performance appears to be comparable to that of hsearch(3).
28 * The 'source-code' options referred to in hsearch(3)'s 'man' page
29 * are not implemented; otherwise functionality is identical.
31 * Compilation controls:
32 * DEBUG controls some informative traces, mainly for debugging.
33 * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
34 * when combined with HASH_DEBUG, these are displayed by hdestroy().
36 * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
37 * concatenation property, in probably unnecessary code 'optimisation'.
39 * Modified margo@postgres.berkeley.edu February 1990
40 * added multiple table interface
41 * Modified by sullivan@postgres.berkeley.edu April 1990
42 * changed ctl structure for shared memory
47 #include <sys/types.h>
49 #include "utils/dynahash.h"
50 #include "utils/hsearch.h"
51 #include "utils/memutils.h"
54 * Key (also entry) part of a HASHELEMENT
/*
 * ELEMENTKEY(helem): the caller-visible entry (key + data) is stored
 * immediately after the HASHELEMENT header, at the next MAXALIGN boundary.
 */
56 #define ELEMENTKEY(helem) (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))
59 * Fast MOD arithmetic, assuming that y is a power of 2 !
/* MOD(x,y) == x % y via bit mask; valid only when y is a power of 2, so (y-1) is an all-ones mask */
61 #define MOD(x,y) ((x) & ((y)-1))
64 * Private function prototypes
/* allocate from CurrentDynaHashCxt (local-memory tables only) */
66 static void *DynaHashAlloc(Size size);
/* apply table's hash function and reduce to a bucket number (linear hashing) */
67 static uint32 call_hash(HTAB *hashp, void *k);
/* allocate and zero one bucket-array segment */
68 static HASHSEGMENT seg_alloc(HTAB *hashp);
/* allocate a chunk of elements and push them on the table's freelist */
69 static bool element_alloc(HTAB *hashp);
/* double the segment directory (local tables only; bounded by max_dsize) */
70 static bool dir_realloc(HTAB *hashp);
/* add one bucket, splitting the corresponding old bucket's chain */
71 static bool expand_table(HTAB *hashp);
/* fill HASHHDR with default parameters */
72 static bool hdefault(HTAB *hashp);
/* size and allocate directory/segments for a new table of ~nelem entries */
73 static bool init_htab(HTAB *hashp, long nelem);
/* report corruption: elog and do not return */
74 static void hash_corrupted(HTAB *hashp);
78 * memory allocation routines
/*
 * DynaHashCxt is the parent context for all local-memory hash tables;
 * CurrentDynaHashCxt is set (by hash_create and friends) to the context
 * DynaHashAlloc should draw from for the table currently being worked on.
 */
80 static MemoryContext DynaHashCxt = NULL;
81 static MemoryContext CurrentDynaHashCxt = NULL;
/* Allocate from the currently-selected hash table context. */
84 DynaHashAlloc(Size size)
86 Assert(MemoryContextIsValid(CurrentDynaHashCxt));
87 return MemoryContextAlloc(CurrentDynaHashCxt, size);
/* default allocator/deallocator pair for non-shared tables */
90 #define MEM_ALLOC DynaHashAlloc
91 #define MEM_FREE pfree
95 static long hash_accesses,
101 /************************** CREATE ROUTINES **********************/
/*
 * hash_create -- create a new dynamic hash table, or attach to an existing
 * shared-memory one.
 *
 * tabname: table name, copied into table-local storage (used in messages).
 * nelem:   expected number of entries; a sizing hint passed to init_htab().
 * info/flags: each HASH_xxx bit in flags says which field(s) of *info
 *   override the defaults installed by hdefault().
 *
 * NOTE(review): this listing is elided -- declarations, else-branches and
 * closing braces are not shown; the comments below describe only the
 * visible statements.
 */
104 hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
109 /* First time through, create a memory context for hash tables */
111 DynaHashCxt = AllocSetContextCreate(TopMemoryContext,
113 ALLOCSET_DEFAULT_MINSIZE,
114 ALLOCSET_DEFAULT_INITSIZE,
115 ALLOCSET_DEFAULT_MAXSIZE);
117 /* Select allocation context for this hash table */
118 if (flags & HASH_CONTEXT)
119 CurrentDynaHashCxt = info->hcxt;
121 CurrentDynaHashCxt = DynaHashCxt;
123 /* Initialize the hash header */
124 hashp = (HTAB *) MEM_ALLOC(sizeof(HTAB));
127 MemSet(hashp, 0, sizeof(HTAB));
/* private copy of the name, so the caller's string need not outlive the table */
129 hashp->tabname = (char *) MEM_ALLOC(strlen(tabname) + 1);
130 strcpy(hashp->tabname, tabname);
132 if (flags & HASH_FUNCTION)
133 hashp->hash = info->hash;
135 hashp->hash = string_hash; /* default hash function */
137 if (flags & HASH_SHARED_MEM)
140 * ctl structure is preallocated for shared memory tables. Note
141 * that HASH_DIRSIZE had better be set as well.
/* shared tables: header, directory and allocator are all supplied by caller */
143 hashp->hctl = info->hctl;
144 hashp->dir = info->dir;
145 hashp->alloc = info->alloc;
147 hashp->isshared = true;
149 /* hash table already exists, we're just attaching to it */
150 if (flags & HASH_ATTACH)
155 /* setup hash table defaults */
158 hashp->alloc = MEM_ALLOC;
159 hashp->hcxt = DynaHashCxt;
160 hashp->isshared = false;
/* local (non-attached) table: allocate and default-fill the header */
165 hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
170 if (!hdefault(hashp))
174 #ifdef HASH_STATISTICS
175 hctl->accesses = hctl->collisions = 0;
/* --- per-flag overrides of the hdefault() values --- */
178 if (flags & HASH_SEGMENT)
180 hctl->ssize = info->ssize;
181 hctl->sshift = my_log2(info->ssize);
182 /* ssize had better be a power of 2 */
183 Assert(hctl->ssize == (1L << hctl->sshift));
185 if (flags & HASH_FFACTOR)
186 hctl->ffactor = info->ffactor;
189 * SHM hash tables have fixed directory size passed by the caller.
191 if (flags & HASH_DIRSIZE)
193 hctl->max_dsize = info->max_dsize;
194 hctl->dsize = info->dsize;
198 * hash table now allocates space for key and data but you have to say
199 * how much space to allocate
201 if (flags & HASH_ELEM)
203 hctl->keysize = info->keysize;
204 hctl->entrysize = info->entrysize;
207 if (flags & HASH_ALLOC)
208 hashp->alloc = info->alloc;
211 if (flags & HASH_CONTEXT)
213 /* hash table structures live in child of given context */
214 CurrentDynaHashCxt = AllocSetContextCreate(info->hcxt,
216 ALLOCSET_DEFAULT_MINSIZE,
217 ALLOCSET_DEFAULT_INITSIZE,
218 ALLOCSET_DEFAULT_MAXSIZE);
219 hashp->hcxt = CurrentDynaHashCxt;
223 /* hash table structures live in child of DynaHashCxt */
224 CurrentDynaHashCxt = AllocSetContextCreate(DynaHashCxt,
226 ALLOCSET_DEFAULT_MINSIZE,
227 ALLOCSET_DEFAULT_INITSIZE,
228 ALLOCSET_DEFAULT_MAXSIZE);
229 hashp->hcxt = CurrentDynaHashCxt;
/* allocate directory and initial segments; failure path elided in listing */
233 if (!init_htab(hashp, nelem))
242 * Set default HASHHDR parameters.
/* Returns true on success (the visible body cannot fail; boolean kept for API symmetry). */
245 hdefault(HTAB *hashp)
247 HASHHDR *hctl = hashp->hctl;
249 MemSet(hctl, 0, sizeof(HASHHDR));
/* segment geometry: DEF_SEGSIZE buckets per segment, DEF_DIRSIZE segments */
251 hctl->ssize = DEF_SEGSIZE;
252 hctl->sshift = DEF_SEGSIZE_SHIFT;
253 hctl->dsize = DEF_DIRSIZE;
/* target entries-per-bucket ratio that triggers expand_table() */
254 hctl->ffactor = DEF_FFACTOR;
258 /* I added these MS. */
260 /* rather pointless defaults for key & entry size */
261 hctl->keysize = sizeof(char *);
262 hctl->entrysize = 2 * sizeof(char *);
264 /* table has no fixed maximum size */
265 hctl->max_dsize = NO_MAX_DSIZE;
267 /* garbage collection for HASH_REMOVE */
268 hctl->freeList = NULL;
/*
 * init_htab -- size and allocate the directory and initial segments for a
 * new table expected to hold ~nelem entries.  Returns false on failure
 * (allocation paths are elided in this listing).
 */
275 init_htab(HTAB *hashp, long nelem)
277 HASHHDR *hctl = hashp->hctl;
283 * Divide number of elements by the fill factor to determine a desired
284 * number of buckets. Allocate space for the next greater power of
285 * two number of buckets
287 nelem = (nelem - 1) / hctl->ffactor + 1;
289 nbuckets = 1 << my_log2(nelem);
/* linear-hashing masks: start with low == high range covered, see call_hash() */
291 hctl->max_bucket = hctl->low_mask = nbuckets - 1;
292 hctl->high_mask = (nbuckets << 1) - 1;
295 * Figure number of directory segments needed, round up to a power of
298 nsegs = (nbuckets - 1) / hctl->ssize + 1;
299 nsegs = 1 << my_log2(nsegs);
302 * Make sure directory is big enough. If pre-allocated directory is
303 * too small, choke (caller screwed up).
305 if (nsegs > hctl->dsize)
313 /* Allocate a directory */
/* point DynaHashAlloc at this table's context in case hashp->alloc is MEM_ALLOC */
316 CurrentDynaHashCxt = hashp->hcxt;
317 hashp->dir = (HASHSEGMENT *)
318 hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
323 /* Allocate initial segments */
324 for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
326 *segp = seg_alloc(hashp);
/* debug dump of the freshly built table geometry (HASH_DEBUG builds) */
332 fprintf(stderr, "%s\n%s%p\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
334 "TABLE POINTER ", hashp,
335 "DIRECTORY SIZE ", hctl->dsize,
336 "SEGMENT SIZE ", hctl->ssize,
337 "SEGMENT SHIFT ", hctl->sshift,
338 "FILL FACTOR ", hctl->ffactor,
339 "MAX BUCKET ", hctl->max_bucket,
340 "HIGH MASK ", hctl->high_mask,
341 "LOW MASK ", hctl->low_mask,
342 "NSEGS ", hctl->nsegs,
343 "NENTRIES ", hctl->nentries);
349 * Estimate the space needed for a hashtable containing the given number
350 * of entries of given size.
351 * NOTE: this is used to estimate the footprint of hashtables in shared
352 * memory; therefore it does not count HTAB which is in local memory.
353 * NB: assumes that all hash structure parameters have default values!
/* Mirrors the sizing arithmetic of init_htab()/dir_realloc(); keep in sync. */
356 hash_estimate_size(long num_entries, long entrysize)
365 /* estimate number of buckets wanted */
366 nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
367 /* # of segments needed for nBuckets */
368 nSegments = 1L << my_log2((nBuckets - 1) / DEF_SEGSIZE + 1);
369 /* directory entries */
370 nDirEntries = DEF_DIRSIZE;
371 while (nDirEntries < nSegments)
372 nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
374 /* fixed control info */
375 size += MAXALIGN(sizeof(HASHHDR)); /* but not HTAB, per above */
/* directory array of segment pointers */
377 size += MAXALIGN(nDirEntries * sizeof(HASHSEGMENT));
/* the bucket-pointer segments themselves */
379 size += nSegments * MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET));
380 /* elements --- allocated in groups of HASHELEMENT_ALLOC_INCR */
381 elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
382 nElementAllocs = (num_entries - 1) / HASHELEMENT_ALLOC_INCR + 1;
383 size += nElementAllocs * HASHELEMENT_ALLOC_INCR * elementSize;
389 * Select an appropriate directory size for a hashtable with the given
390 * maximum number of entries.
391 * This is only needed for hashtables in shared memory, whose directories
392 * cannot be expanded dynamically.
393 * NB: assumes that all hash structure parameters have default values!
395 * XXX this had better agree with the behavior of init_htab()...
/* Same bucket/segment arithmetic as hash_estimate_size(); returns nDirEntries (return elided). */
398 hash_select_dirsize(long num_entries)
404 /* estimate number of buckets wanted */
405 nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
406 /* # of segments needed for nBuckets */
407 nSegments = 1L << my_log2((nBuckets - 1) / DEF_SEGSIZE + 1);
408 /* directory entries */
409 nDirEntries = DEF_DIRSIZE;
410 while (nDirEntries < nSegments)
411 nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
417 /********************** DESTROY ROUTINES ************************/
/*
 * hash_destroy -- free a local-memory hash table.
 * Only valid for tables created with the default allocator (asserted
 * below); shared-memory tables are never destroyed this way.
 */
420 hash_destroy(HTAB *hashp)
424 /* allocation method must be one we know how to free, too */
425 Assert(hashp->alloc == MEM_ALLOC);
426 /* so this hashtable must have its own context */
427 Assert(hashp->hcxt != NULL);
429 hash_stats("destroy", hashp);
432 * Free buckets, dir etc. by destroying the hash table's memory
/* one context-delete frees directory, segments and all elements at once */
435 MemoryContextDelete(hashp->hcxt);
438 * Free the HTAB and control structure, which are allocated in the
439 * parent context (DynaHashCxt or the context given by the caller
/* hctl/tabname/HTAB live in the parent, not the deleted child context */
442 MEM_FREE(hashp->hctl);
443 MEM_FREE(hashp->tabname);
/*
 * hash_stats -- print statistics about the given table to stderr.
 * The per-table accesses/collisions lines are only meaningful when
 * HASH_STATISTICS is compiled in (conditional compilation elided here).
 */
449 hash_stats(const char *where, HTAB *hashp)
453 fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
454 where, hashp->hctl->accesses, hashp->hctl->collisions);
456 fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %d segmentcount %d\n",
457 hashp->hctl->nentries, hashp->hctl->keysize,
458 hashp->hctl->max_bucket, hashp->hctl->nsegs);
459 fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
460 where, hash_accesses, hash_collisions);
461 fprintf(stderr, "hash_stats: total expansions %ld\n",
467 /*******************************SEARCH ROUTINES *****************************/
/*
 * call_hash -- hash the key and reduce to a bucket number.
 * Linear-hashing reduction: mask with high_mask first; if that lands past
 * max_bucket (a not-yet-split bucket), fall back to low_mask.  This is why
 * expand_table() can split exactly one bucket at a time.
 */
470 call_hash(HTAB *hashp, void *k)
472 HASHHDR *hctl = hashp->hctl;
476 hash_val = hashp->hash(k, (int) hctl->keysize);
478 bucket = hash_val & hctl->high_mask;
479 if (bucket > hctl->max_bucket)
480 bucket = bucket & hctl->low_mask;
482 return (uint32) bucket;
486 * hash_search -- look up key in table and perform action
489 * HASH_FIND: look up key in table
490 * HASH_ENTER: look up key in table, creating entry if not present
491 * HASH_REMOVE: look up key in table, remove entry if present
492 * HASH_FIND_SAVE: look up key in table, also save in static var
493 * HASH_REMOVE_SAVED: remove entry saved by HASH_FIND_SAVE
495 * Return value is a pointer to the element found/entered/removed if any,
496 * or NULL if no match was found. (NB: in the case of the REMOVE actions,
497 * the result is a dangling pointer that shouldn't be dereferenced!)
498 * A NULL result for HASH_ENTER implies we ran out of memory.
500 * If foundPtr isn't NULL, then *foundPtr is set TRUE if we found an
501 * existing entry in the table, FALSE otherwise. This is needed in the
502 * HASH_ENTER case, but is redundant with the return value otherwise.
504 * The HASH_FIND_SAVE/HASH_REMOVE_SAVED interface is a hack to save one
505 * table lookup in a find/process/remove scenario. Note that no other
506 * addition or removal in the table can safely happen in between.
510 hash_search(HTAB *hashp,
515 HASHHDR *hctl = hashp->hctl;
/* saveState (static) carries bucket/pred pointers between FIND_SAVE and REMOVE_SAVED */
520 HASHBUCKET currBucket;
521 HASHBUCKET *prevBucketPtr;
525 HASHBUCKET currBucket;
526 HASHBUCKET *prevBucketPtr;
535 * Do the initial lookup (or recall result of prior lookup)
537 if (action == HASH_REMOVE_SAVED)
539 currBucket = saveState.currBucket;
540 prevBucketPtr = saveState.prevBucketPtr;
543 * Try to catch subsequent errors
/* deliberate side effect: the assignment clears the saved state, but only
 * in assert-enabled builds -- a second REMOVE_SAVED then trips the Assert */
545 Assert(currBucket && !(saveState.currBucket = NULL));
/* normal path: hash the key and walk to its bucket chain */
549 bucket = call_hash(hashp, keyPtr);
550 segment_num = bucket >> hctl->sshift;
551 segment_ndx = MOD(bucket, hctl->ssize);
553 segp = hashp->dir[segment_num];
556 hash_corrupted(hashp);
558 prevBucketPtr = &segp[segment_ndx];
559 currBucket = *prevBucketPtr;
562 * Follow collision chain looking for matching key
/* raw memcmp over keysize bytes -- keys must be stored fully padded */
564 while (currBucket != NULL)
566 if (memcmp(ELEMENTKEY(currBucket), keyPtr, hctl->keysize) == 0)
568 prevBucketPtr = &(currBucket->link);
569 currBucket = *prevBucketPtr;
578 *foundPtr = (bool) (currBucket != NULL);
/* --- action dispatch (switch statement; case labels partly elided) --- */
586 if (currBucket != NULL)
587 return (void *) ELEMENTKEY(currBucket);
591 if (currBucket != NULL)
593 saveState.currBucket = currBucket;
594 saveState.prevBucketPtr = prevBucketPtr;
595 return (void *) ELEMENTKEY(currBucket);
600 case HASH_REMOVE_SAVED:
601 if (currBucket != NULL)
603 Assert(hctl->nentries > 0);
606 /* remove record from hash bucket's chain. */
607 *prevBucketPtr = currBucket->link;
609 /* add the record to the freelist for this table. */
610 currBucket->link = hctl->freeList;
611 hctl->freeList = currBucket;
614 * better hope the caller is synchronizing access to this
615 * element, because someone else is going to reuse it the
616 * next time something is added to the table
618 return (void *) ELEMENTKEY(currBucket);
623 /* Return existing element if found, else create one */
624 if (currBucket != NULL)
625 return (void *) ELEMENTKEY(currBucket);
627 /* get the next free element */
628 currBucket = hctl->freeList;
629 if (currBucket == NULL)
631 /* no free elements. allocate another chunk of buckets */
632 if (!element_alloc(hashp))
633 return NULL; /* out of memory */
634 currBucket = hctl->freeList;
635 Assert(currBucket != NULL);
638 hctl->freeList = currBucket->link;
640 /* link into hashbucket chain */
641 *prevBucketPtr = currBucket;
642 currBucket->link = NULL;
644 /* copy key into record */
645 memcpy(ELEMENTKEY(currBucket), keyPtr, hctl->keysize);
647 /* caller is expected to fill the data field on return */
649 /* Check if it is time to split the segment */
/* split when average chain length (nentries / #buckets) exceeds ffactor */
650 if (++hctl->nentries / (hctl->max_bucket + 1) > hctl->ffactor)
653 * NOTE: failure to expand table is not a fatal error, it
654 * just means we have to run at higher fill factor than we
660 return (void *) ELEMENTKEY(currBucket);
663 elog(ERROR, "hash_search: bogus action %d", (int) action);
665 return NULL; /* keep compiler quiet */
669 * hash_seq_init/_search
670 * Sequentially search through hash table and return
671 * all the elements one by one, return NULL when no more.
673 * NOTE: caller may delete the returned element before continuing the scan.
674 * However, deleting any other element while the scan is in progress is
675 * UNDEFINED (it might be the one that curIndex is pointing at!). Also,
676 * if elements are added to the table while the scan is in progress, it is
677 * unspecified whether they will be visited by the scan or not.
/* Initialize scan state: start at bucket 0 with no in-progress chain. */
680 hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
682 status->hashp = hashp;
683 status->curBucket = 0;
684 status->curEntry = NULL;
/*
 * hash_seq_search -- return the next element of the scan, or NULL when the
 * table is exhausted.  curEntry is advanced BEFORE returning the current
 * element, which is what makes it safe to delete the returned element.
 */
688 hash_seq_search(HASH_SEQ_STATUS *status)
690 HTAB *hashp = status->hashp;
691 HASHHDR *hctl = hashp->hctl;
693 while (status->curBucket <= hctl->max_bucket)
699 if (status->curEntry != NULL)
701 /* Continuing scan of curBucket... */
702 HASHELEMENT *curElem;
704 curElem = status->curEntry;
/* step past curElem now, so the caller may free/remove curElem */
705 status->curEntry = curElem->link;
706 if (status->curEntry == NULL) /* end of this bucket */
708 return (void *) ELEMENTKEY(curElem);
712 * initialize the search within this bucket.
714 segment_num = status->curBucket >> hctl->sshift;
715 segment_ndx = MOD(status->curBucket, hctl->ssize);
718 * first find the right segment in the table directory.
720 segp = hashp->dir[segment_num];
722 hash_corrupted(hashp);
725 * now find the right index into the segment for the first item in
726 * this bucket's chain. if the bucket is not empty (its entry in
727 * the dir is valid), we know this must correspond to a valid
728 * element and not a freed element because it came out of the
729 * directory of valid stuff. if there are elements in the bucket
730 * chains that point to the freelist we're in big trouble.
732 status->curEntry = segp[segment_ndx];
734 if (status->curEntry == NULL) /* empty bucket */
738 return NULL; /* out of buckets */
742 /********************************* UTILITIES ************************/
745 * Expand the table by adding one more hash bucket.
/*
 * Returns false if the needed segment/directory allocation fails, in which
 * case the table simply runs at a higher fill factor (see hash_search).
 */
748 expand_table(HTAB *hashp)
750 HASHHDR *hctl = hashp->hctl;
761 HASHBUCKET currElement,
764 #ifdef HASH_STATISTICS
/* the new bucket is always max_bucket+1; locate (and if needed create) its segment */
768 new_bucket = hctl->max_bucket + 1;
769 new_segnum = new_bucket >> hctl->sshift;
770 new_segndx = MOD(new_bucket, hctl->ssize);
772 if (new_segnum >= hctl->nsegs)
774 /* Allocate new segment if necessary -- could fail if dir full */
775 if (new_segnum >= hctl->dsize)
776 if (!dir_realloc(hashp))
778 if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
783 /* OK, we created a new bucket */
787 * *Before* changing masks, find old bucket corresponding to same hash
788 * values; values in that bucket may need to be relocated to new
789 * bucket. Note that new_bucket is certainly larger than low_mask at
790 * this point, so we can skip the first step of the regular hash mask
793 old_bucket = (new_bucket & hctl->low_mask);
796 * If we crossed a power of 2, readjust masks.
798 if (new_bucket > hctl->high_mask)
800 hctl->low_mask = hctl->high_mask;
801 hctl->high_mask = new_bucket | hctl->low_mask;
805 * Relocate records to the new bucket. NOTE: because of the way the
806 * hash masking is done in call_hash, only one old bucket can need to
807 * be split at this point. With a different way of reducing the hash
808 * value, that might not be true!
810 old_segnum = old_bucket >> hctl->sshift;
811 old_segndx = MOD(old_bucket, hctl->ssize);
813 old_seg = hashp->dir[old_segnum];
814 new_seg = hashp->dir[new_segnum];
816 oldlink = &old_seg[old_segndx];
817 newlink = &new_seg[new_segndx];
/* partition the old chain in place: re-hash each element, appending it to
 * either the rebuilt old chain or the new bucket's chain */
819 for (currElement = *oldlink;
821 currElement = nextElement)
823 nextElement = currElement->link;
824 if ((long) call_hash(hashp, (void *) ELEMENTKEY(currElement))
827 *oldlink = currElement;
828 oldlink = &currElement->link;
832 *newlink = currElement;
833 newlink = &currElement->link;
836 /* don't forget to terminate the rebuilt hash chains... */
/*
 * dir_realloc -- double the segment directory.
 * Local-memory tables only (shared directories are fixed size); refuses
 * when a max_dsize limit is set.  Returns false on failure.
 */
845 dir_realloc(HTAB *hashp)
/* a fixed maximum directory size means the directory may never grow */
853 if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
856 /* Reallocate directory */
857 new_dsize = hashp->hctl->dsize << 1;
858 old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
859 new_dirsize = new_dsize * sizeof(HASHSEGMENT);
862 CurrentDynaHashCxt = hashp->hcxt;
863 p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);
/* copy old pointers, zero the new upper half, then release the old array */
867 memcpy(p, old_p, old_dirsize);
868 MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
869 MEM_FREE((char *) old_p);
871 hashp->hctl->dsize = new_dsize;
/*
 * seg_alloc -- allocate one segment (ssize bucket pointers), zeroed so all
 * its buckets start empty.  Returns NULL on allocation failure (check elided).
 */
880 seg_alloc(HTAB *hashp)
884 CurrentDynaHashCxt = hashp->hcxt;
885 segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->hctl->ssize);
890 MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->hctl->ssize);
896 * allocate some new elements and link them into the free list
/* Returns false if the chunk allocation fails (check elided in listing). */
899 element_alloc(HTAB *hashp)
901 HASHHDR *hctl = hashp->hctl;
903 HASHELEMENT *tmpElement;
906 /* Each element has a HASHELEMENT header plus user data. */
907 elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
/* one allocation covers HASHELEMENT_ALLOC_INCR elements, carved up below */
909 CurrentDynaHashCxt = hashp->hcxt;
910 tmpElement = (HASHELEMENT *)
911 hashp->alloc(HASHELEMENT_ALLOC_INCR * elementSize);
916 /* link all the new entries into the freelist */
917 for (i = 0; i < HASHELEMENT_ALLOC_INCR; i++)
919 tmpElement->link = hctl->freeList;
920 hctl->freeList = tmpElement;
921 tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
927 /* complain when we have detected a corrupted hashtable */
/* Does not return: elog(PANIC) / elog(FATAL) both abort this backend. */
929 hash_corrupted(HTAB *hashp)
932 * If the corruption is in a shared hashtable, we'd better force a
933 * systemwide restart. Otherwise, just shut down this one backend.
/* NOTE(review): the selecting condition is elided here -- presumably
 * if (hashp->isshared); confirm against the full source. */
936 elog(PANIC, "Hash table '%s' corrupted", hashp->tabname);
938 elog(FATAL, "Hash table '%s' corrupted", hashp->tabname);
941 /* calculate ceil(log base 2) of num */
948 for (i = 0, limit = 1; limit < num; i++, limit <<= 1)