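Marginal performance improvements in dynahash: make sure that everything associated with a hashtable is allocated in that hashtable's private memory context, so that hash_destroy only has to delete the context; and tighten the inner loop of hash_seq_search.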

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 6 May 2005 00:19:14 +0000 (00:19 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 6 May 2005 00:19:14 +0000 (00:19 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 6 May 2005 00:19:14 +0000 (00:19 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 6 May 2005 00:19:14 +0000 (00:19 +0000)
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c

index 06e848a47d381e53dd482486cbe1beac2e15153d..1ff3a9d142d349bdacdb3a0100a6eae8dc6dce63 100644 (file)
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.58 2004/12/31 22:01:37 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.59 2005/05/06 00:19:14 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -72,9 +72,8 @@ static void hash_corrupted(HTAB *hashp);
  
  
  /*
- * memory allocation routines
+ * memory allocation support
   */
-static MemoryContext DynaHashCxt = NULL;
  static MemoryContext CurrentDynaHashCxt = NULL;
  
  static void *
@@ -84,10 +83,6 @@ DynaHashAlloc(Size size)
         return MemoryContextAlloc(CurrentDynaHashCxt, size);
  }
  
-#define MEM_ALLOC              DynaHashAlloc
-#undef MEM_FREE                                /* already in windows header files */
-#define MEM_FREE               pfree
-
  
  #if HASH_STATISTICS
  static long hash_accesses,
@@ -98,31 +93,60 @@ static long hash_accesses,
  
  /************************** CREATE ROUTINES **********************/
  
+/*
+ * hash_create -- create a new dynamic hash table
+ *
+ *     tabname: a name for the table (for debugging purposes)
+ *     nelem: maximum number of elements expected
+ *     *info: additional table parameters, as indicated by flags
+ *     flags: bitmask indicating which parameters to take from *info
+ *
+ * Note: for a shared-memory hashtable, nelem needs to be a pretty good
+ * estimate, since we can't expand the table on the fly.  But an unshared
+ * hashtable can be expanded on-the-fly, so it's better for nelem to be
+ * on the small side and let the table grow if it's exceeded.  An overly
+ * large nelem will penalize hash_seq_search speed without buying much.
+ */
  HTAB *
  hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
  {
         HTAB       *hashp;
         HASHHDR    *hctl;
  
-       /* First time through, create a memory context for hash tables */
-       if (!DynaHashCxt)
-               DynaHashCxt = AllocSetContextCreate(TopMemoryContext,
-                                                                                       "DynaHash",
-                                                                                       ALLOCSET_DEFAULT_MINSIZE,
-                                                                                       ALLOCSET_DEFAULT_INITSIZE,
-                                                                                       ALLOCSET_DEFAULT_MAXSIZE);
-
-       /* Select allocation context for this hash table */
-       if (flags & HASH_CONTEXT)
-               CurrentDynaHashCxt = info->hcxt;
+       /*
+        * For shared hash tables, we have a local hash header (HTAB struct)
+        * that we allocate in TopMemoryContext; all else is in shared memory.
+        *
+        * For non-shared hash tables, everything including the hash header
+        * is in a memory context created specially for the hash table ---
+        * this makes hash_destroy very simple.  The memory context is made
+        * a child of either a context specified by the caller, or
+        * TopMemoryContext if nothing is specified.
+        */
+       if (flags & HASH_SHARED_MEM)
+       {
+               /* Set up to allocate the hash header */
+               CurrentDynaHashCxt = TopMemoryContext;
+       }
         else
-               CurrentDynaHashCxt = DynaHashCxt;
+       {
+               /* Create the hash table's private memory context */
+               if (flags & HASH_CONTEXT)
+                       CurrentDynaHashCxt = info->hcxt;
+               else
+                       CurrentDynaHashCxt = TopMemoryContext;
+               CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,
+                                                                                                  tabname,
+                                                                                                  ALLOCSET_DEFAULT_MINSIZE,
+                                                                                                  ALLOCSET_DEFAULT_INITSIZE,
+                                                                                                  ALLOCSET_DEFAULT_MAXSIZE);
+       }
  
-       /* Initialize the hash header */
-       hashp = (HTAB *) MEM_ALLOC(sizeof(HTAB));
+       /* Initialize the hash header, plus a copy of the table name */
+       hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1);
         MemSet(hashp, 0, sizeof(HTAB));
  
-       hashp->tabname = (char *) MEM_ALLOC(strlen(tabname) + 1);
+       hashp->tabname = (char *) (hashp + 1);
         strcpy(hashp->tabname, tabname);
  
         if (flags & HASH_FUNCTION)
@@ -143,6 +167,11 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
         else
                 hashp->match = memcmp;
  
+       if (flags & HASH_ALLOC)
+               hashp->alloc = info->alloc;
+       else
+               hashp->alloc = DynaHashAlloc;
+
         if (flags & HASH_SHARED_MEM)
         {
                 /*
@@ -151,7 +180,6 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
                  */
                 hashp->hctl = info->hctl;
                 hashp->dir = info->dir;
-               hashp->alloc = info->alloc;
                 hashp->hcxt = NULL;
                 hashp->isshared = true;
  
@@ -164,7 +192,6 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
                 /* setup hash table defaults */
                 hashp->hctl = NULL;
                 hashp->dir = NULL;
-               hashp->alloc = MEM_ALLOC;
                 hashp->hcxt = CurrentDynaHashCxt;
                 hashp->isshared = false;
         }
@@ -210,23 +237,11 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
          */
         if (flags & HASH_ELEM)
         {
+               Assert(info->entrysize >= info->keysize);
                 hctl->keysize = info->keysize;
                 hctl->entrysize = info->entrysize;
         }
  
-       if (flags & HASH_ALLOC)
-               hashp->alloc = info->alloc;
-       else
-       {
-               /* remaining hash table structures live in child of given context */
-               hashp->hcxt = AllocSetContextCreate(CurrentDynaHashCxt,
-                                                                                       tabname,
-                                                                                       ALLOCSET_DEFAULT_MINSIZE,
-                                                                                       ALLOCSET_DEFAULT_INITSIZE,
-                                                                                       ALLOCSET_DEFAULT_MAXSIZE);
-               CurrentDynaHashCxt = hashp->hcxt;
-       }
-
         /* Build the hash directory structure */
         if (!init_htab(hashp, nelem))
         {
@@ -431,26 +446,16 @@ hash_destroy(HTAB *hashp)
         if (hashp != NULL)
         {
                 /* allocation method must be one we know how to free, too */
-               Assert(hashp->alloc == MEM_ALLOC);
+               Assert(hashp->alloc == DynaHashAlloc);
                 /* so this hashtable must have it's own context */
                 Assert(hashp->hcxt != NULL);
  
                 hash_stats("destroy", hashp);
  
                 /*
-                * Free buckets, dir etc. by destroying the hash table's memory
-                * context.
+                * Free everything by destroying the hash table's memory context.
                  */
                 MemoryContextDelete(hashp->hcxt);
-
-               /*
-                * Free the HTAB and control structure, which are allocated in the
-                * parent context (DynaHashCxt or the context given by the caller
-                * of hash_create()).
-                */
-               MEM_FREE(hashp->hctl);
-               MEM_FREE(hashp->tabname);
-               MEM_FREE(hashp);
         }
  }
  
@@ -702,55 +707,74 @@ hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
  void *
  hash_seq_search(HASH_SEQ_STATUS *status)
  {
-       HTAB       *hashp = status->hashp;
-       HASHHDR    *hctl = hashp->hctl;
+       HTAB       *hashp;
+       HASHHDR    *hctl;
+       uint32          max_bucket;
+       long            ssize;
+       long            segment_num;
+       long            segment_ndx;
+       HASHSEGMENT segp;
+       uint32          curBucket;
+       HASHELEMENT *curElem;
  
-       while (status->curBucket <= hctl->max_bucket)
+       if ((curElem = status->curEntry) != NULL)
         {
-               long            segment_num;
-               long            segment_ndx;
-               HASHSEGMENT segp;
+               /* Continuing scan of curBucket... */
+               status->curEntry = curElem->link;
+               if (status->curEntry == NULL)           /* end of this bucket */
+                       ++status->curBucket;
+               return (void *) ELEMENTKEY(curElem);
+       }
  
-               if (status->curEntry != NULL)
-               {
-                       /* Continuing scan of curBucket... */
-                       HASHELEMENT *curElem;
-
-                       curElem = status->curEntry;
-                       status->curEntry = curElem->link;
-                       if (status->curEntry == NULL)           /* end of this bucket */
-                               ++status->curBucket;
-                       return (void *) ELEMENTKEY(curElem);
-               }
+       /*
+        * Search for next nonempty bucket starting at curBucket.
+        */
+       curBucket = status->curBucket;
+       hashp = status->hashp;
+       hctl = hashp->hctl;
+       ssize = hctl->ssize;
+       max_bucket = hctl->max_bucket;
  
-               /*
-                * initialize the search within this bucket.
-                */
-               segment_num = status->curBucket >> hctl->sshift;
-               segment_ndx = MOD(status->curBucket, hctl->ssize);
+       if (curBucket > max_bucket)
+               return NULL;                                            /* search is done */
  
-               /*
-                * first find the right segment in the table directory.
-                */
-               segp = hashp->dir[segment_num];
-               if (segp == NULL)
-                       hash_corrupted(hashp);
+       /*
+        * first find the right segment in the table directory.
+        */
+       segment_num = curBucket >> hctl->sshift;
+       segment_ndx = MOD(curBucket, ssize);
  
-               /*
-                * now find the right index into the segment for the first item in
-                * this bucket's chain.  if the bucket is not empty (its entry in
-                * the dir is valid), we know this must correspond to a valid
-                * element and not a freed element because it came out of the
-                * directory of valid stuff.  if there are elements in the bucket
-                * chains that point to the freelist we're in big trouble.
-                */
-               status->curEntry = segp[segment_ndx];
+       segp = hashp->dir[segment_num];
  
-               if (status->curEntry == NULL)   /* empty bucket */
-                       ++status->curBucket;
+       /*
+        * Pick up the first item in this bucket's chain.  If chain is
+        * not empty we can begin searching it.  Otherwise we have to advance
+        * to find the next nonempty bucket.  We try to optimize that case
+        * since searching a near-empty hashtable has to iterate this loop
+        * a lot.
+        */
+       while ((curElem = segp[segment_ndx]) == NULL)
+       {
+               /* empty bucket, advance to next */
+               if (++curBucket > max_bucket)
+               {
+                       status->curBucket = curBucket;
+                       return NULL;                                    /* search is done */
+               }
+               if (++segment_ndx >= ssize)
+               {
+                       segment_num++;
+                       segment_ndx = 0;
+                       segp = hashp->dir[segment_num];
+               }
         }
  
-       return NULL;                            /* out of buckets */
+       /* Begin scan of curBucket... */
+       status->curEntry = curElem->link;
+       if (status->curEntry == NULL)           /* end of this bucket */
+               ++curBucket;
+       status->curBucket = curBucket;
+       return (void *) ELEMENTKEY(curElem);
  }
  
  
@@ -880,9 +904,13 @@ dir_realloc(HTAB *hashp)
         {
                 memcpy(p, old_p, old_dirsize);
                 MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
-               MEM_FREE((char *) old_p);
                 hashp->dir = p;
                 hashp->hctl->dsize = new_dsize;
+
+               /* XXX assume the allocator is palloc, so we know how to free */
+               Assert(hashp->alloc == DynaHashAlloc);
+               pfree(old_p);
+
                 return true;
         }
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 6 May 2005 00:19:14 +0000 (00:19 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 6 May 2005 00:19:14 +0000 (00:19 +0000)