]> granicus.if.org Git - postgresql/commitdiff
Marginal hack to avoid spending a lot of time in find_join_rel during
authorTom Lane <tgl@sss.pgh.pa.us>
Wed, 8 Jun 2005 23:02:05 +0000 (23:02 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Wed, 8 Jun 2005 23:02:05 +0000 (23:02 +0000)
large planning problems: when the list of join rels gets too long, make
an auxiliary hash table that hashes on the identifying Bitmapset.

src/backend/nodes/bitmapset.c
src/backend/optimizer/geqo/geqo_eval.c
src/backend/optimizer/geqo/geqo_main.c
src/backend/optimizer/plan/planmain.c
src/backend/optimizer/util/relnode.c
src/backend/utils/hash/hashfn.c
src/include/nodes/bitmapset.h
src/include/nodes/relation.h
src/include/utils/hsearch.h

index 699f0439ffc0c89a0dbe3dda05e51f27c1c9f313..5f4ca9a779b26bc8974601735ded4044d5b458d2 100644 (file)
@@ -14,7 +14,7 @@
  * Copyright (c) 2003-2005, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.7 2005/01/01 20:44:15 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.8 2005/06/08 23:02:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -763,3 +763,28 @@ bms_first_member(Bitmapset *a)
        }
        return -1;
 }
+
+/*
+ * bms_hash_value - compute a hash key for a Bitmapset
+ *
+ * Note: we must ensure that any two bitmapsets that are bms_equal() will
+ * hash to the same value; in practice this means that trailing all-zero
+ * words cannot affect the result.  Longitudinal XOR provides a reasonable
+ * hash value that has this property.
+ */
+uint32
+bms_hash_value(const Bitmapset *a)
+{
+       bitmapword      result = 0;     /* running XOR of the words seen so far */
+       int                     nwords;
+       int                     wordnum;
+
+       if (a == NULL)
+               return 0;                               /* All empty sets hash to 0 */
+       nwords = a->nwords;
+       for (wordnum = 0; wordnum < nwords; wordnum++)
+       {
+               result ^= a->words[wordnum];    /* an all-zero word leaves result unchanged */
+       }
+       return (uint32) result;         /* convert the accumulator to the declared return type */
+}
index 3460eb5b8e3cf51b3154fcb9d7dd31ffcb4da4a1..5d31ac738e73727e1598e9e31d594614a2010268 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.74 2005/06/05 22:32:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.75 2005/06/08 23:02:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -47,7 +47,8 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
        MemoryContext oldcxt;
        RelOptInfo *joinrel;
        Cost            fitness;
-       List       *savelist;
+       int                     savelength;
+       struct HTAB *savehash;
 
        /*
         * Because gimme_tree considers both left- and right-sided trees,
@@ -83,13 +84,19 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
         * gimme_tree will add entries to root->join_rel_list, which may or may
         * not already contain some entries.  The newly added entries will be
         * recycled by the MemoryContextDelete below, so we must ensure that
-        * the list is restored to its former state before exiting.  With the
-        * new List implementation, the easiest way is to make a duplicate list
-        * that gimme_tree can modify.
+        * the list is restored to its former state before exiting.  We can
+        * do this by truncating the list to its original length.  NOTE this
+        * assumes that any added entries are appended at the end!
+        *
+        * We also must take care not to mess up the outer join_rel_hash,
+        * if there is one.  We can do this by just temporarily setting the
+        * link to NULL.  (If we are dealing with enough join rels, which we
+        * very likely are, a new hash table will get built and used locally.)
         */
-       savelist = evaldata->root->join_rel_list;
+       savelength = list_length(evaldata->root->join_rel_list);
+       savehash = evaldata->root->join_rel_hash;
 
-       evaldata->root->join_rel_list = list_copy(savelist);
+       evaldata->root->join_rel_hash = NULL;
 
        /* construct the best path for the given combination of relations */
        joinrel = gimme_tree(tour, num_gene, evaldata);
@@ -105,8 +112,13 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
        else
                fitness = DBL_MAX;
 
-       /* restore join_rel_list */
-       evaldata->root->join_rel_list = savelist;
+       /*
+        * Restore join_rel_list to its former state, and put back original
+        * hashtable if any.
+        */
+       evaldata->root->join_rel_list = list_truncate(evaldata->root->join_rel_list,
+                                                                                                 savelength);
+       evaldata->root->join_rel_hash = savehash;
 
        /* release all the memory acquired within gimme_tree */
        MemoryContextSwitchTo(oldcxt);
index f19f5f7c34d611e4faeed528680d411413bb8590..c027f4370c3a539f26c7a6032accb1a024338d82 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.49 2005/06/05 22:32:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.50 2005/06/08 23:02:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -252,7 +252,6 @@ geqo(PlannerInfo *root, int number_of_rels, List *initial_rels)
         */
        best_tour = (Gene *) pool->data[0].string;
 
-       /* root->join_rel_list will be modified during this ! */
        best_rel = gimme_tree(best_tour, pool->string_length, &evaldata);
 
        if (best_rel == NULL)
index 1c87b24e4c6c07982823d8bf4269d282100bd50a..50e1bc5ea8c281aea6d86b07a5b6997d08b54426 100644 (file)
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.83 2005/06/06 04:13:35 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.84 2005/06/08 23:02:04 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -116,6 +116,7 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
        root->base_rel_array = (RelOptInfo **)
                palloc0(root->base_rel_array_size * sizeof(RelOptInfo *));
        root->join_rel_list = NIL;
+       root->join_rel_hash = NULL;
        root->equi_key_list = NIL;
 
        /*
index 996f7691870a6e337b9ff8e8438743c504233d2f..fbaf0de83a89cf1502a24fc390b0e49ab3797566 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.68 2005/06/06 04:13:36 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.69 2005/06/08 23:02:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
 #include "parser/parsetree.h"
+#include "utils/hsearch.h"
 
 
+typedef struct JoinHashEntry   /* one entry of root->join_rel_hash */
+{
+       Relids          join_relids;    /* hash key --- MUST BE FIRST */
+       RelOptInfo *join_rel;           /* the joinrel whose relids were used as the lookup key */
+} JoinHashEntry;
+
 static RelOptInfo *make_reloptinfo(PlannerInfo *root, int relid,
                                                                   RelOptKind reloptkind);
 static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
@@ -197,6 +204,47 @@ find_base_rel(PlannerInfo *root, int relid)
        return NULL;                            /* keep compiler quiet */
 }
 
+/*
+ * build_join_rel_hash
+ *       Construct the auxiliary hash table for join relations.
+ */
+static void
+build_join_rel_hash(PlannerInfo *root)
+{
+       HTAB       *hashtab;
+       HASHCTL         hash_ctl;
+       ListCell   *l;
+
+       /* Create the hash table */
+       MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+       hash_ctl.keysize = sizeof(Relids);      /* the key is the Relids value itself */
+       hash_ctl.entrysize = sizeof(JoinHashEntry);
+       hash_ctl.hash = bitmap_hash;    /* hashes the referenced set via bms_hash_value */
+       hash_ctl.match = bitmap_match;  /* compares via bms_equal, not pointer identity */
+       hash_ctl.hcxt = CurrentMemoryContext;   /* NOTE(review): presumably the context the table is allocated in -- confirm */
+       hashtab = hash_create("JoinRelHashTable",
+                                                 256L, /* NOTE(review): presumably an initial entry-count estimate -- confirm */
+                                                 &hash_ctl,
+                                                 HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+
+       /* Insert all the already-existing joinrels */
+       foreach(l, root->join_rel_list)
+       {
+               RelOptInfo *rel = (RelOptInfo *) lfirst(l);
+               JoinHashEntry *hentry;
+               bool            found;
+
+               hentry = (JoinHashEntry *) hash_search(hashtab,
+                                                                                          &(rel->relids),      /* address of the key, as bitmap_hash dereferences it */
+                                                                                          HASH_ENTER,
+                                                                                          &found);
+               Assert(!found);         /* each relids set is expected to occur only once in the list */
+               hentry->join_rel = rel; /* join_relids is not assigned here -- presumably hash_search stores the key */
+       }
+
+       root->join_rel_hash = hashtab;  /* install the table only after it is fully populated */
+}
+
 /*
  * find_join_rel
  *       Returns relation entry corresponding to 'relids' (a set of RT indexes),
@@ -205,14 +253,44 @@ find_base_rel(PlannerInfo *root, int relid)
 RelOptInfo *
 find_join_rel(PlannerInfo *root, Relids relids)
 {
-       ListCell   *l;
+       /*
+        * Switch to using hash lookup when list grows "too long".  The threshold
+        * is arbitrary and is known only here.
+        */
+       if (!root->join_rel_hash && list_length(root->join_rel_list) > 32)
+               build_join_rel_hash(root);
 
-       foreach(l, root->join_rel_list)
+       /*
+        * Use either hashtable lookup or linear search, as appropriate.
+        *
+        * Note: the seemingly redundant hashkey variable is used to avoid
+        * taking the address of relids; unless the compiler is exceedingly
+        * smart, doing so would force relids out of a register and thus
+        * probably slow down the list-search case.
+        */
+       if (root->join_rel_hash)
        {
-               RelOptInfo *rel = (RelOptInfo *) lfirst(l);
+               Relids          hashkey = relids;
+               JoinHashEntry *hentry;
+
+               hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
+                                                                                          &hashkey,
+                                                                                          HASH_FIND,
+                                                                                          NULL);
+               if (hentry)
+                       return hentry->join_rel;
+       }
+       else
+       {
+               ListCell   *l;
 
-               if (bms_equal(rel->relids, relids))
-                       return rel;
+               foreach(l, root->join_rel_list)
+               {
+                       RelOptInfo *rel = (RelOptInfo *) lfirst(l);
+
+                       if (bms_equal(rel->relids, relids))
+                               return rel;
+               }
        }
 
        return NULL;
@@ -329,9 +407,24 @@ build_join_rel(PlannerInfo *root,
                                                           jointype, restrictlist);
 
        /*
-        * Add the joinrel to the query's joinrel list.
+        * Add the joinrel to the query's joinrel list, and store it into
+        * the auxiliary hashtable if there is one.  NB: GEQO requires us
+        * to append the new joinrel to the end of the list!
         */
-       root->join_rel_list = lcons(joinrel, root->join_rel_list);
+       root->join_rel_list = lappend(root->join_rel_list, joinrel);
+
+       if (root->join_rel_hash)
+       {
+               JoinHashEntry *hentry;
+               bool            found;
+
+               hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
+                                                                                          &(joinrel->relids),
+                                                                                          HASH_ENTER,
+                                                                                          &found);
+               Assert(!found);
+               hentry->join_rel = joinrel;
+       }
 
        return joinrel;
 }
index 24255f31e6e0ac19a5416461e31f1c7338539447..c59686581614101b30bfedcbd919995edb336120 100644 (file)
@@ -9,13 +9,14 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.23 2005/04/14 20:32:43 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.24 2005/06/08 23:02:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include "access/hash.h"
+#include "nodes/bitmapset.h"
 #include "utils/hsearch.h"
 
 
@@ -53,3 +54,26 @@ oid_hash(const void *key, Size keysize)
        /* We don't actually bother to do anything to the OID value ... */
        return (uint32) *((const Oid *) key);
 }
+
+/*
+ * bitmap_hash: hash function for keys that are (pointers to) Bitmapsets
+ *
+ * Note: don't forget to specify bitmap_match as the match function!
+ */
+uint32
+bitmap_hash(const void *key, Size keysize)
+{
+       Assert(keysize == sizeof(Bitmapset *)); /* the key must be exactly one Bitmapset pointer */
+       return bms_hash_value(*((const Bitmapset * const *) key));      /* dereference key once, then hash that set */
+}
+
+/*
+ * bitmap_match: match function to use with bitmap_hash
+ */
+int
+bitmap_match(const void *key1, const void *key2, Size keysize)
+{
+       Assert(keysize == sizeof(Bitmapset *)); /* each key must be exactly one Bitmapset pointer */
+       return !bms_equal(*((const Bitmapset * const *) key1),  /* 0 when the two sets are bms_equal, 1 otherwise */
+                                         *((const Bitmapset * const *) key2));
+}
index 2ba017fc2ee6dbba303c573437ba04c83d3b5808..b831c4e59a13e18aac10505228ede60fe028ad3d 100644 (file)
@@ -13,7 +13,7 @@
  *
  * Copyright (c) 2003-2005, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/nodes/bitmapset.h,v 1.6 2005/01/01 20:44:28 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/bitmapset.h,v 1.7 2005/06/08 23:02:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -80,4 +80,7 @@ extern Bitmapset *bms_join(Bitmapset *a, Bitmapset *b);
 /* support for iterating through the integer elements of a set: */
 extern int     bms_first_member(Bitmapset *a);
 
+/* support for hashtables using Bitmapsets as keys: */
+extern uint32 bms_hash_value(const Bitmapset *a);
+
 #endif   /* BITMAPSET_H */
index 93dc78aece4fd4bc0d95ff03f7e114a793631a9c..7c702f7105ae6e15cc8a824e3877821e2e23a134 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.111 2005/06/06 04:13:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.112 2005/06/08 23:02:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -72,7 +72,17 @@ typedef struct PlannerInfo
        struct RelOptInfo **base_rel_array;     /* All one-relation RelOptInfos */
        int                     base_rel_array_size;    /* current allocated array len */
 
+       /*
+        * join_rel_list is a list of all join-relation RelOptInfos we have
+        * considered in this planning run.  For small problems we just scan
+        * the list to do lookups, but when there are many join relations we
+        * build a hash table for faster lookups.  The hash table is present
+        * and valid when join_rel_hash is not NULL.  Note that we still maintain
+        * the list even when using the hash table for lookups; this simplifies
+        * life for GEQO.
+        */
        List       *join_rel_list;      /* list of join-relation RelOptInfos */
+       struct HTAB *join_rel_hash;     /* optional hashtable for join relations */
 
        List       *equi_key_list;      /* list of lists of equijoined
                                                                 * PathKeyItems */
index bb93dea0775d6d5488f36a181c6dec412507d2cf..881327a3e03f2822ddafb7270223426773e1b932 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.36 2005/05/29 04:23:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.37 2005/06/08 23:02:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -184,5 +184,7 @@ extern long hash_select_dirsize(long num_entries);
 extern uint32 string_hash(const void *key, Size keysize);
 extern uint32 tag_hash(const void *key, Size keysize);
 extern uint32 oid_hash(const void *key, Size keysize);
+extern uint32 bitmap_hash(const void *key, Size keysize);
+extern int     bitmap_match(const void *key1, const void *key2, Size keysize);
 
 #endif   /* HSEARCH_H */