From e3a33a9a9f1a6afb80c9b83c1456c1a36fbcb70b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 8 Jun 2005 23:02:05 +0000 Subject: [PATCH] Marginal hack to avoid spending a lot of time in find_join_rel during large planning problems: when the list of join rels gets too long, make an auxiliary hash table that hashes on the identifying Bitmapset. --- src/backend/nodes/bitmapset.c | 27 +++++- src/backend/optimizer/geqo/geqo_eval.c | 30 +++++-- src/backend/optimizer/geqo/geqo_main.c | 3 +- src/backend/optimizer/plan/planmain.c | 3 +- src/backend/optimizer/util/relnode.c | 109 +++++++++++++++++++++++-- src/backend/utils/hash/hashfn.c | 26 +++++- src/include/nodes/bitmapset.h | 5 +- src/include/nodes/relation.h | 12 ++- src/include/utils/hsearch.h | 4 +- 9 files changed, 194 insertions(+), 25 deletions(-) diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 699f0439ff..5f4ca9a779 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -14,7 +14,7 @@ * Copyright (c) 2003-2005, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.7 2005/01/01 20:44:15 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.8 2005/06/08 23:02:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -763,3 +763,28 @@ bms_first_member(Bitmapset *a) } return -1; } + +/* + * bms_hash_value - compute a hash key for a Bitmapset + * + * Note: we must ensure that any two bitmapsets that are bms_equal() will + * hash to the same value; in practice this means that trailing all-zero + * words cannot affect the result. Longitudinal XOR provides a reasonable + * hash value that has this property. + */ +uint32 +bms_hash_value(const Bitmapset *a) +{ + bitmapword result = 0; + int nwords; + int wordnum; + + if (a == NULL) + return 0; /* All empty sets hash to 0 */ + nwords = a->nwords; + for (wordnum = 0; wordnum < nwords; wordnum++) + { + result ^= a->words[wordnum]; + } + return (uint32) result; +} diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 3460eb5b8e..5d31ac738e 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.74 2005/06/05 22:32:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.75 2005/06/08 23:02:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -47,7 +47,8 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata) MemoryContext oldcxt; RelOptInfo *joinrel; Cost fitness; - List *savelist; + int savelength; + struct HTAB *savehash; /* * Because gimme_tree considers both left- and right-sided trees, @@ -83,13 +84,19 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata) * gimme_tree will add entries to root->join_rel_list, which may or may * not already contain some entries. The newly added entries will be * recycled by the MemoryContextDelete below, so we must ensure that - * the list is restored to its former state before exiting. With the - * new List implementation, the easiest way is to make a duplicate list - * that gimme_tree can modify. + * the list is restored to its former state before exiting. We can + * do this by truncating the list to its original length. NOTE this + * assumes that any added entries are appended at the end! + * + * We also must take care not to mess up the outer join_rel_hash, + * if there is one. We can do this by just temporarily setting the + * link to NULL. (If we are dealing with enough join rels, which we + * very likely are, a new hash table will get built and used locally.) */ - savelist = evaldata->root->join_rel_list; + savelength = list_length(evaldata->root->join_rel_list); + savehash = evaldata->root->join_rel_hash; - evaldata->root->join_rel_list = list_copy(savelist); + evaldata->root->join_rel_hash = NULL; /* construct the best path for the given combination of relations */ joinrel = gimme_tree(tour, num_gene, evaldata); @@ -105,8 +112,13 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata) else fitness = DBL_MAX; - /* restore join_rel_list */ - evaldata->root->join_rel_list = savelist; + /* + * Restore join_rel_list to its former state, and put back original + * hashtable if any. + */ + evaldata->root->join_rel_list = list_truncate(evaldata->root->join_rel_list, + savelength); + evaldata->root->join_rel_hash = savehash; /* release all the memory acquired within gimme_tree */ MemoryContextSwitchTo(oldcxt); diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c index f19f5f7c34..c027f4370c 100644 --- a/src/backend/optimizer/geqo/geqo_main.c +++ b/src/backend/optimizer/geqo/geqo_main.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.49 2005/06/05 22:32:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.50 2005/06/08 23:02:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -252,7 +252,6 @@ geqo(PlannerInfo *root, int number_of_rels, List *initial_rels) */ best_tour = (Gene *) pool->data[0].string; - /* root->join_rel_list will be modified during this ! */ best_rel = gimme_tree(best_tour, pool->string_length, &evaldata); if (best_rel == NULL) diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 1c87b24e4c..50e1bc5ea8 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.83 2005/06/06 04:13:35 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.84 2005/06/08 23:02:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -116,6 +116,7 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction, root->base_rel_array = (RelOptInfo **) palloc0(root->base_rel_array_size * sizeof(RelOptInfo *)); root->join_rel_list = NIL; + root->join_rel_hash = NULL; root->equi_key_list = NIL; /* diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 996f769187..fbaf0de83a 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.68 2005/06/06 04:13:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.69 2005/06/08 23:02:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,8 +21,15 @@ #include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" #include "parser/parsetree.h" +#include "utils/hsearch.h" +typedef struct JoinHashEntry +{ + Relids join_relids; /* hash key --- MUST BE FIRST */ + RelOptInfo *join_rel; +} JoinHashEntry; + static RelOptInfo *make_reloptinfo(PlannerInfo *root, int relid, RelOptKind reloptkind); static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, @@ -197,6 +204,47 @@ find_base_rel(PlannerInfo *root, int relid) return NULL; /* keep compiler quiet */ } +/* + * build_join_rel_hash + * Construct the auxiliary hash table for join relations. + */ +static void +build_join_rel_hash(PlannerInfo *root) +{ + HTAB *hashtab; + HASHCTL hash_ctl; + ListCell *l; + + /* Create the hash table */ + MemSet(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = sizeof(Relids); + hash_ctl.entrysize = sizeof(JoinHashEntry); + hash_ctl.hash = bitmap_hash; + hash_ctl.match = bitmap_match; + hash_ctl.hcxt = CurrentMemoryContext; + hashtab = hash_create("JoinRelHashTable", + 256L, + &hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); + + /* Insert all the already-existing joinrels */ + foreach(l, root->join_rel_list) + { + RelOptInfo *rel = (RelOptInfo *) lfirst(l); + JoinHashEntry *hentry; + bool found; + + hentry = (JoinHashEntry *) hash_search(hashtab, + &(rel->relids), + HASH_ENTER, + &found); + Assert(!found); + hentry->join_rel = rel; + } + + root->join_rel_hash = hashtab; +} + /* * find_join_rel * Returns relation entry corresponding to 'relids' (a set of RT indexes), @@ -205,14 +253,44 @@ find_base_rel(PlannerInfo *root, int relid) RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids) { - ListCell *l; + /* + * Switch to using hash lookup when list grows "too long". The threshold + * is arbitrary and is known only here. + */ + if (!root->join_rel_hash && list_length(root->join_rel_list) > 32) + build_join_rel_hash(root); - foreach(l, root->join_rel_list) + /* + * Use either hashtable lookup or linear search, as appropriate. + * + * Note: the seemingly redundant hashkey variable is used to avoid + * taking the address of relids; unless the compiler is exceedingly + * smart, doing so would force relids out of a register and thus + * probably slow down the list-search case. + */ + if (root->join_rel_hash) { - RelOptInfo *rel = (RelOptInfo *) lfirst(l); + Relids hashkey = relids; + JoinHashEntry *hentry; + + hentry = (JoinHashEntry *) hash_search(root->join_rel_hash, + &hashkey, + HASH_FIND, + NULL); + if (hentry) + return hentry->join_rel; + } + else + { + ListCell *l; - if (bms_equal(rel->relids, relids)) - return rel; + foreach(l, root->join_rel_list) + { + RelOptInfo *rel = (RelOptInfo *) lfirst(l); + + if (bms_equal(rel->relids, relids)) + return rel; + } } return NULL; @@ -329,9 +407,24 @@ build_join_rel(PlannerInfo *root, jointype, restrictlist); /* - * Add the joinrel to the query's joinrel list. + * Add the joinrel to the query's joinrel list, and store it into + * the auxiliary hashtable if there is one. NB: GEQO requires us + * to append the new joinrel to the end of the list! */ - root->join_rel_list = lcons(joinrel, root->join_rel_list); + root->join_rel_list = lappend(root->join_rel_list, joinrel); + + if (root->join_rel_hash) + { + JoinHashEntry *hentry; + bool found; + + hentry = (JoinHashEntry *) hash_search(root->join_rel_hash, + &(joinrel->relids), + HASH_ENTER, + &found); + Assert(!found); + hentry->join_rel = joinrel; + } return joinrel; } diff --git a/src/backend/utils/hash/hashfn.c b/src/backend/utils/hash/hashfn.c index 24255f31e6..c596865816 100644 --- a/src/backend/utils/hash/hashfn.c +++ b/src/backend/utils/hash/hashfn.c @@ -9,13 +9,14 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.23 2005/04/14 20:32:43 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.24 2005/06/08 23:02:05 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/hash.h" +#include "nodes/bitmapset.h" #include "utils/hsearch.h" @@ -53,3 +54,26 @@ oid_hash(const void *key, Size keysize) /* We don't actually bother to do anything to the OID value ... */ return (uint32) *((const Oid *) key); } + +/* + * bitmap_hash: hash function for keys that are (pointers to) Bitmapsets + * + * Note: don't forget to specify bitmap_match as the match function! + */ +uint32 +bitmap_hash(const void *key, Size keysize) +{ + Assert(keysize == sizeof(Bitmapset *)); + return bms_hash_value(*((const Bitmapset * const *) key)); +} + +/* + * bitmap_match: match function to use with bitmap_hash + */ +int +bitmap_match(const void *key1, const void *key2, Size keysize) +{ + Assert(keysize == sizeof(Bitmapset *)); + return !bms_equal(*((const Bitmapset * const *) key1), + *((const Bitmapset * const *) key2)); +} diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 2ba017fc2e..b831c4e59a 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -13,7 +13,7 @@ * * Copyright (c) 2003-2005, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/nodes/bitmapset.h,v 1.6 2005/01/01 20:44:28 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/bitmapset.h,v 1.7 2005/06/08 23:02:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -80,4 +80,7 @@ extern Bitmapset *bms_join(Bitmapset *a, Bitmapset *b); /* support for iterating through the integer elements of a set: */ extern int bms_first_member(Bitmapset *a); +/* support for hashtables using Bitmapsets as keys: */ +extern uint32 bms_hash_value(const Bitmapset *a); + #endif /* BITMAPSET_H */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 93dc78aece..7c702f7105 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.111 2005/06/06 04:13:36 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.112 2005/06/08 23:02:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -72,7 +72,17 @@ typedef struct PlannerInfo struct RelOptInfo **base_rel_array; /* All one-relation RelOptInfos */ int base_rel_array_size; /* current allocated array len */ + /* + * join_rel_list is a list of all join-relation RelOptInfos we have + * considered in this planning run. For small problems we just scan + * the list to do lookups, but when there are many join relations we + * build a hash table for faster lookups. The hash table is present + * and valid when join_rel_hash is not NULL. Note that we still maintain + * the list even when using the hash table for lookups; this simplifies + * life for GEQO. + */ List *join_rel_list; /* list of join-relation RelOptInfos */ + struct HTAB *join_rel_hash; /* optional hashtable for join relations */ List *equi_key_list; /* list of lists of equijoined * PathKeyItems */ diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h index bb93dea077..881327a3e0 100644 --- a/src/include/utils/hsearch.h +++ b/src/include/utils/hsearch.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.36 2005/05/29 04:23:06 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.37 2005/06/08 23:02:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -184,5 +184,7 @@ extern long hash_select_dirsize(long num_entries); extern uint32 string_hash(const void *key, Size keysize); extern uint32 tag_hash(const void *key, Size keysize); extern uint32 oid_hash(const void *key, Size keysize); +extern uint32 bitmap_hash(const void *key, Size keysize); +extern int bitmap_match(const void *key1, const void *key2, Size keysize); #endif /* HSEARCH_H */ -- 2.40.0