*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.220 2007/04/15 20:09:28 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.221 2007/04/17 20:03:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID)
+/* Per-path data used within choose_bitmap_and() */
+typedef struct
+{
+ Path *path; /* IndexPath, BitmapAndPath, or BitmapOrPath */
+ List *quals; /* the WHERE clauses it uses */
+ List *preds; /* predicates of its partial index(es) */
+ Bitmapset *clauseids; /* quals+preds represented as a bitmapset */
+} PathClauseUsage;
+
+
static List *find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses,
bool istoplevel, RelOptInfo *outer_rel,
bool istoplevel, RelOptInfo *outer_rel);
static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
List *paths, RelOptInfo *outer_rel);
-static int bitmap_path_comparator(const void *a, const void *b);
+static int path_usage_comparator(const void *a, const void *b);
+static Cost bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
+ Path *ipath, RelOptInfo *outer_rel);
static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
List *paths, RelOptInfo *outer_rel);
+static PathClauseUsage *classify_index_clause_usage(Path *path,
+ List **clauselist);
static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
-static bool lists_intersect(List *list1, List *list2);
+static int find_list_position(Node *node, List **nodelist);
static bool match_clause_to_indexcol(IndexOptInfo *index,
int indexcol, Oid opfamily,
RestrictInfo *rinfo,
List *paths, RelOptInfo *outer_rel)
{
int npaths = list_length(paths);
- Path **patharray;
- Cost costsofar;
- List *qualsofar;
- List *firstpred;
- ListCell *lastcell;
- int i;
+ PathClauseUsage **pathinfoarray;
+ PathClauseUsage *pathinfo;
+ List *clauselist;
+ List *bestpaths = NIL;
+ Cost bestcost = 0;
+ int i, j;
ListCell *l;
Assert(npaths > 0); /* else caller error */
* In theory we should consider every nonempty subset of the given paths.
* In practice that seems like overkill, given the crude nature of the
* estimates, not to mention the possible effects of higher-level AND and
- * OR clauses. As a compromise, we sort the paths by selectivity. We
- * always take the first, and sequentially add on paths that result in a
- * lower estimated cost.
- *
- * We also make some effort to detect directly redundant input paths, as
- * can happen if there are multiple possibly usable indexes. (Another way
- * it can happen is that best_inner_indexscan will find the same OR join
- * clauses that create_or_index_quals has pulled OR restriction clauses
- * out of, and then both versions show up as duplicate paths.) We
- * consider an index redundant if any of its index conditions were already
- * used by earlier indexes. (We could use predicate_implied_by to have a
- * more intelligent, but much more expensive, check --- but in most cases
- * simple equality should suffice.)
+ * OR clauses. Moreover, it's completely impractical if there are a large
+ * number of paths, since the work would grow as O(2^N).
*
- * You might think the condition for redundancy should be "all index
- * conditions already used", not "any", but this turns out to be wrong.
- * For example, if we use an index on A, and then come to an index with
- * conditions on A and B, the only way that the second index can be later
- * in the selectivity-order sort is if the condition on B is completely
- * non-selective. In any case, we'd surely be drastically misestimating
- * the selectivity if we count the same condition twice.
+ * As a heuristic, we first check for paths using exactly the same
+ * sets of WHERE clauses + index predicate conditions, and reject all
+ * but the cheapest-to-scan in any such group. This primarily gets rid
+ * of indexes that include the interesting columns but also irrelevant
+ * columns. (In situations where the DBA has gone overboard on creating
+ * variant indexes, this can make for a very large reduction in the number
+ * of paths considered further.)
*
- * We must also consider index predicate conditions in checking for
- * redundancy, because the estimated selectivity of a partial index
- * includes its predicate even if the explicit index conditions don't.
- * Here we have to work harder than just checking expression equality:
- * we check to see if any of the predicate clauses are implied by
- * index conditions or predicate clauses of previous paths. This covers
- * cases such as a condition "x = 42" used with a plain index, followed
- * by a clauseless scan of a partial index "WHERE x >= 40 AND x < 50".
- * Also, we reject indexes that have a qual condition matching any
- * previously-used index's predicate (by including predicate conditions
- * into qualsofar). It should be sufficient to check equality in this
- * case, not implication, since we've sorted the paths by selectivity
- * and so tighter conditions are seen first --- only for exactly equal
- * cases might the partial index come first.
+ * We then sort the surviving paths with the cheapest-to-scan first,
+ * and for each path, consider using that path alone as the basis for
+ * a bitmap scan. Then we consider bitmap AND scans formed from that
+ * path plus each subsequent (higher-cost) path, adding on a subsequent
+ * path if it results in a reduction in the estimated total scan cost.
+ * This means we consider about O(N^2) rather than O(2^N) path
+ * combinations, which is quite tolerable, especially given than N is
+ * usually reasonably small because of the prefiltering step. The
+ * cheapest of these is returned.
*
- * XXX the reason we need all these redundancy checks is that costsize.c
- * and clausesel.c aren't very smart about redundant clauses: they will
- * usually double-count the redundant clauses, producing a too-small
- * selectivity that makes a redundant AND look like it reduces the total
- * cost. Perhaps someday that code will be smarter and we can remove
- * these heuristics.
+ * We will only consider AND combinations in which no two indexes use
+ * the same WHERE clause. This is a bit of a kluge: it's needed because
+ * costsize.c and clausesel.c aren't very smart about redundant clauses.
+ * They will usually double-count the redundant clauses, producing a
+ * too-small selectivity that makes a redundant AND step look like it
+ * reduces the total cost. Perhaps someday that code will be smarter and
+ * we can remove this limitation. (But note that this also defends
+ * against flat-out duplicate input paths, which can happen because
+ * best_inner_indexscan will find the same OR join clauses that
+ * create_or_index_quals has pulled OR restriction clauses out of.)
*
- * Note: outputting the selected sub-paths in selectivity order is a good
- * thing even if we weren't using that as part of the selection method,
- * because it makes the short-circuit case in MultiExecBitmapAnd() more
- * likely to apply.
+ * For the same reason, we reject AND combinations in which an index
+ * predicate clause duplicates another clause. Here we find it necessary
+ * to be even stricter: we'll reject a partial index if any of its
+ * predicate clauses are implied by the set of WHERE clauses and predicate
+ * clauses used so far. This covers cases such as a condition "x = 42"
+ * used with a plain index, followed by a clauseless scan of a partial
+ * index "WHERE x >= 40 AND x < 50". The partial index has been accepted
+ * only because "x = 42" was present, and so allowing it would partially
+ * double-count selectivity. (We could use predicate_implied_by on
+ * regular qual clauses too, to have a more intelligent, but much more
+ * expensive, check for redundancy --- but in most cases simple equality
+ * seems to suffice.)
*/
- /* Convert list to array so we can apply qsort */
- patharray = (Path **) palloc(npaths * sizeof(Path *));
- i = 0;
+ /*
+ * Extract clause usage info and detect any paths that use exactly
+ * the same set of clauses; keep only the cheapest-to-scan of any such
+ * groups. The surviving paths are put into an array for qsort'ing.
+ */
+ pathinfoarray = (PathClauseUsage **)
+ palloc(npaths * sizeof(PathClauseUsage *));
+ clauselist = NIL;
+ npaths = 0;
foreach(l, paths)
{
- patharray[i++] = (Path *) lfirst(l);
+ Path *ipath = (Path *) lfirst(l);
+
+ pathinfo = classify_index_clause_usage(ipath, &clauselist);
+ for (i = 0; i < npaths; i++)
+ {
+ if (bms_equal(pathinfo->clauseids, pathinfoarray[i]->clauseids))
+ break;
+ }
+ if (i < npaths)
+ {
+ /* duplicate clauseids, keep the cheaper one */
+ Cost ncost;
+ Cost ocost;
+ Selectivity nselec;
+ Selectivity oselec;
+
+ cost_bitmap_tree_node(pathinfo->path, &ncost, &nselec);
+ cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, &oselec);
+ if (ncost < ocost)
+ pathinfoarray[i] = pathinfo;
+ }
+ else
+ {
+ /* not duplicate clauseids, add to array */
+ pathinfoarray[npaths++] = pathinfo;
+ }
}
- qsort(patharray, npaths, sizeof(Path *), bitmap_path_comparator);
- paths = list_make1(patharray[0]);
- costsofar = bitmap_and_cost_est(root, rel, paths, outer_rel);
- find_indexpath_quals(patharray[0], &qualsofar, &firstpred);
- qualsofar = list_concat(qualsofar, firstpred);
- lastcell = list_head(paths); /* for quick deletions */
+ /* If only one surviving path, we're done */
+ if (npaths == 1)
+ return pathinfoarray[0]->path;
- for (i = 1; i < npaths; i++)
+ /* Sort the surviving paths by index access cost */
+ qsort(pathinfoarray, npaths, sizeof(PathClauseUsage *),
+ path_usage_comparator);
+
+ /*
+ * For each surviving index, consider it as an "AND group leader", and
+ * see whether adding on any of the later indexes results in an AND path
+ * with cheaper total cost than before. Then take the cheapest AND group.
+ */
+ for (i = 0; i < npaths; i++)
{
- Path *newpath = patharray[i];
- List *newqual;
- List *newpred;
- Cost newcost;
-
- find_indexpath_quals(newpath, &newqual, &newpred);
- if (lists_intersect(newqual, qualsofar))
- continue; /* consider it redundant */
- if (newpred)
+ Cost costsofar;
+ List *qualsofar;
+ Bitmapset *clauseidsofar;
+ ListCell *lastcell;
+
+ pathinfo = pathinfoarray[i];
+ paths = list_make1(pathinfo->path);
+ costsofar = bitmap_scan_cost_est(root, rel, pathinfo->path, outer_rel);
+ qualsofar = list_concat(list_copy(pathinfo->quals),
+ list_copy(pathinfo->preds));
+ clauseidsofar = bms_copy(pathinfo->clauseids);
+ lastcell = list_head(paths); /* for quick deletions */
+
+ for (j = i+1; j < npaths; j++)
{
- bool redundant = false;
+ Cost newcost;
- /* we check each predicate clause separately */
- foreach(l, newpred)
+ pathinfo = pathinfoarray[j];
+ /* Check for redundancy */
+ if (bms_overlap(pathinfo->clauseids, clauseidsofar))
+ continue; /* consider it redundant */
+ if (pathinfo->preds)
{
- Node *np = (Node *) lfirst(l);
+ bool redundant = false;
- if (predicate_implied_by(list_make1(np), qualsofar))
+ /* we check each predicate clause separately */
+ foreach(l, pathinfo->preds)
{
- redundant = true;
- break; /* out of inner loop */
+ Node *np = (Node *) lfirst(l);
+
+ if (predicate_implied_by(list_make1(np), qualsofar))
+ {
+ redundant = true;
+ break; /* out of inner foreach loop */
+ }
}
+ if (redundant)
+ continue;
}
- if (redundant)
- continue;
- }
- /* tentatively add newpath to paths, so we can estimate cost */
- paths = lappend(paths, newpath);
- newcost = bitmap_and_cost_est(root, rel, paths, outer_rel);
- if (newcost < costsofar)
- {
- /* keep newpath in paths, update subsidiary variables */
- costsofar = newcost;
- qualsofar = list_concat(list_concat(qualsofar, newqual), newpred);
- lastcell = lnext(lastcell);
+ /* tentatively add new path to paths, so we can estimate cost */
+ paths = lappend(paths, pathinfo->path);
+ newcost = bitmap_and_cost_est(root, rel, paths, outer_rel);
+ if (newcost < costsofar)
+ {
+ /* keep new path in paths, update subsidiary variables */
+ costsofar = newcost;
+ qualsofar = list_concat(qualsofar,
+ list_copy(pathinfo->quals));
+ qualsofar = list_concat(qualsofar,
+ list_copy(pathinfo->preds));
+ clauseidsofar = bms_add_members(clauseidsofar,
+ pathinfo->clauseids);
+ lastcell = lnext(lastcell);
+ }
+ else
+ {
+ /* reject new path, remove it from paths list */
+ paths = list_delete_cell(paths, lnext(lastcell), lastcell);
+ }
+ Assert(lnext(lastcell) == NULL);
}
- else
+
+ /* Keep the cheapest AND-group (or singleton) */
+ if (i == 0 || costsofar < bestcost)
{
- /* reject newpath, remove it from paths list */
- paths = list_delete_cell(paths, lnext(lastcell), lastcell);
+ bestpaths = paths;
+ bestcost = costsofar;
}
- Assert(lnext(lastcell) == NULL);
+
+ /* some easy cleanup (we don't try real hard though) */
+ list_free(qualsofar);
}
- if (list_length(paths) == 1)
- return (Path *) linitial(paths); /* no need for AND */
- return (Path *) create_bitmap_and_path(root, rel, paths);
+ if (list_length(bestpaths) == 1)
+ return (Path *) linitial(bestpaths); /* no need for AND */
+ return (Path *) create_bitmap_and_path(root, rel, bestpaths);
}
-/* qsort comparator to sort in increasing selectivity order */
+/* qsort comparator to sort in increasing index access cost order */
static int
-bitmap_path_comparator(const void *a, const void *b)
+path_usage_comparator(const void *a, const void *b)
{
- Path *pa = *(Path *const *) a;
- Path *pb = *(Path *const *) b;
+ PathClauseUsage *pa = *(PathClauseUsage *const *) a;
+ PathClauseUsage *pb = *(PathClauseUsage *const *) b;
Cost acost;
Cost bcost;
Selectivity aselec;
Selectivity bselec;
- cost_bitmap_tree_node(pa, &acost, &aselec);
- cost_bitmap_tree_node(pb, &bcost, &bselec);
+ cost_bitmap_tree_node(pa->path, &acost, &aselec);
+ cost_bitmap_tree_node(pb->path, &bcost, &bselec);
/*
- * If selectivities are the same, sort by cost. (Note: there used to be
- * logic here to do "fuzzy comparison", but that's a bad idea because it
- * fails to be transitive, which will confuse qsort terribly.)
+ * If costs are the same, sort by selectivity.
*/
- if (aselec < bselec)
+ if (acost < bcost)
return -1;
- if (aselec > bselec)
+ if (acost > bcost)
return 1;
- if (acost < bcost)
+ if (aselec < bselec)
return -1;
- if (acost > bcost)
+ if (aselec > bselec)
return 1;
return 0;
}
/*
- * Estimate the cost of actually executing a BitmapAnd with the given
+ * Estimate the cost of actually executing a bitmap scan with a single
+ * index path (no BitmapAnd, at least not at this level).
+ */
+static Cost
+bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
+ Path *ipath, RelOptInfo *outer_rel)
+{
+ Path bpath;
+
+ cost_bitmap_heap_scan(&bpath, root, rel, ipath, outer_rel);
+
+ return bpath.total_cost;
+}
+
+/*
+ * Estimate the cost of actually executing a BitmapAnd scan with the given
* inputs.
*/
static Cost
return bpath.total_cost;
}
+
+/*
+ * classify_index_clause_usage
+ * Construct a PathClauseUsage struct describing the WHERE clauses and
+ * index predicate clauses used by the given indexscan path.
+ * We consider two clauses the same if they are equal().
+ *
+ * At some point we might want to migrate this info into the Path data
+ * structure proper, but for the moment it's only needed within
+ * choose_bitmap_and().
+ *
+ * *clauselist is used and expanded as needed to identify all the distinct
+ * clauses seen across successive calls. Caller must initialize it to NIL
+ * before first call of a set.
+ */
+static PathClauseUsage *
+classify_index_clause_usage(Path *path, List **clauselist)
+{
+ PathClauseUsage *result;
+ Bitmapset *clauseids;
+ ListCell *lc;
+
+ result = (PathClauseUsage *) palloc(sizeof(PathClauseUsage));
+ result->path = path;
+
+ /* Recursively find the quals and preds used by the path */
+ result->quals = NIL;
+ result->preds = NIL;
+ find_indexpath_quals(path, &result->quals, &result->preds);
+
+ /* Build up a bitmapset representing the quals and preds */
+ clauseids = NULL;
+ foreach(lc, result->quals)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ clauseids = bms_add_member(clauseids,
+ find_list_position(node, clauselist));
+ }
+ foreach(lc, result->preds)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ clauseids = bms_add_member(clauseids,
+ find_list_position(node, clauselist));
+ }
+ result->clauseids = clauseids;
+
+ return result;
+}
+
+
/*
* find_indexpath_quals
*
* Given the Path structure for a plain or bitmap indexscan, extract lists
* of all the indexquals and index predicate conditions used in the Path.
+ * These are appended to the initial contents of *quals and *preds (hence
+ * caller should initialize those to NIL).
*
* This is sort of a simplified version of make_restrictinfo_from_bitmapqual;
* here, we are not trying to produce an accurate representation of the AND/OR
static void
find_indexpath_quals(Path *bitmapqual, List **quals, List **preds)
{
- ListCell *l;
-
- *quals = NIL;
- *preds = NIL;
-
if (IsA(bitmapqual, BitmapAndPath))
{
BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
+ ListCell *l;
foreach(l, apath->bitmapquals)
{
- List *subquals;
- List *subpreds;
-
- find_indexpath_quals((Path *) lfirst(l), &subquals, &subpreds);
- *quals = list_concat(*quals, subquals);
- *preds = list_concat(*preds, subpreds);
+ find_indexpath_quals((Path *) lfirst(l), quals, preds);
}
}
else if (IsA(bitmapqual, BitmapOrPath))
{
BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;
+ ListCell *l;
foreach(l, opath->bitmapquals)
{
- List *subquals;
- List *subpreds;
-
- find_indexpath_quals((Path *) lfirst(l), &subquals, &subpreds);
- *quals = list_concat(*quals, subquals);
- *preds = list_concat(*preds, subpreds);
+ find_indexpath_quals((Path *) lfirst(l), quals, preds);
}
}
else if (IsA(bitmapqual, IndexPath))
{
IndexPath *ipath = (IndexPath *) bitmapqual;
- *quals = get_actual_clauses(ipath->indexclauses);
- *preds = list_copy(ipath->indexinfo->indpred);
+ *quals = list_concat(*quals, get_actual_clauses(ipath->indexclauses));
+ *preds = list_concat(*preds, list_copy(ipath->indexinfo->indpred));
}
else
elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
/*
- * lists_intersect
- * Detect whether two lists have a nonempty intersection,
- * using equal() to compare members.
- *
- * This possibly should go into list.c, but it doesn't yet have any use
- * except in choose_bitmap_and.
+ * find_list_position
+ * Return the given node's position (counting from 0) in the given
+ * list of nodes. If it's not equal() to any existing list member,
+ * add it at the end, and return that position.
*/
-static bool
-lists_intersect(List *list1, List *list2)
+static int
+find_list_position(Node *node, List **nodelist)
{
- ListCell *cell1;
+ int i;
+ ListCell *lc;
- foreach(cell1, list1)
+ i = 0;
+ foreach(lc, *nodelist)
{
- void *datum1 = lfirst(cell1);
- ListCell *cell2;
+ Node *oldnode = (Node *) lfirst(lc);
- foreach(cell2, list2)
- {
- if (equal(lfirst(cell2), datum1))
- return true;
- }
+ if (equal(node, oldnode))
+ return i;
+ i++;
}
- return false;
+ *nodelist = lappend(*nodelist, node);
+
+ return i;
}