*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.172 2005/03/28 00:58:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.173 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
((opclass) == BOOL_BTREE_OPS_OID || (opclass) == BOOL_HASH_OPS_OID)
-static List *group_clauses_by_indexkey(IndexOptInfo *index);
static List *group_clauses_by_indexkey_for_join(Query *root,
IndexOptInfo *index,
Relids outer_relids,
static Relids indexable_outerrelids(IndexOptInfo *index);
static Path *make_innerjoin_index_path(Query *root, IndexOptInfo *index,
List *clausegroups);
-static bool match_index_to_operand(Node *operand, int indexcol,
- IndexOptInfo *index);
static bool match_boolean_index_clause(Node *clause, int indexcol,
IndexOptInfo *index);
static bool match_special_index_operator(Expr *clause, Oid opclass,
* clauses matching column C, because the executor couldn't use them anyway.
* Therefore, there are no empty sublists in the result.
*/
-static List *
+List *
group_clauses_by_indexkey(IndexOptInfo *index)
{
List *clausegroup_list = NIL;
* indexcol: the column number of the index (counting from 0)
* index: the index of interest
*/
-static bool
+bool
match_index_to_operand(Node *operand,
int indexcol,
IndexOptInfo *index)
# Makefile for optimizer/plan
#
# IDENTIFICATION
-# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.12 2003/11/29 19:51:50 pgsql Exp $
+# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.13 2005/04/11 23:06:55 tgl Exp $
#
#-------------------------------------------------------------------------
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = createplan.o initsplan.o planmain.o planner.o setrefs.o subselect.o
+OBJS = createplan.o initsplan.o planagg.o planmain.o planner.o \
+ setrefs.o subselect.o
all: SUBSYS.o
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * planagg.c
+ * Special planning for aggregate queries.
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.1 2005/04/11 23:06:55 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/skey.h"
+#include "catalog/pg_aggregate.h"
+#include "catalog/pg_type.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/planmain.h"
+#include "optimizer/subselect.h"
+#include "parser/parsetree.h"
+#include "parser/parse_clause.h"
+#include "parser/parse_expr.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+
+typedef struct
+{
+ Oid aggfnoid; /* pg_proc Oid of the aggregate (copied from the Aggref) */
+ Oid aggsortop; /* Oid of its sort operator, as reported by fetch_agg_sort_op */
+ Expr *target; /* expression we are aggregating on (the Aggref's target) */
+ IndexPath *path; /* access path for index scan; filled in by build_minmax_path */
+ Cost pathcost; /* estimated cost to fetch first row; filled in by build_minmax_path */
+ Param *param; /* param for subplan's output; filled in by make_agg_subplan */
+} MinMaxAggInfo;
+
+static bool find_minmax_aggs_walker(Node *node, List **context);
+static bool build_minmax_path(Query *root, RelOptInfo *rel,
+ MinMaxAggInfo *info);
+static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info,
+ IndexOptInfo *index, int indexcol);
+static void make_agg_subplan(Query *root, MinMaxAggInfo *info,
+ List *constant_quals);
+static Node *replace_aggs_with_params_mutator(Node *node, List **context);
+static Oid fetch_agg_sort_op(Oid aggfnoid);
+
+
+/*
+ * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
+ *
+ * This checks to see if we can replace MIN/MAX aggregate functions by
+ * subqueries of the form
+ *		(SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
+ * Given a suitable index on tab.col, this can be much faster than the
+ * generic scan-all-the-rows plan.
+ *
+ * root: the Query being planned
+ * tlist: the preprocessed targetlist
+ * best_path: the best path devised for computing the input of a standard
+ *		Agg node; it is used for the cost comparison and to recover any
+ *		constant quals held in a gating ResultPath
+ *
+ * If we are able to optimize all the aggregates, and the result is estimated
+ * to be cheaper than the generic aggregate method, then generate and return
+ * a Plan that does it that way.  Otherwise, return NULL.
+ */
+Plan *
+optimize_minmax_aggregates(Query *root, List *tlist, Path *best_path)
+{
+	RangeTblRef *rtr;
+	RangeTblEntry *rte;
+	RelOptInfo *rel;
+	List	   *aggs_list;
+	ListCell   *l;
+	Cost		total_cost;
+	Path		agg_p;
+	Plan	   *plan;
+	Node	   *hqual;
+	QualCost	tlist_cost;
+	List	   *constant_quals;
+
+	/* Nothing to do if query has no aggregates */
+	if (!root->hasAggs)
+		return NULL;
+
+	Assert(!root->setOperations);		/* shouldn't get here if a setop */
+	Assert(root->rowMarks == NIL);		/* nor if FOR UPDATE */
+
+	/*
+	 * Reject unoptimizable cases.
+	 *
+	 * We don't handle GROUP BY, because our current implementations of
+	 * grouping require looking at all the rows anyway, and so there's not
+	 * much point in optimizing MIN/MAX.
+	 */
+	if (root->groupClause)
+		return NULL;
+
+	/*
+	 * We also restrict the query to reference exactly one table, since
+	 * join conditions can't be handled reasonably.  (We could perhaps
+	 * handle a query containing cartesian-product joins, but it hardly
+	 * seems worth the trouble.)
+	 *
+	 * That one table must not be the parent of an inheritance tree either:
+	 * the index paths built below come from the parent's own index list,
+	 * so an indexscan made from them would never visit rows stored in
+	 * child tables and the MIN/MAX result could be wrong.
+	 */
+	Assert(root->jointree != NULL && IsA(root->jointree, FromExpr));
+	if (list_length(root->jointree->fromlist) != 1)
+		return NULL;
+	rtr = (RangeTblRef *) linitial(root->jointree->fromlist);
+	if (!IsA(rtr, RangeTblRef))
+		return NULL;
+	rte = rt_fetch(rtr->rtindex, root->rtable);
+	if (rte->rtekind != RTE_RELATION || rte->inh)
+		return NULL;
+	rel = find_base_rel(root, rtr->rtindex);
+
+	/*
+	 * Also reject cases with subplans or volatile functions in WHERE.
+	 * This may be overly paranoid, but it's not entirely clear if the
+	 * transformation is safe then.
+	 */
+	if (contain_subplans(root->jointree->quals) ||
+		contain_volatile_functions(root->jointree->quals))
+		return NULL;
+
+	/*
+	 * Since this optimization is not applicable all that often, we want
+	 * to fall out before doing very much work if possible.  Therefore
+	 * we do the work in several passes.  The first pass scans the tlist
+	 * and HAVING qual to find all the aggregates and verify that
+	 * each of them is a MIN/MAX aggregate.  If that succeeds, the second
+	 * pass looks at each aggregate to see if it is optimizable; if so
+	 * we make an IndexPath describing how we would scan it.  (We do not
+	 * try to optimize if only some aggs are optimizable, since that means
+	 * we'll have to scan all the rows anyway.)  If that succeeds, we have
+	 * enough info to compare costs against the generic implementation.
+	 * Only if that test passes do we build a Plan.
+	 */
+
+	/* Pass 1: find all the aggregates */
+	aggs_list = NIL;
+	if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
+		return NULL;
+	if (find_minmax_aggs_walker(root->havingQual, &aggs_list))
+		return NULL;
+
+	/* Pass 2: see if each one is optimizable */
+	total_cost = 0;
+	foreach(l, aggs_list)
+	{
+		MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
+
+		if (!build_minmax_path(root, rel, info))
+			return NULL;
+		total_cost += info->pathcost;
+	}
+
+	/*
+	 * Make the cost comparison.
+	 *
+	 * Note that we don't include evaluation cost of the tlist here;
+	 * this is OK since it isn't included in best_path's cost either,
+	 * and should be the same in either case.
+	 */
+	cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list),
+			 0, 0,
+			 best_path->startup_cost, best_path->total_cost,
+			 best_path->parent->rows);
+
+	if (total_cost > agg_p.total_cost)
+		return NULL;			/* too expensive */
+
+	/*
+	 * OK, we are going to generate an optimized plan.  The first thing we
+	 * need to do is look for any non-variable WHERE clauses that query_planner
+	 * might have removed from the basic plan.  (Normal WHERE clauses will
+	 * be properly incorporated into the sub-plans by create_plan.)  If there
+	 * are any, they will be in a gating Result node atop the best_path.
+	 * They have to be incorporated into a gating Result in each sub-plan
+	 * in order to produce the semantically correct result.
+	 */
+	if (IsA(best_path, ResultPath))
+	{
+		Assert(((ResultPath *) best_path)->subpath != NULL);
+		constant_quals = ((ResultPath *) best_path)->constantqual;
+	}
+	else
+		constant_quals = NIL;
+
+	/* Pass 3: generate subplans and output Param nodes */
+	foreach(l, aggs_list)
+	{
+		make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l), constant_quals);
+	}
+
+	/*
+	 * Modify the targetlist and HAVING qual to reference subquery outputs
+	 */
+	tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist,
+													  &aggs_list);
+	hqual = replace_aggs_with_params_mutator(root->havingQual,
+											 &aggs_list);
+
+	/*
+	 * Generate the output plan --- basically just a Result
+	 */
+	plan = (Plan *) make_result(tlist, hqual, NULL);
+
+	/* Account for evaluation cost of the tlist (make_result did the rest) */
+	cost_qual_eval(&tlist_cost, tlist);
+	plan->startup_cost += tlist_cost.startup;
+	plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple;
+
+	return plan;
+}
+
+/*
+ * find_minmax_aggs_walker
+ *		Walk an expression tree looking for Aggref nodes, collecting the
+ *		distinct MIN/MAX aggregate calls into the list at *context.
+ *
+ * The result is TRUE as soon as an aggregate that is not a MIN/MAX
+ * aggregate is seen, and FALSE if every aggregate in the tree qualifies.
+ * (The sense is inverted from what one might expect because a TRUE return
+ * is what makes expression_tree_walker abandon the scan.)
+ *
+ * The caller must set *context to NIL before the first call; each new
+ * (aggregate function, target expression) pair is appended exactly once.
+ *
+ * Subqueries are not examined, so this is only usable once sublinks have
+ * been reduced to subplans, and no outer-level aggregate references may
+ * be present.
+ */
+static bool
+find_minmax_aggs_walker(Node *node, List **context)
+{
+	Aggref	   *agg;
+	Oid			sortop;
+	MinMaxAggInfo *entry;
+	ListCell   *lc;
+
+	if (node == NULL)
+		return false;
+
+	/* Anything other than an Aggref is handled by generic recursion */
+	if (!IsA(node, Aggref))
+	{
+		Assert(!IsA(node, SubLink));
+		return expression_tree_walker(node, find_minmax_aggs_walker,
+									  (void *) context);
+	}
+
+	agg = (Aggref *) node;
+	Assert(agg->agglevelsup == 0);
+
+	/* foo(*) is surely not optimizable */
+	if (agg->aggstar)
+		return true;
+	/* note: a DISTINCT marking makes no difference to us */
+
+	/* An aggregate without a sort operator is not a MIN/MAX aggregate */
+	sortop = fetch_agg_sort_op(agg->aggfnoid);
+	if (!OidIsValid(sortop))
+		return true;
+
+	/* If an equivalent call was already recorded, there is nothing to add */
+	foreach(lc, *context)
+	{
+		entry = (MinMaxAggInfo *) lfirst(lc);
+		if (entry->aggfnoid == agg->aggfnoid &&
+			equal(entry->target, agg->target))
+			return false;
+	}
+
+	/* First sighting of this call: record it */
+	entry = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo));
+	entry->aggfnoid = agg->aggfnoid;
+	entry->aggsortop = sortop;
+	entry->target = agg->target;
+	*context = lappend(*context, entry);
+
+	/*
+	 * No recursion into the argument is needed, because it cannot contain
+	 * any aggregates.
+	 */
+	return false;
+}
+
+/*
+ * build_minmax_path
+ * Given a MIN/MAX aggregate, try to find an index it can be optimized
+ * with. Build a Path describing the best such index path.
+ *
+ * Returns TRUE if successful, FALSE if not. In the TRUE case, info->path
+ * is filled in with the cheapest candidate found among rel's indexes and
+ * info->pathcost with its estimated cost of fetching a single row. In the
+ * FALSE case info->path is set to NULL and info->pathcost to zero.
+ *
+ * XXX look at sharing more code with indxpath.c.
+ *
+ * Note: check_partial_indexes() must have been run previously.
+ */
+static bool
+build_minmax_path(Query *root, RelOptInfo *rel, MinMaxAggInfo *info)
+{
+ IndexPath *best_path = NULL; /* cheapest candidate seen so far, if any */
+ Cost best_cost = 0; /* its one-row fetch cost */
+ ListCell *l;
+
+ foreach(l, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
+ ScanDirection indexscandir = NoMovementScanDirection;
+ int indexcol;
+ int prevcol;
+ List *restrictclauses;
+ IndexPath *new_path;
+ Cost new_cost;
+
+ /* Ignore non-btree indexes */
+ if (index->relam != BTREE_AM_OID)
+ continue;
+
+ /* Ignore partial indexes that do not match the query */
+ if (index->indpred != NIL && !index->predOK)
+ continue;
+
+ /*
+ * Look for a match to one of the index columns. (In a stupidly
+ * designed index, there could be multiple matches, but we only
+ * care about the first one.) On a match the loop exits early, so
+ * indexcol is left identifying the matching column and indexscandir
+ * the direction in which the index must be scanned.
+ */
+ for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+ {
+ indexscandir = match_agg_to_index_col(info, index, indexcol);
+ if (!ScanDirectionIsNoMovement(indexscandir))
+ break;
+ }
+ if (ScanDirectionIsNoMovement(indexscandir))
+ continue;
+
+ /*
+ * If the match is not at the first index column, we have to verify
+ * that there are "x = something" restrictions on all the earlier
+ * index columns. Since we'll need the restrictclauses list anyway
+ * to build the path, it's convenient to extract that first and then
+ * look through it for the equality restrictions.
+ *
+ * restrictclauses is treated here as one sublist of RestrictInfos
+ * per index column, in column order.
+ */
+ restrictclauses = group_clauses_by_indexkey(index);
+
+ if (list_length(restrictclauses) < indexcol)
+ continue; /* definitely haven't got enough */
+ for (prevcol = 0; prevcol < indexcol; prevcol++)
+ {
+ List *rinfos = (List *) list_nth(restrictclauses, prevcol);
+ ListCell *ll;
+
+ foreach(ll, rinfos)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(ll);
+ int strategy;
+
+ Assert(is_opclause(rinfo->clause));
+ strategy =
+ get_op_opclass_strategy(((OpExpr *) rinfo->clause)->opno,
+ index->classlist[prevcol]);
+ if (strategy == BTEqualStrategyNumber)
+ break;
+ }
+ if (ll == NULL)
+ break; /* none are Equal for this index col */
+ }
+ if (prevcol < indexcol)
+ continue; /* didn't find all Equal clauses */
+
+ /*
+ * Build the access path. We don't bother marking it with pathkeys.
+ */
+ new_path = create_index_path(root, index,
+ restrictclauses,
+ NIL,
+ indexscandir);
+
+ /*
+ * Estimate actual cost of fetching just one row. When the path is
+ * expected to return more than one row, charge its startup cost
+ * plus a 1/rows share of its run cost (total minus startup);
+ * otherwise charge the path's full total cost.
+ */
+ if (new_path->rows > 1.0)
+ new_cost = new_path->path.startup_cost +
+ (new_path->path.total_cost - new_path->path.startup_cost)
+ * 1.0 / new_path->rows;
+ else
+ new_cost = new_path->path.total_cost;
+
+ /*
+ * Keep if first or if cheaper than previous best.
+ */
+ if (best_path == NULL || new_cost < best_cost)
+ {
+ best_path = new_path;
+ best_cost = new_cost;
+ }
+ }
+
+ info->path = best_path;
+ info->pathcost = best_cost;
+ return (best_path != NULL);
+}
+
+/*
+ * match_agg_to_index_col
+ *		Decide whether an aggregate can be computed from one index column.
+ *
+ * There is a match when the aggregate's argument is the same as the data
+ * stored in the index column, and the aggregate's sort operator is the
+ * LessThan or the GreaterThan member of that column's opclass.
+ *
+ * The result is ForwardScanDirection if we match the LessThan member,
+ * BackwardScanDirection if we match the GreaterThan member, and
+ * NoMovementScanDirection when there is no match at all.
+ */
+static ScanDirection
+match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol)
+{
+	/* The aggregated expression has to be what the column indexes */
+	if (match_index_to_operand((Node *) info->target, indexcol, index))
+	{
+		/* See which role, if any, the sort operator plays in the opclass */
+		switch (get_op_opclass_strategy(info->aggsortop,
+										index->classlist[indexcol]))
+		{
+			case BTLessStrategyNumber:
+				return ForwardScanDirection;
+			case BTGreaterStrategyNumber:
+				return BackwardScanDirection;
+			default:
+				break;
+		}
+	}
+
+	return NoMovementScanDirection;
+}
+
+/*
+ * make_agg_subplan
+ *		Construct a suitable plan for a converted aggregate query
+ *
+ * root: the original Query; it is copied, not modified
+ * info: the aggregate being converted.  info->path must already have been
+ *		set by build_minmax_path; on return info->param holds the Param
+ *		that stands for the subplan's output
+ * constant_quals: non-variable WHERE clauses that must gate the subplan,
+ *		or NIL if there are none
+ *
+ * The subplan is an indexscan over info->path, filtered to non-NULL values
+ * of the aggregate target, optionally gated by a Result node, and topped
+ * by a LIMIT 1 node.
+ */
+static void
+make_agg_subplan(Query *root, MinMaxAggInfo *info, List *constant_quals)
+{
+	Query	   *subquery;
+	Plan	   *plan;
+	TargetEntry *tle;
+	SortClause *sortcl;
+	NullTest   *ntest;
+
+	/*
+	 * Generate a suitably modified Query node.  Much of the work here is
+	 * probably unnecessary in the normal case, but we want to make it look
+	 * good if someone tries to EXPLAIN the result.
+	 */
+	subquery = (Query *) copyObject(root);
+	subquery->commandType = CMD_SELECT;
+	subquery->resultRelation = 0;
+	subquery->resultRelations = NIL;
+	subquery->into = NULL;
+	subquery->hasAggs = false;
+	subquery->groupClause = NIL;
+	subquery->havingQual = NULL;
+	subquery->hasHavingQual = false;
+	subquery->distinctClause = NIL;
+
+	/* single tlist entry that is the aggregate target */
+	tle = makeTargetEntry(copyObject(info->target),
+						  1,
+						  pstrdup("agg_target"),
+						  false);
+	subquery->targetList = list_make1(tle);
+
+	/* set up the appropriate ORDER BY entry */
+	sortcl = makeNode(SortClause);
+	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subquery->targetList);
+	sortcl->sortop = info->aggsortop;
+	subquery->sortClause = list_make1(sortcl);
+
+	/* set up LIMIT 1 */
+	subquery->limitOffset = NULL;
+	subquery->limitCount = (Node *) makeConst(INT4OID, sizeof(int4),
+											  Int32GetDatum(1),
+											  false, true);
+
+	/*
+	 * Generate the plan for the subquery.  We already have a Path for
+	 * the basic indexscan, but we have to convert it to a Plan and
+	 * attach a LIMIT node above it.  We might need a gating Result, too,
+	 * to handle any non-variable qual clauses.
+	 *
+	 * Also we must add a "WHERE foo IS NOT NULL" restriction to the
+	 * indexscan, to be sure we don't return a NULL, which'd be contrary
+	 * to the standard behavior of MIN/MAX.  Without it, the first index
+	 * entry in scan order could be a NULL and would become the result.
+	 * XXX ideally this should be done earlier, so that the selectivity
+	 * of the restriction could be included in our cost estimates.  But in
+	 * most cases the fraction of NULLs isn't high enough to change the
+	 * decision.
+	 */
+	plan = create_plan(subquery, (Path *) info->path);
+
+	plan->targetlist = copyObject(subquery->targetList);
+
+	ntest = makeNode(NullTest);
+	ntest->nulltesttype = IS_NOT_NULL;
+	ntest->arg = copyObject(info->target);
+
+	plan->qual = lcons(ntest, plan->qual);
+
+	/*
+	 * The gating Result has to go on after the NULL filter has been
+	 * attached, so that the filter lands on the scan node itself.
+	 */
+	if (constant_quals)
+		plan = (Plan *) make_result(copyObject(plan->targetlist),
+									copyObject(constant_quals),
+									plan);
+
+	plan = (Plan *) make_limit(plan,
+							   subquery->limitOffset,
+							   subquery->limitCount);
+
+	/*
+	 * Convert the plan into an InitPlan, and make a Param for its result.
+	 */
+	info->param = SS_make_initplan_from_plan(subquery, plan,
+											 exprType((Node *) tle->expr),
+											 -1);
+}
+
+/*
+ * replace_aggs_with_params_mutator
+ *		Substitute subplan output Params for the original aggregate calls
+ *
+ * *context is the list of MinMaxAggInfo records.  Each Aggref found in the
+ * tree is looked up there by aggregate function and target expression and
+ * replaced by that record's param; it is an error if no record matches.
+ */
+static Node *
+replace_aggs_with_params_mutator(Node *node, List **context)
+{
+	if (node == NULL)
+		return NULL;
+
+	if (IsA(node, Aggref))
+	{
+		Aggref	   *agg = (Aggref *) node;
+		ListCell   *lc;
+
+		foreach(lc, *context)
+		{
+			MinMaxAggInfo *entry = (MinMaxAggInfo *) lfirst(lc);
+
+			/* Skip records that belong to a different aggregate call */
+			if (entry->aggfnoid != agg->aggfnoid)
+				continue;
+			if (!equal(entry->target, agg->target))
+				continue;
+
+			return (Node *) entry->param;
+		}
+
+		/* Every aggregate should have been recorded in an earlier pass */
+		elog(ERROR, "failed to re-find aggregate info record");
+	}
+
+	Assert(!IsA(node, SubLink));
+
+	return expression_tree_mutator(node, replace_aggs_with_params_mutator,
+								   (void *) context);
+}
+
+/*
+ * fetch_agg_sort_op
+ *		Look up the sort operator associated with an aggregate.
+ *
+ * The result is the operator's OID, or InvalidOid when the aggregate has
+ * no associated sort operator.
+ */
+static Oid
+fetch_agg_sort_op(Oid aggfnoid)
+{
+#ifdef NOT_YET
+	HeapTuple	tup;
+	Oid			sortop = InvalidOid;
+
+	/* look the aggregate up in pg_aggregate */
+	tup = SearchSysCache(AGGFNOID,
+						 ObjectIdGetDatum(aggfnoid),
+						 0, 0, 0);
+	if (HeapTupleIsValid(tup))
+	{
+		sortop = ((Form_pg_aggregate) GETSTRUCT(tup))->aggsortop;
+		ReleaseSysCache(tup);
+	}
+
+	return sortop;
+#else
+	/*
+	 * XXX stub implementation for testing: only a handful of aggregates
+	 * are recognized, by hardwired OID.
+	 */
+	switch (aggfnoid)
+	{
+		case 2132:				/* min(int4) -> int4lt */
+			return 97;
+		case 2116:				/* max(int4) -> int4gt */
+			return 521;
+		case 2145:				/* min(text) -> text_lt */
+			return 664;
+		case 2129:				/* max(text) -> text_gt */
+			return 666;
+		default:
+			return InvalidOid;
+	}
+#endif
+}
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.184 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* If any subplans were generated, or if we're inside a subplan, build
- * initPlan list and extParam/allParam sets for plan nodes.
+ * initPlan list and extParam/allParam sets for plan nodes, and attach
+ * the initPlans to the top plan node.
*/
if (PlannerPlanId != saved_planid || PlannerQueryLevel > 1)
- {
- Cost initplan_cost = 0;
-
- /* Prepare extParam/allParam sets for all nodes in tree */
SS_finalize_plan(plan, parse->rtable);
- /*
- * SS_finalize_plan doesn't handle initPlans, so we have to
- * manually attach them to the topmost plan node, and add their
- * extParams to the topmost node's, too.
- *
- * We also add the total_cost of each initPlan to the startup cost of
- * the top node. This is a conservative overestimate, since in
- * fact each initPlan might be executed later than plan startup,
- * or even not at all.
- */
- plan->initPlan = PlannerInitPlan;
-
- foreach(l, plan->initPlan)
- {
- SubPlan *initplan = (SubPlan *) lfirst(l);
-
- plan->extParam = bms_add_members(plan->extParam,
- initplan->plan->extParam);
- /* allParam must include all members of extParam */
- plan->allParam = bms_add_members(plan->allParam,
- plan->extParam);
- initplan_cost += initplan->plan->total_cost;
- }
-
- plan->startup_cost += initplan_cost;
- plan->total_cost += initplan_cost;
- }
-
/* Return to outer subquery context */
PlannerQueryLevel--;
PlannerInitPlan = saved_initplan;
double sub_tuple_fraction;
Path *cheapest_path;
Path *sorted_path;
+ Path *best_path;
double dNumGroups = 0;
long numGroups = 0;
AggClauseCounts agg_counts;
}
/*
- * Select the best path and create a plan to execute it.
- *
- * If we are doing hashed grouping, we will always read all the input
- * tuples, so use the cheapest-total path. Otherwise, trust
- * query_planner's decision about which to use.
+ * Select the best path. If we are doing hashed grouping, we will
+ * always read all the input tuples, so use the cheapest-total
+ * path. Otherwise, trust query_planner's decision about which to use.
*/
- if (sorted_path && !use_hashed_grouping)
- {
- result_plan = create_plan(parse, sorted_path);
- current_pathkeys = sorted_path->pathkeys;
- }
+ if (use_hashed_grouping || !sorted_path)
+ best_path = cheapest_path;
else
- {
- result_plan = create_plan(parse, cheapest_path);
- current_pathkeys = cheapest_path->pathkeys;
- }
+ best_path = sorted_path;
/*
- * create_plan() returns a plan with just a "flat" tlist of
- * required Vars. Usually we need to insert the sub_tlist as the
- * tlist of the top plan node. However, we can skip that if we
- * determined that whatever query_planner chose to return will be
- * good enough.
+ * Check to see if it's possible to optimize MIN/MAX aggregates.
+ * If so, we will forget all the work we did so far to choose a
+ * "regular" path ... but we had to do it anyway to be able to
+ * tell which way is cheaper.
*/
- if (need_tlist_eval)
+ result_plan = optimize_minmax_aggregates(parse,
+ tlist,
+ best_path);
+ if (result_plan != NULL)
+ {
+ /*
+ * optimize_minmax_aggregates generated the full plan, with
+ * the right tlist, and it has no sort order.
+ */
+ current_pathkeys = NIL;
+ }
+ else
{
/*
- * If the top-level plan node is one that cannot do expression
- * evaluation, we must insert a Result node to project the
- * desired tlist.
+ * Normal case --- create a plan according to query_planner's
+ * results.
*/
- if (!is_projection_capable_plan(result_plan))
+ result_plan = create_plan(parse, best_path);
+ current_pathkeys = best_path->pathkeys;
+
+ /*
+ * create_plan() returns a plan with just a "flat" tlist of
+ * required Vars. Usually we need to insert the sub_tlist as the
+ * tlist of the top plan node. However, we can skip that if we
+ * determined that whatever query_planner chose to return will be
+ * good enough.
+ */
+ if (need_tlist_eval)
{
- result_plan = (Plan *) make_result(sub_tlist, NULL,
- result_plan);
+ /*
+ * If the top-level plan node is one that cannot do expression
+ * evaluation, we must insert a Result node to project the
+ * desired tlist.
+ */
+ if (!is_projection_capable_plan(result_plan))
+ {
+ result_plan = (Plan *) make_result(sub_tlist, NULL,
+ result_plan);
+ }
+ else
+ {
+ /*
+ * Otherwise, just replace the subplan's flat tlist with
+ * the desired tlist.
+ */
+ result_plan->targetlist = sub_tlist;
+ }
+
+ /*
+ * Also, account for the cost of evaluation of the sub_tlist.
+ *
+ * Up to now, we have only been dealing with "flat" tlists,
+ * containing just Vars. So their evaluation cost is zero
+ * according to the model used by cost_qual_eval() (or if you
+ * prefer, the cost is factored into cpu_tuple_cost). Thus we
+ * can avoid accounting for tlist cost throughout
+ * query_planner() and subroutines. But now we've inserted a
+ * tlist that might contain actual operators, sub-selects, etc
+ * --- so we'd better account for its cost.
+ *
+ * Below this point, any tlist eval cost for added-on nodes
+ * should be accounted for as we create those nodes.
+ * Presently, of the node types we can add on, only Agg and
+ * Group project new tlists (the rest just copy their input
+ * tuples) --- so make_agg() and make_group() are responsible
+ * for computing the added cost.
+ */
+ cost_qual_eval(&tlist_cost, sub_tlist);
+ result_plan->startup_cost += tlist_cost.startup;
+ result_plan->total_cost += tlist_cost.startup +
+ tlist_cost.per_tuple * result_plan->plan_rows;
}
else
{
/*
- * Otherwise, just replace the subplan's flat tlist with
- * the desired tlist.
+ * Since we're using query_planner's tlist and not the one
+ * make_subplanTargetList calculated, we have to refigure any
+ * grouping-column indexes make_subplanTargetList computed.
*/
- result_plan->targetlist = sub_tlist;
+ locate_grouping_columns(parse, tlist, result_plan->targetlist,
+ groupColIdx);
}
/*
- * Also, account for the cost of evaluation of the sub_tlist.
- *
- * Up to now, we have only been dealing with "flat" tlists,
- * containing just Vars. So their evaluation cost is zero
- * according to the model used by cost_qual_eval() (or if you
- * prefer, the cost is factored into cpu_tuple_cost). Thus we
- * can avoid accounting for tlist cost throughout
- * query_planner() and subroutines. But now we've inserted a
- * tlist that might contain actual operators, sub-selects, etc
- * --- so we'd better account for its cost.
+ * Insert AGG or GROUP node if needed, plus an explicit sort step
+ * if necessary.
*
- * Below this point, any tlist eval cost for added-on nodes
- * should be accounted for as we create those nodes.
- * Presently, of the node types we can add on, only Agg and
- * Group project new tlists (the rest just copy their input
- * tuples) --- so make_agg() and make_group() are responsible
- * for computing the added cost.
- */
- cost_qual_eval(&tlist_cost, sub_tlist);
- result_plan->startup_cost += tlist_cost.startup;
- result_plan->total_cost += tlist_cost.startup +
- tlist_cost.per_tuple * result_plan->plan_rows;
- }
- else
- {
- /*
- * Since we're using query_planner's tlist and not the one
- * make_subplanTargetList calculated, we have to refigure any
- * grouping-column indexes make_subplanTargetList computed.
+ * HAVING clause, if any, becomes qual of the Agg or Group node.
*/
- locate_grouping_columns(parse, tlist, result_plan->targetlist,
- groupColIdx);
- }
+ if (use_hashed_grouping)
+ {
+ /* Hashed aggregate plan --- no sort needed */
+ result_plan = (Plan *) make_agg(parse,
+ tlist,
+ (List *) parse->havingQual,
+ AGG_HASHED,
+ numGroupCols,
+ groupColIdx,
+ numGroups,
+ agg_counts.numAggs,
+ result_plan);
+ /* Hashed aggregation produces randomly-ordered results */
+ current_pathkeys = NIL;
+ }
+ else if (parse->hasAggs)
+ {
+ /* Plain aggregate plan --- sort if needed */
+ AggStrategy aggstrategy;
- /*
- * Insert AGG or GROUP node if needed, plus an explicit sort step
- * if necessary.
- *
- * HAVING clause, if any, becomes qual of the Agg or Group node.
- */
- if (use_hashed_grouping)
- {
- /* Hashed aggregate plan --- no sort needed */
- result_plan = (Plan *) make_agg(parse,
- tlist,
- (List *) parse->havingQual,
- AGG_HASHED,
- numGroupCols,
- groupColIdx,
- numGroups,
- agg_counts.numAggs,
- result_plan);
- /* Hashed aggregation produces randomly-ordered results */
- current_pathkeys = NIL;
- }
- else if (parse->hasAggs)
- {
- /* Plain aggregate plan --- sort if needed */
- AggStrategy aggstrategy;
+ if (parse->groupClause)
+ {
+ if (!pathkeys_contained_in(group_pathkeys,
+ current_pathkeys))
+ {
+ result_plan = (Plan *)
+ make_sort_from_groupcols(parse,
+ parse->groupClause,
+ groupColIdx,
+ result_plan);
+ current_pathkeys = group_pathkeys;
+ }
+ aggstrategy = AGG_SORTED;
- if (parse->groupClause)
+ /*
+ * The AGG node will not change the sort ordering of its
+ * groups, so current_pathkeys describes the result too.
+ */
+ }
+ else
+ {
+ aggstrategy = AGG_PLAIN;
+ /* Result will be only one row anyway; no sort order */
+ current_pathkeys = NIL;
+ }
+
+ result_plan = (Plan *) make_agg(parse,
+ tlist,
+ (List *) parse->havingQual,
+ aggstrategy,
+ numGroupCols,
+ groupColIdx,
+ numGroups,
+ agg_counts.numAggs,
+ result_plan);
+ }
+ else if (parse->groupClause)
{
+ /*
+ * GROUP BY without aggregation, so insert a group node (plus
+ * the appropriate sort node, if necessary).
+ *
+ * Add an explicit sort if we couldn't make the path come
+ * out the way the GROUP node needs it.
+ */
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
{
result_plan = (Plan *)
result_plan);
current_pathkeys = group_pathkeys;
}
- aggstrategy = AGG_SORTED;
- /*
- * The AGG node will not change the sort ordering of its
- * groups, so current_pathkeys describes the result too.
- */
+ result_plan = (Plan *) make_group(parse,
+ tlist,
+ (List *) parse->havingQual,
+ numGroupCols,
+ groupColIdx,
+ dNumGroups,
+ result_plan);
+ /* The Group node won't change sort ordering */
}
- else
+ else if (parse->hasHavingQual)
{
- aggstrategy = AGG_PLAIN;
- /* Result will be only one row anyway; no sort order */
- current_pathkeys = NIL;
- }
-
- result_plan = (Plan *) make_agg(parse,
- tlist,
- (List *) parse->havingQual,
- aggstrategy,
- numGroupCols,
- groupColIdx,
- numGroups,
- agg_counts.numAggs,
- result_plan);
- }
- else if (parse->groupClause)
- {
- /*
- * GROUP BY without aggregation, so insert a group node (plus the
- * appropriate sort node, if necessary).
- *
- * Add an explicit sort if we couldn't make the path come
- * out the way the GROUP node needs it.
- */
- if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
- {
- result_plan = (Plan *)
- make_sort_from_groupcols(parse,
- parse->groupClause,
- groupColIdx,
- result_plan);
- current_pathkeys = group_pathkeys;
+ /*
+ * No aggregates, and no GROUP BY, but we have a HAVING qual.
+ * This is a degenerate case in which we are supposed to emit
+ * either 0 or 1 row depending on whether HAVING succeeds.
+ * Furthermore, there cannot be any variables in either HAVING
+ * or the targetlist, so we actually do not need the FROM table
+ * at all! We can just throw away the plan-so-far and generate
+ * a Result node. This is a sufficiently unusual corner case
+ * that it's not worth contorting the structure of this routine
+ * to avoid having to generate the plan in the first place.
+ */
+ result_plan = (Plan *) make_result(tlist,
+ parse->havingQual,
+ NULL);
}
-
- result_plan = (Plan *) make_group(parse,
- tlist,
- (List *) parse->havingQual,
- numGroupCols,
- groupColIdx,
- dNumGroups,
- result_plan);
- /* The Group node won't change sort ordering */
- }
- else if (parse->hasHavingQual)
- {
- /*
- * No aggregates, and no GROUP BY, but we have a HAVING qual.
- * This is a degenerate case in which we are supposed to emit
- * either 0 or 1 row depending on whether HAVING succeeds.
- * Furthermore, there cannot be any variables in either HAVING
- * or the targetlist, so we actually do not need the FROM table
- * at all! We can just throw away the plan-so-far and generate
- * a Result node. This is a sufficiently unusual corner case
- * that it's not worth contorting the structure of this routine
- * to avoid having to generate the plan in the first place.
- */
- result_plan = (Plan *) make_result(tlist,
- parse->havingQual,
- NULL);
- }
+ } /* end of non-minmax-aggregate case */
} /* end of if (setOperations) */
/*
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.95 2005/04/06 16:34:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.96 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* SS_finalize_plan - do final sublink processing for a completed Plan.
*
- * This recursively computes the extParam and allParam sets
- * for every Plan node in the given plan tree.
+ * This recursively computes the extParam and allParam sets for every Plan
+ * node in the given plan tree. It also attaches any generated InitPlans
+ * to the top plan node.
*/
void
SS_finalize_plan(Plan *plan, List *rtable)
{
Bitmapset *outer_params = NULL;
Bitmapset *valid_params = NULL;
+ Cost initplan_cost = 0;
int paramid;
ListCell *l;
bms_free(outer_params);
bms_free(valid_params);
+
+ /*
+ * Finally, attach any initPlans to the topmost plan node,
+ * and add their extParams to the topmost node's, too.
+ *
+ * We also add the total_cost of each initPlan to the startup cost of
+ * the top node. This is a conservative overestimate, since in
+ * fact each initPlan might be executed later than plan startup,
+ * or even not at all.
+ */
+ plan->initPlan = PlannerInitPlan;
+ PlannerInitPlan = NIL; /* make sure they're not attached twice */
+
+ foreach(l, plan->initPlan)
+ {
+ SubPlan *initplan = (SubPlan *) lfirst(l);
+
+ plan->extParam = bms_add_members(plan->extParam,
+ initplan->plan->extParam);
+ /* allParam must include all members of extParam */
+ plan->allParam = bms_add_members(plan->allParam,
+ plan->extParam);
+ initplan_cost += initplan->plan->total_cost;
+ }
+
+ plan->startup_cost += initplan_cost;
+ plan->total_cost += initplan_cost;
}
/*
return expression_tree_walker(node, finalize_primnode,
(void *) context);
}
+
+/*
+ * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
+ *
+ * The plan is expected to return a scalar value of the indicated type.
+ * We build an EXPR_SUBLINK SubPlan node and put it into the initplan
+ * list for the current query level. A Param that represents the initplan's
+ * output is returned.
+ *
+ * root: query whose rtable is passed to SS_finalize_plan and stored in
+ * the new SubPlan node
+ * plan: plan tree to wrap; it becomes the SubPlan's plan
+ * resulttype, resulttypmod: handed to generate_new_param to build the
+ * output Param
+ *
+ * Side effects: the SubPlan is appended to PlannerInitPlan, PlannerPlanId
+ * is advanced by one, and the plan is run through SS_finalize_plan.
+ *
+ * We assume the plan hasn't been put through SS_finalize_plan.
+ */
+Param *
+SS_make_initplan_from_plan(Query *root, Plan *plan,
+ Oid resulttype, int32 resulttypmod)
+{
+ List *saved_initplan = PlannerInitPlan;
+ SubPlan *node;
+ Param *prm;
+ Bitmapset *tmpset;
+ int paramid;
+
+ /*
+ * Set up for a new level of subquery. This is just to keep
+ * SS_finalize_plan from becoming confused. PlannerInitPlan is emptied
+ * because SS_finalize_plan attaches whatever is in that list to the
+ * plan it is given; the outer level's list is kept in saved_initplan
+ * and put back below.
+ */
+ PlannerQueryLevel++;
+ PlannerInitPlan = NIL;
+
+ /*
+ * Build extParam/allParam sets for plan nodes.
+ */
+ SS_finalize_plan(plan, root->rtable);
+
+ /* Return to outer subquery context */
+ PlannerQueryLevel--;
+ PlannerInitPlan = saved_initplan;
+
+ /*
+ * Create a SubPlan node and add it to the outer list of InitPlans.
+ */
+ node = makeNode(SubPlan);
+ node->subLinkType = EXPR_SUBLINK;
+ node->plan = plan;
+ node->plan_id = PlannerPlanId++; /* Assign unique ID to this
+ * SubPlan */
+
+ node->rtable = root->rtable;
+
+ PlannerInitPlan = lappend(PlannerInitPlan, node);
+
+ /*
+ * Make parParam list of params that current query level will pass to
+ * this child plan. (In current usage there probably aren't any.)
+ * These are the members of the plan's extParam whose PlannerParamList
+ * entry has abslevel equal to the current PlannerQueryLevel. The walk
+ * is done over a scratch copy, which is freed afterwards; presumably
+ * bms_first_member removes each member it returns (TODO confirm).
+ */
+ tmpset = bms_copy(plan->extParam);
+ while ((paramid = bms_first_member(tmpset)) >= 0)
+ {
+ PlannerParamItem *pitem = list_nth(PlannerParamList, paramid);
+
+ if (pitem->abslevel == PlannerQueryLevel)
+ node->parParam = lappend_int(node->parParam, paramid);
+ }
+ bms_free(tmpset);
+
+ /*
+ * Make a Param that will be the subplan's output, and record its
+ * paramid as the single entry of the SubPlan's setParam list.
+ */
+ prm = generate_new_param(resulttype, resulttypmod);
+ node->setParam = list_make1_int(prm->paramid);
+
+ return prm;
+}
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.122 2005/03/31 22:46:14 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.123 2005/04/11 23:06:56 tgl Exp $
*
* NOTES
* Eventually, the index information should go through here, too.
0, 0);
}
+/*
+ * get_op_opclass_strategy
+ *
+ * Get the operator's strategy number within the specified opclass,
+ * or 0 if it's not a member of the opclass.
+ *
+ * opno: OID of the operator to look up
+ * opclass: OID of the operator class to search
+ *
+ * The lookup is a syscache probe on AMOPOPID keyed by (opno, opclass).
+ * The strategy number is copied out of the cached tuple before the
+ * cache reference is released, so the caller has nothing to release.
+ */
+int
+get_op_opclass_strategy(Oid opno, Oid opclass)
+{
+ HeapTuple tp;
+ Form_pg_amop amop_tup;
+ int result;
+
+ tp = SearchSysCache(AMOPOPID,
+ ObjectIdGetDatum(opno),
+ ObjectIdGetDatum(opclass),
+ 0, 0);
+ if (!HeapTupleIsValid(tp))
+ return 0; /* no entry found: operator is not in this opclass */
+ amop_tup = (Form_pg_amop) GETSTRUCT(tp);
+ result = amop_tup->amopstrategy; /* save value before releasing tuple */
+ ReleaseSysCache(tp);
+ return result;
+}
+
/*
* get_op_opclass_properties
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.80 2005/03/27 06:29:49 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.81 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern void create_index_paths(Query *root, RelOptInfo *rel);
extern Path *best_inner_indexscan(Query *root, RelOptInfo *rel,
Relids outer_relids, JoinType jointype);
+extern List *group_clauses_by_indexkey(IndexOptInfo *index);
extern List *group_clauses_by_indexkey_for_or(IndexOptInfo *index,
Expr *orsubclause);
+extern bool match_index_to_operand(Node *operand, int indexcol,
+ IndexOptInfo *index);
extern List *expand_indexqual_conditions(IndexOptInfo *index,
List *clausegroups);
extern void check_partial_indexes(Query *root, RelOptInfo *rel);
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.80 2005/03/10 23:21:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.81 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern void query_planner(Query *root, List *tlist, double tuple_fraction,
Path **cheapest_path, Path **sorted_path);
+/*
+ * prototypes for plan/planagg.c
+ */
+extern Plan *optimize_minmax_aggregates(Query *root, List *tlist,
+ Path *best_path);
+
/*
* prototypes for plan/createplan.c
*/
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.23 2004/12/31 22:03:36 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.24 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern Node *SS_replace_correlation_vars(Node *expr);
extern Node *SS_process_sublinks(Node *expr, bool isQual);
extern void SS_finalize_plan(Plan *plan, List *rtable);
+extern Param *SS_make_initplan_from_plan(Query *root, Plan *plan,
+ Oid resulttype, int32 resulttypmod);
#endif /* SUBSELECT_H */
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.96 2005/03/31 22:46:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.97 2005/04/11 23:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
} IOFuncSelector;
extern bool op_in_opclass(Oid opno, Oid opclass);
+extern int get_op_opclass_strategy(Oid opno, Oid opclass);
extern void get_op_opclass_properties(Oid opno, Oid opclass,
int *strategy, Oid *subtype,
bool *recheck);
t | t | f | | f | t
(1 row)
+--
+-- Test several cases that should be optimized into indexscans instead of
+-- the generic aggregate implementation. We can't actually verify that they
+-- are done as indexscans, but we can check that the results are correct.
+--
+-- Basic cases
+select max(unique1) from tenk1;
+ max
+------
+ 9999
+(1 row)
+
+select max(unique1) from tenk1 where unique1 < 42;
+ max
+-----
+ 41
+(1 row)
+
+select max(unique1) from tenk1 where unique1 > 42;
+ max
+------
+ 9999
+(1 row)
+
+select max(unique1) from tenk1 where unique1 > 42000;
+ max
+-----
+
+(1 row)
+
+-- multi-column index (uses tenk1_thous_tenthous)
+select max(tenthous) from tenk1 where thousand = 33;
+ max
+------
+ 9033
+(1 row)
+
+select min(tenthous) from tenk1 where thousand = 33;
+ min
+-----
+ 33
+(1 row)
+
+-- check parameter propagation into an indexscan subquery
+select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
+from int4_tbl;
+ f1 | gt
+-------------+----
+ 0 | 1
+ 123456 |
+ -123456 | 0
+ 2147483647 |
+ -2147483647 | 0
+(5 rows)
+
CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops);
CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops);
CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
+CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);
CREATE INDEX tenk2_hundred ON tenk2 USING btree(hundred int4_ops);
BOOL_OR(NOT b2) AS "f",
BOOL_OR(NOT b3) AS "t"
FROM bool_test;
+
+--
+-- Test several cases that should be optimized into indexscans instead of
+-- the generic aggregate implementation. We can't actually verify that they
+-- are done as indexscans, but we can check that the results are correct.
+--
+
+-- Basic cases
+select max(unique1) from tenk1;
+select max(unique1) from tenk1 where unique1 < 42;
+select max(unique1) from tenk1 where unique1 > 42;
+select max(unique1) from tenk1 where unique1 > 42000;
+
+-- multi-column index (uses tenk1_thous_tenthous)
+select max(tenthous) from tenk1 where thousand = 33;
+select min(tenthous) from tenk1 where thousand = 33;
+
+-- check parameter propagation into an indexscan subquery
+select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
+from int4_tbl;
CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
+CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
+
CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);