fixes things so that it works for cases where nested removals are possible.
The overhead of the optimization should be significantly less, as well.
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.383 2010/02/16 22:34:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.384 2010/03/28 22:59:32 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
WRITE_FLOAT_FIELD(rows, "%.0f");
}
-static void
-_outNoOpPath(StringInfo str, NoOpPath *node)
-{
- WRITE_NODE_TYPE("NOOPPATH");
-
- _outPathInfo(str, (Path *) node);
-
- WRITE_NODE_FIELD(subpath);
-}
-
static void
_outNestPath(StringInfo str, NestPath *node)
{
case T_UniquePath:
_outUniquePath(str, obj);
break;
- case T_NoOpPath:
- _outNoOpPath(str, obj);
- break;
case T_NestPath:
_outNestPath(str, obj);
break;
-$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.52 2009/09/29 01:20:34 tgl Exp $
+$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.53 2010/03/28 22:59:32 tgl Exp $
Optimizer
=========
NestPath - nested-loop joins
MergePath - merge joins
HashPath - hash joins
- NoOpPath - same as its input path (used when a join is removed)
EquivalenceClass - a data structure representing a set of values known equal
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.193 2010/02/26 02:00:44 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.194 2010/03/28 22:59:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
ptype = "Unique";
subpath = ((UniquePath *) path)->subpath;
break;
- case T_NoOpPath:
- ptype = "NoOp";
- subpath = ((NoOpPath *) path)->subpath;
- break;
case T_NestPath:
ptype = "NestLoop";
join = true;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.131 2010/03/22 13:57:15 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.132 2010/03/28 22:59:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "optimizer/paths.h"
-static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel,
- RelOptInfo *outerrel, RelOptInfo *innerrel,
- List *restrictlist, JoinType jointype);
-static void generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
- RelOptInfo *outerrel);
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
{
List *mergeclause_list = NIL;
- /*
- * 0. Consider join removal. This is always the most efficient strategy,
- * so if it works, there's no need to consider anything further.
- */
- if (join_is_removable(root, joinrel, outerrel, innerrel,
- restrictlist, jointype))
- {
- generate_outer_only(root, joinrel, outerrel);
- return;
- }
-
/*
* Find potential mergejoin clauses. We can skip this if we are not
* interested in doing a mergejoin. However, mergejoin is currently our
* only way of implementing full outer joins, so override mergejoin
* disable if it's a full join.
- *
- * Note: do this after join_is_removable(), because this sets the
- * outer_is_left flags in the mergejoin clauses, while join_is_removable
- * uses those flags for its own purposes. Currently, they set the flags
- * the same way anyway, but let's avoid unnecessary entanglement.
*/
if (enable_mergejoin || jointype == JOIN_FULL)
mergeclause_list = select_mergejoin_clauses(root,
return false; /* no good for these input relations */
}
-/*
- * join_is_removable
- * Determine whether we need not perform the join at all, because
- * it will just duplicate its left input.
- *
- * This is true for a left join for which the join condition cannot match
- * more than one inner-side row. (There are other possibly interesting
- * cases, but we don't have the infrastructure to prove them.) We also
- * have to check that the inner side doesn't generate any variables needed
- * above the join.
- *
- * Note: there is no need to consider the symmetrical case of duplicating the
- * right input, because add_paths_to_joinrel() will be called with each rel
- * on the outer side.
- */
-static bool
-join_is_removable(PlannerInfo *root,
- RelOptInfo *joinrel,
- RelOptInfo *outerrel,
- RelOptInfo *innerrel,
- List *restrictlist,
- JoinType jointype)
-{
- List *clause_list = NIL;
- ListCell *l;
- int attroff;
-
- /*
- * Currently, we only know how to remove left joins to a baserel with
- * unique indexes. We can check most of these criteria pretty trivially
- * to avoid doing useless extra work. But checking whether any of the
- * indexes are unique would require iterating over the indexlist, so for
- * now we just make sure there are indexes of some sort or other. If none
- * of them are unique, join removal will still fail, just slightly later.
- */
- if (jointype != JOIN_LEFT ||
- innerrel->reloptkind == RELOPT_JOINREL ||
- innerrel->rtekind != RTE_RELATION ||
- innerrel->indexlist == NIL)
- return false;
-
- /*
- * We can't remove the join if any inner-rel attributes are used above the
- * join.
- *
- * Note that this test only detects use of inner-rel attributes in higher
- * join conditions and the target list. There might be such attributes in
- * pushed-down conditions at this join, too. We check that case below.
- *
- * As a micro-optimization, it seems better to start with max_attr and
- * count down rather than starting with min_attr and counting up, on the
- * theory that the system attributes are somewhat less likely to be wanted
- * and should be tested last.
- */
- for (attroff = innerrel->max_attr - innerrel->min_attr;
- attroff >= 0;
- attroff--)
- {
- if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids))
- return false;
- }
-
- /*
- * Similarly check that the inner rel doesn't produce any PlaceHolderVars
- * that will be used above the join.
- */
- foreach(l, root->placeholder_list)
- {
- PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
-
- if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids) &&
- !bms_is_subset(phinfo->ph_needed, joinrel->relids))
- return false;
- }
-
- /*
- * Search for mergejoinable clauses that constrain the inner rel against
- * either the outer rel or a pseudoconstant. If an operator is
- * mergejoinable then it behaves like equality for some btree opclass, so
- * it's what we want. The mergejoinability test also eliminates clauses
- * containing volatile functions, which we couldn't depend on.
- */
- foreach(l, restrictlist)
- {
- RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
-
- /*
- * If we find a pushed-down clause, it must have come from above the
- * outer join and it must contain references to the inner rel. (If it
- * had only outer-rel variables, it'd have been pushed down into the
- * outer rel.) Therefore, we can conclude that join removal is unsafe
- * without any examination of the clause contents.
- */
- if (restrictinfo->is_pushed_down)
- return false;
-
- /* Ignore if it's not a mergejoinable clause */
- if (!restrictinfo->can_join ||
- restrictinfo->mergeopfamilies == NIL)
- continue; /* not mergejoinable */
-
- /*
- * Check if clause has the form "outer op inner" or "inner op outer".
- */
- if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
- continue; /* no good for these input relations */
-
- /* OK, add to list */
- clause_list = lappend(clause_list, restrictinfo);
- }
-
- /* Now examine the rel's restriction clauses for var = const clauses */
- foreach(l, innerrel->baserestrictinfo)
- {
- RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
-
- /*
- * Note: can_join won't be set for a restriction clause, but
- * mergeopfamilies will be if it has a mergejoinable operator and
- * doesn't contain volatile functions.
- */
- if (restrictinfo->mergeopfamilies == NIL)
- continue; /* not mergejoinable */
-
- /*
- * The clause certainly doesn't refer to anything but the given rel.
- * If either side is pseudoconstant then we can use it.
- */
- if (bms_is_empty(restrictinfo->left_relids))
- {
- /* righthand side is inner */
- restrictinfo->outer_is_left = true;
- }
- else if (bms_is_empty(restrictinfo->right_relids))
- {
- /* lefthand side is inner */
- restrictinfo->outer_is_left = false;
- }
- else
- continue;
-
- /* OK, add to list */
- clause_list = lappend(clause_list, restrictinfo);
- }
-
- /* Now examine the indexes to see if we have a matching unique index */
- if (relation_has_unique_index_for(root, innerrel, clause_list))
- return true;
-
- /*
- * Some day it would be nice to check for other methods of establishing
- * distinctness.
- */
- return false;
-}
-
-/*
- * generate_outer_only
- * Generate "join" paths when we have found the join is removable.
- */
-static void
-generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
- RelOptInfo *outerrel)
-{
- ListCell *lc;
-
- /*
- * For the moment, replicate all of the outerrel's paths as join paths.
- * Some of them might not really be interesting above the join, if they
- * have sort orderings that have no real use except to do a mergejoin for
- * the join we've just found we don't need. But distinguishing that case
- * probably isn't worth the extra code it would take.
- */
- foreach(lc, outerrel->pathlist)
- {
- Path *outerpath = (Path *) lfirst(lc);
-
- add_path(joinrel, (Path *)
- create_noop_path(root, joinrel, outerpath));
- }
-}
-
/*
* sort_inner_and_outer
* Create mergejoin join paths by explicitly sorting both the outer and
# Makefile for optimizer/plan
#
# IDENTIFICATION
-# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.15 2008/02/19 10:30:07 petere Exp $
+# $PostgreSQL: pgsql/src/backend/optimizer/plan/Makefile,v 1.16 2010/03/28 22:59:32 tgl Exp $
#
#-------------------------------------------------------------------------
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = createplan.o initsplan.o planagg.o planmain.o planner.o \
+OBJS = analyzejoins.o createplan.o initsplan.o planagg.o planmain.o planner.o \
setrefs.o subselect.o
include $(top_srcdir)/src/backend/common.mk
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * analyzejoins.c
+ * Routines for simplifying joins after initial query analysis
+ *
+ * While we do a great deal of join simplification in prep/prepjointree.c,
+ * certain optimizations cannot be performed at that stage for lack of
+ * detailed information about the query. The routines here are invoked
+ * after initsplan.c has done its work, and can do additional join removal
+ * and simplification steps based on the information extracted. The penalty
+ * is that we have to work harder to clean up after ourselves when we modify
+ * the query, since the derived data structures have to be updated too.
+ *
+ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/analyzejoins.c,v 1.1 2010/03/28 22:59:32 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/planmain.h"
+
+/* local functions */
+static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
+static void remove_rel_from_query(PlannerInfo *root, int relid);
+static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
+
+
+/*
+ * remove_useless_joins
+ * Check for relations that don't actually need to be joined at all,
+ * and remove them from the query.
+ *
+ * We are passed the current joinlist and return the updated list. Other
+ * data structures that have to be updated are accessible via "root".
+ *
+ * Each SpecialJoinInfo in root->join_info_list is tested with
+ * join_is_removable(). For every one that passes, the single relation in
+ * its min_righthand is handed to remove_rel_from_query() and
+ * remove_rel_from_joinlist(), the SpecialJoinInfo itself is dropped from
+ * join_info_list, and the scan starts over from the top. The function
+ * returns once a complete pass over the list finds nothing removable.
+ *
+ * An ERROR is raised if the removed relation does not appear exactly once
+ * in the joinlist.
+ */
+List *
+remove_useless_joins(PlannerInfo *root, List *joinlist)
+{
+ ListCell *lc;
+
+ /*
+ * We are only interested in relations that are left-joined to, so we
+ * can scan the join_info_list to find them easily.
+ */
+restart:
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+ int innerrelid;
+ int nremoved;
+
+ /* Skip if not removable */
+ if (!join_is_removable(root, sjinfo))
+ continue;
+
+ /*
+ * Currently, join_is_removable can only succeed when the sjinfo's
+ * righthand is a single baserel. Remove that rel from the query and
+ * joinlist.
+ */
+ innerrelid = bms_singleton_member(sjinfo->min_righthand);
+
+ remove_rel_from_query(root, innerrelid);
+
+ /*
+ * We verify that exactly one reference gets removed from joinlist.
+ * nremoved must be zeroed first because remove_rel_from_joinlist only
+ * increments it.
+ */
+ nremoved = 0;
+ joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
+ if (nremoved != 1)
+ elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
+
+ /*
+ * We can delete this SpecialJoinInfo from the list too, since it's no
+ * longer of interest.
+ */
+ root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
+
+ /*
+ * Restart the scan. This is necessary to ensure we find all
+ * removable joins independently of ordering of the join_info_list
+ * (note that removal of attr_needed bits may make a join appear
+ * removable that did not before). Also, since we just deleted the
+ * current list cell, we'd have to have some kluge to continue the
+ * list scan anyway.
+ */
+ goto restart;
+ }
+
+ return joinlist;
+}
+
+/*
+ * clause_sides_match_join
+ * Determine whether a join clause is of the right form to use in this join.
+ *
+ * We already know that the clause is a binary opclause referencing only the
+ * rels in the current join. The point here is to check whether it has the
+ * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
+ * rather than mixing outer and inner vars on either side. If it matches,
+ * we set the transient flag outer_is_left to identify which side is which.
+ *
+ * rinfo: clause to test; its left_relids and right_relids are read, and
+ * its outer_is_left flag is written on a match.
+ * outerrelids, innerrelids: relid sets of the two sides of the join.
+ *
+ * Returns true if left_relids is a subset of one side and right_relids a
+ * subset of the other; the "left is outer" arrangement is tested first.
+ * Returns false otherwise, leaving outer_is_left untouched.
+ */
+static inline bool
+clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
+ Relids innerrelids)
+{
+ if (bms_is_subset(rinfo->left_relids, outerrelids) &&
+ bms_is_subset(rinfo->right_relids, innerrelids))
+ {
+ /* lefthand side is outer */
+ rinfo->outer_is_left = true;
+ return true;
+ }
+ else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
+ bms_is_subset(rinfo->right_relids, outerrelids))
+ {
+ /* righthand side is outer */
+ rinfo->outer_is_left = false;
+ return true;
+ }
+ return false; /* no good for these input relations */
+}
+
+/*
+ * join_is_removable
+ * Check whether we need not perform this special join at all, because
+ * it will just duplicate its left input.
+ *
+ * This is true for a left join for which the join condition cannot match
+ * more than one inner-side row. (There are other possibly interesting
+ * cases, but we don't have the infrastructure to prove them.) We also
+ * have to check that the inner side doesn't generate any variables needed
+ * above the join.
+ *
+ * Returns true only when all of the following hold:
+ * - the special join is a JOIN_LEFT without delay_upper_joins set, and
+ * its min_righthand contains exactly one relation;
+ * - that relation is a RELOPT_BASEREL of rtekind RTE_RELATION with a
+ * non-empty indexlist;
+ * - no attribute of the relation, and no PlaceHolderVar whose ph_eval_at
+ * lies within it, is needed outside min_lefthand + min_righthand;
+ * - no clause in the relation's joininfo whose required_relids fall within
+ * this join is marked is_pushed_down;
+ * - relation_has_unique_index_for() accepts the collected clause list.
+ *
+ * Side effect: outer_is_left is set on every clause that is added to the
+ * candidate list, including clauses taken from the rel's baserestrictinfo.
+ */
+static bool
+join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
+{
+ int innerrelid;
+ RelOptInfo *innerrel;
+ Relids joinrelids;
+ List *clause_list = NIL;
+ ListCell *l;
+ int attroff;
+
+ /*
+ * Currently, we only know how to remove left joins to a baserel with
+ * unique indexes. We can check most of these criteria pretty trivially
+ * to avoid doing useless extra work. But checking whether any of the
+ * indexes are unique would require iterating over the indexlist, so for
+ * now we just make sure there are indexes of some sort or other. If none
+ * of them are unique, join removal will still fail, just slightly later.
+ */
+ if (sjinfo->jointype != JOIN_LEFT ||
+ sjinfo->delay_upper_joins ||
+ bms_membership(sjinfo->min_righthand) != BMS_SINGLETON)
+ return false;
+
+ innerrelid = bms_singleton_member(sjinfo->min_righthand);
+ innerrel = find_base_rel(root, innerrelid);
+
+ if (innerrel->reloptkind != RELOPT_BASEREL ||
+ innerrel->rtekind != RTE_RELATION ||
+ innerrel->indexlist == NIL)
+ return false;
+
+ /* Compute the relid set for the join we are considering */
+ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
+ /*
+ * We can't remove the join if any inner-rel attributes are used above the
+ * join.
+ *
+ * Note that this test only detects use of inner-rel attributes in higher
+ * join conditions and the target list. There might be such attributes in
+ * pushed-down conditions at this join, too. We check that case below.
+ *
+ * As a micro-optimization, it seems better to start with max_attr and
+ * count down rather than starting with min_attr and counting up, on the
+ * theory that the system attributes are somewhat less likely to be wanted
+ * and should be tested last.
+ */
+ for (attroff = innerrel->max_attr - innerrel->min_attr;
+ attroff >= 0;
+ attroff--)
+ {
+ if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
+ return false;
+ }
+
+ /*
+ * Similarly check that the inner rel doesn't produce any PlaceHolderVars
+ * that will be used above the join.
+ */
+ foreach(l, root->placeholder_list)
+ {
+ PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
+
+ if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids) &&
+ !bms_is_subset(phinfo->ph_needed, joinrelids))
+ return false;
+ }
+
+ /*
+ * Search for mergejoinable clauses that constrain the inner rel against
+ * either the outer rel or a pseudoconstant. If an operator is
+ * mergejoinable then it behaves like equality for some btree opclass, so
+ * it's what we want. The mergejoinability test also eliminates clauses
+ * containing volatile functions, which we couldn't depend on.
+ */
+ foreach(l, innerrel->joininfo)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+
+ /* Ignore clauses not pertinent to this join */
+ if (!bms_is_subset(restrictinfo->required_relids, joinrelids))
+ continue;
+
+ /*
+ * If we find a pushed-down clause, it must have come from above the
+ * outer join and it must contain references to the inner rel. (If it
+ * had only outer-rel variables, it'd have been pushed down into the
+ * outer rel.) Therefore, we can conclude that join removal is unsafe
+ * without any examination of the clause contents.
+ */
+ if (restrictinfo->is_pushed_down)
+ return false;
+
+ /* Ignore if it's not a mergejoinable clause */
+ if (!restrictinfo->can_join ||
+ restrictinfo->mergeopfamilies == NIL)
+ continue; /* not mergejoinable */
+
+ /*
+ * Check if clause has the form "outer op inner" or "inner op outer".
+ */
+ if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
+ innerrel->relids))
+ continue; /* no good for these input relations */
+
+ /* OK, add to list */
+ clause_list = lappend(clause_list, restrictinfo);
+ }
+
+ /* Now examine the rel's restriction clauses for var = const clauses */
+ foreach(l, innerrel->baserestrictinfo)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+
+ /*
+ * Note: can_join won't be set for a restriction clause, but
+ * mergeopfamilies will be if it has a mergejoinable operator and
+ * doesn't contain volatile functions.
+ */
+ if (restrictinfo->mergeopfamilies == NIL)
+ continue; /* not mergejoinable */
+
+ /*
+ * The clause certainly doesn't refer to anything but the given rel.
+ * If either side is pseudoconstant then we can use it.
+ */
+ if (bms_is_empty(restrictinfo->left_relids))
+ {
+ /* righthand side is inner */
+ restrictinfo->outer_is_left = true;
+ }
+ else if (bms_is_empty(restrictinfo->right_relids))
+ {
+ /* lefthand side is inner */
+ restrictinfo->outer_is_left = false;
+ }
+ else
+ continue;
+
+ /* OK, add to list */
+ clause_list = lappend(clause_list, restrictinfo);
+ }
+
+ /* Now examine the indexes to see if we have a matching unique index */
+ if (relation_has_unique_index_for(root, innerrel, clause_list))
+ return true;
+
+ /*
+ * Some day it would be nice to check for other methods of establishing
+ * distinctness.
+ */
+ return false;
+}
+
+
+/*
+ * remove_rel_from_query
+ * Remove the target relid from the planner's data structures, having
+ * determined that there is no need to include it in the query.
+ *
+ * We are not terribly thorough here. We must make sure that the rel is
+ * no longer treated as a baserel, and that attributes of other baserels
+ * are no longer marked as being needed at joins involving this rel.
+ * In particular, we don't bother removing join quals involving the rel from
+ * the joininfo lists; they'll just get ignored, since we will never form a
+ * join relation at which they could be evaluated.
+ *
+ * Concretely: the rel's reloptkind becomes RELOPT_DEADREL, relid is deleted
+ * from the attr_needed sets of every other entry in simple_rel_array, and
+ * from each PlaceHolderInfo's ph_eval_at and ph_needed (ph_eval_at gets
+ * relid added back if deleting it would leave the set empty).
+ *
+ * NOTE(review): the relid sets stored in the SpecialJoinInfos of
+ * root->join_info_list are not touched here -- confirm whether any
+ * remaining entry can still mention relid and, if so, that this is harmless.
+ */
+static void
+remove_rel_from_query(PlannerInfo *root, int relid)
+{
+ RelOptInfo *rel = find_base_rel(root, relid);
+ Index rti;
+ ListCell *l;
+
+ /*
+ * Mark the rel as "dead" to show it is no longer part of the join tree.
+ * (Removing it from the baserel array altogether seems too risky.)
+ */
+ rel->reloptkind = RELOPT_DEADREL;
+
+ /*
+ * Remove references to the rel from other baserels' attr_needed arrays.
+ */
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *otherrel = root->simple_rel_array[rti];
+ int attroff;
+
+ /* there may be empty slots corresponding to non-baserel RTEs */
+ if (otherrel == NULL)
+ continue;
+
+ Assert(otherrel->relid == rti); /* sanity check on array */
+
+ /* no point in processing target rel itself */
+ if (otherrel == rel)
+ continue;
+
+ for (attroff = otherrel->max_attr - otherrel->min_attr;
+ attroff >= 0;
+ attroff--)
+ {
+ otherrel->attr_needed[attroff] =
+ bms_del_member(otherrel->attr_needed[attroff], relid);
+ }
+ }
+
+ /*
+ * Likewise remove references from PlaceHolderVar data structures.
+ *
+ * Here we have a special case: if a PHV's eval_at set is just the target
+ * relid, we want to leave it that way instead of reducing it to the empty
+ * set. An empty eval_at set would confuse later processing since it
+ * would match every possible eval placement.
+ */
+ foreach(l, root->placeholder_list)
+ {
+ PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
+
+ phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
+ if (bms_is_empty(phinfo->ph_eval_at)) /* oops, belay that */
+ phinfo->ph_eval_at = bms_add_member(phinfo->ph_eval_at, relid);
+
+ phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
+ }
+}
+
+/*
+ * remove_rel_from_joinlist
+ * Remove any occurrences of the target relid from a joinlist structure.
+ *
+ * It's easiest to build a whole new list structure, so we handle it that
+ * way. Efficiency is not a big deal here.
+ *
+ * *nremoved is incremented by the number of occurrences removed (there
+ * should be exactly one, but the caller checks that).
+ *
+ * joinlist is a List whose elements are RangeTblRef nodes or nested Lists;
+ * any other node type raises an ERROR. The input list cells are not
+ * modified: the result is a newly built list that points at the surviving
+ * RangeTblRef nodes of the input. Sub-lists that end up empty are omitted,
+ * so the result is NIL when nothing survives.
+ */
+static List *
+remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
+{
+ List *result = NIL;
+ ListCell *jl;
+
+ foreach(jl, joinlist)
+ {
+ Node *jlnode = (Node *) lfirst(jl);
+
+ if (IsA(jlnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jlnode)->rtindex;
+
+ if (varno == relid)
+ (*nremoved)++;
+ else
+ result = lappend(result, jlnode);
+ }
+ else if (IsA(jlnode, List))
+ {
+ /* Recurse to handle subproblem */
+ List *sublist;
+
+ sublist = remove_rel_from_joinlist((List *) jlnode,
+ relid, nremoved);
+ /* Avoid including empty sub-lists in the result */
+ if (sublist)
+ result = lappend(result, sublist);
+ }
+ else
+ {
+ elog(ERROR, "unrecognized joinlist node type: %d",
+ (int) nodeTag(jlnode));
+ }
+ }
+
+ return result;
+}
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.273 2010/02/26 02:00:45 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.274 2010/03/28 22:59:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
case T_WorkTableScan:
plan = create_scan_plan(root, best_path);
break;
- case T_Join:
- /* this is only used for no-op joins */
- Assert(IsA(best_path, NoOpPath));
- plan = create_plan(root, ((NoOpPath *) best_path)->subpath);
- break;
case T_HashJoin:
case T_MergeJoin:
case T_NestLoop:
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.117 2010/01/02 16:57:47 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.118 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
*/
add_base_rels_to_query(root, (Node *) parse->jointree);
- /*
- * We should now have size estimates for every actual table involved in
- * the query, so we can compute total_table_pages. Note that appendrels
- * are not double-counted here, even though we don't bother to distinguish
- * RelOptInfos for appendrel parents, because the parents will still have
- * size zero.
- *
- * XXX if a table is self-joined, we will count it once per appearance,
- * which perhaps is the wrong thing ... but that's not completely clear,
- * and detecting self-joins here is difficult, so ignore it for now.
- */
- total_pages = 0;
- for (rti = 1; rti < root->simple_rel_array_size; rti++)
- {
- RelOptInfo *brel = root->simple_rel_array[rti];
-
- if (brel == NULL)
- continue;
-
- Assert(brel->relid == rti); /* sanity check on array */
-
- total_pages += (double) brel->pages;
- }
- root->total_table_pages = total_pages;
-
/*
* Examine the targetlist and qualifications, adding entries to baserel
* targetlists for all referenced Vars. Restrict and join clauses are
*/
fix_placeholder_eval_levels(root);
+ /*
+ * Remove any useless outer joins. Ideally this would be done during
+ * jointree preprocessing, but the necessary information isn't available
+ * until we've built baserel data structures and classified qual clauses.
+ */
+ joinlist = remove_useless_joins(root, joinlist);
+
+ /*
+ * Now distribute "placeholders" to base rels as needed. This has to be
+ * done after join removal because removal could change whether a
+ * placeholder is evaluatable at a base rel.
+ */
+ add_placeholders_to_base_rels(root);
+
+ /*
+ * We should now have size estimates for every actual table involved in
+ * the query, and we also know which if any have been deleted from the
+ * query by join removal; so we can compute total_table_pages.
+ *
+ * Note that appendrels are not double-counted here, even though we don't
+ * bother to distinguish RelOptInfos for appendrel parents, because the
+ * parents will still have size zero.
+ *
+ * XXX if a table is self-joined, we will count it once per appearance,
+ * which perhaps is the wrong thing ... but that's not completely clear,
+ * and detecting self-joins here is difficult, so ignore it for now.
+ */
+ total_pages = 0;
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *brel = root->simple_rel_array[rti];
+
+ if (brel == NULL)
+ continue;
+
+ Assert(brel->relid == rti); /* sanity check on array */
+
+ if (brel->reloptkind == RELOPT_BASEREL ||
+ brel->reloptkind == RELOPT_OTHER_MEMBER_REL)
+ total_pages += (double) brel->pages;
+ }
+ root->total_table_pages = total_pages;
+
/*
* Ready to do the primary planning.
*/
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.157 2010/02/26 02:00:47 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.158 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
return InvalidOid;
}
-/*
- * create_noop_path
- * Creates a path equivalent to the input subpath, but having a different
- * parent rel. This is used when a join is found to be removable.
- */
-NoOpPath *
-create_noop_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
-{
- NoOpPath *pathnode = makeNode(NoOpPath);
-
- pathnode->path.pathtype = T_Join; /* by convention */
- pathnode->path.parent = rel;
- pathnode->path.startup_cost = subpath->startup_cost;
- pathnode->path.total_cost = subpath->total_cost;
- pathnode->path.pathkeys = subpath->pathkeys;
- pathnode->subpath = subpath;
-
- return pathnode;
-}
-
/*
* create_subqueryscan_path
* Creates a path corresponding to a sequential scan of a subquery,
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/placeholder.c,v 1.6 2010/01/02 16:57:48 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/placeholder.c,v 1.7 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
list_free(vars);
}
}
+}
- /*
- * Now, if any placeholder can be computed at a base rel and is needed
- * above it, add it to that rel's targetlist. (This is essentially the
- * same logic as in add_placeholders_to_joinrel, but we can't do that part
- * until joinrels are formed.) We have to do this as a separate step
- * because the ph_needed values aren't stable until the previous loop
- * finishes.
- */
- foreach(lc1, root->placeholder_list)
+/*
+ * add_placeholders_to_base_rels
+ * Add any required PlaceHolderVars to base rels' targetlists.
+ *
+ * If any placeholder can be computed at a base rel and is needed above it,
+ * add it to that rel's targetlist. We have to do this separately from
+ * fix_placeholder_eval_levels() because join removal happens in between,
+ * and can change the ph_eval_at sets. There is essentially the same logic
+ * in add_placeholders_to_joinrel, but we can't do that part until joinrels
+ * are formed.
+ */
+void
+add_placeholders_to_base_rels(PlannerInfo *root)
+{
+ ListCell *lc;
+
+ foreach(lc, root->placeholder_list)
{
- PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc1);
+ PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
Relids eval_at = phinfo->ph_eval_at;
if (bms_membership(eval_at) == BMS_SINGLETON)
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.233 2010/01/05 21:53:59 rhaas Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.234 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
T_ResultPath,
T_MaterialPath,
T_UniquePath,
- T_NoOpPath,
T_EquivalenceClass,
T_EquivalenceMember,
T_PathKey,
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.184 2010/02/26 02:01:25 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.185 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
*
* We also have "other rels", which are like base rels in that they refer to
* single RT indexes; but they are not part of the join tree, and are given
- * a different RelOptKind to identify them.
+ * a different RelOptKind to identify them. Lastly, there is a RelOptKind
+ * for "dead" relations, which are base rels that we have proven we don't
+ * need to join after all.
*
* Currently the only kind of otherrels are those made for member relations
* of an "append relation", that is an inheritance set or UNION ALL subquery.
{
RELOPT_BASEREL,
RELOPT_JOINREL,
- RELOPT_OTHER_MEMBER_REL
+ RELOPT_OTHER_MEMBER_REL,
+ RELOPT_DEADREL
} RelOptKind;
typedef struct RelOptInfo
double rows; /* estimated number of result tuples */
} UniquePath;
-/*
- * NoOpPath represents exactly the same plan as its subpath. This is used
- * when we have determined that a join can be eliminated. The difference
- * between the NoOpPath and its subpath is just that the NoOpPath's parent
- * is the whole join relation while the subpath is for one of the joined
- * relations (and the other one isn't needed).
- *
- * Note: path.pathtype is always T_Join, but this won't actually give rise
- * to a Join plan node.
- */
-typedef struct NoOpPath
-{
- Path path;
- Path *subpath;
-} NoOpPath;
-
/*
* All join-type paths share these fields.
*/
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.83 2010/02/26 02:01:26 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.84 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
Path *subpath, SpecialJoinInfo *sjinfo);
-extern NoOpPath *create_noop_path(PlannerInfo *root, RelOptInfo *rel,
- Path *subpath);
extern Path *create_subqueryscan_path(RelOptInfo *rel, List *pathkeys);
extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel);
extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel);
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/placeholder.h,v 1.4 2010/01/02 16:58:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/placeholder.h,v 1.5 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern PlaceHolderInfo *find_placeholder_info(PlannerInfo *root,
PlaceHolderVar *phv);
extern void fix_placeholder_eval_levels(PlannerInfo *root);
+extern void add_placeholders_to_base_rels(PlannerInfo *root);
extern void add_placeholders_to_joinrel(PlannerInfo *root,
RelOptInfo *joinrel);
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.126 2010/02/26 02:01:26 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.127 2010/03/28 22:59:33 tgl Exp $
*
*-------------------------------------------------------------------------
*/
Expr *item2,
Relids qualscope);
+/*
+ * prototypes for plan/analyzejoins.c
+ */
+extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
+
/*
* prototypes for plan/setrefs.c
*/
--
-- test join removal
--
+begin;
+CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
+NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "a_pkey" for table "a"
+CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
+NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "b_pkey" for table "b"
+CREATE TEMP TABLE c (id int PRIMARY KEY);
+NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "c_pkey" for table "c"
+INSERT INTO a VALUES (0, 0), (1, NULL);
+INSERT INTO b VALUES (0, 0), (1, NULL);
+INSERT INTO c VALUES (0), (1);
+-- all three cases should be optimizable into a simple seqscan
+explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
+ QUERY PLAN
+---------------
+ Seq Scan on a
+(1 row)
+
+explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id;
+ QUERY PLAN
+---------------
+ Seq Scan on b
+(1 row)
+
+explain (costs off)
+ SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id)
+ ON (a.b_id = b.id);
+ QUERY PLAN
+---------------
+ Seq Scan on a
+(1 row)
+
+rollback;
create temp table parent (k int primary key, pd int);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "parent_pkey" for table "parent"
create temp table child (k int unique, cd int);
-> Seq Scan on child c
(5 rows)
+-- bug 5255: this is not optimizable by join removal
+begin;
+CREATE TEMP TABLE a (id int PRIMARY KEY);
+NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "a_pkey" for table "a"
+CREATE TEMP TABLE b (id int PRIMARY KEY, a_id int);
+NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "b_pkey" for table "b"
+INSERT INTO a VALUES (0), (1);
+INSERT INTO b VALUES (0, 0), (1, NULL);
+SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+ id | a_id | id
+----+------+----
+ 1 | |
+(1 row)
+
+SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+ id | a_id
+----+------
+ 1 |
+(1 row)
+
+rollback;
-- test join removal
--
+begin;
+
+CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
+CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
+CREATE TEMP TABLE c (id int PRIMARY KEY);
+INSERT INTO a VALUES (0, 0), (1, NULL);
+INSERT INTO b VALUES (0, 0), (1, NULL);
+INSERT INTO c VALUES (0), (1);
+
+-- all three cases should be optimizable into a simple seqscan
+explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
+explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id;
+explain (costs off)
+ SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id)
+ ON (a.b_id = b.id);
+
+rollback;
+
create temp table parent (k int primary key, pd int);
create temp table child (k int unique, cd int);
insert into parent values (1, 10), (2, 20), (3, 30);
select p.*, linked from parent p
left join (select c.*, true as linked from child c) as ss
on (p.k = ss.k);
+
+-- bug 5255: this is not optimizable by join removal
+begin;
+
+CREATE TEMP TABLE a (id int PRIMARY KEY);
+CREATE TEMP TABLE b (id int PRIMARY KEY, a_id int);
+INSERT INTO a VALUES (0), (1);
+INSERT INTO b VALUES (0, 0), (1, NULL);
+
+SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+
+rollback;