From f41803bb39bc2949db200116a609fd242d0ec221 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 20 Jan 2007 20:45:41 +0000 Subject: [PATCH] Refactor planner's pathkeys data structure to create a separate, explicit representation of equivalence classes of variables. This is an extensive rewrite, but it brings a number of benefits: * planner no longer fails in the presence of "incomplete" operator families that don't offer operators for every possible combination of datatypes. * avoid generating and then discarding redundant equality clauses. * remove bogus assumption that derived equalities always use operators named "=". * mergejoins can work with a variety of sort orders (e.g., descending) now, instead of tying each mergejoinable operator to exactly one sort order. * better recognition of redundant sort columns. * can make use of equalities appearing underneath an outer join. --- doc/src/sgml/xoper.sgml | 92 +- src/backend/nodes/copyfuncs.c | 42 +- src/backend/nodes/equalfuncs.c | 30 +- src/backend/nodes/outfuncs.c | 90 +- src/backend/nodes/print.c | 23 +- src/backend/optimizer/README | 451 +++--- src/backend/optimizer/path/Makefile | 4 +- src/backend/optimizer/path/allpaths.c | 12 +- src/backend/optimizer/path/costsize.c | 54 +- src/backend/optimizer/path/equivclass.c | 1662 +++++++++++++++++++++ src/backend/optimizer/path/indxpath.c | 457 ++---- src/backend/optimizer/path/joinpath.c | 187 +-- src/backend/optimizer/path/joinrels.c | 10 +- src/backend/optimizer/path/pathkeys.c | 1523 ++++++++----------- src/backend/optimizer/plan/createplan.c | 207 ++- src/backend/optimizer/plan/initsplan.c | 756 ++++------ src/backend/optimizer/plan/planmain.c | 31 +- src/backend/optimizer/plan/planner.c | 37 +- src/backend/optimizer/prep/prepjointree.c | 3 +- src/backend/optimizer/prep/prepunion.c | 8 +- src/backend/optimizer/util/joininfo.c | 44 +- src/backend/optimizer/util/pathnode.c | 18 +- src/backend/optimizer/util/relnode.c | 127 +- src/backend/optimizer/util/restrictinfo.c | 169 +-- src/backend/parser/parse_agg.c | 3 +- src/backend/utils/adt/selfuncs.c | 11 +- src/backend/utils/cache/lsyscache.c | 199 +-- src/include/nodes/nodes.h | 6 +- src/include/nodes/relation.h | 197 ++- src/include/optimizer/joininfo.h | 5 +- src/include/optimizer/pathnode.h | 5 +- src/include/optimizer/paths.h | 59 +- src/include/optimizer/planmain.h | 22 +- src/include/optimizer/restrictinfo.h | 8 +- src/include/utils/lsyscache.h | 5 +- 35 files changed, 3860 insertions(+), 2697 deletions(-) create mode 100644 src/backend/optimizer/path/equivclass.c diff --git a/doc/src/sgml/xoper.sgml b/doc/src/sgml/xoper.sgml index 5f84393ed2..f639407866 100644 --- a/doc/src/sgml/xoper.sgml +++ b/doc/src/sgml/xoper.sgml @@ -1,4 +1,4 @@ - + User-Defined Operators @@ -145,29 +145,29 @@ SELECT (a + b) AS c FROM test_complex; - One way is to omit the COMMUTATOR clause in the first operator that - you define, and then provide one in the second operator's definition. - Since PostgreSQL knows that commutative - operators come in pairs, when it sees the second definition it will - automatically go back and fill in the missing COMMUTATOR clause in - the first definition. + One way is to omit the COMMUTATOR clause in the first operator that + you define, and then provide one in the second operator's definition. + Since PostgreSQL knows that commutative + operators come in pairs, when it sees the second definition it will + automatically go back and fill in the missing COMMUTATOR clause in + the first definition. - The other, more straightforward way is just to include COMMUTATOR clauses - in both definitions. When PostgreSQL processes - the first definition and realizes that COMMUTATOR refers to a nonexistent - operator, the system will make a dummy entry for that operator in the - system catalog. This dummy entry will have valid data only - for the operator name, left and right operand types, and result type, - since that's all that PostgreSQL can deduce - at this point. The first operator's catalog entry will link to this - dummy entry. Later, when you define the second operator, the system - updates the dummy entry with the additional information from the second - definition. If you try to use the dummy operator before it's been filled - in, you'll just get an error message. + The other, more straightforward way is just to include COMMUTATOR clauses + in both definitions. When PostgreSQL processes + the first definition and realizes that COMMUTATOR refers to a nonexistent + operator, the system will make a dummy entry for that operator in the + system catalog. This dummy entry will have valid data only + for the operator name, left and right operand types, and result type, + since that's all that PostgreSQL can deduce + at this point. The first operator's catalog entry will link to this + dummy entry. Later, when you define the second operator, the system + updates the dummy entry with the additional information from the second + definition. If you try to use the dummy operator before it's been filled + in, you'll just get an error message. @@ -240,7 +240,7 @@ column OP constant one of the system's standard estimators for many of your own operators. These are the standard restriction estimators: - eqsel for = + eqsel for = neqsel for <> scalarltsel for < or <= scalargtsel for > or >= @@ -337,7 +337,7 @@ table1.column1 OP table2.column2 join will never compare them at all, implicitly assuming that the result of the join operator must be false. So it never makes sense to specify HASHES for operators that do not represent - equality. + some form of equality. @@ -347,7 +347,7 @@ table1.column1 OP table2.column2 exist yet. But attempts to use the operator in hash joins will fail at run time if no such operator family exists. The system needs the operator family to find the data-type-specific hash function for the - operator's input data type. Of course, you must also supply a suitable + operator's input data type. Of course, you must also create a suitable hash function before you can create the operator family. @@ -382,8 +382,9 @@ table1.column1 OP table2.column2 false, never null, for any two nonnull inputs. If this rule is not followed, hash-optimization of IN operations may generate wrong results. (Specifically, IN might return - false where the correct answer according to the standard would be null; or it might - yield an error complaining that it wasn't prepared for a null result.) + false where the correct answer according to the standard would be null; + or it might yield an error complaining that it wasn't prepared for a + null result.) @@ -407,19 +408,18 @@ table1.column1 OP table2.column2 that can only succeed for pairs of values that fall at the same place in the sort order. In practice this means that the join operator must - behave like equality. But unlike hash join, where the left and right - data types had better be the same (or at least bitwise equivalent), - it is possible to merge-join two + behave like equality. But it is possible to merge-join two distinct data types so long as they are logically compatible. For - example, the smallint-versus-integer equality operator - is merge-joinable. + example, the smallint-versus-integer + equality operator is merge-joinable. We only need sorting operators that will bring both data types into a logically compatible sequence. To be marked MERGES, the join operator must appear - in a btree index operator family. This is not enforced when you create + as an equality member of a btree index operator family. + This is not enforced when you create the operator, since of course the referencing operator family couldn't exist yet. But the operator will not actually be used for merge joins unless a matching operator family can be found. The @@ -428,30 +428,14 @@ table1.column1 OP table2.column2 - There are additional restrictions on operators that you mark - merge-joinable. These restrictions are not currently checked by - CREATE OPERATOR, but errors may occur when - the operator is used if any are not true: - - - - - A merge-joinable equality operator must have a merge-joinable - commutator (itself if the two operand data types are the same, or a related - equality operator if they are different). - - - - - - If there is a merge-joinable operator relating any two data types - A and B, and another merge-joinable operator relating B to any - third data type C, then A and C must also have a merge-joinable - operator; in other words, having a merge-joinable operator must - be transitive. - - - + A merge-joinable operator must have a commutator (itself if the two + operand data types are the same, or a related equality operator + if they are different) that appears in the same operator family. + If this is not the case, planner errors may occur when the operator + is used. Also, it is a good idea (but not strictly required) for + a btree operator family that supports multiple datatypes to provide + equality operators for every combination of the datatypes; this + allows better optimization. diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 4943bb2029..7b003dc095 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.361 2007/01/10 18:06:02 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.362 2007/01/20 20:45:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1284,16 +1284,18 @@ _copyFromExpr(FromExpr *from) */ /* - * _copyPathKeyItem + * _copyPathKey */ -static PathKeyItem * -_copyPathKeyItem(PathKeyItem *from) +static PathKey * +_copyPathKey(PathKey *from) { - PathKeyItem *newnode = makeNode(PathKeyItem); + PathKey *newnode = makeNode(PathKey); - COPY_NODE_FIELD(key); - COPY_SCALAR_FIELD(sortop); - COPY_SCALAR_FIELD(nulls_first); + /* EquivalenceClasses are never moved, so just shallow-copy the pointer */ + COPY_SCALAR_FIELD(pk_eclass); + COPY_SCALAR_FIELD(pk_opfamily); + COPY_SCALAR_FIELD(pk_strategy); + COPY_SCALAR_FIELD(pk_nulls_first); return newnode; } @@ -1316,21 +1318,15 @@ _copyRestrictInfo(RestrictInfo *from) COPY_BITMAPSET_FIELD(left_relids); COPY_BITMAPSET_FIELD(right_relids); COPY_NODE_FIELD(orclause); + /* EquivalenceClasses are never copied, so shallow-copy the pointers */ + COPY_SCALAR_FIELD(parent_ec); COPY_SCALAR_FIELD(eval_cost); COPY_SCALAR_FIELD(this_selec); - COPY_SCALAR_FIELD(mergejoinoperator); - COPY_SCALAR_FIELD(left_sortop); - COPY_SCALAR_FIELD(right_sortop); - COPY_SCALAR_FIELD(mergeopfamily); - - /* - * Do not copy pathkeys, since they'd not be canonical in a copied query - */ - newnode->left_pathkey = NIL; - newnode->right_pathkey = NIL; - - COPY_SCALAR_FIELD(left_mergescansel); - COPY_SCALAR_FIELD(right_mergescansel); + COPY_NODE_FIELD(mergeopfamilies); + /* EquivalenceClasses are never copied, so shallow-copy the pointers */ + COPY_SCALAR_FIELD(left_ec); + COPY_SCALAR_FIELD(right_ec); + COPY_SCALAR_FIELD(outer_is_left); COPY_SCALAR_FIELD(hashjoinoperator); COPY_SCALAR_FIELD(left_bucketsize); COPY_SCALAR_FIELD(right_bucketsize); @@ -3033,8 +3029,8 @@ copyObject(void *from) /* * RELATION NODES */ - case T_PathKeyItem: - retval = _copyPathKeyItem(from); + case T_PathKey: + retval = _copyPathKey(from); break; case T_RestrictInfo: retval = _copyRestrictInfo(from); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index fafd8ae546..40a91a2b0e 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.295 2007/01/10 18:06:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.296 2007/01/20 20:45:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -596,11 +596,27 @@ _equalFromExpr(FromExpr *a, FromExpr *b) */ static bool -_equalPathKeyItem(PathKeyItem *a, PathKeyItem *b) +_equalPathKey(PathKey *a, PathKey *b) { - COMPARE_NODE_FIELD(key); - COMPARE_SCALAR_FIELD(sortop); - COMPARE_SCALAR_FIELD(nulls_first); + /* + * This is normally used on non-canonicalized PathKeys, so must chase + * up to the topmost merged EquivalenceClass and see if those are the + * same (by pointer equality). + */ + EquivalenceClass *a_eclass; + EquivalenceClass *b_eclass; + + a_eclass = a->pk_eclass; + while (a_eclass->ec_merged) + a_eclass = a_eclass->ec_merged; + b_eclass = b->pk_eclass; + while (b_eclass->ec_merged) + b_eclass = b_eclass->ec_merged; + if (a_eclass != b_eclass) + return false; + COMPARE_SCALAR_FIELD(pk_opfamily); + COMPARE_SCALAR_FIELD(pk_strategy); + COMPARE_SCALAR_FIELD(pk_nulls_first); return true; } @@ -2016,8 +2032,8 @@ equal(void *a, void *b) /* * RELATION NODES */ - case T_PathKeyItem: - retval = _equalPathKeyItem(a, b); + case T_PathKey: + retval = _equalPathKey(a, b); break; case T_RestrictInfo: retval = _equalRestrictInfo(a, b); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 6137c39af0..f0b72ea0f6 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.293 2007/01/10 18:06:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.294 2007/01/20 20:45:38 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1196,29 +1196,11 @@ _outNestPath(StringInfo str, NestPath *node) static void _outMergePath(StringInfo str, MergePath *node) { - int numCols; - int i; - WRITE_NODE_TYPE("MERGEPATH"); _outJoinPathInfo(str, (JoinPath *) node); WRITE_NODE_FIELD(path_mergeclauses); - - numCols = list_length(node->path_mergeclauses); - - appendStringInfo(str, " :path_mergeFamilies"); - for (i = 0; i < numCols; i++) - appendStringInfo(str, " %u", node->path_mergeFamilies[i]); - - appendStringInfo(str, " :path_mergeStrategies"); - for (i = 0; i < numCols; i++) - appendStringInfo(str, " %d", node->path_mergeStrategies[i]); - - appendStringInfo(str, " :path_mergeNullsFirst"); - for (i = 0; i < numCols; i++) - appendStringInfo(str, " %d", (int) node->path_mergeNullsFirst[i]); - WRITE_NODE_FIELD(outersortkeys); WRITE_NODE_FIELD(innersortkeys); } @@ -1241,7 +1223,8 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node) /* NB: this isn't a complete set of fields */ WRITE_NODE_FIELD(parse); WRITE_NODE_FIELD(join_rel_list); - WRITE_NODE_FIELD(equi_key_list); + WRITE_NODE_FIELD(eq_classes); + WRITE_NODE_FIELD(canon_pathkeys); WRITE_NODE_FIELD(left_join_clauses); WRITE_NODE_FIELD(right_join_clauses); WRITE_NODE_FIELD(full_join_clauses); @@ -1284,6 +1267,7 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node) WRITE_NODE_FIELD(subplan); WRITE_NODE_FIELD(baserestrictinfo); WRITE_NODE_FIELD(joininfo); + WRITE_BOOL_FIELD(has_eclass_joins); WRITE_BITMAPSET_FIELD(index_outer_relids); WRITE_NODE_FIELD(index_inner_paths); } @@ -1306,13 +1290,48 @@ _outIndexOptInfo(StringInfo str, IndexOptInfo *node) } static void -_outPathKeyItem(StringInfo str, PathKeyItem *node) +_outEquivalenceClass(StringInfo str, EquivalenceClass *node) { - WRITE_NODE_TYPE("PATHKEYITEM"); + /* + * To simplify reading, we just chase up to the topmost merged EC and + * print that, without bothering to show the merge-ees separately. + */ + while (node->ec_merged) + node = node->ec_merged; - WRITE_NODE_FIELD(key); - WRITE_OID_FIELD(sortop); - WRITE_BOOL_FIELD(nulls_first); + WRITE_NODE_TYPE("EQUIVALENCECLASS"); + + WRITE_NODE_FIELD(ec_opfamilies); + WRITE_NODE_FIELD(ec_members); + WRITE_NODE_FIELD(ec_sources); + WRITE_BITMAPSET_FIELD(ec_relids); + WRITE_BOOL_FIELD(ec_has_const); + WRITE_BOOL_FIELD(ec_has_volatile); + WRITE_BOOL_FIELD(ec_below_outer_join); + WRITE_BOOL_FIELD(ec_broken); +} + +static void +_outEquivalenceMember(StringInfo str, EquivalenceMember *node) +{ + WRITE_NODE_TYPE("EQUIVALENCEMEMBER"); + + WRITE_NODE_FIELD(em_expr); + WRITE_BITMAPSET_FIELD(em_relids); + WRITE_BOOL_FIELD(em_is_const); + WRITE_BOOL_FIELD(em_is_child); + WRITE_OID_FIELD(em_datatype); +} + +static void +_outPathKey(StringInfo str, PathKey *node) +{ + WRITE_NODE_TYPE("PATHKEY"); + + WRITE_NODE_FIELD(pk_eclass); + WRITE_OID_FIELD(pk_opfamily); + WRITE_INT_FIELD(pk_strategy); + WRITE_BOOL_FIELD(pk_nulls_first); } static void @@ -1331,12 +1350,11 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node) WRITE_BITMAPSET_FIELD(left_relids); WRITE_BITMAPSET_FIELD(right_relids); WRITE_NODE_FIELD(orclause); - WRITE_OID_FIELD(mergejoinoperator); - WRITE_OID_FIELD(left_sortop); - WRITE_OID_FIELD(right_sortop); - WRITE_OID_FIELD(mergeopfamily); - WRITE_NODE_FIELD(left_pathkey); - WRITE_NODE_FIELD(right_pathkey); + WRITE_NODE_FIELD(parent_ec); + WRITE_NODE_FIELD(mergeopfamilies); + WRITE_NODE_FIELD(left_ec); + WRITE_NODE_FIELD(right_ec); + WRITE_BOOL_FIELD(outer_is_left); WRITE_OID_FIELD(hashjoinoperator); } @@ -2163,8 +2181,14 @@ _outNode(StringInfo str, void *obj) case T_IndexOptInfo: _outIndexOptInfo(str, obj); break; - case T_PathKeyItem: - _outPathKeyItem(str, obj); + case T_EquivalenceClass: + _outEquivalenceClass(str, obj); + break; + case T_EquivalenceMember: + _outEquivalenceMember(str, obj); + break; + case T_PathKey: + _outPathKey(str, obj); break; case T_RestrictInfo: _outRestrictInfo(str, obj); diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c index 1a7e1afb71..e50ebf2bb5 100644 --- a/src/backend/nodes/print.c +++ b/src/backend/nodes/print.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/print.c,v 1.82 2007/01/05 22:19:30 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/print.c,v 1.83 2007/01/20 20:45:38 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -404,7 +404,7 @@ print_expr(Node *expr, List *rtable) /* * print_pathkeys - - * pathkeys list of list of PathKeyItems + * pathkeys list of PathKeys */ void print_pathkeys(List *pathkeys, List *rtable) @@ -414,17 +414,26 @@ print_pathkeys(List *pathkeys, List *rtable) printf("("); foreach(i, pathkeys) { - List *pathkey = (List *) lfirst(i); + PathKey *pathkey = (PathKey *) lfirst(i); + EquivalenceClass *eclass; ListCell *k; + bool first = true; + + eclass = pathkey->pk_eclass; + /* chase up, in case pathkey is non-canonical */ + while (eclass->ec_merged) + eclass = eclass->ec_merged; printf("("); - foreach(k, pathkey) + foreach(k, eclass->ec_members) { - PathKeyItem *item = (PathKeyItem *) lfirst(k); + EquivalenceMember *mem = (EquivalenceMember *) lfirst(k); - print_expr(item->key, rtable); - if (lnext(k)) + if (first) + first = false; + else printf(", "); + print_expr((Node *) mem->em_expr, rtable); } printf(")"); if (lnext(i)) diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index f0bb64d1a9..2e736b6dbe 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -90,21 +90,19 @@ have a list of relations to join. However, FULL OUTER JOIN clauses are never flattened, and other kinds of JOIN might not be either, if the flattening process is stopped by join_collapse_limit or from_collapse_limit restrictions. Therefore, we end up with a planning problem that contains -both lists of relations to be joined in any order, and JOIN nodes that -force a particular join order. For each un-flattened JOIN node, we join -exactly that pair of relations (after recursively planning their inputs, -if the inputs aren't single base relations). We generate a Path for each -feasible join method, and select the cheapest Path. Note that the JOIN -clause structure determines the join Path structure, but it doesn't -constrain the join implementation method at each join (nestloop, merge, -hash), nor does it say which rel is considered outer or inner at each -join. We consider all these possibilities in building Paths. - -3) At the top level of the FROM clause we will have a list of relations -that are either base rels or joinrels constructed per un-flattened JOIN -directives. (This is also the situation, recursively, when we can flatten -sub-joins underneath an un-flattenable JOIN into a list of relations to -join.) We can join these rels together in any order the planner sees fit. +lists of relations to be joined in any order, where any individual item +might be a sub-list that has to be joined together before we can consider +joining it to its siblings. We process these sub-problems recursively, +bottom up. Note that the join list structure constrains the possible join +orders, but it doesn't constrain the join implementation method at each +join (nestloop, merge, hash), nor does it say which rel is considered outer +or inner at each join. We consider all these possibilities in building +Paths. We generate a Path for each feasible join method, and select the +cheapest Path. + +For each planning problem, therefore, we will have a list of relations +that are either base rels or joinrels constructed per sub-join-lists. +We can join these rels together in any order the planner sees fit. The standard (non-GEQO) planner does this as follows: Consider joining each RelOptInfo to each other RelOptInfo specified in its @@ -114,17 +112,17 @@ choice but to generate a clauseless Cartesian-product join; so we consider joining that rel to each other available rel. But in the presence of join clauses we will only consider joins that use available join clauses.) -If we only had two relations in the FROM list, we are done: we just pick +If we only had two relations in the list, we are done: we just pick the cheapest path for the join RelOptInfo. If we had more than two, we now need to consider ways of joining join RelOptInfos to each other to make -join RelOptInfos that represent more than two FROM items. +join RelOptInfos that represent more than two list items. The join tree is constructed using a "dynamic programming" algorithm: in the first pass (already described) we consider ways to create join rels -representing exactly two FROM items. The second pass considers ways -to make join rels that represent exactly three FROM items; the next pass, +representing exactly two list items. The second pass considers ways +to make join rels that represent exactly three list items; the next pass, four items, etc. The last pass considers how to make the final join -relation that includes all FROM items --- obviously there can be only one +relation that includes all list items --- obviously there can be only one join rel at this top level, whereas there can be more than one join rel at lower levels. At each level we use joins that follow available join clauses, if possible, just as described for the first level. @@ -155,7 +153,7 @@ For example: {1 2 3 4} We consider left-handed plans (the outer rel of an upper join is a joinrel, -but the inner is always a single FROM item); right-handed plans (outer rel +but the inner is always a single list item); right-handed plans (outer rel is always a single item); and bushy plans (both inner and outer can be joins themselves). For example, when building {1 2 3 4} we consider joining {1 2 3} to {4} (left-handed), {4} to {1 2 3} (right-handed), and @@ -336,7 +334,9 @@ RelOptInfo - a relation or joined relations MergePath - merge joins HashPath - hash joins - PathKeys - a data structure representing the ordering of a path + EquivalenceClass - a data structure representing a set of values known equal + + PathKey - a data structure representing the sort ordering of a path The optimizer spends a good deal of its time worrying about the ordering of the tuples returned by a path. The reason this is useful is that by @@ -363,213 +363,250 @@ without sorting, since it can pick from any of the paths retained for its inputs. +EquivalenceClasses +------------------ + +During the deconstruct_jointree() scan of the query's qual clauses, we look +for mergejoinable equality clauses A = B whose applicability is not delayed +by an outer join; these are called "equivalence clauses". When we find +one, we create an EquivalenceClass containing the expressions A and B to +record this knowledge. If we later find another equivalence clause B = C, +we add C to the existing EquivalenceClass for {A B}; this may require +merging two existing EquivalenceClasses. At the end of the scan, we have +sets of values that are known all transitively equal to each other. We can +therefore use a comparison of any pair of the values as a restriction or +join clause (when these values are available at the scan or join, of +course); furthermore, we need test only one such comparison, not all of +them. Therefore, equivalence clauses are removed from the standard qual +distribution process. Instead, when preparing a restriction or join clause +list, we examine each EquivalenceClass to see if it can contribute a +clause, and if so we select an appropriate pair of values to compare. For +example, if we are trying to join A's relation to C's, we can generate the +clause A = C, even though this appeared nowhere explicitly in the original +query. This may allow us to explore join paths that otherwise would have +been rejected as requiring Cartesian-product joins. + +Sometimes an EquivalenceClass may contain a pseudo-constant expression +(i.e., one not containing Vars or Aggs of the current query level, nor +volatile functions). In this case we do not follow the policy of +dynamically generating join clauses: instead, we dynamically generate +restriction clauses "var = const" wherever one of the variable members of +the class can first be computed. For example, if we have A = B and B = 42, +we effectively generate the restriction clauses A = 42 and B = 42, and then +we need not bother with explicitly testing the join clause A = B when the +relations are joined. In effect, all the class members can be tested at +relation-scan level and there's never a need for join tests. + +The precise technical interpretation of an EquivalenceClass is that it +asserts that at any plan node where more than one of its member values +can be computed, output rows in which the values are not all equal may +be discarded without affecting the query result. (We require all levels +of the plan to enforce EquivalenceClasses, hence a join need not recheck +equality of values that were computable by one of its children.) For an +ordinary EquivalenceClass that is "valid everywhere", we can further infer +that the values are all non-null, because all mergejoinable operators are +strict. However, we also allow equivalence clauses that appear below the +nullable side of an outer join to form EquivalenceClasses; for these +classes, the interpretation is that either all the values are equal, or +all (except pseudo-constants) have gone to null. (This requires a +limitation that non-constant members be strict, else they might not go +to null when the other members do.) Consider for example + + SELECT * + FROM a LEFT JOIN + (SELECT * FROM b JOIN c ON b.y = c.z WHERE b.y = 10) ss + ON a.x = ss.y + WHERE a.x = 42; + +We can form the below-outer-join EquivalenceClass {b.y c.z 10} and thereby +apply c.z = 10 while scanning c. (The reason we disallow outerjoin-delayed +clauses from forming EquivalenceClasses is exactly that we want to be able +to push any derived clauses as far down as possible.) But once above the +outer join it's no longer necessarily the case that b.y = 10, and thus we +cannot use such EquivalenceClasses to conclude that sorting is unnecessary +(see discussion of PathKeys below). + +In this example, notice also that a.x = ss.y (really a.x = b.y) is not an +equivalence clause because its applicability to b is delayed by the outer +join; thus we do not try to insert b.y into the equivalence class {a.x 42}. +But since we see that a.x has been equated to 42 above the outer join, we +are able to form a below-outer-join class {b.y 42}; this restriction can be +added because no b/c row not having b.y = 42 can contribute to the result +of the outer join, and so we need not compute such rows. Now this class +will get merged with {b.y c.z 10}, leading to the contradiction 10 = 42, +which lets the planner deduce that the b/c join need not be computed at all +because none of its rows can contribute to the outer join. (This gets +implemented as a gating Result filter, since more usually the potential +contradiction involves Param values rather than just Consts, and thus has +to be checked at runtime.) + +To aid in determining the sort ordering(s) that can work with a mergejoin, +we mark each mergejoinable clause with the EquivalenceClasses of its left +and right inputs. For an equivalence clause, these are of course the same +EquivalenceClass. For a non-equivalence mergejoinable clause (such as an +outer-join qualification), we generate two separate EquivalenceClasses for +the left and right inputs. This may result in creating single-item +equivalence "classes", though of course these are still subject to merging +if other equivalence clauses are later found to bear on the same +expressions. + +Another way that we may form a single-item EquivalenceClass is in creation +of a PathKey to represent a desired sort order (see below). This is a bit +different from the above cases because such an EquivalenceClass might +contain an aggregate function or volatile expression. (A clause containing +a volatile function will never be considered mergejoinable, even if its top +operator is mergejoinable, so there is no way for a volatile expression to +get into EquivalenceClasses otherwise. Aggregates are disallowed in WHERE +altogether, so will never be found in a mergejoinable clause.) This is just +a convenience to maintain a uniform PathKey representation: such an +EquivalenceClass will never be merged with any other. + +An EquivalenceClass also contains a list of btree opfamily OIDs, which +determines what the equalities it represents actually "mean". All the +equivalence clauses that contribute to an EquivalenceClass must have +equality operators that belong to the same set of opfamilies. (Note: most +of the time, a particular equality operator belongs to only one family, but +it's possible that it belongs to more than one. We keep track of all the +families to ensure that we can make use of an index belonging to any one of +the families for mergejoin purposes.) + + PathKeys -------- The PathKeys data structure represents what is known about the sort order -of a particular Path. +of the tuples generated by a particular Path. A path's pathkeys field is a +list of PathKey nodes, where the n'th item represents the n'th sort key of +the result. Each PathKey contains these fields: -Path.pathkeys is a List of Lists of PathKeyItem nodes that represent -the sort order of the result generated by the Path. The n'th sublist -represents the n'th sort key of the result. + * a reference to an EquivalenceClass + * a btree opfamily OID (must match one of those in the EC) + * a sort direction (ascending or descending) + * a nulls-first-or-last flag + +The EquivalenceClass represents the value being sorted on. Since the +various members of an EquivalenceClass are known equal according to the +opfamily, we can consider a path sorted by any one of them to be sorted by +any other too; this is what justifies referencing the whole +EquivalenceClass rather than just one member of it. In single/base relation RelOptInfo's, the Paths represent various ways of scanning the relation and the resulting ordering of the tuples. Sequential scan Paths have NIL pathkeys, indicating no known ordering. Index scans have Path.pathkeys that represent the chosen index's ordering, -if any. A single-key index would create a pathkey with a single sublist, -e.g. ( (tab1.indexkey1/sortop1) ). A multi-key index generates a sublist -per key, e.g. ( (tab1.indexkey1/sortop1) (tab1.indexkey2/sortop2) ) which -shows major sort by indexkey1 (ordering by sortop1) and minor sort by -indexkey2 with sortop2. - -Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since -we can say nothing about the overall order of its result. Also, an -indexscan on an unordered type of index generates NIL pathkeys. However, -we can always create a pathkey by doing an explicit sort. The pathkeys -for a Sort plan's output just represent the sort key fields and the -ordering operators used. +if any. A single-key index would create a single-PathKey list, while a +multi-column index generates a list with one element per index column. +(Actually, since an index can be scanned either forward or backward, there +are two possible sort orders and two possible PathKey lists it can +generate.) + +Note that a bitmap scan or multi-pass indexscan (OR clause scan) has NIL +pathkeys since we can say nothing about the overall order of its result. +Also, an indexscan on an unordered type of index generates NIL pathkeys. +However, we can always create a pathkey by doing an explicit sort. The +pathkeys for a Sort plan's output just represent the sort key fields and +the ordering operators used. Things get more interesting when we consider joins. Suppose we do a mergejoin between A and B using the mergeclause A.X = B.Y. The output -of the mergejoin is sorted by X --- but it is also sorted by Y. We -represent this fact by listing both keys in a single pathkey sublist: -( (A.X/xsortop B.Y/ysortop) ). This pathkey asserts that the major -sort order of the Path can be taken to be *either* A.X or B.Y. -They are equal, so they are both primary sort keys. By doing this, -we allow future joins to use either var as a pre-sorted key, so upper -Mergejoins may be able to avoid having to re-sort the Path. This is -why pathkeys is a List of Lists. - -We keep a sortop associated with each PathKeyItem because cross-data-type -mergejoins are possible; for example int4 = int8 is mergejoinable. -In this case we need to remember that the left var is ordered by int4lt -while the right var is ordered by int8lt. So the different members of -each sublist could have different sortops. - -Note that while the order of the top list is meaningful (primary vs. -secondary sort key), the order of each sublist is arbitrary. Each sublist -should be regarded as a set of equivalent keys, with no significance -to the list order. - -With a little further thought, it becomes apparent that pathkeys for -joins need not only come from mergejoins. For example, if we do a -nestloop join between outer relation A and inner relation B, then any -pathkeys relevant to A are still valid for the join result: we have -not altered the order of the tuples from A. Even more interesting, -if there was a mergeclause (more formally, an "equijoin clause") A.X=B.Y, -and A.X was a pathkey for the outer relation A, then we can assert that -B.Y is a pathkey for the join result; X was ordered before and still is, -and the joined values of Y are equal to the joined values of X, so Y +of the mergejoin is sorted by X --- but it is also sorted by Y. Again, +this can be represented by a PathKey referencing an EquivalenceClass +containing both X and Y. + +With a little further thought, it becomes apparent that nestloop joins +can also produce sorted output. For example, if we do a nestloop join +between outer relation A and inner relation B, then any pathkeys relevant +to A are still valid for the join result: we have not altered the order of +the tuples from A. Even more interesting, if there was an equivalence clause +A.X=B.Y, and A.X was a pathkey for the outer relation A, then we can assert +that B.Y is a pathkey for the join result; X was ordered before and still +is, and the joined values of Y are equal to the joined values of X, so Y must now be ordered too. This is true even though we used neither an -explicit sort nor a mergejoin on Y. - -More generally, whenever we have an equijoin clause A.X = B.Y and a -pathkey A.X, we can add B.Y to that pathkey if B is part of the joined -relation the pathkey is for, *no matter how we formed the join*. It works -as long as the clause has been applied at some point while forming the -join relation. (In the current implementation, we always apply qual -clauses as soon as possible, ie, as far down in the plan tree as possible. -So we can treat the pathkeys as equivalent everywhere. The exception is -when the relations A and B are joined inside the nullable side of an -OUTER JOIN and the equijoin clause comes from above the OUTER JOIN. In this -case we cannot apply the qual as soon as A and B are joined, so we do not -consider the pathkeys to be equivalent. This could be improved if we wanted -to go to the trouble of making pathkey equivalence be context-dependent, -but that seems much more complex than it's worth.) - -In short, then: when producing the pathkeys for a merge or nestloop join, -we can keep all of the keys of the outer path, since the ordering of the -outer path will be preserved in the result. Furthermore, we can add to -each pathkey sublist any inner vars that are equijoined to any of the -outer vars in the sublist; this works regardless of whether we are -implementing the join using that equijoin clause as a mergeclause, -or merely enforcing the clause after-the-fact as a qpqual filter. - -Although Hashjoins also work only with equijoin operators, it is *not* -safe to consider the output of a Hashjoin to be sorted in any particular -order --- not even the outer path's order. This is true because the -executor might have to split the join into multiple batches. Therefore -a Hashjoin is always given NIL pathkeys. (Also, we need to use only -mergejoinable operators when deducing which inner vars are now sorted, -because a mergejoin operator tells us which left- and right-datatype -sortops can be considered equivalent, whereas a hashjoin operator -doesn't imply anything about sort order.) +explicit sort nor a mergejoin on Y. (Note: hash joins cannot be counted +on to preserve the order of their outer relation, because the executor +might decide to "batch" the join, so we always set pathkeys to NIL for +a hashjoin path.) Exception: a RIGHT or FULL join doesn't preserve the +ordering of its outer relation, because it might insert nulls at random +points in the ordering. + +In general, we can justify using EquivalenceClasses as the basis for +pathkeys because, whenever we scan a relation containing multiple +EquivalenceClass members or join two relations each containing +EquivalenceClass members, we apply restriction or join clauses derived from +the EquivalenceClass. This guarantees that any two values listed in the +EquivalenceClass are in fact equal in all tuples emitted by the scan or +join, and therefore that if the tuples are sorted by one of the values, +they can be considered sorted by any other as well. It does not matter +whether the test clause is used as a mergeclause, or merely enforced +after-the-fact as a qpqual filter. + +Note that there is no particular difficulty in labeling a path's sort +order with a PathKey referencing an EquivalenceClass that contains +variables not yet joined into the path's output. We can simply ignore +such entries as not being relevant (yet). This makes it possible to +use the same EquivalenceClasses throughout the join planning process. +In fact, by being careful not to generate multiple identical PathKey +objects, we can reduce comparison of EquivalenceClasses and PathKeys +to simple pointer comparison, which is a huge savings because add_path +has to make a large number of PathKey comparisons in deciding whether +competing Paths are equivalently sorted. Pathkeys are also useful to represent an ordering that we wish to achieve, since they are easily compared to the pathkeys of a potential candidate path. So, SortClause lists are turned into pathkeys lists for use inside the optimizer. -OK, now for how it *really* works: - -We did implement pathkeys just as described above, and found that the -planner spent a huge amount of time comparing pathkeys, because the -representation of pathkeys as unordered lists made it expensive to decide -whether two were equal or not. So, we've modified the representation -as described next. - -If we scan the WHERE clause for equijoin clauses (mergejoinable clauses) -during planner startup, we can construct lists of equivalent pathkey items -for the query. There could be more than two items per equivalence set; -for example, WHERE A.X = B.Y AND B.Y = C.Z AND D.R = E.S creates the -equivalence sets { A.X B.Y C.Z } and { D.R E.S } (plus associated sortops). -Any pathkey item that belongs to an equivalence set implies that all the -other items in its set apply to the relation too, or at least all the ones -that are for fields present in the relation. (Some of the items in the -set might be for as-yet-unjoined relations.) Furthermore, any multi-item -pathkey sublist that appears at any stage of planning the query *must* be -a subset of one or another of these equivalence sets; there's no way we'd -have put two items in the same pathkey sublist unless they were equijoined -in WHERE. - -Now suppose that we allow a pathkey sublist to contain pathkey items for -vars that are not yet part of the pathkey's relation. This introduces -no logical difficulty, because such items can easily be seen to be -irrelevant; we just mandate that they be ignored. But having allowed -this, we can declare (by fiat) that any multiple-item pathkey sublist -must be "equal()" to the appropriate equivalence set. In effect, -whenever we make a pathkey sublist that mentions any var appearing in an -equivalence set, we instantly add all the other vars equivalenced to it, -whether they appear yet in the pathkey's relation or not. And we also -mandate that the pathkey sublist appear in the same order as the -equivalence set it comes from. - -In fact, we can go even further, and say that the canonical representation -of a pathkey sublist is a pointer directly to the relevant equivalence set, -which is kept in a list of pathkey equivalence sets for the query. Then -pathkey sublist comparison reduces to pointer-equality checking! To do this -we also have to add single-element pathkey sublists to the query's list of -equivalence sets, but that's a small price to pay. - -By the way, it's OK and even useful for us to build equivalence sets -that mention multiple vars from the same relation. For example, if -we have WHERE A.X = A.Y and we are scanning A using an index on X, -we can legitimately conclude that the path is sorted by Y as well; -and this could be handy if Y is the variable used in other join clauses -or ORDER BY. So, any WHERE clause with a mergejoinable operator can -contribute to an equivalence set, even if it's not a join clause. - -As sketched so far, equijoin operators allow us to conclude that -A.X = B.Y and B.Y = C.Z together imply A.X = C.Z, even when different -datatypes are involved. What is not immediately obvious is that to use -the "canonical pathkey" representation, we *must* make this deduction. -An example (from a real bug in Postgres 7.0) is a mergejoin for a query -like - SELECT * FROM t1, t2 WHERE t1.f2 = t2.f3 AND t1.f1 = t2.f3; -The canonical-pathkey mechanism is able to deduce that t1.f1 = t1.f2 -(ie, both appear in the same canonical pathkey set). If we sort t1 -and then apply a mergejoin, we *must* filter the t1 tuples using the -implied qualification f1 = f2, because otherwise the output of the sort -will be ordered by f1 or f2 (whichever we sort on) but not both. The -merge will then fail since (depending on which qual clause it applies -first) it's expecting either ORDER BY f1,f2 or ORDER BY f2,f1, but the -actual output of the sort has neither of these orderings. The best fix -for this is to generate all the implied equality constraints for each -equijoin set and add these clauses to the query's qualification list. -In other words, we *explicitly* deduce f1 = f2 and add this to the WHERE -clause. The constraint will be applied as a qpqual to the output of the -scan on t1, resulting in sort output that is indeed ordered by both vars. -This approach provides more information to the selectivity estimation -code than it would otherwise have, and reduces the number of tuples -processed in join stages, so it's a win to make these deductions even -if we weren't forced to. - -When we generate implied equality constraints, we may find ourselves -adding redundant clauses to specific relations. For example, consider - SELECT * FROM t1, t2, t3 WHERE t1.a = t2.b AND t2.b = t3.c; -We will generate the implied clause t1.a = t3.c and add it to the tree. -This is good since it allows us to consider joining t1 and t3 directly, -which we otherwise wouldn't do. But when we reach the stage of joining -all three relations, we will have redundant join clauses --- eg, if we -join t1 and t2 first, then the path that joins (t1 t2) to t3 will have -both t2.b = t3.c and t1.a = t3.c as restriction clauses. This is bad; -not only is evaluation of the extra clause useless work at runtime, -but the selectivity estimator routines will underestimate the number -of tuples produced since they won't know that the two clauses are -perfectly redundant. We fix this by detecting and removing redundant -clauses as the restriction clause list is built for each join. (We -can't do it sooner, since which clauses are redundant will vary depending -on the join order.) - -Yet another implication of all this is that mergejoinable operators -must form closed equivalence sets. For example, if "int2 = int4" -and "int4 = int8" are both marked mergejoinable, then there had better -be a mergejoinable "int2 = int8" operator as well. Otherwise, when -we're given WHERE int2var = int4var AND int4var = int8var, we'll fail -while trying to create a representation of the implied clause -int2var = int8var. +Because we have to generate pathkeys lists from the sort clauses before +we've finished EquivalenceClass merging, we cannot use the pointer-equality +method of comparing PathKeys in the earliest stages of the planning +process. Instead, we generate "non canonical" PathKeys that reference +single-element EquivalenceClasses that might get merged later. After we +complete EquivalenceClass merging, we replace these with "canonical" +PathKeys that reference only fully-merged classes, and after that we make +sure we don't generate more than one copy of each "canonical" PathKey. +Then it is safe to use pointer comparison on canonical PathKeys. An additional refinement we can make is to insist that canonical pathkey -lists (sort orderings) do not mention the same pathkey set more than once. -For example, a pathkey list ((A) (B) (A)) is redundant --- the second -occurrence of (A) does not change the ordering, since the data must already -be sorted by A. Although a user probably wouldn't write ORDER BY A,B,A -directly, such redundancies are more probable once equijoin equivalences -have been considered. Also, the system is likely to generate redundant -pathkey lists when computing the sort ordering needed for a mergejoin. By -eliminating the redundancy, we save time and improve planning, since the -planner will more easily recognize equivalent orderings as being equivalent. +lists (sort orderings) do not mention the same EquivalenceClass more than +once. For example, in all these cases the second sort column is redundant, +because it cannot distinguish values that are the same according to the +first sort column: + SELECT ... ORDER BY x, x + SELECT ... ORDER BY x, x DESC + SELECT ... WHERE x = y ORDER BY x, y +Although a user probably wouldn't write "ORDER BY x,x" directly, such +redundancies are more probable once equivalence classes have been +considered. Also, the system may generate redundant pathkey lists when +computing the sort ordering needed for a mergejoin. By eliminating the +redundancy, we save time and improve planning, since the planner will more +easily recognize equivalent orderings as being equivalent. + +Another interesting property is that if the underlying EquivalenceClass +contains a constant and is not below an outer join, then the pathkey is +completely redundant and need not be sorted by at all! Every row must +contain the same constant value, so there's no need to sort. (If the EC is +below an outer join, we still have to sort, since some of the rows might +have gone to null and others not. In this case we must be careful to pick +a non-const member to sort by. The assumption that all the non-const +members go to null at the same plan level is critical here, else they might +not produce the same sort order.) This might seem pointless because users +are unlikely to write "... WHERE x = 42 ORDER BY x", but it allows us to +recognize when particular index columns are irrelevant to the sort order: +if we have "... WHERE x = 42 ORDER BY y", scanning an index on (x,y) +produces correctly ordered data without a sort step. We used to have very +ugly ad-hoc code to recognize that in limited contexts, but discarding +constant ECs from pathkeys makes it happen cleanly and automatically. + +You might object that a below-outer-join EquivalenceClass doesn't always +represent the same values at every level of the join tree, and so using +it to uniquely identify a sort order is dubious. This is true, but we +can avoid dealing with the fact explicitly because we always consider that +an outer join destroys any ordering of its nullable inputs. Thus, even +if a path was sorted by {a.x} below an outer join, we'll re-sort if that +sort ordering was important; and so using the same PathKey for both sort +orderings doesn't create any real problem. + + Though Bob Devine was not involved in the coding of our optimizer, he is available to field questions about diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 7b5fce139d..833b3f59a3 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -4,7 +4,7 @@ # Makefile for optimizer/path # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/optimizer/path/Makefile,v 1.17 2007/01/20 17:16:11 petere Exp $ +# $PostgreSQL: pgsql/src/backend/optimizer/path/Makefile,v 1.18 2007/01/20 20:45:38 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/optimizer/path top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = allpaths.o clausesel.o costsize.o indxpath.o \ +OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \ joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o all: SUBSYS.o diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 01178d93dd..cbe78c3abd 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.156 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.157 2007/01/20 20:45:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -325,6 +325,16 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, adjust_appendrel_attrs((Node *) rel->joininfo, appinfo); + /* + * We have to make child entries in the EquivalenceClass data + * structures as well. + */ + if (rel->has_eclass_joins) + { + add_child_rel_equivalences(root, appinfo, rel, childrel); + childrel->has_eclass_joins = true; + } + /* * Copy the parent's attr_needed data as well, with appropriate * adjustment of relids and attribute numbers. diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index e1c003b504..22cbf8e2b9 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.174 2007/01/10 18:06:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.175 2007/01/20 20:45:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1258,8 +1258,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; List *mergeclauses = path->path_mergeclauses; - Oid *mergeFamilies = path->path_mergeFamilies; - int *mergeStrategies = path->path_mergeStrategies; List *outersortkeys = path->outersortkeys; List *innersortkeys = path->innersortkeys; Cost startup_cost = 0; @@ -1268,7 +1266,6 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) Selectivity merge_selec; QualCost merge_qual_cost; QualCost qp_qual_cost; - RestrictInfo *firstclause; double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = PATH_ROWS(inner_path); double outer_rows, @@ -1347,32 +1344,47 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) * inputs that will actually need to be scanned. We use only the first * (most significant) merge clause for this purpose. * - * Since this calculation is somewhat expensive, and will be the same for - * all mergejoin paths associated with the merge clause, we cache the - * results in the RestrictInfo node. XXX that won't work anymore once - * we support multiple possible orderings! + * XXX mergejoinscansel is a bit expensive, can we cache its results? */ if (mergeclauses && path->jpath.jointype != JOIN_FULL) { - firstclause = (RestrictInfo *) linitial(mergeclauses); - if (firstclause->left_mergescansel < 0) /* not computed yet? */ - mergejoinscansel(root, (Node *) firstclause->clause, - mergeFamilies[0], - mergeStrategies[0], - &firstclause->left_mergescansel, - &firstclause->right_mergescansel); - - if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids)) + RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses); + List *opathkeys; + List *ipathkeys; + PathKey *opathkey; + PathKey *ipathkey; + Selectivity leftscansel, + rightscansel; + + /* Get the input pathkeys to determine the sort-order details */ + opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys; + ipathkeys = innersortkeys ? innersortkeys : inner_path->pathkeys; + Assert(opathkeys); + Assert(ipathkeys); + opathkey = (PathKey *) linitial(opathkeys); + ipathkey = (PathKey *) linitial(ipathkeys); + /* debugging check */ + if (opathkey->pk_opfamily != ipathkey->pk_opfamily || + opathkey->pk_strategy != ipathkey->pk_strategy || + opathkey->pk_nulls_first != ipathkey->pk_nulls_first) + elog(ERROR, "left and right pathkeys do not match in mergejoin"); + + mergejoinscansel(root, (Node *) firstclause->clause, + opathkey->pk_opfamily, opathkey->pk_strategy, + &leftscansel, &rightscansel); + + if (bms_is_subset(firstclause->left_relids, + outer_path->parent->relids)) { /* left side of clause is outer */ - outerscansel = firstclause->left_mergescansel; - innerscansel = firstclause->right_mergescansel; + outerscansel = leftscansel; + innerscansel = rightscansel; } else { /* left side of clause is inner */ - outerscansel = firstclause->right_mergescansel; - innerscansel = firstclause->left_mergescansel; + outerscansel = rightscansel; + innerscansel = leftscansel; } if (path->jpath.jointype == JOIN_LEFT) outerscansel = 1.0; diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c new file mode 100644 index 0000000000..063e8d5d01 --- /dev/null +++ b/src/backend/optimizer/path/equivclass.c @@ -0,0 +1,1662 @@ +/*------------------------------------------------------------------------- + * + * equivclass.c + * Routines for managing EquivalenceClasses + * + * See src/backend/optimizer/README for discussion of EquivalenceClasses. + * + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.1 2007/01/20 20:45:39 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/skey.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/paths.h" +#include "optimizer/planmain.h" +#include "optimizer/prep.h" +#include "optimizer/var.h" +#include "utils/lsyscache.h" + + +static void add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, + bool is_child, Oid datatype); +static void generate_base_implied_equalities_const(PlannerInfo *root, + EquivalenceClass *ec); +static void generate_base_implied_equalities_no_const(PlannerInfo *root, + EquivalenceClass *ec); +static void generate_base_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec); +static List *generate_join_implied_equalities_normal(PlannerInfo *root, + EquivalenceClass *ec, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); +static List *generate_join_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); +static Oid select_equality_operator(EquivalenceClass *ec, + Oid lefttype, Oid righttype); +static void reconsider_outer_join_clause(PlannerInfo *root, + RestrictInfo *rinfo, + bool outer_on_left); +static void reconsider_full_join_clause(PlannerInfo *root, + RestrictInfo *rinfo); + + +/* + * process_equivalence + * The given clause has a mergejoinable operator and can be applied without + * any delay by an outer join, so its two sides can be considered equal + * anywhere they are both computable; moreover that equality can be + * extended transitively. Record this knowledge in the EquivalenceClass + * data structure. Returns TRUE if successful, FALSE if not (in which + * case caller should treat the clause as ordinary, not an equivalence). + * + * If below_outer_join is true, then the clause was found below the nullable + * side of an outer join, so its sides might validly be both NULL rather than + * strictly equal. We can still deduce equalities in such cases, but we take + * care to mark an EquivalenceClass if it came from any such clauses. Also, + * we have to check that both sides are either pseudo-constants or strict + * functions of Vars, else they might not both go to NULL above the outer + * join. (This is the reason why we need a failure return. It's more + * convenient to check this case here than at the call sites...) + * + * Note: constructing merged EquivalenceClasses is a standard UNION-FIND + * problem, for which there exist better data structures than simple lists. + * If this code ever proves to be a bottleneck then it could be sped up --- + * but for now, simple is beautiful. + * + * Note: this is only called during planner startup, not during GEQO + * exploration, so we need not worry about whether we're in the right + * memory context. + */ +bool +process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, + bool below_outer_join) +{ + Expr *clause = restrictinfo->clause; + Oid opno, + item1_type, + item2_type; + Expr *item1; + Expr *item2; + Relids item1_relids, + item2_relids; + List *opfamilies; + EquivalenceClass *ec1, + *ec2; + ListCell *lc1; + + /* Extract info from given clause */ + Assert(is_opclause(clause)); + opno = ((OpExpr *) clause)->opno; + item1 = (Expr *) get_leftop(clause); + item2 = (Expr *) get_rightop(clause); + item1_relids = restrictinfo->left_relids; + item2_relids = restrictinfo->right_relids; + + /* + * If below outer join, check for strictness, else reject. + */ + if (below_outer_join) + { + if (!bms_is_empty(item1_relids) && + contain_nonstrict_functions((Node *) item1)) + return false; /* LHS is non-strict but not constant */ + if (!bms_is_empty(item2_relids) && + contain_nonstrict_functions((Node *) item2)) + return false; /* RHS is non-strict but not constant */ + } + + /* + * We use the declared input types of the operator, not exprType() of + * the inputs, as the nominal datatypes for opfamily lookup. This + * presumes that btree operators are always registered with amoplefttype + * and amoprighttype equal to their declared input types. We will need + * this info anyway to build EquivalenceMember nodes, and by extracting + * it now we can use type comparisons to short-circuit some equal() tests. + */ + op_input_types(opno, &item1_type, &item2_type); + + opfamilies = restrictinfo->mergeopfamilies; + + /* + * Sweep through the existing EquivalenceClasses looking for matches + * to item1 and item2. These are the possible outcomes: + * + * 1. We find both in the same EC. The equivalence is already known, + * so there's nothing to do. + * + * 2. We find both in different ECs. Merge the two ECs together. + * + * 3. We find just one. Add the other to its EC. + * + * 4. We find neither. Make a new, two-entry EC. + * + * Note: since all ECs are built through this process, it's impossible + * that we'd match an item in more than one existing EC. It is possible + * to match more than once within an EC, if someone fed us something silly + * like "WHERE X=X". (However, we can't simply discard such clauses, + * since they should fail when X is null; so we will build a 2-member + * EC to ensure the correct restriction clause gets generated. Hence + * there is no shortcut here for item1 and item2 equal.) + */ + ec1 = ec2 = NULL; + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* Never match to a volatile EC */ + if (cur_ec->ec_has_volatile) + continue; + + /* + * A "match" requires matching sets of btree opfamilies. Use of + * equal() for this test has implications discussed in the comments + * for get_mergejoin_opfamilies(). + */ + if (!equal(opfamilies, cur_ec->ec_opfamilies)) + continue; + + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + Assert(!cur_em->em_is_child); /* no children yet */ + + /* + * If below an outer join, don't match constants: they're not + * as constant as they look. + */ + if ((below_outer_join || cur_ec->ec_below_outer_join) && + cur_em->em_is_const) + continue; + + if (!ec1 && + item1_type == cur_em->em_datatype && + equal(item1, cur_em->em_expr)) + { + ec1 = cur_ec; + if (ec2) + break; + } + + if (!ec2 && + item2_type == cur_em->em_datatype && + equal(item2, cur_em->em_expr)) + { + ec2 = cur_ec; + if (ec1) + break; + } + } + + if (ec1 && ec2) + break; + } + + /* Sweep finished, what did we find? */ + + if (ec1 && ec2) + { + /* If case 1, nothing to do, except add to sources */ + if (ec1 == ec2) + { + ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); + ec1->ec_below_outer_join |= below_outer_join; + return true; + } + + /* + * Case 2: need to merge ec1 and ec2. We add ec2's items to ec1, + * then set ec2's ec_merged link to point to ec1 and remove ec2 + * from the eq_classes list. We cannot simply delete ec2 because + * that could leave dangling pointers in existing PathKeys. We + * leave it behind with a link so that the merged EC can be found. + */ + ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members); + ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources); + ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids); + ec1->ec_has_const |= ec2->ec_has_const; + /* can't need to set has_volatile */ + ec1->ec_below_outer_join |= ec2->ec_below_outer_join; + ec2->ec_merged = ec1; + root->eq_classes = list_delete_ptr(root->eq_classes, ec2); + /* just to avoid debugging confusion w/ dangling pointers: */ + ec2->ec_members = NIL; + ec2->ec_sources = NIL; + ec2->ec_relids = NULL; + ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); + ec1->ec_below_outer_join |= below_outer_join; + } + else if (ec1) + { + /* Case 3: add item2 to ec1 */ + add_eq_member(ec1, item2, item2_relids, false, item2_type); + ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); + ec1->ec_below_outer_join |= below_outer_join; + } + else if (ec2) + { + /* Case 3: add item1 to ec2 */ + add_eq_member(ec2, item1, item1_relids, false, item1_type); + ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); + ec2->ec_below_outer_join |= below_outer_join; + } + else + { + /* Case 4: make a new, two-entry EC */ + EquivalenceClass *ec = makeNode(EquivalenceClass); + + ec->ec_opfamilies = opfamilies; + ec->ec_members = NIL; + ec->ec_sources = list_make1(restrictinfo); + ec->ec_relids = NULL; + ec->ec_has_const = false; + ec->ec_has_volatile = false; + ec->ec_below_outer_join = below_outer_join; + ec->ec_broken = false; + ec->ec_merged = NULL; + add_eq_member(ec, item1, item1_relids, false, item1_type); + add_eq_member(ec, item2, item2_relids, false, item2_type); + + root->eq_classes = lappend(root->eq_classes, ec); + } + + return true; +} + +/* + * add_eq_member - build a new EquivalenceMember and add it to an EC + */ +static void +add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, + bool is_child, Oid datatype) +{ + EquivalenceMember *em = makeNode(EquivalenceMember); + + em->em_expr = expr; + em->em_relids = relids; + em->em_is_const = false; + em->em_is_child = is_child; + em->em_datatype = datatype; + + if (bms_is_empty(relids)) + { + /* + * No Vars, assume it's a pseudoconstant. This is correct for + * entries generated from process_equivalence(), because a WHERE + * clause can't contain aggregates and non-volatility was checked + * before process_equivalence() ever got called. But + * get_eclass_for_sort_expr() has to work harder. We put the tests + * there not here to save cycles in the equivalence case. + */ + Assert(!is_child); + em->em_is_const = true; + ec->ec_has_const = true; + /* it can't affect ec_relids */ + } + else if (!is_child) /* child members don't add to ec_relids */ + { + ec->ec_relids = bms_add_members(ec->ec_relids, relids); + } + ec->ec_members = lappend(ec->ec_members, em); +} + + +/* + * get_eclass_for_sort_expr + * Given an expression and opfamily info, find an existing equivalence + * class it is a member of; if none, build a new single-member + * EquivalenceClass for it. + * + * This can be used safely both before and after EquivalenceClass merging; + * since it never causes merging it does not invalidate any existing ECs + * or PathKeys. + * + * Note: opfamilies must be chosen consistently with the way + * process_equivalence() would do; that is, generated from a mergejoinable + * equality operator. Else we might fail to detect valid equivalences, + * generating poor (but not incorrect) plans. + */ +EquivalenceClass * +get_eclass_for_sort_expr(PlannerInfo *root, + Expr *expr, + Oid expr_datatype, + List *opfamilies) +{ + EquivalenceClass *newec; + ListCell *lc1; + MemoryContext oldcontext; + + /* + * Scan through the existing EquivalenceClasses for a match + */ + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* we allow matching to a volatile EC here */ + + if (!equal(opfamilies, cur_ec->ec_opfamilies)) + continue; + + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + /* + * If below an outer join, don't match constants: they're not + * as constant as they look. + */ + if (cur_ec->ec_below_outer_join && + cur_em->em_is_const) + continue; + + if (expr_datatype == cur_em->em_datatype && + equal(expr, cur_em->em_expr)) + return cur_ec; /* Match! */ + } + } + + /* + * No match, so build a new single-member EC + * + * Here, we must be sure that we construct the EC in the right context. + * We can assume, however, that the passed expr is long-lived. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + newec = makeNode(EquivalenceClass); + newec->ec_opfamilies = list_copy(opfamilies); + newec->ec_members = NIL; + newec->ec_sources = NIL; + newec->ec_relids = NULL; + newec->ec_has_const = false; + newec->ec_has_volatile = contain_volatile_functions((Node *) expr); + newec->ec_below_outer_join = false; + newec->ec_broken = false; + newec->ec_merged = NULL; + add_eq_member(newec, expr, pull_varnos((Node *) expr), + false, expr_datatype); + + /* + * add_eq_member doesn't check for volatile functions or aggregates, + * but such could appear in sort expressions, so we have to check + * whether its const-marking was correct. + */ + if (newec->ec_has_const) + { + if (newec->ec_has_volatile || contain_agg_clause((Node *) expr)) + { + newec->ec_has_const = false; + ((EquivalenceMember *) linitial(newec->ec_members))->em_is_const = false; + } + } + + root->eq_classes = lappend(root->eq_classes, newec); + + MemoryContextSwitchTo(oldcontext); + + return newec; +} + + +/* + * generate_base_implied_equalities + * Generate any restriction clauses that we can deduce from equivalence + * classes. + * + * When an EC contains pseudoconstants, our strategy is to generate + * "member = const1" clauses where const1 is the first constant member, for + * every other member (including other constants). If we are able to do this + * then we don't need any "var = var" comparisons because we've successfully + * constrained all the vars at their points of creation. If we fail to + * generate any of these clauses due to lack of cross-type operators, we fall + * back to the "ec_broken" strategy described below. (XXX if there are + * multiple constants of different types, it's possible that we might succeed + * in forming all the required clauses if we started from a different const + * member; but this seems a sufficiently hokey corner case to not be worth + * spending lots of cycles on.) + * + * For ECs that contain no pseudoconstants, we generate derived clauses + * "member1 = member2" for each pair of members belonging to the same base + * relation (actually, if there are more than two for the same base relation, + * we only need enough clauses to link each to each other). This provides + * the base case for the recursion: each row emitted by a base relation scan + * will constrain all computable members of the EC to be equal. As each + * join path is formed, we'll add additional derived clauses on-the-fly + * to maintain this invariant (see generate_join_implied_equalities). + * + * If the opfamilies used by the EC do not provide complete sets of cross-type + * equality operators, it is possible that we will fail to generate a clause + * that must be generated to maintain the invariant. (An example: given + * "WHERE a.x = b.y AND b.y = a.z", the scheme breaks down if we cannot + * generate "a.x = a.z" as a restriction clause for A.) In this case we mark + * the EC "ec_broken" and fall back to regurgitating its original source + * RestrictInfos at appropriate times. We do not try to retract any derived + * clauses already generated from the broken EC, so the resulting plan could + * be poor due to bad selectivity estimates caused by redundant clauses. But + * the correct solution to that is to fix the opfamilies ... + * + * Equality clauses derived by this function are passed off to + * process_implied_equality (in plan/initsplan.c) to be inserted into the + * restrictinfo datastructures. Note that this must be called after initial + * scanning of the quals and before Path construction begins. + */ +void +generate_base_implied_equalities(PlannerInfo *root) +{ + ListCell *lc; + Index rti; + + foreach(lc, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc); + + Assert(ec->ec_merged == NULL); /* else shouldn't be in list */ + Assert(!ec->ec_broken); /* not yet anyway... */ + + /* Single-member ECs won't generate any deductions */ + if (list_length(ec->ec_members) <= 1) + continue; + + if (ec->ec_has_const) + generate_base_implied_equalities_const(root, ec); + else + generate_base_implied_equalities_no_const(root, ec); + + /* Recover if we failed to generate required derived clauses */ + if (ec->ec_broken) + generate_base_implied_equalities_broken(root, ec); + } + + /* + * This is also a handy place to mark base rels (which should all + * exist by now) with flags showing whether they have pending eclass + * joins. + */ + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *brel = root->simple_rel_array[rti]; + + if (brel == NULL) + continue; + + brel->has_eclass_joins = has_relevant_eclass_joinclause(root, brel); + } +} + +/* + * generate_base_implied_equalities when EC contains pseudoconstant(s) + */ +static void +generate_base_implied_equalities_const(PlannerInfo *root, + EquivalenceClass *ec) +{ + EquivalenceMember *const_em = NULL; + ListCell *lc; + + /* Find the constant member to use */ + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + + if (cur_em->em_is_const) + { + const_em = cur_em; + break; + } + } + Assert(const_em != NULL); + + /* Generate a derived equality against each other member */ + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + Oid eq_op; + + Assert(!cur_em->em_is_child); /* no children yet */ + if (cur_em == const_em) + continue; + eq_op = select_equality_operator(ec, + cur_em->em_datatype, + const_em->em_datatype); + if (!OidIsValid(eq_op)) + { + /* failed... */ + ec->ec_broken = true; + break; + } + process_implied_equality(root, eq_op, + cur_em->em_expr, const_em->em_expr, + ec->ec_relids, + ec->ec_below_outer_join, + cur_em->em_is_const); + } +} + +/* + * generate_base_implied_equalities when EC contains no pseudoconstants + */ +static void +generate_base_implied_equalities_no_const(PlannerInfo *root, + EquivalenceClass *ec) +{ + EquivalenceMember **prev_ems; + ListCell *lc; + + /* + * We scan the EC members once and track the last-seen member for each + * base relation. When we see another member of the same base relation, + * we generate "prev_mem = cur_mem". This results in the minimum number + * of derived clauses, but it's possible that it will fail when a different + * ordering would succeed. XXX FIXME: use a UNION-FIND algorithm similar + * to the way we build merged ECs. (Use a list-of-lists for each rel.) + */ + prev_ems = (EquivalenceMember **) + palloc0(root->simple_rel_array_size * sizeof(EquivalenceMember *)); + + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + int relid; + + Assert(!cur_em->em_is_child); /* no children yet */ + if (bms_membership(cur_em->em_relids) != BMS_SINGLETON) + continue; + relid = bms_singleton_member(cur_em->em_relids); + Assert(relid < root->simple_rel_array_size); + + if (prev_ems[relid] != NULL) + { + EquivalenceMember *prev_em = prev_ems[relid]; + Oid eq_op; + + eq_op = select_equality_operator(ec, + prev_em->em_datatype, + cur_em->em_datatype); + if (!OidIsValid(eq_op)) + { + /* failed... */ + ec->ec_broken = true; + break; + } + process_implied_equality(root, eq_op, + prev_em->em_expr, cur_em->em_expr, + ec->ec_relids, + ec->ec_below_outer_join, + false); + } + prev_ems[relid] = cur_em; + } + + pfree(prev_ems); + + /* + * We also have to make sure that all the Vars used in the member + * clauses will be available at any join node we might try to reference + * them at. For the moment we force all the Vars to be available at + * all join nodes for this eclass. Perhaps this could be improved by + * doing some pre-analysis of which members we prefer to join, but it's + * no worse than what happened in the pre-8.3 code. + */ + foreach(lc, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + List *vars = pull_var_clause((Node *) cur_em->em_expr, false); + + add_vars_to_targetlist(root, vars, ec->ec_relids); + list_free(vars); + } +} + +/* + * generate_base_implied_equalities cleanup after failure + * + * What we must do here is push any zero- or one-relation source RestrictInfos + * of the EC back into the main restrictinfo datastructures. Multi-relation + * clauses will be regurgitated later by generate_join_implied_equalities(). + * (We do it this way to maintain continuity with the case that ec_broken + * becomes set only after we've gone up a join level or two.) + */ +static void +generate_base_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec) +{ + ListCell *lc; + + foreach(lc, ec->ec_sources) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + if (bms_membership(restrictinfo->required_relids) != BMS_MULTIPLE) + distribute_restrictinfo_to_rels(root, restrictinfo); + } +} + + +/* + * generate_join_implied_equalities + * Generate any join clauses that we can deduce from equivalence classes. + * + * At a join node, we must enforce restriction clauses sufficient to ensure + * that all equivalence-class members computable at that node are equal. + * Since the set of clauses to enforce can vary depending on which subset + * relations are the inputs, we have to compute this afresh for each join + * path pair. Hence a fresh List of RestrictInfo nodes is built and passed + * back on each call. + * + * The results are sufficient for use in merge, hash, and plain nestloop join + * methods. We do not worry here about selecting clauses that are optimal + * for use in a nestloop-with-inner-indexscan join, however. indxpath.c makes + * its own selections of clauses to use, and if the ones we pick here are + * redundant with those, the extras will be eliminated in createplan.c. + */ +List * +generate_join_implied_equalities(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc); + List *sublist = NIL; + + /* ECs containing consts do not need any further enforcement */ + if (ec->ec_has_const) + continue; + + /* Single-member ECs won't generate any deductions */ + if (list_length(ec->ec_members) <= 1) + continue; + + /* We can quickly ignore any that don't overlap the join, too */ + if (!bms_overlap(ec->ec_relids, joinrel->relids)) + continue; + + if (!ec->ec_broken) + sublist = generate_join_implied_equalities_normal(root, + ec, + joinrel, + outer_rel, + inner_rel); + + /* Recover if we failed to generate required derived clauses */ + if (ec->ec_broken) + sublist = generate_join_implied_equalities_broken(root, + ec, + joinrel, + outer_rel, + inner_rel); + + result = list_concat(result, sublist); + } + + return result; +} + +/* + * generate_join_implied_equalities for a still-valid EC + */ +static List * +generate_join_implied_equalities_normal(PlannerInfo *root, + EquivalenceClass *ec, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + List *result = NIL; + List *new_members = NIL; + List *outer_members = NIL; + List *inner_members = NIL; + ListCell *lc1; + + /* + * First, scan the EC to identify member values that are computable + * at the outer rel, at the inner rel, or at this relation but not in + * either input rel. The outer-rel members should already be enforced + * equal, likewise for the inner-rel members. We'll need to create + * clauses to enforce that any newly computable members are all equal + * to each other as well as to at least one input member, plus enforce + * at least one outer-rel member equal to at least one inner-rel member. + */ + foreach(lc1, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1); + + if (cur_em->em_is_child) + continue; /* ignore children here */ + if (!bms_is_subset(cur_em->em_relids, joinrel->relids)) + continue; /* ignore --- not computable yet */ + + if (bms_is_subset(cur_em->em_relids, outer_rel->relids)) + outer_members = lappend(outer_members, cur_em); + else if (bms_is_subset(cur_em->em_relids, inner_rel->relids)) + inner_members = lappend(inner_members, cur_em); + else + new_members = lappend(new_members, cur_em); + } + + /* + * First, select the joinclause if needed. We can equate any one outer + * member to any one inner member, but we have to find a datatype + * combination for which an opfamily member operator exists. If we + * have choices, we prefer simple Var members (possibly with RelabelType) + * since these are (a) cheapest to compute at runtime and (b) most likely + * to have useful statistics. Also, if enable_hashjoin is on, we prefer + * operators that are also hashjoinable. + */ + if (outer_members && inner_members) + { + EquivalenceMember *best_outer_em = NULL; + EquivalenceMember *best_inner_em = NULL; + Oid best_eq_op = InvalidOid; + int best_score = -1; + RestrictInfo *rinfo; + + foreach(lc1, outer_members) + { + EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc1); + ListCell *lc2; + + foreach(lc2, inner_members) + { + EquivalenceMember *inner_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + int score; + + eq_op = select_equality_operator(ec, + outer_em->em_datatype, + inner_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; + score = 0; + if (IsA(outer_em->em_expr, Var) || + (IsA(outer_em->em_expr, RelabelType) && + IsA(((RelabelType *) outer_em->em_expr)->arg, Var))) + score++; + if (IsA(inner_em->em_expr, Var) || + (IsA(inner_em->em_expr, RelabelType) && + IsA(((RelabelType *) inner_em->em_expr)->arg, Var))) + score++; + if (!enable_hashjoin || op_hashjoinable(eq_op)) + score++; + if (score > best_score) + { + best_outer_em = outer_em; + best_inner_em = inner_em; + best_eq_op = eq_op; + best_score = score; + if (best_score == 3) + break; /* no need to look further */ + } + } + if (best_score == 3) + break; /* no need to look further */ + } + if (best_score < 0) + { + /* failed... */ + ec->ec_broken = true; + return NIL; + } + + rinfo = build_implied_join_equality(best_eq_op, + best_outer_em->em_expr, + best_inner_em->em_expr, + ec->ec_relids); + /* mark restrictinfo as redundant with other joinclauses */ + rinfo->parent_ec = ec; + /* we can set these too, rather than letting them be looked up later */ + rinfo->left_ec = ec; + rinfo->right_ec = ec; + + result = lappend(result, rinfo); + } + + /* + * Now deal with building restrictions for any expressions that involve + * Vars from both sides of the join. We have to equate all of these to + * each other as well as to at least one old member (if any). + * + * XXX as in generate_base_implied_equalities_no_const, we could be a + * lot smarter here to avoid unnecessary failures in cross-type situations. + * For now, use the same left-to-right method used there. + */ + if (new_members) + { + List *old_members = list_concat(outer_members, inner_members); + EquivalenceMember *prev_em = NULL; + RestrictInfo *rinfo; + + /* For now, arbitrarily take the first old_member as the one to use */ + if (old_members) + new_members = lappend(new_members, linitial(old_members)); + + foreach(lc1, new_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc1); + + if (prev_em != NULL) + { + Oid eq_op; + + eq_op = select_equality_operator(ec, + prev_em->em_datatype, + cur_em->em_datatype); + if (!OidIsValid(eq_op)) + { + /* failed... */ + ec->ec_broken = true; + return NIL; + } + rinfo = build_implied_join_equality(eq_op, + prev_em->em_expr, + cur_em->em_expr, + ec->ec_relids); + + /* do NOT set parent_ec, this qual is not redundant! */ + + /* we can set these, though */ + rinfo->left_ec = ec; + rinfo->right_ec = ec; + + result = lappend(result, rinfo); + } + prev_em = cur_em; + } + } + + return result; +} + +/* + * generate_join_implied_equalities cleanup after failure + * + * Return any original RestrictInfos that are enforceable at this join. + */ +static List * +generate_join_implied_equalities_broken(PlannerInfo *root, + EquivalenceClass *ec, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + List *result = NIL; + ListCell *lc; + + foreach(lc, ec->ec_sources) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + if (bms_is_subset(restrictinfo->required_relids, joinrel->relids) && + !bms_is_subset(restrictinfo->required_relids, outer_rel->relids) && + !bms_is_subset(restrictinfo->required_relids, inner_rel->relids)) + result = lappend(result, restrictinfo); + } + + return result; +} + + +/* + * select_equality_operator + * Select a suitable equality operator for comparing two EC members + * + * Returns InvalidOid if no operator can be found for this datatype combination + */ +static Oid +select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype) +{ + ListCell *lc; + + foreach(lc, ec->ec_opfamilies) + { + Oid opfamily = lfirst_oid(lc); + Oid opno; + + opno = get_opfamily_member(opfamily, lefttype, righttype, + BTEqualStrategyNumber); + if (OidIsValid(opno)) + return opno; + } + return InvalidOid; +} + + +/* + * reconsider_outer_join_clauses + * Re-examine any outer-join clauses that were set aside by + * distribute_qual_to_rels(), and either create EquivalenceClasses + * to replace them or push them out into the regular join-clause lists. + * + * When we have mergejoinable clauses A = B that are outer-join clauses, + * we can't blindly combine them with other clauses A = C to deduce B = C, + * since in fact the "equality" A = B won't necessarily hold above the + * outer join (one of the variables might be NULL instead). Nonetheless + * there are cases where we can add qual clauses using transitivity. + * + * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR + * for which there is also an equivalence clause OUTERVAR = CONSTANT. + * It is safe and useful to push a clause INNERVAR = CONSTANT into the + * evaluation of the inner (nullable) relation, because any inner rows not + * meeting this condition will not contribute to the outer-join result anyway. + * (Any outer rows they could join to will be eliminated by the pushed-down + * equivalence clause.) + * + * Note that the above rule does not work for full outer joins; nor is it + * very interesting to consider cases where the equivalence clause involves + * relations entirely outside the outer join, since such clauses couldn't + * be pushed into the inner side's scan anyway. So the restriction to + * outervar = pseudoconstant is not really giving up anything. + * + * For full-join cases, we can only do something useful if it's a FULL JOIN + * USING and a merged column has an equivalence MERGEDVAR = CONSTANT. + * By the time it gets here, the merged column will look like + * COALESCE(LEFTVAR, RIGHTVAR) + * and we will have a full-join clause LEFTVAR = RIGHTVAR that we can match + * the COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT + * and RIGHTVAR = CONSTANT into the input relations, since any rows not + * meeting these conditions cannot contribute to the join result. + * + * Again, there isn't any traction to be gained by trying to deal with + * clauses comparing a mergedvar to a non-pseudoconstant. So we can make + * use of the EquivalenceClasses to search for matching variables that were + * equivalenced to constants. The interesting outer-join clauses were + * accumulated for us by distribute_qual_to_rels. + * + * When we find one of these cases, we implement the changes we want by + * generating a new equivalence clause INNERVAR = CONSTANT (or LEFTVAR, etc) + * and pushing it into the EquivalenceClass structures. This is because we + * may already know that INNERVAR is equivalenced to some other var(s), and + * we'd like the constant to propagate to them too. Note that it would be + * unsafe to merge any existing EC for INNERVAR with the OUTERVAR's EC --- + * that could result in propagating constant restrictions from + * INNERVAR to OUTERVAR, which would be very wrong. + * + * If we don't find any match for a set-aside outer join clause, we must + * throw it back into the regular joinclause processing by passing it to + * distribute_restrictinfo_to_rels(). + */ +void +reconsider_outer_join_clauses(PlannerInfo *root) +{ + ListCell *lc; + + /* Process the LEFT JOIN clauses */ + foreach(lc, root->left_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + reconsider_outer_join_clause(root, rinfo, true); + } + /* And the RIGHT JOIN clauses */ + foreach(lc, root->right_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + reconsider_outer_join_clause(root, rinfo, false); + } + /* And the FULL JOIN clauses */ + foreach(lc, root->full_join_clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + reconsider_full_join_clause(root, rinfo); + } +} + +/* + * reconsider_outer_join_clauses for a single LEFT/RIGHT JOIN clause + */ +static void +reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, + bool outer_on_left) +{ + Expr *outervar, + *innervar; + Oid left_type, + right_type, + inner_datatype; + ListCell *lc1; + + /* Extract needed info from the clause */ + Assert(is_opclause(rinfo->clause)); + op_input_types(((OpExpr *) rinfo->clause)->opno, + &left_type, &right_type); + if (outer_on_left) + { + outervar = (Expr *) get_leftop(rinfo->clause); + innervar = (Expr *) get_rightop(rinfo->clause); + inner_datatype = right_type; + } + else + { + outervar = (Expr *) get_rightop(rinfo->clause); + innervar = (Expr *) get_leftop(rinfo->clause); + inner_datatype = left_type; + } + + /* Scan EquivalenceClasses for a match to outervar */ + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + bool match; + ListCell *lc2; + + /* Ignore EC unless it contains pseudoconstants */ + if (!cur_ec->ec_has_const) + continue; + /* Never match to a volatile EC */ + if (cur_ec->ec_has_volatile) + continue; + /* It has to match the outer-join clause as to opfamilies, too */ + if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies)) + continue; + /* Does it contain a match to outervar? */ + match = false; + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + if (equal(outervar, cur_em->em_expr)) + { + match = true; + break; + } + } + if (!match) + continue; /* no match, so ignore this EC */ + + /* + * Yes it does! Try to generate a clause INNERVAR = CONSTANT for + * each CONSTANT in the EC. Note that we must succeed with at + * least one constant before we can decide to throw away the + * outer-join clause. + */ + match = false; + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + RestrictInfo *newrinfo; + + if (!cur_em->em_is_const) + continue; /* ignore non-const members */ + eq_op = select_equality_operator(cur_ec, + inner_datatype, + cur_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; /* can't generate equality */ + newrinfo = build_implied_join_equality(eq_op, + innervar, + cur_em->em_expr, + cur_ec->ec_relids); + if (process_equivalence(root, newrinfo, true)) + match = true; + } + + /* + * If we were able to equate INNERVAR to any constant, we're done, and + * we can throw away the outer-join clause as redundant. Otherwise, + * fall out of the search loop, since we know the OUTERVAR appears in + * at most one EC. + */ + if (match) + return; + else + break; + } + + /* We did not find a match, so throw it back into regular processing */ + distribute_restrictinfo_to_rels(root, rinfo); +} + +/* + * reconsider_outer_join_clauses for a single FULL JOIN clause + */ +static void +reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) +{ + Expr *leftvar; + Expr *rightvar; + Oid left_type, + right_type; + ListCell *lc1; + + /* Extract needed info from the clause */ + Assert(is_opclause(rinfo->clause)); + leftvar = (Expr *) get_leftop(rinfo->clause); + rightvar = (Expr *) get_rightop(rinfo->clause); + op_input_types(((OpExpr *) rinfo->clause)->opno, + &left_type, &right_type); + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + EquivalenceMember *coal_em = NULL; + bool match; + bool matchleft; + bool matchright; + ListCell *lc2; + + /* Ignore EC unless it contains pseudoconstants */ + if (!cur_ec->ec_has_const) + continue; + /* Never match to a volatile EC */ + if (cur_ec->ec_has_volatile) + continue; + /* It has to match the outer-join clause as to opfamilies, too */ + if (!equal(rinfo->mergeopfamilies, cur_ec->ec_opfamilies)) + continue; + + /* + * Does it contain a COALESCE(leftvar, rightvar) construct? + * + * We can assume the COALESCE() inputs are in the same order as + * the join clause, since both were automatically generated in the + * cases we care about. + * + * XXX currently this may fail to match in cross-type cases + * because the COALESCE will contain typecast operations while the + * join clause may not (if there is a cross-type mergejoin + * operator available for the two column types). Is it OK to strip + * implicit coercions from the COALESCE arguments? + */ + match = false; + foreach(lc2, cur_ec->ec_members) + { + coal_em = (EquivalenceMember *) lfirst(lc2); + if (IsA(coal_em->em_expr, CoalesceExpr)) + { + CoalesceExpr *cexpr = (CoalesceExpr *) coal_em->em_expr; + Node *cfirst; + Node *csecond; + + if (list_length(cexpr->args) != 2) + continue; + cfirst = (Node *) linitial(cexpr->args); + csecond = (Node *) lsecond(cexpr->args); + + if (equal(leftvar, cfirst) && equal(rightvar, csecond)) + { + match = true; + break; + } + } + } + if (!match) + continue; /* no match, so ignore this EC */ + + /* + * Yes it does! Try to generate clauses LEFTVAR = CONSTANT and + * RIGHTVAR = CONSTANT for each CONSTANT in the EC. Note that we + * must succeed with at least one constant for each var before + * we can decide to throw away the outer-join clause. + */ + matchleft = matchright = false; + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + Oid eq_op; + RestrictInfo *newrinfo; + + if (!cur_em->em_is_const) + continue; /* ignore non-const members */ + eq_op = select_equality_operator(cur_ec, + left_type, + cur_em->em_datatype); + if (OidIsValid(eq_op)) + { + newrinfo = build_implied_join_equality(eq_op, + leftvar, + cur_em->em_expr, + cur_ec->ec_relids); + if (process_equivalence(root, newrinfo, true)) + matchleft = true; + } + eq_op = select_equality_operator(cur_ec, + right_type, + cur_em->em_datatype); + if (OidIsValid(eq_op)) + { + newrinfo = build_implied_join_equality(eq_op, + rightvar, + cur_em->em_expr, + cur_ec->ec_relids); + if (process_equivalence(root, newrinfo, true)) + matchright = true; + } + } + + /* + * If we were able to equate both vars to constants, we're done, and + * we can throw away the full-join clause as redundant. Moreover, + * we can remove the COALESCE entry from the EC, since the added + * restrictions ensure it will always have the expected value. + * (We don't bother trying to update ec_relids or ec_sources.) + */ + if (matchleft && matchright) + { + cur_ec->ec_members = list_delete_ptr(cur_ec->ec_members, coal_em); + return; + } + /* + * Otherwise, fall out of the search loop, since we know the COALESCE + * appears in at most one EC (XXX might stop being true if we allow + * stripping of coercions above?) + */ + break; + } + + /* We did not find a match, so throw it back into regular processing */ + distribute_restrictinfo_to_rels(root, rinfo); +} + + +/* + * exprs_known_equal + * Detect whether two expressions are known equal due to equivalence + * relationships. + * + * Actually, this only shows that the expressions are equal according + * to some opfamily's notion of equality --- but we only use it for + * selectivity estimation, so a fuzzy idea of equality is OK. + * + * Note: does not bother to check for "equal(item1, item2)"; caller must + * check that case if it's possible to pass identical items. + */ +bool +exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) +{ + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); + bool item1member = false; + bool item2member = false; + ListCell *lc2; + + /* Never match to a volatile EC */ + if (ec->ec_has_volatile) + continue; + + foreach(lc2, ec->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + + if (equal(item1, em->em_expr)) + item1member = true; + else if (equal(item2, em->em_expr)) + item2member = true; + /* Exit as soon as equality is proven */ + if (item1member && item2member) + return true; + } + } + return false; +} + + +/* + * add_child_rel_equivalences + * Search for EC members that reference (only) the parent_rel, and + * add transformed members referencing the child_rel. + * + * We only need to do this for ECs that could generate join conditions, + * since the child members are only used for creating inner-indexscan paths. + * + * parent_rel and child_rel could be derived from appinfo, but since the + * caller has already computed them, we might as well just pass them in. + */ +void +add_child_rel_equivalences(PlannerInfo *root, + AppendRelInfo *appinfo, + RelOptInfo *parent_rel, + RelOptInfo *child_rel) +{ + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1) + continue; + + /* No point in searching if parent rel not mentioned in eclass */ + if (!bms_is_subset(parent_rel->relids, cur_ec->ec_relids)) + continue; + + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + /* Does it reference (only) parent_rel? */ + if (bms_equal(cur_em->em_relids, parent_rel->relids)) + { + /* Yes, generate transformed child version */ + Expr *child_expr; + + child_expr = (Expr *) + adjust_appendrel_attrs((Node *) cur_em->em_expr, + appinfo); + add_eq_member(cur_ec, child_expr, child_rel->relids, + true, cur_em->em_datatype); + } + } + } +} + + +/* + * find_eclass_clauses_for_index_join + * Create joinclauses usable for a nestloop-with-inner-indexscan + * scanning the given inner rel with the specified set of outer rels. + */ +List * +find_eclass_clauses_for_index_join(PlannerInfo *root, RelOptInfo *rel, + Relids outer_relids) +{ + List *result = NIL; + bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL); + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + ListCell *lc2; + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1) + continue; + + /* + * No point in searching if rel not mentioned in eclass (but we + * can't tell that for a child rel). + */ + if (!is_child_rel && + !bms_is_subset(rel->relids, cur_ec->ec_relids)) + continue; + /* ... nor if no overlap with outer_relids */ + if (!bms_overlap(outer_relids, cur_ec->ec_relids)) + continue; + + /* Scan members, looking for indexable columns */ + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + EquivalenceMember *best_outer_em = NULL; + Oid best_eq_op = InvalidOid; + ListCell *lc3; + + if (!bms_equal(cur_em->em_relids, rel->relids) || + !eclass_matches_any_index(cur_ec, cur_em, rel)) + continue; + + /* + * Found one, so try to generate a join clause. This is like + * generate_join_implied_equalities_normal, except simpler + * since our only preference item is to pick a Var on the + * outer side. We only need one join clause per index col. + */ + foreach(lc3, cur_ec->ec_members) + { + EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc3); + Oid eq_op; + + if (!bms_is_subset(outer_em->em_relids, outer_relids)) + continue; + eq_op = select_equality_operator(cur_ec, + cur_em->em_datatype, + outer_em->em_datatype); + if (!OidIsValid(eq_op)) + continue; + best_outer_em = outer_em; + best_eq_op = eq_op; + if (IsA(outer_em->em_expr, Var) || + (IsA(outer_em->em_expr, RelabelType) && + IsA(((RelabelType *) outer_em->em_expr)->arg, Var))) + break; /* no need to look further */ + } + + if (best_outer_em) + { + /* Found a suitable joinclause */ + RestrictInfo *rinfo; + + rinfo = build_implied_join_equality(best_eq_op, + cur_em->em_expr, + best_outer_em->em_expr, + cur_ec->ec_relids); + /* mark restrictinfo as redundant with other joinclauses */ + rinfo->parent_ec = cur_ec; + /* we can set these too */ + rinfo->left_ec = cur_ec; + rinfo->right_ec = cur_ec; + + result = lappend(result, rinfo); + /* + * Note: we keep scanning here because we want to provide + * a clause for every possible indexcol. + */ + } + } + } + + return result; +} + + +/* + * have_relevant_eclass_joinclause + * Detect whether there is an EquivalenceClass that could produce + * a joinclause between the two given relations. + * + * This is essentially a very cut-down version of + * generate_join_implied_equalities(). Note it's OK to occasionally say "yes" + * incorrectly. Hence we don't bother with details like whether the lack of a + * cross-type operator might prevent the clause from actually being generated. + */ +bool +have_relevant_eclass_joinclause(PlannerInfo *root, + RelOptInfo *rel1, RelOptInfo *rel2) +{ + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); + bool has_rel1; + bool has_rel2; + ListCell *lc2; + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (ec->ec_has_const || list_length(ec->ec_members) <= 1) + continue; + + /* + * Note we don't test ec_broken; if we did, we'd need a separate code + * path to look through ec_sources. Checking the members anyway is OK + * as a possibly-overoptimistic heuristic. + */ + + /* Needn't scan if it couldn't contain members from each rel */ + if (!bms_overlap(rel1->relids, ec->ec_relids) || + !bms_overlap(rel2->relids, ec->ec_relids)) + continue; + + /* Scan the EC to see if it has member(s) in each rel */ + has_rel1 = has_rel2 = false; + foreach(lc2, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + if (cur_em->em_is_child) + continue; /* ignore children here */ + if (bms_is_subset(cur_em->em_relids, rel1->relids)) + { + has_rel1 = true; + if (has_rel2) + break; + } + if (bms_is_subset(cur_em->em_relids, rel2->relids)) + { + has_rel2 = true; + if (has_rel1) + break; + } + } + + if (has_rel1 && has_rel2) + return true; + } + + return false; +} + + +/* + * has_relevant_eclass_joinclause + * Detect whether there is an EquivalenceClass that could produce + * a joinclause between the given relation and anything else. + * + * This is the same as have_relevant_eclass_joinclause with the other rel + * implicitly defined as "everything else in the query". + */ +bool +has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1) +{ + ListCell *lc1; + + foreach(lc1, root->eq_classes) + { + EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1); + bool has_rel1; + bool has_rel2; + ListCell *lc2; + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (ec->ec_has_const || list_length(ec->ec_members) <= 1) + continue; + + /* + * Note we don't test ec_broken; if we did, we'd need a separate code + * path to look through ec_sources. Checking the members anyway is OK + * as a possibly-overoptimistic heuristic. + */ + + /* Needn't scan if it couldn't contain members from each rel */ + if (!bms_overlap(rel1->relids, ec->ec_relids) || + bms_is_subset(ec->ec_relids, rel1->relids)) + continue; + + /* Scan the EC to see if it has member(s) in each rel */ + has_rel1 = has_rel2 = false; + foreach(lc2, ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + if (cur_em->em_is_child) + continue; /* ignore children here */ + if (bms_is_subset(cur_em->em_relids, rel1->relids)) + { + has_rel1 = true; + if (has_rel2) + break; + } + if (!bms_overlap(cur_em->em_relids, rel1->relids)) + { + has_rel2 = true; + if (has_rel1) + break; + } + } + + if (has_rel1 && has_rel2) + return true; + } + + return false; +} + + +/* + * eclass_useful_for_merging + * Detect whether the EC could produce any mergejoinable join clauses + * against the specified relation. + * + * This is just a heuristic test and doesn't have to be exact; it's better + * to say "yes" incorrectly than "no". Hence we don't bother with details + * like whether the lack of a cross-type operator might prevent the clause + * from actually being generated. + */ +bool +eclass_useful_for_merging(EquivalenceClass *eclass, + RelOptInfo *rel) +{ + ListCell *lc; + + Assert(!eclass->ec_merged); + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (eclass->ec_has_const || list_length(eclass->ec_members) <= 1) + return false; + + /* + * Note we don't test ec_broken; if we did, we'd need a separate code + * path to look through ec_sources. Checking the members anyway is OK + * as a possibly-overoptimistic heuristic. + */ + + /* If rel already includes all members of eclass, no point in searching */ + if (bms_is_subset(eclass->ec_relids, rel->relids)) + return false; + + /* To join, we need a member not in the given rel */ + foreach(lc, eclass->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc); + + if (!cur_em->em_is_child && + !bms_overlap(cur_em->em_relids, rel->relids)) + return true; + } + + return false; +} diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 3fd52d4850..0146cacf4e 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.215 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.216 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,7 +32,6 @@ #include "optimizer/var.h" #include "utils/builtins.h" #include "utils/lsyscache.h" -#include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/selfuncs.h" @@ -72,21 +71,11 @@ static bool match_rowcompare_to_indexcol(IndexOptInfo *index, Oid opfamily, RowCompareExpr *clause, Relids outer_relids); -static Relids indexable_outerrelids(RelOptInfo *rel); +static Relids indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel); static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids); static List *find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel, Relids outer_relids, bool isouterjoin); -static ScanDirection match_variant_ordering(PlannerInfo *root, - IndexOptInfo *index, - List *restrictclauses); -static List *identify_ignorable_ordering_cols(PlannerInfo *root, - IndexOptInfo *index, - List *restrictclauses); -static bool match_index_to_query_keys(PlannerInfo *root, - IndexOptInfo *index, - ScanDirection indexscandir, - List *ignorables); static bool match_boolean_index_clause(Node *clause, int indexcol, IndexOptInfo *index); static bool match_special_index_operator(Expr *clause, Oid opfamily, @@ -157,7 +146,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) * participate in such join clauses. We'll use this set later to * recognize outer rels that are equivalent for joining purposes. */ - rel->index_outer_relids = indexable_outerrelids(rel); + rel->index_outer_relids = indexable_outerrelids(root, rel); /* * Find all the index paths that are directly usable for this relation @@ -351,8 +340,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, if (index_is_ordered && istoplevel && outer_rel == NULL) { index_pathkeys = build_index_pathkeys(root, index, - ForwardScanDirection, - true); + ForwardScanDirection); useful_pathkeys = truncate_useless_pathkeys(root, rel, index_pathkeys); } @@ -378,23 +366,21 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, } /* - * 4. If the index is ordered, and there is a requested query ordering - * that we failed to match, consider variant ways of achieving the - * ordering. Again, this is only interesting at top level. + * 4. If the index is ordered, a backwards scan might be + * interesting. Again, this is only interesting at top level. */ - if (index_is_ordered && istoplevel && outer_rel == NULL && - root->query_pathkeys != NIL && - pathkeys_useful_for_ordering(root, useful_pathkeys) == 0) + if (index_is_ordered && istoplevel && outer_rel == NULL) { - ScanDirection scandir; - - scandir = match_variant_ordering(root, index, restrictclauses); - if (!ScanDirectionIsNoMovement(scandir)) + index_pathkeys = build_index_pathkeys(root, index, + BackwardScanDirection); + useful_pathkeys = truncate_useless_pathkeys(root, rel, + index_pathkeys); + if (useful_pathkeys != NIL) { ipath = create_index_path(root, index, restrictclauses, - root->query_pathkeys, - scandir, + useful_pathkeys, + BackwardScanDirection, outer_rel); result = lappend(result, ipath); } @@ -1207,19 +1193,6 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel) List *restrictinfo_list = rel->baserestrictinfo; ListCell *ilist; - /* - * Note: if Postgres tried to optimize queries by forming equivalence - * classes over equi-joined attributes (i.e., if it recognized that a - * qualification such as "where a.b=c.d and a.b=5" could make use of an - * index on c.d), then we could use that equivalence class info here with - * joininfo lists to do more complete tests for the usability of a partial - * index. For now, the test only uses restriction clauses (those in - * baserestrictinfo). --Nels, Dec '92 - * - * XXX as of 7.1, equivalence class info *is* available. Consider - * improving this code as foreseen by Nels. - */ - foreach(ilist, rel->indexlist) { IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist); @@ -1242,18 +1215,19 @@ check_partial_indexes(PlannerInfo *root, RelOptInfo *rel) * for the specified table. Returns a set of relids. */ static Relids -indexable_outerrelids(RelOptInfo *rel) +indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel) { Relids outer_relids = NULL; - ListCell *l; + bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL); + ListCell *lc1; /* * Examine each joinclause in the joininfo list to see if it matches any * key of any index. If so, add the clause's other rels to the result. */ - foreach(l, rel->joininfo) + foreach(lc1, rel->joininfo) { - RestrictInfo *joininfo = (RestrictInfo *) lfirst(l); + RestrictInfo *joininfo = (RestrictInfo *) lfirst(lc1); Relids other_rels; other_rels = bms_difference(joininfo->required_relids, rel->relids); @@ -1263,6 +1237,71 @@ indexable_outerrelids(RelOptInfo *rel) bms_free(other_rels); } + /* + * We also have to look through the query's EquivalenceClasses to see + * if any of them could generate indexable join conditions for this rel. + */ + if (rel->has_eclass_joins) + { + foreach(lc1, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); + Relids other_rels = NULL; + bool found_index = false; + ListCell *lc2; + + /* + * Won't generate joinclauses if const or single-member (the latter + * test covers the volatile case too) + */ + if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1) + continue; + + /* + * Note we don't test ec_broken; if we did, we'd need a separate + * code path to look through ec_sources. Checking the members + * anyway is OK as a possibly-overoptimistic heuristic. + */ + + /* + * No point in searching if rel not mentioned in eclass (but we + * can't tell that for a child rel). + */ + if (!is_child_rel && + !bms_is_subset(rel->relids, cur_ec->ec_relids)) + continue; + + /* + * Scan members, looking for both an index match and join + * candidates + */ + foreach(lc2, cur_ec->ec_members) + { + EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2); + + /* Join candidate? */ + if (!cur_em->em_is_child && + !bms_overlap(cur_em->em_relids, rel->relids)) + { + other_rels = bms_add_members(other_rels, + cur_em->em_relids); + continue; + } + + /* Check for index match (only need one) */ + if (!found_index && + bms_equal(cur_em->em_relids, rel->relids) && + eclass_matches_any_index(cur_ec, cur_em, rel)) + found_index = true; + } + + if (found_index) + outer_relids = bms_join(outer_relids, other_rels); + else + bms_free(other_rels); + } + } + return outer_relids; } @@ -1339,6 +1378,42 @@ matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids) return false; } +/* + * eclass_matches_any_index + * Workhorse for indexable_outerrelids: see if an EquivalenceClass member + * can be matched to any index column of the given rel. + * + * This is also exported for use by find_eclass_clauses_for_index_join. + */ +bool +eclass_matches_any_index(EquivalenceClass *ec, EquivalenceMember *em, + RelOptInfo *rel) +{ + ListCell *l; + + foreach(l, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(l); + int indexcol = 0; + Oid *families = index->opfamily; + + do + { + Oid curFamily = families[0]; + + if (list_member_oid(ec->ec_opfamilies, curFamily) && + match_index_to_operand((Node *) em->em_expr, indexcol, index)) + return true; + + indexcol++; + families++; + } while (!DoneMatchingIndexKeys(families)); + } + + return false; +} + + /* * best_inner_indexscan * Finds the best available inner indexscan for a nestloop join @@ -1393,12 +1468,12 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, return NULL; /* - * Otherwise, we have to do path selection in the memory context of the - * given rel, so that any created path can be safely attached to the rel's - * cache of best inner paths. (This is not currently an issue for normal - * planning, but it is an issue for GEQO planning.) + * Otherwise, we have to do path selection in the main planning context, + * so that any created path can be safely attached to the rel's cache of + * best inner paths. (This is not currently an issue for normal planning, + * but it is an issue for GEQO planning.) */ - oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); + oldcontext = MemoryContextSwitchTo(root->planner_cxt); /* * Intersect the given outer relids with index_outer_relids to find the @@ -1539,7 +1614,12 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel, Relids join_relids; ListCell *l; - /* Look for joinclauses that are usable with given outer_relids */ + /* + * Look for joinclauses that are usable with given outer_relids. Note + * we'll take anything that's applicable to the join whether it has + * anything to do with an index or not; since we're only building a list, + * it's not worth filtering more finely here. + */ join_relids = bms_union(rel->relids, outer_relids); foreach(l, rel->joininfo) @@ -1557,276 +1637,27 @@ find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel, bms_free(join_relids); - /* if no join clause was matched then forget it, per comments above */ + /* + * Also check to see if any EquivalenceClasses can produce a relevant + * joinclause. Since all such clauses are effectively pushed-down, + * this doesn't apply to outer joins. + */ + if (!isouterjoin && rel->has_eclass_joins) + clause_list = list_concat(clause_list, + find_eclass_clauses_for_index_join(root, + rel, + outer_relids)); + + /* If no join clause was matched then forget it, per comments above */ if (clause_list == NIL) return NIL; - /* - * We can also use any plain restriction clauses for the rel. We put - * these at the front of the clause list for the convenience of - * remove_redundant_join_clauses, which can never remove non-join clauses - * and hence won't be able to get rid of a non-join clause if it appears - * after a join clause it is redundant with. - */ + /* We can also use any plain restriction clauses for the rel */ clause_list = list_concat(list_copy(rel->baserestrictinfo), clause_list); - /* - * We may now have clauses that are known redundant. Get rid of 'em. - */ - if (list_length(clause_list) > 1) - { - clause_list = remove_redundant_join_clauses(root, - clause_list, - isouterjoin); - } - return clause_list; } -/**************************************************************************** - * ---- ROUTINES TO HANDLE PATHKEYS ---- - ****************************************************************************/ - -/* - * match_variant_ordering - * Try to match an index's ordering to the query's requested ordering - * - * This is used when the index is ordered but a naive comparison fails to - * match its ordering (pathkeys) to root->query_pathkeys. It may be that - * we need to scan the index backwards. Also, a less naive comparison can - * help for both forward and backward indexscans. Columns of the index - * that have an equality restriction clause can be ignored in the match; - * that is, an index on (x,y) can be considered to match the ordering of - * ... WHERE x = 42 ORDER BY y; - * - * Note: it would be possible to similarly ignore useless ORDER BY items; - * that is, an index on just y could be considered to match the ordering of - * ... WHERE x = 42 ORDER BY x, y; - * But proving that this is safe would require finding a btree opfamily - * containing both the = operator and the < or > operator in the ORDER BY - * item. That's significantly more expensive than what we do here, since - * we'd have to look at restriction clauses unrelated to the current index - * and search for opfamilies without any hint from the index. The practical - * use-cases seem to be mostly covered by ignoring index columns, so that's - * all we do for now. - * - * Inputs: - * 'index' is the index of interest. - * 'restrictclauses' is the list of sublists of restriction clauses - * matching the columns of the index (NIL if none) - * - * If able to match the requested query pathkeys, returns either - * ForwardScanDirection or BackwardScanDirection to indicate the proper index - * scan direction. If no match, returns NoMovementScanDirection. - */ -static ScanDirection -match_variant_ordering(PlannerInfo *root, - IndexOptInfo *index, - List *restrictclauses) -{ - List *ignorables; - - /* - * Forget the whole thing if not a btree index; our check for ignorable - * columns assumes we are dealing with btree opfamilies. (It'd be possible - * to factor out just the try for backwards indexscan, but considering - * that we presently have no orderable indexes except btrees anyway, it's - * hardly worth contorting this code for that case.) - * - * Note: if you remove this, you probably need to put in a check on - * amoptionalkey to prevent possible clauseless scan on an index that - * won't cope. - */ - if (index->relam != BTREE_AM_OID) - return NoMovementScanDirection; - - /* - * Figure out which index columns can be optionally ignored because they - * have an equality constraint. This is the same set for either forward - * or backward scan, so we do it just once. - */ - ignorables = identify_ignorable_ordering_cols(root, index, - restrictclauses); - - /* - * Try to match to forward scan, then backward scan. However, we can skip - * the forward-scan case if there are no ignorable columns, because - * find_usable_indexes() would have found the match already. - */ - if (ignorables && - match_index_to_query_keys(root, index, ForwardScanDirection, - ignorables)) - return ForwardScanDirection; - - if (match_index_to_query_keys(root, index, BackwardScanDirection, - ignorables)) - return BackwardScanDirection; - - return NoMovementScanDirection; -} - -/* - * identify_ignorable_ordering_cols - * Determine which index columns can be ignored for ordering purposes - * - * Returns an integer List of column numbers (1-based) of ignorable - * columns. The ignorable columns are those that have equality constraints - * against pseudoconstants. - */ -static List * -identify_ignorable_ordering_cols(PlannerInfo *root, - IndexOptInfo *index, - List *restrictclauses) -{ - List *result = NIL; - int indexcol = 0; /* note this is 0-based */ - ListCell *l; - - /* restrictclauses is either NIL or has a sublist per column */ - foreach(l, restrictclauses) - { - List *sublist = (List *) lfirst(l); - Oid opfamily = index->opfamily[indexcol]; - ListCell *l2; - - foreach(l2, sublist) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l2); - OpExpr *clause = (OpExpr *) rinfo->clause; - Oid clause_op; - int op_strategy; - bool varonleft; - bool ispc; - - /* First check for boolean-index cases. */ - if (IsBooleanOpfamily(opfamily)) - { - if (match_boolean_index_clause((Node *) clause, indexcol, - index)) - { - /* - * The clause means either col = TRUE or col = FALSE; we - * do not care which, it's an equality constraint either - * way. - */ - result = lappend_int(result, indexcol + 1); - break; - } - } - - /* Otherwise, ignore if not a binary opclause */ - if (!is_opclause(clause) || list_length(clause->args) != 2) - continue; - - /* Determine left/right sides and check the operator */ - clause_op = clause->opno; - if (match_index_to_operand(linitial(clause->args), indexcol, - index)) - { - /* clause_op is correct */ - varonleft = true; - } - else - { - Assert(match_index_to_operand(lsecond(clause->args), indexcol, - index)); - /* Must flip operator to get the opfamily member */ - clause_op = get_commutator(clause_op); - varonleft = false; - } - if (!OidIsValid(clause_op)) - continue; /* ignore non match, per next comment */ - op_strategy = get_op_opfamily_strategy(clause_op, opfamily); - - /* - * You might expect to see Assert(op_strategy != 0) here, but you - * won't: the clause might contain a special indexable operator - * rather than an ordinary opfamily member. Currently none of the - * special operators are very likely to expand to an equality - * operator; we do not bother to check, but just assume no match. - */ - if (op_strategy != BTEqualStrategyNumber) - continue; - - /* Now check that other side is pseudoconstant */ - if (varonleft) - ispc = is_pseudo_constant_clause_relids(lsecond(clause->args), - rinfo->right_relids); - else - ispc = is_pseudo_constant_clause_relids(linitial(clause->args), - rinfo->left_relids); - if (ispc) - { - result = lappend_int(result, indexcol + 1); - break; - } - } - indexcol++; - } - return result; -} - -/* - * match_index_to_query_keys - * Check a single scan direction for "intelligent" match to query keys - * - * 'index' is the index of interest. - * 'indexscandir' is the scan direction to consider - * 'ignorables' is an integer list of indexes of ignorable index columns - * - * Returns TRUE on successful match (ie, the query_pathkeys can be considered - * to match this index). - */ -static bool -match_index_to_query_keys(PlannerInfo *root, - IndexOptInfo *index, - ScanDirection indexscandir, - List *ignorables) -{ - List *index_pathkeys; - ListCell *index_cell; - int index_col; - ListCell *r; - - /* Get the pathkeys that exactly describe the index */ - index_pathkeys = build_index_pathkeys(root, index, indexscandir, false); - - /* - * Can we match to the query's requested pathkeys? The inner loop skips - * over ignorable index columns while trying to match. - */ - index_cell = list_head(index_pathkeys); - index_col = 0; - - foreach(r, root->query_pathkeys) - { - List *rsubkey = (List *) lfirst(r); - - for (;;) - { - List *isubkey; - - if (index_cell == NULL) - return false; - isubkey = (List *) lfirst(index_cell); - index_cell = lnext(index_cell); - index_col++; /* index_col is now 1-based */ - - /* - * Since we are dealing with canonicalized pathkeys, pointer - * comparison is sufficient to determine a match. - */ - if (rsubkey == isubkey) - break; /* matched current query pathkey */ - - if (!list_member_int(ignorables, index_col)) - return false; /* definite failure to match */ - /* otherwise loop around and try to match to next index col */ - } - } - - return true; -} /**************************************************************************** * ---- PATH CREATION UTILITIES ---- diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 2885b02154..3a68d79ca9 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.110 2007/01/10 18:06:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.111 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,7 +16,6 @@ #include -#include "access/skey.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -40,10 +39,6 @@ static List *select_mergejoin_clauses(RelOptInfo *joinrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype); -static void build_mergejoin_strat_arrays(List *mergeclauses, - Oid **mergefamilies, - int **mergestrategies, - bool **mergenullsfirst); /* @@ -205,9 +200,9 @@ sort_inner_and_outer(PlannerInfo *root, * * Actually, it's not quite true that every mergeclause ordering will * generate a different path order, because some of the clauses may be - * redundant. Therefore, what we do is convert the mergeclause list to a - * list of canonical pathkeys, and then consider different orderings of - * the pathkeys. + * partially redundant (refer to the same EquivalenceClasses). Therefore, + * what we do is convert the mergeclause list to a list of canonical + * pathkeys, and then consider different orderings of the pathkeys. * * Generating a path for *every* permutation of the pathkeys doesn't seem * like a winning strategy; the cost in planning time is too high. For @@ -216,75 +211,58 @@ sort_inner_and_outer(PlannerInfo *root, * mergejoin without re-sorting against any other possible mergejoin * partner path. But if we've not guessed the right ordering of secondary * keys, we may end up evaluating clauses as qpquals when they could have - * been done as mergeclauses. We need to figure out a better way. (Two - * possible approaches: look at all the relevant index relations to - * suggest plausible sort orders, or make just one output path and somehow - * mark it as having a sort-order that can be rearranged freely.) + * been done as mergeclauses. (In practice, it's rare that there's more + * than two or three mergeclauses, so expending a huge amount of thought + * on that is probably not worth it.) + * + * The pathkey order returned by select_outer_pathkeys_for_merge() has + * some heuristics behind it (see that function), so be sure to try it + * exactly as-is as well as making variants. */ - all_pathkeys = make_pathkeys_for_mergeclauses(root, - mergeclause_list, - outerrel); + all_pathkeys = select_outer_pathkeys_for_merge(root, + mergeclause_list, + joinrel); foreach(l, all_pathkeys) { List *front_pathkey = (List *) lfirst(l); - List *cur_pathkeys; List *cur_mergeclauses; - Oid *mergefamilies; - int *mergestrategies; - bool *mergenullsfirst; List *outerkeys; List *innerkeys; List *merge_pathkeys; - /* Make a pathkey list with this guy first. */ + /* Make a pathkey list with this guy first */ if (l != list_head(all_pathkeys)) - cur_pathkeys = lcons(front_pathkey, - list_delete_ptr(list_copy(all_pathkeys), - front_pathkey)); + outerkeys = lcons(front_pathkey, + list_delete_ptr(list_copy(all_pathkeys), + front_pathkey)); else - cur_pathkeys = all_pathkeys; /* no work at first one... */ + outerkeys = all_pathkeys; /* no work at first one... */ - /* - * Select mergeclause(s) that match this sort ordering. If we had - * redundant merge clauses then we will get a subset of the original - * clause list. There had better be some match, however... - */ + /* Sort the mergeclauses into the corresponding ordering */ cur_mergeclauses = find_mergeclauses_for_pathkeys(root, - cur_pathkeys, + outerkeys, + true, mergeclause_list); - Assert(cur_mergeclauses != NIL); - /* Forget it if can't use all the clauses in right/full join */ - if (useallclauses && - list_length(cur_mergeclauses) != list_length(mergeclause_list)) - continue; + /* Should have used them all... */ + Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list)); - /* - * Build sort pathkeys for both sides. - * - * Note: it's possible that the cheapest paths will already be sorted - * properly. create_mergejoin_path will detect that case and suppress - * an explicit sort step, so we needn't do so here. - */ - outerkeys = make_pathkeys_for_mergeclauses(root, - cur_mergeclauses, - outerrel); - innerkeys = make_pathkeys_for_mergeclauses(root, - cur_mergeclauses, - innerrel); - /* Build pathkeys representing output sort order. */ + /* Build sort pathkeys for the inner side */ + innerkeys = make_inner_pathkeys_for_merge(root, + cur_mergeclauses, + outerkeys); + + /* Build pathkeys representing output sort order */ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerkeys); - /* Build opfamily info for execution */ - build_mergejoin_strat_arrays(cur_mergeclauses, - &mergefamilies, - &mergestrategies, - &mergenullsfirst); - /* * And now we can make the path. + * + * Note: it's possible that the cheapest paths will already be sorted + * properly. create_mergejoin_path will detect that case and suppress + * an explicit sort step, so we needn't do so here. */ add_path(joinrel, (Path *) create_mergejoin_path(root, @@ -295,9 +273,6 @@ sort_inner_and_outer(PlannerInfo *root, restrictlist, merge_pathkeys, cur_mergeclauses, - mergefamilies, - mergestrategies, - mergenullsfirst, outerkeys, innerkeys)); } @@ -427,9 +402,6 @@ match_unsorted_outer(PlannerInfo *root, Path *outerpath = (Path *) lfirst(l); List *merge_pathkeys; List *mergeclauses; - Oid *mergefamilies; - int *mergestrategies; - bool *mergenullsfirst; List *innersortkeys; List *trialsortkeys; Path *cheapest_startup_inner; @@ -510,6 +482,7 @@ match_unsorted_outer(PlannerInfo *root, /* Look for useful mergeclauses (if any) */ mergeclauses = find_mergeclauses_for_pathkeys(root, outerpath->pathkeys, + true, mergeclause_list); /* @@ -532,15 +505,9 @@ match_unsorted_outer(PlannerInfo *root, continue; /* Compute the required ordering of the inner path */ - innersortkeys = make_pathkeys_for_mergeclauses(root, - mergeclauses, - innerrel); - - /* Build opfamily info for execution */ - build_mergejoin_strat_arrays(mergeclauses, - &mergefamilies, - &mergestrategies, - &mergenullsfirst); + innersortkeys = make_inner_pathkeys_for_merge(root, + mergeclauses, + outerpath->pathkeys); /* * Generate a mergejoin on the basis of sorting the cheapest inner. @@ -557,9 +524,6 @@ match_unsorted_outer(PlannerInfo *root, restrictlist, merge_pathkeys, mergeclauses, - mergefamilies, - mergestrategies, - mergenullsfirst, NIL, innersortkeys)); @@ -613,18 +577,12 @@ match_unsorted_outer(PlannerInfo *root, newclauses = find_mergeclauses_for_pathkeys(root, trialsortkeys, + false, mergeclauses); Assert(newclauses != NIL); } else newclauses = mergeclauses; - - /* Build opfamily info for execution */ - build_mergejoin_strat_arrays(newclauses, - &mergefamilies, - &mergestrategies, - &mergenullsfirst); - add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, @@ -634,9 +592,6 @@ match_unsorted_outer(PlannerInfo *root, restrictlist, merge_pathkeys, newclauses, - mergefamilies, - mergestrategies, - mergenullsfirst, NIL, NIL)); cheapest_total_inner = innerpath; @@ -666,19 +621,13 @@ match_unsorted_outer(PlannerInfo *root, newclauses = find_mergeclauses_for_pathkeys(root, trialsortkeys, + false, mergeclauses); Assert(newclauses != NIL); } else newclauses = mergeclauses; } - - /* Build opfamily info for execution */ - build_mergejoin_strat_arrays(newclauses, - &mergefamilies, - &mergestrategies, - &mergenullsfirst); - add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, @@ -688,9 +637,6 @@ match_unsorted_outer(PlannerInfo *root, restrictlist, merge_pathkeys, newclauses, - mergefamilies, - mergestrategies, - mergenullsfirst, NIL, NIL)); } @@ -909,6 +855,10 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel, * Select mergejoin clauses that are usable for a particular join. * Returns a list of RestrictInfo nodes for those clauses. * + * We also mark each selected RestrictInfo to show which side is currently + * being considered as outer. These are transient markings that are only + * good for the duration of the current add_paths_to_joinrel() call! + * * We examine each restrictinfo clause known for the join to see * if it is mergejoinable and involves vars from the two sub-relations * currently of interest. @@ -939,7 +889,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel, continue; if (!restrictinfo->can_join || - restrictinfo->mergejoinoperator == InvalidOid) + restrictinfo->mergeopfamilies == NIL) { have_nonmergeable_joinclause = true; continue; /* not mergejoinable */ @@ -954,11 +904,13 @@ select_mergejoin_clauses(RelOptInfo *joinrel, bms_is_subset(restrictinfo->right_relids, innerrel->relids)) { /* righthand side is inner */ + restrictinfo->outer_is_left = true; } else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) && bms_is_subset(restrictinfo->right_relids, outerrel->relids)) { /* lefthand side is inner */ + restrictinfo->outer_is_left = false; } else { @@ -966,7 +918,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel, continue; /* no good for these input relations */ } - result_list = lcons(restrictinfo, result_list); + result_list = lappend(result_list, restrictinfo); } /* @@ -995,46 +947,3 @@ select_mergejoin_clauses(RelOptInfo *joinrel, return result_list; } - -/* - * Temporary hack to build opfamily and strategy info needed for mergejoin - * by the executor. We need to rethink the planner's handling of merge - * planning so that it can deal with multiple possible merge orders, but - * that's not done yet. - */ -static void -build_mergejoin_strat_arrays(List *mergeclauses, - Oid **mergefamilies, - int **mergestrategies, - bool **mergenullsfirst) -{ - int nClauses = list_length(mergeclauses); - int i; - ListCell *l; - - *mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid)); - *mergestrategies = (int *) palloc(nClauses * sizeof(int)); - *mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool)); - - i = 0; - foreach(l, mergeclauses) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); - - /* - * We do not need to worry about whether the mergeclause will be - * commuted at runtime --- it's the same opfamily either way. - */ - (*mergefamilies)[i] = restrictinfo->mergeopfamily; - /* - * For the moment, strategy must always be LessThan --- see - * hack version of get_op_mergejoin_info - */ - (*mergestrategies)[i] = BTLessStrategyNumber; - - /* And we only allow NULLS LAST, too */ - (*mergenullsfirst)[i] = false; - - i++; - } -} diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index f1fe34e539..70e16eeb8e 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.83 2007/01/05 22:19:31 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.84 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -72,7 +72,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) other_rels = list_head(joinrels[1]); /* consider all initial * rels */ - if (old_rel->joininfo != NIL) + if (old_rel->joininfo != NIL || old_rel->has_eclass_joins) { /* * Note that if all available join clauses for this rel require @@ -152,7 +152,8 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) * outer joins --- then we might have to force a bushy outer * join. See have_relevant_joinclause(). */ - if (old_rel->joininfo == NIL && root->oj_info_list == NIL) + if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins && + root->oj_info_list == NIL) continue; if (k == other_level) @@ -251,8 +252,7 @@ make_rels_by_joins(PlannerInfo *root, int level, List **joinrels) /* * make_rels_by_clause_joins * Build joins between the given relation 'old_rel' and other relations - * that are mentioned within old_rel's joininfo list (i.e., relations - * that participate in join clauses that 'old_rel' also participates in). + * that participate in join clauses that 'old_rel' also participates in. * The join rel nodes are returned in a list. * * 'old_rel' is the relation entry for the relation to be joined diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 4fc5a5f125..15793b4ef9 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -11,687 +11,180 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.81 2007/01/09 02:14:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.82 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "access/skey.h" #include "nodes/makefuncs.h" +#include "nodes/plannodes.h" #include "optimizer/clauses.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" -#include "optimizer/planmain.h" #include "optimizer/tlist.h" -#include "optimizer/var.h" #include "parser/parsetree.h" #include "parser/parse_expr.h" #include "utils/lsyscache.h" -#include "utils/memutils.h" - - -static PathKeyItem *makePathKeyItem(Node *key, Oid sortop, bool nulls_first, - bool checkType); -static void generate_outer_join_implications(PlannerInfo *root, - List *equi_key_set, - Relids *relids); -static void sub_generate_join_implications(PlannerInfo *root, - List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, - Relids item1_relids); -static void process_implied_const_eq(PlannerInfo *root, - List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, - Relids item1_relids, - bool delete_it); -static List *make_canonical_pathkey(PlannerInfo *root, PathKeyItem *item); -static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, - AttrNumber varattno); /* - * makePathKeyItem - * create a PathKeyItem node + * If an EC contains a const and isn't below-outer-join, any PathKey depending + * on it must be redundant, since there's only one possible value of the key. */ -static PathKeyItem * -makePathKeyItem(Node *key, Oid sortop, bool nulls_first, bool checkType) -{ - PathKeyItem *item = makeNode(PathKeyItem); - - /* - * Some callers pass expressions that are not necessarily of the same type - * as the sort operator expects as input (for example when dealing with an - * index that uses binary-compatible operators). We must relabel these - * with the correct type so that the key expressions will be seen as - * equal() to expressions that have been correctly labeled. - */ - if (checkType) - { - Oid lefttype, - righttype; +#define MUST_BE_REDUNDANT(eclass) \ + ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join) + +static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first); +static PathKey *make_canonical_pathkey(PlannerInfo *root, + EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first); +static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); +static PathKey *make_pathkey_from_sortinfo(PlannerInfo *root, + Expr *expr, Oid ordering_op, + bool nulls_first, + bool canonicalize); +static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, + AttrNumber varattno); - op_input_types(sortop, &lefttype, &righttype); - if (exprType(key) != lefttype) - key = (Node *) makeRelabelType((Expr *) key, - lefttype, -1, - COERCE_DONTCARE); - } - item->key = key; - item->sortop = sortop; - item->nulls_first = nulls_first; - return item; -} +/**************************************************************************** + * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING + ****************************************************************************/ /* - * add_equijoined_keys - * The given clause has a mergejoinable operator, so its two sides - * can be considered equal after restriction clause application; in - * particular, any pathkey mentioning one side (with the correct sortop) - * can be expanded to include the other as well. Record the exprs and - * associated sortops in the query's equi_key_list for future use. + * makePathKey + * create a PathKey node * - * The query's equi_key_list field points to a list of sublists of PathKeyItem - * nodes, where each sublist is a set of two or more exprs+sortops that have - * been identified as logically equivalent (and, therefore, we may consider - * any two in a set to be equal). As described above, we will subsequently - * use direct pointers to one of these sublists to represent any pathkey - * that involves an equijoined variable. + * This does not promise to create a canonical PathKey, it's merely a + * convenience routine to build the specified node. */ -void -add_equijoined_keys(PlannerInfo *root, RestrictInfo *restrictinfo) +static PathKey * +makePathKey(EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first) { - Expr *clause = restrictinfo->clause; - PathKeyItem *item1 = makePathKeyItem(get_leftop(clause), - restrictinfo->left_sortop, - false, /* XXX nulls_first? */ - false); - PathKeyItem *item2 = makePathKeyItem(get_rightop(clause), - restrictinfo->right_sortop, - false, /* XXX nulls_first? */ - false); - List *newset; - ListCell *cursetlink; - - /* We might see a clause X=X; don't make a single-element list from it */ - if (equal(item1, item2)) - return; - - /* - * Our plan is to make a two-element set, then sweep through the existing - * equijoin sets looking for matches to item1 or item2. When we find one, - * we remove that set from equi_key_list and union it into our new set. - * When done, we add the new set to the front of equi_key_list. - * - * It may well be that the two items we're given are already known to be - * equijoin-equivalent, in which case we don't need to change our data - * structure. If we find both of them in the same equivalence set to - * start with, we can quit immediately. - * - * This is a standard UNION-FIND problem, for which there exist better - * data structures than simple lists. If this code ever proves to be a - * bottleneck then it could be sped up --- but for now, simple is - * beautiful. - */ - newset = NIL; - - /* cannot use foreach here because of possible lremove */ - cursetlink = list_head(root->equi_key_list); - while (cursetlink) - { - List *curset = (List *) lfirst(cursetlink); - bool item1here = list_member(curset, item1); - bool item2here = list_member(curset, item2); - - /* must advance cursetlink before lremove possibly pfree's it */ - cursetlink = lnext(cursetlink); - - if (item1here || item2here) - { - /* - * If find both in same equivalence set, no need to do any more - */ - if (item1here && item2here) - { - /* Better not have seen only one in an earlier set... */ - Assert(newset == NIL); - return; - } - - /* Build the new set only when we know we must */ - if (newset == NIL) - newset = list_make2(item1, item2); - - /* Found a set to merge into our new set */ - newset = list_concat_unique(newset, curset); - - /* - * Remove old set from equi_key_list. - */ - root->equi_key_list = list_delete_ptr(root->equi_key_list, curset); - list_free(curset); /* might as well recycle old cons cells */ - } - } + PathKey *pk = makeNode(PathKey); - /* Build the new set only when we know we must */ - if (newset == NIL) - newset = list_make2(item1, item2); + pk->pk_eclass = eclass; + pk->pk_opfamily = opfamily; + pk->pk_strategy = strategy; + pk->pk_nulls_first = nulls_first; - root->equi_key_list = lcons(newset, root->equi_key_list); + return pk; } /* - * generate_implied_equalities - * Scan the completed equi_key_list for the query, and generate explicit - * qualifications (WHERE clauses) for all the pairwise equalities not - * already mentioned in the quals; or remove qualifications found to be - * redundant. - * - * Adding deduced equalities is useful because the additional clauses help - * the selectivity-estimation code and may allow better joins to be chosen; - * and in fact it's *necessary* to ensure that sort keys we think are - * equivalent really are (see src/backend/optimizer/README for more info). - * - * If an equi_key_list set includes any constants then we adopt a different - * strategy: we record all the "var = const" deductions we can make, and - * actively remove all the "var = var" clauses that are implied by the set - * (including the clauses that originally gave rise to the set!). The reason - * is that given input like "a = b AND b = 42", once we have deduced "a = 42" - * there is no longer any need to apply the clause "a = b"; not only is - * it a waste of time to check it, but we will misestimate selectivity if the - * clause is left in. So we must remove it. For this purpose, any pathkey - * item that mentions no Vars of the current level can be taken as a constant. - * (The only case where this would be risky is if the item contains volatile - * functions; but we will never consider such an expression to be a pathkey - * at all, because check_mergejoinable() will reject it.) - * - * Also, when we have constants in an equi_key_list we can try to propagate - * the constants into outer joins; see generate_outer_join_implications - * for discussion. + * make_canonical_pathkey + * Given the parameters for a PathKey, find any pre-existing matching + * pathkey in the query's list of "canonical" pathkeys. Make a new + * entry if there's not one already. * - * This routine just walks the equi_key_list to find all pairwise equalities. - * We call process_implied_equality (in plan/initsplan.c) to adjust the - * restrictinfo datastructures for each pair. + * Note that this function must not be used until after we have completed + * merging EquivalenceClasses. */ -void -generate_implied_equalities(PlannerInfo *root) +static PathKey * +make_canonical_pathkey(PlannerInfo *root, + EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first) { - ListCell *cursetlink; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); - int nitems = list_length(curset); - Relids *relids; - bool have_consts; - ListCell *ptr1; - int i1; + PathKey *pk; + ListCell *lc; + MemoryContext oldcontext; - /* - * A set containing only two items cannot imply any equalities beyond - * the one that created the set, so we can skip it --- unless outer - * joins appear in the query. - */ - if (nitems < 3 && !root->hasOuterJoins) - continue; + /* The passed eclass might be non-canonical, so chase up to the top */ + while (eclass->ec_merged) + eclass = eclass->ec_merged; - /* - * Collect info about relids mentioned in each item. For this routine - * we only really care whether there are any at all in each item, but - * process_implied_equality() needs the exact sets, so we may as well - * pull them here. - */ - relids = (Relids *) palloc(nitems * sizeof(Relids)); - have_consts = false; - i1 = 0; - foreach(ptr1, curset) - { - PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1); + foreach(lc, root->canon_pathkeys) + { + pk = (PathKey *) lfirst(lc); + if (eclass == pk->pk_eclass && + opfamily == pk->pk_opfamily && + strategy == pk->pk_strategy && + nulls_first == pk->pk_nulls_first) + return pk; + } - relids[i1] = pull_varnos(item1->key); - if (bms_is_empty(relids[i1])) - have_consts = true; - i1++; - } + /* + * Be sure canonical pathkeys are allocated in the main planning context. + * Not an issue in normal planning, but it is for GEQO. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); - /* - * Match each item in the set with all that appear after it (it's - * sufficient to generate A=B, need not process B=A too). - * - * A set containing only two items cannot imply any equalities beyond - * the one that created the set, so we can skip this processing in - * that case. - */ - if (nitems >= 3) - { - i1 = 0; - foreach(ptr1, curset) - { - PathKeyItem *item1 = (PathKeyItem *) lfirst(ptr1); - bool i1_is_variable = !bms_is_empty(relids[i1]); - ListCell *ptr2; - int i2 = i1 + 1; + pk = makePathKey(eclass, opfamily, strategy, nulls_first); + root->canon_pathkeys = lappend(root->canon_pathkeys, pk); - for_each_cell(ptr2, lnext(ptr1)) - { - PathKeyItem *item2 = (PathKeyItem *) lfirst(ptr2); - bool i2_is_variable = !bms_is_empty(relids[i2]); - - /* - * If it's "const = const" then just ignore it altogether. - * There is no place in the restrictinfo structure to - * store it. (If the two consts are in fact unequal, then - * propagating the comparison to Vars will cause us to - * produce zero rows out, as expected.) - */ - if (i1_is_variable || i2_is_variable) - { - /* - * Tell process_implied_equality to delete the clause, - * not add it, if it's "var = var" and we have - * constants present in the list. - */ - bool delete_it = (have_consts && - i1_is_variable && - i2_is_variable); - - process_implied_equality(root, - item1->key, item2->key, - item1->sortop, item2->sortop, - relids[i1], relids[i2], - delete_it); - } - i2++; - } - i1++; - } - } + MemoryContextSwitchTo(oldcontext); - /* - * If we have constant(s) and outer joins, try to propagate the - * constants through outer-join quals. - */ - if (have_consts && root->hasOuterJoins) - generate_outer_join_implications(root, curset, relids); - } + return pk; } /* - * generate_outer_join_implications - * Generate clauses that can be deduced in outer-join situations. + * pathkey_is_redundant + * Is a pathkey redundant with one already in the given list? * - * When we have mergejoinable clauses A = B that are outer-join clauses, - * we can't blindly combine them with other clauses A = C to deduce B = C, - * since in fact the "equality" A = B won't necessarily hold above the - * outer join (one of the variables might be NULL instead). Nonetheless - * there are cases where we can add qual clauses using transitivity. + * Both the given pathkey and the list members must be canonical for this + * to work properly. We detect two cases: * - * One case that we look for here is an outer-join clause OUTERVAR = INNERVAR - * combined with a pushed-down (valid everywhere) clause OUTERVAR = CONSTANT. - * It is safe and useful to push a clause INNERVAR = CONSTANT into the - * evaluation of the inner (nullable) relation, because any inner rows not - * meeting this condition will not contribute to the outer-join result anyway. - * (Any outer rows they could join to will be eliminated by the pushed-down - * clause.) + * 1. If the new pathkey's equivalence class contains a constant, and isn't + * below an outer join, then we can disregard it as a sort key. An example: + * SELECT ... WHERE x = 42 ORDER BY x, y; + * We may as well just sort by y. Note that because of opfamily matching, + * this is semantically correct: we know that the equality constraint is one + * that actually binds the variable to a single value in the terms of any + * ordering operator that might go with the eclass. This rule not only lets + * us simplify (or even skip) explicit sorts, but also allows matching index + * sort orders to a query when there are don't-care index columns. * - * Note that the above rule does not work for full outer joins, nor for - * pushed-down restrictions on an inner-side variable; nor is it very - * interesting to consider cases where the pushed-down clause involves - * relations entirely outside the outer join, since such clauses couldn't - * be pushed into the inner side's scan anyway. So the restriction to - * outervar = pseudoconstant is not really giving up anything. + * 2. If the new pathkey's equivalence class is the same as that of any + * existing member of the pathkey list, then it is redundant. Some examples: + * SELECT ... ORDER BY x, x; + * SELECT ... ORDER BY x, x DESC; + * SELECT ... WHERE x = y ORDER BY x, y; + * In all these cases the second sort key cannot distinguish values that are + * considered equal by the first, and so there's no point in using it. + * Note in particular that we need not compare opfamily (all the opfamilies + * of the EC have the same notion of equality) nor sort direction. * - * For full-join cases, we can only do something useful if it's a FULL JOIN - * USING and a merged column has a restriction MERGEDVAR = CONSTANT. By - * the time it gets here, the restriction will look like - * COALESCE(LEFTVAR, RIGHTVAR) = CONSTANT - * and we will have a join clause LEFTVAR = RIGHTVAR that we can match the - * COALESCE expression to. In this situation we can push LEFTVAR = CONSTANT - * and RIGHTVAR = CONSTANT into the input relations, since any rows not - * meeting these conditions cannot contribute to the join result. - * - * Again, there isn't any traction to be gained by trying to deal with - * clauses comparing a mergedvar to a non-pseudoconstant. So we can make - * use of the equi_key_lists to quickly find the interesting pushed-down - * clauses. The interesting outer-join clauses were accumulated for us by - * distribute_qual_to_rels. - * - * equi_key_set: a list of PathKeyItems that are known globally equivalent, - * at least one of which is a pseudoconstant. - * relids: an array of Relids sets showing the relation membership of each - * PathKeyItem in equi_key_set. + * Because the equivclass.c machinery forms only one copy of any EC per query, + * pointer comparison is enough to decide whether canonical ECs are the same. */ -static void -generate_outer_join_implications(PlannerInfo *root, - List *equi_key_set, - Relids *relids) +static bool +pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys) { - ListCell *l; - int i = 0; + EquivalenceClass *new_ec = new_pathkey->pk_eclass; + ListCell *lc; - /* Process each non-constant element of equi_key_set */ - foreach(l, equi_key_set) - { - PathKeyItem *item1 = (PathKeyItem *) lfirst(l); + /* Assert we've been given canonical pathkeys */ + Assert(!new_ec->ec_merged); - if (!bms_is_empty(relids[i])) - { - sub_generate_join_implications(root, equi_key_set, relids, - item1->key, - item1->sortop, - relids[i]); - } - i++; - } -} - -/* - * sub_generate_join_implications - * Propagate a constant equality through outer join clauses. - * - * The item described by item1/sortop1/item1_relids has been determined - * to be equal to the constant(s) listed in equi_key_set. Recursively - * trace out the implications of this. - * - * equi_key_set and relids are as for generate_outer_join_implications. - */ -static void -sub_generate_join_implications(PlannerInfo *root, - List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, Relids item1_relids) - -{ - ListCell *l; + /* Check for EC containing a constant --- unconditionally redundant */ + if (MUST_BE_REDUNDANT(new_ec)) + return true; - /* - * Examine each mergejoinable outer-join clause with OUTERVAR on left, - * looking for an OUTERVAR identical to item1 - */ - foreach(l, root->left_join_clauses) + /* If same EC already used in list, then redundant */ + foreach(lc, pathkeys) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *leftop = get_leftop(rinfo->clause); - - if (equal(leftop, item1) && rinfo->left_sortop == sortop1) - { - /* - * Match, so find constant member(s) of set and generate implied - * INNERVAR = CONSTANT - */ - Node *rightop = get_rightop(rinfo->clause); + PathKey *old_pathkey = (PathKey *) lfirst(lc); - process_implied_const_eq(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids, - false); + /* Assert we've been given canonical pathkeys */ + Assert(!old_pathkey->pk_eclass->ec_merged); - /* - * We can remove explicit tests of this outer-join qual, too, - * since we now have tests forcing each of its sides to the same - * value. - */ - process_implied_equality(root, - leftop, rightop, - rinfo->left_sortop, rinfo->right_sortop, - rinfo->left_relids, rinfo->right_relids, - true); - - /* - * And recurse to see if we can deduce anything from INNERVAR = - * CONSTANT - */ - sub_generate_join_implications(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids); - } - } - - /* The same, looking at clauses with OUTERVAR on right */ - foreach(l, root->right_join_clauses) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *rightop = get_rightop(rinfo->clause); - - if (equal(rightop, item1) && rinfo->right_sortop == sortop1) - { - /* - * Match, so find constant member(s) of set and generate implied - * INNERVAR = CONSTANT - */ - Node *leftop = get_leftop(rinfo->clause); - - process_implied_const_eq(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids, - false); - - /* - * We can remove explicit tests of this outer-join qual, too, - * since we now have tests forcing each of its sides to the same - * value. - */ - process_implied_equality(root, - leftop, rightop, - rinfo->left_sortop, rinfo->right_sortop, - rinfo->left_relids, rinfo->right_relids, - true); - - /* - * And recurse to see if we can deduce anything from INNERVAR = - * CONSTANT - */ - sub_generate_join_implications(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids); - } - } - - /* - * Only COALESCE(x,y) items can possibly match full joins - */ - if (IsA(item1, CoalesceExpr)) - { - CoalesceExpr *cexpr = (CoalesceExpr *) item1; - Node *cfirst; - Node *csecond; - - if (list_length(cexpr->args) != 2) - return; - cfirst = (Node *) linitial(cexpr->args); - csecond = (Node *) lsecond(cexpr->args); - - /* - * Examine each mergejoinable full-join clause, looking for a clause - * of the form "x = y" matching the COALESCE(x,y) expression - */ - foreach(l, root->full_join_clauses) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Node *leftop = get_leftop(rinfo->clause); - Node *rightop = get_rightop(rinfo->clause); - - /* - * We can assume the COALESCE() inputs are in the same order as - * the join clause, since both were automatically generated in the - * cases we care about. - * - * XXX currently this may fail to match in cross-type cases - * because the COALESCE will contain typecast operations while the - * join clause may not (if there is a cross-type mergejoin - * operator available for the two column types). Is it OK to strip - * implicit coercions from the COALESCE arguments? What of the - * sortops in such cases? - */ - if (equal(leftop, cfirst) && - equal(rightop, csecond) && - rinfo->left_sortop == sortop1 && - rinfo->right_sortop == sortop1) - { - /* - * Match, so find constant member(s) of set and generate - * implied LEFTVAR = CONSTANT - */ - process_implied_const_eq(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids, - false); - /* ... and RIGHTVAR = CONSTANT */ - process_implied_const_eq(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids, - false); - /* ... and remove COALESCE() = CONSTANT */ - process_implied_const_eq(root, equi_key_set, relids, - item1, - sortop1, - item1_relids, - true); - - /* - * We can remove explicit tests of this outer-join qual, too, - * since we now have tests forcing each of its sides to the - * same value. - */ - process_implied_equality(root, - leftop, rightop, - rinfo->left_sortop, - rinfo->right_sortop, - rinfo->left_relids, - rinfo->right_relids, - true); - - /* - * And recurse to see if we can deduce anything from LEFTVAR = - * CONSTANT - */ - sub_generate_join_implications(root, equi_key_set, relids, - leftop, - rinfo->left_sortop, - rinfo->left_relids); - /* ... and RIGHTVAR = CONSTANT */ - sub_generate_join_implications(root, equi_key_set, relids, - rightop, - rinfo->right_sortop, - rinfo->right_relids); - - } - } - } -} - -/* - * process_implied_const_eq - * Apply process_implied_equality with the given item and each - * pseudoconstant member of equi_key_set. - * - * equi_key_set and relids are as for generate_outer_join_implications, - * the other parameters as for process_implied_equality. - */ -static void -process_implied_const_eq(PlannerInfo *root, List *equi_key_set, Relids *relids, - Node *item1, Oid sortop1, Relids item1_relids, - bool delete_it) -{ - ListCell *l; - bool found = false; - int i = 0; - - foreach(l, equi_key_set) - { - PathKeyItem *item2 = (PathKeyItem *) lfirst(l); - - if (bms_is_empty(relids[i])) - { - process_implied_equality(root, - item1, item2->key, - sortop1, item2->sortop, - item1_relids, NULL, - delete_it); - found = true; - } - i++; + if (new_ec == old_pathkey->pk_eclass) + return true; } - /* Caller screwed up if no constants in list */ - Assert(found); -} -/* - * exprs_known_equal - * Detect whether two expressions are known equal due to equijoin clauses. - * - * Note: does not bother to check for "equal(item1, item2)"; caller must - * check that case if it's possible to pass identical items. - */ -bool -exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) -{ - ListCell *cursetlink; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); - bool item1member = false; - bool item2member = false; - ListCell *ptr; - - foreach(ptr, curset) - { - PathKeyItem *pitem = (PathKeyItem *) lfirst(ptr); - - if (equal(item1, pitem->key)) - item1member = true; - else if (equal(item2, pitem->key)) - item2member = true; - /* Exit as soon as equality is proven */ - if (item1member && item2member) - return true; - } - } return false; } - -/* - * make_canonical_pathkey - * Given a PathKeyItem, find the equi_key_list subset it is a member of, - * if any. If so, return a pointer to that sublist, which is the - * canonical representation (for this query) of that PathKeyItem's - * equivalence set. If it is not found, add a singleton "equivalence set" - * to the equi_key_list and return that --- see compare_pathkeys. - * - * Note that this function must not be used until after we have completed - * scanning the WHERE clause for equijoin operators. - */ -static List * -make_canonical_pathkey(PlannerInfo *root, PathKeyItem *item) -{ - List *newset; - ListCell *cursetlink; - - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); - - if (list_member(curset, item)) - return curset; - } - newset = list_make1(item); - root->equi_key_list = lcons(newset, root->equi_key_list); - return newset; -} - /* * canonicalize_pathkeys * Convert a not-necessarily-canonical pathkeys list to canonical form. * * Note that this function must not be used until after we have completed - * scanning the WHERE clause for equijoin operators. + * merging EquivalenceClasses. */ List * canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) @@ -701,55 +194,116 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys) foreach(l, pathkeys) { - List *pathkey = (List *) lfirst(l); - PathKeyItem *item; - List *cpathkey; + PathKey *pathkey = (PathKey *) lfirst(l); + EquivalenceClass *eclass; + PathKey *cpathkey; - /* - * It's sufficient to look at the first entry in the sublist; if there - * are more entries, they're already part of an equivalence set by - * definition. - */ - Assert(pathkey != NIL); - item = (PathKeyItem *) linitial(pathkey); - cpathkey = make_canonical_pathkey(root, item); + /* Find the canonical (merged) EquivalenceClass */ + eclass = pathkey->pk_eclass; + while (eclass->ec_merged) + eclass = eclass->ec_merged; /* - * Eliminate redundant ordering requests --- ORDER BY A,A is the same - * as ORDER BY A. We want to check this only after we have - * canonicalized the keys, so that equivalent-key knowledge is used - * when deciding if an item is redundant. + * If we can tell it's redundant just from the EC, skip. + * pathkey_is_redundant would notice that, but we needn't even bother + * constructing the node... */ - new_pathkeys = list_append_unique_ptr(new_pathkeys, cpathkey); + if (MUST_BE_REDUNDANT(eclass)) + continue; + + /* OK, build a canonicalized PathKey struct */ + cpathkey = make_canonical_pathkey(root, + eclass, + pathkey->pk_opfamily, + pathkey->pk_strategy, + pathkey->pk_nulls_first); + + /* Add to list unless redundant */ + if (!pathkey_is_redundant(cpathkey, new_pathkeys)) + new_pathkeys = lappend(new_pathkeys, cpathkey); } return new_pathkeys; } - /* - * count_canonical_peers - * Given a PathKeyItem, find the equi_key_list subset it is a member of, - * if any. If so, return the number of other members of the set. - * If not, return 0 (without actually adding it to our equi_key_list). + * make_pathkey_from_sortinfo + * Given an expression, a sortop, and a nulls-first flag, create + * a PathKey. If canonicalize = true, the result is a "canonical" + * PathKey, otherwise not. (But note it might be redundant anyway.) * - * This is a hack to support the rather bogus heuristics in - * convert_subquery_pathkeys. + * canonicalize should always be TRUE after EquivalenceClass merging has + * been performed, but FALSE if we haven't done EquivalenceClass merging yet. */ -static int -count_canonical_peers(PlannerInfo *root, PathKeyItem *item) +static PathKey * +make_pathkey_from_sortinfo(PlannerInfo *root, + Expr *expr, Oid ordering_op, + bool nulls_first, + bool canonicalize) { - ListCell *cursetlink; + Oid equality_op; + List *opfamilies; + Oid opfamily, + lefttype, + righttype; + int strategy; + ListCell *lc; + EquivalenceClass *eclass; - foreach(cursetlink, root->equi_key_list) - { - List *curset = (List *) lfirst(cursetlink); + /* + * An ordering operator fully determines the behavior of its opfamily, + * so could only meaningfully appear in one family --- or perhaps two + * if one builds a reverse-sort opfamily, but there's not much point in + * that anymore. But EquivalenceClasses need to contain opfamily lists + * based on the family membership of equality operators, which could + * easily be bigger. So, look up the equality operator that goes with + * the ordering operator (this should be unique) and get its membership. + */ + equality_op = get_equality_op_for_ordering_op(ordering_op); + if (!OidIsValid(equality_op)) /* shouldn't happen */ + elog(ERROR, "could not find equality operator for ordering operator %u", + ordering_op); + opfamilies = get_mergejoin_opfamilies(equality_op); + if (!opfamilies) /* certainly should find some */ + elog(ERROR, "could not find opfamilies for ordering operator %u", + ordering_op); - if (list_member(curset, item)) - return list_length(curset) - 1; + /* + * Next we have to determine the strategy number to put into the pathkey. + * In the presence of reverse-sort opclasses there might be two answers. + * We prefer the one associated with the first opfamilies member that + * this ordering_op appears in (this will be consistently defined in + * normal system operation; see comments for get_mergejoin_opfamilies()). + */ + opfamily = InvalidOid; + strategy = 0; + foreach(lc, opfamilies) + { + opfamily = lfirst_oid(lc); + strategy = get_op_opfamily_strategy(ordering_op, opfamily); + if (strategy) + break; } - return 0; + if (!(strategy == BTLessStrategyNumber || + strategy == BTGreaterStrategyNumber)) + elog(ERROR, "ordering operator %u is has wrong strategy number %d", + ordering_op, strategy); + + /* Need the declared input type of the operator, too */ + op_input_types(ordering_op, &lefttype, &righttype); + Assert(lefttype == righttype); + + /* Now find or create a matching EquivalenceClass */ + eclass = get_eclass_for_sort_expr(root, expr, lefttype, opfamilies); + + /* And finally we can find or create a PathKey node */ + if (canonicalize) + return make_canonical_pathkey(root, eclass, opfamily, + strategy, nulls_first); + else + return makePathKey(eclass, opfamily, strategy, nulls_first); } + /**************************************************************************** * PATHKEY COMPARISONS ****************************************************************************/ @@ -760,7 +314,7 @@ count_canonical_peers(PlannerInfo *root, PathKeyItem *item) * one is "better" than the other. * * This function may only be applied to canonicalized pathkey lists. - * In the canonical representation, sublists can be checked for equality + * In the canonical representation, pathkeys can be checked for equality * by simple pointer comparison. */ PathKeysComparison @@ -771,33 +325,25 @@ compare_pathkeys(List *keys1, List *keys2) forboth(key1, keys1, key2, keys2) { - List *subkey1 = (List *) lfirst(key1); - List *subkey2 = (List *) lfirst(key2); + PathKey *pathkey1 = (PathKey *) lfirst(key1); + PathKey *pathkey2 = (PathKey *) lfirst(key2); /* * XXX would like to check that we've been given canonicalized input, * but PlannerInfo not accessible here... */ #ifdef NOT_USED - Assert(list_member_ptr(root->equi_key_list, subkey1)); - Assert(list_member_ptr(root->equi_key_list, subkey2)); + Assert(list_member_ptr(root->canon_pathkeys, pathkey1)); + Assert(list_member_ptr(root->canon_pathkeys, pathkey2)); #endif - /* - * We will never have two subkeys where one is a subset of the other, - * because of the canonicalization process. Either they are equal or - * they ain't. Furthermore, we only need pointer comparison to detect - * equality. - */ - if (subkey1 != subkey2) + if (pathkey1 != pathkey2) return PATHKEYS_DIFFERENT; /* no need to keep looking */ } /* * If we reached the end of only one list, the other is longer and - * therefore not a subset. (We assume the additional sublist(s) of the - * other list are not NIL --- no pathkey list should ever have a NIL - * sublist.) + * therefore not a subset. */ if (key1 == NULL && key2 == NULL) return PATHKEYS_EQUAL; @@ -912,11 +458,8 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, * If 'scandir' is BackwardScanDirection, attempt to build pathkeys * representing a backwards scan of the index. Return NIL if can't do it. * - * If 'canonical' is TRUE, we remove duplicate pathkeys (which can occur - * if two index columns are equijoined, eg WHERE x = 1 AND y = 1). This - * is required if the result is to be compared directly to a canonical query - * pathkeys list. However, some callers want a list with exactly one entry - * per index column, and they must pass FALSE. + * The result is canonical, meaning that redundant pathkeys are removed; + * it may therefore have fewer entries than there are index columns. * * We generate the full pathkeys list whether or not all are useful for the * current query. Caller should do truncate_useless_pathkeys(). @@ -924,8 +467,7 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, List * build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, - ScanDirection scandir, - bool canonical) + ScanDirection scandir) { List *retval = NIL; ListCell *indexprs_item = list_head(index->indexprs); @@ -936,9 +478,8 @@ build_index_pathkeys(PlannerInfo *root, Oid sortop; bool nulls_first; int ikey; - Node *indexkey; - PathKeyItem *item; - List *cpathkey; + Expr *indexkey; + PathKey *cpathkey; if (ScanDirectionIsBackward(scandir)) { @@ -958,25 +499,26 @@ build_index_pathkeys(PlannerInfo *root, if (ikey != 0) { /* simple index column */ - indexkey = (Node *) find_indexkey_var(root, index->rel, ikey); + indexkey = (Expr *) find_indexkey_var(root, index->rel, ikey); } else { /* expression --- assume we need not copy it */ if (indexprs_item == NULL) elog(ERROR, "wrong number of index expressions"); - indexkey = (Node *) lfirst(indexprs_item); + indexkey = (Expr *) lfirst(indexprs_item); indexprs_item = lnext(indexprs_item); } - /* OK, make a sublist for this sort key */ - item = makePathKeyItem(indexkey, sortop, nulls_first, true); - cpathkey = make_canonical_pathkey(root, item); + /* OK, make a canonical pathkey for this sort key */ + cpathkey = make_pathkey_from_sortinfo(root, + indexkey, + sortop, + nulls_first, + true); - /* Eliminate redundant ordering info if requested */ - if (canonical) - retval = list_append_unique_ptr(retval, cpathkey); - else + /* Add to list unless redundant */ + if (!pathkey_is_redundant(cpathkey, retval)) retval = lappend(retval, cpathkey); } @@ -1042,31 +584,31 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, foreach(i, subquery_pathkeys) { - List *sub_pathkey = (List *) lfirst(i); + PathKey *sub_pathkey = (PathKey *) lfirst(i); + EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass; + PathKey *best_pathkey = NULL; + int best_score = -1; ListCell *j; - PathKeyItem *best_item = NULL; - int best_score = 0; - List *cpathkey; /* - * The sub_pathkey could contain multiple elements (representing - * knowledge that multiple items are effectively equal). Each element - * might match none, one, or more of the output columns that are - * visible to the outer query. This means we may have multiple - * possible representations of the sub_pathkey in the context of the - * outer query. Ideally we would generate them all and put them all - * into a pathkey list of the outer query, thereby propagating - * equality knowledge up to the outer query. Right now we cannot do - * so, because the outer query's canonical pathkey sets are already - * frozen when this is called. Instead we prefer the one that has the - * highest "score" (number of canonical pathkey peers, plus one if it - * matches the outer query_pathkeys). This is the most likely to be - * useful in the outer query. + * The sub_pathkey's EquivalenceClass could contain multiple elements + * (representing knowledge that multiple items are effectively equal). + * Each element might match none, one, or more of the output columns + * that are visible to the outer query. This means we may have + * multiple possible representations of the sub_pathkey in the context + * of the outer query. Ideally we would generate them all and put + * them all into an EC of the outer query, thereby propagating + * equality knowledge up to the outer query. Right now we cannot do + * so, because the outer query's EquivalenceClasses are already frozen + * when this is called. Instead we prefer the one that has the highest + * "score" (number of EC peers, plus one if it matches the outer + * query_pathkeys). This is the most likely to be useful in the outer + * query. */ - foreach(j, sub_pathkey) + foreach(j, sub_eclass->ec_members) { - PathKeyItem *sub_item = (PathKeyItem *) lfirst(j); - Node *sub_key = sub_item->key; + EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j); + Expr *sub_expr = sub_member->em_expr; Expr *rtarg; ListCell *k; @@ -1076,26 +618,27 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * entry. (The latter case is worth extra code because it arises * frequently in connection with varchar fields.) */ - if (IsA(sub_key, RelabelType)) - rtarg = ((RelabelType *) sub_key)->arg; + if (IsA(sub_expr, RelabelType)) + rtarg = ((RelabelType *) sub_expr)->arg; else rtarg = NULL; foreach(k, sub_tlist) { TargetEntry *tle = (TargetEntry *) lfirst(k); - Node *outer_expr; - PathKeyItem *outer_item; + Expr *outer_expr; + EquivalenceClass *outer_ec; + PathKey *outer_pk; int score; /* resjunk items aren't visible to outer query */ if (tle->resjunk) continue; - if (equal(tle->expr, sub_key)) + if (equal(tle->expr, sub_expr)) { /* Exact match */ - outer_expr = (Node *) + outer_expr = (Expr *) makeVar(rel->relid, tle->resno, exprType((Node *) tle->expr), @@ -1105,35 +648,40 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, else if (rtarg && equal(tle->expr, rtarg)) { /* Match after discarding RelabelType */ - outer_expr = (Node *) + outer_expr = (Expr *) makeVar(rel->relid, tle->resno, exprType((Node *) tle->expr), exprTypmod((Node *) tle->expr), 0); - outer_expr = (Node *) + outer_expr = (Expr *) makeRelabelType((Expr *) outer_expr, - ((RelabelType *) sub_key)->resulttype, - ((RelabelType *) sub_key)->resulttypmod, - ((RelabelType *) sub_key)->relabelformat); + ((RelabelType *) sub_expr)->resulttype, + ((RelabelType *) sub_expr)->resulttypmod, + ((RelabelType *) sub_expr)->relabelformat); } else continue; - /* Found a representation for this sub_key */ - outer_item = makePathKeyItem(outer_expr, - sub_item->sortop, - sub_item->nulls_first, - true); - /* score = # of mergejoin peers */ - score = count_canonical_peers(root, outer_item); + /* Found a representation for this sub_pathkey */ + outer_ec = get_eclass_for_sort_expr(root, + outer_expr, + sub_member->em_datatype, + sub_eclass->ec_opfamilies); + outer_pk = make_canonical_pathkey(root, + outer_ec, + sub_pathkey->pk_opfamily, + sub_pathkey->pk_strategy, + sub_pathkey->pk_nulls_first); + /* score = # of equivalence peers */ + score = list_length(outer_ec->ec_members) - 1; /* +1 if it matches the proper query_pathkeys item */ if (retvallen < outer_query_keys && - list_member(list_nth(root->query_pathkeys, retvallen), outer_item)) + list_nth(root->query_pathkeys, retvallen) == outer_pk) score++; if (score > best_score) { - best_item = outer_item; + best_pathkey = outer_pk; best_score = score; } } @@ -1143,19 +691,16 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * If we couldn't find a representation of this sub_pathkey, we're * done (we can't use the ones to its right, either). */ - if (!best_item) + if (!best_pathkey) break; - /* Canonicalize the chosen item (we did not before) */ - cpathkey = make_canonical_pathkey(root, best_item); - /* * Eliminate redundant ordering info; could happen if outer query - * equijoins subquery keys... + * equivalences subquery keys... */ - if (!list_member_ptr(retval, cpathkey)) + if (!pathkey_is_redundant(best_pathkey, retval)) { - retval = lappend(retval, cpathkey); + retval = lappend(retval, best_pathkey); retvallen++; } } @@ -1166,19 +711,14 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, /* * build_join_pathkeys * Build the path keys for a join relation constructed by mergejoin or - * nestloop join. These keys should include all the path key vars of the - * outer path (since the join will retain the ordering of the outer path) - * plus any vars of the inner path that are equijoined to the outer vars. - * - * Per the discussion in backend/optimizer/README, equijoined inner vars - * can be considered path keys of the result, just the same as the outer - * vars they were joined with; furthermore, it doesn't matter what kind - * of join algorithm is actually used. + * nestloop join. This is normally the same as the outer path's keys. * * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as * having the outer path's path keys, because null lefthand rows may be * inserted at random points. It must be treated as unsorted. * + * We truncate away any pathkeys that are uninteresting for higher joins. + * * 'joinrel' is the join relation that paths are being formed for * 'jointype' is the join type (inner, left, full, etc) * 'outer_pathkeys' is the list of the current outer path's path keys @@ -1197,8 +737,7 @@ build_join_pathkeys(PlannerInfo *root, /* * This used to be quite a complex bit of code, but now that all pathkey * sublists start out life canonicalized, we don't have to do a darn thing - * here! The inner-rel vars we used to need to add are *already* part of - * the outer pathkey! + * here! * * We do, however, need to truncate the pathkeys list, since it may * contain pathkeys that were useful for forming this joinrel but are @@ -1216,18 +755,21 @@ build_join_pathkeys(PlannerInfo *root, * Generate a pathkeys list that represents the sort order specified * by a list of SortClauses (GroupClauses will work too!) * - * NB: the result is NOT in canonical form, but must be passed through - * canonicalize_pathkeys() before it can be used for comparisons or - * labeling relation sort orders. (We do things this way because - * grouping_planner needs to be able to construct requested pathkeys - * before the pathkey equivalence sets have been created for the query.) + * If canonicalize is TRUE, the resulting PathKeys are all in canonical form; + * otherwise not. canonicalize should always be TRUE after EquivalenceClass + * merging has been performed, but FALSE if we haven't done EquivalenceClass + * merging yet. (We provide this option because grouping_planner() needs to + * be able to represent requested pathkeys before the equivalence classes have + * been created for the query.) * * 'sortclauses' is a list of SortClause or GroupClause nodes * 'tlist' is the targetlist to find the referenced tlist entries in */ List * -make_pathkeys_for_sortclauses(List *sortclauses, - List *tlist) +make_pathkeys_for_sortclauses(PlannerInfo *root, + List *sortclauses, + List *tlist, + bool canonicalize) { List *pathkeys = NIL; ListCell *l; @@ -1235,19 +777,24 @@ make_pathkeys_for_sortclauses(List *sortclauses, foreach(l, sortclauses) { SortClause *sortcl = (SortClause *) lfirst(l); - Node *sortkey; - PathKeyItem *pathkey; - - sortkey = get_sortgroupclause_expr(sortcl, tlist); - pathkey = makePathKeyItem(sortkey, sortcl->sortop, sortcl->nulls_first, - true); - - /* - * The pathkey becomes a one-element sublist, for now; - * canonicalize_pathkeys() might replace it with a longer sublist - * later. - */ - pathkeys = lappend(pathkeys, list_make1(pathkey)); + Expr *sortkey; + PathKey *pathkey; + + sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); + pathkey = make_pathkey_from_sortinfo(root, + sortkey, + sortcl->sortop, + sortcl->nulls_first, + canonicalize); + + /* Canonical form eliminates redundant ordering keys */ + if (canonicalize) + { + if (!pathkey_is_redundant(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); + } + else + pathkeys = lappend(pathkeys, pathkey); } return pathkeys; } @@ -1257,49 +804,44 @@ make_pathkeys_for_sortclauses(List *sortclauses, ****************************************************************************/ /* - * cache_mergeclause_pathkeys - * Make the cached pathkeys valid in a mergeclause restrictinfo. + * cache_mergeclause_eclasses + * Make the cached EquivalenceClass links valid in a mergeclause + * restrictinfo. * - * RestrictInfo contains fields in which we may cache the result - * of looking up the canonical pathkeys for the left and right sides - * of the mergeclause. (Note that in normal cases they will be the - * same, but not if the mergeclause appears above an OUTER JOIN.) - * This is a worthwhile savings because these routines will be invoked - * many times when dealing with a many-relation query. - * - * We have to be careful that the cached values are palloc'd in the same - * context the RestrictInfo node itself is in. This is not currently a - * problem for normal planning, but it is an issue for GEQO planning. + * RestrictInfo contains fields in which we may cache pointers to + * EquivalenceClasses for the left and right inputs of the mergeclause. + * (If the mergeclause is a true equivalence clause these will be the + * same EquivalenceClass, otherwise not.) */ void -cache_mergeclause_pathkeys(PlannerInfo *root, RestrictInfo *restrictinfo) +cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo) { - Node *key; - PathKeyItem *item; - MemoryContext oldcontext; - - Assert(restrictinfo->mergejoinoperator != InvalidOid); + Assert(restrictinfo->mergeopfamilies != NIL); - if (restrictinfo->left_pathkey == NIL) - { - oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(restrictinfo)); - key = get_leftop(restrictinfo->clause); - item = makePathKeyItem(key, restrictinfo->left_sortop, - false, /* XXX nulls_first? */ - false); - restrictinfo->left_pathkey = make_canonical_pathkey(root, item); - MemoryContextSwitchTo(oldcontext); - } - if (restrictinfo->right_pathkey == NIL) + /* the cached values should be either both set or both not */ + if (restrictinfo->left_ec == NULL) { - oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(restrictinfo)); - key = get_rightop(restrictinfo->clause); - item = makePathKeyItem(key, restrictinfo->right_sortop, - false, /* XXX nulls_first? */ - false); - restrictinfo->right_pathkey = make_canonical_pathkey(root, item); - MemoryContextSwitchTo(oldcontext); + Expr *clause = restrictinfo->clause; + Oid lefttype, + righttype; + + /* Need the declared input types of the operator */ + op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype); + + /* Find or create a matching EquivalenceClass for each side */ + restrictinfo->left_ec = + get_eclass_for_sort_expr(root, + (Expr *) get_leftop(clause), + lefttype, + restrictinfo->mergeopfamilies); + restrictinfo->right_ec = + get_eclass_for_sort_expr(root, + (Expr *) get_rightop(clause), + righttype, + restrictinfo->mergeopfamilies); } + else + Assert(restrictinfo->right_ec != NULL); } /* @@ -1309,72 +851,82 @@ cache_mergeclause_pathkeys(PlannerInfo *root, RestrictInfo *restrictinfo) * If successful, it returns a list of mergeclauses. * * 'pathkeys' is a pathkeys list showing the ordering of an input path. - * It doesn't matter whether it is for the inner or outer path. + * 'outer_keys' is TRUE if these keys are for the outer input path, + * FALSE if for inner. * 'restrictinfos' is a list of mergejoinable restriction clauses for the * join relation being formed. * + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * * The result is NIL if no merge can be done, else a maximal list of * usable mergeclauses (represented as a list of their restrictinfo nodes). - * - * XXX Ideally we ought to be considering context, ie what path orderings - * are available on the other side of the join, rather than just making - * an arbitrary choice among the mergeclauses that will work for this side - * of the join. */ List * find_mergeclauses_for_pathkeys(PlannerInfo *root, List *pathkeys, + bool outer_keys, List *restrictinfos) { List *mergeclauses = NIL; ListCell *i; - /* make sure we have pathkeys cached in the clauses */ + /* make sure we have eclasses cached in the clauses */ foreach(i, restrictinfos) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); - cache_mergeclause_pathkeys(root, restrictinfo); + cache_mergeclause_eclasses(root, rinfo); } foreach(i, pathkeys) { - List *pathkey = (List *) lfirst(i); + PathKey *pathkey = (PathKey *) lfirst(i); + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; List *matched_restrictinfos = NIL; ListCell *j; - /* - * We can match a pathkey against either left or right side of any - * mergejoin clause. (We examine both sides since we aren't told if - * the given pathkeys are for inner or outer input path; no confusion - * is possible.) Furthermore, if there are multiple matching clauses, - * take them all. In plain inner-join scenarios we expect only one - * match, because redundant-mergeclause elimination will have removed - * any redundant mergeclauses from the input list. However, in - * outer-join scenarios there might be multiple matches. An example is + /*---------- + * A mergejoin clause matches a pathkey if it has the same EC. + * If there are multiple matching clauses, take them all. In plain + * inner-join scenarios we expect only one match, because + * equivalence-class processing will have removed any redundant + * mergeclauses. However, in outer-join scenarios there might be + * multiple matches. An example is * - * select * from a full join b on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 - * = b.v2; + * select * from a full join b + * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2; * - * Given the pathkeys ((a.v1), (a.v2)) it is okay to return all three + * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed * we *must* do so or we will be unable to form a valid plan. + * + * We expect that the given pathkeys list is canonical, which means + * no two members have the same EC, so it's not possible for this + * code to enter the same mergeclause into the result list twice. + * + * XXX it's possible that multiple matching clauses might have + * different ECs on the other side, in which case the order we put + * them into our result makes a difference in the pathkeys required + * for the other input path. However this routine hasn't got any info + * about which order would be best, so for now we disregard that case + * (which is probably a corner case anyway). + *---------- */ foreach(j, restrictinfos) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); + RestrictInfo *rinfo = (RestrictInfo *) lfirst(j); + EquivalenceClass *clause_ec; - /* - * We can compare canonical pathkey sublists by simple pointer - * equality; see compare_pathkeys. - */ - if ((pathkey == restrictinfo->left_pathkey || - pathkey == restrictinfo->right_pathkey) && - !list_member_ptr(mergeclauses, restrictinfo)) - { - matched_restrictinfos = lappend(matched_restrictinfos, - restrictinfo); - } + if (outer_keys) + clause_ec = rinfo->outer_is_left ? + rinfo->left_ec : rinfo->right_ec; + else + clause_ec = rinfo->outer_is_left ? + rinfo->right_ec : rinfo->left_ec; + if (clause_ec == pathkey_ec) + matched_restrictinfos = lappend(matched_restrictinfos, rinfo); } /* @@ -1396,63 +948,270 @@ find_mergeclauses_for_pathkeys(PlannerInfo *root, } /* - * make_pathkeys_for_mergeclauses + * select_outer_pathkeys_for_merge + * Builds a pathkey list representing a possible sort ordering + * that can be used with the given mergeclauses. + * + * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses + * that will be used in a merge join. + * 'joinrel' is the join relation we are trying to construct. + * + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * + * Returns a pathkeys list that can be applied to the outer relation. + * + * Since we assume here that a sort is required, there is no particular use + * in matching any available ordering of the outerrel. (joinpath.c has an + * entirely separate code path for considering sort-free mergejoins.) Rather, + * it's interesting to try to match the requested query_pathkeys so that a + * second output sort may be avoided; and failing that, we try to list "more + * popular" keys (those with the most unmatched EquivalenceClass peers) + * earlier, in hopes of making the resulting ordering useful for as many + * higher-level mergejoins as possible. + */ +List * +select_outer_pathkeys_for_merge(PlannerInfo *root, + List *mergeclauses, + RelOptInfo *joinrel) +{ + List *pathkeys = NIL; + int nClauses = list_length(mergeclauses); + EquivalenceClass **ecs; + int *scores; + int necs; + ListCell *lc; + int j; + + /* Might have no mergeclauses */ + if (nClauses == 0) + return NIL; + + /* + * Make arrays of the ECs used by the mergeclauses (dropping any + * duplicates) and their "popularity" scores. + */ + ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *)); + scores = (int *) palloc(nClauses * sizeof(int)); + necs = 0; + + foreach(lc, mergeclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + EquivalenceClass *oeclass; + int score; + ListCell *lc2; + + /* get the outer eclass */ + cache_mergeclause_eclasses(root, rinfo); + + if (rinfo->outer_is_left) + oeclass = rinfo->left_ec; + else + oeclass = rinfo->right_ec; + + /* reject duplicates */ + for (j = 0; j < necs; j++) + { + if (ecs[j] == oeclass) + break; + } + if (j < necs) + continue; + + /* compute score */ + score = 0; + foreach(lc2, oeclass->ec_members) + { + EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2); + + /* Potential future join partner? */ + if (!em->em_is_const && !em->em_is_child && + !bms_overlap(em->em_relids, joinrel->relids)) + score++; + } + + ecs[necs] = oeclass; + scores[necs] = score; + necs++; + } + + /* + * Find out if we have all the ECs mentioned in query_pathkeys; if so + * we can generate a sort order that's also useful for final output. + * There is no percentage in a partial match, though, so we have to + * have 'em all. + */ + if (root->query_pathkeys) + { + foreach(lc, root->query_pathkeys) + { + PathKey *query_pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *query_ec = query_pathkey->pk_eclass; + + for (j = 0; j < necs; j++) + { + if (ecs[j] == query_ec) + break; /* found match */ + } + if (j >= necs) + break; /* didn't find match */ + } + /* if we got to the end of the list, we have them all */ + if (lc == NULL) + { + /* copy query_pathkeys as starting point for our output */ + pathkeys = list_copy(root->query_pathkeys); + /* mark their ECs as already-emitted */ + foreach(lc, root->query_pathkeys) + { + PathKey *query_pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *query_ec = query_pathkey->pk_eclass; + + for (j = 0; j < necs; j++) + { + if (ecs[j] == query_ec) + { + scores[j] = -1; + break; + } + } + } + } + } + + /* + * Add remaining ECs to the list in popularity order, using a default + * sort ordering. (We could use qsort() here, but the list length is + * usually so small it's not worth it.) + */ + for (;;) + { + int best_j; + int best_score; + EquivalenceClass *ec; + PathKey *pathkey; + + best_j = 0; + best_score = scores[0]; + for (j = 1; j < necs; j++) + { + if (scores[j] > best_score) + { + best_j = j; + best_score = scores[j]; + } + } + if (best_score < 0) + break; /* all done */ + ec = ecs[best_j]; + scores[best_j] = -1; + pathkey = make_canonical_pathkey(root, + ec, + linitial_oid(ec->ec_opfamilies), + BTLessStrategyNumber, + false); + /* can't be redundant because no duplicate ECs */ + Assert(!pathkey_is_redundant(pathkey, pathkeys)); + pathkeys = lappend(pathkeys, pathkey); + } + + pfree(ecs); + pfree(scores); + + return pathkeys; +} + +/* + * make_inner_pathkeys_for_merge * Builds a pathkey list representing the explicit sort order that - * must be applied to a path in order to make it usable for the + * must be applied to an inner path to make it usable with the * given mergeclauses. * * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses * that will be used in a merge join. - * 'rel' is the relation the pathkeys will apply to (ie, either the inner - * or outer side of the proposed join rel). + * 'outer_pathkeys' are the already-known canonical pathkeys for the outer + * side of the join. * - * Returns a pathkeys list that can be applied to the indicated relation. + * The restrictinfos must be marked (via outer_is_left) to show which side + * of each clause is associated with the current outer path. (See + * select_mergejoin_clauses()) + * + * Returns a pathkeys list that can be applied to the inner relation. * * Note that it is not this routine's job to decide whether sorting is * actually needed for a particular input path. Assume a sort is necessary; * just make the keys, eh? */ List * -make_pathkeys_for_mergeclauses(PlannerInfo *root, - List *mergeclauses, - RelOptInfo *rel) +make_inner_pathkeys_for_merge(PlannerInfo *root, + List *mergeclauses, + List *outer_pathkeys) { List *pathkeys = NIL; - ListCell *l; + EquivalenceClass *lastoeclass; + PathKey *opathkey; + ListCell *lc; + ListCell *lop; - foreach(l, mergeclauses) + lastoeclass = NULL; + opathkey = NULL; + lop = list_head(outer_pathkeys); + + foreach(lc, mergeclauses) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); - List *pathkey; + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + EquivalenceClass *oeclass; + EquivalenceClass *ieclass; + PathKey *pathkey; - cache_mergeclause_pathkeys(root, restrictinfo); + cache_mergeclause_eclasses(root, rinfo); - if (bms_is_subset(restrictinfo->left_relids, rel->relids)) + if (rinfo->outer_is_left) { - /* Rel is left side of mergeclause */ - pathkey = restrictinfo->left_pathkey; + oeclass = rinfo->left_ec; + ieclass = rinfo->right_ec; } - else if (bms_is_subset(restrictinfo->right_relids, rel->relids)) + else { - /* Rel is right side of mergeclause */ - pathkey = restrictinfo->right_pathkey; + oeclass = rinfo->right_ec; + ieclass = rinfo->left_ec; } - else + + /* outer eclass should match current or next pathkeys */ + /* we check this carefully for debugging reasons */ + if (oeclass != lastoeclass) { - elog(ERROR, "could not identify which side of mergeclause to use"); - pathkey = NIL; /* keep compiler quiet */ + if (!lop) + elog(ERROR, "too few pathkeys for mergeclauses"); + opathkey = (PathKey *) lfirst(lop); + lop = lnext(lop); + lastoeclass = opathkey->pk_eclass; + if (oeclass != lastoeclass) + elog(ERROR, "outer pathkeys do not match mergeclause"); } /* - * When we are given multiple merge clauses, it's possible that some - * clauses refer to the same vars as earlier clauses. There's no - * reason for us to specify sort keys like (A,B,A) when (A,B) will do - * --- and adding redundant sort keys makes add_path think that this - * sort order is different from ones that are really the same, so - * don't do it. Since we now have a canonicalized pathkey, a simple - * ptrMember test is sufficient to detect redundant keys. + * Often, we'll have same EC on both sides, in which case the outer + * pathkey is also canonical for the inner side, and we can skip a + * useless search. + */ + if (ieclass == oeclass) + pathkey = opathkey; + else + pathkey = make_canonical_pathkey(root, + ieclass, + opathkey->pk_opfamily, + opathkey->pk_strategy, + opathkey->pk_nulls_first); + + /* + * Don't generate redundant pathkeys (can happen if multiple + * mergeclauses refer to same EC). */ - pathkeys = list_append_unique_ptr(pathkeys, pathkey); + if (!pathkey_is_redundant(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); } return pathkeys; @@ -1471,7 +1230,7 @@ make_pathkeys_for_mergeclauses(PlannerInfo *root, /* * pathkeys_useful_for_merging * Count the number of pathkeys that may be useful for mergejoins - * above the given relation (by looking at its joininfo list). + * above the given relation. * * We consider a pathkey potentially useful if it corresponds to the merge * ordering of either side of any joinclause for the rel. This might be @@ -1487,27 +1246,39 @@ pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys) foreach(i, pathkeys) { - List *pathkey = (List *) lfirst(i); + PathKey *pathkey = (PathKey *) lfirst(i); bool matched = false; ListCell *j; - foreach(j, rel->joininfo) + /* + * First look into the EquivalenceClass of the pathkey, to see if + * there are any members not yet joined to the rel. If so, it's + * surely possible to generate a mergejoin clause using them. + */ + if (rel->has_eclass_joins && + eclass_useful_for_merging(pathkey->pk_eclass, rel)) + matched = true; + else { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); - - if (restrictinfo->mergejoinoperator == InvalidOid) - continue; - cache_mergeclause_pathkeys(root, restrictinfo); - /* - * We can compare canonical pathkey sublists by simple pointer - * equality; see compare_pathkeys. + * Otherwise search the rel's joininfo list, which contains + * non-EquivalenceClass-derivable join clauses that might + * nonetheless be mergejoinable. */ - if (pathkey == restrictinfo->left_pathkey || - pathkey == restrictinfo->right_pathkey) + foreach(j, rel->joininfo) { - matched = true; - break; + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); + + if (restrictinfo->mergeopfamilies == NIL) + continue; + cache_mergeclause_eclasses(root, restrictinfo); + + if (pathkey->pk_eclass == restrictinfo->left_ec || + pathkey->pk_eclass == restrictinfo->right_ec) + { + matched = true; + break; + } } } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9a8204392a..143b04ca9c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.221 2007/01/10 18:06:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.222 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -121,8 +121,6 @@ static MergeJoin *make_mergejoin(List *tlist, JoinType jointype); static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols, AttrNumber *sortColIdx, Oid *sortOperators, bool *nullsFirst); -static Sort *make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, - List *pathkeys); /* @@ -1425,23 +1423,21 @@ create_nestloop_plan(PlannerInfo *root, * that have to be checked as qpquals at the join node. * * We can also remove any join clauses that are redundant with those - * being used in the index scan; prior redundancy checks will not have - * caught this case because the join clauses would never have been put - * in the same joininfo list. + * being used in the index scan; this check is needed because + * find_eclass_clauses_for_index_join() may emit different clauses + * than generate_join_implied_equalities() did. * * We can skip this if the index path is an ordinary indexpath and not - * a special innerjoin path. + * a special innerjoin path, since it then wouldn't be using any join + * clauses. */ IndexPath *innerpath = (IndexPath *) best_path->innerjoinpath; if (innerpath->isjoininner) - { joinrestrictclauses = select_nonredundant_join_clauses(root, joinrestrictclauses, - innerpath->indexclauses, - IS_OUTER_JOIN(best_path->jointype)); - } + innerpath->indexclauses); } else if (IsA(best_path->innerjoinpath, BitmapHeapPath)) { @@ -1471,8 +1467,7 @@ create_nestloop_plan(PlannerInfo *root, joinrestrictclauses = select_nonredundant_join_clauses(root, joinrestrictclauses, - bitmapclauses, - IS_OUTER_JOIN(best_path->jointype)); + bitmapclauses); } } @@ -1516,7 +1511,21 @@ create_mergejoin_plan(PlannerInfo *root, List *joinclauses; List *otherclauses; List *mergeclauses; + List *outerpathkeys; + List *innerpathkeys; + int nClauses; + Oid *mergefamilies; + int *mergestrategies; + bool *mergenullsfirst; MergeJoin *join_plan; + int i; + EquivalenceClass *lastoeclass; + EquivalenceClass *lastieclass; + PathKey *opathkey; + PathKey *ipathkey; + ListCell *lc; + ListCell *lop; + ListCell *lip; /* Get the join qual clauses (in plain expression form) */ /* Any pseudoconstant clauses are ignored here */ @@ -1542,7 +1551,8 @@ create_mergejoin_plan(PlannerInfo *root, /* * Rearrange mergeclauses, if needed, so that the outer variable is always - * on the left. + * on the left; mark the mergeclause restrictinfos with correct + * outer_is_left status. */ mergeclauses = get_switched_clauses(best_path->path_mergeclauses, best_path->jpath.outerjoinpath->parent->relids); @@ -1564,7 +1574,10 @@ create_mergejoin_plan(PlannerInfo *root, make_sort_from_pathkeys(root, outer_plan, best_path->outersortkeys); + outerpathkeys = best_path->outersortkeys; } + else + outerpathkeys = best_path->jpath.outerjoinpath->pathkeys; if (best_path->innersortkeys) { @@ -1573,8 +1586,87 @@ create_mergejoin_plan(PlannerInfo *root, make_sort_from_pathkeys(root, inner_plan, best_path->innersortkeys); + innerpathkeys = best_path->innersortkeys; + } + else + innerpathkeys = best_path->jpath.innerjoinpath->pathkeys; + + /* + * Compute the opfamily/strategy/nullsfirst arrays needed by the executor. + * The information is in the pathkeys for the two inputs, but we need to + * be careful about the possibility of mergeclauses sharing a pathkey + * (compare find_mergeclauses_for_pathkeys()). + */ + nClauses = list_length(mergeclauses); + Assert(nClauses == list_length(best_path->path_mergeclauses)); + mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid)); + mergestrategies = (int *) palloc(nClauses * sizeof(int)); + mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool)); + + lastoeclass = NULL; + lastieclass = NULL; + opathkey = NULL; + ipathkey = NULL; + lop = list_head(outerpathkeys); + lip = list_head(innerpathkeys); + i = 0; + foreach(lc, best_path->path_mergeclauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + EquivalenceClass *oeclass; + EquivalenceClass *ieclass; + + /* fetch outer/inner eclass from mergeclause */ + Assert(IsA(rinfo, RestrictInfo)); + if (rinfo->outer_is_left) + { + oeclass = rinfo->left_ec; + ieclass = rinfo->right_ec; + } + else + { + oeclass = rinfo->right_ec; + ieclass = rinfo->left_ec; + } + Assert(oeclass != NULL); + Assert(ieclass != NULL); + + /* should match current or next pathkeys */ + /* we check this carefully for debugging reasons */ + if (oeclass != lastoeclass) + { + if (!lop) + elog(ERROR, "too few pathkeys for mergeclauses"); + opathkey = (PathKey *) lfirst(lop); + lop = lnext(lop); + lastoeclass = opathkey->pk_eclass; + if (oeclass != lastoeclass) + elog(ERROR, "outer pathkeys do not match mergeclause"); + } + if (ieclass != lastieclass) + { + if (!lip) + elog(ERROR, "too few pathkeys for mergeclauses"); + ipathkey = (PathKey *) lfirst(lip); + lip = lnext(lip); + lastieclass = ipathkey->pk_eclass; + if (ieclass != lastieclass) + elog(ERROR, "inner pathkeys do not match mergeclause"); + } + /* pathkeys should match each other too (more debugging) */ + if (opathkey->pk_opfamily != ipathkey->pk_opfamily || + opathkey->pk_strategy != ipathkey->pk_strategy || + opathkey->pk_nulls_first != ipathkey->pk_nulls_first) + elog(ERROR, "left and right pathkeys do not match in mergejoin"); + + /* OK, save info for executor */ + mergefamilies[i] = opathkey->pk_opfamily; + mergestrategies[i] = opathkey->pk_strategy; + mergenullsfirst[i] = opathkey->pk_nulls_first; + i++; } + /* * Now we can build the mergejoin node. */ @@ -1582,9 +1674,9 @@ create_mergejoin_plan(PlannerInfo *root, joinclauses, otherclauses, mergeclauses, - best_path->path_mergeFamilies, - best_path->path_mergeStrategies, - best_path->path_mergeNullsFirst, + mergefamilies, + mergestrategies, + mergenullsfirst, outer_plan, inner_plan, best_path->jpath.jointype); @@ -1921,8 +2013,9 @@ fix_indexqual_operand(Node *node, IndexOptInfo *index, Oid *opfamily) * Given a list of merge or hash joinclauses (as RestrictInfo nodes), * extract the bare clauses, and rearrange the elements within the * clauses, if needed, so the outer join variable is on the left and - * the inner is on the right. The original data structure is not touched; - * a modified list is returned. + * the inner is on the right. The original clause data structure is not + * touched; a modified list is returned. We do, however, set the transient + * outer_is_left field in each RestrictInfo to show which side was which. */ static List * get_switched_clauses(List *clauses, Relids outerrelids) @@ -1953,9 +2046,14 @@ get_switched_clauses(List *clauses, Relids outerrelids) /* Commute it --- note this modifies the temp node in-place. */ CommuteOpExpr(temp); t_list = lappend(t_list, temp); + restrictinfo->outer_is_left = false; } else + { + Assert(bms_is_subset(restrictinfo->left_relids, outerrelids)); t_list = lappend(t_list, clause); + restrictinfo->outer_is_left = true; + } } return t_list; } @@ -2490,7 +2588,7 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, bool nulls_first, * If the input plan type isn't one that can do projections, this means * adding a Result node just to do the projection. */ -static Sort * +Sort * make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys) { List *tlist = lefttree->targetlist; @@ -2512,41 +2610,55 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys) foreach(i, pathkeys) { - List *keysublist = (List *) lfirst(i); - PathKeyItem *pathkey = NULL; + PathKey *pathkey = (PathKey *) lfirst(i); TargetEntry *tle = NULL; + Oid pk_datatype = InvalidOid; + Oid sortop; ListCell *j; /* - * We can sort by any one of the sort key items listed in this - * sublist. For now, we take the first one that corresponds to an - * available Var in the tlist. If there isn't any, use the first one - * that is an expression in the input's vars. + * We can sort by any non-constant expression listed in the pathkey's + * EquivalenceClass. For now, we take the first one that corresponds + * to an available Var in the tlist. If there isn't any, use the first + * one that is an expression in the input's vars. (The non-const + * restriction only matters if the EC is below_outer_join; but if it + * isn't, it won't contain consts anyway, else we'd have discarded + * the pathkey as redundant.) * * XXX if we have a choice, is there any way of figuring out which * might be cheapest to execute? (For example, int4lt is likely much * cheaper to execute than numericlt, but both might appear in the - * same pathkey sublist...) Not clear that we ever will have a choice - * in practice, so it may not matter. + * same equivalence class...) Not clear that we ever will have an + * interesting choice in practice, so it may not matter. */ - foreach(j, keysublist) + foreach(j, pathkey->pk_eclass->ec_members) { - pathkey = (PathKeyItem *) lfirst(j); - Assert(IsA(pathkey, PathKeyItem)); - tle = tlist_member(pathkey->key, tlist); + EquivalenceMember *em = (EquivalenceMember *) lfirst(j); + + if (em->em_is_const || em->em_is_child) + continue; + tle = tlist_member((Node *) em->em_expr, tlist); if (tle) - break; + { + pk_datatype = em->em_datatype; + break; /* found expr already in tlist */ + } } if (!tle) { /* No matching Var; look for a computable expression */ - foreach(j, keysublist) + Expr *sortexpr = NULL; + + foreach(j, pathkey->pk_eclass->ec_members) { + EquivalenceMember *em = (EquivalenceMember *) lfirst(j); List *exprvars; ListCell *k; - pathkey = (PathKeyItem *) lfirst(j); - exprvars = pull_var_clause(pathkey->key, false); + if (em->em_is_const || em->em_is_child) + continue; + sortexpr = em->em_expr; + exprvars = pull_var_clause((Node *) sortexpr, false); foreach(k, exprvars) { if (!tlist_member(lfirst(k), tlist)) @@ -2554,7 +2666,10 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys) } list_free(exprvars); if (!k) + { + pk_datatype = em->em_datatype; break; /* found usable expression */ + } } if (!j) elog(ERROR, "could not find pathkey item to sort"); @@ -2571,7 +2686,7 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys) /* * Add resjunk entry to input's tlist */ - tle = makeTargetEntry((Expr *) pathkey->key, + tle = makeTargetEntry(sortexpr, list_length(tlist) + 1, NULL, true); @@ -2579,14 +2694,28 @@ make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, List *pathkeys) lefttree->targetlist = tlist; /* just in case NIL before */ } + /* + * Look up the correct sort operator from the PathKey's slightly + * abstracted representation. + */ + sortop = get_opfamily_member(pathkey->pk_opfamily, + pk_datatype, + pk_datatype, + pathkey->pk_strategy); + if (!OidIsValid(sortop)) /* should not happen */ + elog(ERROR, "could not find member %d(%u,%u) of opfamily %u", + pathkey->pk_strategy, pk_datatype, pk_datatype, + pathkey->pk_opfamily); + /* * The column might already be selected as a sort key, if the pathkeys * contain duplicate entries. (This can happen in scenarios where * multiple mergejoinable clauses mention the same var, for example.) * So enter it only once in the sort arrays. */ - numsortkeys = add_sort_column(tle->resno, pathkey->sortop, - pathkey->nulls_first, + numsortkeys = add_sort_column(tle->resno, + sortop, + pathkey->pk_nulls_first, numsortkeys, sortColIdx, sortOperators, nullsFirst); } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 3dc7229cf3..b49df21270 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.127 2007/01/08 16:47:30 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.128 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,8 +37,6 @@ int from_collapse_limit; int join_collapse_limit; -static void add_vars_to_targetlist(PlannerInfo *root, List *vars, - Relids where_needed); static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, Relids *qualscope); static OuterJoinInfo *make_outerjoininfo(PlannerInfo *root, @@ -51,8 +49,7 @@ static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, Relids qualscope, Relids ojscope, Relids outerjoin_nonnullable); -static bool qual_is_redundant(PlannerInfo *root, RestrictInfo *restrictinfo, - List *restrictlist); +static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p); static void check_mergejoinable(RestrictInfo *restrictinfo); static void check_hashjoinable(RestrictInfo *restrictinfo); @@ -144,7 +141,7 @@ build_base_rel_tlists(PlannerInfo *root, List *final_tlist) * as being needed for the indicated join (or for final output if * where_needed includes "relation 0"). */ -static void +void add_vars_to_targetlist(PlannerInfo *root, List *vars, Relids where_needed) { ListCell *temp; @@ -590,17 +587,17 @@ make_outerjoininfo(PlannerInfo *root, * Add clause information to either the baserestrictinfo or joininfo list * (depending on whether the clause is a join) of each base relation * mentioned in the clause. A RestrictInfo node is created and added to - * the appropriate list for each rel. Also, if the clause uses a + * the appropriate list for each rel. Alternatively, if the clause uses a * mergejoinable operator and is not delayed by outer-join rules, enter - * the left- and right-side expressions into the query's lists of - * equijoined vars. + * the left- and right-side expressions into the query's list of + * EquivalenceClasses. * * 'clause': the qual clause to be distributed * 'is_pushed_down': if TRUE, force the clause to be marked 'is_pushed_down' * (this indicates the clause came from a FromExpr, not a JoinExpr) * 'is_deduced': TRUE if the qual came from implied-equality deduction * 'below_outer_join': TRUE if the qual is from a JOIN/ON that is below the - * nullable side of a higher-level outer join. + * nullable side of a higher-level outer join * 'qualscope': set of baserels the qual's syntactic scope covers * 'ojscope': NULL if not an outer-join qual, else the minimum set of baserels * needed to form this join @@ -625,11 +622,9 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, Relids relids; bool outerjoin_delayed; bool pseudoconstant = false; - bool maybe_equijoin; + bool maybe_equivalence; bool maybe_outer_join; RestrictInfo *restrictinfo; - RelOptInfo *rel; - List *vars; /* * Retrieve all relids mentioned within the clause. @@ -705,108 +700,57 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, if (is_deduced) { /* - * If the qual came from implied-equality deduction, we always - * evaluate the qual at its natural semantic level. It is the - * responsibility of the deducer not to create any quals that should - * be delayed by outer-join rules. + * If the qual came from implied-equality deduction, it should + * not be outerjoin-delayed, else deducer blew it. But we can't + * check this because the ojinfo list may now contain OJs above + * where the qual belongs. */ - Assert(bms_equal(relids, qualscope)); Assert(!ojscope); - Assert(!pseudoconstant); - /* Needn't feed it back for more deductions */ outerjoin_delayed = false; - maybe_equijoin = false; + /* Don't feed it back for more deductions */ + maybe_equivalence = false; maybe_outer_join = false; } else if (bms_overlap(relids, outerjoin_nonnullable)) { /* * The qual is attached to an outer join and mentions (some of the) - * rels on the nonnullable side. Force the qual to be evaluated - * exactly at the level of joining corresponding to the outer join. We - * cannot let it get pushed down into the nonnullable side, since then - * we'd produce no output rows, rather than the intended single - * null-extended row, for any nonnullable-side rows failing the qual. + * rels on the nonnullable side. * * Note: an outer-join qual that mentions only nullable-side rels can * be pushed down into the nullable side without changing the join - * result, so we treat it the same as an ordinary inner-join qual, - * except for not setting maybe_equijoin (see below). + * result, so we treat it almost the same as an ordinary inner-join + * qual (see below). + * + * We can't use such a clause to deduce equivalence (the left and right + * sides might be unequal above the join because one of them has gone + * to NULL) ... but we might be able to use it for more limited + * deductions, if there are no lower outer joins that delay its + * application. If so, consider adding it to the lists of set-aside + * clauses. + */ + maybe_equivalence = false; + maybe_outer_join = !check_outerjoin_delay(root, &relids); + + /* + * Now force the qual to be evaluated exactly at the level of joining + * corresponding to the outer join. We cannot let it get pushed down + * into the nonnullable side, since then we'd produce no output rows, + * rather than the intended single null-extended row, for any + * nonnullable-side rows failing the qual. + * + * (Do this step after calling check_outerjoin_delay, because that + * trashes relids.) */ Assert(ojscope); relids = ojscope; outerjoin_delayed = true; Assert(!pseudoconstant); - - /* - * We can't use such a clause to deduce equijoin (the left and right - * sides might be unequal above the join because one of them has gone - * to NULL) ... but we might be able to use it for more limited - * purposes. Note: for the current uses of deductions from an - * outer-join clause, it seems safe to make the deductions even when - * the clause is below a higher-level outer join; so we do not check - * below_outer_join here. - */ - maybe_equijoin = false; - maybe_outer_join = true; } else { - /* - * For a non-outer-join qual, we can evaluate the qual as soon as (1) - * we have all the rels it mentions, and (2) we are at or above any - * outer joins that can null any of these rels and are below the - * syntactic location of the given qual. We must enforce (2) because - * pushing down such a clause below the OJ might cause the OJ to emit - * null-extended rows that should not have been formed, or that should - * have been rejected by the clause. (This is only an issue for - * non-strict quals, since if we can prove a qual mentioning only - * nullable rels is strict, we'd have reduced the outer join to an - * inner join in reduce_outer_joins().) - * - * To enforce (2), scan the oj_info_list and merge the required-relid - * sets of any such OJs into the clause's own reference list. At the - * time we are called, the oj_info_list contains only outer joins - * below this qual. We have to repeat the scan until no new relids - * get added; this ensures that the qual is suitably delayed regardless - * of the order in which OJs get executed. As an example, if we have - * one OJ with LHS=A, RHS=B, and one with LHS=B, RHS=C, it is implied - * that these can be done in either order; if the B/C join is done - * first then the join to A can null C, so a qual actually mentioning - * only C cannot be applied below the join to A. - */ - bool found_some; - - outerjoin_delayed = false; - do { - ListCell *l; - - found_some = false; - foreach(l, root->oj_info_list) - { - OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l); - - /* do we have any nullable rels of this OJ? */ - if (bms_overlap(relids, ojinfo->min_righthand) || - (ojinfo->is_full_join && - bms_overlap(relids, ojinfo->min_lefthand))) - { - /* yes; do we have all its rels? */ - if (!bms_is_subset(ojinfo->min_lefthand, relids) || - !bms_is_subset(ojinfo->min_righthand, relids)) - { - /* no, so add them in */ - relids = bms_add_members(relids, - ojinfo->min_lefthand); - relids = bms_add_members(relids, - ojinfo->min_righthand); - outerjoin_delayed = true; - /* we'll need another iteration */ - found_some = true; - } - } - } - } while (found_some); + /* Normal qual clause; check to see if must be delayed by outer join */ + outerjoin_delayed = check_outerjoin_delay(root, &relids); if (outerjoin_delayed) { @@ -816,26 +760,27 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, * Because application of the qual will be delayed by outer join, * we mustn't assume its vars are equal everywhere. */ - maybe_equijoin = false; + maybe_equivalence = false; } else { /* - * Qual is not delayed by any lower outer-join restriction. If it - * is not itself below or within an outer join, we can consider it - * "valid everywhere", so consider feeding it to the equijoin - * machinery. (If it is within an outer join, we can't consider - * it "valid everywhere": once the contained variables have gone - * to NULL, we'd be asserting things like NULL = NULL, which is - * not true.) + * Qual is not delayed by any lower outer-join restriction, so + * we can consider feeding it to the equivalence machinery. + * However, if it's itself within an outer-join clause, treat it + * as though it appeared below that outer join (note that we can + * only get here when the clause references only nullable-side + * rels). */ - if (!below_outer_join && outerjoin_nonnullable == NULL) - maybe_equijoin = true; - else - maybe_equijoin = false; + maybe_equivalence = true; + if (outerjoin_nonnullable != NULL) + below_outer_join = true; } - /* Since it doesn't mention the LHS, it's certainly not an OJ clause */ + /* + * Since it doesn't mention the LHS, it's certainly not useful as a + * set-aside OJ clause, even if it's in an OJ. + */ maybe_outer_join = false; } @@ -860,118 +805,65 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, relids); /* - * Figure out where to attach it. + * If it's a join clause (either naturally, or because delayed by + * outer-join rules), add vars used in the clause to targetlists of + * their relations, so that they will be emitted by the plan nodes that + * scan those relations (else they won't be available at the join node!). + * + * Note: if the clause gets absorbed into an EquivalenceClass then this + * may be unnecessary, but for now we have to do it to cover the case + * where the EC becomes ec_broken and we end up reinserting the original + * clauses into the plan. */ - switch (bms_membership(relids)) + if (bms_membership(relids) == BMS_MULTIPLE) { - case BMS_SINGLETON: - - /* - * There is only one relation participating in 'clause', so - * 'clause' is a restriction clause for that relation. - */ - rel = find_base_rel(root, bms_singleton_member(relids)); - - /* - * Check for a "mergejoinable" clause even though it's not a join - * clause. This is so that we can recognize that "a.x = a.y" - * makes x and y eligible to be considered equal, even when they - * belong to the same rel. Without this, we would not recognize - * that "a.x = a.y AND a.x = b.z AND a.y = c.q" allows us to - * consider z and q equal after their rels are joined. - */ - check_mergejoinable(restrictinfo); + List *vars = pull_var_clause(clause, false); - /* - * If the clause was deduced from implied equality, check to see - * whether it is redundant with restriction clauses we already - * have for this rel. Note we cannot apply this check to - * user-written clauses, since we haven't found the canonical - * pathkey sets yet while processing user clauses. (NB: no - * comparable check is done in the join-clause case; redundancy - * will be detected when the join clause is moved into a join - * rel's restriction list.) - */ - if (!is_deduced || - !qual_is_redundant(root, restrictinfo, - rel->baserestrictinfo)) - { - /* Add clause to rel's restriction list */ - rel->baserestrictinfo = lappend(rel->baserestrictinfo, - restrictinfo); - } - break; - case BMS_MULTIPLE: - - /* - * 'clause' is a join clause, since there is more than one rel in - * the relid set. - */ - - /* - * Check for hash or mergejoinable operators. - * - * We don't bother setting the hashjoin info if we're not going to - * need it. We do want to know about mergejoinable ops in all - * cases, however, because we use mergejoinable ops for other - * purposes such as detecting redundant clauses. - */ - check_mergejoinable(restrictinfo); - if (enable_hashjoin) - check_hashjoinable(restrictinfo); - - /* - * Add clause to the join lists of all the relevant relations. - */ - add_join_clause_to_rels(root, restrictinfo, relids); - - /* - * Add vars used in the join clause to targetlists of their - * relations, so that they will be emitted by the plan nodes that - * scan those relations (else they won't be available at the join - * node!). - */ - vars = pull_var_clause(clause, false); - add_vars_to_targetlist(root, vars, relids); - list_free(vars); - break; - default: - - /* - * 'clause' references no rels, and therefore we have no place to - * attach it. Shouldn't get here if callers are working properly. - */ - elog(ERROR, "cannot cope with variable-free clause"); - break; + add_vars_to_targetlist(root, vars, relids); + list_free(vars); } /* - * If the clause has a mergejoinable operator, we may be able to deduce - * more things from it under the principle of transitivity. + * We check "mergejoinability" of every clause, not only join clauses, + * because we want to know about equivalences between vars of the same + * relation, or between vars and consts. + */ + check_mergejoinable(restrictinfo); + + /* + * If it is a true equivalence clause, send it to the EquivalenceClass + * machinery. We do *not* attach it directly to any restriction or join + * lists. The EC code will propagate it to the appropriate places later. * - * If it is not an outer-join qualification nor bubbled up due to an outer - * join, then the two sides represent equivalent PathKeyItems for path - * keys: any path that is sorted by one side will also be sorted by the - * other (as soon as the two rels are joined, that is). Pass such clauses - * to add_equijoined_keys. + * If the clause has a mergejoinable operator and is not outerjoin-delayed, + * yet isn't an equivalence because it is an outer-join clause, the EC + * code may yet be able to do something with it. We add it to appropriate + * lists for further consideration later. Specifically: * - * If it is a left or right outer-join qualification that relates the two - * sides of the outer join (no funny business like leftvar1 = leftvar2 + - * rightvar), we add it to root->left_join_clauses or + * If it is a left or right outer-join qualification that relates the + * two sides of the outer join (no funny business like leftvar1 = + * leftvar2 + rightvar), we add it to root->left_join_clauses or * root->right_join_clauses according to which side the nonnullable * variable appears on. * * If it is a full outer-join qualification, we add it to * root->full_join_clauses. (Ideally we'd discard cases that aren't * leftvar = rightvar, as we do for left/right joins, but this routine - * doesn't have the info needed to do that; and the current usage of the - * full_join_clauses list doesn't require that, so it's not currently - * worth complicating this routine's API to make it possible.) + * doesn't have the info needed to do that; and the current usage of + * the full_join_clauses list doesn't require that, so it's not + * currently worth complicating this routine's API to make it possible.) + * + * If none of the above hold, pass it off to + * distribute_restrictinfo_to_rels(). */ - if (restrictinfo->mergejoinoperator != InvalidOid) + if (restrictinfo->mergeopfamilies) { - if (maybe_equijoin) - add_equijoined_keys(root, restrictinfo); + if (maybe_equivalence) + { + if (process_equivalence(root, restrictinfo, below_outer_join)) + return; + /* EC rejected it, so pass to distribute_restrictinfo_to_rels */ + } else if (maybe_outer_join && restrictinfo->can_join) { if (bms_is_subset(restrictinfo->left_relids, @@ -982,8 +874,9 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, /* we have outervar = innervar */ root->left_join_clauses = lappend(root->left_join_clauses, restrictinfo); + return; } - else if (bms_is_subset(restrictinfo->right_relids, + if (bms_is_subset(restrictinfo->right_relids, outerjoin_nonnullable) && !bms_overlap(restrictinfo->left_relids, outerjoin_nonnullable)) @@ -991,166 +884,213 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, /* we have innervar = outervar */ root->right_join_clauses = lappend(root->right_join_clauses, restrictinfo); + return; } - else if (bms_equal(outerjoin_nonnullable, qualscope)) + if (bms_equal(outerjoin_nonnullable, qualscope)) { /* FULL JOIN (above tests cannot match in this case) */ root->full_join_clauses = lappend(root->full_join_clauses, restrictinfo); + return; } } } + + /* No EC special case applies, so push it into the clause lists */ + distribute_restrictinfo_to_rels(root, restrictinfo); } /* - * process_implied_equality - * Check to see whether we already have a restrictinfo item that says - * item1 = item2, and create one if not; or if delete_it is true, - * remove any such restrictinfo item. + * check_outerjoin_delay + * Detect whether a qual referencing the given relids must be delayed + * in application due to the presence of a lower outer join. * - * This processing is a consequence of transitivity of mergejoin equality: - * if we have mergejoinable clauses A = B and B = C, we can deduce A = C - * (where = is an appropriate mergejoinable operator). See path/pathkeys.c - * for more details. + * If so, add relids to *relids_p to reflect the lowest safe level for + * evaluating the qual, and return TRUE. + * + * For a non-outer-join qual, we can evaluate the qual as soon as (1) we have + * all the rels it mentions, and (2) we are at or above any outer joins that + * can null any of these rels and are below the syntactic location of the + * given qual. We must enforce (2) because pushing down such a clause below + * the OJ might cause the OJ to emit null-extended rows that should not have + * been formed, or that should have been rejected by the clause. (This is + * only an issue for non-strict quals, since if we can prove a qual mentioning + * only nullable rels is strict, we'd have reduced the outer join to an inner + * join in reduce_outer_joins().) + * + * To enforce (2), scan the oj_info_list and merge the required-relid sets of + * any such OJs into the clause's own reference list. At the time we are + * called, the oj_info_list contains only outer joins below this qual. We + * have to repeat the scan until no new relids get added; this ensures that + * the qual is suitably delayed regardless of the order in which OJs get + * executed. As an example, if we have one OJ with LHS=A, RHS=B, and one with + * LHS=B, RHS=C, it is implied that these can be done in either order; if the + * B/C join is done first then the join to A can null C, so a qual actually + * mentioning only C cannot be applied below the join to A. + * + * For an outer-join qual, this isn't going to determine where we place the + * qual, but we need to determine outerjoin_delayed anyway so we can decide + * whether the qual is potentially useful for equivalence deductions. */ -void -process_implied_equality(PlannerInfo *root, - Node *item1, Node *item2, - Oid sortop1, Oid sortop2, - Relids item1_relids, Relids item2_relids, - bool delete_it) +static bool +check_outerjoin_delay(PlannerInfo *root, Relids *relids_p) { - Relids relids; - BMS_Membership membership; - RelOptInfo *rel1; - List *restrictlist; - ListCell *itm; - Oid ltype, - rtype; - Operator eq_operator; - Form_pg_operator pgopform; - Expr *clause; - - /* Get set of relids referenced in the two expressions */ - relids = bms_union(item1_relids, item2_relids); - membership = bms_membership(relids); - - /* - * generate_implied_equalities() shouldn't call me on two constants. - */ - Assert(membership != BMS_EMPTY_SET); - - /* - * If the exprs involve a single rel, we need to look at that rel's - * baserestrictinfo list. If multiple rels, we can scan the joininfo list - * of any of 'em. - */ - if (membership == BMS_SINGLETON) - { - rel1 = find_base_rel(root, bms_singleton_member(relids)); - restrictlist = rel1->baserestrictinfo; - } - else - { - Relids other_rels; - int first_rel; - - /* Copy relids, find and remove one member */ - other_rels = bms_copy(relids); - first_rel = bms_first_member(other_rels); - bms_free(other_rels); + Relids relids = *relids_p; + bool outerjoin_delayed; + bool found_some; - rel1 = find_base_rel(root, first_rel); - restrictlist = rel1->joininfo; - } + outerjoin_delayed = false; + do { + ListCell *l; - /* - * Scan to see if equality is already known. If so, we're done in the add - * case, and done after removing it in the delete case. - */ - foreach(itm, restrictlist) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(itm); - Node *left, - *right; - - if (restrictinfo->mergejoinoperator == InvalidOid) - continue; /* ignore non-mergejoinable clauses */ - /* We now know the restrictinfo clause is a binary opclause */ - left = get_leftop(restrictinfo->clause); - right = get_rightop(restrictinfo->clause); - if ((equal(item1, left) && equal(item2, right)) || - (equal(item2, left) && equal(item1, right))) + found_some = false; + foreach(l, root->oj_info_list) { - /* found a matching clause */ - if (delete_it) + OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l); + + /* do we reference any nullable rels of this OJ? */ + if (bms_overlap(relids, ojinfo->min_righthand) || + (ojinfo->is_full_join && + bms_overlap(relids, ojinfo->min_lefthand))) { - if (membership == BMS_SINGLETON) - { - /* delete it from local restrictinfo list */ - rel1->baserestrictinfo = list_delete_ptr(rel1->baserestrictinfo, - restrictinfo); - } - else + /* yes; have we included all its rels in relids? */ + if (!bms_is_subset(ojinfo->min_lefthand, relids) || + !bms_is_subset(ojinfo->min_righthand, relids)) { - /* let joininfo.c do it */ - remove_join_clause_from_rels(root, restrictinfo, relids); + /* no, so add them in */ + relids = bms_add_members(relids, ojinfo->min_lefthand); + relids = bms_add_members(relids, ojinfo->min_righthand); + outerjoin_delayed = true; + /* we'll need another iteration */ + found_some = true; } } - return; /* done */ } - } + } while (found_some); - /* Didn't find it. Done if deletion requested */ - if (delete_it) - return; + *relids_p = relids; + return outerjoin_delayed; +} - /* - * This equality is new information, so construct a clause representing it - * to add to the query data structures. - */ - ltype = exprType(item1); - rtype = exprType(item2); - eq_operator = compatible_oper(NULL, list_make1(makeString("=")), - ltype, rtype, - true, -1); - if (!HeapTupleIsValid(eq_operator)) +/* + * distribute_restrictinfo_to_rels + * Push a completed RestrictInfo into the proper restriction or join + * clause list(s). + * + * This is the last step of distribute_qual_to_rels() for ordinary qual + * clauses. Clauses that are interesting for equivalence-class processing + * are diverted to the EC machinery, but may ultimately get fed back here. + */ +void +distribute_restrictinfo_to_rels(PlannerInfo *root, + RestrictInfo *restrictinfo) +{ + Relids relids = restrictinfo->required_relids; + RelOptInfo *rel; + + switch (bms_membership(relids)) { - /* - * Would it be safe to just not add the equality to the query if we - * have no suitable equality operator for the combination of - * datatypes? NO, because sortkey selection may screw up anyway. - */ - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("could not identify an equality operator for types %s and %s", - format_type_be(ltype), format_type_be(rtype)))); + case BMS_SINGLETON: + + /* + * There is only one relation participating in the clause, so + * it is a restriction clause for that relation. + */ + rel = find_base_rel(root, bms_singleton_member(relids)); + + /* Add clause to rel's restriction list */ + rel->baserestrictinfo = lappend(rel->baserestrictinfo, + restrictinfo); + break; + case BMS_MULTIPLE: + + /* + * The clause is a join clause, since there is more than one rel + * in its relid set. + */ + + /* + * Check for hashjoinable operators. (We don't bother setting + * the hashjoin info if we're not going to need it.) + */ + if (enable_hashjoin) + check_hashjoinable(restrictinfo); + + /* + * Add clause to the join lists of all the relevant relations. + */ + add_join_clause_to_rels(root, restrictinfo, relids); + break; + default: + + /* + * clause references no rels, and therefore we have no place to + * attach it. Shouldn't get here if callers are working properly. + */ + elog(ERROR, "cannot cope with variable-free clause"); + break; } - pgopform = (Form_pg_operator) GETSTRUCT(eq_operator); +} - /* - * Let's just make sure this appears to be a compatible operator. - * - * XXX needs work - */ - if (pgopform->oprresult != BOOLOID) - ereport(ERROR, - (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), - errmsg("equality operator for types %s and %s should be merge-joinable, but isn't", - format_type_be(ltype), format_type_be(rtype)))); +/* + * process_implied_equality + * Create a restrictinfo item that says "item1 op item2", and push it + * into the appropriate lists. (In practice opno is always a btree + * equality operator.) + * + * "qualscope" is the nominal syntactic level to impute to the restrictinfo. + * This must contain at least all the rels used in the expressions, but it + * is used only to set the qual application level when both exprs are + * variable-free. Otherwise the qual is applied at the lowest join level + * that provides all its variables. + * + * "both_const" indicates whether both items are known pseudo-constant; + * in this case it is worth applying eval_const_expressions() in case we + * can produce constant TRUE or constant FALSE. (Otherwise it's not, + * because the expressions went through eval_const_expressions already.) + * + * This is currently used only when an EquivalenceClass is found to + * contain pseudoconstants. See path/pathkeys.c for more details. + */ +void +process_implied_equality(PlannerInfo *root, + Oid opno, + Expr *item1, + Expr *item2, + Relids qualscope, + bool below_outer_join, + bool both_const) +{ + Expr *clause; /* - * Now we can build the new clause. Copy to ensure it shares no - * substructure with original (this is necessary in case there are - * subselects in there...) + * Build the new clause. Copy to ensure it shares no substructure with + * original (this is necessary in case there are subselects in there...) */ - clause = make_opclause(oprid(eq_operator), /* opno */ + clause = make_opclause(opno, BOOLOID, /* opresulttype */ false, /* opretset */ (Expr *) copyObject(item1), (Expr *) copyObject(item2)); - ReleaseSysCache(eq_operator); + /* If both constant, try to reduce to a boolean constant. */ + if (both_const) + { + clause = (Expr *) eval_const_expressions((Node *) clause); + + /* If we produced const TRUE, just drop the clause */ + if (clause && IsA(clause, Const)) + { + Const *cclause = (Const *) clause; + + Assert(cclause->consttype == BOOLOID); + if (!cclause->constisnull && DatumGetBool(cclause->constvalue)) + return; + } + } + + /* Make a copy of qualscope to avoid problems if source EC changes */ + qualscope = bms_copy(qualscope); /* * Push the new clause into all the appropriate restrictinfo lists. @@ -1159,119 +1099,53 @@ process_implied_equality(PlannerInfo *root, * taken for an original JOIN/ON clause. */ distribute_qual_to_rels(root, (Node *) clause, - true, true, false, relids, NULL, NULL); + true, true, below_outer_join, + qualscope, NULL, NULL); } /* - * qual_is_redundant - * Detect whether an implied-equality qual that turns out to be a - * restriction clause for a single base relation is redundant with - * already-known restriction clauses for that rel. This occurs with, - * for example, - * SELECT * FROM tab WHERE f1 = f2 AND f2 = f3; - * We need to suppress the redundant condition to avoid computing - * too-small selectivity, not to mention wasting time at execution. + * build_implied_join_equality --- build a RestrictInfo for a derived equality * - * Note: quals of the form "var = const" are never considered redundant, - * only those of the form "var = var". This is needed because when we - * have constants in an implied-equality set, we use a different strategy - * that suppresses all "var = var" deductions. We must therefore keep - * all the "var = const" quals. + * This overlaps the functionality of process_implied_equality(), but we + * must return the RestrictInfo, not push it into the joininfo tree. */ -static bool -qual_is_redundant(PlannerInfo *root, - RestrictInfo *restrictinfo, - List *restrictlist) +RestrictInfo * +build_implied_join_equality(Oid opno, + Expr *item1, + Expr *item2, + Relids qualscope) { - Node *newleft; - Node *newright; - List *oldquals; - ListCell *olditem; - List *equalexprs; - bool someadded; - - /* Never redundant unless vars appear on both sides */ - if (bms_is_empty(restrictinfo->left_relids) || - bms_is_empty(restrictinfo->right_relids)) - return false; - - newleft = get_leftop(restrictinfo->clause); - newright = get_rightop(restrictinfo->clause); - - /* - * Set cached pathkeys. NB: it is okay to do this now because this - * routine is only invoked while we are generating implied equalities. - * Therefore, the equi_key_list is already complete and so we can - * correctly determine canonical pathkeys. - */ - cache_mergeclause_pathkeys(root, restrictinfo); - /* If different, say "not redundant" (should never happen) */ - if (restrictinfo->left_pathkey != restrictinfo->right_pathkey) - return false; + RestrictInfo *restrictinfo; + Expr *clause; /* - * Scan existing quals to find those referencing same pathkeys. Usually - * there will be few, if any, so build a list of just the interesting - * ones. + * Build the new clause. Copy to ensure it shares no substructure with + * original (this is necessary in case there are subselects in there...) */ - oldquals = NIL; - foreach(olditem, restrictlist) - { - RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem); + clause = make_opclause(opno, + BOOLOID, /* opresulttype */ + false, /* opretset */ + (Expr *) copyObject(item1), + (Expr *) copyObject(item2)); - if (oldrinfo->mergejoinoperator != InvalidOid) - { - cache_mergeclause_pathkeys(root, oldrinfo); - if (restrictinfo->left_pathkey == oldrinfo->left_pathkey && - restrictinfo->right_pathkey == oldrinfo->right_pathkey) - oldquals = lcons(oldrinfo, oldquals); - } - } - if (oldquals == NIL) - return false; + /* Make a copy of qualscope to avoid problems if source EC changes */ + qualscope = bms_copy(qualscope); /* - * Now, we want to develop a list of exprs that are known equal to the - * left side of the new qual. We traverse the old-quals list repeatedly - * to transitively expand the exprs list. If at any point we find we can - * reach the right-side expr of the new qual, we are done. We give up - * when we can't expand the equalexprs list any more. + * Build the RestrictInfo node itself. */ - equalexprs = list_make1(newleft); - do - { - someadded = false; - /* cannot use foreach here because of possible list_delete */ - olditem = list_head(oldquals); - while (olditem) - { - RestrictInfo *oldrinfo = (RestrictInfo *) lfirst(olditem); - Node *oldleft = get_leftop(oldrinfo->clause); - Node *oldright = get_rightop(oldrinfo->clause); - Node *newguy = NULL; - - /* must advance olditem before list_delete possibly pfree's it */ - olditem = lnext(olditem); - - if (list_member(equalexprs, oldleft)) - newguy = oldright; - else if (list_member(equalexprs, oldright)) - newguy = oldleft; - else - continue; - if (equal(newguy, newright)) - return true; /* we proved new clause is redundant */ - equalexprs = lcons(newguy, equalexprs); - someadded = true; - - /* - * Remove this qual from list, since we don't need it anymore. - */ - oldquals = list_delete_ptr(oldquals, oldrinfo); - } - } while (someadded); - - return false; /* it's not redundant */ + restrictinfo = make_restrictinfo(clause, + true, /* is_pushed_down */ + false, /* outerjoin_delayed */ + false, /* pseudoconstant */ + qualscope); + + /* Set mergejoinability info always, and hashjoinability if enabled */ + check_mergejoinable(restrictinfo); + if (enable_hashjoin) + check_hashjoinable(restrictinfo); + + return restrictinfo; } @@ -1294,10 +1168,7 @@ static void check_mergejoinable(RestrictInfo *restrictinfo) { Expr *clause = restrictinfo->clause; - Oid opno, - leftOp, - rightOp; - Oid opfamily; + Oid opno; if (restrictinfo->pseudoconstant) return; @@ -1310,16 +1181,13 @@ check_mergejoinable(RestrictInfo *restrictinfo) if (op_mergejoinable(opno) && !contain_volatile_functions((Node *) clause)) - { - /* XXX for the moment, continue to force use of particular sortops */ - if (get_op_mergejoin_info(opno, &leftOp, &rightOp, &opfamily)) - { - restrictinfo->mergejoinoperator = opno; - restrictinfo->left_sortop = leftOp; - restrictinfo->right_sortop = rightOp; - restrictinfo->mergeopfamily = opfamily; - } - } + restrictinfo->mergeopfamilies = get_mergejoin_opfamilies(opno); + + /* + * Note: op_mergejoinable is just a hint; if we fail to find the + * operator in any btree opfamilies, mergeopfamilies remains NIL + * and so the clause is not treated as mergejoinable. + */ } /* diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 4d8d31045d..de3944b6b0 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.98 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.99 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -110,14 +110,14 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction, * for "simple" rels. * * NOTE: in_info_list and append_rel_list were set up by subquery_planner, - * do not touch here + * do not touch here; eq_classes may contain data already, too. */ root->simple_rel_array_size = list_length(parse->rtable) + 1; root->simple_rel_array = (RelOptInfo **) palloc0(root->simple_rel_array_size * sizeof(RelOptInfo *)); root->join_rel_list = NIL; root->join_rel_hash = NULL; - root->equi_key_list = NIL; + root->canon_pathkeys = NIL; root->left_join_clauses = NIL; root->right_join_clauses = NIL; root->full_join_clauses = NIL; @@ -165,8 +165,8 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction, * Examine the targetlist and qualifications, adding entries to baserel * targetlists for all referenced Vars. Restrict and join clauses are * added to appropriate lists belonging to the mentioned relations. We - * also build lists of equijoined keys for pathkey construction, and form - * a target joinlist for make_one_rel() to work from. + * also build EquivalenceClasses for provably equivalent expressions, + * and form a target joinlist for make_one_rel() to work from. * * Note: all subplan nodes will have "flat" (var-only) tlists. This * implies that all expression evaluations are done at the root of the @@ -179,16 +179,23 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction, joinlist = deconstruct_jointree(root); /* - * Use the completed lists of equijoined keys to deduce any implied but - * unstated equalities (for example, A=B and B=C imply A=C). + * Reconsider any postponed outer-join quals now that we have built up + * equivalence classes. (This could result in further additions or + * mergings of classes.) */ - generate_implied_equalities(root); + reconsider_outer_join_clauses(root); /* - * We should now have all the pathkey equivalence sets built, so it's now - * possible to convert the requested query_pathkeys to canonical form. - * Also canonicalize the groupClause and sortClause pathkeys for use - * later. + * If we formed any equivalence classes, generate additional restriction + * clauses as appropriate. (Implied join clauses are formed on-the-fly + * later.) + */ + generate_base_implied_equalities(root); + + /* + * We have completed merging equivalence sets, so it's now possible to + * convert the requested query_pathkeys to canonical form. Also + * canonicalize the groupClause and sortClause pathkeys for use later. */ root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys); root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 5fcfc58bf9..ae9c10fc6b 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.211 2007/01/10 18:06:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.212 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -206,6 +206,8 @@ subquery_planner(Query *parse, double tuple_fraction, /* Create a PlannerInfo data structure for this subquery */ root = makeNode(PlannerInfo); root->parse = parse; + root->planner_cxt = CurrentMemoryContext; + root->eq_classes = NIL; root->in_info_list = NIL; root->append_rel_list = NIL; @@ -715,9 +717,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * operation's result. We have to do this before overwriting the sort * key information... */ - current_pathkeys = make_pathkeys_for_sortclauses(set_sortclauses, - result_plan->targetlist); - current_pathkeys = canonicalize_pathkeys(root, current_pathkeys); + current_pathkeys = make_pathkeys_for_sortclauses(root, + set_sortclauses, + result_plan->targetlist, + true); /* * We should not need to call preprocess_targetlist, since we must be @@ -742,9 +745,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* * Calculate pathkeys that represent result ordering requirements */ - sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause, - tlist); - sort_pathkeys = canonicalize_pathkeys(root, sort_pathkeys); + sort_pathkeys = make_pathkeys_for_sortclauses(root, + parse->sortClause, + tlist, + true); } else { @@ -778,12 +782,18 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* * Calculate pathkeys that represent grouping/ordering requirements. * Stash them in PlannerInfo so that query_planner can canonicalize - * them. + * them after EquivalenceClasses have been formed. */ root->group_pathkeys = - make_pathkeys_for_sortclauses(parse->groupClause, tlist); + make_pathkeys_for_sortclauses(root, + parse->groupClause, + tlist, + false); root->sort_pathkeys = - make_pathkeys_for_sortclauses(parse->sortClause, tlist); + make_pathkeys_for_sortclauses(root, + parse->sortClause, + tlist, + false); /* * Will need actual number of aggregates for estimating costs. @@ -1069,10 +1079,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) { if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys)) { - result_plan = (Plan *) - make_sort_from_sortclauses(root, - parse->sortClause, - result_plan); + result_plan = (Plan *) make_sort_from_pathkeys(root, + result_plan, + sort_pathkeys); current_pathkeys = sort_pathkeys; } } diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 1c2253a779..ce6f9cc38e 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.45 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.46 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -292,6 +292,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, */ subroot = makeNode(PlannerInfo); subroot->parse = subquery; + subroot->planner_cxt = CurrentMemoryContext; subroot->in_info_list = NIL; subroot->append_rel_list = NIL; diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index cd7952aefd..ec93b0dad0 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -22,7 +22,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.135 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.136 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1195,10 +1195,8 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context) */ newinfo->eval_cost.startup = -1; newinfo->this_selec = -1; - newinfo->left_pathkey = NIL; - newinfo->right_pathkey = NIL; - newinfo->left_mergescansel = -1; - newinfo->right_mergescansel = -1; + newinfo->left_ec = NULL; + newinfo->right_ec = NULL; newinfo->left_bucketsize = -1; newinfo->right_bucketsize = -1; diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c index 2971282bf3..e58903c5d0 100644 --- a/src/backend/optimizer/util/joininfo.c +++ b/src/backend/optimizer/util/joininfo.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/joininfo.c,v 1.46 2007/01/05 22:19:32 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/joininfo.c,v 1.47 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" +#include "optimizer/paths.h" /* @@ -54,6 +55,13 @@ have_relevant_joinclause(PlannerInfo *root, } } + /* + * We also need to check the EquivalenceClass data structure, which + * might contain relationships not emitted into the joininfo lists. + */ + if (!result && rel1->has_eclass_joins && rel2->has_eclass_joins) + result = have_relevant_eclass_joinclause(root, rel1, rel2); + /* * It's possible that the rels correspond to the left and right sides * of a degenerate outer join, that is, one with no joinclause mentioning @@ -124,37 +132,3 @@ add_join_clause_to_rels(PlannerInfo *root, } bms_free(tmprelids); } - -/* - * remove_join_clause_from_rels - * Delete 'restrictinfo' from all the joininfo lists it is in - * - * This reverses the effect of add_join_clause_to_rels. It's used when we - * discover that a join clause is redundant. - * - * 'restrictinfo' describes the join clause - * 'join_relids' is the list of relations participating in the join clause - * (there must be more than one) - */ -void -remove_join_clause_from_rels(PlannerInfo *root, - RestrictInfo *restrictinfo, - Relids join_relids) -{ - Relids tmprelids; - int cur_relid; - - tmprelids = bms_copy(join_relids); - while ((cur_relid = bms_first_member(tmprelids)) >= 0) - { - RelOptInfo *rel = find_base_rel(root, cur_relid); - - /* - * Remove the restrictinfo from the list. Pointer comparison is - * sufficient. - */ - Assert(list_member_ptr(rel->joininfo, restrictinfo)); - rel->joininfo = list_delete_ptr(rel->joininfo, restrictinfo); - } - bms_free(tmprelids); -} diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 8ee6346e5b..5832d145ef 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.136 2007/01/10 18:06:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.137 2007/01/20 20:45:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,7 +26,6 @@ #include "parser/parse_expr.h" #include "parser/parse_oper.h" #include "parser/parsetree.h" -#include "utils/memutils.h" #include "utils/selfuncs.h" #include "utils/lsyscache.h" #include "utils/syscache.h" @@ -747,11 +746,11 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) return (UniquePath *) rel->cheapest_unique_path; /* - * We must ensure path struct is allocated in same context as parent rel; + * We must ensure path struct is allocated in main planning context; * otherwise GEQO memory management causes trouble. (Compare * best_inner_indexscan().) */ - oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel)); + oldcontext = MemoryContextSwitchTo(root->planner_cxt); pathnode = makeNode(UniquePath); @@ -1198,11 +1197,6 @@ create_nestloop_path(PlannerInfo *root, * 'pathkeys' are the path keys of the new join path * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses * (this should be a subset of the restrict_clauses list) - * 'mergefamilies' are the btree opfamily OIDs identifying the merge - * ordering for each merge clause - * 'mergestrategies' are the btree operator strategies identifying the merge - * ordering for each merge clause - * 'mergenullsfirst' are the nulls first/last flags for each merge clause * 'outersortkeys' are the sort varkeys for the outer relation * 'innersortkeys' are the sort varkeys for the inner relation */ @@ -1215,9 +1209,6 @@ create_mergejoin_path(PlannerInfo *root, List *restrict_clauses, List *pathkeys, List *mergeclauses, - Oid *mergefamilies, - int *mergestrategies, - bool *mergenullsfirst, List *outersortkeys, List *innersortkeys) { @@ -1258,9 +1249,6 @@ create_mergejoin_path(PlannerInfo *root, pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->jpath.path.pathkeys = pathkeys; pathnode->path_mergeclauses = mergeclauses; - pathnode->path_mergeFamilies = mergefamilies; - pathnode->path_mergeStrategies = mergestrategies; - pathnode->path_mergeNullsFirst = mergenullsfirst; pathnode->outersortkeys = outersortkeys; pathnode->innersortkeys = innersortkeys; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 86d8107e63..8cc6c81746 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.84 2007/01/05 22:19:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.85 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "optimizer/cost.h" #include "optimizer/pathnode.h" +#include "optimizer/paths.h" #include "optimizer/plancat.h" #include "optimizer/restrictinfo.h" #include "parser/parsetree.h" @@ -31,17 +32,18 @@ typedef struct JoinHashEntry static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *input_rel); static List *build_joinrel_restrictlist(PlannerInfo *root, - RelOptInfo *joinrel, - RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - JoinType jointype); + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); static void build_joinrel_joinlist(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel); static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel, - List *joininfo_list); -static void subbuild_joinrel_joinlist(RelOptInfo *joinrel, - List *joininfo_list); + List *joininfo_list, + List *new_restrictlist); +static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel, + List *joininfo_list, + List *new_joininfo); /* @@ -84,6 +86,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->baserestrictcost.startup = 0; rel->baserestrictcost.per_tuple = 0; rel->joininfo = NIL; + rel->has_eclass_joins = false; rel->index_outer_relids = NULL; rel->index_inner_paths = NIL; @@ -303,8 +306,7 @@ build_join_rel(PlannerInfo *root, *restrictlist_ptr = build_joinrel_restrictlist(root, joinrel, outer_rel, - inner_rel, - jointype); + inner_rel); return joinrel; } @@ -335,6 +337,7 @@ build_join_rel(PlannerInfo *root, joinrel->baserestrictcost.startup = 0; joinrel->baserestrictcost.per_tuple = 0; joinrel->joininfo = NIL; + joinrel->has_eclass_joins = false; joinrel->index_outer_relids = NULL; joinrel->index_inner_paths = NIL; @@ -354,15 +357,18 @@ build_join_rel(PlannerInfo *root, * caller might or might not need the restrictlist, but I need it anyway * for set_joinrel_size_estimates().) */ - restrictlist = build_joinrel_restrictlist(root, - joinrel, - outer_rel, - inner_rel, - jointype); + restrictlist = build_joinrel_restrictlist(root, joinrel, + outer_rel, inner_rel); if (restrictlist_ptr) *restrictlist_ptr = restrictlist; build_joinrel_joinlist(joinrel, outer_rel, inner_rel); + /* + * This is also the right place to check whether the joinrel has any + * pending EquivalenceClass joins. + */ + joinrel->has_eclass_joins = has_relevant_eclass_joinclause(root, joinrel); + /* * Set estimates of the joinrel's size. */ @@ -468,15 +474,15 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, * join paths made from this pair of sub-relations. (It will not need to * be considered further up the join tree.) * - * When building a restriction list, we eliminate redundant clauses. - * We don't try to do that for join clause lists, since the join clauses - * aren't really doing anything, just waiting to become part of higher - * levels' restriction lists. + * In many case we will find the same RestrictInfos in both input + * relations' joinlists, so be careful to eliminate duplicates. + * Pointer equality should be a sufficient test for dups, since all + * the various joinlist entries ultimately refer to RestrictInfos + * pushed into them by distribute_restrictinfo_to_rels(). * * 'joinrel' is a join relation node * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined * to form joinrel. - * 'jointype' is the type of join used. * * build_joinrel_restrictlist() returns a list of relevant restrictinfos, * whereas build_joinrel_joinlist() stores its results in the joinrel's @@ -491,33 +497,27 @@ static List * build_joinrel_restrictlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outer_rel, - RelOptInfo *inner_rel, - JoinType jointype) + RelOptInfo *inner_rel) { List *result; - List *rlist; /* - * Collect all the clauses that syntactically belong at this level. + * Collect all the clauses that syntactically belong at this level, + * eliminating any duplicates (important since we will see many of the + * same clauses arriving from both input relations). */ - rlist = list_concat(subbuild_joinrel_restrictlist(joinrel, - outer_rel->joininfo), - subbuild_joinrel_restrictlist(joinrel, - inner_rel->joininfo)); - + result = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo, NIL); + result = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo, result); /* - * Eliminate duplicate and redundant clauses. - * - * We must eliminate duplicates, since we will see many of the same - * clauses arriving from both input relations. Also, if a clause is a - * mergejoinable clause, it's possible that it is redundant with previous - * clauses (see optimizer/README for discussion). We detect that case and - * omit the redundant clause from the result list. + * Add on any clauses derived from EquivalenceClasses. These cannot be + * redundant with the clauses in the joininfo lists, so don't bother + * checking. */ - result = remove_redundant_join_clauses(root, rlist, - IS_OUTER_JOIN(jointype)); - - list_free(rlist); + result = list_concat(result, + generate_join_implied_equalities(root, + joinrel, + outer_rel, + inner_rel)); return result; } @@ -527,15 +527,24 @@ build_joinrel_joinlist(RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel) { - subbuild_joinrel_joinlist(joinrel, outer_rel->joininfo); - subbuild_joinrel_joinlist(joinrel, inner_rel->joininfo); + List *result; + + /* + * Collect all the clauses that syntactically belong above this level, + * eliminating any duplicates (important since we will see many of the + * same clauses arriving from both input relations). + */ + result = subbuild_joinrel_joinlist(joinrel, outer_rel->joininfo, NIL); + result = subbuild_joinrel_joinlist(joinrel, inner_rel->joininfo, result); + + joinrel->joininfo = result; } static List * subbuild_joinrel_restrictlist(RelOptInfo *joinrel, - List *joininfo_list) + List *joininfo_list, + List *new_restrictlist) { - List *restrictlist = NIL; ListCell *l; foreach(l, joininfo_list) @@ -546,10 +555,12 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel, { /* * This clause becomes a restriction clause for the joinrel, since - * it refers to no outside rels. We don't bother to check for - * duplicates here --- build_joinrel_restrictlist will do that. + * it refers to no outside rels. Add it to the list, being + * careful to eliminate duplicates. (Since RestrictInfo nodes in + * different joinlists will have been multiply-linked rather than + * copied, pointer equality should be a sufficient test.) */ - restrictlist = lappend(restrictlist, rinfo); + new_restrictlist = list_append_unique_ptr(new_restrictlist, rinfo); } else { @@ -560,12 +571,13 @@ subbuild_joinrel_restrictlist(RelOptInfo *joinrel, } } - return restrictlist; + return new_restrictlist; } -static void +static List * subbuild_joinrel_joinlist(RelOptInfo *joinrel, - List *joininfo_list) + List *joininfo_list, + List *new_joininfo) { ListCell *l; @@ -585,15 +597,14 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel, { /* * This clause is still a join clause at this level, so add it to - * the joininfo list for the joinrel, being careful to eliminate - * duplicates. (Since RestrictInfo nodes are normally - * multiply-linked rather than copied, pointer equality should be - * a sufficient test. If two equal() nodes should happen to sneak - * in, no great harm is done --- they'll be detected by - * redundant-clause testing when they reach a restriction list.) + * the new joininfo list, being careful to eliminate + * duplicates. (Since RestrictInfo nodes in different joinlists + * will have been multiply-linked rather than copied, pointer + * equality should be a sufficient test.) */ - joinrel->joininfo = list_append_unique_ptr(joinrel->joininfo, - rinfo); + new_joininfo = list_append_unique_ptr(new_joininfo, rinfo); } } + + return new_joininfo; } diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index e35d00bff7..ea8bb5c970 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.51 2007/01/05 22:19:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.52 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,10 +33,9 @@ static Expr *make_sub_restrictinfos(Expr *clause, bool outerjoin_delayed, bool pseudoconstant, Relids required_relids); -static RestrictInfo *join_clause_is_redundant(PlannerInfo *root, +static bool join_clause_is_redundant(PlannerInfo *root, RestrictInfo *rinfo, - List *reference_list, - bool isouterjoin); + List *reference_list); /* @@ -336,19 +335,17 @@ make_restrictinfo_internal(Expr *clause, * that happens only if it appears in the right context (top level of a * joinclause list). */ + restrictinfo->parent_ec = NULL; + restrictinfo->eval_cost.startup = -1; restrictinfo->this_selec = -1; - restrictinfo->mergejoinoperator = InvalidOid; - restrictinfo->left_sortop = InvalidOid; - restrictinfo->right_sortop = InvalidOid; - restrictinfo->mergeopfamily = InvalidOid; + restrictinfo->mergeopfamilies = NIL; - restrictinfo->left_pathkey = NIL; - restrictinfo->right_pathkey = NIL; + restrictinfo->left_ec = NULL; + restrictinfo->right_ec = NULL; - restrictinfo->left_mergescansel = -1; - restrictinfo->right_mergescansel = -1; + restrictinfo->outer_is_left = false; restrictinfo->hashjoinoperator = InvalidOid; @@ -529,78 +526,18 @@ extract_actual_join_clauses(List *restrictinfo_list, } } -/* - * remove_redundant_join_clauses - * - * Given a list of RestrictInfo clauses that are to be applied in a join, - * remove any duplicate or redundant clauses. - * - * We must eliminate duplicates when forming the restrictlist for a joinrel, - * since we will see many of the same clauses arriving from both input - * relations. Also, if a clause is a mergejoinable clause, it's possible that - * it is redundant with previous clauses (see optimizer/README for - * discussion). We detect that case and omit the redundant clause from the - * result list. - * - * The result is a fresh List, but it points to the same member nodes - * as were in the input. - */ -List * -remove_redundant_join_clauses(PlannerInfo *root, List *restrictinfo_list, - bool isouterjoin) -{ - List *result = NIL; - ListCell *item; - QualCost cost; - - /* - * If there are any redundant clauses, we want to eliminate the ones that - * are more expensive in favor of the ones that are less so. Run - * cost_qual_eval() to ensure the eval_cost fields are set up. - */ - cost_qual_eval(&cost, restrictinfo_list); - - /* - * We don't have enough knowledge yet to be able to estimate the number of - * times a clause might be evaluated, so it's hard to weight the startup - * and per-tuple costs appropriately. For now just weight 'em the same. - */ -#define CLAUSECOST(r) ((r)->eval_cost.startup + (r)->eval_cost.per_tuple) - - foreach(item, restrictinfo_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(item); - RestrictInfo *prevrinfo; - - /* is it redundant with any prior clause? */ - prevrinfo = join_clause_is_redundant(root, rinfo, result, isouterjoin); - if (prevrinfo == NULL) - { - /* no, so add it to result list */ - result = lappend(result, rinfo); - } - else if (CLAUSECOST(rinfo) < CLAUSECOST(prevrinfo)) - { - /* keep this one, drop the previous one */ - result = list_delete_ptr(result, prevrinfo); - result = lappend(result, rinfo); - } - /* else, drop this one */ - } - - return result; -} - /* * select_nonredundant_join_clauses * * Given a list of RestrictInfo clauses that are to be applied in a join, * select the ones that are not redundant with any clause in the - * reference_list. + * reference_list. This is used only for nestloop-with-inner-indexscan + * joins: any clauses being checked by the index should be removed from + * the qpquals list. * - * This is similar to remove_redundant_join_clauses, but we are looking for - * redundancies with a separate list of clauses (i.e., clauses that have - * already been applied below the join itself). + * "Redundant" means either equal() or derived from the same EquivalenceClass. + * We have to check the latter because indxqual.c may select different derived + * clauses than were selected by generate_join_implied_equalities(). * * Note that we assume the given restrictinfo_list has already been checked * for local redundancies, so we don't check again. @@ -608,8 +545,7 @@ remove_redundant_join_clauses(PlannerInfo *root, List *restrictinfo_list, List * select_nonredundant_join_clauses(PlannerInfo *root, List *restrictinfo_list, - List *reference_list, - bool isouterjoin) + List *reference_list) { List *result = NIL; ListCell *item; @@ -619,7 +555,7 @@ select_nonredundant_join_clauses(PlannerInfo *root, RestrictInfo *rinfo = (RestrictInfo *) lfirst(item); /* drop it if redundant with any reference clause */ - if (join_clause_is_redundant(root, rinfo, reference_list, isouterjoin) != NULL) + if (join_clause_is_redundant(root, rinfo, reference_list)) continue; /* otherwise, add it to result list */ @@ -631,79 +567,28 @@ select_nonredundant_join_clauses(PlannerInfo *root, /* * join_clause_is_redundant - * If rinfo is redundant with any clause in reference_list, - * return one such clause; otherwise return NULL. - * - * This is the guts of both remove_redundant_join_clauses and - * select_nonredundant_join_clauses. See the docs above for motivation. - * - * We can detect redundant mergejoinable clauses very cheaply by using their - * left and right pathkeys, which uniquely identify the sets of equijoined - * variables in question. All the members of a pathkey set that are in the - * left relation have already been forced to be equal; likewise for those in - * the right relation. So, we need to have only one clause that checks - * equality between any set member on the left and any member on the right; - * by transitivity, all the rest are then equal. - * - * However, clauses that are of the form "var expr = const expr" cannot be - * eliminated as redundant. This is because when there are const expressions - * in a pathkey set, generate_implied_equalities() suppresses "var = var" - * clauses in favor of "var = const" clauses. We cannot afford to drop any - * of the latter, even though they might seem redundant by the pathkey - * membership test. - * - * Weird special case: if we have two clauses that seem redundant - * except one is pushed down into an outer join and the other isn't, - * then they're not really redundant, because one constrains the - * joined rows after addition of null fill rows, and the other doesn't. + * Test whether rinfo is redundant with any clause in reference_list. */ -static RestrictInfo * +static bool join_clause_is_redundant(PlannerInfo *root, RestrictInfo *rinfo, - List *reference_list, - bool isouterjoin) + List *reference_list) { ListCell *refitem; - /* always consider exact duplicates redundant */ foreach(refitem, reference_list) { RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem); + /* always consider exact duplicates redundant */ if (equal(rinfo, refrinfo)) - return refrinfo; - } - - /* check for redundant merge clauses */ - if (rinfo->mergejoinoperator != InvalidOid) - { - /* do the cheap test first: is it a "var = const" clause? */ - if (bms_is_empty(rinfo->left_relids) || - bms_is_empty(rinfo->right_relids)) - return NULL; /* var = const, so not redundant */ - - cache_mergeclause_pathkeys(root, rinfo); - - foreach(refitem, reference_list) - { - RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem); + return true; - if (refrinfo->mergejoinoperator != InvalidOid) - { - cache_mergeclause_pathkeys(root, refrinfo); - - if (rinfo->left_pathkey == refrinfo->left_pathkey && - rinfo->right_pathkey == refrinfo->right_pathkey && - (rinfo->is_pushed_down == refrinfo->is_pushed_down || - !isouterjoin)) - { - /* Yup, it's redundant */ - return refrinfo; - } - } - } + /* check if derived from same EquivalenceClass */ + if (rinfo->parent_ec != NULL && + rinfo->parent_ec == refrinfo->parent_ec) + return true; } - /* otherwise, not redundant */ - return NULL; + return false; } diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 9a6b8e8907..961b78aa20 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.75 2007/01/05 22:19:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_agg.c,v 1.76 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -171,6 +171,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry) { root = makeNode(PlannerInfo); root->parse = qry; + root->planner_cxt = CurrentMemoryContext; root->hasJoinRTEs = true; groupClauses = (List *) flatten_join_alias_vars(root, diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 875c7c524a..493df17b6c 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.219 2007/01/09 02:14:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.220 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2345,7 +2345,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, * expressional index for which we have statistics, then we treat the * whole expression as though it were just a Var. * 2. If the list contains Vars of different relations that are known equal - * due to equijoin clauses, then drop all but one of the Vars from each + * due to equivalence classes, then drop all but one of the Vars from each * known-equal set, keeping the one with smallest estimated # of values * (since the extra values of the others can't appear in joined rows). * Note the reason we only consider Vars of different relations is that @@ -2365,10 +2365,9 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, * 4. If there are Vars from multiple rels, we repeat step 3 for each such * rel, and multiply the results together. * Note that rels not containing grouped Vars are ignored completely, as are - * join clauses other than the equijoin clauses used in step 2. Such rels - * cannot increase the number of groups, and we assume such clauses do not - * reduce the number either (somewhat bogus, but we don't have the info to - * do better). + * join clauses. Such rels cannot increase the number of groups, and we + * assume such clauses do not reduce the number either (somewhat bogus, + * but we don't have the info to do better). */ double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows) diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 40a74594fa..8988d2e10f 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.143 2007/01/10 18:06:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/lsyscache.c,v 1.144 2007/01/20 20:45:40 tgl Exp $ * * NOTES * Eventually, the index information should go through here, too. @@ -138,153 +138,6 @@ get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, return result; } -/* - * get_op_mergejoin_info - * Given the OIDs of a (putatively) mergejoinable equality operator - * and a sortop defining the sort ordering of the lefthand input of - * the merge clause, determine whether this sort ordering is actually - * usable for merging. If so, return the required sort ordering op - * for the righthand input, as well as the btree opfamily OID containing - * these operators and the operator strategy number of the two sortops - * (either BTLessStrategyNumber or BTGreaterStrategyNumber). - * - * We can mergejoin if we find the two operators in the same opfamily as - * equality and either less-than or greater-than respectively. If there - * are multiple such opfamilies, assume we can use any one. - */ -#ifdef NOT_YET -/* eventually should look like this */ -bool -get_op_mergejoin_info(Oid eq_op, Oid left_sortop, - Oid *right_sortop, Oid *opfamily, int *opstrategy) -{ - bool result = false; - Oid lefttype; - Oid righttype; - CatCList *catlist; - int i; - - /* Make sure output args are initialized even on failure */ - *right_sortop = InvalidOid; - *opfamily = InvalidOid; - *opstrategy = 0; - - /* Need the righthand input datatype */ - op_input_types(eq_op, &lefttype, &righttype); - - /* - * Search through all the pg_amop entries containing the equality operator - */ - catlist = SearchSysCacheList(AMOPOPID, 1, - ObjectIdGetDatum(eq_op), - 0, 0, 0); - - for (i = 0; i < catlist->n_members; i++) - { - HeapTuple op_tuple = &catlist->members[i]->tuple; - Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple); - Oid opfamily_id; - StrategyNumber op_strategy; - - /* must be btree */ - if (op_form->amopmethod != BTREE_AM_OID) - continue; - /* must use the operator as equality */ - if (op_form->amopstrategy != BTEqualStrategyNumber) - continue; - - /* See if sort operator is also in this opfamily with OK semantics */ - opfamily_id = op_form->amopfamily; - op_strategy = get_op_opfamily_strategy(left_sortop, opfamily_id); - if (op_strategy == BTLessStrategyNumber || - op_strategy == BTGreaterStrategyNumber) - { - /* Yes, so find the corresponding righthand sortop */ - *right_sortop = get_opfamily_member(opfamily_id, - righttype, - righttype, - op_strategy); - if (OidIsValid(*right_sortop)) - { - /* Found a workable mergejoin semantics */ - *opfamily = opfamily_id; - *opstrategy = op_strategy; - result = true; - break; - } - } - } - - ReleaseSysCacheList(catlist); - - return result; -} -#else -/* temp implementation until planner gets smarter: left_sortop is output */ -bool -get_op_mergejoin_info(Oid eq_op, Oid *left_sortop, - Oid *right_sortop, Oid *opfamily) -{ - bool result = false; - Oid lefttype; - Oid righttype; - CatCList *catlist; - int i; - - /* Make sure output args are initialized even on failure */ - *left_sortop = InvalidOid; - *right_sortop = InvalidOid; - *opfamily = InvalidOid; - - /* Need the input datatypes */ - op_input_types(eq_op, &lefttype, &righttype); - - /* - * Search through all the pg_amop entries containing the equality operator - */ - catlist = SearchSysCacheList(AMOPOPID, 1, - ObjectIdGetDatum(eq_op), - 0, 0, 0); - - for (i = 0; i < catlist->n_members; i++) - { - HeapTuple op_tuple = &catlist->members[i]->tuple; - Form_pg_amop op_form = (Form_pg_amop) GETSTRUCT(op_tuple); - Oid opfamily_id; - - /* must be btree */ - if (op_form->amopmethod != BTREE_AM_OID) - continue; - /* must use the operator as equality */ - if (op_form->amopstrategy != BTEqualStrategyNumber) - continue; - - opfamily_id = op_form->amopfamily; - - /* Find the matching sortops */ - *left_sortop = get_opfamily_member(opfamily_id, - lefttype, - lefttype, - BTLessStrategyNumber); - *right_sortop = get_opfamily_member(opfamily_id, - righttype, - righttype, - BTLessStrategyNumber); - if (OidIsValid(*left_sortop) && OidIsValid(*right_sortop)) - { - /* Found a workable mergejoin semantics */ - *opfamily = opfamily_id; - result = true; - break; - } - } - - ReleaseSysCacheList(catlist); - - return result; -} -#endif - /* * get_compare_function_for_ordering_op * Get the OID of the datatype-specific btree comparison function @@ -469,6 +322,56 @@ get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type) return result; } +/* + * get_mergejoin_opfamilies + * Given a putatively mergejoinable operator, return a list of the OIDs + * of the btree opfamilies in which it represents equality. + * + * It is possible (though at present unusual) for an operator to be equality + * in more than one opfamily, hence the result is a list. This also lets us + * return NIL if the operator is not found in any opfamilies. + * + * The planner currently uses simple equal() tests to compare the lists + * returned by this function, which makes the list order relevant, though + * strictly speaking it should not be. Because of the way syscache list + * searches are handled, in normal operation the result will be sorted by OID + * so everything works fine. If running with system index usage disabled, + * the result ordering is unspecified and hence the planner might fail to + * recognize optimization opportunities ... but that's hardly a scenario in + * which performance is good anyway, so there's no point in expending code + * or cycles here to guarantee the ordering in that case. + */ +List * +get_mergejoin_opfamilies(Oid opno) +{ + List *result = NIL; + CatCList *catlist; + int i; + + /* + * Search pg_amop to see if the target operator is registered as the "=" + * operator of any btree opfamily. + */ + catlist = SearchSysCacheList(AMOPOPID, 1, + ObjectIdGetDatum(opno), + 0, 0, 0); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + /* must be btree equality */ + if (aform->amopmethod == BTREE_AM_OID && + aform->amopstrategy == BTEqualStrategyNumber) + result = lappend_oid(result, aform->amopfamily); + } + + ReleaseSysCacheList(catlist); + + return result; +} + /* * get_compatible_hash_operator * Get the OID of a hash equality operator compatible with the given diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index be151ac9f3..d3e84bdf69 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.191 2007/01/05 22:19:55 momjian Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.192 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -190,7 +190,9 @@ typedef enum NodeTag T_ResultPath, T_MaterialPath, T_UniquePath, - T_PathKeyItem, + T_EquivalenceClass, + T_EquivalenceMember, + T_PathKey, T_RestrictInfo, T_InnerIndexscanInfo, T_OuterJoinInfo, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index f790037739..a83a20d21b 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.132 2007/01/10 18:06:04 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.133 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -69,7 +69,7 @@ typedef struct PlannerInfo * does not correspond to a base relation, such as a join RTE or an * unreferenced view RTE; or if the RelOptInfo hasn't been made yet. */ - struct RelOptInfo **simple_rel_array; /* All 1-relation RelOptInfos */ + struct RelOptInfo **simple_rel_array; /* All 1-rel RelOptInfos */ int simple_rel_array_size; /* allocated size of array */ /* @@ -84,18 +84,20 @@ typedef struct PlannerInfo List *join_rel_list; /* list of join-relation RelOptInfos */ struct HTAB *join_rel_hash; /* optional hashtable for join relations */ - List *equi_key_list; /* list of lists of equijoined PathKeyItems */ + List *eq_classes; /* list of active EquivalenceClasses */ - List *left_join_clauses; /* list of RestrictInfos for outer - * join clauses w/nonnullable var on - * left */ + List *canon_pathkeys; /* list of "canonical" PathKeys */ - List *right_join_clauses; /* list of RestrictInfos for outer - * join clauses w/nonnullable var on - * right */ + List *left_join_clauses; /* list of RestrictInfos for + * mergejoinable outer join clauses + * w/nonnullable var on left */ - List *full_join_clauses; /* list of RestrictInfos for full - * outer join clauses */ + List *right_join_clauses; /* list of RestrictInfos for + * mergejoinable outer join clauses + * w/nonnullable var on right */ + + List *full_join_clauses; /* list of RestrictInfos for + * mergejoinable full join clauses */ List *oj_info_list; /* list of OuterJoinInfos */ @@ -109,6 +111,8 @@ typedef struct PlannerInfo List *group_pathkeys; /* groupClause pathkeys, if any */ List *sort_pathkeys; /* sortClause pathkeys, if any */ + MemoryContext planner_cxt; /* context holding PlannerInfo */ + double total_table_pages; /* # of pages in all tables of query */ double tuple_fraction; /* tuple_fraction passed to query_planner */ @@ -209,7 +213,10 @@ typedef struct PlannerInfo * baserestrictcost - Estimated cost of evaluating the baserestrictinfo * clauses at a single tuple (only used for base rels) * joininfo - List of RestrictInfo nodes, containing info about each - * join clause in which this relation participates + * join clause in which this relation participates (but + * note this excludes clauses that might be derivable from + * EquivalenceClasses) + * has_eclass_joins - flag that EquivalenceClass joins are possible * index_outer_relids - only used for base rels; set of outer relids * that participate in indexable joinclauses for this rel * index_inner_paths - only used for base rels; list of InnerIndexscanInfo @@ -278,6 +285,7 @@ typedef struct RelOptInfo QualCost baserestrictcost; /* cost of evaluating the above */ List *joininfo; /* RestrictInfo structures for join clauses * involving this rel */ + bool has_eclass_joins; /* T means joininfo is incomplete */ /* cached info about inner indexscan paths for relation: */ Relids index_outer_relids; /* other relids in indexable join @@ -348,32 +356,107 @@ typedef struct IndexOptInfo } IndexOptInfo; +/* + * EquivalenceClasses + * + * Whenever we can determine that a mergejoinable equality clause A = B is + * not delayed by any outer join, we create an EquivalenceClass containing + * the expressions A and B to record this knowledge. If we later find another + * equivalence B = C, we add C to the existing EquivalenceClass; this may + * require merging two existing EquivalenceClasses. At the end of the qual + * distribution process, we have sets of values that are known all transitively + * equal to each other, where "equal" is according to the rules of the btree + * operator family(s) shown in ec_opfamilies. (We restrict an EC to contain + * only equalities whose operators belong to the same set of opfamilies. This + * could probably be relaxed, but for now it's not worth the trouble, since + * nearly all equality operators belong to only one btree opclass anyway.) + * + * We also use EquivalenceClasses as the base structure for PathKeys, letting + * us represent knowledge about different sort orderings being equivalent. + * Since every PathKey must reference an EquivalenceClass, we will end up + * with single-member EquivalenceClasses whenever a sort key expression has + * not been equivalenced to anything else. It is also possible that such an + * EquivalenceClass will contain a volatile expression ("ORDER BY random()"), + * which is a case that can't arise otherwise since clauses containing + * volatile functions are never considered mergejoinable. We mark such + * EquivalenceClasses specially to prevent them from being merged with + * ordinary EquivalenceClasses. + * + * We allow equality clauses appearing below the nullable side of an outer join + * to form EquivalenceClasses, but these have a slightly different meaning: + * the included values might be all NULL rather than all the same non-null + * values. See src/backend/optimizer/README for more on that point. + * + * NB: if ec_merged isn't NULL, this class has been merged into another, and + * should be ignored in favor of using the pointed-to class. + */ +typedef struct EquivalenceClass +{ + NodeTag type; + + List *ec_opfamilies; /* btree operator family OIDs */ + List *ec_members; /* list of EquivalenceMembers */ + List *ec_sources; /* list of generating RestrictInfos */ + Relids ec_relids; /* all relids appearing in ec_members */ + bool ec_has_const; /* any pseudoconstants in ec_members? */ + bool ec_has_volatile; /* the (sole) member is a volatile expr */ + bool ec_below_outer_join; /* equivalence applies below an OJ */ + bool ec_broken; /* failed to generate needed clauses? */ + struct EquivalenceClass *ec_merged; /* set if merged into another EC */ +} EquivalenceClass; + +/* + * EquivalenceMember - one member expression of an EquivalenceClass + * + * em_is_child signifies that this element was built by transposing a member + * for an inheritance parent relation to represent the corresponding expression + * on an inheritance child. The element should be ignored for all purposes + * except constructing inner-indexscan paths for the child relation. (Other + * types of join are driven from transposed joininfo-list entries.) Note + * that the EC's ec_relids field does NOT include the child relation. + * + * em_datatype is usually the same as exprType(em_expr), but can be + * different when dealing with a binary-compatible opfamily; in particular + * anyarray_ops would never work without this. Use em_datatype when + * looking up a specific btree operator to work with this expression. + */ +typedef struct EquivalenceMember +{ + NodeTag type; + + Expr *em_expr; /* the expression represented */ + Relids em_relids; /* all relids appearing in em_expr */ + bool em_is_const; /* expression is pseudoconstant? */ + bool em_is_child; /* derived version for a child relation? */ + Oid em_datatype; /* the "nominal type" used by the opfamily */ +} EquivalenceMember; + /* * PathKeys * - * The sort ordering of a path is represented by a list of sublists of - * PathKeyItem nodes. An empty list implies no known ordering. Otherwise - * the first sublist represents the primary sort key, the second the - * first secondary sort key, etc. Each sublist contains one or more - * PathKeyItem nodes, each of which can be taken as the attribute that - * appears at that sort position. (See optimizer/README for more - * information.) + * The sort ordering of a path is represented by a list of PathKey nodes. + * An empty list implies no known ordering. Otherwise the first item + * represents the primary sort key, the second the first secondary sort key, + * etc. The value being sorted is represented by linking to an + * EquivalenceClass containing that value and including pk_opfamily among its + * ec_opfamilies. This is a convenient method because it makes it trivial + * to detect equivalent and closely-related orderings. (See optimizer/README + * for more information.) + * + * Note: pk_strategy is either BTLessStrategyNumber (for ASC) or + * BTGreaterStrategyNumber (for DESC). We assume that all ordering-capable + * index types will use btree-compatible strategy numbers. */ -typedef struct PathKeyItem +typedef struct PathKey { NodeTag type; - Node *key; /* the item that is ordered */ - Oid sortop; /* the ordering operator ('<' op) */ - bool nulls_first; /* do NULLs come before normal values? */ - - /* - * key typically points to a Var node, ie a relation attribute, but it can - * also point to an arbitrary expression representing the value indexed by - * an index expression. - */ -} PathKeyItem; + EquivalenceClass *pk_eclass; /* the value that is ordered */ + Oid pk_opfamily; /* btree opfamily defining the ordering */ + int pk_strategy; /* sort direction (ASC or DESC) */ + bool pk_nulls_first; /* do NULLs come before normal values? */ +} PathKey; /* * Type "Path" is used as-is for sequential-scan paths. For other @@ -398,7 +481,7 @@ typedef struct Path Cost total_cost; /* total cost (assuming all tuples fetched) */ List *pathkeys; /* sort ordering of path's output */ - /* pathkeys is a List of Lists of PathKeyItem nodes; see above */ + /* pathkeys is a List of PathKey nodes; see above */ } Path; /*---------- @@ -618,11 +701,7 @@ typedef JoinPath NestPath; * A mergejoin path has these fields. * * path_mergeclauses lists the clauses (in the form of RestrictInfos) - * that will be used in the merge. The parallel arrays path_mergeFamilies, - * path_mergeStrategies, and path_mergeNullsFirst specify the merge semantics - * for each clause (i.e., define the relevant sort ordering for each clause). - * (XXX is this the most reasonable path-time representation? It's at least - * partially redundant with the pathkeys of the input paths.) + * that will be used in the merge. * * Note that the mergeclauses are a subset of the parent relation's * restriction-clause list. Any join clauses that are not mergejoinable @@ -639,10 +718,6 @@ typedef struct MergePath { JoinPath jpath; List *path_mergeclauses; /* join clauses to be used for merge */ - /* these are arrays, but have the same length as the mergeclauses list: */ - Oid *path_mergeFamilies; /* per-clause OIDs of opfamilies */ - int *path_mergeStrategies; /* per-clause ordering (ASC or DESC) */ - bool *path_mergeNullsFirst; /* per-clause nulls ordering */ List *outersortkeys; /* keys for explicit sort, if any */ List *innersortkeys; /* keys for explicit sort, if any */ } MergePath; @@ -696,6 +771,15 @@ typedef struct HashPath * sequence we use. So, these clauses cannot be associated directly with * the join RelOptInfo, but must be kept track of on a per-join-path basis. * + * RestrictInfos that represent equivalence conditions (i.e., mergejoinable + * equalities that are not outerjoin-delayed) are handled a bit differently. + * Initially we attach them to the EquivalenceClasses that are derived from + * them. When we construct a scan or join path, we look through all the + * EquivalenceClasses and generate derived RestrictInfos representing the + * minimal set of conditions that need to be checked for this particular scan + * or join to enforce that all members of each EquivalenceClass are in fact + * equal in all rows emitted by the scan or join. + * * When dealing with outer joins we have to be very careful about pushing qual * clauses up and down the tree. An outer join's own JOIN/ON conditions must * be evaluated exactly at that join node, and any quals appearing in WHERE or @@ -728,9 +812,9 @@ typedef struct HashPath * * In general, the referenced clause might be arbitrarily complex. The * kinds of clauses we can handle as indexscan quals, mergejoin clauses, - * or hashjoin clauses are fairly limited --- the code for each kind of - * path is responsible for identifying the restrict clauses it can use - * and ignoring the rest. Clauses not implemented by an indexscan, + * or hashjoin clauses are limited (e.g., no volatile functions). The code + * for each kind of path is responsible for identifying the restrict clauses + * it can use and ignoring the rest. Clauses not implemented by an indexscan, * mergejoin, or hashjoin will be placed in the plan qual or joinqual field * of the finished Plan node, where they will be enforced by general-purpose * qual-expression-evaluation code. (But we are still entitled to count @@ -758,6 +842,12 @@ typedef struct HashPath * estimates. Note that a pseudoconstant clause can never be an indexqual * or merge or hash join clause, so it's of no interest to large parts of * the planner. + * + * When join clauses are generated from EquivalenceClasses, there may be + * several equally valid ways to enforce join equivalence, of which we need + * apply only one. We mark clauses of this kind by setting parent_ec to + * point to the generating EquivalenceClass. Multiple clauses with the same + * parent_ec in the same join are redundant. */ typedef struct RestrictInfo @@ -787,23 +877,22 @@ typedef struct RestrictInfo /* This field is NULL unless clause is an OR clause: */ Expr *orclause; /* modified clause with RestrictInfos */ + /* This field is NULL unless clause is potentially redundant: */ + EquivalenceClass *parent_ec; /* generating EquivalenceClass */ + /* cache space for cost and selectivity */ QualCost eval_cost; /* eval cost of clause; -1 if not yet set */ Selectivity this_selec; /* selectivity; -1 if not yet set */ - /* valid if clause is mergejoinable, else InvalidOid: */ - Oid mergejoinoperator; /* copy of clause operator */ - Oid left_sortop; /* leftside sortop needed for mergejoin */ - Oid right_sortop; /* rightside sortop needed for mergejoin */ - Oid mergeopfamily; /* btree opfamily relating these ops */ + /* valid if clause is mergejoinable, else NIL */ + List *mergeopfamilies; /* opfamilies containing clause operator */ - /* cache space for mergeclause processing; NIL if not yet set */ - List *left_pathkey; /* canonical pathkey for left side */ - List *right_pathkey; /* canonical pathkey for right side */ + /* cache space for mergeclause processing; NULL if not yet set */ + EquivalenceClass *left_ec; /* EquivalenceClass containing lefthand */ + EquivalenceClass *right_ec; /* EquivalenceClass containing righthand */ - /* cache space for mergeclause processing; -1 if not yet set */ - Selectivity left_mergescansel; /* fraction of left side to scan */ - Selectivity right_mergescansel; /* fraction of right side to scan */ + /* transient workspace for use while considering a specific join path */ + bool outer_is_left; /* T = outer var on left, F = on right */ /* valid if clause is hashjoinable, else InvalidOid: */ Oid hashjoinoperator; /* copy of clause operator */ diff --git a/src/include/optimizer/joininfo.h b/src/include/optimizer/joininfo.h index d491a368eb..f7c4bc07d3 100644 --- a/src/include/optimizer/joininfo.h +++ b/src/include/optimizer/joininfo.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/joininfo.h,v 1.33 2007/01/05 22:19:56 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/joininfo.h,v 1.34 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,8 +23,5 @@ extern bool have_relevant_joinclause(PlannerInfo *root, extern void add_join_clause_to_rels(PlannerInfo *root, RestrictInfo *restrictinfo, Relids join_relids); -extern void remove_join_clause_from_rels(PlannerInfo *root, - RestrictInfo *restrictinfo, - Relids join_relids); #endif /* JOININFO_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index fd5c78372e..98a73ebc4e 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.75 2007/01/10 18:06:04 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.76 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -71,9 +71,6 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root, List *restrict_clauses, List *pathkeys, List *mergeclauses, - Oid *mergefamilies, - int *mergestrategies, - bool *mergenullsfirst, List *outersortkeys, List *innersortkeys); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 14baf430a0..7e7eb15469 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.94 2007/01/05 22:19:56 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.95 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -52,6 +52,9 @@ extern List *group_clauses_by_indexkey(IndexOptInfo *index, Relids outer_relids, SaOpControl saop_control, bool *found_clause); +extern bool eclass_matches_any_index(EquivalenceClass *ec, + EquivalenceMember *em, + RelOptInfo *rel); extern bool match_index_to_operand(Node *operand, int indexcol, IndexOptInfo *index); extern List *expand_indexqual_conditions(IndexOptInfo *index, @@ -89,6 +92,37 @@ extern List *make_rels_by_joins(PlannerInfo *root, int level, List **joinrels); extern RelOptInfo *make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2); +/* + * equivclass.c + * routines for managing EquivalenceClasses + */ +extern bool process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, + bool below_outer_join); +extern void reconsider_outer_join_clauses(PlannerInfo *root); +extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, + Expr *expr, + Oid expr_datatype, + List *opfamilies); +extern void generate_base_implied_equalities(PlannerInfo *root); +extern List *generate_join_implied_equalities(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); +extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2); +extern void add_child_rel_equivalences(PlannerInfo *root, + AppendRelInfo *appinfo, + RelOptInfo *parent_rel, + RelOptInfo *child_rel); +extern List *find_eclass_clauses_for_index_join(PlannerInfo *root, + RelOptInfo *rel, + Relids outer_relids); +extern bool have_relevant_eclass_joinclause(PlannerInfo *root, + RelOptInfo *rel1, RelOptInfo *rel2); +extern bool has_relevant_eclass_joinclause(PlannerInfo *root, + RelOptInfo *rel1); +extern bool eclass_useful_for_merging(EquivalenceClass *eclass, + RelOptInfo *rel); + /* * pathkeys.c * utilities for matching and building path keys @@ -101,9 +135,6 @@ typedef enum PATHKEYS_DIFFERENT /* neither pathkey includes the other */ } PathKeysComparison; -extern void add_equijoined_keys(PlannerInfo *root, RestrictInfo *restrictinfo); -extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2); -extern void generate_implied_equalities(PlannerInfo *root); extern List *canonicalize_pathkeys(PlannerInfo *root, List *pathkeys); extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); @@ -113,23 +144,29 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths, List *pathkeys, double fraction); extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, - ScanDirection scandir, bool canonical); + ScanDirection scandir); extern List *convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *subquery_pathkeys); extern List *build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys); -extern List *make_pathkeys_for_sortclauses(List *sortclauses, - List *tlist); -extern void cache_mergeclause_pathkeys(PlannerInfo *root, +extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, + List *sortclauses, + List *tlist, + bool canonicalize); +extern void cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo); extern List *find_mergeclauses_for_pathkeys(PlannerInfo *root, List *pathkeys, + bool outer_keys, List *restrictinfos); -extern List *make_pathkeys_for_mergeclauses(PlannerInfo *root, - List *mergeclauses, - RelOptInfo *rel); +extern List *select_outer_pathkeys_for_merge(PlannerInfo *root, + List *mergeclauses, + RelOptInfo *joinrel); +extern List *make_inner_pathkeys_for_merge(PlannerInfo *root, + List *mergeclauses, + List *outer_pathkeys); extern int pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 0f6799338b..5118061182 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.97 2007/01/10 18:06:04 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.98 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,6 +38,8 @@ extern Plan *create_plan(PlannerInfo *root, Path *best_path); extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual, Index scanrelid, Plan *subplan); extern Append *make_append(List *appendplans, bool isTarget, List *tlist); +extern Sort *make_sort_from_pathkeys(PlannerInfo *root, Plan *lefttree, + List *pathkeys); extern Sort *make_sort_from_sortclauses(PlannerInfo *root, List *sortcls, Plan *lefttree); extern Sort *make_sort_from_groupcols(PlannerInfo *root, List *groupcls, @@ -69,12 +71,22 @@ extern int join_collapse_limit; extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode); extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist); +extern void add_vars_to_targetlist(PlannerInfo *root, List *vars, + Relids where_needed); extern List *deconstruct_jointree(PlannerInfo *root); +extern void distribute_restrictinfo_to_rels(PlannerInfo *root, + RestrictInfo *restrictinfo); extern void process_implied_equality(PlannerInfo *root, - Node *item1, Node *item2, - Oid sortop1, Oid sortop2, - Relids item1_relids, Relids item2_relids, - bool delete_it); + Oid opno, + Expr *item1, + Expr *item2, + Relids qualscope, + bool below_outer_join, + bool both_const); +extern RestrictInfo *build_implied_join_equality(Oid opno, + Expr *item1, + Expr *item2, + Relids qualscope); /* * prototypes for plan/setrefs.c diff --git a/src/include/optimizer/restrictinfo.h b/src/include/optimizer/restrictinfo.h index 619e7cef39..272c5a6703 100644 --- a/src/include/optimizer/restrictinfo.h +++ b/src/include/optimizer/restrictinfo.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.39 2007/01/05 22:19:56 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.40 2007/01/20 20:45:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,12 +32,8 @@ extern List *extract_actual_clauses(List *restrictinfo_list, extern void extract_actual_join_clauses(List *restrictinfo_list, List **joinquals, List **otherquals); -extern List *remove_redundant_join_clauses(PlannerInfo *root, - List *restrictinfo_list, - bool isouterjoin); extern List *select_nonredundant_join_clauses(PlannerInfo *root, List *restrictinfo_list, - List *reference_list, - bool isouterjoin); + List *reference_list); #endif /* RESTRICTINFO_H */ diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 8d4c0d8e3e..f0e3f2c20d 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.112 2007/01/10 18:06:05 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.113 2007/01/20 20:45:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -35,12 +35,11 @@ extern void get_op_opfamily_properties(Oid opno, Oid opfamily, bool *recheck); extern Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy); -extern bool get_op_mergejoin_info(Oid eq_op, Oid *left_sortop, - Oid *right_sortop, Oid *opfamily); extern bool get_compare_function_for_ordering_op(Oid opno, Oid *cmpfunc, bool *reverse); extern Oid get_equality_op_for_ordering_op(Oid opno); extern Oid get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type); +extern List *get_mergejoin_opfamilies(Oid opno); extern Oid get_compatible_hash_operator(Oid opno, bool use_lhs_type); extern Oid get_op_hash_function(Oid opno); extern void get_op_btree_interpretation(Oid opno, -- 2.40.0