]> granicus.if.org Git - postgresql/commitdiff
Compute correct em_nullable_relids in get_eclass_for_sort_expr().
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 15 Nov 2013 21:46:18 +0000 (16:46 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 15 Nov 2013 21:46:18 +0000 (16:46 -0500)
Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr
must be able to compute valid em_nullable_relids for any new equivalence
class members it creates.  I'd worried about this in the commit message
for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a
problem because multi-member ECs should already exist when it runs.  That
is transparently wrong, though, because this function is also called by
initialize_mergeclause_eclasses, which runs during deconstruct_jointree.
The example given in the bug report (which the new regression test item
is based upon) fails because the COALESCE() expression is first seen by
initialize_mergeclause_eclasses rather than process_equivalence.

Fixing this requires passing the appropriate nullable_relids set to
get_eclass_for_sort_expr, and it requires new code to compute that set
for top-level expressions such as ORDER BY, GROUP BY, etc.  We store
the top-level nullable_relids in a new field in PlannerInfo to avoid
computing it many times.  In the back branches, I've added the new
field at the end of the struct to minimize ABI breakage for planner
plugins.  There doesn't seem to be a good alternative to changing
get_eclass_for_sort_expr's API signature, though.  There probably aren't
any third-party extensions calling that function directly; moreover,
if there are, they probably need to think about what to pass for
nullable_relids anyway.

Back-patch to 9.2, like the previous patch in this area.

src/backend/nodes/outfuncs.c
src/backend/optimizer/path/equivclass.c
src/backend/optimizer/path/pathkeys.c
src/backend/optimizer/plan/initsplan.c
src/include/nodes/relation.h
src/include/optimizer/paths.h
src/test/regress/expected/join.out
src/test/regress/sql/join.sql

index 817b14947775b808336123aa3e5292b73ff4ff22..b39927e02550fa3c9144e143ee39f8a785031bd2 100644 (file)
@@ -1698,6 +1698,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
        WRITE_UINT_FIELD(query_level);
        WRITE_NODE_FIELD(plan_params);
        WRITE_BITMAPSET_FIELD(all_baserels);
+       WRITE_BITMAPSET_FIELD(nullable_baserels);
        WRITE_NODE_FIELD(join_rel_list);
        WRITE_INT_FIELD(join_cur_level);
        WRITE_NODE_FIELD(init_plans);
index 711b161c0d1bb2366224063ed56feab8d92203dc..baddd34a741b32e21c36f7e94b2f2e25b107e0b4 100644 (file)
@@ -510,6 +510,13 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
  *       equivalence class it is a member of; if none, optionally build a new
  *       single-member EquivalenceClass for it.
  *
+ * expr is the expression, and nullable_relids is the set of base relids
+ * that are potentially nullable below it.     We actually only care about
+ * the set of such relids that are used in the expression; but for caller
+ * convenience, we perform that intersection step here.  The caller need
+ * only be sure that nullable_relids doesn't omit any nullable rels that
+ * might appear in the expr.
+ *
  * sortref is the SortGroupRef of the originating SortGroupClause, if any,
  * or zero if not.     (It should never be zero if the expression is volatile!)
  *
@@ -538,6 +545,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
 EquivalenceClass *
 get_eclass_for_sort_expr(PlannerInfo *root,
                                                 Expr *expr,
+                                                Relids nullable_relids,
                                                 List *opfamilies,
                                                 Oid opcintype,
                                                 Oid collation,
@@ -545,6 +553,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
                                                 Relids rel,
                                                 bool create_it)
 {
+       Relids          expr_relids;
        EquivalenceClass *newec;
        EquivalenceMember *newem;
        ListCell   *lc1;
@@ -555,6 +564,12 @@ get_eclass_for_sort_expr(PlannerInfo *root,
         */
        expr = canonicalize_ec_expression(expr, opcintype, collation);
 
+       /*
+        * Get the precise set of nullable relids appearing in the expression.
+        */
+       expr_relids = pull_varnos((Node *) expr);
+       nullable_relids = bms_intersect(nullable_relids, expr_relids);
+
        /*
         * Scan through the existing EquivalenceClasses for a match
         */
@@ -629,8 +644,8 @@ get_eclass_for_sort_expr(PlannerInfo *root,
        if (newec->ec_has_volatile && sortref == 0) /* should not happen */
                elog(ERROR, "volatile EquivalenceClass has no sortref");
 
-       newem = add_eq_member(newec, copyObject(expr), pull_varnos((Node *) expr),
-                                                 NULL, false, opcintype);
+       newem = add_eq_member(newec, copyObject(expr), expr_relids,
+                                                 nullable_relids, false, opcintype);
 
        /*
         * add_eq_member doesn't check for volatile functions, set-returning
index 672499691951f36c9a215ea0c5e0f7ecd039db97..032b2cdc1330e5b65508e84d6b48026e7e0d0897 100644 (file)
@@ -154,6 +154,9 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
  *       Given an expression and sort-order information, create a PathKey.
  *       The result is always a "canonical" PathKey, but it might be redundant.
  *
+ * expr is the expression, and nullable_relids is the set of base relids
+ * that are potentially nullable below it.
+ *
  * If the PathKey is being generated from a SortGroupClause, sortref should be
  * the SortGroupClause's SortGroupRef; otherwise zero.
  *
@@ -169,6 +172,7 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
 static PathKey *
 make_pathkey_from_sortinfo(PlannerInfo *root,
                                                   Expr *expr,
+                                                  Relids nullable_relids,
                                                   Oid opfamily,
                                                   Oid opcintype,
                                                   Oid collation,
@@ -204,8 +208,8 @@ make_pathkey_from_sortinfo(PlannerInfo *root,
                         equality_op);
 
        /* Now find or (optionally) create a matching EquivalenceClass */
-       eclass = get_eclass_for_sort_expr(root, expr, opfamilies,
-                                                                         opcintype, collation,
+       eclass = get_eclass_for_sort_expr(root, expr, nullable_relids,
+                                                                         opfamilies, opcintype, collation,
                                                                          sortref, rel, create_it);
 
        /* Fail if no EC and !create_it */
@@ -227,6 +231,7 @@ make_pathkey_from_sortinfo(PlannerInfo *root,
 static PathKey *
 make_pathkey_from_sortop(PlannerInfo *root,
                                                 Expr *expr,
+                                                Relids nullable_relids,
                                                 Oid ordering_op,
                                                 bool nulls_first,
                                                 Index sortref,
@@ -248,6 +253,7 @@ make_pathkey_from_sortop(PlannerInfo *root,
 
        return make_pathkey_from_sortinfo(root,
                                                                          expr,
+                                                                         nullable_relids,
                                                                          opfamily,
                                                                          opcintype,
                                                                          collation,
@@ -461,9 +467,13 @@ build_index_pathkeys(PlannerInfo *root,
                        nulls_first = index->nulls_first[i];
                }
 
-               /* OK, try to make a canonical pathkey for this sort key */
+               /*
+                * OK, try to make a canonical pathkey for this sort key.  Note we're
+                * underneath any outer joins, so nullable_relids should be NULL.
+                */
                cpathkey = make_pathkey_from_sortinfo(root,
                                                                                          indexkey,
+                                                                                         NULL,
                                                                                          index->sortopfamily[i],
                                                                                          index->opcintype[i],
                                                                                          index->indexcollations[i],
@@ -551,11 +561,14 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                                 * expression is *not* volatile in the outer query: it's just
                                 * a Var referencing whatever the subquery emitted. (IOW, the
                                 * outer query isn't going to re-execute the volatile
-                                * expression itself.)  So this is okay.
+                                * expression itself.)  So this is okay.  Likewise, it's
+                                * correct to pass nullable_relids = NULL, because we're
+                                * underneath any outer joins appearing in the outer query.
                                 */
                                outer_ec =
                                        get_eclass_for_sort_expr(root,
                                                                                         outer_expr,
+                                                                                        NULL,
                                                                                         sub_eclass->ec_opfamilies,
                                                                                         sub_member->em_datatype,
                                                                                         sub_eclass->ec_collation,
@@ -643,6 +656,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                                        /* See if we have a matching EC for that */
                                        outer_ec = get_eclass_for_sort_expr(root,
                                                                                                                outer_expr,
+                                                                                                               NULL,
                                                                                                   sub_eclass->ec_opfamilies,
                                                                                                                sub_expr_type,
                                                                                                                sub_expr_coll,
@@ -748,6 +762,13 @@ build_join_pathkeys(PlannerInfo *root,
  * The resulting PathKeys are always in canonical form.  (Actually, there
  * is no longer any code anywhere that creates non-canonical PathKeys.)
  *
+ * We assume that root->nullable_baserels is the set of base relids that could
+ * have gone to NULL below the SortGroupClause expressions.  This is okay if
+ * the expressions came from the query's top level (ORDER BY, DISTINCT, etc)
+ * and if this function is only invoked after deconstruct_jointree.  In the
+ * future we might have to make callers pass in the appropriate
+ * nullable-relids set, but for now it seems unnecessary.
+ *
  * 'sortclauses' is a list of SortGroupClause nodes
  * 'tlist' is the targetlist to find the referenced tlist entries in
  */
@@ -769,6 +790,7 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
                Assert(OidIsValid(sortcl->sortop));
                pathkey = make_pathkey_from_sortop(root,
                                                                                   sortkey,
+                                                                                  root->nullable_baserels,
                                                                                   sortcl->sortop,
                                                                                   sortcl->nulls_first,
                                                                                   sortcl->tleSortGroupRef,
@@ -824,6 +846,7 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
        restrictinfo->left_ec =
                get_eclass_for_sort_expr(root,
                                                                 (Expr *) get_leftop(clause),
+                                                                restrictinfo->nullable_relids,
                                                                 restrictinfo->mergeopfamilies,
                                                                 lefttype,
                                                                 ((OpExpr *) clause)->inputcollid,
@@ -833,6 +856,7 @@ initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
        restrictinfo->right_ec =
                get_eclass_for_sort_expr(root,
                                                                 (Expr *) get_rightop(clause),
+                                                                restrictinfo->nullable_relids,
                                                                 restrictinfo->mergeopfamilies,
                                                                 righttype,
                                                                 ((OpExpr *) clause)->inputcollid,
index c5998b9e2de21dfa5bff46554beddad4da8a93e9..04a399ee13cd549a1b3dec3a44f0f09bbefdc99c 100644 (file)
@@ -649,6 +649,9 @@ deconstruct_jointree(PlannerInfo *root)
        Assert(root->parse->jointree != NULL &&
                   IsA(root->parse->jointree, FromExpr));
 
+       /* this is filled as we scan the jointree */
+       root->nullable_baserels = NULL;
+
        result = deconstruct_recurse(root, (Node *) root->parse->jointree, false,
                                                                 &qualscope, &inner_join_rels,
                                                                 &postponed_qual_list);
@@ -790,6 +793,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                                        left_inners,
                                        right_inners,
                                        nonnullable_rels,
+                                       nullable_rels,
                                        ojscope;
                List       *leftjoinlist,
                                   *rightjoinlist;
@@ -823,6 +827,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                                *inner_join_rels = *qualscope;
                                /* Inner join adds no restrictions for quals */
                                nonnullable_rels = NULL;
+                               /* and it doesn't force anything to null, either */
+                               nullable_rels = NULL;
                                break;
                        case JOIN_LEFT:
                        case JOIN_ANTI:
@@ -837,6 +843,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                                *qualscope = bms_union(leftids, rightids);
                                *inner_join_rels = bms_union(left_inners, right_inners);
                                nonnullable_rels = leftids;
+                               nullable_rels = rightids;
                                break;
                        case JOIN_SEMI:
                                leftjoinlist = deconstruct_recurse(root, j->larg,
@@ -851,6 +858,13 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                                *inner_join_rels = bms_union(left_inners, right_inners);
                                /* Semi join adds no restrictions for quals */
                                nonnullable_rels = NULL;
+
+                               /*
+                                * Theoretically, a semijoin would null the RHS; but since the
+                                * RHS can't be accessed above the join, this is immaterial
+                                * and we needn't account for it.
+                                */
+                               nullable_rels = NULL;
                                break;
                        case JOIN_FULL:
                                leftjoinlist = deconstruct_recurse(root, j->larg,
@@ -865,16 +879,22 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                                *inner_join_rels = bms_union(left_inners, right_inners);
                                /* each side is both outer and inner */
                                nonnullable_rels = *qualscope;
+                               nullable_rels = *qualscope;
                                break;
                        default:
                                /* JOIN_RIGHT was eliminated during reduce_outer_joins() */
                                elog(ERROR, "unrecognized join type: %d",
                                         (int) j->jointype);
                                nonnullable_rels = NULL;                /* keep compiler quiet */
+                               nullable_rels = NULL;
                                leftjoinlist = rightjoinlist = NIL;
                                break;
                }
 
+               /* Report all rels that will be nulled anywhere in the jointree */
+               root->nullable_baserels = bms_add_members(root->nullable_baserels,
+                                                                                                 nullable_rels);
+
                /*
                 * For an OJ, form the SpecialJoinInfo now, because we need the OJ's
                 * semantic scope (ojscope) to pass to distribute_qual_to_rels.  But
index a2853fbf044b73f5e1cf2c7bbce4668842630df4..6d7b5948cd9fb638b366587cfc596bf9adf93ec6 100644 (file)
@@ -150,10 +150,19 @@ typedef struct PlannerInfo
        /*
         * all_baserels is a Relids set of all base relids (but not "other"
         * relids) in the query; that is, the Relids identifier of the final join
-        * we need to form.
+        * we need to form.  This is computed in make_one_rel, just before we
+        * start making Paths.
         */
        Relids          all_baserels;
 
+       /*
+        * nullable_baserels is a Relids set of base relids that are nullable by
+        * some outer join in the jointree; these are rels that are potentially
+        * nullable below the WHERE clause, SELECT targetlist, etc.  This is
+        * computed in deconstruct_jointree.
+        */
+       Relids          nullable_baserels;
+
        /*
         * join_rel_list is a list of all join-relation RelOptInfos we have
         * considered in this planning run.  For small problems we just scan the
index 9ef93c70c6412c5fd1a6171519e0715c0bab1002..96ffdb195fe62e03e0f5a33ce854855392d5886b 100644 (file)
@@ -109,6 +109,7 @@ extern Expr *canonicalize_ec_expression(Expr *expr,
 extern void reconsider_outer_join_clauses(PlannerInfo *root);
 extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
                                                 Expr *expr,
+                                                Relids nullable_relids,
                                                 List *opfamilies,
                                                 Oid opcintype,
                                                 Oid collation,
index b59004ddf3cc33748e78ba7d579f2c641ab39a08..b9559ea607599572393c1f6cba7986fcaa298443 100644 (file)
@@ -2900,6 +2900,35 @@ select f1, unique2, case when unique2 is null then f1 else 0 end
   0 |       0 |    0
 (1 row)
 
+--
+-- another case with equivalence clauses above outer joins (bug #8591)
+--
+explain (costs off)
+select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
+  from tenk1 a left join tenk1 b on b.thousand = a.unique1                        left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
+  where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44;
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
+ Nested Loop Left Join
+   ->  Nested Loop Left Join
+         Filter: (COALESCE(b.twothousand, a.twothousand) = 44)
+         ->  Index Scan using tenk1_unique2 on tenk1 a
+               Index Cond: (unique2 = 5530)
+         ->  Bitmap Heap Scan on tenk1 b
+               Recheck Cond: (thousand = a.unique1)
+               ->  Bitmap Index Scan on tenk1_thous_tenthous
+                     Index Cond: (thousand = a.unique1)
+   ->  Index Scan using tenk1_unique2 on tenk1 c
+         Index Cond: ((unique2 = COALESCE(b.twothousand, a.twothousand)) AND (unique2 = 44))
+(11 rows)
+
+select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
+  from tenk1 a left join tenk1 b on b.thousand = a.unique1                        left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
+  where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44;
+ unique1 | unique1 | unique1 | coalesce 
+---------+---------+---------+----------
+(0 rows)
+
 --
 -- test ability to push constants through outer join clauses
 --
index e43b0fc0ffcebb93c613dd38a5a67026b46bb36d..1edf9b3897b5a5637de0757bbd7d8f95900ce980 100644 (file)
@@ -790,6 +790,19 @@ select f1, unique2, case when unique2 is null then f1 else 0 end
   from int4_tbl a left join tenk1 b on f1 = unique2
   where (case when unique2 is null then f1 else 0 end) = 0;
 
+--
+-- another case with equivalence clauses above outer joins (bug #8591)
+--
+
+explain (costs off)
+select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
+  from tenk1 a left join tenk1 b on b.thousand = a.unique1                        left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
+  where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44;
+
+select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
+  from tenk1 a left join tenk1 b on b.thousand = a.unique1                        left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
+  where a.unique2 = 5530 and coalesce(b.twothousand, a.twothousand) = 44;
+
 --
 -- test ability to push constants through outer join clauses
 --