]> granicus.if.org Git - postgresql/commitdiff
Repair issues with faulty generation of merge-append plans.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 9 May 2019 20:52:49 +0000 (16:52 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 9 May 2019 20:52:49 +0000 (16:52 -0400)
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag:
it would generate the expected targetlist but then it felt free to
add resjunk sort targets to it.  This demonstrably leads to assertion
failures in v11 and HEAD, and it's probably just accidental that we
don't see the same in older branches.  I've not looked into whether
there would be any real-world consequences in non-assert builds.
In HEAD, create_append_plan has sprouted the same problem, so fix
that too (although we do not have any test cases that seem able to
reach that bug).  This is an oversight in commit 3fc6e2d7f which
invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that
came in.

convert_subquery_pathkeys would create pathkeys for subquery output
values if they match any EquivalenceClass known in the outer query
and are available in the subquery's syntactic targetlist.  However,
the second part of that condition is wrong, because such values might
not appear in the subquery relation's reltarget list, which would
mean that they couldn't be accessed above the level of the subquery
scan.  We must check that they appear in the reltarget list, instead.
This can lead to dropping knowledge about the subquery's sort
ordering, but I believe it's okay, because any sort key that the
outer query actually has any interest in would appear in the
reltarget list.

This second issue is of very long standing, but right now there's no
evidence that it causes observable problems before 9.6, so I refrained
from back-patching further than that.  We can revisit that choice if
somebody finds a way to make it cause problems in older branches.
(Developing useful test cases for these issues is really problematic;
fixing convert_subquery_pathkeys removes the only known way to exhibit
the create_merge_append_plan bug, and neither of the test cases added
by this patch causes a problem in all branches, even when considering
the issues separately.)

The second issue explains bug #15795 from Suresh Kumar R ("could not
find pathkey item to sort" with nested DISTINCT queries).  I stumbled
across the first issue while investigating that.

Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org

src/backend/optimizer/path/pathkeys.c
src/backend/optimizer/plan/createplan.c
src/test/regress/expected/union.out
src/test/regress/sql/union.sql

index 66c13a7f133aba4234ecadb4214f94c9c78144ae..68431f12b9a1618f74c3935943f668f33d9bcb4a 100644 (file)
@@ -29,6 +29,7 @@
 
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
+static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle);
 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
 
 
@@ -599,9 +600,11 @@ build_expression_pathkey(PlannerInfo *root,
  * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
  * 'subquery_tlist': the subquery's output targetlist, in its terms.
  *
- * It is not necessary for caller to do truncate_useless_pathkeys(),
- * because we select keys in a way that takes usefulness of the keys into
- * account.
+ * We intentionally don't do truncate_useless_pathkeys() here, because there
+ * are situations where seeing the raw ordering of the subquery is helpful.
+ * For example, if it returns ORDER BY x DESC, that may prompt us to
+ * construct a mergejoin using DESC order rather than ASC order; but the
+ * right_merge_direction heuristic would have us throw the knowledge away.
  */
 List *
 convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
@@ -627,22 +630,22 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                         * that same targetlist entry.
                         */
                        TargetEntry *tle;
+                       Var                *outer_var;
 
                        if (sub_eclass->ec_sortref == 0)        /* can't happen */
                                elog(ERROR, "volatile EquivalenceClass has no sortref");
                        tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
                        Assert(tle);
-                       /* resjunk items aren't visible to outer query */
-                       if (!tle->resjunk)
+                       /* Is TLE actually available to the outer query? */
+                       outer_var = find_var_for_subquery_tle(rel, tle);
+                       if (outer_var)
                        {
                                /* We can represent this sub_pathkey */
                                EquivalenceMember *sub_member;
-                               Expr       *outer_expr;
                                EquivalenceClass *outer_ec;
 
                                Assert(list_length(sub_eclass->ec_members) == 1);
                                sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
-                               outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle);
 
                                /*
                                 * Note: it might look funny to be setting sortref = 0 for a
@@ -656,7 +659,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                                 */
                                outer_ec =
                                        get_eclass_for_sort_expr(root,
-                                                                                        outer_expr,
+                                                                                        (Expr *) outer_var,
                                                                                         NULL,
                                                                                         sub_eclass->ec_opfamilies,
                                                                                         sub_member->em_datatype,
@@ -713,14 +716,15 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                                foreach(k, subquery_tlist)
                                {
                                        TargetEntry *tle = (TargetEntry *) lfirst(k);
+                                       Var                *outer_var;
                                        Expr       *tle_expr;
-                                       Expr       *outer_expr;
                                        EquivalenceClass *outer_ec;
                                        PathKey    *outer_pk;
                                        int                     score;
 
-                                       /* resjunk items aren't visible to outer query */
-                                       if (tle->resjunk)
+                                       /* Is TLE actually available to the outer query? */
+                                       outer_var = find_var_for_subquery_tle(rel, tle);
+                                       if (!outer_var)
                                                continue;
 
                                        /*
@@ -735,16 +739,9 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                                        if (!equal(tle_expr, sub_expr))
                                                continue;
 
-                                       /*
-                                        * Build a representation of this targetlist entry as an
-                                        * outer Var.
-                                        */
-                                       outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid,
-                                                                                                                                tle);
-
-                                       /* See if we have a matching EC for that */
+                                       /* See if we have a matching EC for the TLE */
                                        outer_ec = get_eclass_for_sort_expr(root,
-                                                                                                               outer_expr,
+                                                                                                               (Expr *) outer_var,
                                                                                                                NULL,
                                                                                                                sub_eclass->ec_opfamilies,
                                                                                                                sub_expr_type,
@@ -801,6 +798,41 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
        return retval;
 }
 
+/*
+ * find_var_for_subquery_tle
+ *
+ * If the given subquery tlist entry is due to be emitted by the subquery's
+ * scan node, return a Var for it, else return NULL.
+ *
+ * We need this to ensure that we don't return pathkeys describing values
+ * that are unavailable above the level of the subquery scan.
+ */
+static Var *
+find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
+{
+       ListCell   *lc;
+
+       /* If the TLE is resjunk, it's certainly not visible to the outer query */
+       if (tle->resjunk)
+               return NULL;
+
+       /* Search the rel's targetlist to see what it will return */
+       foreach(lc, rel->reltarget->exprs)
+       {
+               Var                *var = (Var *) lfirst(lc);
+
+               /* Ignore placeholders */
+               if (!IsA(var, Var))
+                       continue;
+               Assert(var->varno == rel->relid);
+
+               /* If we find a Var referencing this TLE, we're good */
+               if (var->varattno == tle->resno)
+                       return copyObject(var); /* Make a copy for safety */
+       }
+       return NULL;
+}
+
 /*
  * build_join_pathkeys
  *       Build the path keys for a join relation constructed by mergejoin or
index c446f7cd2074313148bf283ddd843a2d125f1acd..3798bd28b1d76dfb20a4f36cf9bb943cb79f02c2 100644 (file)
@@ -78,7 +78,8 @@ static Plan *create_gating_plan(PlannerInfo *root, Path *path, Plan *plan,
                                   List *gating_quals);
 static Plan *create_join_plan(PlannerInfo *root, JoinPath *best_path);
 static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path);
-static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path);
+static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
+                                                int flags);
 static Result *create_result_plan(PlannerInfo *root, ResultPath *best_path);
 static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path);
 static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path,
@@ -386,7 +387,8 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
                        break;
                case T_MergeAppend:
                        plan = create_merge_append_plan(root,
-                                                                                       (MergeAppendPath *) best_path);
+                                                                                       (MergeAppendPath *) best_path,
+                                                                                       flags);
                        break;
                case T_Result:
                        if (IsA(best_path, ProjectionPath))
@@ -1064,11 +1066,14 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
  *       Returns a Plan node.
  */
 static Plan *
-create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
+create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
+                                                int flags)
 {
        MergeAppend *node = makeNode(MergeAppend);
        Plan       *plan = &node->plan;
        List       *tlist = build_path_tlist(root, &best_path->path);
+       int                     orig_tlist_length = list_length(tlist);
+       bool            tlist_was_changed;
        List       *pathkeys = best_path->path.pathkeys;
        List       *subplans = NIL;
        ListCell   *subpaths;
@@ -1085,7 +1090,12 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
        plan->lefttree = NULL;
        plan->righttree = NULL;
 
-       /* Compute sort column info, and adjust MergeAppend's tlist as needed */
+       /*
+        * Compute sort column info, and adjust MergeAppend's tlist as needed.
+        * Because we pass adjust_tlist_in_place = true, we may ignore the
+        * function result; it must be the same plan node.  However, we then need
+        * to detect whether any tlist entries were added.
+        */
        (void) prepare_sort_from_pathkeys(plan, pathkeys,
                                                                          best_path->path.parent->relids,
                                                                          NULL,
@@ -1095,6 +1105,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
                                                                          &node->sortOperators,
                                                                          &node->collations,
                                                                          &node->nullsFirst);
+       tlist_was_changed = (orig_tlist_length != list_length(plan->targetlist));
 
        /*
         * Now prepare the child plans.  We must apply prepare_sort_from_pathkeys
@@ -1160,7 +1171,18 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
        node->partitioned_rels = best_path->partitioned_rels;
        node->mergeplans = subplans;
 
-       return (Plan *) node;
+       /*
+        * If prepare_sort_from_pathkeys added sort columns, but we were told to
+        * produce either the exact tlist or a narrow tlist, we should get rid of
+        * the sort columns again.  We must inject a projection node to do so.
+        */
+       if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST)))
+       {
+               tlist = list_truncate(list_copy(plan->targetlist), orig_tlist_length);
+               return inject_projection_plan(plan, tlist, plan->parallel_safe);
+       }
+       else
+               return plan;
 }
 
 /*
index 92d427a690f5b4639400043b0a55dba7601b5266..48c27f8bb32bcb493cd89a6dc5e36d20d17c8fb8 100644 (file)
@@ -916,6 +916,79 @@ ORDER BY x;
  2 | 4
 (1 row)
 
+-- Test cases where the native ordering of a sub-select has more pathkeys
+-- than the outer query cares about
+explain (costs off)
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where q2 = q2;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Unique
+   ->  Merge Append
+         Sort Key: "*SELECT* 1".q1
+         ->  Subquery Scan on "*SELECT* 1"
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i81.q1, i81.q2
+                           ->  Seq Scan on int8_tbl i81
+                                 Filter: (q2 = q2)
+         ->  Subquery Scan on "*SELECT* 2"
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i82.q1, i82.q2
+                           ->  Seq Scan on int8_tbl i82
+                                 Filter: (q2 = q2)
+(15 rows)
+
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where q2 = q2;
+        q1        
+------------------
+              123
+ 4567890123456789
+(2 rows)
+
+explain (costs off)
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where -q1 = q2;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Unique
+   ->  Merge Append
+         Sort Key: "*SELECT* 1".q1
+         ->  Subquery Scan on "*SELECT* 1"
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i81.q1, i81.q2
+                           ->  Seq Scan on int8_tbl i81
+                                 Filter: ((- q1) = q2)
+         ->  Subquery Scan on "*SELECT* 2"
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i82.q1, i82.q2
+                           ->  Seq Scan on int8_tbl i82
+                                 Filter: ((- q1) = q2)
+(15 rows)
+
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where -q1 = q2;
+        q1        
+------------------
+ 4567890123456789
+(1 row)
+
 -- Test proper handling of parameterized appendrel paths when the
 -- potential join qual is expensive
 create function expensivefunc(int) returns int
index eed7c8d34be571228d315c2be5d9ae700da0f471..5f4881d5943a70202974bf20eacd75645a4c4bff 100644 (file)
@@ -379,6 +379,34 @@ SELECT * FROM
 WHERE x > 3
 ORDER BY x;
 
+-- Test cases where the native ordering of a sub-select has more pathkeys
+-- than the outer query cares about
+explain (costs off)
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where q2 = q2;
+
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where q2 = q2;
+
+explain (costs off)
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where -q1 = q2;
+
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where -q1 = q2;
+
 -- Test proper handling of parameterized appendrel paths when the
 -- potential join qual is expensive
 create function expensivefunc(int) returns int