]> granicus.if.org Git - postgresql/commitdiff
Repair issues with faulty generation of merge-append plans.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 9 May 2019 20:52:48 +0000 (16:52 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 9 May 2019 20:53:05 +0000 (16:53 -0400)
create_merge_append_plan failed to honor the CP_EXACT_TLIST flag:
it would generate the expected targetlist but then it felt free to
add resjunk sort targets to it.  This demonstrably leads to assertion
failures in v11 and HEAD, and it's probably just accidental that we
don't see the same in older branches.  I've not looked into whether
there would be any real-world consequences in non-assert builds.
In HEAD, create_append_plan has sprouted the same problem, so fix
that too (although we do not have any test cases that seem able to
reach that bug).  This is an oversight in commit 3fc6e2d7f which
invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that
came in.

convert_subquery_pathkeys would create pathkeys for subquery output
values if they match any EquivalenceClass known in the outer query
and are available in the subquery's syntactic targetlist.  However,
the second part of that condition is wrong, because such values might
not appear in the subquery relation's reltarget list, which would
mean that they couldn't be accessed above the level of the subquery
scan.  We must check that they appear in the reltarget list, instead.
This can lead to dropping knowledge about the subquery's sort
ordering, but I believe it's okay, because any sort key that the
outer query actually has any interest in would appear in the
reltarget list.

This second issue is of very long standing, but right now there's no
evidence that it causes observable problems before 9.6, so I refrained
from back-patching further than that.  We can revisit that choice if
somebody finds a way to make it cause problems in older branches.
(Developing useful test cases for these issues is really problematic;
fixing convert_subquery_pathkeys removes the only known way to exhibit
the create_merge_append_plan bug, and neither of the test cases added
by this patch causes a problem in all branches, even when considering
the issues separately.)

The second issue explains bug #15795 from Suresh Kumar R ("could not
find pathkey item to sort" with nested DISTINCT queries).  I stumbled
across the first issue while investigating that.

Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org

src/backend/optimizer/path/pathkeys.c
src/backend/optimizer/plan/createplan.c
src/test/regress/expected/union.out
src/test/regress/sql/union.sql

index 68b2204b3b492741a3cc62ae94d771cc5f52c567..c301e41a4a70094649442b2de33efe093f9d05e3 100644 (file)
@@ -33,6 +33,7 @@ static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
                                                                 RelOptInfo *partrel,
                                                                 int partkeycol);
+static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle);
 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
 
 
@@ -771,9 +772,11 @@ build_expression_pathkey(PlannerInfo *root,
  * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
  * 'subquery_tlist': the subquery's output targetlist, in its terms.
  *
- * It is not necessary for caller to do truncate_useless_pathkeys(),
- * because we select keys in a way that takes usefulness of the keys into
- * account.
+ * We intentionally don't do truncate_useless_pathkeys() here, because there
+ * are situations where seeing the raw ordering of the subquery is helpful.
+ * For example, if it returns ORDER BY x DESC, that may prompt us to
+ * construct a mergejoin using DESC order rather than ASC order; but the
+ * right_merge_direction heuristic would have us throw the knowledge away.
  */
 List *
 convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
@@ -799,22 +802,22 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                         * that same targetlist entry.
                         */
                        TargetEntry *tle;
+                       Var                *outer_var;
 
                        if (sub_eclass->ec_sortref == 0)        /* can't happen */
                                elog(ERROR, "volatile EquivalenceClass has no sortref");
                        tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
                        Assert(tle);
-                       /* resjunk items aren't visible to outer query */
-                       if (!tle->resjunk)
+                       /* Is TLE actually available to the outer query? */
+                       outer_var = find_var_for_subquery_tle(rel, tle);
+                       if (outer_var)
                        {
                                /* We can represent this sub_pathkey */
                                EquivalenceMember *sub_member;
-                               Expr       *outer_expr;
                                EquivalenceClass *outer_ec;
 
                                Assert(list_length(sub_eclass->ec_members) == 1);
                                sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
-                               outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle);
 
                                /*
                                 * Note: it might look funny to be setting sortref = 0 for a
@@ -828,7 +831,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                                 */
                                outer_ec =
                                        get_eclass_for_sort_expr(root,
-                                                                                        outer_expr,
+                                                                                        (Expr *) outer_var,
                                                                                         NULL,
                                                                                         sub_eclass->ec_opfamilies,
                                                                                         sub_member->em_datatype,
@@ -885,14 +888,15 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                                foreach(k, subquery_tlist)
                                {
                                        TargetEntry *tle = (TargetEntry *) lfirst(k);
+                                       Var                *outer_var;
                                        Expr       *tle_expr;
-                                       Expr       *outer_expr;
                                        EquivalenceClass *outer_ec;
                                        PathKey    *outer_pk;
                                        int                     score;
 
-                                       /* resjunk items aren't visible to outer query */
-                                       if (tle->resjunk)
+                                       /* Is TLE actually available to the outer query? */
+                                       outer_var = find_var_for_subquery_tle(rel, tle);
+                                       if (!outer_var)
                                                continue;
 
                                        /*
@@ -907,16 +911,9 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                                        if (!equal(tle_expr, sub_expr))
                                                continue;
 
-                                       /*
-                                        * Build a representation of this targetlist entry as an
-                                        * outer Var.
-                                        */
-                                       outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid,
-                                                                                                                                tle);
-
-                                       /* See if we have a matching EC for that */
+                                       /* See if we have a matching EC for the TLE */
                                        outer_ec = get_eclass_for_sort_expr(root,
-                                                                                                               outer_expr,
+                                                                                                               (Expr *) outer_var,
                                                                                                                NULL,
                                                                                                                sub_eclass->ec_opfamilies,
                                                                                                                sub_expr_type,
@@ -973,6 +970,41 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
        return retval;
 }
 
+/*
+ * find_var_for_subquery_tle
+ *
+ * If the given subquery tlist entry is due to be emitted by the subquery's
+ * scan node, return a Var for it, else return NULL.
+ *
+ * We need this to ensure that we don't return pathkeys describing values
+ * that are unavailable above the level of the subquery scan.
+ */
+static Var *
+find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
+{
+       ListCell   *lc;
+
+       /* If the TLE is resjunk, it's certainly not visible to the outer query */
+       if (tle->resjunk)
+               return NULL;
+
+       /* Search the rel's targetlist to see what it will return */
+       foreach(lc, rel->reltarget->exprs)
+       {
+               Var                *var = (Var *) lfirst(lc);
+
+               /* Ignore placeholders */
+               if (!IsA(var, Var))
+                       continue;
+               Assert(var->varno == rel->relid);
+
+               /* If we find a Var referencing this TLE, we're good */
+               if (var->varattno == tle->resno)
+                       return copyObject(var); /* Make a copy for safety */
+       }
+       return NULL;
+}
+
 /*
  * build_join_pathkeys
  *       Build the path keys for a join relation constructed by mergejoin or
index efe073a3eebe05968b7ccfc541fd4bd3bec6762c..270c11901b78f569fa1df95c0702ca38c85fb7e9 100644 (file)
@@ -81,8 +81,10 @@ static List *get_gating_quals(PlannerInfo *root, List *quals);
 static Plan *create_gating_plan(PlannerInfo *root, Path *path, Plan *plan,
                                   List *gating_quals);
 static Plan *create_join_plan(PlannerInfo *root, JoinPath *best_path);
-static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path);
-static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path);
+static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path,
+                                  int flags);
+static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
+                                                int flags);
 static Result *create_group_result_plan(PlannerInfo *root,
                                                 GroupResultPath *best_path);
 static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path);
@@ -390,11 +392,13 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
                        break;
                case T_Append:
                        plan = create_append_plan(root,
-                                                                         (AppendPath *) best_path);
+                                                                         (AppendPath *) best_path,
+                                                                         flags);
                        break;
                case T_MergeAppend:
                        plan = create_merge_append_plan(root,
-                                                                                       (MergeAppendPath *) best_path);
+                                                                                       (MergeAppendPath *) best_path,
+                                                                                       flags);
                        break;
                case T_Result:
                        if (IsA(best_path, ProjectionPath))
@@ -1054,10 +1058,12 @@ create_join_plan(PlannerInfo *root, JoinPath *best_path)
  *       Returns a Plan node.
  */
 static Plan *
-create_append_plan(PlannerInfo *root, AppendPath *best_path)
+create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
 {
        Append     *plan;
        List       *tlist = build_path_tlist(root, &best_path->path);
+       int                     orig_tlist_length = list_length(tlist);
+       bool            tlist_was_changed = false;
        List       *pathkeys = best_path->path.pathkeys;
        List       *subplans = NIL;
        ListCell   *subpaths;
@@ -1112,7 +1118,12 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
 
        if (pathkeys != NIL)
        {
-               /* Compute sort column info, and adjust the Append's tlist as needed */
+               /*
+                * Compute sort column info, and adjust the Append's tlist as needed.
+                * Because we pass adjust_tlist_in_place = true, we may ignore the
+                * function result; it must be the same plan node.  However, we then
+                * need to detect whether any tlist entries were added.
+                */
                (void) prepare_sort_from_pathkeys((Plan *) plan, pathkeys,
                                                                                  best_path->path.parent->relids,
                                                                                  NULL,
@@ -1122,6 +1133,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
                                                                                  &nodeSortOperators,
                                                                                  &nodeCollations,
                                                                                  &nodeNullsFirst);
+               tlist_was_changed = (orig_tlist_length != list_length(plan->plan.targetlist));
        }
 
        /* Build the plan for each child */
@@ -1231,7 +1243,20 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
 
        copy_generic_path_info(&plan->plan, (Path *) best_path);
 
-       return (Plan *) plan;
+       /*
+        * If prepare_sort_from_pathkeys added sort columns, but we were told to
+        * produce either the exact tlist or a narrow tlist, we should get rid of
+        * the sort columns again.  We must inject a projection node to do so.
+        */
+       if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST)))
+       {
+               tlist = list_truncate(list_copy(plan->plan.targetlist),
+                                                         orig_tlist_length);
+               return inject_projection_plan((Plan *) plan, tlist,
+                                                                         plan->plan.parallel_safe);
+       }
+       else
+               return (Plan *) plan;
 }
 
 /*
@@ -1242,11 +1267,14 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path)
  *       Returns a Plan node.
  */
 static Plan *
-create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
+create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
+                                                int flags)
 {
        MergeAppend *node = makeNode(MergeAppend);
        Plan       *plan = &node->plan;
        List       *tlist = build_path_tlist(root, &best_path->path);
+       int                     orig_tlist_length = list_length(tlist);
+       bool            tlist_was_changed;
        List       *pathkeys = best_path->path.pathkeys;
        List       *subplans = NIL;
        ListCell   *subpaths;
@@ -1265,7 +1293,12 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
        plan->lefttree = NULL;
        plan->righttree = NULL;
 
-       /* Compute sort column info, and adjust MergeAppend's tlist as needed */
+       /*
+        * Compute sort column info, and adjust MergeAppend's tlist as needed.
+        * Because we pass adjust_tlist_in_place = true, we may ignore the
+        * function result; it must be the same plan node.  However, we then need
+        * to detect whether any tlist entries were added.
+        */
        (void) prepare_sort_from_pathkeys(plan, pathkeys,
                                                                          best_path->path.parent->relids,
                                                                          NULL,
@@ -1275,6 +1308,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
                                                                          &node->sortOperators,
                                                                          &node->collations,
                                                                          &node->nullsFirst);
+       tlist_was_changed = (orig_tlist_length != list_length(plan->targetlist));
 
        /*
         * Now prepare the child plans.  We must apply prepare_sort_from_pathkeys
@@ -1371,7 +1405,18 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
        node->mergeplans = subplans;
        node->part_prune_info = partpruneinfo;
 
-       return (Plan *) node;
+       /*
+        * If prepare_sort_from_pathkeys added sort columns, but we were told to
+        * produce either the exact tlist or a narrow tlist, we should get rid of
+        * the sort columns again.  We must inject a projection node to do so.
+        */
+       if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST)))
+       {
+               tlist = list_truncate(list_copy(plan->targetlist), orig_tlist_length);
+               return inject_projection_plan(plan, tlist, plan->parallel_safe);
+       }
+       else
+               return plan;
 }
 
 /*
index da70438951dfe6967860162c821b1d3d5a37e88b..7189f5bd3d2db8e5e602a14b932281f07f82d983 100644 (file)
@@ -915,6 +915,79 @@ ORDER BY x;
  2 | 4
 (1 row)
 
+-- Test cases where the native ordering of a sub-select has more pathkeys
+-- than the outer query cares about
+explain (costs off)
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where q2 = q2;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Unique
+   ->  Merge Append
+         Sort Key: "*SELECT* 1".q1
+         ->  Subquery Scan on "*SELECT* 1"
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i81.q1, i81.q2
+                           ->  Seq Scan on int8_tbl i81
+                                 Filter: (q2 IS NOT NULL)
+         ->  Subquery Scan on "*SELECT* 2"
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i82.q1, i82.q2
+                           ->  Seq Scan on int8_tbl i82
+                                 Filter: (q2 IS NOT NULL)
+(15 rows)
+
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where q2 = q2;
+        q1        
+------------------
+              123
+ 4567890123456789
+(2 rows)
+
+explain (costs off)
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where -q1 = q2;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Unique
+   ->  Merge Append
+         Sort Key: "*SELECT* 1".q1
+         ->  Subquery Scan on "*SELECT* 1"
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i81.q1, i81.q2
+                           ->  Seq Scan on int8_tbl i81
+                                 Filter: ((- q1) = q2)
+         ->  Subquery Scan on "*SELECT* 2"
+               ->  Unique
+                     ->  Sort
+                           Sort Key: i82.q1, i82.q2
+                           ->  Seq Scan on int8_tbl i82
+                                 Filter: ((- q1) = q2)
+(15 rows)
+
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where -q1 = q2;
+        q1        
+------------------
+ 4567890123456789
+(1 row)
+
 -- Test proper handling of parameterized appendrel paths when the
 -- potential join qual is expensive
 create function expensivefunc(int) returns int
index eed7c8d34be571228d315c2be5d9ae700da0f471..5f4881d5943a70202974bf20eacd75645a4c4bff 100644 (file)
@@ -379,6 +379,34 @@ SELECT * FROM
 WHERE x > 3
 ORDER BY x;
 
+-- Test cases where the native ordering of a sub-select has more pathkeys
+-- than the outer query cares about
+explain (costs off)
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where q2 = q2;
+
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where q2 = q2;
+
+explain (costs off)
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where -q1 = q2;
+
+select distinct q1 from
+  (select distinct * from int8_tbl i81
+   union all
+   select distinct * from int8_tbl i82) ss
+where -q1 = q2;
+
 -- Test proper handling of parameterized appendrel paths when the
 -- potential join qual is expensive
 create function expensivefunc(int) returns int