From 24c19e9f66863d83009a370604e40b1eaa71bcdd Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 9 May 2019 16:52:48 -0400 Subject: [PATCH] Repair issues with faulty generation of merge-append plans. create_merge_append_plan failed to honor the CP_EXACT_TLIST flag: it would generate the expected targetlist but then it felt free to add resjunk sort targets to it. This demonstrably leads to assertion failures in v11 and HEAD, and it's probably just accidental that we don't see the same in older branches. I've not looked into whether there would be any real-world consequences in non-assert builds. In HEAD, create_append_plan has sprouted the same problem, so fix that too (although we do not have any test cases that seem able to reach that bug). This is an oversight in commit 3fc6e2d7f which invented the CP_EXACT_TLIST flag, so back-patch to 9.6 where that came in. convert_subquery_pathkeys would create pathkeys for subquery output values if they match any EquivalenceClass known in the outer query and are available in the subquery's syntactic targetlist. However, the second part of that condition is wrong, because such values might not appear in the subquery relation's reltarget list, which would mean that they couldn't be accessed above the level of the subquery scan. We must check that they appear in the reltarget list, instead. This can lead to dropping knowledge about the subquery's sort ordering, but I believe it's okay, because any sort key that the outer query actually has any interest in would appear in the reltarget list. This second issue is of very long standing, but right now there's no evidence that it causes observable problems before 9.6, so I refrained from back-patching further than that. We can revisit that choice if somebody finds a way to make it cause problems in older branches. (Developing useful test cases for these issues is really problematic; fixing convert_subquery_pathkeys removes the only known way to exhibit the create_merge_append_plan bug, and neither of the test cases added by this patch causes a problem in all branches, even when considering the issues separately.) The second issue explains bug #15795 from Suresh Kumar R ("could not find pathkey item to sort" with nested DISTINCT queries). I stumbled across the first issue while investigating that. Discussion: https://postgr.es/m/15795-fadb56c8e44ee73c@postgresql.org --- src/backend/optimizer/path/pathkeys.c | 72 +++++++++++++++++------- src/backend/optimizer/plan/createplan.c | 65 ++++++++++++++++++---- src/test/regress/expected/union.out | 73 +++++++++++++++++++++++++ src/test/regress/sql/union.sql | 28 ++++++++++ 4 files changed, 208 insertions(+), 30 deletions(-) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 68b2204b3b..c301e41a4a 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -33,6 +33,7 @@ static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, int partkeycol); +static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); @@ -771,9 +772,11 @@ build_expression_pathkey(PlannerInfo *root, * 'subquery_pathkeys': the subquery's output pathkeys, in its terms. * 'subquery_tlist': the subquery's output targetlist, in its terms. * - * It is not necessary for caller to do truncate_useless_pathkeys(), - * because we select keys in a way that takes usefulness of the keys into - * account. + * We intentionally don't do truncate_useless_pathkeys() here, because there + * are situations where seeing the raw ordering of the subquery is helpful. + * For example, if it returns ORDER BY x DESC, that may prompt us to + * construct a mergejoin using DESC order rather than ASC order; but the + * right_merge_direction heuristic would have us throw the knowledge away. */ List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, @@ -799,22 +802,22 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * that same targetlist entry. */ TargetEntry *tle; + Var *outer_var; if (sub_eclass->ec_sortref == 0) /* can't happen */ elog(ERROR, "volatile EquivalenceClass has no sortref"); tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist); Assert(tle); - /* resjunk items aren't visible to outer query */ - if (!tle->resjunk) + /* Is TLE actually available to the outer query? */ + outer_var = find_var_for_subquery_tle(rel, tle); + if (outer_var) { /* We can represent this sub_pathkey */ EquivalenceMember *sub_member; - Expr *outer_expr; EquivalenceClass *outer_ec; Assert(list_length(sub_eclass->ec_members) == 1); sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members); - outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle); /* * Note: it might look funny to be setting sortref = 0 for a @@ -828,7 +831,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, */ outer_ec = get_eclass_for_sort_expr(root, - outer_expr, + (Expr *) outer_var, NULL, sub_eclass->ec_opfamilies, sub_member->em_datatype, @@ -885,14 +888,15 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, foreach(k, subquery_tlist) { TargetEntry *tle = (TargetEntry *) lfirst(k); + Var *outer_var; Expr *tle_expr; - Expr *outer_expr; EquivalenceClass *outer_ec; PathKey *outer_pk; int score; - /* resjunk items aren't visible to outer query */ - if (tle->resjunk) + /* Is TLE actually available to the outer query? */ + outer_var = find_var_for_subquery_tle(rel, tle); + if (!outer_var) continue; /* @@ -907,16 +911,9 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, if (!equal(tle_expr, sub_expr)) continue; - /* - * Build a representation of this targetlist entry as an - * outer Var. - */ - outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, - tle); - - /* See if we have a matching EC for that */ + /* See if we have a matching EC for the TLE */ outer_ec = get_eclass_for_sort_expr(root, - outer_expr, + (Expr *) outer_var, NULL, sub_eclass->ec_opfamilies, sub_expr_type, @@ -973,6 +970,41 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, return retval; } +/* + * find_var_for_subquery_tle + * + * If the given subquery tlist entry is due to be emitted by the subquery's + * scan node, return a Var for it, else return NULL. + * + * We need this to ensure that we don't return pathkeys describing values + * that are unavailable above the level of the subquery scan. + */ +static Var * +find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle) +{ + ListCell *lc; + + /* If the TLE is resjunk, it's certainly not visible to the outer query */ + if (tle->resjunk) + return NULL; + + /* Search the rel's targetlist to see what it will return */ + foreach(lc, rel->reltarget->exprs) + { + Var *var = (Var *) lfirst(lc); + + /* Ignore placeholders */ + if (!IsA(var, Var)) + continue; + Assert(var->varno == rel->relid); + + /* If we find a Var referencing this TLE, we're good */ + if (var->varattno == tle->resno) + return copyObject(var); /* Make a copy for safety */ + } + return NULL; +} + /* * build_join_pathkeys * Build the path keys for a join relation constructed by mergejoin or diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index efe073a3ee..270c11901b 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -81,8 +81,10 @@ static List *get_gating_quals(PlannerInfo *root, List *quals); static Plan *create_gating_plan(PlannerInfo *root, Path *path, Plan *plan, List *gating_quals); static Plan *create_join_plan(PlannerInfo *root, JoinPath *best_path); -static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path); -static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path); +static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path, + int flags); +static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, + int flags); static Result *create_group_result_plan(PlannerInfo *root, GroupResultPath *best_path); static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path); @@ -390,11 +392,13 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) break; case T_Append: plan = create_append_plan(root, - (AppendPath *) best_path); + (AppendPath *) best_path, + flags); break; case T_MergeAppend: plan = create_merge_append_plan(root, - (MergeAppendPath *) best_path); + (MergeAppendPath *) best_path, + flags); break; case T_Result: if (IsA(best_path, ProjectionPath)) @@ -1054,10 +1058,12 @@ create_join_plan(PlannerInfo *root, JoinPath *best_path) * Returns a Plan node. */ static Plan * -create_append_plan(PlannerInfo *root, AppendPath *best_path) +create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) { Append *plan; List *tlist = build_path_tlist(root, &best_path->path); + int orig_tlist_length = list_length(tlist); + bool tlist_was_changed = false; List *pathkeys = best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; @@ -1112,7 +1118,12 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) if (pathkeys != NIL) { - /* Compute sort column info, and adjust the Append's tlist as needed */ + /* + * Compute sort column info, and adjust the Append's tlist as needed. + * Because we pass adjust_tlist_in_place = true, we may ignore the + * function result; it must be the same plan node. However, we then + * need to detect whether any tlist entries were added. + */ (void) prepare_sort_from_pathkeys((Plan *) plan, pathkeys, best_path->path.parent->relids, NULL, @@ -1122,6 +1133,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) &nodeSortOperators, &nodeCollations, &nodeNullsFirst); + tlist_was_changed = (orig_tlist_length != list_length(plan->plan.targetlist)); } /* Build the plan for each child */ @@ -1231,7 +1243,20 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) copy_generic_path_info(&plan->plan, (Path *) best_path); - return (Plan *) plan; + /* + * If prepare_sort_from_pathkeys added sort columns, but we were told to + * produce either the exact tlist or a narrow tlist, we should get rid of + * the sort columns again. We must inject a projection node to do so. + */ + if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST))) + { + tlist = list_truncate(list_copy(plan->plan.targetlist), + orig_tlist_length); + return inject_projection_plan((Plan *) plan, tlist, + plan->plan.parallel_safe); + } + else + return (Plan *) plan; } /* @@ -1242,11 +1267,14 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) * Returns a Plan node. */ static Plan * -create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) +create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, + int flags) { MergeAppend *node = makeNode(MergeAppend); Plan *plan = &node->plan; List *tlist = build_path_tlist(root, &best_path->path); + int orig_tlist_length = list_length(tlist); + bool tlist_was_changed; List *pathkeys = best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; @@ -1265,7 +1293,12 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) plan->lefttree = NULL; plan->righttree = NULL; - /* Compute sort column info, and adjust MergeAppend's tlist as needed */ + /* + * Compute sort column info, and adjust MergeAppend's tlist as needed. + * Because we pass adjust_tlist_in_place = true, we may ignore the + * function result; it must be the same plan node. However, we then need + * to detect whether any tlist entries were added. + */ (void) prepare_sort_from_pathkeys(plan, pathkeys, best_path->path.parent->relids, NULL, @@ -1275,6 +1308,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) &node->sortOperators, &node->collations, &node->nullsFirst); + tlist_was_changed = (orig_tlist_length != list_length(plan->targetlist)); /* * Now prepare the child plans. We must apply prepare_sort_from_pathkeys @@ -1371,7 +1405,18 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) node->mergeplans = subplans; node->part_prune_info = partpruneinfo; - return (Plan *) node; + /* + * If prepare_sort_from_pathkeys added sort columns, but we were told to + * produce either the exact tlist or a narrow tlist, we should get rid of + * the sort columns again. We must inject a projection node to do so. + */ + if (tlist_was_changed && (flags & (CP_EXACT_TLIST | CP_SMALL_TLIST))) + { + tlist = list_truncate(list_copy(plan->targetlist), orig_tlist_length); + return inject_projection_plan(plan, tlist, plan->parallel_safe); + } + else + return plan; } /* diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index da70438951..7189f5bd3d 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -915,6 +915,79 @@ ORDER BY x; 2 | 4 (1 row) +-- Test cases where the native ordering of a sub-select has more pathkeys +-- than the outer query cares about +explain (costs off) +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where q2 = q2; + QUERY PLAN +---------------------------------------------------------- + Unique + -> Merge Append + Sort Key: "*SELECT* 1".q1 + -> Subquery Scan on "*SELECT* 1" + -> Unique + -> Sort + Sort Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: (q2 IS NOT NULL) + -> Subquery Scan on "*SELECT* 2" + -> Unique + -> Sort + Sort Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: (q2 IS NOT NULL) +(15 rows) + +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where q2 = q2; + q1 +------------------ + 123 + 4567890123456789 +(2 rows) + +explain (costs off) +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where -q1 = q2; + QUERY PLAN +-------------------------------------------------------- + Unique + -> Merge Append + Sort Key: "*SELECT* 1".q1 + -> Subquery Scan on "*SELECT* 1" + -> Unique + -> Sort + Sort Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: ((- q1) = q2) + -> Subquery Scan on "*SELECT* 2" + -> Unique + -> Sort + Sort Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: ((- q1) = q2) +(15 rows) + +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where -q1 = q2; + q1 +------------------ + 4567890123456789 +(1 row) + -- Test proper handling of parameterized appendrel paths when the -- potential join qual is expensive create function expensivefunc(int) returns int diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql index eed7c8d34b..5f4881d594 100644 --- a/src/test/regress/sql/union.sql +++ b/src/test/regress/sql/union.sql @@ -379,6 +379,34 @@ SELECT * FROM WHERE x > 3 ORDER BY x; +-- Test cases where the native ordering of a sub-select has more pathkeys +-- than the outer query cares about +explain (costs off) +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where q2 = q2; + +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where q2 = q2; + +explain (costs off) +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where -q1 = q2; + +select distinct q1 from + (select distinct * from int8_tbl i81 + union all + select distinct * from int8_tbl i82) ss +where -q1 = q2; + -- Test proper handling of parameterized appendrel paths when the -- potential join qual is expensive create function expensivefunc(int) returns int -- 2.40.0