From 0816fad6eebddb8f1f0e21635e46625815d690b9 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 27 Jan 2012 19:46:41 -0500 Subject: [PATCH] Undo 8.4-era lobotomization of subquery pullup rules. After the planner was fixed to convert some IN/EXISTS subqueries into semijoins or antijoins, we had to prevent it from doing that in some cases where the plans risked getting much worse. The reason the plans got worse was that in the unoptimized implementation, subqueries could reference parameters from the outer query at any join level, and so full table scans could be avoided even if they were one or more levels of join below where the semi/anti join would be. Now that we have sufficient mechanism in the planner to handle such cases properly, it should no longer be necessary to play dumb here. This reverts commits 07b9936a0f10d746e5076239813a5e938f2f16be and cd1f0d04bf06938c0ee5728fc8424d62bcf2eef3. The latter was a stopgap fix that wasn't really sufficiently analyzed at the time. Rather than just restricting ourselves to cases where the new join can be stacked on the right-hand input, we should also consider whether it can be stacked on the left-hand input. --- src/backend/optimizer/prep/prepjointree.c | 218 +++++++++++++++------- 1 file changed, 149 insertions(+), 69 deletions(-) diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index b412c689b6..59a52687f9 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -58,7 +58,8 @@ typedef struct reduce_outer_joins_state static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, Relids *relids); static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, - Relids available_rels, Node **jtlink); + Node **jtlink1, Relids available_rels1, + Node **jtlink2, Relids available_rels2); static Node *pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, JoinExpr *lowest_outer_join, @@ -192,8 +193,9 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, /* Set up a link representing the rebuilt jointree */ jtlink = (Node *) newf; /* Now process qual --- all children are available for use */ - newf->quals = pull_up_sublinks_qual_recurse(root, f->quals, frelids, - &jtlink); + newf->quals = pull_up_sublinks_qual_recurse(root, f->quals, + &jtlink, frelids, + NULL, NULL); /* * Note that the result will be either newf, or a stack of JoinExprs @@ -237,16 +239,6 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, * point of the available_rels machinations is to ensure that we only * pull up quals for which that's okay. * - * XXX for the moment, we refrain from pulling up IN/EXISTS clauses - * appearing in LEFT or RIGHT join conditions. Although it is - * semantically valid to do so under the above conditions, we end up - * with a query in which the semijoin or antijoin must be evaluated - * below the outer join, which could perform far worse than leaving it - * as a sublink that is executed only for row pairs that meet the - * other join conditions. Fixing this seems to require considerable - * restructuring of the executor, but maybe someday it can happen. - * (See also the comparable case in pull_up_sublinks_qual_recurse.) - * * We don't expect to see any pre-existing JOIN_SEMI or JOIN_ANTI * nodes here. */ @@ -254,26 +246,25 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, { case JOIN_INNER: j->quals = pull_up_sublinks_qual_recurse(root, j->quals, + &jtlink, bms_union(leftrelids, rightrelids), - &jtlink); + NULL, NULL); break; case JOIN_LEFT: -#ifdef NOT_USED /* see XXX comment above */ j->quals = pull_up_sublinks_qual_recurse(root, j->quals, + &j->rarg, rightrelids, - &j->rarg); -#endif + NULL, NULL); break; case JOIN_FULL: /* can't do anything with full-join quals */ break; case JOIN_RIGHT: -#ifdef NOT_USED /* see XXX comment above */ j->quals = pull_up_sublinks_qual_recurse(root, j->quals, + &j->larg, leftrelids, - &j->larg); -#endif + NULL, NULL); break; default: elog(ERROR, "unrecognized join type: %d", @@ -303,14 +294,22 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, /* * Recurse through top-level qual nodes for pull_up_sublinks() * - * jtlink points to the link in the jointree where any new JoinExprs should be - * inserted. If we find multiple pull-up-able SubLinks, they'll get stacked - * there in the order we encounter them. We rely on subsequent optimization - * to rearrange the stack if appropriate. + * jtlink1 points to the link in the jointree where any new JoinExprs should + * be inserted if they reference available_rels1 (i.e., available_rels1 + * denotes the relations present underneath jtlink1). Optionally, jtlink2 can + * point to a second link where new JoinExprs should be inserted if they + * reference available_rels2 (pass NULL for both those arguments if not used). + * Note that SubLinks referencing both sets of variables cannot be optimized. + * If we find multiple pull-up-able SubLinks, they'll get stacked onto jtlink1 + * and/or jtlink2 in the order we encounter them. We rely on subsequent + * optimization to rearrange the stack if appropriate. + * + * Returns the replacement qual node, or NULL if the qual should be removed. */ static Node * pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, - Relids available_rels, Node **jtlink) + Node **jtlink1, Relids available_rels1, + Node **jtlink2, Relids available_rels2) { if (node == NULL) return NULL; @@ -323,45 +322,105 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, /* Is it a convertible ANY or EXISTS clause? */ if (sublink->subLinkType == ANY_SUBLINK) { - j = convert_ANY_sublink_to_join(root, sublink, - available_rels); - if (j) + if ((j = convert_ANY_sublink_to_join(root, sublink, + available_rels1)) != NULL) + { + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink1; + *jtlink1 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ + j->rarg = pull_up_sublinks_jointree_recurse(root, + j->rarg, + &child_rels); + /* + * Now recursively process the pulled-up quals. Any inserted + * joins can get stacked onto either j->larg or j->rarg, + * depending on which rels they reference. + */ + j->quals = pull_up_sublinks_qual_recurse(root, + j->quals, + &j->larg, + available_rels1, + &j->rarg, + child_rels); + /* Return NULL representing constant TRUE */ + return NULL; + } + if (available_rels2 != NULL && + (j = convert_ANY_sublink_to_join(root, sublink, + available_rels2)) != NULL) { - /* Yes; recursively process what we pulled up */ + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink2; + *jtlink2 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg, &child_rels); - /* Any inserted joins get stacked onto j->rarg */ + /* + * Now recursively process the pulled-up quals. Any inserted + * joins can get stacked onto either j->larg or j->rarg, + * depending on which rels they reference. + */ j->quals = pull_up_sublinks_qual_recurse(root, j->quals, - child_rels, - &j->rarg); - /* Now insert the new join node into the join tree */ - j->larg = *jtlink; - *jtlink = (Node *) j; - /* and return NULL representing constant TRUE */ + &j->larg, + available_rels2, + &j->rarg, + child_rels); + /* Return NULL representing constant TRUE */ return NULL; } } else if (sublink->subLinkType == EXISTS_SUBLINK) { - j = convert_EXISTS_sublink_to_join(root, sublink, false, - available_rels); - if (j) + if ((j = convert_EXISTS_sublink_to_join(root, sublink, false, + available_rels1)) != NULL) { - /* Yes; recursively process what we pulled up */ + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink1; + *jtlink1 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg, &child_rels); - /* Any inserted joins get stacked onto j->rarg */ + /* + * Now recursively process the pulled-up quals. Any inserted + * joins can get stacked onto either j->larg or j->rarg, + * depending on which rels they reference. + */ j->quals = pull_up_sublinks_qual_recurse(root, j->quals, - child_rels, - &j->rarg); - /* Now insert the new join node into the join tree */ - j->larg = *jtlink; - *jtlink = (Node *) j; - /* and return NULL representing constant TRUE */ + &j->larg, + available_rels1, + &j->rarg, + child_rels); + /* Return NULL representing constant TRUE */ + return NULL; + } + if (available_rels2 != NULL && + (j = convert_EXISTS_sublink_to_join(root, sublink, false, + available_rels2)) != NULL) + { + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink2; + *jtlink2 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ + j->rarg = pull_up_sublinks_jointree_recurse(root, + j->rarg, + &child_rels); + /* + * Now recursively process the pulled-up quals. Any inserted + * joins can get stacked onto either j->larg or j->rarg, + * depending on which rels they reference. + */ + j->quals = pull_up_sublinks_qual_recurse(root, + j->quals, + &j->larg, + available_rels2, + &j->rarg, + child_rels); + /* Return NULL representing constant TRUE */ return NULL; } } @@ -373,40 +432,59 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, /* If the immediate argument of NOT is EXISTS, try to convert */ SubLink *sublink = (SubLink *) get_notclausearg((Expr *) node); JoinExpr *j; + Relids child_rels; if (sublink && IsA(sublink, SubLink)) { if (sublink->subLinkType == EXISTS_SUBLINK) { - j = convert_EXISTS_sublink_to_join(root, sublink, true, - available_rels); - if (j) + if ((j = convert_EXISTS_sublink_to_join(root, sublink, true, + available_rels1)) != NULL) { + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink1; + *jtlink1 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ + j->rarg = pull_up_sublinks_jointree_recurse(root, + j->rarg, + &child_rels); /* - * For the moment, refrain from recursing underneath NOT. - * As in pull_up_sublinks_jointree_recurse, recursing here - * would result in inserting a join underneath an ANTI - * join with which it could not commute, and that could - * easily lead to a worse plan than what we've - * historically generated. + * Now recursively process the pulled-up quals. Because + * we are underneath a NOT, we can't pull up sublinks + * that reference the left-hand stuff, but it's still + * okay to pull up sublinks referencing j->rarg. */ -#ifdef NOT_USED - /* Yes; recursively process what we pulled up */ - Relids child_rels; - + j->quals = pull_up_sublinks_qual_recurse(root, + j->quals, + &j->rarg, + child_rels, + NULL, NULL); + /* Return NULL representing constant TRUE */ + return NULL; + } + if (available_rels2 != NULL && + (j = convert_EXISTS_sublink_to_join(root, sublink, true, + available_rels2)) != NULL) + { + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink2; + *jtlink2 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg, &child_rels); - /* Any inserted joins get stacked onto j->rarg */ + /* + * Now recursively process the pulled-up quals. Because + * we are underneath a NOT, we can't pull up sublinks + * that reference the left-hand stuff, but it's still + * okay to pull up sublinks referencing j->rarg. + */ j->quals = pull_up_sublinks_qual_recurse(root, j->quals, + &j->rarg, child_rels, - &j->rarg); -#endif - /* Now insert the new join node into the join tree */ - j->larg = *jtlink; - *jtlink = (Node *) j; - /* and return NULL representing constant TRUE */ + NULL, NULL); + /* Return NULL representing constant TRUE */ return NULL; } } @@ -427,8 +505,10 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, newclause = pull_up_sublinks_qual_recurse(root, oldclause, - available_rels, - jtlink); + jtlink1, + available_rels1, + jtlink2, + available_rels2); if (newclause) newclauses = lappend(newclauses, newclause); } -- 2.40.0