From 8309d006cbd2cca15a5f1be69644b91f2da5eb9e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 22 Nov 2008 22:47:06 +0000 Subject: [PATCH] Switch the planner over to treating qualifications of a JOIN_SEMI join as though it is an inner rather than outer join type. This essentially means that we don't bother to separate "pushed down" qual conditions from actual join quals at a semijoin plan node; which is okay because the restrictions of SQL syntax make it impossible to have a pushed-down qual that references the inner side of a semijoin. This allows noticeably better optimization of IN/EXISTS cases than we had before, since the equivalence-class machinery can now use those quals. Also fix a couple of other mistakes that had essentially disabled the ability to unique-ify the inner relation and then join it to just a subset of the left-hand relations. An example case using the regression database is select * from tenk1 a, tenk1 b where (a.unique1,b.unique2) in (select unique1,unique2 from tenk1 c); which is planned reasonably well by 8.3 and earlier but had been forcing a cartesian join of a/b in CVS HEAD. --- src/backend/optimizer/path/costsize.c | 4 +- src/backend/optimizer/path/indxpath.c | 4 +- src/backend/optimizer/path/joinpath.c | 4 +- src/backend/optimizer/path/joinrels.c | 57 ++++++++++++++++++++++---- src/backend/optimizer/plan/initsplan.c | 28 ++++++++----- src/include/nodes/nodes.h | 24 +++++++---- 6 files changed, 87 insertions(+), 34 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index ba33da829a..0b9c581982 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.200 2008/10/21 20:42:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.201 2008/11/22 22:47:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2481,7 +2481,7 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, break; case JOIN_SEMI: nrows = outer_rel->rows * jselec; - nrows *= pselec; + /* pselec not used */ break; case JOIN_ANTI: nrows = outer_rel->rows * (1.0 - jselec); diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 297c50389f..5d1e315492 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.233 2008/09/12 14:56:13 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.234 2008/11/22 22:47:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1647,10 +1647,10 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, switch (jointype) { case JOIN_INNER: + case JOIN_SEMI: isouterjoin = false; break; case JOIN_LEFT: - case JOIN_SEMI: case JOIN_ANTI: isouterjoin = true; break; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index eaf0ffa427..fe70df49ad 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.118 2008/10/04 21:56:53 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.119 2008/11/22 22:47:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -744,12 +744,12 @@ hash_inner_and_outer(PlannerInfo *root, switch (jointype) { case JOIN_INNER: + case JOIN_SEMI: case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: isouterjoin = false; break; case JOIN_LEFT: - case JOIN_SEMI: case JOIN_ANTI: isouterjoin = true; break; diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 9ac0cc9743..5f49ee7e29 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.94 2008/08/17 19:40:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.95 2008/11/22 22:47:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -419,6 +419,27 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, match_sjinfo = sjinfo; reversed = true; } + else if (sjinfo->jointype == JOIN_SEMI && + bms_equal(sjinfo->syn_righthand, rel2->relids)) + { + /* + * For a semijoin, we can join the RHS to anything else by + * unique-ifying the RHS. + */ + if (match_sjinfo) + return false; /* invalid join path */ + match_sjinfo = sjinfo; + reversed = false; + } + else if (sjinfo->jointype == JOIN_SEMI && + bms_equal(sjinfo->syn_righthand, rel1->relids)) + { + /* Reversed semijoin case */ + if (match_sjinfo) + return false; /* invalid join path */ + match_sjinfo = sjinfo; + reversed = true; + } else { /*---------- @@ -444,14 +465,24 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, * We assume that make_outerjoininfo() set things up correctly * so that we'll only match to some SJ if the join is valid. * Set flag here to check at bottom of loop. + * + * For a semijoin, assume it's okay if either side fully contains + * the RHS (per the unique-ification case above). *---------- */ - if (bms_overlap(rel1->relids, sjinfo->min_righthand) && + if (sjinfo->jointype != JOIN_SEMI && + bms_overlap(rel1->relids, sjinfo->min_righthand) && bms_overlap(rel2->relids, sjinfo->min_righthand)) { /* seems OK */ Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand)); } + else if (sjinfo->jointype == JOIN_SEMI && + (bms_is_subset(sjinfo->syn_righthand, rel1->relids) || + bms_is_subset(sjinfo->syn_righthand, rel2->relids))) + { + /* seems OK */ + } else is_valid_inner = false; } @@ -612,15 +643,23 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) restrictlist); break; case JOIN_SEMI: - if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || - restriction_is_constant_false(restrictlist)) + /* + * Do these steps only if we actually have a regular semijoin, + * as opposed to a case where we should unique-ify the RHS. + */ + if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) && + bms_is_subset(sjinfo->min_righthand, rel2->relids)) { - mark_dummy_rel(joinrel); - break; + if (is_dummy_rel(rel1) || is_dummy_rel(rel2) || + restriction_is_constant_false(restrictlist)) + { + mark_dummy_rel(joinrel); + break; + } + add_paths_to_joinrel(root, joinrel, rel1, rel2, + JOIN_SEMI, sjinfo, + restrictlist); } - add_paths_to_joinrel(root, joinrel, rel1, rel2, - JOIN_SEMI, sjinfo, - restrictlist); /* * If we know how to unique-ify the RHS and one input rel is diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index d1b590d063..1764656da3 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.144 2008/10/25 19:51:32 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.145 2008/11/22 22:47:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -851,16 +851,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, maybe_equivalence = false; maybe_outer_join = false; } - else if (bms_overlap(relids, outerjoin_nonnullable) && - (jointype != JOIN_SEMI || - bms_nonempty_difference(relids, outerjoin_nonnullable))) + else if (bms_overlap(relids, outerjoin_nonnullable)) { /* * The qual is attached to an outer join and mentions (some of the) - * rels on the nonnullable side, so it's not degenerate. (For a - * JOIN_SEMI qual, we consider it non-degenerate only if it mentions - * both sides of the join --- if it mentions only one side, it can - * be pushed down.) + * rels on the nonnullable side, so it's not degenerate. * * We can't use such a clause to deduce equivalence (the left and * right sides might be unequal above the join because one of them has @@ -1062,6 +1057,7 @@ distribute_sublink_quals_to_rels(PlannerInfo *root, SpecialJoinInfo *sjinfo; Relids qualscope; Relids ojscope; + Relids outerjoin_nonnullable; ListCell *l; /* @@ -1076,17 +1072,27 @@ distribute_sublink_quals_to_rels(PlannerInfo *root, fslink->jointype, quals); + /* Treat as inner join if SEMI, outer join if ANTI */ qualscope = bms_union(sjinfo->syn_lefthand, sjinfo->syn_righthand); - ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + if (fslink->jointype == JOIN_SEMI) + { + ojscope = outerjoin_nonnullable = NULL; + } + else + { + Assert(fslink->jointype == JOIN_ANTI); + ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + outerjoin_nonnullable = fslink->lefthand; + } - /* Distribute the join quals much as for a regular LEFT JOIN */ + /* Distribute the join quals much as for a regular JOIN node */ foreach(l, quals) { Node *qual = (Node *) lfirst(l); distribute_qual_to_rels(root, qual, false, below_outer_join, fslink->jointype, - qualscope, ojscope, fslink->lefthand); + qualscope, ojscope, outerjoin_nonnullable); } /* Now we can add the SpecialJoinInfo to join_info_list */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index ddd9934cf1..4161c08ec5 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.214 2008/10/21 20:42:53 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.215 2008/11/22 22:47:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -542,15 +542,23 @@ typedef enum JoinType /* * OUTER joins are those for which pushed-down quals must behave differently - * from the join's own quals. This is in fact everything except INNER joins. - * However, this macro must also exclude the JOIN_UNIQUE symbols since those - * are temporary proxies for what will eventually be an INNER join. + * from the join's own quals. This is in fact everything except INNER and + * SEMI joins. However, this macro must also exclude the JOIN_UNIQUE symbols + * since those are temporary proxies for what will eventually be an INNER + * join. * - * Note: in some places it is preferable to treat JOIN_SEMI as not being - * an outer join, since it doesn't produce null-extended rows. Be aware - * of that distinction when deciding whether to use this macro. + * Note: semijoins are a hybrid case, but we choose to treat them as not + * being outer joins. This is okay principally because the SQL syntax makes + * it impossible to have a pushed-down qual that refers to the inner relation + * of a semijoin; so there is no strong need to distinguish join quals from + * pushed-down quals. This is convenient because for almost all purposes, + * quals attached to a semijoin can be treated the same as innerjoin quals. */ #define IS_OUTER_JOIN(jointype) \ - ((jointype) > JOIN_INNER && (jointype) < JOIN_UNIQUE_OUTER) + (((1 << (jointype)) & \ + ((1 << JOIN_LEFT) | \ + (1 << JOIN_FULL) | \ + (1 << JOIN_RIGHT) | \ + (1 << JOIN_ANTI))) != 0) #endif /* NODES_H */ -- 2.40.0