]> granicus.if.org Git - postgresql/commitdiff
Basic partition-wise join functionality.
authorRobert Haas <rhaas@postgresql.org>
Fri, 6 Oct 2017 15:11:10 +0000 (11:11 -0400)
committerRobert Haas <rhaas@postgresql.org>
Fri, 6 Oct 2017 15:11:10 +0000 (11:11 -0400)
Instead of joining two partitioned tables in their entirety we can, if
it is an equi-join on the partition keys, join the matching partitions
individually.  This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels.  This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation.  In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side.  All the same, for now,
turn this feature off by default.

Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical.  It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.

Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me.  A few final adjustments by me.

Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com

34 files changed:
contrib/postgres_fdw/expected/postgres_fdw.out
contrib/postgres_fdw/sql/postgres_fdw.sql
doc/src/sgml/config.sgml
doc/src/sgml/fdwhandler.sgml
src/backend/optimizer/README
src/backend/optimizer/geqo/geqo_eval.c
src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/joinpath.c
src/backend/optimizer/path/joinrels.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/plan/setrefs.c
src/backend/optimizer/prep/prepunion.c
src/backend/optimizer/util/pathnode.c
src/backend/optimizer/util/placeholder.c
src/backend/optimizer/util/plancat.c
src/backend/optimizer/util/relnode.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/foreign/fdwapi.h
src/include/nodes/extensible.h
src/include/nodes/relation.h
src/include/optimizer/cost.h
src/include/optimizer/pathnode.h
src/include/optimizer/paths.h
src/include/optimizer/placeholder.h
src/include/optimizer/planner.h
src/include/optimizer/prep.h
src/test/regress/expected/partition_join.out [new file with mode: 0644]
src/test/regress/expected/sysviews.out
src/test/regress/parallel_schedule
src/test/regress/serial_schedule
src/test/regress/sql/partition_join.sql [new file with mode: 0644]

index c19b3318c7259ca742c740d92414da897bf88efc..4339bbf9df66c186283b7e1c33b3512fd988acc2 100644 (file)
@@ -7346,3 +7346,123 @@ AND ftoptions @> array['fetch_size=60000'];
 (1 row)
 
 ROLLBACK;
+-- ===================================================================
+-- test partition-wise-joins
+-- ===================================================================
+SET enable_partition_wise_join=on;
+CREATE TABLE fprt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE fprt1_p1 (LIKE fprt1);
+CREATE TABLE fprt1_p2 (LIKE fprt1);
+INSERT INTO fprt1_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 2) i;
+INSERT INTO fprt1_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 2) i;
+CREATE FOREIGN TABLE ftprt1_p1 PARTITION OF fprt1 FOR VALUES FROM (0) TO (250)
+       SERVER loopback OPTIONS (table_name 'fprt1_p1', use_remote_estimate 'true');
+CREATE FOREIGN TABLE ftprt1_p2 PARTITION OF fprt1 FOR VALUES FROM (250) TO (500)
+       SERVER loopback OPTIONS (TABLE_NAME 'fprt1_p2');
+ANALYZE fprt1;
+ANALYZE fprt1_p1;
+ANALYZE fprt1_p2;
+CREATE TABLE fprt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE fprt2_p1 (LIKE fprt2);
+CREATE TABLE fprt2_p2 (LIKE fprt2);
+INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
+INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
+CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250)
+       SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
+CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
+       SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
+ANALYZE fprt2;
+ANALYZE fprt2_p1;
+ANALYZE fprt2_p2;
+-- inner join three tables
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
+                                                     QUERY PLAN                                                     
+--------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a, t3.c
+   ->  Append
+         ->  Foreign Scan
+               Relations: ((public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)) INNER JOIN (public.ftprt1_p1 t3)
+         ->  Foreign Scan
+               Relations: ((public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)) INNER JOIN (public.ftprt1_p2 t3)
+(7 rows)
+
+SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
+  a  |  b  |  c   
+-----+-----+------
+   0 |   0 | 0000
+ 150 | 150 | 0003
+ 250 | 250 | 0005
+ 400 | 400 | 0008
+(4 rows)
+
+-- left outer join + nullable clasue
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a, ftprt2_p1.b, ftprt2_p1.c
+   ->  Append
+         ->  Foreign Scan
+               Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
+(5 rows)
+
+SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
+ a | b |  c   
+---+---+------
+ 0 | 0 | 0000
+ 2 |   | 
+ 4 |   | 
+ 6 | 6 | 0000
+ 8 |   | 
+(5 rows)
+
+-- with whole-row reference
+EXPLAIN (COSTS OFF)
+SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Sort
+   Sort Key: ((t1.*)::fprt1), ((t2.*)::fprt2)
+   ->  Append
+         ->  Foreign Scan
+               Relations: (public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)
+         ->  Foreign Scan
+               Relations: (public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)
+(7 rows)
+
+SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
+       t1       |       t2       
+----------------+----------------
+ (0,0,0000)     | (0,0,0000)
+ (150,150,0003) | (150,150,0003)
+ (250,250,0005) | (250,250,0005)
+ (400,400,0008) | (400,400,0008)
+(4 rows)
+
+-- join with lateral reference
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a, t1.b
+   ->  Append
+         ->  Foreign Scan
+               Relations: (public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)
+         ->  Foreign Scan
+               Relations: (public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)
+(7 rows)
+
+SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
+  a  |  b  
+-----+-----
+   0 |   0
+ 150 | 150
+ 250 | 250
+ 400 | 400
+(4 rows)
+
+RESET enable_partition_wise_join;
index 5f65d9d966ce55e3e472e632c981b6ae902dffca..ddfec7930d50530950e58c8ab424321f958305e2 100644 (file)
@@ -1764,3 +1764,56 @@ WHERE ftrelid = 'table30000'::regclass
 AND ftoptions @> array['fetch_size=60000'];
 
 ROLLBACK;
+
+-- ===================================================================
+-- test partition-wise-joins
+-- ===================================================================
+SET enable_partition_wise_join=on;
+
+CREATE TABLE fprt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE fprt1_p1 (LIKE fprt1);
+CREATE TABLE fprt1_p2 (LIKE fprt1);
+INSERT INTO fprt1_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 2) i;
+INSERT INTO fprt1_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 2) i;
+CREATE FOREIGN TABLE ftprt1_p1 PARTITION OF fprt1 FOR VALUES FROM (0) TO (250)
+       SERVER loopback OPTIONS (table_name 'fprt1_p1', use_remote_estimate 'true');
+CREATE FOREIGN TABLE ftprt1_p2 PARTITION OF fprt1 FOR VALUES FROM (250) TO (500)
+       SERVER loopback OPTIONS (TABLE_NAME 'fprt1_p2');
+ANALYZE fprt1;
+ANALYZE fprt1_p1;
+ANALYZE fprt1_p2;
+
+CREATE TABLE fprt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE fprt2_p1 (LIKE fprt2);
+CREATE TABLE fprt2_p2 (LIKE fprt2);
+INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
+INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
+CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250)
+       SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
+CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
+       SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
+ANALYZE fprt2;
+ANALYZE fprt2_p1;
+ANALYZE fprt2_p2;
+
+-- inner join three tables
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
+SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
+
+-- left outer join + nullable clasue
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
+SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
+
+-- with whole-row reference
+EXPLAIN (COSTS OFF)
+SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
+SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
+
+-- join with lateral reference
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
+SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
+
+RESET enable_partition_wise_join;
index c13f60230f9a80904f43705e41637e8bd3d88377..b012a2699117816dc039bc13fb8403ba56ba3f98 100644 (file)
@@ -3632,6 +3632,26 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-enable-partition-wise-join" xreflabel="enable_partition_wise_join">
+      <term><varname>enable_partition_wise_join</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>enable_partition_wise_join</> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Enables or disables the query planner's use of partition-wise join,
+        which allows a join between partitioned tables to be performed by
+        joining the matching partitions.  Partition-wise join currently applies
+        only when the join conditions include all the partition keys, which
+        must be of the same data type and have exactly matching sets of child
+        partitions.  Because partition-wise join planning can use significantly
+        more CPU time and memory during planning, the default is
+        <literal>off</>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-enable-seqscan" xreflabel="enable_seqscan">
       <term><varname>enable_seqscan</varname> (<type>boolean</type>)
       <indexterm>
index a59e03af98beff199b6e476518c0df9287580324..e63e29fd96f35c3d783219c9724a28398c21899d 100644 (file)
@@ -1292,6 +1292,26 @@ ShutdownForeignScan(ForeignScanState *node);
    </para>
    </sect2>
 
+   <sect2 id="fdw-callbacks-reparameterize-paths">
+    <title>FDW Routines For reparameterization of paths</title>
+
+    <para>
+<programlisting>
+List *
+ReparameterizeForeignPathByChild(PlannerInfo *root, List *fdw_private,
+                                 RelOptInfo *child_rel);
+</programlisting>
+    This function is called while converting a path parameterized by the
+    top-most parent of the given child relation <literal>child_rel</> to be
+    parameterized by the child relation. The function is used to reparameterize
+    any paths or translate any expression nodes saved in the given
+    <literal>fdw_private</> member of a <structname>ForeignPath</>. The
+    callback may use <literal>reparameterize_path_by_child</>,
+    <literal>adjust_appendrel_attrs</> or
+    <literal>adjust_appendrel_attrs_multilevel</> as required.
+    </para>
+   </sect2>
+
    </sect1>
 
    <sect1 id="fdw-helpers">
index 62242e8564e296eed7920f3a2f1a3bbb4de96dde..031e5037617d98b11d8b5f8b6a7121ff6bd63104 100644 (file)
@@ -1075,3 +1075,29 @@ be desirable to postpone the Gather stage until as near to the top of the
 plan as possible.  Expanding the range of cases in which more work can be
 pushed below the Gather (and costing them accurately) is likely to keep us
 busy for a long time to come.
+
+Partition-wise joins
+--------------------
+A join between two similarly partitioned tables can be broken down into joins
+between their matching partitions if there exists an equi-join condition
+between the partition keys of the joining tables. The equi-join between
+partition keys implies that all join partners for a given row in one
+partitioned table must be in the corresponding partition of the other
+partitioned table. Because of this the join between partitioned tables to be
+broken into joins between the matching partitions. The resultant join is
+partitioned in the same way as the joining relations, thus allowing an N-way
+join between similarly partitioned tables having equi-join condition between
+their partition keys to be broken down into N-way joins between their matching
+partitions. This technique of breaking down a join between partition tables
+into join between their partitions is called partition-wise join. We will use
+term "partitioned relation" for either a partitioned table or a join between
+compatibly partitioned tables.
+
+The partitioning properties of a partitioned relation are stored in its
+RelOptInfo.  The information about data types of partition keys are stored in
+PartitionSchemeData structure. The planner maintains a list of canonical
+partition schemes (distinct PartitionSchemeData objects) so that RelOptInfo of
+any two partitioned relations with same partitioning scheme point to the same
+PartitionSchemeData object.  This reduces memory consumed by
+PartitionSchemeData objects and makes it easy to compare the partition schemes
+of joining relations.
index b5cab0c35189d6c48c0fd9aec1c65dfcba6bf710..3cf268cbd398d11f5a86ee6bda0ef4ae8a14196f 100644 (file)
@@ -264,6 +264,9 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
                        /* Keep searching if join order is not valid */
                        if (joinrel)
                        {
+                               /* Create paths for partition-wise joins. */
+                               generate_partition_wise_join_paths(root, joinrel);
+
                                /* Create GatherPaths for any useful partial paths for rel */
                                generate_gather_paths(root, joinrel);
 
index a7866a99e0080b27b6b41e5fa76bc4db4cc18c2b..5535b63803079d571f8c24ea96340be63371f48a 100644 (file)
@@ -920,12 +920,79 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
                childrel = find_base_rel(root, childRTindex);
                Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
 
+               if (rel->part_scheme)
+               {
+                       AttrNumber      attno;
+
+                       /*
+                        * We need attr_needed data for building targetlist of a join
+                        * relation representing join between matching partitions for
+                        * partition-wise join. A given attribute of a child will be
+                        * needed in the same highest joinrel where the corresponding
+                        * attribute of parent is needed. Hence it suffices to use the
+                        * same Relids set for parent and child.
+                        */
+                       for (attno = rel->min_attr; attno <= rel->max_attr; attno++)
+                       {
+                               int                     index = attno - rel->min_attr;
+                               Relids          attr_needed = rel->attr_needed[index];
+
+                               /* System attributes do not need translation. */
+                               if (attno <= 0)
+                               {
+                                       Assert(rel->min_attr == childrel->min_attr);
+                                       childrel->attr_needed[index] = attr_needed;
+                               }
+                               else
+                               {
+                                       Var                *var = list_nth_node(Var,
+                                                                                                       appinfo->translated_vars,
+                                                                                                       attno - 1);
+                                       int                     child_index;
+
+                                       child_index = var->varattno - childrel->min_attr;
+                                       childrel->attr_needed[child_index] = attr_needed;
+                               }
+                       }
+               }
+
+               /*
+                * Copy/Modify targetlist. Even if this child is deemed empty, we need
+                * its targetlist in case it falls on nullable side in a child-join
+                * because of partition-wise join.
+                *
+                * NB: the resulting childrel->reltarget->exprs may contain arbitrary
+                * expressions, which otherwise would not occur in a rel's targetlist.
+                * Code that might be looking at an appendrel child must cope with
+                * such.  (Normally, a rel's targetlist would only include Vars and
+                * PlaceHolderVars.)  XXX we do not bother to update the cost or width
+                * fields of childrel->reltarget; not clear if that would be useful.
+                */
+               childrel->reltarget->exprs = (List *)
+                       adjust_appendrel_attrs(root,
+                                                                  (Node *) rel->reltarget->exprs,
+                                                                  1, &appinfo);
+
+               /*
+                * We have to make child entries in the EquivalenceClass data
+                * structures as well.  This is needed either if the parent
+                * participates in some eclass joins (because we will want to consider
+                * inner-indexscan joins on the individual children) or if the parent
+                * has useful pathkeys (because we should try to build MergeAppend
+                * paths that produce those sort orderings). Even if this child is
+                * deemed dummy, it may fall on nullable side in a child-join, which
+                * in turn may participate in a MergeAppend, where we will need the
+                * EquivalenceClass data structures.
+                */
+               if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
+                       add_child_rel_equivalences(root, appinfo, rel, childrel);
+               childrel->has_eclass_joins = rel->has_eclass_joins;
+
                /*
-                * We have to copy the parent's targetlist and quals to the child,
-                * with appropriate substitution of variables.  However, only the
-                * baserestrictinfo quals are needed before we can check for
-                * constraint exclusion; so do that first and then check to see if we
-                * can disregard this child.
+                * We have to copy the parent's quals to the child, with appropriate
+                * substitution of variables.  However, only the baserestrictinfo
+                * quals are needed before we can check for constraint exclusion; so
+                * do that first and then check to see if we can disregard this child.
                 *
                 * The child rel's targetlist might contain non-Var expressions, which
                 * means that substitution into the quals could produce opportunities
@@ -1052,44 +1119,11 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
                        continue;
                }
 
-               /*
-                * CE failed, so finish copying/modifying targetlist and join quals.
-                *
-                * NB: the resulting childrel->reltarget->exprs may contain arbitrary
-                * expressions, which otherwise would not occur in a rel's targetlist.
-                * Code that might be looking at an appendrel child must cope with
-                * such.  (Normally, a rel's targetlist would only include Vars and
-                * PlaceHolderVars.)  XXX we do not bother to update the cost or width
-                * fields of childrel->reltarget; not clear if that would be useful.
-                */
+               /* CE failed, so finish copying/modifying join quals. */
                childrel->joininfo = (List *)
                        adjust_appendrel_attrs(root,
                                                                   (Node *) rel->joininfo,
                                                                   1, &appinfo);
-               childrel->reltarget->exprs = (List *)
-                       adjust_appendrel_attrs(root,
-                                                                  (Node *) rel->reltarget->exprs,
-                                                                  1, &appinfo);
-
-               /*
-                * We have to make child entries in the EquivalenceClass data
-                * structures as well.  This is needed either if the parent
-                * participates in some eclass joins (because we will want to consider
-                * inner-indexscan joins on the individual children) or if the parent
-                * has useful pathkeys (because we should try to build MergeAppend
-                * paths that produce those sort orderings).
-                */
-               if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
-                       add_child_rel_equivalences(root, appinfo, rel, childrel);
-               childrel->has_eclass_joins = rel->has_eclass_joins;
-
-               /*
-                * Note: we could compute appropriate attr_needed data for the child's
-                * variables, by transforming the parent's attr_needed through the
-                * translated_vars mapping.  However, currently there's no need
-                * because attr_needed is only examined for base relations not
-                * otherrels.  So we just leave the child's attr_needed empty.
-                */
 
                /*
                 * If parallelism is allowable for this query in general, see whether
@@ -1262,14 +1296,14 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                live_childrels = lappend(live_childrels, childrel);
        }
 
-       /* Add paths to the "append" relation. */
+       /* Add paths to the append relation. */
        add_paths_to_append_rel(root, rel, live_childrels);
 }
 
 
 /*
  * add_paths_to_append_rel
- *             Generate paths for given "append" relation given the set of non-dummy
+ *             Generate paths for the given append relation given the set of non-dummy
  *             child rels.
  *
  * The function collects all parameterizations and orderings supported by the
@@ -1293,30 +1327,44 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
        RangeTblEntry *rte;
        bool            build_partitioned_rels = false;
 
-       /*
-        * A root partition will already have a PartitionedChildRelInfo, and a
-        * non-root partitioned table doesn't need one, because its Append paths
-        * will get flattened into the parent anyway.  For a subquery RTE, no
-        * PartitionedChildRelInfo exists; we collect all partitioned_rels
-        * associated with any child.  (This assumes that we don't need to look
-        * through multiple levels of subquery RTEs; if we ever do, we could
-        * create a PartitionedChildRelInfo with the accumulated list of
-        * partitioned_rels which would then be found when populated our parent
-        * rel with paths.  For the present, that appears to be unnecessary.)
-        */
-       rte = planner_rt_fetch(rel->relid, root);
-       switch (rte->rtekind)
+       if (IS_SIMPLE_REL(rel))
        {
-               case RTE_RELATION:
-                       if (rte->relkind == RELKIND_PARTITIONED_TABLE)
-                               partitioned_rels =
-                                       get_partitioned_child_rels(root, rel->relid);
-                       break;
-               case RTE_SUBQUERY:
-                       build_partitioned_rels = true;
-                       break;
-               default:
-                       elog(ERROR, "unexpected rtekind: %d", (int) rte->rtekind);
+               /*
+                * A root partition will already have a PartitionedChildRelInfo, and a
+                * non-root partitioned table doesn't need one, because its Append
+                * paths will get flattened into the parent anyway.  For a subquery
+                * RTE, no PartitionedChildRelInfo exists; we collect all
+                * partitioned_rels associated with any child.  (This assumes that we
+                * don't need to look through multiple levels of subquery RTEs; if we
+                * ever do, we could create a PartitionedChildRelInfo with the
+                * accumulated list of partitioned_rels which would then be found when
+                * populated our parent rel with paths.  For the present, that appears
+                * to be unnecessary.)
+                */
+               rte = planner_rt_fetch(rel->relid, root);
+               switch (rte->rtekind)
+               {
+                       case RTE_RELATION:
+                               if (rte->relkind == RELKIND_PARTITIONED_TABLE)
+                                       partitioned_rels =
+                                               get_partitioned_child_rels(root, rel->relid);
+                               break;
+                       case RTE_SUBQUERY:
+                               build_partitioned_rels = true;
+                               break;
+                       default:
+                               elog(ERROR, "unexpcted rtekind: %d", (int) rte->rtekind);
+               }
+       }
+       else if (rel->reloptkind == RELOPT_JOINREL && rel->part_scheme)
+       {
+               /*
+                * Associate PartitionedChildRelInfo of the root partitioned tables
+                * being joined with the root partitioned join (indicated by
+                * RELOPT_JOINREL).
+                */
+               partitioned_rels = get_partitioned_child_rels_for_join(root,
+                                                                                                                          rel->relids);
        }
 
        /*
@@ -2422,16 +2470,22 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
                join_search_one_level(root, lev);
 
                /*
-                * Run generate_gather_paths() for each just-processed joinrel.  We
-                * could not do this earlier because both regular and partial paths
-                * can get added to a particular joinrel at multiple times within
-                * join_search_one_level.  After that, we're done creating paths for
-                * the joinrel, so run set_cheapest().
+                * Run generate_partition_wise_join_paths() and
+                * generate_gather_paths() for each just-processed joinrel.  We could
+                * not do this earlier because both regular and partial paths can get
+                * added to a particular joinrel at multiple times within
+                * join_search_one_level.
+                *
+                * After that, we're done creating paths for the joinrel, so run
+                * set_cheapest().
                 */
                foreach(lc, root->join_rel_level[lev])
                {
                        rel = (RelOptInfo *) lfirst(lc);
 
+                       /* Create paths for partition-wise joins. */
+                       generate_partition_wise_join_paths(root, rel);
+
                        /* Create GatherPaths for any useful partial paths for rel */
                        generate_gather_paths(root, rel);
 
@@ -3179,6 +3233,82 @@ compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages)
        return parallel_workers;
 }
 
+/*
+ * generate_partition_wise_join_paths
+ *             Create paths representing partition-wise join for given partitioned
+ *             join relation.
+ *
+ * This must not be called until after we are done adding paths for all
+ * child-joins. Otherwise, add_path might delete a path to which some path
+ * generated here has a reference.
+ */
+void
+generate_partition_wise_join_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+       List       *live_children = NIL;
+       int                     cnt_parts;
+       int                     num_parts;
+       RelOptInfo **part_rels;
+
+       /* Handle only join relations here. */
+       if (!IS_JOIN_REL(rel))
+               return;
+
+       /*
+        * If we've already proven this join is empty, we needn't consider any
+        * more paths for it.
+        */
+       if (IS_DUMMY_REL(rel))
+               return;
+
+       /*
+        * Nothing to do if the relation is not partitioned. An outer join
+        * relation which had empty inner relation in every pair will have rest of
+        * the partitioning properties set except the child-join RelOptInfos. See
+        * try_partition_wise_join() for more explanation.
+        */
+       if (rel->nparts <= 0 || rel->part_rels == NULL)
+               return;
+
+       /* Guard against stack overflow due to overly deep partition hierarchy. */
+       check_stack_depth();
+
+       num_parts = rel->nparts;
+       part_rels = rel->part_rels;
+
+       /* Collect non-dummy child-joins. */
+       for (cnt_parts = 0; cnt_parts < num_parts; cnt_parts++)
+       {
+               RelOptInfo *child_rel = part_rels[cnt_parts];
+
+               /* Add partition-wise join paths for partitioned child-joins. */
+               generate_partition_wise_join_paths(root, child_rel);
+
+               /* Dummy children will not be scanned, so ingore those. */
+               if (IS_DUMMY_REL(child_rel))
+                       continue;
+
+               set_cheapest(child_rel);
+
+#ifdef OPTIMIZER_DEBUG
+               debug_print_rel(root, rel);
+#endif
+
+               live_children = lappend(live_children, child_rel);
+       }
+
+       /* If all child-joins are dummy, parent join is also dummy. */
+       if (!live_children)
+       {
+               mark_dummy_rel(rel);
+               return;
+       }
+
+       /* Build additional paths for this rel from child-join paths. */
+       add_paths_to_append_rel(root, rel, live_children);
+       list_free(live_children);
+}
+
 
 /*****************************************************************************
  *                     DEBUG SUPPORT
index f76da490447049b99a8bd79409b4bc1db29a25be..ce32b8a4b902f958b85409bc71923496edf5f850 100644 (file)
@@ -127,6 +127,7 @@ bool                enable_material = true;
 bool           enable_mergejoin = true;
 bool           enable_hashjoin = true;
 bool           enable_gathermerge = true;
+bool           enable_partition_wise_join = false;
 
 typedef struct
 {
index 43833ea9c9fa86b861ac9e799e029b4bd7b5b659..310262d87c07d758f1ea54330db79934197093a1 100644 (file)
 /* Hook for plugins to get control in add_paths_to_joinrel() */
 set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
 
-#define PATH_PARAM_BY_REL(path, rel)  \
+/*
+ * Paths parameterized by the parent can be considered to be parameterized by
+ * any of its child.
+ */
+#define PATH_PARAM_BY_PARENT(path, rel)        \
+       ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path),        \
+                                                                          (rel)->top_parent_relids))
+#define PATH_PARAM_BY_REL_SELF(path, rel)  \
        ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
 
+#define PATH_PARAM_BY_REL(path, rel)   \
+       (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
+
 static void try_partial_mergejoin_path(PlannerInfo *root,
                                                   RelOptInfo *joinrel,
                                                   Path *outer_path,
@@ -115,6 +125,19 @@ add_paths_to_joinrel(PlannerInfo *root,
        JoinPathExtraData extra;
        bool            mergejoin_allowed = true;
        ListCell   *lc;
+       Relids          joinrelids;
+
+       /*
+        * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
+        * between child relations, even if there is a SpecialJoinInfo node for
+        * the join between the topmost parents. So, while calculating Relids set
+        * representing the restriction, consider relids of topmost parent of
+        * partitions.
+        */
+       if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
+               joinrelids = joinrel->top_parent_relids;
+       else
+               joinrelids = joinrel->relids;
 
        extra.restrictlist = restrictlist;
        extra.mergeclause_list = NIL;
@@ -211,16 +234,16 @@ add_paths_to_joinrel(PlannerInfo *root,
                 * join has already been proven legal.)  If the SJ is relevant, it
                 * presents constraints for joining to anything not in its RHS.
                 */
-               if (bms_overlap(joinrel->relids, sjinfo2->min_righthand) &&
-                       !bms_overlap(joinrel->relids, sjinfo2->min_lefthand))
+               if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
+                       !bms_overlap(joinrelids, sjinfo2->min_lefthand))
                        extra.param_source_rels = bms_join(extra.param_source_rels,
                                                                                           bms_difference(root->all_baserels,
                                                                                                                          sjinfo2->min_righthand));
 
                /* full joins constrain both sides symmetrically */
                if (sjinfo2->jointype == JOIN_FULL &&
-                       bms_overlap(joinrel->relids, sjinfo2->min_lefthand) &&
-                       !bms_overlap(joinrel->relids, sjinfo2->min_righthand))
+                       bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
+                       !bms_overlap(joinrelids, sjinfo2->min_righthand))
                        extra.param_source_rels = bms_join(extra.param_source_rels,
                                                                                           bms_difference(root->all_baserels,
                                                                                                                          sjinfo2->min_lefthand));
@@ -347,11 +370,25 @@ try_nestloop_path(PlannerInfo *root,
        JoinCostWorkspace workspace;
        RelOptInfo *innerrel = inner_path->parent;
        RelOptInfo *outerrel = outer_path->parent;
-       Relids          innerrelids = innerrel->relids;
-       Relids          outerrelids = outerrel->relids;
+       Relids          innerrelids;
+       Relids          outerrelids;
        Relids          inner_paramrels = PATH_REQ_OUTER(inner_path);
        Relids          outer_paramrels = PATH_REQ_OUTER(outer_path);
 
+       /*
+        * Paths are parameterized by top-level parents, so run parameterization
+        * tests on the parent relids.
+        */
+       if (innerrel->top_parent_relids)
+               innerrelids = innerrel->top_parent_relids;
+       else
+               innerrelids = innerrel->relids;
+
+       if (outerrel->top_parent_relids)
+               outerrelids = outerrel->top_parent_relids;
+       else
+               outerrelids = outerrel->relids;
+
        /*
         * Check to see if proposed path is still parameterized, and reject if the
         * parameterization wouldn't be sensible --- unless allow_star_schema_join
@@ -387,6 +424,27 @@ try_nestloop_path(PlannerInfo *root,
                                                  workspace.startup_cost, workspace.total_cost,
                                                  pathkeys, required_outer))
        {
+               /*
+                * If the inner path is parameterized, it is parameterized by the
+                * topmost parent of the outer rel, not the outer rel itself.  Fix
+                * that.
+                */
+               if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
+               {
+                       inner_path = reparameterize_path_by_child(root, inner_path,
+                                                                                                         outer_path->parent);
+
+                       /*
+                        * If we could not translate the path, we can't create nest loop
+                        * path.
+                        */
+                       if (!inner_path)
+                       {
+                               bms_free(required_outer);
+                               return;
+                       }
+               }
+
                add_path(joinrel, (Path *)
                                 create_nestloop_path(root,
                                                                          joinrel,
@@ -432,8 +490,20 @@ try_partial_nestloop_path(PlannerInfo *root,
        if (inner_path->param_info != NULL)
        {
                Relids          inner_paramrels = inner_path->param_info->ppi_req_outer;
+               RelOptInfo *outerrel = outer_path->parent;
+               Relids          outerrelids;
+
+               /*
+                * The inner and outer paths are parameterized, if at all, by the top
+                * level parents, not the child relations, so we must use those relids
+                * for our paramaterization tests.
+                */
+               if (outerrel->top_parent_relids)
+                       outerrelids = outerrel->top_parent_relids;
+               else
+                       outerrelids = outerrel->relids;
 
-               if (!bms_is_subset(inner_paramrels, outer_path->parent->relids))
+               if (!bms_is_subset(inner_paramrels, outerrelids))
                        return;
        }
 
@@ -446,6 +516,22 @@ try_partial_nestloop_path(PlannerInfo *root,
        if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
                return;
 
+       /*
+        * If the inner path is parameterized, it is parameterized by the topmost
+        * parent of the outer rel, not the outer rel itself.  Fix that.
+        */
+       if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
+       {
+               inner_path = reparameterize_path_by_child(root, inner_path,
+                                                                                                 outer_path->parent);
+
+               /*
+                * If we could not translate the path, we can't create nest loop path.
+                */
+               if (!inner_path)
+                       return;
+       }
+
        /* Might be good enough to be worth trying, so let's try it. */
        add_partial_path(joinrel, (Path *)
                                         create_nestloop_path(root,
index 6ee23509c58d787fe0653762a31260829c90f321..2b868c52de4116e58a21ab03e64f701deb2639c2 100644 (file)
  */
 #include "postgres.h"
 
+#include "miscadmin.h"
+#include "catalog/partition.h"
+#include "nodes/relation.h"
+#include "optimizer/clauses.h"
 #include "optimizer/joininfo.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
+#include "optimizer/prep.h"
+#include "optimizer/cost.h"
 #include "utils/memutils.h"
+#include "utils/lsyscache.h"
 
 
 static void make_rels_by_clause_joins(PlannerInfo *root,
@@ -29,12 +36,17 @@ static void make_rels_by_clauseless_joins(PlannerInfo *root,
 static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
 static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel);
 static bool is_dummy_rel(RelOptInfo *rel);
-static void mark_dummy_rel(RelOptInfo *rel);
 static bool restriction_is_constant_false(List *restrictlist,
                                                          bool only_pushed_down);
 static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
                                                        RelOptInfo *rel2, RelOptInfo *joinrel,
                                                        SpecialJoinInfo *sjinfo, List *restrictlist);
+static void try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1,
+                                               RelOptInfo *rel2, RelOptInfo *joinrel,
+                                               SpecialJoinInfo *parent_sjinfo,
+                                               List *parent_restrictlist);
+static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel,
+                                                        bool strict_op);
 
 
 /*
@@ -892,6 +904,9 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
                        elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
                        break;
        }
+
+       /* Apply partition-wise join technique, if possible. */
+       try_partition_wise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
 }
 
 
@@ -1197,7 +1212,7 @@ is_dummy_rel(RelOptInfo *rel)
  * is that the best solution is to explicitly make the dummy path in the same
  * context the given RelOptInfo is in.
  */
-static void
+void
 mark_dummy_rel(RelOptInfo *rel)
 {
        MemoryContext oldcontext;
@@ -1268,3 +1283,300 @@ restriction_is_constant_false(List *restrictlist, bool only_pushed_down)
        }
        return false;
 }
+
+/*
+ * Assess whether join between given two partitioned relations can be broken
+ * down into joins between matching partitions; a technique called
+ * "partition-wise join"
+ *
+ * Partition-wise join is possible when a. Joining relations have same
+ * partitioning scheme b. There exists an equi-join between the partition keys
+ * of the two relations.
+ *
+ * Partition-wise join is planned as follows (details: optimizer/README.)
+ *
+ * 1. Create the RelOptInfos for joins between matching partitions i.e
+ * child-joins and add paths to them.
+ *
+ * 2. Construct Append or MergeAppend paths across the set of child joins.
+ * This second phase is implemented by generate_partition_wise_join_paths().
+ *
+ * The RelOptInfo, SpecialJoinInfo and restrictlist for each child join are
+ * obtained by translating the respective parent join structures.
+ */
+static void
+try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
+                                               RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo,
+                                               List *parent_restrictlist)
+{
+       int                     nparts;
+       int                     cnt_parts;
+
+       /* Guard against stack overflow due to overly deep partition hierarchy. */
+       check_stack_depth();
+
+       /* Nothing to do, if the join relation is not partitioned. */
+       if (!IS_PARTITIONED_REL(joinrel))
+               return;
+
+       /*
+        * set_rel_pathlist() may not create paths in children of an empty
+        * partitioned table and so we can not add paths to child-joins. So, deem
+        * such a join as unpartitioned. When a partitioned relation is deemed
+        * empty because all its children are empty, dummy path will be set in
+        * each of the children.  In such a case we could still consider the join
+        * as partitioned, but it might not help much.
+        */
+       if (IS_DUMMY_REL(rel1) || IS_DUMMY_REL(rel2))
+               return;
+
+       /*
+        * Since this join relation is partitioned, all the base relations
+        * participating in this join must be partitioned and so are all the
+        * intermediate join relations.
+        */
+       Assert(IS_PARTITIONED_REL(rel1) && IS_PARTITIONED_REL(rel2));
+       Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2));
+
+       /*
+        * The partition scheme of the join relation should match that of the
+        * joining relations.
+        */
+       Assert(joinrel->part_scheme == rel1->part_scheme &&
+                  joinrel->part_scheme == rel2->part_scheme);
+
+       /*
+        * Since we allow partition-wise join only when the partition bounds of
+        * the joining relations exactly match, the partition bounds of the join
+        * should match those of the joining relations.
+        */
+       Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
+                                                                 joinrel->part_scheme->parttyplen,
+                                                                 joinrel->part_scheme->parttypbyval,
+                                                                 joinrel->boundinfo, rel1->boundinfo));
+       Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
+                                                                 joinrel->part_scheme->parttyplen,
+                                                                 joinrel->part_scheme->parttypbyval,
+                                                                 joinrel->boundinfo, rel2->boundinfo));
+
+       nparts = joinrel->nparts;
+
+       /* Allocate space to hold child-joins RelOptInfos, if not already done. */
+       if (!joinrel->part_rels)
+               joinrel->part_rels =
+                       (RelOptInfo **) palloc0(sizeof(RelOptInfo *) * nparts);
+
+       /*
+        * Create child-join relations for this partitioned join, if those don't
+        * exist. Add paths to child-joins for a pair of child relations
+        * corresponding to the given pair of parent relations.
+        */
+       for (cnt_parts = 0; cnt_parts < nparts; cnt_parts++)
+       {
+               RelOptInfo *child_rel1 = rel1->part_rels[cnt_parts];
+               RelOptInfo *child_rel2 = rel2->part_rels[cnt_parts];
+               SpecialJoinInfo *child_sjinfo;
+               List       *child_restrictlist;
+               RelOptInfo *child_joinrel;
+               Relids          child_joinrelids;
+               AppendRelInfo **appinfos;
+               int                     nappinfos;
+
+               /* We should never try to join two overlapping sets of rels. */
+               Assert(!bms_overlap(child_rel1->relids, child_rel2->relids));
+               child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids);
+               appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos);
+
+               /*
+                * Construct SpecialJoinInfo from parent join relations's
+                * SpecialJoinInfo.
+                */
+               child_sjinfo = build_child_join_sjinfo(root, parent_sjinfo,
+                                                                                          child_rel1->relids,
+                                                                                          child_rel2->relids);
+
+               /*
+                * Construct restrictions applicable to the child join from those
+                * applicable to the parent join.
+                */
+               child_restrictlist =
+                       (List *) adjust_appendrel_attrs(root,
+                                                                                       (Node *) parent_restrictlist,
+                                                                                       nappinfos, appinfos);
+               pfree(appinfos);
+
+               child_joinrel = joinrel->part_rels[cnt_parts];
+               if (!child_joinrel)
+               {
+                       child_joinrel = build_child_join_rel(root, child_rel1, child_rel2,
+                                                                                                joinrel, child_restrictlist,
+                                                                                                child_sjinfo,
+                                                                                                child_sjinfo->jointype);
+                       joinrel->part_rels[cnt_parts] = child_joinrel;
+               }
+
+               Assert(bms_equal(child_joinrel->relids, child_joinrelids));
+
+               populate_joinrel_with_paths(root, child_rel1, child_rel2,
+                                                                       child_joinrel, child_sjinfo,
+                                                                       child_restrictlist);
+       }
+}
+
+/*
+ * Returns true if there exists an equi-join condition for each pair of
+ * partition keys from given relations being joined.
+ */
+bool
+have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype,
+                                          List *restrictlist)
+{
+       PartitionScheme part_scheme = rel1->part_scheme;
+       ListCell   *lc;
+       int                     cnt_pks;
+       bool            pk_has_clause[PARTITION_MAX_KEYS];
+       bool            strict_op;
+
+       /*
+        * This function should be called when the joining relations have same
+        * partitioning scheme.
+        */
+       Assert(rel1->part_scheme == rel2->part_scheme);
+       Assert(part_scheme);
+
+       memset(pk_has_clause, 0, sizeof(pk_has_clause));
+       foreach(lc, restrictlist)
+       {
+               RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+               OpExpr     *opexpr;
+               Expr       *expr1;
+               Expr       *expr2;
+               int                     ipk1;
+               int                     ipk2;
+
+               /* If processing an outer join, only use its own join clauses. */
+               if (IS_OUTER_JOIN(jointype) && rinfo->is_pushed_down)
+                       continue;
+
+               /* Skip clauses which can not be used for a join. */
+               if (!rinfo->can_join)
+                       continue;
+
+               /* Skip clauses which are not equality conditions. */
+               if (!rinfo->mergeopfamilies)
+                       continue;
+
+               opexpr = (OpExpr *) rinfo->clause;
+               Assert(is_opclause(opexpr));
+
+               /*
+                * The equi-join between partition keys is strict if equi-join between
+                * at least one partition key is using a strict operator. See
+                * explanation about outer join reordering identity 3 in
+                * optimizer/README
+                */
+               strict_op = op_strict(opexpr->opno);
+
+               /* Match the operands to the relation. */
+               if (bms_is_subset(rinfo->left_relids, rel1->relids) &&
+                       bms_is_subset(rinfo->right_relids, rel2->relids))
+               {
+                       expr1 = linitial(opexpr->args);
+                       expr2 = lsecond(opexpr->args);
+               }
+               else if (bms_is_subset(rinfo->left_relids, rel2->relids) &&
+                                bms_is_subset(rinfo->right_relids, rel1->relids))
+               {
+                       expr1 = lsecond(opexpr->args);
+                       expr2 = linitial(opexpr->args);
+               }
+               else
+                       continue;
+
+               /*
+                * Only clauses referencing the partition keys are useful for
+                * partition-wise join.
+                */
+               ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op);
+               if (ipk1 < 0)
+                       continue;
+               ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op);
+               if (ipk2 < 0)
+                       continue;
+
+               /*
+                * If the clause refers to keys at different ordinal positions, it can
+                * not be used for partition-wise join.
+                */
+               if (ipk1 != ipk2)
+                       continue;
+
+               /*
+                * The clause allows partition-wise join if only it uses the same
+                * operator family as that specified by the partition key.
+                */
+               if (!list_member_oid(rinfo->mergeopfamilies,
+                                                        part_scheme->partopfamily[ipk1]))
+                       continue;
+
+               /* Mark the partition key as having an equi-join clause. */
+               pk_has_clause[ipk1] = true;
+       }
+
+       /* Check whether every partition key has an equi-join condition. */
+       for (cnt_pks = 0; cnt_pks < part_scheme->partnatts; cnt_pks++)
+       {
+               if (!pk_has_clause[cnt_pks])
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Find the partition key from the given relation matching the given
+ * expression. If found, return the index of the partition key, else return -1.
+ */
+static int
+match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
+{
+       int                     cnt;
+
+       /* This function should be called only for partitioned relations. */
+       Assert(rel->part_scheme);
+
+       /* Remove any relabel decorations. */
+       while (IsA(expr, RelabelType))
+               expr = (Expr *) (castNode(RelabelType, expr))->arg;
+
+       for (cnt = 0; cnt < rel->part_scheme->partnatts; cnt++)
+       {
+               ListCell   *lc;
+
+               Assert(rel->partexprs);
+               foreach(lc, rel->partexprs[cnt])
+               {
+                       if (equal(lfirst(lc), expr))
+                               return cnt;
+               }
+
+               if (!strict_op)
+                       continue;
+
+               /*
+                * If it's a strict equi-join a NULL partition key on one side will
+                * not join a NULL partition key on the other side. So, rows with NULL
+                * partition key from a partition on one side can not join with those
+                * from a non-matching partition on the other side. So, search the
+                * nullable partition keys as well.
+                */
+               Assert(rel->nullable_partexprs);
+               foreach(lc, rel->nullable_partexprs[cnt])
+               {
+                       if (equal(lfirst(lc), expr))
+                               return cnt;
+               }
+       }
+
+       return -1;
+}
index 28216629aa4643c06598897c40be0a024b6fa667..792ea84a81854ddf83e4ea171d321d71e9382b91 100644 (file)
@@ -250,7 +250,8 @@ static Plan *prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
 static EquivalenceMember *find_ec_member_for_tle(EquivalenceClass *ec,
                                           TargetEntry *tle,
                                           Relids relids);
-static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys);
+static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
+                                               Relids relids);
 static Sort *make_sort_from_groupcols(List *groupcls,
                                                 AttrNumber *grpColIdx,
                                                 Plan *lefttree);
@@ -1652,7 +1653,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags)
        subplan = create_plan_recurse(root, best_path->subpath,
                                                                  flags | CP_SMALL_TLIST);
 
-       plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys);
+       plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys, NULL);
 
        copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -3771,6 +3772,8 @@ create_mergejoin_plan(PlannerInfo *root,
        ListCell   *lc;
        ListCell   *lop;
        ListCell   *lip;
+       Path       *outer_path = best_path->jpath.outerjoinpath;
+       Path       *inner_path = best_path->jpath.innerjoinpath;
 
        /*
         * MergeJoin can project, so we don't have to demand exact tlists from the
@@ -3834,8 +3837,10 @@ create_mergejoin_plan(PlannerInfo *root,
         */
        if (best_path->outersortkeys)
        {
+               Relids          outer_relids = outer_path->parent->relids;
                Sort       *sort = make_sort_from_pathkeys(outer_plan,
-                                                                                                  best_path->outersortkeys);
+                                                                                                  best_path->outersortkeys,
+                                                                                                  outer_relids);
 
                label_sort_with_costsize(root, sort, -1.0);
                outer_plan = (Plan *) sort;
@@ -3846,8 +3851,10 @@ create_mergejoin_plan(PlannerInfo *root,
 
        if (best_path->innersortkeys)
        {
+               Relids          inner_relids = inner_path->parent->relids;
                Sort       *sort = make_sort_from_pathkeys(inner_plan,
-                                                                                                  best_path->innersortkeys);
+                                                                                                  best_path->innersortkeys,
+                                                                                                  inner_relids);
 
                label_sort_with_costsize(root, sort, -1.0);
                inner_plan = (Plan *) sort;
@@ -5525,8 +5532,9 @@ make_sort(Plan *lefttree, int numCols,
  * the output parameters *p_numsortkeys etc.
  *
  * When looking for matches to an EquivalenceClass's members, we will only
- * consider child EC members if they match 'relids'.  This protects against
- * possible incorrect matches to child expressions that contain no Vars.
+ * consider child EC members if they belong to given 'relids'.  This protects
+ * against possible incorrect matches to child expressions that contain no
+ * Vars.
  *
  * If reqColIdx isn't NULL then it contains sort key column numbers that
  * we should match.  This is used when making child plans for a MergeAppend;
@@ -5681,11 +5689,11 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
                                        continue;
 
                                /*
-                                * Ignore child members unless they match the rel being
+                                * Ignore child members unless they belong to the rel being
                                 * sorted.
                                 */
                                if (em->em_is_child &&
-                                       !bms_equal(em->em_relids, relids))
+                                       !bms_is_subset(em->em_relids, relids))
                                        continue;
 
                                sortexpr = em->em_expr;
@@ -5769,7 +5777,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
  * find_ec_member_for_tle
  *             Locate an EquivalenceClass member matching the given TLE, if any
  *
- * Child EC members are ignored unless they match 'relids'.
+ * Child EC members are ignored unless they belong to given 'relids'.
  */
 static EquivalenceMember *
 find_ec_member_for_tle(EquivalenceClass *ec,
@@ -5797,10 +5805,10 @@ find_ec_member_for_tle(EquivalenceClass *ec,
                        continue;
 
                /*
-                * Ignore child members unless they match the rel being sorted.
+                * Ignore child members unless they belong to the rel being sorted.
                 */
                if (em->em_is_child &&
-                       !bms_equal(em->em_relids, relids))
+                       !bms_is_subset(em->em_relids, relids))
                        continue;
 
                /* Match if same expression (after stripping relabel) */
@@ -5821,9 +5829,10 @@ find_ec_member_for_tle(EquivalenceClass *ec,
  *
  *       'lefttree' is the node which yields input tuples
  *       'pathkeys' is the list of pathkeys by which the result is to be sorted
+ *       'relids' is the set of relations required by prepare_sort_from_pathkeys()
  */
 static Sort *
-make_sort_from_pathkeys(Plan *lefttree, List *pathkeys)
+make_sort_from_pathkeys(Plan *lefttree, List *pathkeys, Relids relids)
 {
        int                     numsortkeys;
        AttrNumber *sortColIdx;
@@ -5833,7 +5842,7 @@ make_sort_from_pathkeys(Plan *lefttree, List *pathkeys)
 
        /* Compute sort column info, and adjust lefttree as needed */
        lefttree = prepare_sort_from_pathkeys(lefttree, pathkeys,
-                                                                                 NULL,
+                                                                                 relids,
                                                                                  NULL,
                                                                                  false,
                                                                                  &numsortkeys,
index e7ac11e9bb795e49fe9035ebfecf014bd54151c1..ecdd7280eb8b6a91855bfd7d6a1c89014bea9551 100644 (file)
@@ -6150,3 +6150,25 @@ get_partitioned_child_rels(PlannerInfo *root, Index rti)
 
        return result;
 }
+
+/*
+ * get_partitioned_child_rels_for_join
+ *             Build and return a list containing the RTI of every partitioned
+ *             relation which is a child of some rel included in the join.
+ */
+List *
+get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
+{
+       List       *result = NIL;
+       ListCell   *l;
+
+       foreach(l, root->pcinfo_list)
+       {
+               PartitionedChildRelInfo *pc = lfirst(l);
+
+               if (bms_is_member(pc->parent_relid, join_relids))
+                       result = list_concat(result, list_copy(pc->child_rels));
+       }
+
+       return result;
+}
index dee4414cec228c38420350385e8e85d402cd157f..1382b679748aa52151b1ee06f4489a18a25b70c8 100644 (file)
@@ -41,6 +41,9 @@ typedef struct
        int                     num_vars;               /* number of plain Var tlist entries */
        bool            has_ph_vars;    /* are there PlaceHolderVar entries? */
        bool            has_non_vars;   /* are there other entries? */
+       bool            has_conv_whole_rows;    /* are there ConvertRowtypeExpr
+                                                                                * entries encapsulating a whole-row
+                                                                                * Var? */
        tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER];        /* has num_vars entries */
 } indexed_tlist;
 
@@ -139,6 +142,7 @@ static List *set_returning_clause_references(PlannerInfo *root,
                                                                int rtoffset);
 static bool extract_query_dependencies_walker(Node *node,
                                                                  PlannerInfo *context);
+static bool is_converted_whole_row_reference(Node *node);
 
 /*****************************************************************************
  *
@@ -1944,6 +1948,7 @@ build_tlist_index(List *tlist)
        itlist->tlist = tlist;
        itlist->has_ph_vars = false;
        itlist->has_non_vars = false;
+       itlist->has_conv_whole_rows = false;
 
        /* Find the Vars and fill in the index array */
        vinfo = itlist->vars;
@@ -1962,6 +1967,8 @@ build_tlist_index(List *tlist)
                }
                else if (tle->expr && IsA(tle->expr, PlaceHolderVar))
                        itlist->has_ph_vars = true;
+               else if (is_converted_whole_row_reference((Node *) tle->expr))
+                       itlist->has_conv_whole_rows = true;
                else
                        itlist->has_non_vars = true;
        }
@@ -1977,7 +1984,10 @@ build_tlist_index(List *tlist)
  * This is like build_tlist_index, but we only index tlist entries that
  * are Vars belonging to some rel other than the one specified.  We will set
  * has_ph_vars (allowing PlaceHolderVars to be matched), but not has_non_vars
- * (so nothing other than Vars and PlaceHolderVars can be matched).
+ * (so nothing other than Vars and PlaceHolderVars can be matched). In case of
+ * DML, where this function will be used, returning lists from child relations
+ * will be appended similar to a simple append relation. That does not require
+ * fixing ConvertRowtypeExpr references. So, those are not considered here.
  */
 static indexed_tlist *
 build_tlist_index_other_vars(List *tlist, Index ignore_rel)
@@ -1994,6 +2004,7 @@ build_tlist_index_other_vars(List *tlist, Index ignore_rel)
        itlist->tlist = tlist;
        itlist->has_ph_vars = false;
        itlist->has_non_vars = false;
+       itlist->has_conv_whole_rows = false;
 
        /* Find the desired Vars and fill in the index array */
        vinfo = itlist->vars;
@@ -2197,6 +2208,7 @@ static Node *
 fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
 {
        Var                *newvar;
+       bool            converted_whole_row;
 
        if (node == NULL)
                return NULL;
@@ -2266,8 +2278,12 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
        }
        if (IsA(node, Param))
                return fix_param_node(context->root, (Param *) node);
+
        /* Try matching more complex expressions too, if tlists have any */
-       if (context->outer_itlist && context->outer_itlist->has_non_vars)
+       converted_whole_row = is_converted_whole_row_reference(node);
+       if (context->outer_itlist &&
+               (context->outer_itlist->has_non_vars ||
+                (context->outer_itlist->has_conv_whole_rows && converted_whole_row)))
        {
                newvar = search_indexed_tlist_for_non_var((Expr *) node,
                                                                                                  context->outer_itlist,
@@ -2275,7 +2291,9 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
                if (newvar)
                        return (Node *) newvar;
        }
-       if (context->inner_itlist && context->inner_itlist->has_non_vars)
+       if (context->inner_itlist &&
+               (context->inner_itlist->has_non_vars ||
+                (context->inner_itlist->has_conv_whole_rows && converted_whole_row)))
        {
                newvar = search_indexed_tlist_for_non_var((Expr *) node,
                                                                                                  context->inner_itlist,
@@ -2395,7 +2413,9 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context)
                /* If no match, just fall through to process it normally */
        }
        /* Try matching more complex expressions too, if tlist has any */
-       if (context->subplan_itlist->has_non_vars)
+       if (context->subplan_itlist->has_non_vars ||
+               (context->subplan_itlist->has_conv_whole_rows &&
+                is_converted_whole_row_reference(node)))
        {
                newvar = search_indexed_tlist_for_non_var((Expr *) node,
                                                                                                  context->subplan_itlist,
@@ -2602,3 +2622,33 @@ extract_query_dependencies_walker(Node *node, PlannerInfo *context)
        return expression_tree_walker(node, extract_query_dependencies_walker,
                                                                  (void *) context);
 }
+
+/*
+ * is_converted_whole_row_reference
+ *             If the given node is a ConvertRowtypeExpr encapsulating a whole-row
+ *             reference as implicit cast, return true. Otherwise return false.
+ */
+static bool
+is_converted_whole_row_reference(Node *node)
+{
+       ConvertRowtypeExpr *convexpr;
+
+       if (!node || !IsA(node, ConvertRowtypeExpr))
+               return false;
+
+       /* Traverse nested ConvertRowtypeExpr's. */
+       convexpr = castNode(ConvertRowtypeExpr, node);
+       while (convexpr->convertformat == COERCE_IMPLICIT_CAST &&
+                  IsA(convexpr->arg, ConvertRowtypeExpr))
+               convexpr = castNode(ConvertRowtypeExpr, convexpr->arg);
+
+       if (IsA(convexpr->arg, Var))
+       {
+               Var *var = castNode(Var, convexpr->arg);
+
+               if (var->varattno == 0)
+                       return true;
+       }
+
+       return false;
+}
index 3e0c3de86d54935b3e3accca8360899b3ec5f571..1c84a2cb2807e172a2b1745d7a88ed3dfd538295 100644 (file)
@@ -2269,6 +2269,59 @@ adjust_child_relids(Relids relids, int nappinfos, AppendRelInfo **appinfos)
        return relids;
 }
 
+/*
+ * Replace any relid present in top_parent_relids with its child in
+ * child_relids. Members of child_relids can be multiple levels below top
+ * parent in the partition hierarchy.
+ */
+Relids
+adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
+                                                          Relids child_relids, Relids top_parent_relids)
+{
+       AppendRelInfo **appinfos;
+       int                     nappinfos;
+       Relids          parent_relids = NULL;
+       Relids          result;
+       Relids          tmp_result = NULL;
+       int                     cnt;
+
+       /*
+        * If the given relids set doesn't contain any of the top parent relids,
+        * it will remain unchanged.
+        */
+       if (!bms_overlap(relids, top_parent_relids))
+               return relids;
+
+       appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
+
+       /* Construct relids set for the immediate parent of the given child. */
+       for (cnt = 0; cnt < nappinfos; cnt++)
+       {
+               AppendRelInfo *appinfo = appinfos[cnt];
+
+               parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
+       }
+
+       /* Recurse if immediate parent is not the top parent. */
+       if (!bms_equal(parent_relids, top_parent_relids))
+       {
+               tmp_result = adjust_child_relids_multilevel(root, relids,
+                                                                                                       parent_relids,
+                                                                                                       top_parent_relids);
+               relids = tmp_result;
+       }
+
+       result = adjust_child_relids(relids, nappinfos, appinfos);
+
+       /* Free memory consumed by any intermediate result. */
+       if (tmp_result)
+               bms_free(tmp_result);
+       bms_free(parent_relids);
+       pfree(appinfos);
+
+       return result;
+}
+
 /*
  * Adjust the targetlist entries of an inherited UPDATE operation
  *
@@ -2408,6 +2461,48 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
        return node;
 }
 
+/*
+ * Construct the SpecialJoinInfo for a child-join by translating
+ * SpecialJoinInfo for the join between parents. left_relids and right_relids
+ * are the relids of left and right side of the join respectively.
+ */
+SpecialJoinInfo *
+build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
+                                               Relids left_relids, Relids right_relids)
+{
+       SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
+       AppendRelInfo **left_appinfos;
+       int                     left_nappinfos;
+       AppendRelInfo **right_appinfos;
+       int                     right_nappinfos;
+
+       memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));
+       left_appinfos = find_appinfos_by_relids(root, left_relids,
+                                                                                       &left_nappinfos);
+       right_appinfos = find_appinfos_by_relids(root, right_relids,
+                                                                                        &right_nappinfos);
+
+       sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand,
+                                                                                          left_nappinfos, left_appinfos);
+       sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand,
+                                                                                               right_nappinfos,
+                                                                                               right_appinfos);
+       sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand,
+                                                                                          left_nappinfos, left_appinfos);
+       sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand,
+                                                                                               right_nappinfos,
+                                                                                               right_appinfos);
+       sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root,
+                                                                                                                        (Node *) sjinfo->semi_rhs_exprs,
+                                                                                                                        right_nappinfos,
+                                                                                                                        right_appinfos);
+
+       pfree(left_appinfos);
+       pfree(right_appinfos);
+
+       return sjinfo;
+}
+
 /*
  * find_appinfos_by_relids
  *             Find AppendRelInfo structures for all relations specified by relids.
index 26567cb7f65b18b75920881629caf6e7c6b5b7a0..2d491eb0ba9c129b83b5d38ee0b1bfa9b276192c 100644 (file)
 
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
+#include "nodes/extensible.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
+#include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
+#include "optimizer/tlist.h"
 #include "optimizer/var.h"
 #include "parser/parsetree.h"
+#include "foreign/fdwapi.h"
 #include "utils/lsyscache.h"
+#include "utils/memutils.h"
 #include "utils/selfuncs.h"
 
 
@@ -46,6 +51,9 @@ typedef enum
 #define STD_FUZZ_FACTOR 1.01
 
 static List *translate_sub_tlist(List *tlist, int relid);
+static List *reparameterize_pathlist_by_child(PlannerInfo *root,
+                                                                List *pathlist,
+                                                                RelOptInfo *child_rel);
 
 
 /*****************************************************************************
@@ -3429,3 +3437,358 @@ reparameterize_path(PlannerInfo *root, Path *path,
        }
        return NULL;
 }
+
+/*
+ * reparameterize_path_by_child
+ *             Given a path parameterized by the parent of the given child relation,
+ *             translate the path to be parameterized by the given child relation.
+ *
+ * The function creates a new path of the same type as the given path, but
+ * parameterized by the given child relation.  Most fields from the original
+ * path can simply be flat-copied, but any expressions must be adjusted to
+ * refer to the correct varnos, and any paths must be recursively
+ * reparameterized.  Other fields that refer to specific relids also need
+ * adjustment.
+ *
+ * The cost, number of rows, width and parallel path properties depend upon
+ * path->parent, which does not change during the translation. Hence those
+ * members are copied as they are.
+ *
+ * If the given path can not be reparameterized, the function returns NULL.
+ */
+Path *
+reparameterize_path_by_child(PlannerInfo *root, Path *path,
+                                                        RelOptInfo *child_rel)
+{
+
+#define FLAT_COPY_PATH(newnode, node, nodetype)  \
+       ( (newnode) = makeNode(nodetype), \
+         memcpy((newnode), (node), sizeof(nodetype)) )
+
+#define ADJUST_CHILD_ATTRS(node) \
+       ((node) = \
+        (List *) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \
+                                                                                               child_rel->relids, \
+                                                                                               child_rel->top_parent_relids))
+
+#define REPARAMETERIZE_CHILD_PATH(path) \
+do { \
+       (path) = reparameterize_path_by_child(root, (path), child_rel); \
+       if ((path) == NULL) \
+               return NULL; \
+} while(0);
+
+#define REPARAMETERIZE_CHILD_PATH_LIST(pathlist) \
+do { \
+       if ((pathlist) != NIL) \
+       { \
+               (pathlist) = reparameterize_pathlist_by_child(root, (pathlist), \
+                                                                                                         child_rel); \
+               if ((pathlist) == NIL) \
+                       return NULL; \
+       } \
+} while(0);
+
+       Path       *new_path;
+       ParamPathInfo *new_ppi;
+       ParamPathInfo *old_ppi;
+       Relids          required_outer;
+
+       /*
+        * If the path is not parameterized by parent of the given relation, it
+        * doesn't need reparameterization.
+        */
+       if (!path->param_info ||
+               !bms_overlap(PATH_REQ_OUTER(path), child_rel->top_parent_relids))
+               return path;
+
+       /* Reparameterize a copy of given path. */
+       switch (nodeTag(path))
+       {
+               case T_Path:
+                       FLAT_COPY_PATH(new_path, path, Path);
+                       break;
+
+               case T_IndexPath:
+                       {
+                               IndexPath  *ipath;
+
+                               FLAT_COPY_PATH(ipath, path, IndexPath);
+                               ADJUST_CHILD_ATTRS(ipath->indexclauses);
+                               ADJUST_CHILD_ATTRS(ipath->indexquals);
+                               new_path = (Path *) ipath;
+                       }
+                       break;
+
+               case T_BitmapHeapPath:
+                       {
+                               BitmapHeapPath *bhpath;
+
+                               FLAT_COPY_PATH(bhpath, path, BitmapHeapPath);
+                               REPARAMETERIZE_CHILD_PATH(bhpath->bitmapqual);
+                               new_path = (Path *) bhpath;
+                       }
+                       break;
+
+               case T_BitmapAndPath:
+                       {
+                               BitmapAndPath *bapath;
+
+                               FLAT_COPY_PATH(bapath, path, BitmapAndPath);
+                               REPARAMETERIZE_CHILD_PATH_LIST(bapath->bitmapquals);
+                               new_path = (Path *) bapath;
+                       }
+                       break;
+
+               case T_BitmapOrPath:
+                       {
+                               BitmapOrPath *bopath;
+
+                               FLAT_COPY_PATH(bopath, path, BitmapOrPath);
+                               REPARAMETERIZE_CHILD_PATH_LIST(bopath->bitmapquals);
+                               new_path = (Path *) bopath;
+                       }
+                       break;
+
+               case T_TidPath:
+                       {
+                               TidPath    *tpath;
+
+                               /*
+                                * TidPath contains tidquals, which do not contain any
+                                * external parameters per create_tidscan_path(). So don't
+                                * bother to translate those.
+                                */
+                               FLAT_COPY_PATH(tpath, path, TidPath);
+                               new_path = (Path *) tpath;
+                       }
+                       break;
+
+               case T_ForeignPath:
+                       {
+                               ForeignPath *fpath;
+                               ReparameterizeForeignPathByChild_function rfpc_func;
+
+                               FLAT_COPY_PATH(fpath, path, ForeignPath);
+                               if (fpath->fdw_outerpath)
+                                       REPARAMETERIZE_CHILD_PATH(fpath->fdw_outerpath);
+
+                               /* Hand over to FDW if needed. */
+                               rfpc_func =
+                                       path->parent->fdwroutine->ReparameterizeForeignPathByChild;
+                               if (rfpc_func)
+                                       fpath->fdw_private = rfpc_func(root, fpath->fdw_private,
+                                                                                                  child_rel);
+                               new_path = (Path *) fpath;
+                       }
+                       break;
+
+               case T_CustomPath:
+                       {
+                               CustomPath *cpath;
+
+                               FLAT_COPY_PATH(cpath, path, CustomPath);
+                               REPARAMETERIZE_CHILD_PATH_LIST(cpath->custom_paths);
+                               if (cpath->methods &&
+                                       cpath->methods->ReparameterizeCustomPathByChild)
+                                       cpath->custom_private =
+                                               cpath->methods->ReparameterizeCustomPathByChild(root,
+                                                                                                                                               cpath->custom_private,
+                                                                                                                                               child_rel);
+                               new_path = (Path *) cpath;
+                       }
+                       break;
+
+               case T_NestPath:
+                       {
+                               JoinPath   *jpath;
+
+                               FLAT_COPY_PATH(jpath, path, NestPath);
+
+                               REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
+                               REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
+                               ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
+                               new_path = (Path *) jpath;
+                       }
+                       break;
+
+               case T_MergePath:
+                       {
+                               JoinPath   *jpath;
+                               MergePath  *mpath;
+
+                               FLAT_COPY_PATH(mpath, path, MergePath);
+
+                               jpath = (JoinPath *) mpath;
+                               REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
+                               REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
+                               ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
+                               ADJUST_CHILD_ATTRS(mpath->path_mergeclauses);
+                               new_path = (Path *) mpath;
+                       }
+                       break;
+
+               case T_HashPath:
+                       {
+                               JoinPath   *jpath;
+                               HashPath   *hpath;
+
+                               FLAT_COPY_PATH(hpath, path, HashPath);
+
+                               jpath = (JoinPath *) hpath;
+                               REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
+                               REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
+                               ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
+                               ADJUST_CHILD_ATTRS(hpath->path_hashclauses);
+                               new_path = (Path *) hpath;
+                       }
+                       break;
+
+               case T_AppendPath:
+                       {
+                               AppendPath *apath;
+
+                               FLAT_COPY_PATH(apath, path, AppendPath);
+                               REPARAMETERIZE_CHILD_PATH_LIST(apath->subpaths);
+                               new_path = (Path *) apath;
+                       }
+                       break;
+
+               case T_MergeAppend:
+                       {
+                               MergeAppendPath *mapath;
+
+                               FLAT_COPY_PATH(mapath, path, MergeAppendPath);
+                               REPARAMETERIZE_CHILD_PATH_LIST(mapath->subpaths);
+                               new_path = (Path *) mapath;
+                       }
+                       break;
+
+               case T_MaterialPath:
+                       {
+                               MaterialPath *mpath;
+
+                               FLAT_COPY_PATH(mpath, path, MaterialPath);
+                               REPARAMETERIZE_CHILD_PATH(mpath->subpath);
+                               new_path = (Path *) mpath;
+                       }
+                       break;
+
+               case T_UniquePath:
+                       {
+                               UniquePath *upath;
+
+                               FLAT_COPY_PATH(upath, path, UniquePath);
+                               REPARAMETERIZE_CHILD_PATH(upath->subpath);
+                               ADJUST_CHILD_ATTRS(upath->uniq_exprs);
+                               new_path = (Path *) upath;
+                       }
+                       break;
+
+               case T_GatherPath:
+                       {
+                               GatherPath *gpath;
+
+                               FLAT_COPY_PATH(gpath, path, GatherPath);
+                               REPARAMETERIZE_CHILD_PATH(gpath->subpath);
+                               new_path = (Path *) gpath;
+                       }
+                       break;
+
+               case T_GatherMergePath:
+                       {
+                               GatherMergePath *gmpath;
+
+                               FLAT_COPY_PATH(gmpath, path, GatherMergePath);
+                               REPARAMETERIZE_CHILD_PATH(gmpath->subpath);
+                               new_path = (Path *) gmpath;
+                       }
+                       break;
+
+               default:
+
+                       /* We don't know how to reparameterize this path. */
+                       return NULL;
+       }
+
+       /*
+        * Adjust the parameterization information, which refers to the topmost
+        * parent. The topmost parent can be multiple levels away from the given
+        * child, hence use multi-level expression adjustment routines.
+        */
+       old_ppi = new_path->param_info;
+       required_outer =
+               adjust_child_relids_multilevel(root, old_ppi->ppi_req_outer,
+                                                                          child_rel->relids,
+                                                                          child_rel->top_parent_relids);
+
+       /* If we already have a PPI for this parameterization, just return it */
+       new_ppi = find_param_path_info(new_path->parent, required_outer);
+
+       /*
+        * If not, build a new one and link it to the list of PPIs. For the same
+        * reason as explained in mark_dummy_rel(), allocate new PPI in the same
+        * context the given RelOptInfo is in.
+        */
+       if (new_ppi == NULL)
+       {
+               MemoryContext oldcontext;
+               RelOptInfo *rel = path->parent;
+
+               oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
+
+               new_ppi = makeNode(ParamPathInfo);
+               new_ppi->ppi_req_outer = bms_copy(required_outer);
+               new_ppi->ppi_rows = old_ppi->ppi_rows;
+               new_ppi->ppi_clauses = old_ppi->ppi_clauses;
+               ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses);
+               rel->ppilist = lappend(rel->ppilist, new_ppi);
+
+               MemoryContextSwitchTo(oldcontext);
+       }
+       bms_free(required_outer);
+
+       new_path->param_info = new_ppi;
+
+       /*
+        * Adjust the path target if the parent of the outer relation is
+        * referenced in the targetlist. This can happen when only the parent of
+        * outer relation is laterally referenced in this relation.
+        */
+       if (bms_overlap(path->parent->lateral_relids,
+                                       child_rel->top_parent_relids))
+       {
+               new_path->pathtarget = copy_pathtarget(new_path->pathtarget);
+               ADJUST_CHILD_ATTRS(new_path->pathtarget->exprs);
+       }
+
+       return new_path;
+}
+
+/*
+ * reparameterize_pathlist_by_child
+ *             Helper function to reparameterize a list of paths by given child rel.
+ */
+static List *
+reparameterize_pathlist_by_child(PlannerInfo *root,
+                                                                List *pathlist,
+                                                                RelOptInfo *child_rel)
+{
+       ListCell   *lc;
+       List       *result = NIL;
+
+       foreach(lc, pathlist)
+       {
+               Path       *path = reparameterize_path_by_child(root, lfirst(lc),
+                                                                                                               child_rel);
+               if (path == NULL)
+               {
+                       list_free(result);
+                       return NIL;
+               }
+
+               result = lappend(result, path);
+       }
+
+       return result;
+}
index 970542dde5bda2e3c873a452aecfd7f981d1e8e3..3343521b9710938f444d7e68330daafd72646684 100644 (file)
@@ -20,6 +20,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/placeholder.h"
 #include "optimizer/planmain.h"
+#include "optimizer/prep.h"
 #include "optimizer/var.h"
 #include "utils/lsyscache.h"
 
@@ -414,6 +415,10 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
        Relids          relids = joinrel->relids;
        ListCell   *lc;
 
+       /* This function is called only on the parent relations. */
+       Assert(!IS_OTHER_REL(joinrel) && !IS_OTHER_REL(outer_rel) &&
+                  !IS_OTHER_REL(inner_rel));
+
        foreach(lc, root->placeholder_list)
        {
                PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
@@ -459,3 +464,56 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
                }
        }
 }
+
+/*
+ * add_placeholders_to_child_joinrel
+ *             Translate the PHVs in parent's targetlist and add them to the child's
+ *             targetlist. Also adjust the cost
+ */
+void
+add_placeholders_to_child_joinrel(PlannerInfo *root, RelOptInfo *childrel,
+                                                                 RelOptInfo *parentrel)
+{
+       ListCell   *lc;
+       AppendRelInfo **appinfos;
+       int                     nappinfos;
+
+       Assert(IS_JOIN_REL(childrel) && IS_JOIN_REL(parentrel));
+       Assert(IS_OTHER_REL(childrel));
+
+       /* Nothing to do if no PHVs. */
+       if (root->placeholder_list == NIL)
+               return;
+
+       appinfos = find_appinfos_by_relids(root, childrel->relids, &nappinfos);
+       foreach(lc, parentrel->reltarget->exprs)
+       {
+               PlaceHolderVar *phv = lfirst(lc);
+
+               if (IsA(phv, PlaceHolderVar))
+               {
+                       /*
+                        * In case the placeholder Var refers to any of the parent
+                        * relations, translate it to refer to the corresponding child.
+                        */
+                       if (bms_overlap(phv->phrels, parentrel->relids) &&
+                               childrel->reloptkind == RELOPT_OTHER_JOINREL)
+                       {
+                               phv = (PlaceHolderVar *) adjust_appendrel_attrs(root,
+                                                                                                                               (Node *) phv,
+                                                                                                                               nappinfos,
+                                                                                                                               appinfos);
+                       }
+
+                       childrel->reltarget->exprs = lappend(childrel->reltarget->exprs,
+                                                                                                phv);
+               }
+       }
+
+       /* Adjust the cost and width of child targetlist. */
+       childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup;
+       childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple;
+       childrel->reltarget->width = parentrel->reltarget->width;
+
+       pfree(appinfos);
+}
index cac46bedf9eaaa969638ca2b3fbf7a6f5c5e2835..93cc7576a064fe487c6118cba4a02e0d514c22ad 100644 (file)
@@ -71,7 +71,8 @@ static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
 static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
                                                        Relation relation);
 static PartitionScheme find_partition_scheme(PlannerInfo *root, Relation rel);
-static List **build_baserel_partition_key_exprs(Relation relation, Index varno);
+static void set_baserel_partition_key_exprs(Relation relation,
+                                                               RelOptInfo *rel);
 
 /*
  * get_relation_info -
@@ -1832,7 +1833,7 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
        Assert(partdesc != NULL && rel->part_scheme != NULL);
        rel->boundinfo = partdesc->boundinfo;
        rel->nparts = partdesc->nparts;
-       rel->partexprs = build_baserel_partition_key_exprs(relation, rel->relid);
+       set_baserel_partition_key_exprs(relation, rel);
 }
 
 /*
@@ -1907,21 +1908,24 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
 }
 
 /*
- * build_baserel_partition_key_exprs
+ * set_baserel_partition_key_exprs
  *
- * Collects partition key expressions for a given base relation.  Any single
- * column partition keys are converted to Var nodes.  All Var nodes are set
- * to the given varno.  The partition key expressions are returned as an array
- * of single element lists to be stored in RelOptInfo of the base relation.
+ * Builds partition key expressions for the given base relation and sets them
+ * in given RelOptInfo.  Any single column partition keys are converted to Var
+ * nodes.  All Var nodes are restamped with the relid of given relation.
  */
-static List **
-build_baserel_partition_key_exprs(Relation relation, Index varno)
+static void
+set_baserel_partition_key_exprs(Relation relation,
+                                                               RelOptInfo *rel)
 {
        PartitionKey partkey = RelationGetPartitionKey(relation);
        int                     partnatts;
        int                     cnt;
        List      **partexprs;
        ListCell   *lc;
+       Index           varno = rel->relid;
+
+       Assert(IS_SIMPLE_REL(rel) && rel->relid > 0);
 
        /* A partitioned table should have a partition key. */
        Assert(partkey != NULL);
@@ -1959,5 +1963,13 @@ build_baserel_partition_key_exprs(Relation relation, Index varno)
                partexprs[cnt] = list_make1(partexpr);
        }
 
-       return partexprs;
+       rel->partexprs = partexprs;
+
+       /*
+        * A base relation can not have nullable partition key expressions. We
+        * still allocate array of empty expressions lists to keep partition key
+        * expression handling code simple. See build_joinrel_partition_info() and
+        * match_expr_to_partition_keys().
+        */
+       rel->nullable_partexprs = (List **) palloc0(sizeof(List *) * partnatts);
 }
index 077e89ae4357551a37322b75f3ef2d5734207a3e..3bd1063aa89c5fb0f04cfef87aedccfabf54dbf8 100644 (file)
 #include <limits.h>
 
 #include "miscadmin.h"
+#include "catalog/partition.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/placeholder.h"
 #include "optimizer/plancat.h"
+#include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
 #include "utils/hsearch.h"
@@ -52,6 +54,9 @@ static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel,
 static void set_foreign_rel_properties(RelOptInfo *joinrel,
                                                   RelOptInfo *outer_rel, RelOptInfo *inner_rel);
 static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel);
+static void build_joinrel_partition_info(RelOptInfo *joinrel,
+                                                        RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+                                                        List *restrictlist, JoinType jointype);
 
 
 /*
@@ -151,6 +156,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
        rel->boundinfo = NULL;
        rel->part_rels = NULL;
        rel->partexprs = NULL;
+       rel->nullable_partexprs = NULL;
 
        /*
         * Pass top parent's relids down the inheritance hierarchy. If the parent
@@ -481,6 +487,9 @@ build_join_rel(PlannerInfo *root,
        RelOptInfo *joinrel;
        List       *restrictlist;
 
+       /* This function should be used only for join between parents. */
+       Assert(!IS_OTHER_REL(outer_rel) && !IS_OTHER_REL(inner_rel));
+
        /*
         * See if we already have a joinrel for this set of base rels.
         */
@@ -560,6 +569,7 @@ build_join_rel(PlannerInfo *root,
        joinrel->boundinfo = NULL;
        joinrel->part_rels = NULL;
        joinrel->partexprs = NULL;
+       joinrel->nullable_partexprs = NULL;
 
        /* Compute information relevant to the foreign relations. */
        set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
@@ -605,6 +615,10 @@ build_join_rel(PlannerInfo *root,
         */
        joinrel->has_eclass_joins = has_relevant_eclass_joinclause(root, joinrel);
 
+       /* Store the partition information. */
+       build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist,
+                                                                sjinfo->jointype);
+
        /*
         * Set estimates of the joinrel's size.
         */
@@ -650,6 +664,138 @@ build_join_rel(PlannerInfo *root,
        return joinrel;
 }
 
+/*
+ * build_child_join_rel
+ *       Builds RelOptInfo representing join between given two child relations.
+ *
+ * 'outer_rel' and 'inner_rel' are the RelOptInfos of child relations being
+ *             joined
+ * 'parent_joinrel' is the RelOptInfo representing the join between parent
+ *             relations. Some of the members of new RelOptInfo are produced by
+ *             translating corresponding members of this RelOptInfo
+ * 'sjinfo': child-join context info
+ * 'restrictlist': list of RestrictInfo nodes that apply to this particular
+ *             pair of joinable relations
+ * 'join_appinfos': list of AppendRelInfo nodes for base child relations
+ *             involved in this join
+ */
+RelOptInfo *
+build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
+                                        RelOptInfo *inner_rel, RelOptInfo *parent_joinrel,
+                                        List *restrictlist, SpecialJoinInfo *sjinfo,
+                                        JoinType jointype)
+{
+       RelOptInfo *joinrel = makeNode(RelOptInfo);
+       AppendRelInfo **appinfos;
+       int                     nappinfos;
+
+       /* Only joins between "other" relations land here. */
+       Assert(IS_OTHER_REL(outer_rel) && IS_OTHER_REL(inner_rel));
+
+       joinrel->reloptkind = RELOPT_OTHER_JOINREL;
+       joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids);
+       joinrel->rows = 0;
+       /* cheap startup cost is interesting iff not all tuples to be retrieved */
+       joinrel->consider_startup = (root->tuple_fraction > 0);
+       joinrel->consider_param_startup = false;
+       joinrel->consider_parallel = false;
+       joinrel->reltarget = create_empty_pathtarget();
+       joinrel->pathlist = NIL;
+       joinrel->ppilist = NIL;
+       joinrel->partial_pathlist = NIL;
+       joinrel->cheapest_startup_path = NULL;
+       joinrel->cheapest_total_path = NULL;
+       joinrel->cheapest_unique_path = NULL;
+       joinrel->cheapest_parameterized_paths = NIL;
+       joinrel->direct_lateral_relids = NULL;
+       joinrel->lateral_relids = NULL;
+       joinrel->relid = 0;                     /* indicates not a baserel */
+       joinrel->rtekind = RTE_JOIN;
+       joinrel->min_attr = 0;
+       joinrel->max_attr = 0;
+       joinrel->attr_needed = NULL;
+       joinrel->attr_widths = NULL;
+       joinrel->lateral_vars = NIL;
+       joinrel->lateral_referencers = NULL;
+       joinrel->indexlist = NIL;
+       joinrel->pages = 0;
+       joinrel->tuples = 0;
+       joinrel->allvisfrac = 0;
+       joinrel->subroot = NULL;
+       joinrel->subplan_params = NIL;
+       joinrel->serverid = InvalidOid;
+       joinrel->userid = InvalidOid;
+       joinrel->useridiscurrent = false;
+       joinrel->fdwroutine = NULL;
+       joinrel->fdw_private = NULL;
+       joinrel->baserestrictinfo = NIL;
+       joinrel->baserestrictcost.startup = 0;
+       joinrel->baserestrictcost.per_tuple = 0;
+       joinrel->joininfo = NIL;
+       joinrel->has_eclass_joins = false;
+       joinrel->top_parent_relids = NULL;
+       joinrel->part_scheme = NULL;
+       joinrel->part_rels = NULL;
+       joinrel->partexprs = NULL;
+       joinrel->nullable_partexprs = NULL;
+
+       joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids,
+                                                                                  inner_rel->top_parent_relids);
+
+       /* Compute information relevant to foreign relations. */
+       set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
+
+       /* Build targetlist */
+       build_joinrel_tlist(root, joinrel, outer_rel);
+       build_joinrel_tlist(root, joinrel, inner_rel);
+       /* Add placeholder variables. */
+       add_placeholders_to_child_joinrel(root, joinrel, parent_joinrel);
+
+       /* Construct joininfo list. */
+       appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
+       joinrel->joininfo = (List *) adjust_appendrel_attrs(root,
+                                                                                                               (Node *) parent_joinrel->joininfo,
+                                                                                                               nappinfos,
+                                                                                                               appinfos);
+       pfree(appinfos);
+
+       /*
+        * Lateral relids referred in child join will be same as that referred in
+        * the parent relation. Throw any partial result computed while building
+        * the targetlist.
+        */
+       bms_free(joinrel->direct_lateral_relids);
+       bms_free(joinrel->lateral_relids);
+       joinrel->direct_lateral_relids = (Relids) bms_copy(parent_joinrel->direct_lateral_relids);
+       joinrel->lateral_relids = (Relids) bms_copy(parent_joinrel->lateral_relids);
+
+       /*
+        * If the parent joinrel has pending equivalence classes, so does the
+        * child.
+        */
+       joinrel->has_eclass_joins = parent_joinrel->has_eclass_joins;
+
+       /* Is the join between partitions itself partitioned? */
+       build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist,
+                                                                jointype);
+
+       /* Child joinrel is parallel safe if parent is parallel safe. */
+       joinrel->consider_parallel = parent_joinrel->consider_parallel;
+
+
+       /* Set estimates of the child-joinrel's size. */
+       set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
+                                                          sjinfo, restrictlist);
+
+       /* We build the join only once. */
+       Assert(!find_join_rel(root, joinrel->relids));
+
+       /* Add the relation to the PlannerInfo. */
+       add_join_rel(root, joinrel);
+
+       return joinrel;
+}
+
 /*
  * min_join_parameterization
  *
@@ -705,9 +851,15 @@ static void
 build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
                                        RelOptInfo *input_rel)
 {
-       Relids          relids = joinrel->relids;
+       Relids          relids;
        ListCell   *vars;
 
+       /* attrs_needed refers to parent relids and not those of a child. */
+       if (joinrel->top_parent_relids)
+               relids = joinrel->top_parent_relids;
+       else
+               relids = joinrel->relids;
+
        foreach(vars, input_rel->reltarget->exprs)
        {
                Var                *var = (Var *) lfirst(vars);
@@ -722,24 +874,55 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
                        continue;
 
                /*
-                * Otherwise, anything in a baserel or joinrel targetlist ought to be
-                * a Var.  (More general cases can only appear in appendrel child
-                * rels, which will never be seen here.)
+                * Otherwise, anything in a baserel or joinrel targetlist ought to be a
+                * Var. Children of a partitioned table may have ConvertRowtypeExpr
+                * translating whole-row Var of a child to that of the parent. Children
+                * of an inherited table or subquery child rels can not directly
+                * participate in a join, so other kinds of nodes here.
                 */
-               if (!IsA(var, Var))
+               if (IsA(var, Var))
+               {
+                       baserel = find_base_rel(root, var->varno);
+                       ndx = var->varattno - baserel->min_attr;
+               }
+               else if (IsA(var, ConvertRowtypeExpr))
+               {
+                       ConvertRowtypeExpr *child_expr = (ConvertRowtypeExpr *) var;
+                       Var                *childvar = (Var *) child_expr->arg;
+
+                       /*
+                        * Child's whole-row references are converted to look like those
+                        * of parent using ConvertRowtypeExpr. There can be as many
+                        * ConvertRowtypeExpr decorations as the depth of partition tree.
+                        * The argument to the deepest ConvertRowtypeExpr is expected to
+                        * be a whole-row reference of the child.
+                        */
+                       while (IsA(childvar, ConvertRowtypeExpr))
+                       {
+                               child_expr = (ConvertRowtypeExpr *) childvar;
+                               childvar = (Var *) child_expr->arg;
+                       }
+                       Assert(IsA(childvar, Var) && childvar->varattno == 0);
+
+                       baserel = find_base_rel(root, childvar->varno);
+                       ndx = 0 - baserel->min_attr;
+               }
+               else
                        elog(ERROR, "unexpected node type in rel targetlist: %d",
                                 (int) nodeTag(var));
 
-               /* Get the Var's original base rel */
-               baserel = find_base_rel(root, var->varno);
 
-               /* Is it still needed above this joinrel? */
-               ndx = var->varattno - baserel->min_attr;
+               /* Is the target expression still needed above this joinrel? */
                if (bms_nonempty_difference(baserel->attr_needed[ndx], relids))
                {
                        /* Yup, add it to the output */
                        joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, var);
-                       /* Vars have cost zero, so no need to adjust reltarget->cost */
+
+                       /*
+                        * Vars have cost zero, so no need to adjust reltarget->cost. Even
+                        * if it's a ConvertRowtypeExpr, it will be computed only for the
+                        * base relation, costing nothing for a join.
+                        */
                        joinrel->reltarget->width += baserel->attr_widths[ndx];
                }
        }
@@ -876,6 +1059,9 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
 {
        ListCell   *l;
 
+       /* Expected to be called only for join between parent relations. */
+       Assert(joinrel->reloptkind == RELOPT_JOINREL);
+
        foreach(l, joininfo_list)
        {
                RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
@@ -1399,3 +1585,165 @@ find_param_path_info(RelOptInfo *rel, Relids required_outer)
 
        return NULL;
 }
+
+/*
+ * build_joinrel_partition_info
+ *             If the two relations have same partitioning scheme, their join may be
+ *             partitioned and will follow the same partitioning scheme as the joining
+ *             relations. Set the partition scheme and partition key expressions in
+ *             the join relation.
+ */
+static void
+build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
+                                                        RelOptInfo *inner_rel, List *restrictlist,
+                                                        JoinType jointype)
+{
+       int                     partnatts;
+       int                     cnt;
+       PartitionScheme part_scheme;
+
+       /* Nothing to do if partition-wise join technique is disabled. */
+       if (!enable_partition_wise_join)
+       {
+               Assert(!IS_PARTITIONED_REL(joinrel));
+               return;
+       }
+
+       /*
+        * We can only consider this join as an input to further partition-wise
+        * joins if (a) the input relations are partitioned, (b) the partition
+        * schemes match, and (c) we can identify an equi-join between the
+        * partition keys.  Note that if it were possible for
+        * have_partkey_equi_join to return different answers for the same joinrel
+        * depending on which join ordering we try first, this logic would break.
+        * That shouldn't happen, though, because of the way the query planner
+        * deduces implied equalities and reorders the joins. Please see
+        * optimizer/README for details.
+        */
+       if (!IS_PARTITIONED_REL(outer_rel) || !IS_PARTITIONED_REL(inner_rel) ||
+               outer_rel->part_scheme != inner_rel->part_scheme ||
+               !have_partkey_equi_join(outer_rel, inner_rel, jointype, restrictlist))
+       {
+               Assert(!IS_PARTITIONED_REL(joinrel));
+               return;
+       }
+
+       part_scheme = outer_rel->part_scheme;
+
+       Assert(REL_HAS_ALL_PART_PROPS(outer_rel) &&
+                  REL_HAS_ALL_PART_PROPS(inner_rel));
+
+       /*
+        * For now, our partition matching algorithm can match partitions only
+        * when the partition bounds of the joining relations are exactly same.
+        * So, bail out otherwise.
+        */
+       if (outer_rel->nparts != inner_rel->nparts ||
+               !partition_bounds_equal(part_scheme->partnatts,
+                                                               part_scheme->parttyplen,
+                                                               part_scheme->parttypbyval,
+                                                               outer_rel->boundinfo, inner_rel->boundinfo))
+       {
+               Assert(!IS_PARTITIONED_REL(joinrel));
+               return;
+       }
+
+       /*
+        * This function will be called only once for each joinrel, hence it
+        * should not have partition scheme, partition bounds, partition key
+        * expressions and array for storing child relations set.
+        */
+       Assert(!joinrel->part_scheme && !joinrel->partexprs &&
+                  !joinrel->nullable_partexprs && !joinrel->part_rels &&
+                  !joinrel->boundinfo);
+
+       /*
+        * Join relation is partitioned using the same partitioning scheme as the
+        * joining relations and has same bounds.
+        */
+       joinrel->part_scheme = part_scheme;
+       joinrel->boundinfo = outer_rel->boundinfo;
+       joinrel->nparts = outer_rel->nparts;
+       partnatts = joinrel->part_scheme->partnatts;
+       joinrel->partexprs = (List **) palloc0(sizeof(List *) * partnatts);
+       joinrel->nullable_partexprs =
+               (List **) palloc0(sizeof(List *) *partnatts);
+
+       /*
+        * Construct partition keys for the join.
+        *
+        * An INNER join between two partitioned relations can be regarded as
+        * partitioned by either key expression.  For example, A INNER JOIN B ON A.a =
+        * B.b can be regarded as partitioned on A.a or on B.b; they are equivalent.
+        *
+        * For a SEMI or ANTI join, the result can only be regarded as being
+        * partitioned in the same manner as the outer side, since the inner columns
+        * are not retained.
+        *
+        * An OUTER join like (A LEFT JOIN B ON A.a = B.b) may produce rows with
+        * B.b NULL. These rows may not fit the partitioning conditions imposed on
+        * B.b. Hence, strictly speaking, the join is not partitioned by B.b and
+        * thus partition keys of an OUTER join should include partition key
+        * expressions from the OUTER side only.  However, because all
+        * commonly-used comparison operators are strict, the presence of nulls on
+        * the outer side doesn't cause any problem; they can't match anything at
+        * future join levels anyway.  Therefore, we track two sets of expressions:
+        * those that authentically partition the relation (partexprs) and those
+        * that partition the relation with the exception that extra nulls may be
+        * present (nullable_partexprs).  When the comparison operator is strict,
+        * the latter is just as good as the former.
+        */
+       for (cnt = 0; cnt < partnatts; cnt++)
+       {
+               List       *outer_expr;
+               List       *outer_null_expr;
+               List       *inner_expr;
+               List       *inner_null_expr;
+               List       *partexpr = NIL;
+               List       *nullable_partexpr = NIL;
+
+               outer_expr = list_copy(outer_rel->partexprs[cnt]);
+               outer_null_expr = list_copy(outer_rel->nullable_partexprs[cnt]);
+               inner_expr = list_copy(inner_rel->partexprs[cnt]);
+               inner_null_expr = list_copy(inner_rel->nullable_partexprs[cnt]);
+
+               switch (jointype)
+               {
+                       case JOIN_INNER:
+                               partexpr = list_concat(outer_expr, inner_expr);
+                               nullable_partexpr = list_concat(outer_null_expr,
+                                                                                               inner_null_expr);
+                               break;
+
+                       case JOIN_SEMI:
+                       case JOIN_ANTI:
+                               partexpr = outer_expr;
+                               nullable_partexpr = outer_null_expr;
+                               break;
+
+                       case JOIN_LEFT:
+                               partexpr = outer_expr;
+                               nullable_partexpr = list_concat(inner_expr,
+                                                                                               outer_null_expr);
+                               nullable_partexpr = list_concat(nullable_partexpr,
+                                                                                               inner_null_expr);
+                               break;
+
+                       case JOIN_FULL:
+                               nullable_partexpr = list_concat(outer_expr,
+                                                                                               inner_expr);
+                               nullable_partexpr = list_concat(nullable_partexpr,
+                                                                                               outer_null_expr);
+                               nullable_partexpr = list_concat(nullable_partexpr,
+                                                                                               inner_null_expr);
+                               break;
+
+                       default:
+                               elog(ERROR, "unrecognized join type: %d", (int) jointype);
+
+               }
+
+               joinrel->partexprs[cnt] = partexpr;
+               joinrel->nullable_partexprs[cnt] = nullable_partexpr;
+       }
+}
index 8292df00bbfac0de6ae97e7ef2e68323af8ed7ef..ae22185fbdbeaa954ca09e78a0c02c2b6091cea7 100644 (file)
@@ -911,6 +911,15 @@ static struct config_bool ConfigureNamesBool[] =
                true,
                NULL, NULL, NULL
        },
+       {
+               {"enable_partition_wise_join", PGC_USERSET, QUERY_TUNING_METHOD,
+                       gettext_noop("Enables partition-wise join."),
+                       NULL
+               },
+               &enable_partition_wise_join,
+               false,
+               NULL, NULL, NULL
+       },
 
        {
                {"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
index cf4ddcd94a7c079db52ff67d67393dd3fc54f7b5..368b280c8af7faf1ec8fca71a2891adab485683a 100644 (file)
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_partition_wise_join = off
 
 # - Planner Cost Constants -
 
index ef0fbe6f9c635a0871d34d3c49a27ec72b1f0246..04e43cc5e5e2fb716b8f4ec07f2e9ce654311be3 100644 (file)
@@ -158,6 +158,9 @@ typedef void (*ShutdownForeignScan_function) (ForeignScanState *node);
 typedef bool (*IsForeignScanParallelSafe_function) (PlannerInfo *root,
                                                                                                        RelOptInfo *rel,
                                                                                                        RangeTblEntry *rte);
+typedef List *(*ReparameterizeForeignPathByChild_function) (PlannerInfo *root,
+                                                                                                                       List *fdw_private,
+                                                                                                                       RelOptInfo *child_rel);
 
 /*
  * FdwRoutine is the struct returned by a foreign-data wrapper's handler
@@ -230,6 +233,9 @@ typedef struct FdwRoutine
        ReInitializeDSMForeignScan_function ReInitializeDSMForeignScan;
        InitializeWorkerForeignScan_function InitializeWorkerForeignScan;
        ShutdownForeignScan_function ShutdownForeignScan;
+
+       /* Support functions for path reparameterization. */
+       ReparameterizeForeignPathByChild_function ReparameterizeForeignPathByChild;
 } FdwRoutine;
 
 
index 0654e79c7bafacbe26fff2714762e27539740f12..c3436c7a4e47d9489b3089a3d02d142f569b88c3 100644 (file)
@@ -96,6 +96,9 @@ typedef struct CustomPathMethods
                                                                        List *tlist,
                                                                        List *clauses,
                                                                        List *custom_plans);
+       struct List *(*ReparameterizeCustomPathByChild) (PlannerInfo *root,
+                                                                                                        List *custom_private,
+                                                                                                        RelOptInfo *child_rel);
 }                      CustomPathMethods;
 
 /*
index 48e6012f7feebc800c53f69f42e3e1d19b05e597..e085cefb7ba26e78ff697e0307aa34fa95151dfe 100644 (file)
@@ -391,6 +391,11 @@ typedef struct PartitionSchemeData *PartitionScheme;
  * handling join alias Vars.  Currently this is not needed because all join
  * alias Vars are expanded to non-aliased form during preprocess_expression.
  *
+ * We also have relations representing joins between child relations of
+ * different partitioned tables. These relations are not added to
+ * join_rel_level lists as they are not joined directly by the dynamic
+ * programming algorithm.
+ *
  * There is also a RelOptKind for "upper" relations, which are RelOptInfos
  * that describe post-scan/join processing steps, such as aggregation.
  * Many of the fields in these RelOptInfos are meaningless, but their Path
@@ -525,14 +530,18 @@ typedef struct PartitionSchemeData *PartitionScheme;
  *             boundinfo - Partition bounds
  *             nparts - Number of partitions
  *             part_rels - RelOptInfos for each partition
- *             partexprs - Partition key expressions
+ *             partexprs, nullable_partexprs - Partition key expressions
  *
  * Note: A base relation always has only one set of partition keys, but a join
  * relation may have as many sets of partition keys as the number of relations
- * being joined. partexprs is an array containing part_scheme->partnatts
- * elements, each of which is a list of partition key expressions. For a base
- * relation each list contains only one expression, but for a join relation
- * there can be one per baserel.
+ * being joined. partexprs and nullable_partexprs are arrays containing
+ * part_scheme->partnatts elements each. Each of these elements is a list of
+ * partition key expressions.  For a base relation each list in partexprs
+ * contains only one expression and nullable_partexprs is not populated. For a
+ * join relation, partexprs and nullable_partexprs contain partition key
+ * expressions from non-nullable and nullable relations resp. Lists at any
+ * given position in those arrays together contain as many elements as the
+ * number of joining relations.
  *----------
  */
 typedef enum RelOptKind
@@ -540,6 +549,7 @@ typedef enum RelOptKind
        RELOPT_BASEREL,
        RELOPT_JOINREL,
        RELOPT_OTHER_MEMBER_REL,
+       RELOPT_OTHER_JOINREL,
        RELOPT_UPPER_REL,
        RELOPT_DEADREL
 } RelOptKind;
@@ -553,13 +563,17 @@ typedef enum RelOptKind
         (rel)->reloptkind == RELOPT_OTHER_MEMBER_REL)
 
 /* Is the given relation a join relation? */
-#define IS_JOIN_REL(rel) ((rel)->reloptkind == RELOPT_JOINREL)
+#define IS_JOIN_REL(rel)       \
+       ((rel)->reloptkind == RELOPT_JOINREL || \
+        (rel)->reloptkind == RELOPT_OTHER_JOINREL)
 
 /* Is the given relation an upper relation? */
 #define IS_UPPER_REL(rel) ((rel)->reloptkind == RELOPT_UPPER_REL)
 
 /* Is the given relation an "other" relation? */
-#define IS_OTHER_REL(rel) ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL)
+#define IS_OTHER_REL(rel) \
+       ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL || \
+        (rel)->reloptkind == RELOPT_OTHER_JOINREL)
 
 typedef struct RelOptInfo
 {
@@ -645,9 +659,29 @@ typedef struct RelOptInfo
        struct PartitionBoundInfoData *boundinfo;       /* Partition bounds */
        struct RelOptInfo **part_rels;  /* Array of RelOptInfos of partitions,
                                                                         * stored in the same order of bounds */
-       List      **partexprs;          /* Partition key expressions. */
+       List      **partexprs;          /* Non-nullable partition key expressions. */
+       List      **nullable_partexprs; /* Nullable partition key expressions. */
 } RelOptInfo;
 
+/*
+ * Is given relation partitioned?
+ *
+ * A join between two partitioned relations with same partitioning scheme
+ * without any matching partitions will not have any partition in it but will
+ * have partition scheme set. So a relation is deemed to be partitioned if it
+ * has a partitioning scheme, bounds and positive number of partitions.
+ */
+#define IS_PARTITIONED_REL(rel) \
+       ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0)
+
+/*
+ * Convenience macro to make sure that a partitioned relation has all the
+ * required members set.
+ */
+#define REL_HAS_ALL_PART_PROPS(rel)    \
+       ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \
+        (rel)->part_rels && (rel)->partexprs && (rel)->nullable_partexprs)
+
 /*
  * IndexOptInfo
  *             Per-index information for planning/optimization
index 63feba06e7e7f9f3e0f10be89db33af95584faf7..306d923a22acf49051086fd3032c5bac1a59cc0f 100644 (file)
@@ -67,6 +67,7 @@ extern bool enable_material;
 extern bool enable_mergejoin;
 extern bool enable_hashjoin;
 extern bool enable_gathermerge;
+extern bool enable_partition_wise_join;
 extern int     constraint_exclusion;
 
 extern double clamp_row_est(double nrows);
index e372f8862b8816b87e51011de181b9d5e286e7d1..e9ed16ad32138192420499c3148599b0c001ac77 100644 (file)
@@ -251,6 +251,8 @@ extern LimitPath *create_limit_path(PlannerInfo *root, RelOptInfo *rel,
 extern Path *reparameterize_path(PlannerInfo *root, Path *path,
                                        Relids required_outer,
                                        double loop_count);
+extern Path *reparameterize_path_by_child(PlannerInfo *root, Path *path,
+                                                        RelOptInfo *child_rel);
 
 /*
  * prototypes for relnode.c
@@ -290,5 +292,9 @@ extern ParamPathInfo *get_appendrel_parampathinfo(RelOptInfo *appendrel,
                                                        Relids required_outer);
 extern ParamPathInfo *find_param_path_info(RelOptInfo *rel,
                                         Relids required_outer);
+extern RelOptInfo *build_child_join_rel(PlannerInfo *root,
+                                        RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+                                        RelOptInfo *parent_joinrel, List *restrictlist,
+                                        SpecialJoinInfo *sjinfo, JoinType jointype);
 
 #endif                                                 /* PATHNODE_H */
index 4e06b2e29931e7b43165dfba0466da9cf5b94ae4..a15eee54bb8a1e655817df1e27a1ae9fbc40cf09 100644 (file)
@@ -58,6 +58,8 @@ extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages,
                                                double index_pages);
 extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
                                                        Path *bitmapqual);
+extern void generate_partition_wise_join_paths(PlannerInfo *root,
+                                                                  RelOptInfo *rel);
 
 #ifdef OPTIMIZER_DEBUG
 extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
@@ -111,6 +113,9 @@ extern bool have_join_order_restriction(PlannerInfo *root,
                                                        RelOptInfo *rel1, RelOptInfo *rel2);
 extern bool have_dangerous_phv(PlannerInfo *root,
                                   Relids outer_relids, Relids inner_params);
+extern void mark_dummy_rel(RelOptInfo *rel);
+extern bool have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2,
+                                          JoinType jointype, List *restrictlist);
 
 /*
  * equivclass.c
index 5a4d46ba9d7000ce893b7866c8a4ae87934adbb8..a4a7b79f4da290c87e752528dcbe42541f2b33e0 100644 (file)
@@ -28,5 +28,7 @@ extern void fix_placeholder_input_needed_levels(PlannerInfo *root);
 extern void add_placeholders_to_base_rels(PlannerInfo *root);
 extern void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
                                                        RelOptInfo *outer_rel, RelOptInfo *inner_rel);
+extern void add_placeholders_to_child_joinrel(PlannerInfo *root,
+                                                                 RelOptInfo *childrel, RelOptInfo *parentrel);
 
 #endif                                                 /* PLACEHOLDER_H */
index 2a4cf71e10216e8b6894373fb2b1e4069003e922..2801bfdfbe8c49246f5fa4ea8225202bf8395711 100644 (file)
@@ -58,5 +58,7 @@ extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr);
 extern bool plan_cluster_use_sort(Oid tableOid, Oid indexOid);
 
 extern List *get_partitioned_child_rels(PlannerInfo *root, Index rti);
+extern List *get_partitioned_child_rels_for_join(PlannerInfo *root,
+                                                                       Relids join_relids);
 
 #endif                                                 /* PLANNER_H */
index 4be0afd566022323d54367f04d0eeb155293d819..80fbfd6ea94b1ff13915927a05d6fdac12de8580 100644 (file)
@@ -62,4 +62,10 @@ extern Node *adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
 extern AppendRelInfo **find_appinfos_by_relids(PlannerInfo *root,
                                                Relids relids, int *nappinfos);
 
+extern SpecialJoinInfo *build_child_join_sjinfo(PlannerInfo *root,
+                                               SpecialJoinInfo *parent_sjinfo,
+                                               Relids left_relids, Relids right_relids);
+extern Relids adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
+                                                          Relids child_relids, Relids top_parent_relids);
+
 #endif                                                 /* PREP_H */
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
new file mode 100644 (file)
index 0000000..234b8b5
--- /dev/null
@@ -0,0 +1,1789 @@
+--
+-- PARTITION_JOIN
+-- Test partition-wise join between partitioned tables
+--
+-- Enable partition-wise join, which by default is disabled.
+SET enable_partition_wise_join to true;
+--
+-- partitioned by a single column
+--
+CREATE TABLE prt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE prt1_p1 PARTITION OF prt1 FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_p3 PARTITION OF prt1 FOR VALUES FROM (500) TO (600);
+CREATE TABLE prt1_p2 PARTITION OF prt1 FOR VALUES FROM (250) TO (500);
+INSERT INTO prt1 SELECT i, i % 25, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 2 = 0;
+CREATE INDEX iprt1_p1_a on prt1_p1(a);
+CREATE INDEX iprt1_p2_a on prt1_p2(a);
+CREATE INDEX iprt1_p3_a on prt1_p3(a);
+ANALYZE prt1;
+CREATE TABLE prt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE prt2_p1 PARTITION OF prt2 FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_p2 PARTITION OF prt2 FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt2_p3 PARTITION OF prt2 FOR VALUES FROM (500) TO (600);
+INSERT INTO prt2 SELECT i % 25, i, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 3 = 0;
+CREATE INDEX iprt2_p1_b on prt2_p1(b);
+CREATE INDEX iprt2_p2_b on prt2_p2(b);
+CREATE INDEX iprt2_p3_b on prt2_p3(b);
+ANALYZE prt2;
+-- inner join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Sort
+   Sort Key: t1.a
+   ->  Append
+         ->  Hash Join
+               Hash Cond: (t2.b = t1.a)
+               ->  Seq Scan on prt2_p1 t2
+               ->  Hash
+                     ->  Seq Scan on prt1_p1 t1
+                           Filter: (b = 0)
+         ->  Hash Join
+               Hash Cond: (t2_1.b = t1_1.a)
+               ->  Seq Scan on prt2_p2 t2_1
+               ->  Hash
+                     ->  Seq Scan on prt1_p2 t1_1
+                           Filter: (b = 0)
+         ->  Hash Join
+               Hash Cond: (t2_2.b = t1_2.a)
+               ->  Seq Scan on prt2_p3 t2_2
+               ->  Hash
+                     ->  Seq Scan on prt1_p3 t1_2
+                           Filter: (b = 0)
+(21 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+  a  |  c   |  b  |  c   
+-----+------+-----+------
+   0 | 0000 |   0 | 0000
+ 150 | 0150 | 150 | 0150
+ 300 | 0300 | 300 | 0300
+ 450 | 0450 | 450 | 0450
+(4 rows)
+
+-- left outer join, with whole-row reference
+EXPLAIN (COSTS OFF)
+SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Sort
+   Sort Key: t1.a, t2.b
+   ->  Result
+         ->  Append
+               ->  Hash Right Join
+                     Hash Cond: (t2.b = t1.a)
+                     ->  Seq Scan on prt2_p1 t2
+                     ->  Hash
+                           ->  Seq Scan on prt1_p1 t1
+                                 Filter: (b = 0)
+               ->  Hash Right Join
+                     Hash Cond: (t2_1.b = t1_1.a)
+                     ->  Seq Scan on prt2_p2 t2_1
+                     ->  Hash
+                           ->  Seq Scan on prt1_p2 t1_1
+                                 Filter: (b = 0)
+               ->  Hash Right Join
+                     Hash Cond: (t2_2.b = t1_2.a)
+                     ->  Seq Scan on prt2_p3 t2_2
+                     ->  Hash
+                           ->  Seq Scan on prt1_p3 t1_2
+                                 Filter: (b = 0)
+(22 rows)
+
+SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+      t1      |      t2      
+--------------+--------------
+ (0,0,0000)   | (0,0,0000)
+ (50,0,0050)  | 
+ (100,0,0100) | 
+ (150,0,0150) | (0,150,0150)
+ (200,0,0200) | 
+ (250,0,0250) | 
+ (300,0,0300) | (0,300,0300)
+ (350,0,0350) | 
+ (400,0,0400) | 
+ (450,0,0450) | (0,450,0450)
+ (500,0,0500) | 
+ (550,0,0550) | 
+(12 rows)
+
+-- right outer join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a, t2.b
+   ->  Result
+         ->  Append
+               ->  Hash Right Join
+                     Hash Cond: (t1.a = t2.b)
+                     ->  Seq Scan on prt1_p1 t1
+                     ->  Hash
+                           ->  Seq Scan on prt2_p1 t2
+                                 Filter: (a = 0)
+               ->  Hash Right Join
+                     Hash Cond: (t1_1.a = t2_1.b)
+                     ->  Seq Scan on prt1_p2 t1_1
+                     ->  Hash
+                           ->  Seq Scan on prt2_p2 t2_1
+                                 Filter: (a = 0)
+               ->  Nested Loop Left Join
+                     ->  Seq Scan on prt2_p3 t2_2
+                           Filter: (a = 0)
+                     ->  Index Scan using iprt1_p3_a on prt1_p3 t1_2
+                           Index Cond: (a = t2_2.b)
+(21 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+  a  |  c   |  b  |  c   
+-----+------+-----+------
+   0 | 0000 |   0 | 0000
+ 150 | 0150 | 150 | 0150
+ 300 | 0300 | 300 | 0300
+ 450 | 0450 | 450 | 0450
+     |      |  75 | 0075
+     |      | 225 | 0225
+     |      | 375 | 0375
+     |      | 525 | 0525
+(8 rows)
+
+-- full outer join, with placeholder vars
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b) WHERE t1.phv = t1.a OR t2.phv = t2.b ORDER BY t1.a, t2.b;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Sort
+   Sort Key: prt1_p1.a, prt2_p1.b
+   ->  Append
+         ->  Hash Full Join
+               Hash Cond: (prt1_p1.a = prt2_p1.b)
+               Filter: (((50) = prt1_p1.a) OR ((75) = prt2_p1.b))
+               ->  Seq Scan on prt1_p1
+                     Filter: (b = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_p1
+                           Filter: (a = 0)
+         ->  Hash Full Join
+               Hash Cond: (prt1_p2.a = prt2_p2.b)
+               Filter: (((50) = prt1_p2.a) OR ((75) = prt2_p2.b))
+               ->  Seq Scan on prt1_p2
+                     Filter: (b = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_p2
+                           Filter: (a = 0)
+         ->  Hash Full Join
+               Hash Cond: (prt1_p3.a = prt2_p3.b)
+               Filter: (((50) = prt1_p3.a) OR ((75) = prt2_p3.b))
+               ->  Seq Scan on prt1_p3
+                     Filter: (b = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_p3
+                           Filter: (a = 0)
+(27 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b) WHERE t1.phv = t1.a OR t2.phv = t2.b ORDER BY t1.a, t2.b;
+ a  |  c   | b  |  c   
+----+------+----+------
+ 50 | 0050 |    | 
+    |      | 75 | 0075
+(2 rows)
+
+-- Join with pruned partitions from joining relations
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a < 450 AND t2.b > 250 AND t1.b = 0 ORDER BY t1.a, t2.b;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Sort
+   Sort Key: t1.a
+   ->  Append
+         ->  Hash Join
+               Hash Cond: (t2.b = t1.a)
+               ->  Seq Scan on prt2_p2 t2
+                     Filter: (b > 250)
+               ->  Hash
+                     ->  Seq Scan on prt1_p2 t1
+                           Filter: ((a < 450) AND (b = 0))
+(10 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a < 450 AND t2.b > 250 AND t1.b = 0 ORDER BY t1.a, t2.b;
+  a  |  c   |  b  |  c   
+-----+------+-----+------
+ 300 | 0300 | 300 | 0300
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Sort
+   Sort Key: prt1_p1.a, b
+   ->  Append
+         ->  Hash Left Join
+               Hash Cond: (prt1_p1.a = b)
+               ->  Seq Scan on prt1_p1
+                     Filter: ((a < 450) AND (b = 0))
+               ->  Hash
+                     ->  Result
+                           One-Time Filter: false
+         ->  Hash Right Join
+               Hash Cond: (prt2_p2.b = prt1_p2.a)
+               ->  Seq Scan on prt2_p2
+                     Filter: (b > 250)
+               ->  Hash
+                     ->  Seq Scan on prt1_p2
+                           Filter: ((a < 450) AND (b = 0))
+(17 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+  a  |  c   |  b  |  c   
+-----+------+-----+------
+   0 | 0000 |     | 
+  50 | 0050 |     | 
+ 100 | 0100 |     | 
+ 150 | 0150 |     | 
+ 200 | 0200 |     | 
+ 250 | 0250 |     | 
+ 300 | 0300 | 300 | 0300
+ 350 | 0350 |     | 
+ 400 | 0400 |     | 
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 FULL JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 OR t2.a = 0 ORDER BY t1.a, t2.b;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Sort
+   Sort Key: prt1_p1.a, b
+   ->  Append
+         ->  Hash Full Join
+               Hash Cond: (prt1_p1.a = b)
+               Filter: ((prt1_p1.b = 0) OR (a = 0))
+               ->  Seq Scan on prt1_p1
+                     Filter: (a < 450)
+               ->  Hash
+                     ->  Result
+                           One-Time Filter: false
+         ->  Hash Full Join
+               Hash Cond: (prt1_p2.a = prt2_p2.b)
+               Filter: ((prt1_p2.b = 0) OR (prt2_p2.a = 0))
+               ->  Seq Scan on prt1_p2
+                     Filter: (a < 450)
+               ->  Hash
+                     ->  Seq Scan on prt2_p2
+                           Filter: (b > 250)
+         ->  Hash Full Join
+               Hash Cond: (prt2_p3.b = a)
+               Filter: ((b = 0) OR (prt2_p3.a = 0))
+               ->  Seq Scan on prt2_p3
+                     Filter: (b > 250)
+               ->  Hash
+                     ->  Result
+                           One-Time Filter: false
+(27 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 FULL JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 OR t2.a = 0 ORDER BY t1.a, t2.b;
+  a  |  c   |  b  |  c   
+-----+------+-----+------
+   0 | 0000 |     | 
+  50 | 0050 |     | 
+ 100 | 0100 |     | 
+ 150 | 0150 |     | 
+ 200 | 0200 |     | 
+ 250 | 0250 |     | 
+ 300 | 0300 | 300 | 0300
+ 350 | 0350 |     | 
+ 400 | 0400 |     | 
+     |      | 375 | 0375
+     |      | 450 | 0450
+     |      | 525 | 0525
+(12 rows)
+
+-- Semi-join
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t2.b FROM prt2 t2 WHERE t2.a = 0) AND t1.b = 0 ORDER BY t1.a;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Sort
+   Sort Key: t1.a
+   ->  Append
+         ->  Hash Semi Join
+               Hash Cond: (t1.a = t2.b)
+               ->  Seq Scan on prt1_p1 t1
+                     Filter: (b = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_p1 t2
+                           Filter: (a = 0)
+         ->  Hash Semi Join
+               Hash Cond: (t1_1.a = t2_1.b)
+               ->  Seq Scan on prt1_p2 t1_1
+                     Filter: (b = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_p2 t2_1
+                           Filter: (a = 0)
+         ->  Nested Loop Semi Join
+               Join Filter: (t1_2.a = t2_2.b)
+               ->  Seq Scan on prt1_p3 t1_2
+                     Filter: (b = 0)
+               ->  Materialize
+                     ->  Seq Scan on prt2_p3 t2_2
+                           Filter: (a = 0)
+(24 rows)
+
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t2.b FROM prt2 t2 WHERE t2.a = 0) AND t1.b = 0 ORDER BY t1.a;
+  a  | b |  c   
+-----+---+------
+   0 | 0 | 0000
+ 150 | 0 | 0150
+ 300 | 0 | 0300
+ 450 | 0 | 0450
+(4 rows)
+
+-- Anti-join with aggregates
+EXPLAIN (COSTS OFF)
+SELECT sum(t1.a), avg(t1.a), sum(t1.b), avg(t1.b) FROM prt1 t1 WHERE NOT EXISTS (SELECT 1 FROM prt2 t2 WHERE t1.a = t2.b);
+                    QUERY PLAN                    
+--------------------------------------------------
+ Aggregate
+   ->  Append
+         ->  Hash Anti Join
+               Hash Cond: (t1.a = t2.b)
+               ->  Seq Scan on prt1_p1 t1
+               ->  Hash
+                     ->  Seq Scan on prt2_p1 t2
+         ->  Hash Anti Join
+               Hash Cond: (t1_1.a = t2_1.b)
+               ->  Seq Scan on prt1_p2 t1_1
+               ->  Hash
+                     ->  Seq Scan on prt2_p2 t2_1
+         ->  Hash Anti Join
+               Hash Cond: (t1_2.a = t2_2.b)
+               ->  Seq Scan on prt1_p3 t1_2
+               ->  Hash
+                     ->  Seq Scan on prt2_p3 t2_2
+(17 rows)
+
+SELECT sum(t1.a), avg(t1.a), sum(t1.b), avg(t1.b) FROM prt1 t1 WHERE NOT EXISTS (SELECT 1 FROM prt2 t2 WHERE t1.a = t2.b);
+  sum  |         avg          | sum  |         avg         
+-------+----------------------+------+---------------------
+ 60000 | 300.0000000000000000 | 2400 | 12.0000000000000000
+(1 row)
+
+-- lateral reference
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t3.a AS t3a, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+                         ON t1.a = ss.t2a WHERE t1.b = 0 ORDER BY t1.a;
+                                   QUERY PLAN                                   
+--------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a
+   ->  Result
+         ->  Append
+               ->  Nested Loop Left Join
+                     ->  Seq Scan on prt1_p1 t1
+                           Filter: (b = 0)
+                     ->  Nested Loop
+                           ->  Index Only Scan using iprt1_p1_a on prt1_p1 t2
+                                 Index Cond: (a = t1.a)
+                           ->  Index Scan using iprt2_p1_b on prt2_p1 t3
+                                 Index Cond: (b = t2.a)
+               ->  Nested Loop Left Join
+                     ->  Seq Scan on prt1_p2 t1_1
+                           Filter: (b = 0)
+                     ->  Nested Loop
+                           ->  Index Only Scan using iprt1_p2_a on prt1_p2 t2_1
+                                 Index Cond: (a = t1_1.a)
+                           ->  Index Scan using iprt2_p2_b on prt2_p2 t3_1
+                                 Index Cond: (b = t2_1.a)
+               ->  Nested Loop Left Join
+                     ->  Seq Scan on prt1_p3 t1_2
+                           Filter: (b = 0)
+                     ->  Nested Loop
+                           ->  Index Only Scan using iprt1_p3_a on prt1_p3 t2_2
+                                 Index Cond: (a = t1_2.a)
+                           ->  Index Scan using iprt2_p3_b on prt2_p3 t3_2
+                                 Index Cond: (b = t2_2.a)
+(28 rows)
+
+SELECT * FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t3.a AS t3a, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+                         ON t1.a = ss.t2a WHERE t1.b = 0 ORDER BY t1.a;
+  a  | b |  c   | t2a | t3a | least 
+-----+---+------+-----+-----+-------
+   0 | 0 | 0000 |   0 |   0 |     0
+  50 | 0 | 0050 |     |     |      
+ 100 | 0 | 0100 |     |     |      
+ 150 | 0 | 0150 | 150 |   0 |   150
+ 200 | 0 | 0200 |     |     |      
+ 250 | 0 | 0250 |     |     |      
+ 300 | 0 | 0300 | 300 |   0 |   300
+ 350 | 0 | 0350 |     |     |      
+ 400 | 0 | 0400 |     |     |      
+ 450 | 0 | 0450 | 450 |   0 |   450
+ 500 | 0 | 0500 |     |     |      
+ 550 | 0 | 0550 |     |     |      
+(12 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+                         ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Sort
+   Sort Key: t1.a
+   ->  Hash Left Join
+         Hash Cond: ((t1.c)::text = (t2.c)::text)
+         Filter: ((t1.b + COALESCE(t2.b, 0)) = 0)
+         ->  Append
+               ->  Seq Scan on prt1_p1 t1
+               ->  Seq Scan on prt1_p2 t1_1
+               ->  Seq Scan on prt1_p3 t1_2
+         ->  Hash
+               ->  Append
+                     ->  Hash Join
+                           Hash Cond: (t2.a = t3.b)
+                           ->  Seq Scan on prt1_p1 t2
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p1 t3
+                     ->  Hash Join
+                           Hash Cond: (t2_1.a = t3_1.b)
+                           ->  Seq Scan on prt1_p2 t2_1
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p2 t3_1
+                     ->  Hash Join
+                           Hash Cond: (t2_2.a = t3_2.b)
+                           ->  Seq Scan on prt1_p3 t2_2
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p3 t3_2
+(26 rows)
+
+SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+                         ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
+  a  | t2a | t2c  
+-----+-----+------
+   0 |   0 | 0000
+  50 |     | 
+ 100 |     | 
+ 150 | 150 | 0150
+ 200 |     | 
+ 250 |     | 
+ 300 | 300 | 0300
+ 350 |     | 
+ 400 |     | 
+ 450 | 450 | 0450
+ 500 |     | 
+ 550 |     | 
+(12 rows)
+
+--
+-- partitioned by expression
+--
+CREATE TABLE prt1_e (a int, b int, c int) PARTITION BY RANGE(((a + b)/2));
+CREATE TABLE prt1_e_p1 PARTITION OF prt1_e FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_e_p2 PARTITION OF prt1_e FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt1_e_p3 PARTITION OF prt1_e FOR VALUES FROM (500) TO (600);
+INSERT INTO prt1_e SELECT i, i, i % 25 FROM generate_series(0, 599, 2) i;
+CREATE INDEX iprt1_e_p1_ab2 on prt1_e_p1(((a+b)/2));
+CREATE INDEX iprt1_e_p2_ab2 on prt1_e_p2(((a+b)/2));
+CREATE INDEX iprt1_e_p3_ab2 on prt1_e_p3(((a+b)/2));
+ANALYZE prt1_e;
+CREATE TABLE prt2_e (a int, b int, c int) PARTITION BY RANGE(((b + a)/2));
+CREATE TABLE prt2_e_p1 PARTITION OF prt2_e FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_e_p2 PARTITION OF prt2_e FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt2_e_p3 PARTITION OF prt2_e FOR VALUES FROM (500) TO (600);
+INSERT INTO prt2_e SELECT i, i, i % 25 FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_e;
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_e t1, prt2_e t2 WHERE (t1.a + t1.b)/2 = (t2.b + t2.a)/2 AND t1.c = 0 ORDER BY t1.a, t2.b;
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a, t2.b
+   ->  Append
+         ->  Hash Join
+               Hash Cond: (((t2.b + t2.a) / 2) = ((t1.a + t1.b) / 2))
+               ->  Seq Scan on prt2_e_p1 t2
+               ->  Hash
+                     ->  Seq Scan on prt1_e_p1 t1
+                           Filter: (c = 0)
+         ->  Hash Join
+               Hash Cond: (((t2_1.b + t2_1.a) / 2) = ((t1_1.a + t1_1.b) / 2))
+               ->  Seq Scan on prt2_e_p2 t2_1
+               ->  Hash
+                     ->  Seq Scan on prt1_e_p2 t1_1
+                           Filter: (c = 0)
+         ->  Hash Join
+               Hash Cond: (((t2_2.b + t2_2.a) / 2) = ((t1_2.a + t1_2.b) / 2))
+               ->  Seq Scan on prt2_e_p3 t2_2
+               ->  Hash
+                     ->  Seq Scan on prt1_e_p3 t1_2
+                           Filter: (c = 0)
+(21 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_e t1, prt2_e t2 WHERE (t1.a + t1.b)/2 = (t2.b + t2.a)/2 AND t1.c = 0 ORDER BY t1.a, t2.b;
+  a  | c |  b  | c 
+-----+---+-----+---
+   0 | 0 |   0 | 0
+ 150 | 0 | 150 | 0
+ 300 | 0 | 300 | 0
+ 450 | 0 | 450 | 0
+(4 rows)
+
+--
+-- N-way join
+--
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM prt1 t1, prt2 t2, prt1_e t3 WHERE t1.a = t2.b AND t1.a = (t3.a + t3.b)/2 AND t1.b = 0 ORDER BY t1.a, t2.b;
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a
+   ->  Result
+         ->  Append
+               ->  Nested Loop
+                     Join Filter: (t1.a = ((t3.a + t3.b) / 2))
+                     ->  Hash Join
+                           Hash Cond: (t2.b = t1.a)
+                           ->  Seq Scan on prt2_p1 t2
+                           ->  Hash
+                                 ->  Seq Scan on prt1_p1 t1
+                                       Filter: (b = 0)
+                     ->  Index Scan using iprt1_e_p1_ab2 on prt1_e_p1 t3
+                           Index Cond: (((a + b) / 2) = t2.b)
+               ->  Nested Loop
+                     Join Filter: (t1_1.a = ((t3_1.a + t3_1.b) / 2))
+                     ->  Hash Join
+                           Hash Cond: (t2_1.b = t1_1.a)
+                           ->  Seq Scan on prt2_p2 t2_1
+                           ->  Hash
+                                 ->  Seq Scan on prt1_p2 t1_1
+                                       Filter: (b = 0)
+                     ->  Index Scan using iprt1_e_p2_ab2 on prt1_e_p2 t3_1
+                           Index Cond: (((a + b) / 2) = t2_1.b)
+               ->  Nested Loop
+                     Join Filter: (t1_2.a = ((t3_2.a + t3_2.b) / 2))
+                     ->  Hash Join
+                           Hash Cond: (t2_2.b = t1_2.a)
+                           ->  Seq Scan on prt2_p3 t2_2
+                           ->  Hash
+                                 ->  Seq Scan on prt1_p3 t1_2
+                                       Filter: (b = 0)
+                     ->  Index Scan using iprt1_e_p3_ab2 on prt1_e_p3 t3_2
+                           Index Cond: (((a + b) / 2) = t2_2.b)
+(34 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM prt1 t1, prt2 t2, prt1_e t3 WHERE t1.a = t2.b AND t1.a = (t3.a + t3.b)/2 AND t1.b = 0 ORDER BY t1.a, t2.b;
+  a  |  c   |  b  |  c   | ?column? | c 
+-----+------+-----+------+----------+---
+   0 | 0000 |   0 | 0000 |        0 | 0
+ 150 | 0150 | 150 | 0150 |      300 | 0
+ 300 | 0300 | 300 | 0300 |      600 | 0
+ 450 | 0450 | 450 | 0450 |      900 | 0
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) LEFT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.b = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a, t2.b, ((t3.a + t3.b))
+   ->  Result
+         ->  Append
+               ->  Hash Right Join
+                     Hash Cond: (((t3.a + t3.b) / 2) = t1.a)
+                     ->  Seq Scan on prt1_e_p1 t3
+                     ->  Hash
+                           ->  Hash Right Join
+                                 Hash Cond: (t2.b = t1.a)
+                                 ->  Seq Scan on prt2_p1 t2
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_p1 t1
+                                             Filter: (b = 0)
+               ->  Hash Right Join
+                     Hash Cond: (((t3_1.a + t3_1.b) / 2) = t1_1.a)
+                     ->  Seq Scan on prt1_e_p2 t3_1
+                     ->  Hash
+                           ->  Hash Right Join
+                                 Hash Cond: (t2_1.b = t1_1.a)
+                                 ->  Seq Scan on prt2_p2 t2_1
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_p2 t1_1
+                                             Filter: (b = 0)
+               ->  Hash Right Join
+                     Hash Cond: (((t3_2.a + t3_2.b) / 2) = t1_2.a)
+                     ->  Seq Scan on prt1_e_p3 t3_2
+                     ->  Hash
+                           ->  Hash Right Join
+                                 Hash Cond: (t2_2.b = t1_2.a)
+                                 ->  Seq Scan on prt2_p3 t2_2
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_p3 t1_2
+                                             Filter: (b = 0)
+(34 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) LEFT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.b = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+  a  |  c   |  b  |  c   | ?column? | c 
+-----+------+-----+------+----------+---
+   0 | 0000 |   0 | 0000 |        0 | 0
+  50 | 0050 |     |      |      100 | 0
+ 100 | 0100 |     |      |      200 | 0
+ 150 | 0150 | 150 | 0150 |      300 | 0
+ 200 | 0200 |     |      |      400 | 0
+ 250 | 0250 |     |      |      500 | 0
+ 300 | 0300 | 300 | 0300 |      600 | 0
+ 350 | 0350 |     |      |      700 | 0
+ 400 | 0400 |     |      |      800 | 0
+ 450 | 0450 | 450 | 0450 |      900 | 0
+ 500 | 0500 |     |      |     1000 | 0
+ 550 | 0550 |     |      |     1100 | 0
+(12 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a, t2.b, ((t3.a + t3.b))
+   ->  Result
+         ->  Append
+               ->  Nested Loop Left Join
+                     ->  Hash Right Join
+                           Hash Cond: (t1.a = ((t3.a + t3.b) / 2))
+                           ->  Seq Scan on prt1_p1 t1
+                           ->  Hash
+                                 ->  Seq Scan on prt1_e_p1 t3
+                                       Filter: (c = 0)
+                     ->  Index Scan using iprt2_p1_b on prt2_p1 t2
+                           Index Cond: (t1.a = b)
+               ->  Nested Loop Left Join
+                     ->  Hash Right Join
+                           Hash Cond: (t1_1.a = ((t3_1.a + t3_1.b) / 2))
+                           ->  Seq Scan on prt1_p2 t1_1
+                           ->  Hash
+                                 ->  Seq Scan on prt1_e_p2 t3_1
+                                       Filter: (c = 0)
+                     ->  Index Scan using iprt2_p2_b on prt2_p2 t2_1
+                           Index Cond: (t1_1.a = b)
+               ->  Nested Loop Left Join
+                     ->  Hash Right Join
+                           Hash Cond: (t1_2.a = ((t3_2.a + t3_2.b) / 2))
+                           ->  Seq Scan on prt1_p3 t1_2
+                           ->  Hash
+                                 ->  Seq Scan on prt1_e_p3 t3_2
+                                       Filter: (c = 0)
+                     ->  Index Scan using iprt2_p3_b on prt2_p3 t2_2
+                           Index Cond: (t1_2.a = b)
+(31 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+  a  |  c   |  b  |  c   | ?column? | c 
+-----+------+-----+------+----------+---
+   0 | 0000 |   0 | 0000 |        0 | 0
+  50 | 0050 |     |      |      100 | 0
+ 100 | 0100 |     |      |      200 | 0
+ 150 | 0150 | 150 | 0150 |      300 | 0
+ 200 | 0200 |     |      |      400 | 0
+ 250 | 0250 |     |      |      500 | 0
+ 300 | 0300 | 300 | 0300 |      600 | 0
+ 350 | 0350 |     |      |      700 | 0
+ 400 | 0400 |     |      |      800 | 0
+ 450 | 0450 | 450 | 0450 |      900 | 0
+ 500 | 0500 |     |      |     1000 | 0
+ 550 | 0550 |     |      |     1100 | 0
+(12 rows)
+
+-- Cases with non-nullable expressions in subquery results;
+-- make sure these go to null as expected
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b)) FULL JOIN (SELECT 50 phv, * FROM prt1_e WHERE prt1_e.c = 0) t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.a = t1.phv OR t2.b = t2.phv OR (t3.a + t3.b)/2 = t3.phv ORDER BY t1.a, t2.b, t3.a + t3.b;
+                                                      QUERY PLAN                                                      
+----------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: prt1_p1.a, prt2_p1.b, ((prt1_e_p1.a + prt1_e_p1.b))
+   ->  Result
+         ->  Append
+               ->  Hash Full Join
+                     Hash Cond: (prt1_p1.a = ((prt1_e_p1.a + prt1_e_p1.b) / 2))
+                     Filter: ((prt1_p1.a = (50)) OR (prt2_p1.b = (75)) OR (((prt1_e_p1.a + prt1_e_p1.b) / 2) = (50)))
+                     ->  Hash Full Join
+                           Hash Cond: (prt1_p1.a = prt2_p1.b)
+                           ->  Seq Scan on prt1_p1
+                                 Filter: (b = 0)
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p1
+                                       Filter: (a = 0)
+                     ->  Hash
+                           ->  Seq Scan on prt1_e_p1
+                                 Filter: (c = 0)
+               ->  Hash Full Join
+                     Hash Cond: (prt1_p2.a = ((prt1_e_p2.a + prt1_e_p2.b) / 2))
+                     Filter: ((prt1_p2.a = (50)) OR (prt2_p2.b = (75)) OR (((prt1_e_p2.a + prt1_e_p2.b) / 2) = (50)))
+                     ->  Hash Full Join
+                           Hash Cond: (prt1_p2.a = prt2_p2.b)
+                           ->  Seq Scan on prt1_p2
+                                 Filter: (b = 0)
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p2
+                                       Filter: (a = 0)
+                     ->  Hash
+                           ->  Seq Scan on prt1_e_p2
+                                 Filter: (c = 0)
+               ->  Hash Full Join
+                     Hash Cond: (prt1_p3.a = ((prt1_e_p3.a + prt1_e_p3.b) / 2))
+                     Filter: ((prt1_p3.a = (50)) OR (prt2_p3.b = (75)) OR (((prt1_e_p3.a + prt1_e_p3.b) / 2) = (50)))
+                     ->  Hash Full Join
+                           Hash Cond: (prt1_p3.a = prt2_p3.b)
+                           ->  Seq Scan on prt1_p3
+                                 Filter: (b = 0)
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p3
+                                       Filter: (a = 0)
+                     ->  Hash
+                           ->  Seq Scan on prt1_e_p3
+                                 Filter: (c = 0)
+(43 rows)
+
+SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b)) FULL JOIN (SELECT 50 phv, * FROM prt1_e WHERE prt1_e.c = 0) t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.a = t1.phv OR t2.b = t2.phv OR (t3.a + t3.b)/2 = t3.phv ORDER BY t1.a, t2.b, t3.a + t3.b;
+ a  | phv | b  | phv | ?column? | phv 
+----+-----+----+-----+----------+-----
+ 50 |  50 |    |     |      100 |  50
+    |     | 75 |  75 |          |    
+(2 rows)
+
+-- Semi-join
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a
+   ->  Append
+         ->  Nested Loop
+               Join Filter: (t1.a = t1_3.b)
+               ->  HashAggregate
+                     Group Key: t1_3.b
+                     ->  Hash Join
+                           Hash Cond: (((t2.a + t2.b) / 2) = t1_3.b)
+                           ->  Seq Scan on prt1_e_p1 t2
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p1 t1_3
+                                       Filter: (a = 0)
+               ->  Index Scan using iprt1_p1_a on prt1_p1 t1
+                     Index Cond: (a = ((t2.a + t2.b) / 2))
+                     Filter: (b = 0)
+         ->  Nested Loop
+               Join Filter: (t1_1.a = t1_4.b)
+               ->  HashAggregate
+                     Group Key: t1_4.b
+                     ->  Hash Join
+                           Hash Cond: (((t2_1.a + t2_1.b) / 2) = t1_4.b)
+                           ->  Seq Scan on prt1_e_p2 t2_1
+                           ->  Hash
+                                 ->  Seq Scan on prt2_p2 t1_4
+                                       Filter: (a = 0)
+               ->  Index Scan using iprt1_p2_a on prt1_p2 t1_1
+                     Index Cond: (a = ((t2_1.a + t2_1.b) / 2))
+                     Filter: (b = 0)
+         ->  Nested Loop
+               Join Filter: (t1_2.a = t1_5.b)
+               ->  HashAggregate
+                     Group Key: t1_5.b
+                     ->  Nested Loop
+                           ->  Seq Scan on prt2_p3 t1_5
+                                 Filter: (a = 0)
+                           ->  Index Scan using iprt1_e_p3_ab2 on prt1_e_p3 t2_2
+                                 Index Cond: (((a + b) / 2) = t1_5.b)
+               ->  Index Scan using iprt1_p3_a on prt1_p3 t1_2
+                     Index Cond: (a = ((t2_2.a + t2_2.b) / 2))
+                     Filter: (b = 0)
+(41 rows)
+
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
+  a  | b |  c   
+-----+---+------
+   0 | 0 | 0000
+ 150 | 0 | 0150
+ 300 | 0 | 0300
+ 450 | 0 | 0450
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a
+   ->  Append
+         ->  Nested Loop
+               ->  HashAggregate
+                     Group Key: t1_3.b
+                     ->  Hash Semi Join
+                           Hash Cond: (t1_3.b = ((t1_6.a + t1_6.b) / 2))
+                           ->  Seq Scan on prt2_p1 t1_3
+                           ->  Hash
+                                 ->  Seq Scan on prt1_e_p1 t1_6
+                                       Filter: (c = 0)
+               ->  Index Scan using iprt1_p1_a on prt1_p1 t1
+                     Index Cond: (a = t1_3.b)
+                     Filter: (b = 0)
+         ->  Nested Loop
+               ->  HashAggregate
+                     Group Key: t1_4.b
+                     ->  Hash Semi Join
+                           Hash Cond: (t1_4.b = ((t1_7.a + t1_7.b) / 2))
+                           ->  Seq Scan on prt2_p2 t1_4
+                           ->  Hash
+                                 ->  Seq Scan on prt1_e_p2 t1_7
+                                       Filter: (c = 0)
+               ->  Index Scan using iprt1_p2_a on prt1_p2 t1_1
+                     Index Cond: (a = t1_4.b)
+                     Filter: (b = 0)
+         ->  Nested Loop
+               ->  Unique
+                     ->  Sort
+                           Sort Key: t1_5.b
+                           ->  Hash Semi Join
+                                 Hash Cond: (t1_5.b = ((t1_8.a + t1_8.b) / 2))
+                                 ->  Seq Scan on prt2_p3 t1_5
+                                 ->  Hash
+                                       ->  Seq Scan on prt1_e_p3 t1_8
+                                             Filter: (c = 0)
+               ->  Index Scan using iprt1_p3_a on prt1_p3 t1_2
+                     Index Cond: (a = t1_5.b)
+                     Filter: (b = 0)
+(40 rows)
+
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+  a  | b |  c   
+-----+---+------
+   0 | 0 | 0000
+ 150 | 0 | 0150
+ 300 | 0 | 0300
+ 450 | 0 | 0450
+(4 rows)
+
+-- test merge joins
+SET enable_hashjoin TO off;
+SET enable_nestloop TO off;
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Merge Append
+   Sort Key: t1.a
+   ->  Merge Semi Join
+         Merge Cond: (t1.a = t1_3.b)
+         ->  Sort
+               Sort Key: t1.a
+               ->  Seq Scan on prt1_p1 t1
+                     Filter: (b = 0)
+         ->  Merge Semi Join
+               Merge Cond: (t1_3.b = (((t1_6.a + t1_6.b) / 2)))
+               ->  Sort
+                     Sort Key: t1_3.b
+                     ->  Seq Scan on prt2_p1 t1_3
+               ->  Sort
+                     Sort Key: (((t1_6.a + t1_6.b) / 2))
+                     ->  Seq Scan on prt1_e_p1 t1_6
+                           Filter: (c = 0)
+   ->  Merge Semi Join
+         Merge Cond: (t1_1.a = t1_4.b)
+         ->  Sort
+               Sort Key: t1_1.a
+               ->  Seq Scan on prt1_p2 t1_1
+                     Filter: (b = 0)
+         ->  Merge Semi Join
+               Merge Cond: (t1_4.b = (((t1_7.a + t1_7.b) / 2)))
+               ->  Sort
+                     Sort Key: t1_4.b
+                     ->  Seq Scan on prt2_p2 t1_4
+               ->  Sort
+                     Sort Key: (((t1_7.a + t1_7.b) / 2))
+                     ->  Seq Scan on prt1_e_p2 t1_7
+                           Filter: (c = 0)
+   ->  Merge Semi Join
+         Merge Cond: (t1_2.a = t1_5.b)
+         ->  Sort
+               Sort Key: t1_2.a
+               ->  Seq Scan on prt1_p3 t1_2
+                     Filter: (b = 0)
+         ->  Merge Semi Join
+               Merge Cond: (t1_5.b = (((t1_8.a + t1_8.b) / 2)))
+               ->  Sort
+                     Sort Key: t1_5.b
+                     ->  Seq Scan on prt2_p3 t1_5
+               ->  Sort
+                     Sort Key: (((t1_8.a + t1_8.b) / 2))
+                     ->  Seq Scan on prt1_e_p3 t1_8
+                           Filter: (c = 0)
+(47 rows)
+
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+  a  | b |  c   
+-----+---+------
+   0 | 0 | 0000
+ 150 | 0 | 0150
+ 300 | 0 | 0300
+ 450 | 0 | 0450
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a, t2.b, ((t3.a + t3.b))
+   ->  Result
+         ->  Append
+               ->  Merge Left Join
+                     Merge Cond: (t1.a = t2.b)
+                     ->  Sort
+                           Sort Key: t1.a
+                           ->  Merge Left Join
+                                 Merge Cond: ((((t3.a + t3.b) / 2)) = t1.a)
+                                 ->  Sort
+                                       Sort Key: (((t3.a + t3.b) / 2))
+                                       ->  Seq Scan on prt1_e_p1 t3
+                                             Filter: (c = 0)
+                                 ->  Sort
+                                       Sort Key: t1.a
+                                       ->  Seq Scan on prt1_p1 t1
+                     ->  Sort
+                           Sort Key: t2.b
+                           ->  Seq Scan on prt2_p1 t2
+               ->  Merge Left Join
+                     Merge Cond: (t1_1.a = t2_1.b)
+                     ->  Sort
+                           Sort Key: t1_1.a
+                           ->  Merge Left Join
+                                 Merge Cond: ((((t3_1.a + t3_1.b) / 2)) = t1_1.a)
+                                 ->  Sort
+                                       Sort Key: (((t3_1.a + t3_1.b) / 2))
+                                       ->  Seq Scan on prt1_e_p2 t3_1
+                                             Filter: (c = 0)
+                                 ->  Sort
+                                       Sort Key: t1_1.a
+                                       ->  Seq Scan on prt1_p2 t1_1
+                     ->  Sort
+                           Sort Key: t2_1.b
+                           ->  Seq Scan on prt2_p2 t2_1
+               ->  Merge Left Join
+                     Merge Cond: (t1_2.a = t2_2.b)
+                     ->  Sort
+                           Sort Key: t1_2.a
+                           ->  Merge Left Join
+                                 Merge Cond: ((((t3_2.a + t3_2.b) / 2)) = t1_2.a)
+                                 ->  Sort
+                                       Sort Key: (((t3_2.a + t3_2.b) / 2))
+                                       ->  Seq Scan on prt1_e_p3 t3_2
+                                             Filter: (c = 0)
+                                 ->  Sort
+                                       Sort Key: t1_2.a
+                                       ->  Seq Scan on prt1_p3 t1_2
+                     ->  Sort
+                           Sort Key: t2_2.b
+                           ->  Seq Scan on prt2_p3 t2_2
+(52 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+  a  |  c   |  b  |  c   | ?column? | c 
+-----+------+-----+------+----------+---
+   0 | 0000 |   0 | 0000 |        0 | 0
+  50 | 0050 |     |      |      100 | 0
+ 100 | 0100 |     |      |      200 | 0
+ 150 | 0150 | 150 | 0150 |      300 | 0
+ 200 | 0200 |     |      |      400 | 0
+ 250 | 0250 |     |      |      500 | 0
+ 300 | 0300 | 300 | 0300 |      600 | 0
+ 350 | 0350 |     |      |      700 | 0
+ 400 | 0400 |     |      |      800 | 0
+ 450 | 0450 | 450 | 0450 |      900 | 0
+ 500 | 0500 |     |      |     1000 | 0
+ 550 | 0550 |     |      |     1100 | 0
+(12 rows)
+
+-- MergeAppend on nullable column
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ Sort
+   Sort Key: prt1_p1.a, b
+   ->  Append
+         ->  Merge Left Join
+               Merge Cond: (prt1_p1.a = b)
+               ->  Sort
+                     Sort Key: prt1_p1.a
+                     ->  Seq Scan on prt1_p1
+                           Filter: ((a < 450) AND (b = 0))
+               ->  Sort
+                     Sort Key: b
+                     ->  Result
+                           One-Time Filter: false
+         ->  Merge Left Join
+               Merge Cond: (prt1_p2.a = prt2_p2.b)
+               ->  Sort
+                     Sort Key: prt1_p2.a
+                     ->  Seq Scan on prt1_p2
+                           Filter: ((a < 450) AND (b = 0))
+               ->  Sort
+                     Sort Key: prt2_p2.b
+                     ->  Seq Scan on prt2_p2
+                           Filter: (b > 250)
+(23 rows)
+
+SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+  a  |  b  
+-----+-----
+   0 |    
+  50 |    
+ 100 |    
+ 150 |    
+ 200 |    
+ 250 |    
+ 300 | 300
+ 350 |    
+ 400 |    
+(9 rows)
+
+RESET enable_hashjoin;
+RESET enable_nestloop;
+--
+-- partitioned by multiple columns
+--
+CREATE TABLE prt1_m (a int, b int, c int) PARTITION BY RANGE(a, ((a + b)/2));
+CREATE TABLE prt1_m_p1 PARTITION OF prt1_m FOR VALUES FROM (0, 0) TO (250, 250);
+CREATE TABLE prt1_m_p2 PARTITION OF prt1_m FOR VALUES FROM (250, 250) TO (500, 500);
+CREATE TABLE prt1_m_p3 PARTITION OF prt1_m FOR VALUES FROM (500, 500) TO (600, 600);
+INSERT INTO prt1_m SELECT i, i, i % 25 FROM generate_series(0, 599, 2) i;
+ANALYZE prt1_m;
+CREATE TABLE prt2_m (a int, b int, c int) PARTITION BY RANGE(((b + a)/2), b);
+CREATE TABLE prt2_m_p1 PARTITION OF prt2_m FOR VALUES FROM (0, 0) TO (250, 250);
+CREATE TABLE prt2_m_p2 PARTITION OF prt2_m FOR VALUES FROM (250, 250) TO (500, 500);
+CREATE TABLE prt2_m_p3 PARTITION OF prt2_m FOR VALUES FROM (500, 500) TO (600, 600);
+INSERT INTO prt2_m SELECT i, i, i % 25 FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_m;
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_m WHERE prt1_m.c = 0) t1 FULL JOIN (SELECT * FROM prt2_m WHERE prt2_m.c = 0) t2 ON (t1.a = (t2.b + t2.a)/2 AND t2.b = (t1.a + t1.b)/2) ORDER BY t1.a, t2.b;
+                                                             QUERY PLAN                                                             
+------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: prt1_m_p1.a, prt2_m_p1.b
+   ->  Append
+         ->  Hash Full Join
+               Hash Cond: ((prt1_m_p1.a = ((prt2_m_p1.b + prt2_m_p1.a) / 2)) AND (((prt1_m_p1.a + prt1_m_p1.b) / 2) = prt2_m_p1.b))
+               ->  Seq Scan on prt1_m_p1
+                     Filter: (c = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_m_p1
+                           Filter: (c = 0)
+         ->  Hash Full Join
+               Hash Cond: ((prt1_m_p2.a = ((prt2_m_p2.b + prt2_m_p2.a) / 2)) AND (((prt1_m_p2.a + prt1_m_p2.b) / 2) = prt2_m_p2.b))
+               ->  Seq Scan on prt1_m_p2
+                     Filter: (c = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_m_p2
+                           Filter: (c = 0)
+         ->  Hash Full Join
+               Hash Cond: ((prt1_m_p3.a = ((prt2_m_p3.b + prt2_m_p3.a) / 2)) AND (((prt1_m_p3.a + prt1_m_p3.b) / 2) = prt2_m_p3.b))
+               ->  Seq Scan on prt1_m_p3
+                     Filter: (c = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_m_p3
+                           Filter: (c = 0)
+(24 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_m WHERE prt1_m.c = 0) t1 FULL JOIN (SELECT * FROM prt2_m WHERE prt2_m.c = 0) t2 ON (t1.a = (t2.b + t2.a)/2 AND t2.b = (t1.a + t1.b)/2) ORDER BY t1.a, t2.b;
+  a  | c |  b  | c 
+-----+---+-----+---
+   0 | 0 |   0 | 0
+  50 | 0 |     |  
+ 100 | 0 |     |  
+ 150 | 0 | 150 | 0
+ 200 | 0 |     |  
+ 250 | 0 |     |  
+ 300 | 0 | 300 | 0
+ 350 | 0 |     |  
+ 400 | 0 |     |  
+ 450 | 0 | 450 | 0
+ 500 | 0 |     |  
+ 550 | 0 |     |  
+     |   |  75 | 0
+     |   | 225 | 0
+     |   | 375 | 0
+     |   | 525 | 0
+(16 rows)
+
+--
+-- tests for list partitioned tables.
+--
+CREATE TABLE plt1 (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE plt1_p1 PARTITION OF plt1 FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt1_p2 PARTITION OF plt1 FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt1_p3 PARTITION OF plt1 FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE plt1;
+CREATE TABLE plt2 (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE plt2_p1 PARTITION OF plt2 FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt2_p2 PARTITION OF plt2 FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt2_p3 PARTITION OF plt2 FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 3) i;
+ANALYZE plt2;
+--
+-- list partitioned by expression
+--
+CREATE TABLE plt1_e (a int, b int, c text) PARTITION BY LIST(ltrim(c, 'A'));
+CREATE TABLE plt1_e_p1 PARTITION OF plt1_e FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt1_e_p2 PARTITION OF plt1_e FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt1_e_p3 PARTITION OF plt1_e FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt1_e SELECT i, i, 'A' || to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE plt1_e;
+-- test partition matching with N-way join
+EXPLAIN (COSTS OFF)
+SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, plt2 t2, plt1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c;
+                                      QUERY PLAN                                      
+--------------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.c, t3.c
+   ->  HashAggregate
+         Group Key: t1.c, t2.c, t3.c
+         ->  Result
+               ->  Append
+                     ->  Hash Join
+                           Hash Cond: (t1.c = t2.c)
+                           ->  Seq Scan on plt1_p1 t1
+                           ->  Hash
+                                 ->  Hash Join
+                                       Hash Cond: (t2.c = ltrim(t3.c, 'A'::text))
+                                       ->  Seq Scan on plt2_p1 t2
+                                       ->  Hash
+                                             ->  Seq Scan on plt1_e_p1 t3
+                     ->  Hash Join
+                           Hash Cond: (t1_1.c = t2_1.c)
+                           ->  Seq Scan on plt1_p2 t1_1
+                           ->  Hash
+                                 ->  Hash Join
+                                       Hash Cond: (t2_1.c = ltrim(t3_1.c, 'A'::text))
+                                       ->  Seq Scan on plt2_p2 t2_1
+                                       ->  Hash
+                                             ->  Seq Scan on plt1_e_p2 t3_1
+                     ->  Hash Join
+                           Hash Cond: (t1_2.c = t2_2.c)
+                           ->  Seq Scan on plt1_p3 t1_2
+                           ->  Hash
+                                 ->  Hash Join
+                                       Hash Cond: (t2_2.c = ltrim(t3_2.c, 'A'::text))
+                                       ->  Seq Scan on plt2_p3 t2_2
+                                       ->  Hash
+                                             ->  Seq Scan on plt1_e_p3 t3_2
+(33 rows)
+
+SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, plt2 t2, plt1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c;
+         avg          |         avg          |          avg          |  c   |  c   |   c   
+----------------------+----------------------+-----------------------+------+------+-------
+  24.0000000000000000 |  24.0000000000000000 |   48.0000000000000000 | 0000 | 0000 | A0000
+  74.0000000000000000 |  75.0000000000000000 |  148.0000000000000000 | 0001 | 0001 | A0001
+ 124.0000000000000000 | 124.5000000000000000 |  248.0000000000000000 | 0002 | 0002 | A0002
+ 174.0000000000000000 | 174.0000000000000000 |  348.0000000000000000 | 0003 | 0003 | A0003
+ 224.0000000000000000 | 225.0000000000000000 |  448.0000000000000000 | 0004 | 0004 | A0004
+ 274.0000000000000000 | 274.5000000000000000 |  548.0000000000000000 | 0005 | 0005 | A0005
+ 324.0000000000000000 | 324.0000000000000000 |  648.0000000000000000 | 0006 | 0006 | A0006
+ 374.0000000000000000 | 375.0000000000000000 |  748.0000000000000000 | 0007 | 0007 | A0007
+ 424.0000000000000000 | 424.5000000000000000 |  848.0000000000000000 | 0008 | 0008 | A0008
+ 474.0000000000000000 | 474.0000000000000000 |  948.0000000000000000 | 0009 | 0009 | A0009
+ 524.0000000000000000 | 525.0000000000000000 | 1048.0000000000000000 | 0010 | 0010 | A0010
+ 574.0000000000000000 | 574.5000000000000000 | 1148.0000000000000000 | 0011 | 0011 | A0011
+(12 rows)
+
+-- joins where one of the relations is proven empty
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a = 1 AND t1.a = 2;
+        QUERY PLAN        
+--------------------------
+ Result
+   One-Time Filter: false
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 LEFT JOIN prt2 t2 ON t1.a = t2.b;
+        QUERY PLAN        
+--------------------------
+ Result
+   One-Time Filter: false
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+                 QUERY PLAN                 
+--------------------------------------------
+ Sort
+   Sort Key: a, t2.b
+   ->  Hash Left Join
+         Hash Cond: (t2.b = a)
+         ->  Append
+               ->  Seq Scan on prt2_p1 t2
+                     Filter: (a = 0)
+               ->  Seq Scan on prt2_p2 t2_1
+                     Filter: (a = 0)
+               ->  Seq Scan on prt2_p3 t2_2
+                     Filter: (a = 0)
+         ->  Hash
+               ->  Result
+                     One-Time Filter: false
+(14 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 FULL JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+                 QUERY PLAN                 
+--------------------------------------------
+ Sort
+   Sort Key: a, t2.b
+   ->  Hash Left Join
+         Hash Cond: (t2.b = a)
+         ->  Append
+               ->  Seq Scan on prt2_p1 t2
+                     Filter: (a = 0)
+               ->  Seq Scan on prt2_p2 t2_1
+                     Filter: (a = 0)
+               ->  Seq Scan on prt2_p3 t2_2
+                     Filter: (a = 0)
+         ->  Hash
+               ->  Result
+                     One-Time Filter: false
+(14 rows)
+
+--
+-- multiple levels of partitioning 
+--
+CREATE TABLE prt1_l (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE prt1_l_p1 PARTITION OF prt1_l FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_l_p2 PARTITION OF prt1_l FOR VALUES FROM (250) TO (500) PARTITION BY LIST (c);
+CREATE TABLE prt1_l_p2_p1 PARTITION OF prt1_l_p2 FOR VALUES IN ('0000', '0001');
+CREATE TABLE prt1_l_p2_p2 PARTITION OF prt1_l_p2 FOR VALUES IN ('0002', '0003');
+CREATE TABLE prt1_l_p3 PARTITION OF prt1_l FOR VALUES FROM (500) TO (600) PARTITION BY RANGE (b);
+CREATE TABLE prt1_l_p3_p1 PARTITION OF prt1_l_p3 FOR VALUES FROM (0) TO (13);
+CREATE TABLE prt1_l_p3_p2 PARTITION OF prt1_l_p3 FOR VALUES FROM (13) TO (25);
+INSERT INTO prt1_l SELECT i, i % 25, to_char(i % 4, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt1_l;
+CREATE TABLE prt2_l (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE prt2_l_p1 PARTITION OF prt2_l FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_l_p2 PARTITION OF prt2_l FOR VALUES FROM (250) TO (500) PARTITION BY LIST (c);
+CREATE TABLE prt2_l_p2_p1 PARTITION OF prt2_l_p2 FOR VALUES IN ('0000', '0001');
+CREATE TABLE prt2_l_p2_p2 PARTITION OF prt2_l_p2 FOR VALUES IN ('0002', '0003');
+CREATE TABLE prt2_l_p3 PARTITION OF prt2_l FOR VALUES FROM (500) TO (600) PARTITION BY RANGE (a);
+CREATE TABLE prt2_l_p3_p1 PARTITION OF prt2_l_p3 FOR VALUES FROM (0) TO (13);
+CREATE TABLE prt2_l_p3_p2 PARTITION OF prt2_l_p3 FOR VALUES FROM (13) TO (25);
+INSERT INTO prt2_l SELECT i % 25, i, to_char(i % 4, 'FM0000') FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_l;
+-- inner join, qual covering only top-level partitions
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1, prt2_l t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Sort
+   Sort Key: t1.a
+   ->  Append
+         ->  Hash Join
+               Hash Cond: (t2.b = t1.a)
+               ->  Seq Scan on prt2_l_p1 t2
+               ->  Hash
+                     ->  Seq Scan on prt1_l_p1 t1
+                           Filter: (b = 0)
+         ->  Hash Join
+               Hash Cond: (t2_1.b = t1_1.a)
+               ->  Append
+                     ->  Seq Scan on prt2_l_p2_p1 t2_1
+                     ->  Seq Scan on prt2_l_p2_p2 t2_2
+               ->  Hash
+                     ->  Append
+                           ->  Seq Scan on prt1_l_p2_p1 t1_1
+                                 Filter: (b = 0)
+                           ->  Seq Scan on prt1_l_p2_p2 t1_2
+                                 Filter: (b = 0)
+         ->  Hash Join
+               Hash Cond: (t2_3.b = t1_3.a)
+               ->  Append
+                     ->  Seq Scan on prt2_l_p3_p1 t2_3
+                     ->  Seq Scan on prt2_l_p3_p2 t2_4
+               ->  Hash
+                     ->  Append
+                           ->  Seq Scan on prt1_l_p3_p1 t1_3
+                                 Filter: (b = 0)
+(29 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1, prt2_l t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+  a  |  c   |  b  |  c   
+-----+------+-----+------
+   0 | 0000 |   0 | 0000
+ 150 | 0002 | 150 | 0002
+ 300 | 0000 | 300 | 0000
+ 450 | 0002 | 450 | 0002
+(4 rows)
+
+-- left join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 LEFT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+                                     QUERY PLAN                                     
+------------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a, t2.b
+   ->  Append
+         ->  Hash Right Join
+               Hash Cond: ((t2.b = t1.a) AND ((t2.c)::text = (t1.c)::text))
+               ->  Seq Scan on prt2_l_p1 t2
+               ->  Hash
+                     ->  Seq Scan on prt1_l_p1 t1
+                           Filter: (b = 0)
+         ->  Hash Right Join
+               Hash Cond: ((t2_1.b = t1_1.a) AND ((t2_1.c)::text = (t1_1.c)::text))
+               ->  Seq Scan on prt2_l_p2_p1 t2_1
+               ->  Hash
+                     ->  Seq Scan on prt1_l_p2_p1 t1_1
+                           Filter: (b = 0)
+         ->  Hash Right Join
+               Hash Cond: ((t2_2.b = t1_2.a) AND ((t2_2.c)::text = (t1_2.c)::text))
+               ->  Seq Scan on prt2_l_p2_p2 t2_2
+               ->  Hash
+                     ->  Seq Scan on prt1_l_p2_p2 t1_2
+                           Filter: (b = 0)
+         ->  Hash Right Join
+               Hash Cond: ((t2_3.b = t1_3.a) AND ((t2_3.c)::text = (t1_3.c)::text))
+               ->  Append
+                     ->  Seq Scan on prt2_l_p3_p1 t2_3
+                     ->  Seq Scan on prt2_l_p3_p2 t2_4
+               ->  Hash
+                     ->  Append
+                           ->  Seq Scan on prt1_l_p3_p1 t1_3
+                                 Filter: (b = 0)
+(30 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 LEFT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+  a  |  c   |  b  |  c   
+-----+------+-----+------
+   0 | 0000 |   0 | 0000
+  50 | 0002 |     | 
+ 100 | 0000 |     | 
+ 150 | 0002 | 150 | 0002
+ 200 | 0000 |     | 
+ 250 | 0002 |     | 
+ 300 | 0000 | 300 | 0000
+ 350 | 0002 |     | 
+ 400 | 0000 |     | 
+ 450 | 0002 | 450 | 0002
+ 500 | 0000 |     | 
+ 550 | 0002 |     | 
+(12 rows)
+
+-- right join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+                                        QUERY PLAN                                        
+------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a, t2.b
+   ->  Result
+         ->  Append
+               ->  Hash Right Join
+                     Hash Cond: ((t1.a = t2.b) AND ((t1.c)::text = (t2.c)::text))
+                     ->  Seq Scan on prt1_l_p1 t1
+                     ->  Hash
+                           ->  Seq Scan on prt2_l_p1 t2
+                                 Filter: (a = 0)
+               ->  Hash Right Join
+                     Hash Cond: ((t1_1.a = t2_1.b) AND ((t1_1.c)::text = (t2_1.c)::text))
+                     ->  Seq Scan on prt1_l_p2_p1 t1_1
+                     ->  Hash
+                           ->  Seq Scan on prt2_l_p2_p1 t2_1
+                                 Filter: (a = 0)
+               ->  Hash Right Join
+                     Hash Cond: ((t1_2.a = t2_2.b) AND ((t1_2.c)::text = (t2_2.c)::text))
+                     ->  Seq Scan on prt1_l_p2_p2 t1_2
+                     ->  Hash
+                           ->  Seq Scan on prt2_l_p2_p2 t2_2
+                                 Filter: (a = 0)
+               ->  Hash Right Join
+                     Hash Cond: ((t1_3.a = t2_3.b) AND ((t1_3.c)::text = (t2_3.c)::text))
+                     ->  Append
+                           ->  Seq Scan on prt1_l_p3_p1 t1_3
+                           ->  Seq Scan on prt1_l_p3_p2 t1_4
+                     ->  Hash
+                           ->  Append
+                                 ->  Seq Scan on prt2_l_p3_p1 t2_3
+                                       Filter: (a = 0)
+(31 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+  a  |  c   |  b  |  c   
+-----+------+-----+------
+   0 | 0000 |   0 | 0000
+ 150 | 0002 | 150 | 0002
+ 300 | 0000 | 300 | 0000
+ 450 | 0002 | 450 | 0002
+     |      |  75 | 0003
+     |      | 225 | 0001
+     |      | 375 | 0003
+     |      | 525 | 0001
+(8 rows)
+
+-- full join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE prt1_l.b = 0) t1 FULL JOIN (SELECT * FROM prt2_l WHERE prt2_l.a = 0) t2 ON (t1.a = t2.b AND t1.c = t2.c) ORDER BY t1.a, t2.b;
+                                                     QUERY PLAN                                                     
+--------------------------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: prt1_l_p1.a, prt2_l_p1.b
+   ->  Append
+         ->  Hash Full Join
+               Hash Cond: ((prt1_l_p1.a = prt2_l_p1.b) AND ((prt1_l_p1.c)::text = (prt2_l_p1.c)::text))
+               ->  Seq Scan on prt1_l_p1
+                     Filter: (b = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_l_p1
+                           Filter: (a = 0)
+         ->  Hash Full Join
+               Hash Cond: ((prt1_l_p2_p1.a = prt2_l_p2_p1.b) AND ((prt1_l_p2_p1.c)::text = (prt2_l_p2_p1.c)::text))
+               ->  Seq Scan on prt1_l_p2_p1
+                     Filter: (b = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_l_p2_p1
+                           Filter: (a = 0)
+         ->  Hash Full Join
+               Hash Cond: ((prt1_l_p2_p2.a = prt2_l_p2_p2.b) AND ((prt1_l_p2_p2.c)::text = (prt2_l_p2_p2.c)::text))
+               ->  Seq Scan on prt1_l_p2_p2
+                     Filter: (b = 0)
+               ->  Hash
+                     ->  Seq Scan on prt2_l_p2_p2
+                           Filter: (a = 0)
+         ->  Hash Full Join
+               Hash Cond: ((prt1_l_p3_p1.a = prt2_l_p3_p1.b) AND ((prt1_l_p3_p1.c)::text = (prt2_l_p3_p1.c)::text))
+               ->  Append
+                     ->  Seq Scan on prt1_l_p3_p1
+                           Filter: (b = 0)
+               ->  Hash
+                     ->  Append
+                           ->  Seq Scan on prt2_l_p3_p1
+                                 Filter: (a = 0)
+(33 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE prt1_l.b = 0) t1 FULL JOIN (SELECT * FROM prt2_l WHERE prt2_l.a = 0) t2 ON (t1.a = t2.b AND t1.c = t2.c) ORDER BY t1.a, t2.b;
+  a  |  c   |  b  |  c   
+-----+------+-----+------
+   0 | 0000 |   0 | 0000
+  50 | 0002 |     | 
+ 100 | 0000 |     | 
+ 150 | 0002 | 150 | 0002
+ 200 | 0000 |     | 
+ 250 | 0002 |     | 
+ 300 | 0000 | 300 | 0000
+ 350 | 0002 |     | 
+ 400 | 0000 |     | 
+ 450 | 0002 | 450 | 0002
+ 500 | 0000 |     | 
+ 550 | 0002 |     | 
+     |      |  75 | 0003
+     |      | 225 | 0001
+     |      | 375 | 0003
+     |      | 525 | 0001
+(16 rows)
+
+-- lateral partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
+                         ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
+                                             QUERY PLAN                                              
+-----------------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: t1.a
+   ->  Result
+         ->  Append
+               ->  Nested Loop Left Join
+                     ->  Seq Scan on prt1_l_p1 t1
+                           Filter: (b = 0)
+                     ->  Hash Join
+                           Hash Cond: ((t3.b = t2.a) AND ((t3.c)::text = (t2.c)::text))
+                           ->  Seq Scan on prt2_l_p1 t3
+                           ->  Hash
+                                 ->  Seq Scan on prt1_l_p1 t2
+                                       Filter: ((t1.a = a) AND ((t1.c)::text = (c)::text))
+               ->  Nested Loop Left Join
+                     ->  Seq Scan on prt1_l_p2_p1 t1_1
+                           Filter: (b = 0)
+                     ->  Hash Join
+                           Hash Cond: ((t3_1.b = t2_1.a) AND ((t3_1.c)::text = (t2_1.c)::text))
+                           ->  Seq Scan on prt2_l_p2_p1 t3_1
+                           ->  Hash
+                                 ->  Seq Scan on prt1_l_p2_p1 t2_1
+                                       Filter: ((t1_1.a = a) AND ((t1_1.c)::text = (c)::text))
+               ->  Nested Loop Left Join
+                     ->  Seq Scan on prt1_l_p2_p2 t1_2
+                           Filter: (b = 0)
+                     ->  Hash Join
+                           Hash Cond: ((t3_2.b = t2_2.a) AND ((t3_2.c)::text = (t2_2.c)::text))
+                           ->  Seq Scan on prt2_l_p2_p2 t3_2
+                           ->  Hash
+                                 ->  Seq Scan on prt1_l_p2_p2 t2_2
+                                       Filter: ((t1_2.a = a) AND ((t1_2.c)::text = (c)::text))
+               ->  Nested Loop Left Join
+                     ->  Append
+                           ->  Seq Scan on prt1_l_p3_p1 t1_3
+                                 Filter: (b = 0)
+                     ->  Hash Join
+                           Hash Cond: ((t3_3.b = t2_3.a) AND ((t3_3.c)::text = (t2_3.c)::text))
+                           ->  Append
+                                 ->  Seq Scan on prt2_l_p3_p1 t3_3
+                                 ->  Seq Scan on prt2_l_p3_p2 t3_4
+                           ->  Hash
+                                 ->  Append
+                                       ->  Seq Scan on prt1_l_p3_p1 t2_3
+                                             Filter: ((t1_3.a = a) AND ((t1_3.c)::text = (c)::text))
+                                       ->  Seq Scan on prt1_l_p3_p2 t2_4
+                                             Filter: ((t1_3.a = a) AND ((t1_3.c)::text = (c)::text))
+(46 rows)
+
+SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
+                         ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
+  a  | b |  c   | t2a | t2c  | t2b | t3b | least 
+-----+---+------+-----+------+-----+-----+-------
+   0 | 0 | 0000 |   0 | 0000 |   0 |   0 |     0
+  50 | 0 | 0002 |     |      |     |     |      
+ 100 | 0 | 0000 |     |      |     |     |      
+ 150 | 0 | 0002 | 150 | 0002 |   0 | 150 |   150
+ 200 | 0 | 0000 |     |      |     |     |      
+ 250 | 0 | 0002 |     |      |     |     |      
+ 300 | 0 | 0000 | 300 | 0000 |   0 | 300 |   300
+ 350 | 0 | 0002 |     |      |     |     |      
+ 400 | 0 | 0000 |     |      |     |     |      
+ 450 | 0 | 0002 | 450 | 0002 |   0 | 450 |   450
+ 500 | 0 | 0000 |     |      |     |     |      
+ 550 | 0 | 0002 |     |      |     |     |      
+(12 rows)
+
+-- join with one side empty
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Hash Left Join
+   Hash Cond: ((t2.b = a) AND (t2.a = b) AND ((t2.c)::text = (c)::text))
+   ->  Append
+         ->  Seq Scan on prt2_l_p1 t2
+         ->  Seq Scan on prt2_l_p2_p1 t2_1
+         ->  Seq Scan on prt2_l_p2_p2 t2_2
+         ->  Seq Scan on prt2_l_p3_p1 t2_3
+         ->  Seq Scan on prt2_l_p3_p2 t2_4
+   ->  Hash
+         ->  Result
+               One-Time Filter: false
+(11 rows)
+
+--
+-- negative testcases
+--
+CREATE TABLE prt1_n (a int, b int, c varchar) PARTITION BY RANGE(c);
+CREATE TABLE prt1_n_p1 PARTITION OF prt1_n FOR VALUES FROM ('0000') TO ('0250');
+CREATE TABLE prt1_n_p2 PARTITION OF prt1_n FOR VALUES FROM ('0250') TO ('0500');
+INSERT INTO prt1_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(0, 499, 2) i;
+ANALYZE prt1_n;
+CREATE TABLE prt2_n (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE prt2_n_p1 PARTITION OF prt2_n FOR VALUES IN ('0000', '0003', '0004', '0010', '0006', '0007');
+CREATE TABLE prt2_n_p2 PARTITION OF prt2_n FOR VALUES IN ('0001', '0005', '0002', '0009', '0008', '0011');
+INSERT INTO prt2_n SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt2_n;
+CREATE TABLE prt3_n (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE prt3_n_p1 PARTITION OF prt3_n FOR VALUES IN ('0000', '0004', '0006', '0007');
+CREATE TABLE prt3_n_p2 PARTITION OF prt3_n FOR VALUES IN ('0001', '0002', '0008', '0010');
+CREATE TABLE prt3_n_p3 PARTITION OF prt3_n FOR VALUES IN ('0003', '0005', '0009', '0011');
+INSERT INTO prt2_n SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt3_n;
+CREATE TABLE prt4_n (a int, b int, c text) PARTITION BY RANGE(a);
+CREATE TABLE prt4_n_p1 PARTITION OF prt4_n FOR VALUES FROM (0) TO (300);
+CREATE TABLE prt4_n_p2 PARTITION OF prt4_n FOR VALUES FROM (300) TO (500);
+CREATE TABLE prt4_n_p3 PARTITION OF prt4_n FOR VALUES FROM (500) TO (600);
+INSERT INTO prt4_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt4_n;
+-- partition-wise join can not be applied if the partition ranges differ
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt4_n t2 WHERE t1.a = t2.a;
+                  QUERY PLAN                  
+----------------------------------------------
+ Hash Join
+   Hash Cond: (t1.a = t2.a)
+   ->  Append
+         ->  Seq Scan on prt1_p1 t1
+         ->  Seq Scan on prt1_p2 t1_1
+         ->  Seq Scan on prt1_p3 t1_2
+   ->  Hash
+         ->  Append
+               ->  Seq Scan on prt4_n_p1 t2
+               ->  Seq Scan on prt4_n_p2 t2_1
+               ->  Seq Scan on prt4_n_p3 t2_2
+(11 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt4_n t2, prt2 t3 WHERE t1.a = t2.a and t1.a = t3.b;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Hash Join
+   Hash Cond: (t2.a = t1.a)
+   ->  Append
+         ->  Seq Scan on prt4_n_p1 t2
+         ->  Seq Scan on prt4_n_p2 t2_1
+         ->  Seq Scan on prt4_n_p3 t2_2
+   ->  Hash
+         ->  Append
+               ->  Hash Join
+                     Hash Cond: (t1.a = t3.b)
+                     ->  Seq Scan on prt1_p1 t1
+                     ->  Hash
+                           ->  Seq Scan on prt2_p1 t3
+               ->  Hash Join
+                     Hash Cond: (t1_1.a = t3_1.b)
+                     ->  Seq Scan on prt1_p2 t1_1
+                     ->  Hash
+                           ->  Seq Scan on prt2_p2 t3_1
+               ->  Hash Join
+                     Hash Cond: (t1_2.a = t3_2.b)
+                     ->  Seq Scan on prt1_p3 t1_2
+                     ->  Hash
+                           ->  Seq Scan on prt2_p3 t3_2
+(23 rows)
+
+-- partition-wise join can not be applied if there are no equi-join conditions
+-- between partition keys
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 LEFT JOIN prt2 t2 ON (t1.a < t2.b);
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Nested Loop Left Join
+   ->  Append
+         ->  Seq Scan on prt1_p1 t1
+         ->  Seq Scan on prt1_p2 t1_1
+         ->  Seq Scan on prt1_p3 t1_2
+   ->  Append
+         ->  Index Scan using iprt2_p1_b on prt2_p1 t2
+               Index Cond: (t1.a < b)
+         ->  Index Scan using iprt2_p2_b on prt2_p2 t2_1
+               Index Cond: (t1.a < b)
+         ->  Index Scan using iprt2_p3_b on prt2_p3 t2_2
+               Index Cond: (t1.a < b)
+(12 rows)
+
+-- equi-join with join condition on partial keys does not qualify for
+-- partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1, prt2_m t2 WHERE t1.a = (t2.b + t2.a)/2;
+                  QUERY PLAN                  
+----------------------------------------------
+ Hash Join
+   Hash Cond: (((t2.b + t2.a) / 2) = t1.a)
+   ->  Append
+         ->  Seq Scan on prt2_m_p1 t2
+         ->  Seq Scan on prt2_m_p2 t2_1
+         ->  Seq Scan on prt2_m_p3 t2_2
+   ->  Hash
+         ->  Append
+               ->  Seq Scan on prt1_m_p1 t1
+               ->  Seq Scan on prt1_m_p2 t1_1
+               ->  Seq Scan on prt1_m_p3 t1_2
+(11 rows)
+
+-- equi-join between out-of-order partition key columns does not qualify for
+-- partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1 LEFT JOIN prt2_m t2 ON t1.a = t2.b;
+                  QUERY PLAN                  
+----------------------------------------------
+ Hash Left Join
+   Hash Cond: (t1.a = t2.b)
+   ->  Append
+         ->  Seq Scan on prt1_m_p1 t1
+         ->  Seq Scan on prt1_m_p2 t1_1
+         ->  Seq Scan on prt1_m_p3 t1_2
+   ->  Hash
+         ->  Append
+               ->  Seq Scan on prt2_m_p1 t2
+               ->  Seq Scan on prt2_m_p2 t2_1
+               ->  Seq Scan on prt2_m_p3 t2_2
+(11 rows)
+
+-- equi-join between non-key columns does not qualify for partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1 LEFT JOIN prt2_m t2 ON t1.c = t2.c;
+                  QUERY PLAN                  
+----------------------------------------------
+ Hash Left Join
+   Hash Cond: (t1.c = t2.c)
+   ->  Append
+         ->  Seq Scan on prt1_m_p1 t1
+         ->  Seq Scan on prt1_m_p2 t1_1
+         ->  Seq Scan on prt1_m_p3 t1_2
+   ->  Hash
+         ->  Append
+               ->  Seq Scan on prt2_m_p1 t2
+               ->  Seq Scan on prt2_m_p2 t2_1
+               ->  Seq Scan on prt2_m_p3 t2_2
+(11 rows)
+
+-- partition-wise join can not be applied between tables with different
+-- partition lists
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 LEFT JOIN prt2_n t2 ON (t1.c = t2.c);
+                  QUERY PLAN                  
+----------------------------------------------
+ Hash Right Join
+   Hash Cond: (t2.c = (t1.c)::text)
+   ->  Append
+         ->  Seq Scan on prt2_n_p1 t2
+         ->  Seq Scan on prt2_n_p2 t2_1
+   ->  Hash
+         ->  Append
+               ->  Seq Scan on prt1_n_p1 t1
+               ->  Seq Scan on prt1_n_p2 t1_1
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 JOIN prt2_n t2 ON (t1.c = t2.c) JOIN plt1 t3 ON (t1.c = t3.c);
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Hash Join
+   Hash Cond: (t2.c = (t1.c)::text)
+   ->  Append
+         ->  Seq Scan on prt2_n_p1 t2
+         ->  Seq Scan on prt2_n_p2 t2_1
+   ->  Hash
+         ->  Hash Join
+               Hash Cond: (t3.c = (t1.c)::text)
+               ->  Append
+                     ->  Seq Scan on plt1_p1 t3
+                     ->  Seq Scan on plt1_p2 t3_1
+                     ->  Seq Scan on plt1_p3 t3_2
+               ->  Hash
+                     ->  Append
+                           ->  Seq Scan on prt1_n_p1 t1
+                           ->  Seq Scan on prt1_n_p2 t1_1
+(16 rows)
+
+-- partition-wise join can not be applied for a join between list and range
+-- partitioned table
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 FULL JOIN prt1 t2 ON (t1.c = t2.c);
+                  QUERY PLAN                  
+----------------------------------------------
+ Hash Full Join
+   Hash Cond: ((t2.c)::text = (t1.c)::text)
+   ->  Append
+         ->  Seq Scan on prt1_p1 t2
+         ->  Seq Scan on prt1_p2 t2_1
+         ->  Seq Scan on prt1_p3 t2_2
+   ->  Hash
+         ->  Append
+               ->  Seq Scan on prt1_n_p1 t1
+               ->  Seq Scan on prt1_n_p2 t1_1
+(10 rows)
+
index 568b783f5edacbec24585071435b5ebd177ad68d..cd1f7f301d44bee4799fddeefd70acd0e9505e97 100644 (file)
@@ -70,21 +70,22 @@ select count(*) >= 0 as ok from pg_prepared_xacts;
 -- This is to record the prevailing planner enable_foo settings during
 -- a regression test run.
 select name, setting from pg_settings where name like 'enable%';
-         name         | setting 
-----------------------+---------
- enable_bitmapscan    | on
- enable_gathermerge   | on
- enable_hashagg       | on
- enable_hashjoin      | on
- enable_indexonlyscan | on
- enable_indexscan     | on
- enable_material      | on
- enable_mergejoin     | on
- enable_nestloop      | on
- enable_seqscan       | on
- enable_sort          | on
- enable_tidscan       | on
-(12 rows)
+            name            | setting 
+----------------------------+---------
+ enable_bitmapscan          | on
+ enable_gathermerge         | on
+ enable_hashagg             | on
+ enable_hashjoin            | on
+ enable_indexonlyscan       | on
+ enable_indexscan           | on
+ enable_material            | on
+ enable_mergejoin           | on
+ enable_nestloop            | on
+ enable_partition_wise_join | off
+ enable_seqscan             | on
+ enable_sort                | on
+ enable_tidscan             | on
+(13 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working.  We can't test their contents in any great detail
index 860e8ab7951a75e77206032f2a8dd7228485a20e..e1d150b878f70d747810be230f0b064dae7c875a 100644 (file)
@@ -104,7 +104,8 @@ test: publication subscription
 # ----------
 # Another group of parallel tests
 # ----------
-test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock json jsonb json_encoding indirect_toast equivclass
+test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock json jsonb json_encoding indirect_toast equivclass partition_join
+
 # ----------
 # Another group of parallel tests
 # NB: temp.sql does a reconnect which transiently uses 2 connections,
index ef275d0d9a2b5d2f71575d45f21bc211bd4f0e7f..ed755f45fa22ce31b747efd421ae9c943bb045f4 100644 (file)
@@ -180,3 +180,4 @@ test: with
 test: xml
 test: event_trigger
 test: stats
+test: partition_join
diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql
new file mode 100644 (file)
index 0000000..ca525d9
--- /dev/null
@@ -0,0 +1,354 @@
+--
+-- PARTITION_JOIN
+-- Test partition-wise join between partitioned tables
+--
+
+-- Enable partition-wise join, which by default is disabled.
+SET enable_partition_wise_join to true;
+
+--
+-- partitioned by a single column
+--
+CREATE TABLE prt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE prt1_p1 PARTITION OF prt1 FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_p3 PARTITION OF prt1 FOR VALUES FROM (500) TO (600);
+CREATE TABLE prt1_p2 PARTITION OF prt1 FOR VALUES FROM (250) TO (500);
+INSERT INTO prt1 SELECT i, i % 25, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 2 = 0;
+CREATE INDEX iprt1_p1_a on prt1_p1(a);
+CREATE INDEX iprt1_p2_a on prt1_p2(a);
+CREATE INDEX iprt1_p3_a on prt1_p3(a);
+ANALYZE prt1;
+
+CREATE TABLE prt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE prt2_p1 PARTITION OF prt2 FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_p2 PARTITION OF prt2 FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt2_p3 PARTITION OF prt2 FOR VALUES FROM (500) TO (600);
+INSERT INTO prt2 SELECT i % 25, i, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 3 = 0;
+CREATE INDEX iprt2_p1_b on prt2_p1(b);
+CREATE INDEX iprt2_p2_b on prt2_p2(b);
+CREATE INDEX iprt2_p3_b on prt2_p3(b);
+ANALYZE prt2;
+
+-- inner join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+
+-- left outer join, with whole-row reference
+EXPLAIN (COSTS OFF)
+SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+
+-- right outer join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+
+-- full outer join, with placeholder vars
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b) WHERE t1.phv = t1.a OR t2.phv = t2.b ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b) WHERE t1.phv = t1.a OR t2.phv = t2.b ORDER BY t1.a, t2.b;
+
+-- Join with pruned partitions from joining relations
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a < 450 AND t2.b > 250 AND t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a < 450 AND t2.b > 250 AND t1.b = 0 ORDER BY t1.a, t2.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 FULL JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 OR t2.a = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 FULL JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 OR t2.a = 0 ORDER BY t1.a, t2.b;
+
+-- Semi-join
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t2.b FROM prt2 t2 WHERE t2.a = 0) AND t1.b = 0 ORDER BY t1.a;
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t2.b FROM prt2 t2 WHERE t2.a = 0) AND t1.b = 0 ORDER BY t1.a;
+
+-- Anti-join with aggregates
+EXPLAIN (COSTS OFF)
+SELECT sum(t1.a), avg(t1.a), sum(t1.b), avg(t1.b) FROM prt1 t1 WHERE NOT EXISTS (SELECT 1 FROM prt2 t2 WHERE t1.a = t2.b);
+SELECT sum(t1.a), avg(t1.a), sum(t1.b), avg(t1.b) FROM prt1 t1 WHERE NOT EXISTS (SELECT 1 FROM prt2 t2 WHERE t1.a = t2.b);
+
+-- lateral reference
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t3.a AS t3a, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+                         ON t1.a = ss.t2a WHERE t1.b = 0 ORDER BY t1.a;
+SELECT * FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t3.a AS t3a, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+                         ON t1.a = ss.t2a WHERE t1.b = 0 ORDER BY t1.a;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+                         ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
+SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+                         ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
+
+--
+-- partitioned by expression
+--
+CREATE TABLE prt1_e (a int, b int, c int) PARTITION BY RANGE(((a + b)/2));
+CREATE TABLE prt1_e_p1 PARTITION OF prt1_e FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_e_p2 PARTITION OF prt1_e FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt1_e_p3 PARTITION OF prt1_e FOR VALUES FROM (500) TO (600);
+INSERT INTO prt1_e SELECT i, i, i % 25 FROM generate_series(0, 599, 2) i;
+CREATE INDEX iprt1_e_p1_ab2 on prt1_e_p1(((a+b)/2));
+CREATE INDEX iprt1_e_p2_ab2 on prt1_e_p2(((a+b)/2));
+CREATE INDEX iprt1_e_p3_ab2 on prt1_e_p3(((a+b)/2));
+ANALYZE prt1_e;
+
+CREATE TABLE prt2_e (a int, b int, c int) PARTITION BY RANGE(((b + a)/2));
+CREATE TABLE prt2_e_p1 PARTITION OF prt2_e FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_e_p2 PARTITION OF prt2_e FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt2_e_p3 PARTITION OF prt2_e FOR VALUES FROM (500) TO (600);
+INSERT INTO prt2_e SELECT i, i, i % 25 FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_e;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_e t1, prt2_e t2 WHERE (t1.a + t1.b)/2 = (t2.b + t2.a)/2 AND t1.c = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_e t1, prt2_e t2 WHERE (t1.a + t1.b)/2 = (t2.b + t2.a)/2 AND t1.c = 0 ORDER BY t1.a, t2.b;
+
+--
+-- N-way join
+--
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM prt1 t1, prt2 t2, prt1_e t3 WHERE t1.a = t2.b AND t1.a = (t3.a + t3.b)/2 AND t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM prt1 t1, prt2 t2, prt1_e t3 WHERE t1.a = t2.b AND t1.a = (t3.a + t3.b)/2 AND t1.b = 0 ORDER BY t1.a, t2.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) LEFT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.b = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) LEFT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.b = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+
+-- Cases with non-nullable expressions in subquery results;
+-- make sure these go to null as expected
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b)) FULL JOIN (SELECT 50 phv, * FROM prt1_e WHERE prt1_e.c = 0) t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.a = t1.phv OR t2.b = t2.phv OR (t3.a + t3.b)/2 = t3.phv ORDER BY t1.a, t2.b, t3.a + t3.b;
+SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b)) FULL JOIN (SELECT 50 phv, * FROM prt1_e WHERE prt1_e.c = 0) t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.a = t1.phv OR t2.b = t2.phv OR (t3.a + t3.b)/2 = t3.phv ORDER BY t1.a, t2.b, t3.a + t3.b;
+
+-- Semi-join
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+
+-- test merge joins
+SET enable_hashjoin TO off;
+SET enable_nestloop TO off;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+
+-- MergeAppend on nullable column
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+
+RESET enable_hashjoin;
+RESET enable_nestloop;
+
+--
+-- partitioned by multiple columns
+--
+CREATE TABLE prt1_m (a int, b int, c int) PARTITION BY RANGE(a, ((a + b)/2));
+CREATE TABLE prt1_m_p1 PARTITION OF prt1_m FOR VALUES FROM (0, 0) TO (250, 250);
+CREATE TABLE prt1_m_p2 PARTITION OF prt1_m FOR VALUES FROM (250, 250) TO (500, 500);
+CREATE TABLE prt1_m_p3 PARTITION OF prt1_m FOR VALUES FROM (500, 500) TO (600, 600);
+INSERT INTO prt1_m SELECT i, i, i % 25 FROM generate_series(0, 599, 2) i;
+ANALYZE prt1_m;
+
+CREATE TABLE prt2_m (a int, b int, c int) PARTITION BY RANGE(((b + a)/2), b);
+CREATE TABLE prt2_m_p1 PARTITION OF prt2_m FOR VALUES FROM (0, 0) TO (250, 250);
+CREATE TABLE prt2_m_p2 PARTITION OF prt2_m FOR VALUES FROM (250, 250) TO (500, 500);
+CREATE TABLE prt2_m_p3 PARTITION OF prt2_m FOR VALUES FROM (500, 500) TO (600, 600);
+INSERT INTO prt2_m SELECT i, i, i % 25 FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_m;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_m WHERE prt1_m.c = 0) t1 FULL JOIN (SELECT * FROM prt2_m WHERE prt2_m.c = 0) t2 ON (t1.a = (t2.b + t2.a)/2 AND t2.b = (t1.a + t1.b)/2) ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_m WHERE prt1_m.c = 0) t1 FULL JOIN (SELECT * FROM prt2_m WHERE prt2_m.c = 0) t2 ON (t1.a = (t2.b + t2.a)/2 AND t2.b = (t1.a + t1.b)/2) ORDER BY t1.a, t2.b;
+
+--
+-- tests for list partitioned tables.
+--
+CREATE TABLE plt1 (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE plt1_p1 PARTITION OF plt1 FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt1_p2 PARTITION OF plt1 FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt1_p3 PARTITION OF plt1 FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE plt1;
+
+CREATE TABLE plt2 (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE plt2_p1 PARTITION OF plt2 FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt2_p2 PARTITION OF plt2 FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt2_p3 PARTITION OF plt2 FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 3) i;
+ANALYZE plt2;
+
+--
+-- list partitioned by expression
+--
+CREATE TABLE plt1_e (a int, b int, c text) PARTITION BY LIST(ltrim(c, 'A'));
+CREATE TABLE plt1_e_p1 PARTITION OF plt1_e FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt1_e_p2 PARTITION OF plt1_e FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt1_e_p3 PARTITION OF plt1_e FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt1_e SELECT i, i, 'A' || to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE plt1_e;
+
+-- test partition matching with N-way join
+EXPLAIN (COSTS OFF)
+SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, plt2 t2, plt1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c;
+SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, plt2 t2, plt1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c;
+
+-- joins where one of the relations is proven empty
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a = 1 AND t1.a = 2;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 LEFT JOIN prt2 t2 ON t1.a = t2.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 FULL JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+
+--
+-- multiple levels of partitioning 
+--
+CREATE TABLE prt1_l (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE prt1_l_p1 PARTITION OF prt1_l FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_l_p2 PARTITION OF prt1_l FOR VALUES FROM (250) TO (500) PARTITION BY LIST (c);
+CREATE TABLE prt1_l_p2_p1 PARTITION OF prt1_l_p2 FOR VALUES IN ('0000', '0001');
+CREATE TABLE prt1_l_p2_p2 PARTITION OF prt1_l_p2 FOR VALUES IN ('0002', '0003');
+CREATE TABLE prt1_l_p3 PARTITION OF prt1_l FOR VALUES FROM (500) TO (600) PARTITION BY RANGE (b);
+CREATE TABLE prt1_l_p3_p1 PARTITION OF prt1_l_p3 FOR VALUES FROM (0) TO (13);
+CREATE TABLE prt1_l_p3_p2 PARTITION OF prt1_l_p3 FOR VALUES FROM (13) TO (25);
+INSERT INTO prt1_l SELECT i, i % 25, to_char(i % 4, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt1_l;
+
+CREATE TABLE prt2_l (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE prt2_l_p1 PARTITION OF prt2_l FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_l_p2 PARTITION OF prt2_l FOR VALUES FROM (250) TO (500) PARTITION BY LIST (c);
+CREATE TABLE prt2_l_p2_p1 PARTITION OF prt2_l_p2 FOR VALUES IN ('0000', '0001');
+CREATE TABLE prt2_l_p2_p2 PARTITION OF prt2_l_p2 FOR VALUES IN ('0002', '0003');
+CREATE TABLE prt2_l_p3 PARTITION OF prt2_l FOR VALUES FROM (500) TO (600) PARTITION BY RANGE (a);
+CREATE TABLE prt2_l_p3_p1 PARTITION OF prt2_l_p3 FOR VALUES FROM (0) TO (13);
+CREATE TABLE prt2_l_p3_p2 PARTITION OF prt2_l_p3 FOR VALUES FROM (13) TO (25);
+INSERT INTO prt2_l SELECT i % 25, i, to_char(i % 4, 'FM0000') FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_l;
+
+-- inner join, qual covering only top-level partitions
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1, prt2_l t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1, prt2_l t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+
+-- left join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 LEFT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 LEFT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+
+-- right join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+
+-- full join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE prt1_l.b = 0) t1 FULL JOIN (SELECT * FROM prt2_l WHERE prt2_l.a = 0) t2 ON (t1.a = t2.b AND t1.c = t2.c) ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE prt1_l.b = 0) t1 FULL JOIN (SELECT * FROM prt2_l WHERE prt2_l.a = 0) t2 ON (t1.a = t2.b AND t1.c = t2.c) ORDER BY t1.a, t2.b;
+
+-- lateral partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
+                         ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
+SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
+                         (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
+                         ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
+
+-- join with one side empty
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;
+
+--
+-- negative testcases
+--
+CREATE TABLE prt1_n (a int, b int, c varchar) PARTITION BY RANGE(c);
+CREATE TABLE prt1_n_p1 PARTITION OF prt1_n FOR VALUES FROM ('0000') TO ('0250');
+CREATE TABLE prt1_n_p2 PARTITION OF prt1_n FOR VALUES FROM ('0250') TO ('0500');
+INSERT INTO prt1_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(0, 499, 2) i;
+ANALYZE prt1_n;
+
+CREATE TABLE prt2_n (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE prt2_n_p1 PARTITION OF prt2_n FOR VALUES IN ('0000', '0003', '0004', '0010', '0006', '0007');
+CREATE TABLE prt2_n_p2 PARTITION OF prt2_n FOR VALUES IN ('0001', '0005', '0002', '0009', '0008', '0011');
+INSERT INTO prt2_n SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt2_n;
+
+CREATE TABLE prt3_n (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE prt3_n_p1 PARTITION OF prt3_n FOR VALUES IN ('0000', '0004', '0006', '0007');
+CREATE TABLE prt3_n_p2 PARTITION OF prt3_n FOR VALUES IN ('0001', '0002', '0008', '0010');
+CREATE TABLE prt3_n_p3 PARTITION OF prt3_n FOR VALUES IN ('0003', '0005', '0009', '0011');
+INSERT INTO prt2_n SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt3_n;
+
+CREATE TABLE prt4_n (a int, b int, c text) PARTITION BY RANGE(a);
+CREATE TABLE prt4_n_p1 PARTITION OF prt4_n FOR VALUES FROM (0) TO (300);
+CREATE TABLE prt4_n_p2 PARTITION OF prt4_n FOR VALUES FROM (300) TO (500);
+CREATE TABLE prt4_n_p3 PARTITION OF prt4_n FOR VALUES FROM (500) TO (600);
+INSERT INTO prt4_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt4_n;
+
+-- partition-wise join can not be applied if the partition ranges differ
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt4_n t2 WHERE t1.a = t2.a;
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt4_n t2, prt2 t3 WHERE t1.a = t2.a and t1.a = t3.b;
+
+-- partition-wise join can not be applied if there are no equi-join conditions
+-- between partition keys
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 LEFT JOIN prt2 t2 ON (t1.a < t2.b);
+
+-- equi-join with join condition on partial keys does not qualify for
+-- partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1, prt2_m t2 WHERE t1.a = (t2.b + t2.a)/2;
+
+-- equi-join between out-of-order partition key columns does not qualify for
+-- partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1 LEFT JOIN prt2_m t2 ON t1.a = t2.b;
+
+-- equi-join between non-key columns does not qualify for partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1 LEFT JOIN prt2_m t2 ON t1.c = t2.c;
+
+-- partition-wise join can not be applied between tables with different
+-- partition lists
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 LEFT JOIN prt2_n t2 ON (t1.c = t2.c);
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 JOIN prt2_n t2 ON (t1.c = t2.c) JOIN plt1 t3 ON (t1.c = t3.c);
+
+-- partition-wise join can not be applied for a join between list and range
+-- partitioned table
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 FULL JOIN prt1 t2 ON (t1.c = t2.c);