(1 row)
ROLLBACK;
+-- ===================================================================
+-- test partition-wise-joins
+-- ===================================================================
+SET enable_partition_wise_join=on;
+CREATE TABLE fprt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE fprt1_p1 (LIKE fprt1);
+CREATE TABLE fprt1_p2 (LIKE fprt1);
+INSERT INTO fprt1_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 2) i;
+INSERT INTO fprt1_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 2) i;
+CREATE FOREIGN TABLE ftprt1_p1 PARTITION OF fprt1 FOR VALUES FROM (0) TO (250)
+ SERVER loopback OPTIONS (table_name 'fprt1_p1', use_remote_estimate 'true');
+CREATE FOREIGN TABLE ftprt1_p2 PARTITION OF fprt1 FOR VALUES FROM (250) TO (500)
+ SERVER loopback OPTIONS (TABLE_NAME 'fprt1_p2');
+ANALYZE fprt1;
+ANALYZE fprt1_p1;
+ANALYZE fprt1_p2;
+CREATE TABLE fprt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE fprt2_p1 (LIKE fprt2);
+CREATE TABLE fprt2_p2 (LIKE fprt2);
+INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
+INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
+CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250)
+ SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
+CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
+ SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
+ANALYZE fprt2;
+ANALYZE fprt2_p1;
+ANALYZE fprt2_p2;
+-- inner join three tables
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a, t3.c
+ -> Append
+ -> Foreign Scan
+ Relations: ((public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)) INNER JOIN (public.ftprt1_p1 t3)
+ -> Foreign Scan
+ Relations: ((public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)) INNER JOIN (public.ftprt1_p2 t3)
+(7 rows)
+
+SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
+ a | b | c
+-----+-----+------
+ 0 | 0 | 0000
+ 150 | 150 | 0003
+ 250 | 250 | 0005
+ 400 | 400 | 0008
+(4 rows)
+
+-- left outer join + nullable clasue
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a, ftprt2_p1.b, ftprt2_p1.c
+ -> Append
+ -> Foreign Scan
+ Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
+(5 rows)
+
+SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
+ a | b | c
+---+---+------
+ 0 | 0 | 0000
+ 2 | |
+ 4 | |
+ 6 | 6 | 0000
+ 8 | |
+(5 rows)
+
+-- with whole-row reference
+EXPLAIN (COSTS OFF)
+SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
+ QUERY PLAN
+---------------------------------------------------------------------------------
+ Sort
+ Sort Key: ((t1.*)::fprt1), ((t2.*)::fprt2)
+ -> Append
+ -> Foreign Scan
+ Relations: (public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)
+ -> Foreign Scan
+ Relations: (public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)
+(7 rows)
+
+SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
+ t1 | t2
+----------------+----------------
+ (0,0,0000) | (0,0,0000)
+ (150,150,0003) | (150,150,0003)
+ (250,250,0005) | (250,250,0005)
+ (400,400,0008) | (400,400,0008)
+(4 rows)
+
+-- join with lateral reference
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
+ QUERY PLAN
+---------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a, t1.b
+ -> Append
+ -> Foreign Scan
+ Relations: (public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)
+ -> Foreign Scan
+ Relations: (public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)
+(7 rows)
+
+SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
+ a | b
+-----+-----
+ 0 | 0
+ 150 | 150
+ 250 | 250
+ 400 | 400
+(4 rows)
+
+RESET enable_partition_wise_join;
AND ftoptions @> array['fetch_size=60000'];
ROLLBACK;
+
+-- ===================================================================
+-- test partition-wise-joins
+-- ===================================================================
+SET enable_partition_wise_join=on;
+
+CREATE TABLE fprt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE fprt1_p1 (LIKE fprt1);
+CREATE TABLE fprt1_p2 (LIKE fprt1);
+INSERT INTO fprt1_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 2) i;
+INSERT INTO fprt1_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 2) i;
+CREATE FOREIGN TABLE ftprt1_p1 PARTITION OF fprt1 FOR VALUES FROM (0) TO (250)
+ SERVER loopback OPTIONS (table_name 'fprt1_p1', use_remote_estimate 'true');
+CREATE FOREIGN TABLE ftprt1_p2 PARTITION OF fprt1 FOR VALUES FROM (250) TO (500)
+ SERVER loopback OPTIONS (TABLE_NAME 'fprt1_p2');
+ANALYZE fprt1;
+ANALYZE fprt1_p1;
+ANALYZE fprt1_p2;
+
+CREATE TABLE fprt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE fprt2_p1 (LIKE fprt2);
+CREATE TABLE fprt2_p2 (LIKE fprt2);
+INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
+INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
+CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250)
+ SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
+CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
+ SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
+ANALYZE fprt2;
+ANALYZE fprt2_p1;
+ANALYZE fprt2_p2;
+
+-- inner join three tables
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
+SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3;
+
+-- left outer join + nullable clasue
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
+SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
+
+-- with whole-row reference
+EXPLAIN (COSTS OFF)
+SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
+SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
+
+-- join with lateral reference
+EXPLAIN (COSTS OFF)
+SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
+SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2;
+
+RESET enable_partition_wise_join;
</listitem>
</varlistentry>
+ <varlistentry id="guc-enable-partition-wise-join" xreflabel="enable_partition_wise_join">
+ <term><varname>enable_partition_wise_join</varname> (<type>boolean</type>)
+ <indexterm>
+ <primary><varname>enable_partition_wise_join</> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Enables or disables the query planner's use of partition-wise join,
+ which allows a join between partitioned tables to be performed by
+ joining the matching partitions. Partition-wise join currently applies
+ only when the join conditions include all the partition keys, which
+ must be of the same data type and have exactly matching sets of child
+ partitions. Because partition-wise join planning can use significantly
+ more CPU time and memory during planning, the default is
+ <literal>off</>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-enable-seqscan" xreflabel="enable_seqscan">
<term><varname>enable_seqscan</varname> (<type>boolean</type>)
<indexterm>
</para>
</sect2>
+ <sect2 id="fdw-callbacks-reparameterize-paths">
+ <title>FDW Routines For reparameterization of paths</title>
+
+ <para>
+<programlisting>
+List *
+ReparameterizeForeignPathByChild(PlannerInfo *root, List *fdw_private,
+ RelOptInfo *child_rel);
+</programlisting>
+ This function is called while converting a path parameterized by the
+ top-most parent of the given child relation <literal>child_rel</> to be
+ parameterized by the child relation. The function is used to reparameterize
+ any paths or translate any expression nodes saved in the given
+ <literal>fdw_private</> member of a <structname>ForeignPath</>. The
+ callback may use <literal>reparameterize_path_by_child</>,
+ <literal>adjust_appendrel_attrs</> or
+ <literal>adjust_appendrel_attrs_multilevel</> as required.
+ </para>
+ </sect2>
+
</sect1>
<sect1 id="fdw-helpers">
plan as possible. Expanding the range of cases in which more work can be
pushed below the Gather (and costing them accurately) is likely to keep us
busy for a long time to come.
+
+Partition-wise joins
+--------------------
+A join between two similarly partitioned tables can be broken down into joins
+between their matching partitions if there exists an equi-join condition
+between the partition keys of the joining tables. The equi-join between
+partition keys implies that all join partners for a given row in one
+partitioned table must be in the corresponding partition of the other
+partitioned table. Because of this the join between partitioned tables to be
+broken into joins between the matching partitions. The resultant join is
+partitioned in the same way as the joining relations, thus allowing an N-way
+join between similarly partitioned tables having equi-join condition between
+their partition keys to be broken down into N-way joins between their matching
+partitions. This technique of breaking down a join between partition tables
+into join between their partitions is called partition-wise join. We will use
+term "partitioned relation" for either a partitioned table or a join between
+compatibly partitioned tables.
+
+The partitioning properties of a partitioned relation are stored in its
+RelOptInfo. The information about data types of partition keys are stored in
+PartitionSchemeData structure. The planner maintains a list of canonical
+partition schemes (distinct PartitionSchemeData objects) so that RelOptInfo of
+any two partitioned relations with same partitioning scheme point to the same
+PartitionSchemeData object. This reduces memory consumed by
+PartitionSchemeData objects and makes it easy to compare the partition schemes
+of joining relations.
/* Keep searching if join order is not valid */
if (joinrel)
{
+ /* Create paths for partition-wise joins. */
+ generate_partition_wise_join_paths(root, joinrel);
+
/* Create GatherPaths for any useful partial paths for rel */
generate_gather_paths(root, joinrel);
childrel = find_base_rel(root, childRTindex);
Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+ if (rel->part_scheme)
+ {
+ AttrNumber attno;
+
+ /*
+ * We need attr_needed data for building targetlist of a join
+ * relation representing join between matching partitions for
+ * partition-wise join. A given attribute of a child will be
+ * needed in the same highest joinrel where the corresponding
+ * attribute of parent is needed. Hence it suffices to use the
+ * same Relids set for parent and child.
+ */
+ for (attno = rel->min_attr; attno <= rel->max_attr; attno++)
+ {
+ int index = attno - rel->min_attr;
+ Relids attr_needed = rel->attr_needed[index];
+
+ /* System attributes do not need translation. */
+ if (attno <= 0)
+ {
+ Assert(rel->min_attr == childrel->min_attr);
+ childrel->attr_needed[index] = attr_needed;
+ }
+ else
+ {
+ Var *var = list_nth_node(Var,
+ appinfo->translated_vars,
+ attno - 1);
+ int child_index;
+
+ child_index = var->varattno - childrel->min_attr;
+ childrel->attr_needed[child_index] = attr_needed;
+ }
+ }
+ }
+
+ /*
+ * Copy/Modify targetlist. Even if this child is deemed empty, we need
+ * its targetlist in case it falls on nullable side in a child-join
+ * because of partition-wise join.
+ *
+ * NB: the resulting childrel->reltarget->exprs may contain arbitrary
+ * expressions, which otherwise would not occur in a rel's targetlist.
+ * Code that might be looking at an appendrel child must cope with
+ * such. (Normally, a rel's targetlist would only include Vars and
+ * PlaceHolderVars.) XXX we do not bother to update the cost or width
+ * fields of childrel->reltarget; not clear if that would be useful.
+ */
+ childrel->reltarget->exprs = (List *)
+ adjust_appendrel_attrs(root,
+ (Node *) rel->reltarget->exprs,
+ 1, &appinfo);
+
+ /*
+ * We have to make child entries in the EquivalenceClass data
+ * structures as well. This is needed either if the parent
+ * participates in some eclass joins (because we will want to consider
+ * inner-indexscan joins on the individual children) or if the parent
+ * has useful pathkeys (because we should try to build MergeAppend
+ * paths that produce those sort orderings). Even if this child is
+ * deemed dummy, it may fall on nullable side in a child-join, which
+ * in turn may participate in a MergeAppend, where we will need the
+ * EquivalenceClass data structures.
+ */
+ if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
+ add_child_rel_equivalences(root, appinfo, rel, childrel);
+ childrel->has_eclass_joins = rel->has_eclass_joins;
+
/*
- * We have to copy the parent's targetlist and quals to the child,
- * with appropriate substitution of variables. However, only the
- * baserestrictinfo quals are needed before we can check for
- * constraint exclusion; so do that first and then check to see if we
- * can disregard this child.
+ * We have to copy the parent's quals to the child, with appropriate
+ * substitution of variables. However, only the baserestrictinfo
+ * quals are needed before we can check for constraint exclusion; so
+ * do that first and then check to see if we can disregard this child.
*
* The child rel's targetlist might contain non-Var expressions, which
* means that substitution into the quals could produce opportunities
continue;
}
- /*
- * CE failed, so finish copying/modifying targetlist and join quals.
- *
- * NB: the resulting childrel->reltarget->exprs may contain arbitrary
- * expressions, which otherwise would not occur in a rel's targetlist.
- * Code that might be looking at an appendrel child must cope with
- * such. (Normally, a rel's targetlist would only include Vars and
- * PlaceHolderVars.) XXX we do not bother to update the cost or width
- * fields of childrel->reltarget; not clear if that would be useful.
- */
+ /* CE failed, so finish copying/modifying join quals. */
childrel->joininfo = (List *)
adjust_appendrel_attrs(root,
(Node *) rel->joininfo,
1, &appinfo);
- childrel->reltarget->exprs = (List *)
- adjust_appendrel_attrs(root,
- (Node *) rel->reltarget->exprs,
- 1, &appinfo);
-
- /*
- * We have to make child entries in the EquivalenceClass data
- * structures as well. This is needed either if the parent
- * participates in some eclass joins (because we will want to consider
- * inner-indexscan joins on the individual children) or if the parent
- * has useful pathkeys (because we should try to build MergeAppend
- * paths that produce those sort orderings).
- */
- if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
- add_child_rel_equivalences(root, appinfo, rel, childrel);
- childrel->has_eclass_joins = rel->has_eclass_joins;
-
- /*
- * Note: we could compute appropriate attr_needed data for the child's
- * variables, by transforming the parent's attr_needed through the
- * translated_vars mapping. However, currently there's no need
- * because attr_needed is only examined for base relations not
- * otherrels. So we just leave the child's attr_needed empty.
- */
/*
* If parallelism is allowable for this query in general, see whether
live_childrels = lappend(live_childrels, childrel);
}
- /* Add paths to the "append" relation. */
+ /* Add paths to the append relation. */
add_paths_to_append_rel(root, rel, live_childrels);
}
/*
* add_paths_to_append_rel
- * Generate paths for given "append" relation given the set of non-dummy
+ * Generate paths for the given append relation given the set of non-dummy
* child rels.
*
* The function collects all parameterizations and orderings supported by the
RangeTblEntry *rte;
bool build_partitioned_rels = false;
- /*
- * A root partition will already have a PartitionedChildRelInfo, and a
- * non-root partitioned table doesn't need one, because its Append paths
- * will get flattened into the parent anyway. For a subquery RTE, no
- * PartitionedChildRelInfo exists; we collect all partitioned_rels
- * associated with any child. (This assumes that we don't need to look
- * through multiple levels of subquery RTEs; if we ever do, we could
- * create a PartitionedChildRelInfo with the accumulated list of
- * partitioned_rels which would then be found when populated our parent
- * rel with paths. For the present, that appears to be unnecessary.)
- */
- rte = planner_rt_fetch(rel->relid, root);
- switch (rte->rtekind)
+ if (IS_SIMPLE_REL(rel))
{
- case RTE_RELATION:
- if (rte->relkind == RELKIND_PARTITIONED_TABLE)
- partitioned_rels =
- get_partitioned_child_rels(root, rel->relid);
- break;
- case RTE_SUBQUERY:
- build_partitioned_rels = true;
- break;
- default:
- elog(ERROR, "unexpected rtekind: %d", (int) rte->rtekind);
+ /*
+ * A root partition will already have a PartitionedChildRelInfo, and a
+ * non-root partitioned table doesn't need one, because its Append
+ * paths will get flattened into the parent anyway. For a subquery
+ * RTE, no PartitionedChildRelInfo exists; we collect all
+ * partitioned_rels associated with any child. (This assumes that we
+ * don't need to look through multiple levels of subquery RTEs; if we
+ * ever do, we could create a PartitionedChildRelInfo with the
+ * accumulated list of partitioned_rels which would then be found when
+ * populated our parent rel with paths. For the present, that appears
+ * to be unnecessary.)
+ */
+ rte = planner_rt_fetch(rel->relid, root);
+ switch (rte->rtekind)
+ {
+ case RTE_RELATION:
+ if (rte->relkind == RELKIND_PARTITIONED_TABLE)
+ partitioned_rels =
+ get_partitioned_child_rels(root, rel->relid);
+ break;
+ case RTE_SUBQUERY:
+ build_partitioned_rels = true;
+ break;
+ default:
+ elog(ERROR, "unexpcted rtekind: %d", (int) rte->rtekind);
+ }
+ }
+ else if (rel->reloptkind == RELOPT_JOINREL && rel->part_scheme)
+ {
+ /*
+ * Associate PartitionedChildRelInfo of the root partitioned tables
+ * being joined with the root partitioned join (indicated by
+ * RELOPT_JOINREL).
+ */
+ partitioned_rels = get_partitioned_child_rels_for_join(root,
+ rel->relids);
}
/*
join_search_one_level(root, lev);
/*
- * Run generate_gather_paths() for each just-processed joinrel. We
- * could not do this earlier because both regular and partial paths
- * can get added to a particular joinrel at multiple times within
- * join_search_one_level. After that, we're done creating paths for
- * the joinrel, so run set_cheapest().
+ * Run generate_partition_wise_join_paths() and
+ * generate_gather_paths() for each just-processed joinrel. We could
+ * not do this earlier because both regular and partial paths can get
+ * added to a particular joinrel at multiple times within
+ * join_search_one_level.
+ *
+ * After that, we're done creating paths for the joinrel, so run
+ * set_cheapest().
*/
foreach(lc, root->join_rel_level[lev])
{
rel = (RelOptInfo *) lfirst(lc);
+ /* Create paths for partition-wise joins. */
+ generate_partition_wise_join_paths(root, rel);
+
/* Create GatherPaths for any useful partial paths for rel */
generate_gather_paths(root, rel);
return parallel_workers;
}
+/*
+ * generate_partition_wise_join_paths
+ * Create paths representing partition-wise join for given partitioned
+ * join relation.
+ *
+ * This must not be called until after we are done adding paths for all
+ * child-joins. Otherwise, add_path might delete a path to which some path
+ * generated here has a reference.
+ */
+void
+generate_partition_wise_join_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+ List *live_children = NIL;
+ int cnt_parts;
+ int num_parts;
+ RelOptInfo **part_rels;
+
+ /* Handle only join relations here. */
+ if (!IS_JOIN_REL(rel))
+ return;
+
+ /*
+ * If we've already proven this join is empty, we needn't consider any
+ * more paths for it.
+ */
+ if (IS_DUMMY_REL(rel))
+ return;
+
+ /*
+ * Nothing to do if the relation is not partitioned. An outer join
+ * relation which had empty inner relation in every pair will have rest of
+ * the partitioning properties set except the child-join RelOptInfos. See
+ * try_partition_wise_join() for more explanation.
+ */
+ if (rel->nparts <= 0 || rel->part_rels == NULL)
+ return;
+
+ /* Guard against stack overflow due to overly deep partition hierarchy. */
+ check_stack_depth();
+
+ num_parts = rel->nparts;
+ part_rels = rel->part_rels;
+
+ /* Collect non-dummy child-joins. */
+ for (cnt_parts = 0; cnt_parts < num_parts; cnt_parts++)
+ {
+ RelOptInfo *child_rel = part_rels[cnt_parts];
+
+ /* Add partition-wise join paths for partitioned child-joins. */
+ generate_partition_wise_join_paths(root, child_rel);
+
+ /* Dummy children will not be scanned, so ingore those. */
+ if (IS_DUMMY_REL(child_rel))
+ continue;
+
+ set_cheapest(child_rel);
+
+#ifdef OPTIMIZER_DEBUG
+ debug_print_rel(root, rel);
+#endif
+
+ live_children = lappend(live_children, child_rel);
+ }
+
+ /* If all child-joins are dummy, parent join is also dummy. */
+ if (!live_children)
+ {
+ mark_dummy_rel(rel);
+ return;
+ }
+
+ /* Build additional paths for this rel from child-join paths. */
+ add_paths_to_append_rel(root, rel, live_children);
+ list_free(live_children);
+}
+
/*****************************************************************************
* DEBUG SUPPORT
bool enable_mergejoin = true;
bool enable_hashjoin = true;
bool enable_gathermerge = true;
+bool enable_partition_wise_join = false;
typedef struct
{
/* Hook for plugins to get control in add_paths_to_joinrel() */
set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
-#define PATH_PARAM_BY_REL(path, rel) \
+/*
+ * Paths parameterized by the parent can be considered to be parameterized by
+ * any of its child.
+ */
+#define PATH_PARAM_BY_PARENT(path, rel) \
+ ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
+ (rel)->top_parent_relids))
+#define PATH_PARAM_BY_REL_SELF(path, rel) \
((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
+#define PATH_PARAM_BY_REL(path, rel) \
+ (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
+
static void try_partial_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
Path *outer_path,
JoinPathExtraData extra;
bool mergejoin_allowed = true;
ListCell *lc;
+ Relids joinrelids;
+
+ /*
+ * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
+ * between child relations, even if there is a SpecialJoinInfo node for
+ * the join between the topmost parents. So, while calculating Relids set
+ * representing the restriction, consider relids of topmost parent of
+ * partitions.
+ */
+ if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
+ joinrelids = joinrel->top_parent_relids;
+ else
+ joinrelids = joinrel->relids;
extra.restrictlist = restrictlist;
extra.mergeclause_list = NIL;
* join has already been proven legal.) If the SJ is relevant, it
* presents constraints for joining to anything not in its RHS.
*/
- if (bms_overlap(joinrel->relids, sjinfo2->min_righthand) &&
- !bms_overlap(joinrel->relids, sjinfo2->min_lefthand))
+ if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
+ !bms_overlap(joinrelids, sjinfo2->min_lefthand))
extra.param_source_rels = bms_join(extra.param_source_rels,
bms_difference(root->all_baserels,
sjinfo2->min_righthand));
/* full joins constrain both sides symmetrically */
if (sjinfo2->jointype == JOIN_FULL &&
- bms_overlap(joinrel->relids, sjinfo2->min_lefthand) &&
- !bms_overlap(joinrel->relids, sjinfo2->min_righthand))
+ bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
+ !bms_overlap(joinrelids, sjinfo2->min_righthand))
extra.param_source_rels = bms_join(extra.param_source_rels,
bms_difference(root->all_baserels,
sjinfo2->min_lefthand));
JoinCostWorkspace workspace;
RelOptInfo *innerrel = inner_path->parent;
RelOptInfo *outerrel = outer_path->parent;
- Relids innerrelids = innerrel->relids;
- Relids outerrelids = outerrel->relids;
+ Relids innerrelids;
+ Relids outerrelids;
Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
+ /*
+ * Paths are parameterized by top-level parents, so run parameterization
+ * tests on the parent relids.
+ */
+ if (innerrel->top_parent_relids)
+ innerrelids = innerrel->top_parent_relids;
+ else
+ innerrelids = innerrel->relids;
+
+ if (outerrel->top_parent_relids)
+ outerrelids = outerrel->top_parent_relids;
+ else
+ outerrelids = outerrel->relids;
+
/*
* Check to see if proposed path is still parameterized, and reject if the
* parameterization wouldn't be sensible --- unless allow_star_schema_join
workspace.startup_cost, workspace.total_cost,
pathkeys, required_outer))
{
+ /*
+ * If the inner path is parameterized, it is parameterized by the
+ * topmost parent of the outer rel, not the outer rel itself. Fix
+ * that.
+ */
+ if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
+ {
+ inner_path = reparameterize_path_by_child(root, inner_path,
+ outer_path->parent);
+
+ /*
+ * If we could not translate the path, we can't create nest loop
+ * path.
+ */
+ if (!inner_path)
+ {
+ bms_free(required_outer);
+ return;
+ }
+ }
+
add_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
if (inner_path->param_info != NULL)
{
Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
+ RelOptInfo *outerrel = outer_path->parent;
+ Relids outerrelids;
+
+ /*
+ * The inner and outer paths are parameterized, if at all, by the top
+ * level parents, not the child relations, so we must use those relids
+ * for our paramaterization tests.
+ */
+ if (outerrel->top_parent_relids)
+ outerrelids = outerrel->top_parent_relids;
+ else
+ outerrelids = outerrel->relids;
- if (!bms_is_subset(inner_paramrels, outer_path->parent->relids))
+ if (!bms_is_subset(inner_paramrels, outerrelids))
return;
}
if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
return;
+ /*
+ * If the inner path is parameterized, it is parameterized by the topmost
+ * parent of the outer rel, not the outer rel itself. Fix that.
+ */
+ if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
+ {
+ inner_path = reparameterize_path_by_child(root, inner_path,
+ outer_path->parent);
+
+ /*
+ * If we could not translate the path, we can't create nest loop path.
+ */
+ if (!inner_path)
+ return;
+ }
+
/* Might be good enough to be worth trying, so let's try it. */
add_partial_path(joinrel, (Path *)
create_nestloop_path(root,
*/
#include "postgres.h"
+#include "miscadmin.h"
+#include "catalog/partition.h"
+#include "nodes/relation.h"
+#include "optimizer/clauses.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
+#include "optimizer/prep.h"
+#include "optimizer/cost.h"
#include "utils/memutils.h"
+#include "utils/lsyscache.h"
static void make_rels_by_clause_joins(PlannerInfo *root,
static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel);
static bool is_dummy_rel(RelOptInfo *rel);
-static void mark_dummy_rel(RelOptInfo *rel);
static bool restriction_is_constant_false(List *restrictlist,
bool only_pushed_down);
static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
RelOptInfo *rel2, RelOptInfo *joinrel,
SpecialJoinInfo *sjinfo, List *restrictlist);
+static void try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1,
+ RelOptInfo *rel2, RelOptInfo *joinrel,
+ SpecialJoinInfo *parent_sjinfo,
+ List *parent_restrictlist);
+static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel,
+ bool strict_op);
/*
elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
break;
}
+
+ /* Apply partition-wise join technique, if possible. */
+ try_partition_wise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
}
* is that the best solution is to explicitly make the dummy path in the same
* context the given RelOptInfo is in.
*/
-static void
+void
mark_dummy_rel(RelOptInfo *rel)
{
MemoryContext oldcontext;
}
return false;
}
+
+/*
+ * Assess whether join between given two partitioned relations can be broken
+ * down into joins between matching partitions; a technique called
+ * "partition-wise join"
+ *
+ * Partition-wise join is possible when a. Joining relations have same
+ * partitioning scheme b. There exists an equi-join between the partition keys
+ * of the two relations.
+ *
+ * Partition-wise join is planned as follows (details: optimizer/README.)
+ *
+ * 1. Create the RelOptInfos for joins between matching partitions i.e
+ * child-joins and add paths to them.
+ *
+ * 2. Construct Append or MergeAppend paths across the set of child joins.
+ * This second phase is implemented by generate_partition_wise_join_paths().
+ *
+ * The RelOptInfo, SpecialJoinInfo and restrictlist for each child join are
+ * obtained by translating the respective parent join structures.
+ */
+static void
+try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
+ RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo,
+ List *parent_restrictlist)
+{
+ int nparts;
+ int cnt_parts;
+
+ /* Guard against stack overflow due to overly deep partition hierarchy. */
+ check_stack_depth();
+
+ /* Nothing to do, if the join relation is not partitioned. */
+ if (!IS_PARTITIONED_REL(joinrel))
+ return;
+
+ /*
+ * set_rel_pathlist() may not create paths in children of an empty
+ * partitioned table and so we can not add paths to child-joins. So, deem
+ * such a join as unpartitioned. When a partitioned relation is deemed
+ * empty because all its children are empty, dummy path will be set in
+ * each of the children. In such a case we could still consider the join
+ * as partitioned, but it might not help much.
+ */
+ if (IS_DUMMY_REL(rel1) || IS_DUMMY_REL(rel2))
+ return;
+
+ /*
+ * Since this join relation is partitioned, all the base relations
+ * participating in this join must be partitioned and so are all the
+ * intermediate join relations.
+ */
+ Assert(IS_PARTITIONED_REL(rel1) && IS_PARTITIONED_REL(rel2));
+ Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2));
+
+ /*
+ * The partition scheme of the join relation should match that of the
+ * joining relations.
+ */
+ Assert(joinrel->part_scheme == rel1->part_scheme &&
+ joinrel->part_scheme == rel2->part_scheme);
+
+ /*
+ * Since we allow partition-wise join only when the partition bounds of
+ * the joining relations exactly match, the partition bounds of the join
+ * should match those of the joining relations.
+ */
+ Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
+ joinrel->part_scheme->parttyplen,
+ joinrel->part_scheme->parttypbyval,
+ joinrel->boundinfo, rel1->boundinfo));
+ Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
+ joinrel->part_scheme->parttyplen,
+ joinrel->part_scheme->parttypbyval,
+ joinrel->boundinfo, rel2->boundinfo));
+
+ nparts = joinrel->nparts;
+
+ /* Allocate space to hold child-joins RelOptInfos, if not already done. */
+ if (!joinrel->part_rels)
+ joinrel->part_rels =
+ (RelOptInfo **) palloc0(sizeof(RelOptInfo *) * nparts);
+
+ /*
+ * Create child-join relations for this partitioned join, if those don't
+ * exist. Add paths to child-joins for a pair of child relations
+ * corresponding to the given pair of parent relations.
+ */
+ for (cnt_parts = 0; cnt_parts < nparts; cnt_parts++)
+ {
+ RelOptInfo *child_rel1 = rel1->part_rels[cnt_parts];
+ RelOptInfo *child_rel2 = rel2->part_rels[cnt_parts];
+ SpecialJoinInfo *child_sjinfo;
+ List *child_restrictlist;
+ RelOptInfo *child_joinrel;
+ Relids child_joinrelids;
+ AppendRelInfo **appinfos;
+ int nappinfos;
+
+ /* We should never try to join two overlapping sets of rels. */
+ Assert(!bms_overlap(child_rel1->relids, child_rel2->relids));
+ child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids);
+ appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos);
+
+ /*
+ * Construct SpecialJoinInfo from parent join relations's
+ * SpecialJoinInfo.
+ */
+ child_sjinfo = build_child_join_sjinfo(root, parent_sjinfo,
+ child_rel1->relids,
+ child_rel2->relids);
+
+ /*
+ * Construct restrictions applicable to the child join from those
+ * applicable to the parent join.
+ */
+ child_restrictlist =
+ (List *) adjust_appendrel_attrs(root,
+ (Node *) parent_restrictlist,
+ nappinfos, appinfos);
+ pfree(appinfos);
+
+ child_joinrel = joinrel->part_rels[cnt_parts];
+ if (!child_joinrel)
+ {
+ child_joinrel = build_child_join_rel(root, child_rel1, child_rel2,
+ joinrel, child_restrictlist,
+ child_sjinfo,
+ child_sjinfo->jointype);
+ joinrel->part_rels[cnt_parts] = child_joinrel;
+ }
+
+ Assert(bms_equal(child_joinrel->relids, child_joinrelids));
+
+ populate_joinrel_with_paths(root, child_rel1, child_rel2,
+ child_joinrel, child_sjinfo,
+ child_restrictlist);
+ }
+}
+
+/*
+ * Returns true if there exists an equi-join condition for each pair of
+ * partition keys from given relations being joined.
+ */
+bool
+have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype,
+ List *restrictlist)
+{
+ PartitionScheme part_scheme = rel1->part_scheme;
+ ListCell *lc;
+ int cnt_pks;
+ bool pk_has_clause[PARTITION_MAX_KEYS];
+ bool strict_op;
+
+ /*
+ * This function should be called when the joining relations have same
+ * partitioning scheme.
+ */
+ Assert(rel1->part_scheme == rel2->part_scheme);
+ Assert(part_scheme);
+
+ memset(pk_has_clause, 0, sizeof(pk_has_clause));
+ foreach(lc, restrictlist)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+ OpExpr *opexpr;
+ Expr *expr1;
+ Expr *expr2;
+ int ipk1;
+ int ipk2;
+
+ /* If processing an outer join, only use its own join clauses. */
+ if (IS_OUTER_JOIN(jointype) && rinfo->is_pushed_down)
+ continue;
+
+ /* Skip clauses which can not be used for a join. */
+ if (!rinfo->can_join)
+ continue;
+
+ /* Skip clauses which are not equality conditions. */
+ if (!rinfo->mergeopfamilies)
+ continue;
+
+ opexpr = (OpExpr *) rinfo->clause;
+ Assert(is_opclause(opexpr));
+
+ /*
+ * The equi-join between partition keys is strict if equi-join between
+ * at least one partition key is using a strict operator. See
+ * explanation about outer join reordering identity 3 in
+ * optimizer/README
+ */
+ strict_op = op_strict(opexpr->opno);
+
+ /* Match the operands to the relation. */
+ if (bms_is_subset(rinfo->left_relids, rel1->relids) &&
+ bms_is_subset(rinfo->right_relids, rel2->relids))
+ {
+ expr1 = linitial(opexpr->args);
+ expr2 = lsecond(opexpr->args);
+ }
+ else if (bms_is_subset(rinfo->left_relids, rel2->relids) &&
+ bms_is_subset(rinfo->right_relids, rel1->relids))
+ {
+ expr1 = lsecond(opexpr->args);
+ expr2 = linitial(opexpr->args);
+ }
+ else
+ continue;
+
+ /*
+ * Only clauses referencing the partition keys are useful for
+ * partition-wise join.
+ */
+ ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op);
+ if (ipk1 < 0)
+ continue;
+ ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op);
+ if (ipk2 < 0)
+ continue;
+
+ /*
+ * If the clause refers to keys at different ordinal positions, it can
+ * not be used for partition-wise join.
+ */
+ if (ipk1 != ipk2)
+ continue;
+
+ /*
+ * The clause allows partition-wise join if only it uses the same
+ * operator family as that specified by the partition key.
+ */
+ if (!list_member_oid(rinfo->mergeopfamilies,
+ part_scheme->partopfamily[ipk1]))
+ continue;
+
+ /* Mark the partition key as having an equi-join clause. */
+ pk_has_clause[ipk1] = true;
+ }
+
+ /* Check whether every partition key has an equi-join condition. */
+ for (cnt_pks = 0; cnt_pks < part_scheme->partnatts; cnt_pks++)
+ {
+ if (!pk_has_clause[cnt_pks])
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Find the partition key from the given relation matching the given
+ * expression. If found, return the index of the partition key, else return -1.
+ */
+static int
+match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
+{
+ int cnt;
+
+ /* This function should be called only for partitioned relations. */
+ Assert(rel->part_scheme);
+
+ /* Remove any relabel decorations. */
+ while (IsA(expr, RelabelType))
+ expr = (Expr *) (castNode(RelabelType, expr))->arg;
+
+ for (cnt = 0; cnt < rel->part_scheme->partnatts; cnt++)
+ {
+ ListCell *lc;
+
+ Assert(rel->partexprs);
+ foreach(lc, rel->partexprs[cnt])
+ {
+ if (equal(lfirst(lc), expr))
+ return cnt;
+ }
+
+ if (!strict_op)
+ continue;
+
+ /*
+ * If it's a strict equi-join a NULL partition key on one side will
+ * not join a NULL partition key on the other side. So, rows with NULL
+ * partition key from a partition on one side can not join with those
+ * from a non-matching partition on the other side. So, search the
+ * nullable partition keys as well.
+ */
+ Assert(rel->nullable_partexprs);
+ foreach(lc, rel->nullable_partexprs[cnt])
+ {
+ if (equal(lfirst(lc), expr))
+ return cnt;
+ }
+ }
+
+ return -1;
+}
static EquivalenceMember *find_ec_member_for_tle(EquivalenceClass *ec,
TargetEntry *tle,
Relids relids);
-static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys);
+static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
+ Relids relids);
static Sort *make_sort_from_groupcols(List *groupcls,
AttrNumber *grpColIdx,
Plan *lefttree);
subplan = create_plan_recurse(root, best_path->subpath,
flags | CP_SMALL_TLIST);
- plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys);
+ plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys, NULL);
copy_generic_path_info(&plan->plan, (Path *) best_path);
ListCell *lc;
ListCell *lop;
ListCell *lip;
+ Path *outer_path = best_path->jpath.outerjoinpath;
+ Path *inner_path = best_path->jpath.innerjoinpath;
/*
* MergeJoin can project, so we don't have to demand exact tlists from the
*/
if (best_path->outersortkeys)
{
+ Relids outer_relids = outer_path->parent->relids;
Sort *sort = make_sort_from_pathkeys(outer_plan,
- best_path->outersortkeys);
+ best_path->outersortkeys,
+ outer_relids);
label_sort_with_costsize(root, sort, -1.0);
outer_plan = (Plan *) sort;
if (best_path->innersortkeys)
{
+ Relids inner_relids = inner_path->parent->relids;
Sort *sort = make_sort_from_pathkeys(inner_plan,
- best_path->innersortkeys);
+ best_path->innersortkeys,
+ inner_relids);
label_sort_with_costsize(root, sort, -1.0);
inner_plan = (Plan *) sort;
* the output parameters *p_numsortkeys etc.
*
* When looking for matches to an EquivalenceClass's members, we will only
- * consider child EC members if they match 'relids'. This protects against
- * possible incorrect matches to child expressions that contain no Vars.
+ * consider child EC members if they belong to given 'relids'. This protects
+ * against possible incorrect matches to child expressions that contain no
+ * Vars.
*
* If reqColIdx isn't NULL then it contains sort key column numbers that
* we should match. This is used when making child plans for a MergeAppend;
continue;
/*
- * Ignore child members unless they match the rel being
+ * Ignore child members unless they belong to the rel being
* sorted.
*/
if (em->em_is_child &&
- !bms_equal(em->em_relids, relids))
+ !bms_is_subset(em->em_relids, relids))
continue;
sortexpr = em->em_expr;
* find_ec_member_for_tle
* Locate an EquivalenceClass member matching the given TLE, if any
*
- * Child EC members are ignored unless they match 'relids'.
+ * Child EC members are ignored unless they belong to given 'relids'.
*/
static EquivalenceMember *
find_ec_member_for_tle(EquivalenceClass *ec,
continue;
/*
- * Ignore child members unless they match the rel being sorted.
+ * Ignore child members unless they belong to the rel being sorted.
*/
if (em->em_is_child &&
- !bms_equal(em->em_relids, relids))
+ !bms_is_subset(em->em_relids, relids))
continue;
/* Match if same expression (after stripping relabel) */
*
* 'lefttree' is the node which yields input tuples
* 'pathkeys' is the list of pathkeys by which the result is to be sorted
+ * 'relids' is the set of relations required by prepare_sort_from_pathkeys()
*/
static Sort *
-make_sort_from_pathkeys(Plan *lefttree, List *pathkeys)
+make_sort_from_pathkeys(Plan *lefttree, List *pathkeys, Relids relids)
{
int numsortkeys;
AttrNumber *sortColIdx;
/* Compute sort column info, and adjust lefttree as needed */
lefttree = prepare_sort_from_pathkeys(lefttree, pathkeys,
- NULL,
+ relids,
NULL,
false,
&numsortkeys,
return result;
}
+
+/*
+ * get_partitioned_child_rels_for_join
+ * Build and return a list containing the RTI of every partitioned
+ * relation which is a child of some rel included in the join.
+ */
+List *
+get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
+{
+ List *result = NIL;
+ ListCell *l;
+
+ foreach(l, root->pcinfo_list)
+ {
+ PartitionedChildRelInfo *pc = lfirst(l);
+
+ if (bms_is_member(pc->parent_relid, join_relids))
+ result = list_concat(result, list_copy(pc->child_rels));
+ }
+
+ return result;
+}
int num_vars; /* number of plain Var tlist entries */
bool has_ph_vars; /* are there PlaceHolderVar entries? */
bool has_non_vars; /* are there other entries? */
+ bool has_conv_whole_rows; /* are there ConvertRowtypeExpr
+ * entries encapsulating a whole-row
+ * Var? */
tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER]; /* has num_vars entries */
} indexed_tlist;
int rtoffset);
static bool extract_query_dependencies_walker(Node *node,
PlannerInfo *context);
+static bool is_converted_whole_row_reference(Node *node);
/*****************************************************************************
*
itlist->tlist = tlist;
itlist->has_ph_vars = false;
itlist->has_non_vars = false;
+ itlist->has_conv_whole_rows = false;
/* Find the Vars and fill in the index array */
vinfo = itlist->vars;
}
else if (tle->expr && IsA(tle->expr, PlaceHolderVar))
itlist->has_ph_vars = true;
+ else if (is_converted_whole_row_reference((Node *) tle->expr))
+ itlist->has_conv_whole_rows = true;
else
itlist->has_non_vars = true;
}
* This is like build_tlist_index, but we only index tlist entries that
* are Vars belonging to some rel other than the one specified. We will set
* has_ph_vars (allowing PlaceHolderVars to be matched), but not has_non_vars
- * (so nothing other than Vars and PlaceHolderVars can be matched).
+ * (so nothing other than Vars and PlaceHolderVars can be matched). In case of
+ * DML, where this function will be used, returning lists from child relations
+ * will be appended similar to a simple append relation. That does not require
+ * fixing ConvertRowtypeExpr references. So, those are not considered here.
*/
static indexed_tlist *
build_tlist_index_other_vars(List *tlist, Index ignore_rel)
itlist->tlist = tlist;
itlist->has_ph_vars = false;
itlist->has_non_vars = false;
+ itlist->has_conv_whole_rows = false;
/* Find the desired Vars and fill in the index array */
vinfo = itlist->vars;
fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
{
Var *newvar;
+ bool converted_whole_row;
if (node == NULL)
return NULL;
}
if (IsA(node, Param))
return fix_param_node(context->root, (Param *) node);
+
/* Try matching more complex expressions too, if tlists have any */
- if (context->outer_itlist && context->outer_itlist->has_non_vars)
+ converted_whole_row = is_converted_whole_row_reference(node);
+ if (context->outer_itlist &&
+ (context->outer_itlist->has_non_vars ||
+ (context->outer_itlist->has_conv_whole_rows && converted_whole_row)))
{
newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->outer_itlist,
if (newvar)
return (Node *) newvar;
}
- if (context->inner_itlist && context->inner_itlist->has_non_vars)
+ if (context->inner_itlist &&
+ (context->inner_itlist->has_non_vars ||
+ (context->inner_itlist->has_conv_whole_rows && converted_whole_row)))
{
newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->inner_itlist,
/* If no match, just fall through to process it normally */
}
/* Try matching more complex expressions too, if tlist has any */
- if (context->subplan_itlist->has_non_vars)
+ if (context->subplan_itlist->has_non_vars ||
+ (context->subplan_itlist->has_conv_whole_rows &&
+ is_converted_whole_row_reference(node)))
{
newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->subplan_itlist,
return expression_tree_walker(node, extract_query_dependencies_walker,
(void *) context);
}
+
+/*
+ * is_converted_whole_row_reference
+ * If the given node is a ConvertRowtypeExpr encapsulating a whole-row
+ * reference as implicit cast, return true. Otherwise return false.
+ */
+static bool
+is_converted_whole_row_reference(Node *node)
+{
+ ConvertRowtypeExpr *convexpr;
+
+ if (!node || !IsA(node, ConvertRowtypeExpr))
+ return false;
+
+ /* Traverse nested ConvertRowtypeExpr's. */
+ convexpr = castNode(ConvertRowtypeExpr, node);
+ while (convexpr->convertformat == COERCE_IMPLICIT_CAST &&
+ IsA(convexpr->arg, ConvertRowtypeExpr))
+ convexpr = castNode(ConvertRowtypeExpr, convexpr->arg);
+
+ if (IsA(convexpr->arg, Var))
+ {
+ Var *var = castNode(Var, convexpr->arg);
+
+ if (var->varattno == 0)
+ return true;
+ }
+
+ return false;
+}
return relids;
}
+/*
+ * Replace any relid present in top_parent_relids with its child in
+ * child_relids. Members of child_relids can be multiple levels below top
+ * parent in the partition hierarchy.
+ */
+Relids
+adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
+ Relids child_relids, Relids top_parent_relids)
+{
+ AppendRelInfo **appinfos;
+ int nappinfos;
+ Relids parent_relids = NULL;
+ Relids result;
+ Relids tmp_result = NULL;
+ int cnt;
+
+ /*
+ * If the given relids set doesn't contain any of the top parent relids,
+ * it will remain unchanged.
+ */
+ if (!bms_overlap(relids, top_parent_relids))
+ return relids;
+
+ appinfos = find_appinfos_by_relids(root, child_relids, &nappinfos);
+
+ /* Construct relids set for the immediate parent of the given child. */
+ for (cnt = 0; cnt < nappinfos; cnt++)
+ {
+ AppendRelInfo *appinfo = appinfos[cnt];
+
+ parent_relids = bms_add_member(parent_relids, appinfo->parent_relid);
+ }
+
+ /* Recurse if immediate parent is not the top parent. */
+ if (!bms_equal(parent_relids, top_parent_relids))
+ {
+ tmp_result = adjust_child_relids_multilevel(root, relids,
+ parent_relids,
+ top_parent_relids);
+ relids = tmp_result;
+ }
+
+ result = adjust_child_relids(relids, nappinfos, appinfos);
+
+ /* Free memory consumed by any intermediate result. */
+ if (tmp_result)
+ bms_free(tmp_result);
+ bms_free(parent_relids);
+ pfree(appinfos);
+
+ return result;
+}
+
/*
* Adjust the targetlist entries of an inherited UPDATE operation
*
return node;
}
+/*
+ * Construct the SpecialJoinInfo for a child-join by translating
+ * SpecialJoinInfo for the join between parents. left_relids and right_relids
+ * are the relids of left and right side of the join respectively.
+ */
+SpecialJoinInfo *
+build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
+ Relids left_relids, Relids right_relids)
+{
+ SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
+ AppendRelInfo **left_appinfos;
+ int left_nappinfos;
+ AppendRelInfo **right_appinfos;
+ int right_nappinfos;
+
+ memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));
+ left_appinfos = find_appinfos_by_relids(root, left_relids,
+ &left_nappinfos);
+ right_appinfos = find_appinfos_by_relids(root, right_relids,
+ &right_nappinfos);
+
+ sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand,
+ left_nappinfos, left_appinfos);
+ sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand,
+ right_nappinfos,
+ right_appinfos);
+ sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand,
+ left_nappinfos, left_appinfos);
+ sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand,
+ right_nappinfos,
+ right_appinfos);
+ sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root,
+ (Node *) sjinfo->semi_rhs_exprs,
+ right_nappinfos,
+ right_appinfos);
+
+ pfree(left_appinfos);
+ pfree(right_appinfos);
+
+ return sjinfo;
+}
+
/*
* find_appinfos_by_relids
* Find AppendRelInfo structures for all relations specified by relids.
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
+#include "nodes/extensible.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
+#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
+#include "optimizer/tlist.h"
#include "optimizer/var.h"
#include "parser/parsetree.h"
+#include "foreign/fdwapi.h"
#include "utils/lsyscache.h"
+#include "utils/memutils.h"
#include "utils/selfuncs.h"
#define STD_FUZZ_FACTOR 1.01
static List *translate_sub_tlist(List *tlist, int relid);
+static List *reparameterize_pathlist_by_child(PlannerInfo *root,
+ List *pathlist,
+ RelOptInfo *child_rel);
/*****************************************************************************
}
return NULL;
}
+
+/*
+ * reparameterize_path_by_child
+ * Given a path parameterized by the parent of the given child relation,
+ * translate the path to be parameterized by the given child relation.
+ *
+ * The function creates a new path of the same type as the given path, but
+ * parameterized by the given child relation. Most fields from the original
+ * path can simply be flat-copied, but any expressions must be adjusted to
+ * refer to the correct varnos, and any paths must be recursively
+ * reparameterized. Other fields that refer to specific relids also need
+ * adjustment.
+ *
+ * The cost, number of rows, width and parallel path properties depend upon
+ * path->parent, which does not change during the translation. Hence those
+ * members are copied as they are.
+ *
+ * If the given path can not be reparameterized, the function returns NULL.
+ */
+Path *
+reparameterize_path_by_child(PlannerInfo *root, Path *path,
+ RelOptInfo *child_rel)
+{
+
+#define FLAT_COPY_PATH(newnode, node, nodetype) \
+ ( (newnode) = makeNode(nodetype), \
+ memcpy((newnode), (node), sizeof(nodetype)) )
+
+#define ADJUST_CHILD_ATTRS(node) \
+ ((node) = \
+ (List *) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \
+ child_rel->relids, \
+ child_rel->top_parent_relids))
+
+#define REPARAMETERIZE_CHILD_PATH(path) \
+do { \
+ (path) = reparameterize_path_by_child(root, (path), child_rel); \
+ if ((path) == NULL) \
+ return NULL; \
+} while(0);
+
+#define REPARAMETERIZE_CHILD_PATH_LIST(pathlist) \
+do { \
+ if ((pathlist) != NIL) \
+ { \
+ (pathlist) = reparameterize_pathlist_by_child(root, (pathlist), \
+ child_rel); \
+ if ((pathlist) == NIL) \
+ return NULL; \
+ } \
+} while(0);
+
+ Path *new_path;
+ ParamPathInfo *new_ppi;
+ ParamPathInfo *old_ppi;
+ Relids required_outer;
+
+ /*
+ * If the path is not parameterized by parent of the given relation, it
+ * doesn't need reparameterization.
+ */
+ if (!path->param_info ||
+ !bms_overlap(PATH_REQ_OUTER(path), child_rel->top_parent_relids))
+ return path;
+
+ /* Reparameterize a copy of given path. */
+ switch (nodeTag(path))
+ {
+ case T_Path:
+ FLAT_COPY_PATH(new_path, path, Path);
+ break;
+
+ case T_IndexPath:
+ {
+ IndexPath *ipath;
+
+ FLAT_COPY_PATH(ipath, path, IndexPath);
+ ADJUST_CHILD_ATTRS(ipath->indexclauses);
+ ADJUST_CHILD_ATTRS(ipath->indexquals);
+ new_path = (Path *) ipath;
+ }
+ break;
+
+ case T_BitmapHeapPath:
+ {
+ BitmapHeapPath *bhpath;
+
+ FLAT_COPY_PATH(bhpath, path, BitmapHeapPath);
+ REPARAMETERIZE_CHILD_PATH(bhpath->bitmapqual);
+ new_path = (Path *) bhpath;
+ }
+ break;
+
+ case T_BitmapAndPath:
+ {
+ BitmapAndPath *bapath;
+
+ FLAT_COPY_PATH(bapath, path, BitmapAndPath);
+ REPARAMETERIZE_CHILD_PATH_LIST(bapath->bitmapquals);
+ new_path = (Path *) bapath;
+ }
+ break;
+
+ case T_BitmapOrPath:
+ {
+ BitmapOrPath *bopath;
+
+ FLAT_COPY_PATH(bopath, path, BitmapOrPath);
+ REPARAMETERIZE_CHILD_PATH_LIST(bopath->bitmapquals);
+ new_path = (Path *) bopath;
+ }
+ break;
+
+ case T_TidPath:
+ {
+ TidPath *tpath;
+
+ /*
+ * TidPath contains tidquals, which do not contain any
+ * external parameters per create_tidscan_path(). So don't
+ * bother to translate those.
+ */
+ FLAT_COPY_PATH(tpath, path, TidPath);
+ new_path = (Path *) tpath;
+ }
+ break;
+
+ case T_ForeignPath:
+ {
+ ForeignPath *fpath;
+ ReparameterizeForeignPathByChild_function rfpc_func;
+
+ FLAT_COPY_PATH(fpath, path, ForeignPath);
+ if (fpath->fdw_outerpath)
+ REPARAMETERIZE_CHILD_PATH(fpath->fdw_outerpath);
+
+ /* Hand over to FDW if needed. */
+ rfpc_func =
+ path->parent->fdwroutine->ReparameterizeForeignPathByChild;
+ if (rfpc_func)
+ fpath->fdw_private = rfpc_func(root, fpath->fdw_private,
+ child_rel);
+ new_path = (Path *) fpath;
+ }
+ break;
+
+ case T_CustomPath:
+ {
+ CustomPath *cpath;
+
+ FLAT_COPY_PATH(cpath, path, CustomPath);
+ REPARAMETERIZE_CHILD_PATH_LIST(cpath->custom_paths);
+ if (cpath->methods &&
+ cpath->methods->ReparameterizeCustomPathByChild)
+ cpath->custom_private =
+ cpath->methods->ReparameterizeCustomPathByChild(root,
+ cpath->custom_private,
+ child_rel);
+ new_path = (Path *) cpath;
+ }
+ break;
+
+ case T_NestPath:
+ {
+ JoinPath *jpath;
+
+ FLAT_COPY_PATH(jpath, path, NestPath);
+
+ REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
+ REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
+ ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
+ new_path = (Path *) jpath;
+ }
+ break;
+
+ case T_MergePath:
+ {
+ JoinPath *jpath;
+ MergePath *mpath;
+
+ FLAT_COPY_PATH(mpath, path, MergePath);
+
+ jpath = (JoinPath *) mpath;
+ REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
+ REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
+ ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
+ ADJUST_CHILD_ATTRS(mpath->path_mergeclauses);
+ new_path = (Path *) mpath;
+ }
+ break;
+
+ case T_HashPath:
+ {
+ JoinPath *jpath;
+ HashPath *hpath;
+
+ FLAT_COPY_PATH(hpath, path, HashPath);
+
+ jpath = (JoinPath *) hpath;
+ REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
+ REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
+ ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
+ ADJUST_CHILD_ATTRS(hpath->path_hashclauses);
+ new_path = (Path *) hpath;
+ }
+ break;
+
+ case T_AppendPath:
+ {
+ AppendPath *apath;
+
+ FLAT_COPY_PATH(apath, path, AppendPath);
+ REPARAMETERIZE_CHILD_PATH_LIST(apath->subpaths);
+ new_path = (Path *) apath;
+ }
+ break;
+
+ case T_MergeAppend:
+ {
+ MergeAppendPath *mapath;
+
+ FLAT_COPY_PATH(mapath, path, MergeAppendPath);
+ REPARAMETERIZE_CHILD_PATH_LIST(mapath->subpaths);
+ new_path = (Path *) mapath;
+ }
+ break;
+
+ case T_MaterialPath:
+ {
+ MaterialPath *mpath;
+
+ FLAT_COPY_PATH(mpath, path, MaterialPath);
+ REPARAMETERIZE_CHILD_PATH(mpath->subpath);
+ new_path = (Path *) mpath;
+ }
+ break;
+
+ case T_UniquePath:
+ {
+ UniquePath *upath;
+
+ FLAT_COPY_PATH(upath, path, UniquePath);
+ REPARAMETERIZE_CHILD_PATH(upath->subpath);
+ ADJUST_CHILD_ATTRS(upath->uniq_exprs);
+ new_path = (Path *) upath;
+ }
+ break;
+
+ case T_GatherPath:
+ {
+ GatherPath *gpath;
+
+ FLAT_COPY_PATH(gpath, path, GatherPath);
+ REPARAMETERIZE_CHILD_PATH(gpath->subpath);
+ new_path = (Path *) gpath;
+ }
+ break;
+
+ case T_GatherMergePath:
+ {
+ GatherMergePath *gmpath;
+
+ FLAT_COPY_PATH(gmpath, path, GatherMergePath);
+ REPARAMETERIZE_CHILD_PATH(gmpath->subpath);
+ new_path = (Path *) gmpath;
+ }
+ break;
+
+ default:
+
+ /* We don't know how to reparameterize this path. */
+ return NULL;
+ }
+
+ /*
+ * Adjust the parameterization information, which refers to the topmost
+ * parent. The topmost parent can be multiple levels away from the given
+ * child, hence use multi-level expression adjustment routines.
+ */
+ old_ppi = new_path->param_info;
+ required_outer =
+ adjust_child_relids_multilevel(root, old_ppi->ppi_req_outer,
+ child_rel->relids,
+ child_rel->top_parent_relids);
+
+ /* If we already have a PPI for this parameterization, just return it */
+ new_ppi = find_param_path_info(new_path->parent, required_outer);
+
+ /*
+ * If not, build a new one and link it to the list of PPIs. For the same
+ * reason as explained in mark_dummy_rel(), allocate new PPI in the same
+ * context the given RelOptInfo is in.
+ */
+ if (new_ppi == NULL)
+ {
+ MemoryContext oldcontext;
+ RelOptInfo *rel = path->parent;
+
+ oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
+
+ new_ppi = makeNode(ParamPathInfo);
+ new_ppi->ppi_req_outer = bms_copy(required_outer);
+ new_ppi->ppi_rows = old_ppi->ppi_rows;
+ new_ppi->ppi_clauses = old_ppi->ppi_clauses;
+ ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses);
+ rel->ppilist = lappend(rel->ppilist, new_ppi);
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+ bms_free(required_outer);
+
+ new_path->param_info = new_ppi;
+
+ /*
+ * Adjust the path target if the parent of the outer relation is
+ * referenced in the targetlist. This can happen when only the parent of
+ * outer relation is laterally referenced in this relation.
+ */
+ if (bms_overlap(path->parent->lateral_relids,
+ child_rel->top_parent_relids))
+ {
+ new_path->pathtarget = copy_pathtarget(new_path->pathtarget);
+ ADJUST_CHILD_ATTRS(new_path->pathtarget->exprs);
+ }
+
+ return new_path;
+}
+
+/*
+ * reparameterize_pathlist_by_child
+ * Helper function to reparameterize a list of paths by given child rel.
+ */
+static List *
+reparameterize_pathlist_by_child(PlannerInfo *root,
+ List *pathlist,
+ RelOptInfo *child_rel)
+{
+ ListCell *lc;
+ List *result = NIL;
+
+ foreach(lc, pathlist)
+ {
+ Path *path = reparameterize_path_by_child(root, lfirst(lc),
+ child_rel);
+ if (path == NULL)
+ {
+ list_free(result);
+ return NIL;
+ }
+
+ result = lappend(result, path);
+ }
+
+ return result;
+}
#include "optimizer/pathnode.h"
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
+#include "optimizer/prep.h"
#include "optimizer/var.h"
#include "utils/lsyscache.h"
Relids relids = joinrel->relids;
ListCell *lc;
+ /* This function is called only on the parent relations. */
+ Assert(!IS_OTHER_REL(joinrel) && !IS_OTHER_REL(outer_rel) &&
+ !IS_OTHER_REL(inner_rel));
+
foreach(lc, root->placeholder_list)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
}
}
}
+
+/*
+ * add_placeholders_to_child_joinrel
+ * Translate the PHVs in parent's targetlist and add them to the child's
+ * targetlist. Also adjust the cost
+ */
+void
+add_placeholders_to_child_joinrel(PlannerInfo *root, RelOptInfo *childrel,
+ RelOptInfo *parentrel)
+{
+ ListCell *lc;
+ AppendRelInfo **appinfos;
+ int nappinfos;
+
+ Assert(IS_JOIN_REL(childrel) && IS_JOIN_REL(parentrel));
+ Assert(IS_OTHER_REL(childrel));
+
+ /* Nothing to do if no PHVs. */
+ if (root->placeholder_list == NIL)
+ return;
+
+ appinfos = find_appinfos_by_relids(root, childrel->relids, &nappinfos);
+ foreach(lc, parentrel->reltarget->exprs)
+ {
+ PlaceHolderVar *phv = lfirst(lc);
+
+ if (IsA(phv, PlaceHolderVar))
+ {
+ /*
+ * In case the placeholder Var refers to any of the parent
+ * relations, translate it to refer to the corresponding child.
+ */
+ if (bms_overlap(phv->phrels, parentrel->relids) &&
+ childrel->reloptkind == RELOPT_OTHER_JOINREL)
+ {
+ phv = (PlaceHolderVar *) adjust_appendrel_attrs(root,
+ (Node *) phv,
+ nappinfos,
+ appinfos);
+ }
+
+ childrel->reltarget->exprs = lappend(childrel->reltarget->exprs,
+ phv);
+ }
+ }
+
+ /* Adjust the cost and width of child targetlist. */
+ childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup;
+ childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple;
+ childrel->reltarget->width = parentrel->reltarget->width;
+
+ pfree(appinfos);
+}
static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
Relation relation);
static PartitionScheme find_partition_scheme(PlannerInfo *root, Relation rel);
-static List **build_baserel_partition_key_exprs(Relation relation, Index varno);
+static void set_baserel_partition_key_exprs(Relation relation,
+ RelOptInfo *rel);
/*
* get_relation_info -
Assert(partdesc != NULL && rel->part_scheme != NULL);
rel->boundinfo = partdesc->boundinfo;
rel->nparts = partdesc->nparts;
- rel->partexprs = build_baserel_partition_key_exprs(relation, rel->relid);
+ set_baserel_partition_key_exprs(relation, rel);
}
/*
}
/*
- * build_baserel_partition_key_exprs
+ * set_baserel_partition_key_exprs
*
- * Collects partition key expressions for a given base relation. Any single
- * column partition keys are converted to Var nodes. All Var nodes are set
- * to the given varno. The partition key expressions are returned as an array
- * of single element lists to be stored in RelOptInfo of the base relation.
+ * Builds partition key expressions for the given base relation and sets them
+ * in given RelOptInfo. Any single column partition keys are converted to Var
+ * nodes. All Var nodes are restamped with the relid of given relation.
*/
-static List **
-build_baserel_partition_key_exprs(Relation relation, Index varno)
+static void
+set_baserel_partition_key_exprs(Relation relation,
+ RelOptInfo *rel)
{
PartitionKey partkey = RelationGetPartitionKey(relation);
int partnatts;
int cnt;
List **partexprs;
ListCell *lc;
+ Index varno = rel->relid;
+
+ Assert(IS_SIMPLE_REL(rel) && rel->relid > 0);
/* A partitioned table should have a partition key. */
Assert(partkey != NULL);
partexprs[cnt] = list_make1(partexpr);
}
- return partexprs;
+ rel->partexprs = partexprs;
+
+ /*
+ * A base relation can not have nullable partition key expressions. We
+ * still allocate array of empty expressions lists to keep partition key
+ * expression handling code simple. See build_joinrel_partition_info() and
+ * match_expr_to_partition_keys().
+ */
+ rel->nullable_partexprs = (List **) palloc0(sizeof(List *) * partnatts);
}
#include <limits.h>
#include "miscadmin.h"
+#include "catalog/partition.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/placeholder.h"
#include "optimizer/plancat.h"
+#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "utils/hsearch.h"
static void set_foreign_rel_properties(RelOptInfo *joinrel,
RelOptInfo *outer_rel, RelOptInfo *inner_rel);
static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel);
+static void build_joinrel_partition_info(RelOptInfo *joinrel,
+ RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+ List *restrictlist, JoinType jointype);
/*
rel->boundinfo = NULL;
rel->part_rels = NULL;
rel->partexprs = NULL;
+ rel->nullable_partexprs = NULL;
/*
* Pass top parent's relids down the inheritance hierarchy. If the parent
RelOptInfo *joinrel;
List *restrictlist;
+ /* This function should be used only for join between parents. */
+ Assert(!IS_OTHER_REL(outer_rel) && !IS_OTHER_REL(inner_rel));
+
/*
* See if we already have a joinrel for this set of base rels.
*/
joinrel->boundinfo = NULL;
joinrel->part_rels = NULL;
joinrel->partexprs = NULL;
+ joinrel->nullable_partexprs = NULL;
/* Compute information relevant to the foreign relations. */
set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
*/
joinrel->has_eclass_joins = has_relevant_eclass_joinclause(root, joinrel);
+ /* Store the partition information. */
+ build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist,
+ sjinfo->jointype);
+
/*
* Set estimates of the joinrel's size.
*/
return joinrel;
}
+/*
+ * build_child_join_rel
+ * Builds RelOptInfo representing join between given two child relations.
+ *
+ * 'outer_rel' and 'inner_rel' are the RelOptInfos of child relations being
+ * joined
+ * 'parent_joinrel' is the RelOptInfo representing the join between parent
+ * relations. Some of the members of new RelOptInfo are produced by
+ * translating corresponding members of this RelOptInfo
+ * 'sjinfo': child-join context info
+ * 'restrictlist': list of RestrictInfo nodes that apply to this particular
+ * pair of joinable relations
+ * 'join_appinfos': list of AppendRelInfo nodes for base child relations
+ * involved in this join
+ */
+RelOptInfo *
+build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel, RelOptInfo *parent_joinrel,
+ List *restrictlist, SpecialJoinInfo *sjinfo,
+ JoinType jointype)
+{
+ RelOptInfo *joinrel = makeNode(RelOptInfo);
+ AppendRelInfo **appinfos;
+ int nappinfos;
+
+ /* Only joins between "other" relations land here. */
+ Assert(IS_OTHER_REL(outer_rel) && IS_OTHER_REL(inner_rel));
+
+ joinrel->reloptkind = RELOPT_OTHER_JOINREL;
+ joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids);
+ joinrel->rows = 0;
+ /* cheap startup cost is interesting iff not all tuples to be retrieved */
+ joinrel->consider_startup = (root->tuple_fraction > 0);
+ joinrel->consider_param_startup = false;
+ joinrel->consider_parallel = false;
+ joinrel->reltarget = create_empty_pathtarget();
+ joinrel->pathlist = NIL;
+ joinrel->ppilist = NIL;
+ joinrel->partial_pathlist = NIL;
+ joinrel->cheapest_startup_path = NULL;
+ joinrel->cheapest_total_path = NULL;
+ joinrel->cheapest_unique_path = NULL;
+ joinrel->cheapest_parameterized_paths = NIL;
+ joinrel->direct_lateral_relids = NULL;
+ joinrel->lateral_relids = NULL;
+ joinrel->relid = 0; /* indicates not a baserel */
+ joinrel->rtekind = RTE_JOIN;
+ joinrel->min_attr = 0;
+ joinrel->max_attr = 0;
+ joinrel->attr_needed = NULL;
+ joinrel->attr_widths = NULL;
+ joinrel->lateral_vars = NIL;
+ joinrel->lateral_referencers = NULL;
+ joinrel->indexlist = NIL;
+ joinrel->pages = 0;
+ joinrel->tuples = 0;
+ joinrel->allvisfrac = 0;
+ joinrel->subroot = NULL;
+ joinrel->subplan_params = NIL;
+ joinrel->serverid = InvalidOid;
+ joinrel->userid = InvalidOid;
+ joinrel->useridiscurrent = false;
+ joinrel->fdwroutine = NULL;
+ joinrel->fdw_private = NULL;
+ joinrel->baserestrictinfo = NIL;
+ joinrel->baserestrictcost.startup = 0;
+ joinrel->baserestrictcost.per_tuple = 0;
+ joinrel->joininfo = NIL;
+ joinrel->has_eclass_joins = false;
+ joinrel->top_parent_relids = NULL;
+ joinrel->part_scheme = NULL;
+ joinrel->part_rels = NULL;
+ joinrel->partexprs = NULL;
+ joinrel->nullable_partexprs = NULL;
+
+ joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids,
+ inner_rel->top_parent_relids);
+
+ /* Compute information relevant to foreign relations. */
+ set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
+
+ /* Build targetlist */
+ build_joinrel_tlist(root, joinrel, outer_rel);
+ build_joinrel_tlist(root, joinrel, inner_rel);
+ /* Add placeholder variables. */
+ add_placeholders_to_child_joinrel(root, joinrel, parent_joinrel);
+
+ /* Construct joininfo list. */
+ appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
+ joinrel->joininfo = (List *) adjust_appendrel_attrs(root,
+ (Node *) parent_joinrel->joininfo,
+ nappinfos,
+ appinfos);
+ pfree(appinfos);
+
+ /*
+ * Lateral relids referred in child join will be same as that referred in
+ * the parent relation. Throw any partial result computed while building
+ * the targetlist.
+ */
+ bms_free(joinrel->direct_lateral_relids);
+ bms_free(joinrel->lateral_relids);
+ joinrel->direct_lateral_relids = (Relids) bms_copy(parent_joinrel->direct_lateral_relids);
+ joinrel->lateral_relids = (Relids) bms_copy(parent_joinrel->lateral_relids);
+
+ /*
+ * If the parent joinrel has pending equivalence classes, so does the
+ * child.
+ */
+ joinrel->has_eclass_joins = parent_joinrel->has_eclass_joins;
+
+ /* Is the join between partitions itself partitioned? */
+ build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist,
+ jointype);
+
+ /* Child joinrel is parallel safe if parent is parallel safe. */
+ joinrel->consider_parallel = parent_joinrel->consider_parallel;
+
+
+ /* Set estimates of the child-joinrel's size. */
+ set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
+ sjinfo, restrictlist);
+
+ /* We build the join only once. */
+ Assert(!find_join_rel(root, joinrel->relids));
+
+ /* Add the relation to the PlannerInfo. */
+ add_join_rel(root, joinrel);
+
+ return joinrel;
+}
+
/*
* min_join_parameterization
*
build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *input_rel)
{
- Relids relids = joinrel->relids;
+ Relids relids;
ListCell *vars;
+ /* attrs_needed refers to parent relids and not those of a child. */
+ if (joinrel->top_parent_relids)
+ relids = joinrel->top_parent_relids;
+ else
+ relids = joinrel->relids;
+
foreach(vars, input_rel->reltarget->exprs)
{
Var *var = (Var *) lfirst(vars);
continue;
/*
- * Otherwise, anything in a baserel or joinrel targetlist ought to be
- * a Var. (More general cases can only appear in appendrel child
- * rels, which will never be seen here.)
+ * Otherwise, anything in a baserel or joinrel targetlist ought to be a
+ * Var. Children of a partitioned table may have ConvertRowtypeExpr
+ * translating whole-row Var of a child to that of the parent. Children
+ * of an inherited table or subquery child rels can not directly
+ * participate in a join, so other kinds of nodes here.
*/
- if (!IsA(var, Var))
+ if (IsA(var, Var))
+ {
+ baserel = find_base_rel(root, var->varno);
+ ndx = var->varattno - baserel->min_attr;
+ }
+ else if (IsA(var, ConvertRowtypeExpr))
+ {
+ ConvertRowtypeExpr *child_expr = (ConvertRowtypeExpr *) var;
+ Var *childvar = (Var *) child_expr->arg;
+
+ /*
+ * Child's whole-row references are converted to look like those
+ * of parent using ConvertRowtypeExpr. There can be as many
+ * ConvertRowtypeExpr decorations as the depth of partition tree.
+ * The argument to the deepest ConvertRowtypeExpr is expected to
+ * be a whole-row reference of the child.
+ */
+ while (IsA(childvar, ConvertRowtypeExpr))
+ {
+ child_expr = (ConvertRowtypeExpr *) childvar;
+ childvar = (Var *) child_expr->arg;
+ }
+ Assert(IsA(childvar, Var) && childvar->varattno == 0);
+
+ baserel = find_base_rel(root, childvar->varno);
+ ndx = 0 - baserel->min_attr;
+ }
+ else
elog(ERROR, "unexpected node type in rel targetlist: %d",
(int) nodeTag(var));
- /* Get the Var's original base rel */
- baserel = find_base_rel(root, var->varno);
- /* Is it still needed above this joinrel? */
- ndx = var->varattno - baserel->min_attr;
+ /* Is the target expression still needed above this joinrel? */
if (bms_nonempty_difference(baserel->attr_needed[ndx], relids))
{
/* Yup, add it to the output */
joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, var);
- /* Vars have cost zero, so no need to adjust reltarget->cost */
+
+ /*
+ * Vars have cost zero, so no need to adjust reltarget->cost. Even
+ * if it's a ConvertRowtypeExpr, it will be computed only for the
+ * base relation, costing nothing for a join.
+ */
joinrel->reltarget->width += baserel->attr_widths[ndx];
}
}
{
ListCell *l;
+ /* Expected to be called only for join between parent relations. */
+ Assert(joinrel->reloptkind == RELOPT_JOINREL);
+
foreach(l, joininfo_list)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
return NULL;
}
+
+/*
+ * build_joinrel_partition_info
+ * If the two relations have same partitioning scheme, their join may be
+ * partitioned and will follow the same partitioning scheme as the joining
+ * relations. Set the partition scheme and partition key expressions in
+ * the join relation.
+ */
+static void
+build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel, List *restrictlist,
+ JoinType jointype)
+{
+ int partnatts;
+ int cnt;
+ PartitionScheme part_scheme;
+
+ /* Nothing to do if partition-wise join technique is disabled. */
+ if (!enable_partition_wise_join)
+ {
+ Assert(!IS_PARTITIONED_REL(joinrel));
+ return;
+ }
+
+ /*
+ * We can only consider this join as an input to further partition-wise
+ * joins if (a) the input relations are partitioned, (b) the partition
+ * schemes match, and (c) we can identify an equi-join between the
+ * partition keys. Note that if it were possible for
+ * have_partkey_equi_join to return different answers for the same joinrel
+ * depending on which join ordering we try first, this logic would break.
+ * That shouldn't happen, though, because of the way the query planner
+ * deduces implied equalities and reorders the joins. Please see
+ * optimizer/README for details.
+ */
+ if (!IS_PARTITIONED_REL(outer_rel) || !IS_PARTITIONED_REL(inner_rel) ||
+ outer_rel->part_scheme != inner_rel->part_scheme ||
+ !have_partkey_equi_join(outer_rel, inner_rel, jointype, restrictlist))
+ {
+ Assert(!IS_PARTITIONED_REL(joinrel));
+ return;
+ }
+
+ part_scheme = outer_rel->part_scheme;
+
+ Assert(REL_HAS_ALL_PART_PROPS(outer_rel) &&
+ REL_HAS_ALL_PART_PROPS(inner_rel));
+
+ /*
+ * For now, our partition matching algorithm can match partitions only
+ * when the partition bounds of the joining relations are exactly same.
+ * So, bail out otherwise.
+ */
+ if (outer_rel->nparts != inner_rel->nparts ||
+ !partition_bounds_equal(part_scheme->partnatts,
+ part_scheme->parttyplen,
+ part_scheme->parttypbyval,
+ outer_rel->boundinfo, inner_rel->boundinfo))
+ {
+ Assert(!IS_PARTITIONED_REL(joinrel));
+ return;
+ }
+
+ /*
+ * This function will be called only once for each joinrel, hence it
+ * should not have partition scheme, partition bounds, partition key
+ * expressions and array for storing child relations set.
+ */
+ Assert(!joinrel->part_scheme && !joinrel->partexprs &&
+ !joinrel->nullable_partexprs && !joinrel->part_rels &&
+ !joinrel->boundinfo);
+
+ /*
+ * Join relation is partitioned using the same partitioning scheme as the
+ * joining relations and has same bounds.
+ */
+ joinrel->part_scheme = part_scheme;
+ joinrel->boundinfo = outer_rel->boundinfo;
+ joinrel->nparts = outer_rel->nparts;
+ partnatts = joinrel->part_scheme->partnatts;
+ joinrel->partexprs = (List **) palloc0(sizeof(List *) * partnatts);
+ joinrel->nullable_partexprs =
+ (List **) palloc0(sizeof(List *) *partnatts);
+
+ /*
+ * Construct partition keys for the join.
+ *
+ * An INNER join between two partitioned relations can be regarded as
+ * partitioned by either key expression. For example, A INNER JOIN B ON A.a =
+ * B.b can be regarded as partitioned on A.a or on B.b; they are equivalent.
+ *
+ * For a SEMI or ANTI join, the result can only be regarded as being
+ * partitioned in the same manner as the outer side, since the inner columns
+ * are not retained.
+ *
+ * An OUTER join like (A LEFT JOIN B ON A.a = B.b) may produce rows with
+ * B.b NULL. These rows may not fit the partitioning conditions imposed on
+ * B.b. Hence, strictly speaking, the join is not partitioned by B.b and
+ * thus partition keys of an OUTER join should include partition key
+ * expressions from the OUTER side only. However, because all
+ * commonly-used comparison operators are strict, the presence of nulls on
+ * the outer side doesn't cause any problem; they can't match anything at
+ * future join levels anyway. Therefore, we track two sets of expressions:
+ * those that authentically partition the relation (partexprs) and those
+ * that partition the relation with the exception that extra nulls may be
+ * present (nullable_partexprs). When the comparison operator is strict,
+ * the latter is just as good as the former.
+ */
+ for (cnt = 0; cnt < partnatts; cnt++)
+ {
+ List *outer_expr;
+ List *outer_null_expr;
+ List *inner_expr;
+ List *inner_null_expr;
+ List *partexpr = NIL;
+ List *nullable_partexpr = NIL;
+
+ outer_expr = list_copy(outer_rel->partexprs[cnt]);
+ outer_null_expr = list_copy(outer_rel->nullable_partexprs[cnt]);
+ inner_expr = list_copy(inner_rel->partexprs[cnt]);
+ inner_null_expr = list_copy(inner_rel->nullable_partexprs[cnt]);
+
+ switch (jointype)
+ {
+ case JOIN_INNER:
+ partexpr = list_concat(outer_expr, inner_expr);
+ nullable_partexpr = list_concat(outer_null_expr,
+ inner_null_expr);
+ break;
+
+ case JOIN_SEMI:
+ case JOIN_ANTI:
+ partexpr = outer_expr;
+ nullable_partexpr = outer_null_expr;
+ break;
+
+ case JOIN_LEFT:
+ partexpr = outer_expr;
+ nullable_partexpr = list_concat(inner_expr,
+ outer_null_expr);
+ nullable_partexpr = list_concat(nullable_partexpr,
+ inner_null_expr);
+ break;
+
+ case JOIN_FULL:
+ nullable_partexpr = list_concat(outer_expr,
+ inner_expr);
+ nullable_partexpr = list_concat(nullable_partexpr,
+ outer_null_expr);
+ nullable_partexpr = list_concat(nullable_partexpr,
+ inner_null_expr);
+ break;
+
+ default:
+ elog(ERROR, "unrecognized join type: %d", (int) jointype);
+
+ }
+
+ joinrel->partexprs[cnt] = partexpr;
+ joinrel->nullable_partexprs[cnt] = nullable_partexpr;
+ }
+}
true,
NULL, NULL, NULL
},
+ {
+ {"enable_partition_wise_join", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enables partition-wise join."),
+ NULL
+ },
+ &enable_partition_wise_join,
+ false,
+ NULL, NULL, NULL
+ },
{
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
#enable_seqscan = on
#enable_sort = on
#enable_tidscan = on
+#enable_partition_wise_join = off
# - Planner Cost Constants -
typedef bool (*IsForeignScanParallelSafe_function) (PlannerInfo *root,
RelOptInfo *rel,
RangeTblEntry *rte);
+typedef List *(*ReparameterizeForeignPathByChild_function) (PlannerInfo *root,
+ List *fdw_private,
+ RelOptInfo *child_rel);
/*
* FdwRoutine is the struct returned by a foreign-data wrapper's handler
ReInitializeDSMForeignScan_function ReInitializeDSMForeignScan;
InitializeWorkerForeignScan_function InitializeWorkerForeignScan;
ShutdownForeignScan_function ShutdownForeignScan;
+
+ /* Support functions for path reparameterization. */
+ ReparameterizeForeignPathByChild_function ReparameterizeForeignPathByChild;
} FdwRoutine;
List *tlist,
List *clauses,
List *custom_plans);
+ struct List *(*ReparameterizeCustomPathByChild) (PlannerInfo *root,
+ List *custom_private,
+ RelOptInfo *child_rel);
} CustomPathMethods;
/*
* handling join alias Vars. Currently this is not needed because all join
* alias Vars are expanded to non-aliased form during preprocess_expression.
*
+ * We also have relations representing joins between child relations of
+ * different partitioned tables. These relations are not added to
+ * join_rel_level lists as they are not joined directly by the dynamic
+ * programming algorithm.
+ *
* There is also a RelOptKind for "upper" relations, which are RelOptInfos
* that describe post-scan/join processing steps, such as aggregation.
* Many of the fields in these RelOptInfos are meaningless, but their Path
* boundinfo - Partition bounds
* nparts - Number of partitions
* part_rels - RelOptInfos for each partition
- * partexprs - Partition key expressions
+ * partexprs, nullable_partexprs - Partition key expressions
*
* Note: A base relation always has only one set of partition keys, but a join
* relation may have as many sets of partition keys as the number of relations
- * being joined. partexprs is an array containing part_scheme->partnatts
- * elements, each of which is a list of partition key expressions. For a base
- * relation each list contains only one expression, but for a join relation
- * there can be one per baserel.
+ * being joined. partexprs and nullable_partexprs are arrays containing
+ * part_scheme->partnatts elements each. Each of these elements is a list of
+ * partition key expressions. For a base relation each list in partexprs
+ * contains only one expression and nullable_partexprs is not populated. For a
+ * join relation, partexprs and nullable_partexprs contain partition key
+ * expressions from non-nullable and nullable relations resp. Lists at any
+ * given position in those arrays together contain as many elements as the
+ * number of joining relations.
*----------
*/
typedef enum RelOptKind
RELOPT_BASEREL,
RELOPT_JOINREL,
RELOPT_OTHER_MEMBER_REL,
+ RELOPT_OTHER_JOINREL,
RELOPT_UPPER_REL,
RELOPT_DEADREL
} RelOptKind;
(rel)->reloptkind == RELOPT_OTHER_MEMBER_REL)
/* Is the given relation a join relation? */
-#define IS_JOIN_REL(rel) ((rel)->reloptkind == RELOPT_JOINREL)
+#define IS_JOIN_REL(rel) \
+ ((rel)->reloptkind == RELOPT_JOINREL || \
+ (rel)->reloptkind == RELOPT_OTHER_JOINREL)
/* Is the given relation an upper relation? */
#define IS_UPPER_REL(rel) ((rel)->reloptkind == RELOPT_UPPER_REL)
/* Is the given relation an "other" relation? */
-#define IS_OTHER_REL(rel) ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL)
+#define IS_OTHER_REL(rel) \
+ ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL || \
+ (rel)->reloptkind == RELOPT_OTHER_JOINREL)
typedef struct RelOptInfo
{
struct PartitionBoundInfoData *boundinfo; /* Partition bounds */
struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions,
* stored in the same order of bounds */
- List **partexprs; /* Partition key expressions. */
+ List **partexprs; /* Non-nullable partition key expressions. */
+ List **nullable_partexprs; /* Nullable partition key expressions. */
} RelOptInfo;
+/*
+ * Is given relation partitioned?
+ *
+ * A join between two partitioned relations with same partitioning scheme
+ * without any matching partitions will not have any partition in it but will
+ * have partition scheme set. So a relation is deemed to be partitioned if it
+ * has a partitioning scheme, bounds and positive number of partitions.
+ */
+#define IS_PARTITIONED_REL(rel) \
+ ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0)
+
+/*
+ * Convenience macro to make sure that a partitioned relation has all the
+ * required members set.
+ */
+#define REL_HAS_ALL_PART_PROPS(rel) \
+ ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \
+ (rel)->part_rels && (rel)->partexprs && (rel)->nullable_partexprs)
+
/*
* IndexOptInfo
* Per-index information for planning/optimization
extern bool enable_mergejoin;
extern bool enable_hashjoin;
extern bool enable_gathermerge;
+extern bool enable_partition_wise_join;
extern int constraint_exclusion;
extern double clamp_row_est(double nrows);
extern Path *reparameterize_path(PlannerInfo *root, Path *path,
Relids required_outer,
double loop_count);
+extern Path *reparameterize_path_by_child(PlannerInfo *root, Path *path,
+ RelOptInfo *child_rel);
/*
* prototypes for relnode.c
Relids required_outer);
extern ParamPathInfo *find_param_path_info(RelOptInfo *rel,
Relids required_outer);
+extern RelOptInfo *build_child_join_rel(PlannerInfo *root,
+ RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+ RelOptInfo *parent_joinrel, List *restrictlist,
+ SpecialJoinInfo *sjinfo, JoinType jointype);
#endif /* PATHNODE_H */
double index_pages);
extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
Path *bitmapqual);
+extern void generate_partition_wise_join_paths(PlannerInfo *root,
+ RelOptInfo *rel);
#ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
RelOptInfo *rel1, RelOptInfo *rel2);
extern bool have_dangerous_phv(PlannerInfo *root,
Relids outer_relids, Relids inner_params);
+extern void mark_dummy_rel(RelOptInfo *rel);
+extern bool have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2,
+ JoinType jointype, List *restrictlist);
/*
* equivclass.c
extern void add_placeholders_to_base_rels(PlannerInfo *root);
extern void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outer_rel, RelOptInfo *inner_rel);
+extern void add_placeholders_to_child_joinrel(PlannerInfo *root,
+ RelOptInfo *childrel, RelOptInfo *parentrel);
#endif /* PLACEHOLDER_H */
extern bool plan_cluster_use_sort(Oid tableOid, Oid indexOid);
extern List *get_partitioned_child_rels(PlannerInfo *root, Index rti);
+extern List *get_partitioned_child_rels_for_join(PlannerInfo *root,
+ Relids join_relids);
#endif /* PLANNER_H */
extern AppendRelInfo **find_appinfos_by_relids(PlannerInfo *root,
Relids relids, int *nappinfos);
+extern SpecialJoinInfo *build_child_join_sjinfo(PlannerInfo *root,
+ SpecialJoinInfo *parent_sjinfo,
+ Relids left_relids, Relids right_relids);
+extern Relids adjust_child_relids_multilevel(PlannerInfo *root, Relids relids,
+ Relids child_relids, Relids top_parent_relids);
+
#endif /* PREP_H */
--- /dev/null
+--
+-- PARTITION_JOIN
+-- Test partition-wise join between partitioned tables
+--
+-- Enable partition-wise join, which by default is disabled.
+SET enable_partition_wise_join to true;
+--
+-- partitioned by a single column
+--
+CREATE TABLE prt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE prt1_p1 PARTITION OF prt1 FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_p3 PARTITION OF prt1 FOR VALUES FROM (500) TO (600);
+CREATE TABLE prt1_p2 PARTITION OF prt1 FOR VALUES FROM (250) TO (500);
+INSERT INTO prt1 SELECT i, i % 25, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 2 = 0;
+CREATE INDEX iprt1_p1_a on prt1_p1(a);
+CREATE INDEX iprt1_p2_a on prt1_p2(a);
+CREATE INDEX iprt1_p3_a on prt1_p3(a);
+ANALYZE prt1;
+CREATE TABLE prt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE prt2_p1 PARTITION OF prt2 FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_p2 PARTITION OF prt2 FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt2_p3 PARTITION OF prt2 FOR VALUES FROM (500) TO (600);
+INSERT INTO prt2 SELECT i % 25, i, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 3 = 0;
+CREATE INDEX iprt2_p1_b on prt2_p1(b);
+CREATE INDEX iprt2_p2_b on prt2_p2(b);
+CREATE INDEX iprt2_p3_b on prt2_p3(b);
+ANALYZE prt2;
+-- inner join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+--------------------------------------------------
+ Sort
+ Sort Key: t1.a
+ -> Append
+ -> Hash Join
+ Hash Cond: (t2.b = t1.a)
+ -> Seq Scan on prt2_p1 t2
+ -> Hash
+ -> Seq Scan on prt1_p1 t1
+ Filter: (b = 0)
+ -> Hash Join
+ Hash Cond: (t2_1.b = t1_1.a)
+ -> Seq Scan on prt2_p2 t2_1
+ -> Hash
+ -> Seq Scan on prt1_p2 t1_1
+ Filter: (b = 0)
+ -> Hash Join
+ Hash Cond: (t2_2.b = t1_2.a)
+ -> Seq Scan on prt2_p3 t2_2
+ -> Hash
+ -> Seq Scan on prt1_p3 t1_2
+ Filter: (b = 0)
+(21 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+------+-----+------
+ 0 | 0000 | 0 | 0000
+ 150 | 0150 | 150 | 0150
+ 300 | 0300 | 300 | 0300
+ 450 | 0450 | 450 | 0450
+(4 rows)
+
+-- left outer join, with whole-row reference
+EXPLAIN (COSTS OFF)
+SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+--------------------------------------------------------
+ Sort
+ Sort Key: t1.a, t2.b
+ -> Result
+ -> Append
+ -> Hash Right Join
+ Hash Cond: (t2.b = t1.a)
+ -> Seq Scan on prt2_p1 t2
+ -> Hash
+ -> Seq Scan on prt1_p1 t1
+ Filter: (b = 0)
+ -> Hash Right Join
+ Hash Cond: (t2_1.b = t1_1.a)
+ -> Seq Scan on prt2_p2 t2_1
+ -> Hash
+ -> Seq Scan on prt1_p2 t1_1
+ Filter: (b = 0)
+ -> Hash Right Join
+ Hash Cond: (t2_2.b = t1_2.a)
+ -> Seq Scan on prt2_p3 t2_2
+ -> Hash
+ -> Seq Scan on prt1_p3 t1_2
+ Filter: (b = 0)
+(22 rows)
+
+SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+ t1 | t2
+--------------+--------------
+ (0,0,0000) | (0,0,0000)
+ (50,0,0050) |
+ (100,0,0100) |
+ (150,0,0150) | (0,150,0150)
+ (200,0,0200) |
+ (250,0,0250) |
+ (300,0,0300) | (0,300,0300)
+ (350,0,0350) |
+ (400,0,0400) |
+ (450,0,0450) | (0,450,0450)
+ (500,0,0500) |
+ (550,0,0550) |
+(12 rows)
+
+-- right outer join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+---------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a, t2.b
+ -> Result
+ -> Append
+ -> Hash Right Join
+ Hash Cond: (t1.a = t2.b)
+ -> Seq Scan on prt1_p1 t1
+ -> Hash
+ -> Seq Scan on prt2_p1 t2
+ Filter: (a = 0)
+ -> Hash Right Join
+ Hash Cond: (t1_1.a = t2_1.b)
+ -> Seq Scan on prt1_p2 t1_1
+ -> Hash
+ -> Seq Scan on prt2_p2 t2_1
+ Filter: (a = 0)
+ -> Nested Loop Left Join
+ -> Seq Scan on prt2_p3 t2_2
+ Filter: (a = 0)
+ -> Index Scan using iprt1_p3_a on prt1_p3 t1_2
+ Index Cond: (a = t2_2.b)
+(21 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+------+-----+------
+ 0 | 0000 | 0 | 0000
+ 150 | 0150 | 150 | 0150
+ 300 | 0300 | 300 | 0300
+ 450 | 0450 | 450 | 0450
+ | | 75 | 0075
+ | | 225 | 0225
+ | | 375 | 0375
+ | | 525 | 0525
+(8 rows)
+
+-- full outer join, with placeholder vars
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b) WHERE t1.phv = t1.a OR t2.phv = t2.b ORDER BY t1.a, t2.b;
+ QUERY PLAN
+------------------------------------------------------------------
+ Sort
+ Sort Key: prt1_p1.a, prt2_p1.b
+ -> Append
+ -> Hash Full Join
+ Hash Cond: (prt1_p1.a = prt2_p1.b)
+ Filter: (((50) = prt1_p1.a) OR ((75) = prt2_p1.b))
+ -> Seq Scan on prt1_p1
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_p1
+ Filter: (a = 0)
+ -> Hash Full Join
+ Hash Cond: (prt1_p2.a = prt2_p2.b)
+ Filter: (((50) = prt1_p2.a) OR ((75) = prt2_p2.b))
+ -> Seq Scan on prt1_p2
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_p2
+ Filter: (a = 0)
+ -> Hash Full Join
+ Hash Cond: (prt1_p3.a = prt2_p3.b)
+ Filter: (((50) = prt1_p3.a) OR ((75) = prt2_p3.b))
+ -> Seq Scan on prt1_p3
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_p3
+ Filter: (a = 0)
+(27 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b) WHERE t1.phv = t1.a OR t2.phv = t2.b ORDER BY t1.a, t2.b;
+ a | c | b | c
+----+------+----+------
+ 50 | 0050 | |
+ | | 75 | 0075
+(2 rows)
+
+-- Join with pruned partitions from joining relations
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a < 450 AND t2.b > 250 AND t1.b = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+-----------------------------------------------------------
+ Sort
+ Sort Key: t1.a
+ -> Append
+ -> Hash Join
+ Hash Cond: (t2.b = t1.a)
+ -> Seq Scan on prt2_p2 t2
+ Filter: (b > 250)
+ -> Hash
+ -> Seq Scan on prt1_p2 t1
+ Filter: ((a < 450) AND (b = 0))
+(10 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a < 450 AND t2.b > 250 AND t1.b = 0 ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+------+-----+------
+ 300 | 0300 | 300 | 0300
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+-----------------------------------------------------------
+ Sort
+ Sort Key: prt1_p1.a, b
+ -> Append
+ -> Hash Left Join
+ Hash Cond: (prt1_p1.a = b)
+ -> Seq Scan on prt1_p1
+ Filter: ((a < 450) AND (b = 0))
+ -> Hash
+ -> Result
+ One-Time Filter: false
+ -> Hash Right Join
+ Hash Cond: (prt2_p2.b = prt1_p2.a)
+ -> Seq Scan on prt2_p2
+ Filter: (b > 250)
+ -> Hash
+ -> Seq Scan on prt1_p2
+ Filter: ((a < 450) AND (b = 0))
+(17 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+------+-----+------
+ 0 | 0000 | |
+ 50 | 0050 | |
+ 100 | 0100 | |
+ 150 | 0150 | |
+ 200 | 0200 | |
+ 250 | 0250 | |
+ 300 | 0300 | 300 | 0300
+ 350 | 0350 | |
+ 400 | 0400 | |
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 FULL JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 OR t2.a = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+------------------------------------------------------------
+ Sort
+ Sort Key: prt1_p1.a, b
+ -> Append
+ -> Hash Full Join
+ Hash Cond: (prt1_p1.a = b)
+ Filter: ((prt1_p1.b = 0) OR (a = 0))
+ -> Seq Scan on prt1_p1
+ Filter: (a < 450)
+ -> Hash
+ -> Result
+ One-Time Filter: false
+ -> Hash Full Join
+ Hash Cond: (prt1_p2.a = prt2_p2.b)
+ Filter: ((prt1_p2.b = 0) OR (prt2_p2.a = 0))
+ -> Seq Scan on prt1_p2
+ Filter: (a < 450)
+ -> Hash
+ -> Seq Scan on prt2_p2
+ Filter: (b > 250)
+ -> Hash Full Join
+ Hash Cond: (prt2_p3.b = a)
+ Filter: ((b = 0) OR (prt2_p3.a = 0))
+ -> Seq Scan on prt2_p3
+ Filter: (b > 250)
+ -> Hash
+ -> Result
+ One-Time Filter: false
+(27 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 FULL JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 OR t2.a = 0 ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+------+-----+------
+ 0 | 0000 | |
+ 50 | 0050 | |
+ 100 | 0100 | |
+ 150 | 0150 | |
+ 200 | 0200 | |
+ 250 | 0250 | |
+ 300 | 0300 | 300 | 0300
+ 350 | 0350 | |
+ 400 | 0400 | |
+ | | 375 | 0375
+ | | 450 | 0450
+ | | 525 | 0525
+(12 rows)
+
+-- Semi-join
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t2.b FROM prt2 t2 WHERE t2.a = 0) AND t1.b = 0 ORDER BY t1.a;
+ QUERY PLAN
+--------------------------------------------------
+ Sort
+ Sort Key: t1.a
+ -> Append
+ -> Hash Semi Join
+ Hash Cond: (t1.a = t2.b)
+ -> Seq Scan on prt1_p1 t1
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_p1 t2
+ Filter: (a = 0)
+ -> Hash Semi Join
+ Hash Cond: (t1_1.a = t2_1.b)
+ -> Seq Scan on prt1_p2 t1_1
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_p2 t2_1
+ Filter: (a = 0)
+ -> Nested Loop Semi Join
+ Join Filter: (t1_2.a = t2_2.b)
+ -> Seq Scan on prt1_p3 t1_2
+ Filter: (b = 0)
+ -> Materialize
+ -> Seq Scan on prt2_p3 t2_2
+ Filter: (a = 0)
+(24 rows)
+
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t2.b FROM prt2 t2 WHERE t2.a = 0) AND t1.b = 0 ORDER BY t1.a;
+ a | b | c
+-----+---+------
+ 0 | 0 | 0000
+ 150 | 0 | 0150
+ 300 | 0 | 0300
+ 450 | 0 | 0450
+(4 rows)
+
+-- Anti-join with aggregates
+EXPLAIN (COSTS OFF)
+SELECT sum(t1.a), avg(t1.a), sum(t1.b), avg(t1.b) FROM prt1 t1 WHERE NOT EXISTS (SELECT 1 FROM prt2 t2 WHERE t1.a = t2.b);
+ QUERY PLAN
+--------------------------------------------------
+ Aggregate
+ -> Append
+ -> Hash Anti Join
+ Hash Cond: (t1.a = t2.b)
+ -> Seq Scan on prt1_p1 t1
+ -> Hash
+ -> Seq Scan on prt2_p1 t2
+ -> Hash Anti Join
+ Hash Cond: (t1_1.a = t2_1.b)
+ -> Seq Scan on prt1_p2 t1_1
+ -> Hash
+ -> Seq Scan on prt2_p2 t2_1
+ -> Hash Anti Join
+ Hash Cond: (t1_2.a = t2_2.b)
+ -> Seq Scan on prt1_p3 t1_2
+ -> Hash
+ -> Seq Scan on prt2_p3 t2_2
+(17 rows)
+
+SELECT sum(t1.a), avg(t1.a), sum(t1.b), avg(t1.b) FROM prt1 t1 WHERE NOT EXISTS (SELECT 1 FROM prt2 t2 WHERE t1.a = t2.b);
+ sum | avg | sum | avg
+-------+----------------------+------+---------------------
+ 60000 | 300.0000000000000000 | 2400 | 12.0000000000000000
+(1 row)
+
+-- lateral reference
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1 t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t3.a AS t3a, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+ ON t1.a = ss.t2a WHERE t1.b = 0 ORDER BY t1.a;
+ QUERY PLAN
+--------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a
+ -> Result
+ -> Append
+ -> Nested Loop Left Join
+ -> Seq Scan on prt1_p1 t1
+ Filter: (b = 0)
+ -> Nested Loop
+ -> Index Only Scan using iprt1_p1_a on prt1_p1 t2
+ Index Cond: (a = t1.a)
+ -> Index Scan using iprt2_p1_b on prt2_p1 t3
+ Index Cond: (b = t2.a)
+ -> Nested Loop Left Join
+ -> Seq Scan on prt1_p2 t1_1
+ Filter: (b = 0)
+ -> Nested Loop
+ -> Index Only Scan using iprt1_p2_a on prt1_p2 t2_1
+ Index Cond: (a = t1_1.a)
+ -> Index Scan using iprt2_p2_b on prt2_p2 t3_1
+ Index Cond: (b = t2_1.a)
+ -> Nested Loop Left Join
+ -> Seq Scan on prt1_p3 t1_2
+ Filter: (b = 0)
+ -> Nested Loop
+ -> Index Only Scan using iprt1_p3_a on prt1_p3 t2_2
+ Index Cond: (a = t1_2.a)
+ -> Index Scan using iprt2_p3_b on prt2_p3 t3_2
+ Index Cond: (b = t2_2.a)
+(28 rows)
+
+SELECT * FROM prt1 t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t3.a AS t3a, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+ ON t1.a = ss.t2a WHERE t1.b = 0 ORDER BY t1.a;
+ a | b | c | t2a | t3a | least
+-----+---+------+-----+-----+-------
+ 0 | 0 | 0000 | 0 | 0 | 0
+ 50 | 0 | 0050 | | |
+ 100 | 0 | 0100 | | |
+ 150 | 0 | 0150 | 150 | 0 | 150
+ 200 | 0 | 0200 | | |
+ 250 | 0 | 0250 | | |
+ 300 | 0 | 0300 | 300 | 0 | 300
+ 350 | 0 | 0350 | | |
+ 400 | 0 | 0400 | | |
+ 450 | 0 | 0450 | 450 | 0 | 450
+ 500 | 0 | 0500 | | |
+ 550 | 0 | 0550 | | |
+(12 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+ ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
+ QUERY PLAN
+--------------------------------------------------------------
+ Sort
+ Sort Key: t1.a
+ -> Hash Left Join
+ Hash Cond: ((t1.c)::text = (t2.c)::text)
+ Filter: ((t1.b + COALESCE(t2.b, 0)) = 0)
+ -> Append
+ -> Seq Scan on prt1_p1 t1
+ -> Seq Scan on prt1_p2 t1_1
+ -> Seq Scan on prt1_p3 t1_2
+ -> Hash
+ -> Append
+ -> Hash Join
+ Hash Cond: (t2.a = t3.b)
+ -> Seq Scan on prt1_p1 t2
+ -> Hash
+ -> Seq Scan on prt2_p1 t3
+ -> Hash Join
+ Hash Cond: (t2_1.a = t3_1.b)
+ -> Seq Scan on prt1_p2 t2_1
+ -> Hash
+ -> Seq Scan on prt2_p2 t3_1
+ -> Hash Join
+ Hash Cond: (t2_2.a = t3_2.b)
+ -> Seq Scan on prt1_p3 t2_2
+ -> Hash
+ -> Seq Scan on prt2_p3 t3_2
+(26 rows)
+
+SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+ ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
+ a | t2a | t2c
+-----+-----+------
+ 0 | 0 | 0000
+ 50 | |
+ 100 | |
+ 150 | 150 | 0150
+ 200 | |
+ 250 | |
+ 300 | 300 | 0300
+ 350 | |
+ 400 | |
+ 450 | 450 | 0450
+ 500 | |
+ 550 | |
+(12 rows)
+
+--
+-- partitioned by expression
+--
+CREATE TABLE prt1_e (a int, b int, c int) PARTITION BY RANGE(((a + b)/2));
+CREATE TABLE prt1_e_p1 PARTITION OF prt1_e FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_e_p2 PARTITION OF prt1_e FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt1_e_p3 PARTITION OF prt1_e FOR VALUES FROM (500) TO (600);
+INSERT INTO prt1_e SELECT i, i, i % 25 FROM generate_series(0, 599, 2) i;
+CREATE INDEX iprt1_e_p1_ab2 on prt1_e_p1(((a+b)/2));
+CREATE INDEX iprt1_e_p2_ab2 on prt1_e_p2(((a+b)/2));
+CREATE INDEX iprt1_e_p3_ab2 on prt1_e_p3(((a+b)/2));
+ANALYZE prt1_e;
+CREATE TABLE prt2_e (a int, b int, c int) PARTITION BY RANGE(((b + a)/2));
+CREATE TABLE prt2_e_p1 PARTITION OF prt2_e FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_e_p2 PARTITION OF prt2_e FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt2_e_p3 PARTITION OF prt2_e FOR VALUES FROM (500) TO (600);
+INSERT INTO prt2_e SELECT i, i, i % 25 FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_e;
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_e t1, prt2_e t2 WHERE (t1.a + t1.b)/2 = (t2.b + t2.a)/2 AND t1.c = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a, t2.b
+ -> Append
+ -> Hash Join
+ Hash Cond: (((t2.b + t2.a) / 2) = ((t1.a + t1.b) / 2))
+ -> Seq Scan on prt2_e_p1 t2
+ -> Hash
+ -> Seq Scan on prt1_e_p1 t1
+ Filter: (c = 0)
+ -> Hash Join
+ Hash Cond: (((t2_1.b + t2_1.a) / 2) = ((t1_1.a + t1_1.b) / 2))
+ -> Seq Scan on prt2_e_p2 t2_1
+ -> Hash
+ -> Seq Scan on prt1_e_p2 t1_1
+ Filter: (c = 0)
+ -> Hash Join
+ Hash Cond: (((t2_2.b + t2_2.a) / 2) = ((t1_2.a + t1_2.b) / 2))
+ -> Seq Scan on prt2_e_p3 t2_2
+ -> Hash
+ -> Seq Scan on prt1_e_p3 t1_2
+ Filter: (c = 0)
+(21 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_e t1, prt2_e t2 WHERE (t1.a + t1.b)/2 = (t2.b + t2.a)/2 AND t1.c = 0 ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+---+-----+---
+ 0 | 0 | 0 | 0
+ 150 | 0 | 150 | 0
+ 300 | 0 | 300 | 0
+ 450 | 0 | 450 | 0
+(4 rows)
+
+--
+-- N-way join
+--
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM prt1 t1, prt2 t2, prt1_e t3 WHERE t1.a = t2.b AND t1.a = (t3.a + t3.b)/2 AND t1.b = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a
+ -> Result
+ -> Append
+ -> Nested Loop
+ Join Filter: (t1.a = ((t3.a + t3.b) / 2))
+ -> Hash Join
+ Hash Cond: (t2.b = t1.a)
+ -> Seq Scan on prt2_p1 t2
+ -> Hash
+ -> Seq Scan on prt1_p1 t1
+ Filter: (b = 0)
+ -> Index Scan using iprt1_e_p1_ab2 on prt1_e_p1 t3
+ Index Cond: (((a + b) / 2) = t2.b)
+ -> Nested Loop
+ Join Filter: (t1_1.a = ((t3_1.a + t3_1.b) / 2))
+ -> Hash Join
+ Hash Cond: (t2_1.b = t1_1.a)
+ -> Seq Scan on prt2_p2 t2_1
+ -> Hash
+ -> Seq Scan on prt1_p2 t1_1
+ Filter: (b = 0)
+ -> Index Scan using iprt1_e_p2_ab2 on prt1_e_p2 t3_1
+ Index Cond: (((a + b) / 2) = t2_1.b)
+ -> Nested Loop
+ Join Filter: (t1_2.a = ((t3_2.a + t3_2.b) / 2))
+ -> Hash Join
+ Hash Cond: (t2_2.b = t1_2.a)
+ -> Seq Scan on prt2_p3 t2_2
+ -> Hash
+ -> Seq Scan on prt1_p3 t1_2
+ Filter: (b = 0)
+ -> Index Scan using iprt1_e_p3_ab2 on prt1_e_p3 t3_2
+ Index Cond: (((a + b) / 2) = t2_2.b)
+(34 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM prt1 t1, prt2 t2, prt1_e t3 WHERE t1.a = t2.b AND t1.a = (t3.a + t3.b)/2 AND t1.b = 0 ORDER BY t1.a, t2.b;
+ a | c | b | c | ?column? | c
+-----+------+-----+------+----------+---
+ 0 | 0000 | 0 | 0000 | 0 | 0
+ 150 | 0150 | 150 | 0150 | 300 | 0
+ 300 | 0300 | 300 | 0300 | 600 | 0
+ 450 | 0450 | 450 | 0450 | 900 | 0
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) LEFT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.b = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+ QUERY PLAN
+--------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a, t2.b, ((t3.a + t3.b))
+ -> Result
+ -> Append
+ -> Hash Right Join
+ Hash Cond: (((t3.a + t3.b) / 2) = t1.a)
+ -> Seq Scan on prt1_e_p1 t3
+ -> Hash
+ -> Hash Right Join
+ Hash Cond: (t2.b = t1.a)
+ -> Seq Scan on prt2_p1 t2
+ -> Hash
+ -> Seq Scan on prt1_p1 t1
+ Filter: (b = 0)
+ -> Hash Right Join
+ Hash Cond: (((t3_1.a + t3_1.b) / 2) = t1_1.a)
+ -> Seq Scan on prt1_e_p2 t3_1
+ -> Hash
+ -> Hash Right Join
+ Hash Cond: (t2_1.b = t1_1.a)
+ -> Seq Scan on prt2_p2 t2_1
+ -> Hash
+ -> Seq Scan on prt1_p2 t1_1
+ Filter: (b = 0)
+ -> Hash Right Join
+ Hash Cond: (((t3_2.a + t3_2.b) / 2) = t1_2.a)
+ -> Seq Scan on prt1_e_p3 t3_2
+ -> Hash
+ -> Hash Right Join
+ Hash Cond: (t2_2.b = t1_2.a)
+ -> Seq Scan on prt2_p3 t2_2
+ -> Hash
+ -> Seq Scan on prt1_p3 t1_2
+ Filter: (b = 0)
+(34 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) LEFT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.b = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+ a | c | b | c | ?column? | c
+-----+------+-----+------+----------+---
+ 0 | 0000 | 0 | 0000 | 0 | 0
+ 50 | 0050 | | | 100 | 0
+ 100 | 0100 | | | 200 | 0
+ 150 | 0150 | 150 | 0150 | 300 | 0
+ 200 | 0200 | | | 400 | 0
+ 250 | 0250 | | | 500 | 0
+ 300 | 0300 | 300 | 0300 | 600 | 0
+ 350 | 0350 | | | 700 | 0
+ 400 | 0400 | | | 800 | 0
+ 450 | 0450 | 450 | 0450 | 900 | 0
+ 500 | 0500 | | | 1000 | 0
+ 550 | 0550 | | | 1100 | 0
+(12 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+ QUERY PLAN
+-------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a, t2.b, ((t3.a + t3.b))
+ -> Result
+ -> Append
+ -> Nested Loop Left Join
+ -> Hash Right Join
+ Hash Cond: (t1.a = ((t3.a + t3.b) / 2))
+ -> Seq Scan on prt1_p1 t1
+ -> Hash
+ -> Seq Scan on prt1_e_p1 t3
+ Filter: (c = 0)
+ -> Index Scan using iprt2_p1_b on prt2_p1 t2
+ Index Cond: (t1.a = b)
+ -> Nested Loop Left Join
+ -> Hash Right Join
+ Hash Cond: (t1_1.a = ((t3_1.a + t3_1.b) / 2))
+ -> Seq Scan on prt1_p2 t1_1
+ -> Hash
+ -> Seq Scan on prt1_e_p2 t3_1
+ Filter: (c = 0)
+ -> Index Scan using iprt2_p2_b on prt2_p2 t2_1
+ Index Cond: (t1_1.a = b)
+ -> Nested Loop Left Join
+ -> Hash Right Join
+ Hash Cond: (t1_2.a = ((t3_2.a + t3_2.b) / 2))
+ -> Seq Scan on prt1_p3 t1_2
+ -> Hash
+ -> Seq Scan on prt1_e_p3 t3_2
+ Filter: (c = 0)
+ -> Index Scan using iprt2_p3_b on prt2_p3 t2_2
+ Index Cond: (t1_2.a = b)
+(31 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+ a | c | b | c | ?column? | c
+-----+------+-----+------+----------+---
+ 0 | 0000 | 0 | 0000 | 0 | 0
+ 50 | 0050 | | | 100 | 0
+ 100 | 0100 | | | 200 | 0
+ 150 | 0150 | 150 | 0150 | 300 | 0
+ 200 | 0200 | | | 400 | 0
+ 250 | 0250 | | | 500 | 0
+ 300 | 0300 | 300 | 0300 | 600 | 0
+ 350 | 0350 | | | 700 | 0
+ 400 | 0400 | | | 800 | 0
+ 450 | 0450 | 450 | 0450 | 900 | 0
+ 500 | 0500 | | | 1000 | 0
+ 550 | 0550 | | | 1100 | 0
+(12 rows)
+
+-- Cases with non-nullable expressions in subquery results;
+-- make sure these go to null as expected
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b)) FULL JOIN (SELECT 50 phv, * FROM prt1_e WHERE prt1_e.c = 0) t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.a = t1.phv OR t2.b = t2.phv OR (t3.a + t3.b)/2 = t3.phv ORDER BY t1.a, t2.b, t3.a + t3.b;
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: prt1_p1.a, prt2_p1.b, ((prt1_e_p1.a + prt1_e_p1.b))
+ -> Result
+ -> Append
+ -> Hash Full Join
+ Hash Cond: (prt1_p1.a = ((prt1_e_p1.a + prt1_e_p1.b) / 2))
+ Filter: ((prt1_p1.a = (50)) OR (prt2_p1.b = (75)) OR (((prt1_e_p1.a + prt1_e_p1.b) / 2) = (50)))
+ -> Hash Full Join
+ Hash Cond: (prt1_p1.a = prt2_p1.b)
+ -> Seq Scan on prt1_p1
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_p1
+ Filter: (a = 0)
+ -> Hash
+ -> Seq Scan on prt1_e_p1
+ Filter: (c = 0)
+ -> Hash Full Join
+ Hash Cond: (prt1_p2.a = ((prt1_e_p2.a + prt1_e_p2.b) / 2))
+ Filter: ((prt1_p2.a = (50)) OR (prt2_p2.b = (75)) OR (((prt1_e_p2.a + prt1_e_p2.b) / 2) = (50)))
+ -> Hash Full Join
+ Hash Cond: (prt1_p2.a = prt2_p2.b)
+ -> Seq Scan on prt1_p2
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_p2
+ Filter: (a = 0)
+ -> Hash
+ -> Seq Scan on prt1_e_p2
+ Filter: (c = 0)
+ -> Hash Full Join
+ Hash Cond: (prt1_p3.a = ((prt1_e_p3.a + prt1_e_p3.b) / 2))
+ Filter: ((prt1_p3.a = (50)) OR (prt2_p3.b = (75)) OR (((prt1_e_p3.a + prt1_e_p3.b) / 2) = (50)))
+ -> Hash Full Join
+ Hash Cond: (prt1_p3.a = prt2_p3.b)
+ -> Seq Scan on prt1_p3
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_p3
+ Filter: (a = 0)
+ -> Hash
+ -> Seq Scan on prt1_e_p3
+ Filter: (c = 0)
+(43 rows)
+
+SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b)) FULL JOIN (SELECT 50 phv, * FROM prt1_e WHERE prt1_e.c = 0) t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.a = t1.phv OR t2.b = t2.phv OR (t3.a + t3.b)/2 = t3.phv ORDER BY t1.a, t2.b, t3.a + t3.b;
+ a | phv | b | phv | ?column? | phv
+----+-----+----+-----+----------+-----
+ 50 | 50 | | | 100 | 50
+ | | 75 | 75 | |
+(2 rows)
+
+-- Semi-join
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
+ QUERY PLAN
+---------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a
+ -> Append
+ -> Nested Loop
+ Join Filter: (t1.a = t1_3.b)
+ -> HashAggregate
+ Group Key: t1_3.b
+ -> Hash Join
+ Hash Cond: (((t2.a + t2.b) / 2) = t1_3.b)
+ -> Seq Scan on prt1_e_p1 t2
+ -> Hash
+ -> Seq Scan on prt2_p1 t1_3
+ Filter: (a = 0)
+ -> Index Scan using iprt1_p1_a on prt1_p1 t1
+ Index Cond: (a = ((t2.a + t2.b) / 2))
+ Filter: (b = 0)
+ -> Nested Loop
+ Join Filter: (t1_1.a = t1_4.b)
+ -> HashAggregate
+ Group Key: t1_4.b
+ -> Hash Join
+ Hash Cond: (((t2_1.a + t2_1.b) / 2) = t1_4.b)
+ -> Seq Scan on prt1_e_p2 t2_1
+ -> Hash
+ -> Seq Scan on prt2_p2 t1_4
+ Filter: (a = 0)
+ -> Index Scan using iprt1_p2_a on prt1_p2 t1_1
+ Index Cond: (a = ((t2_1.a + t2_1.b) / 2))
+ Filter: (b = 0)
+ -> Nested Loop
+ Join Filter: (t1_2.a = t1_5.b)
+ -> HashAggregate
+ Group Key: t1_5.b
+ -> Nested Loop
+ -> Seq Scan on prt2_p3 t1_5
+ Filter: (a = 0)
+ -> Index Scan using iprt1_e_p3_ab2 on prt1_e_p3 t2_2
+ Index Cond: (((a + b) / 2) = t1_5.b)
+ -> Index Scan using iprt1_p3_a on prt1_p3 t1_2
+ Index Cond: (a = ((t2_2.a + t2_2.b) / 2))
+ Filter: (b = 0)
+(41 rows)
+
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
+ a | b | c
+-----+---+------
+ 0 | 0 | 0000
+ 150 | 0 | 0150
+ 300 | 0 | 0300
+ 450 | 0 | 0450
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a
+ -> Append
+ -> Nested Loop
+ -> HashAggregate
+ Group Key: t1_3.b
+ -> Hash Semi Join
+ Hash Cond: (t1_3.b = ((t1_6.a + t1_6.b) / 2))
+ -> Seq Scan on prt2_p1 t1_3
+ -> Hash
+ -> Seq Scan on prt1_e_p1 t1_6
+ Filter: (c = 0)
+ -> Index Scan using iprt1_p1_a on prt1_p1 t1
+ Index Cond: (a = t1_3.b)
+ Filter: (b = 0)
+ -> Nested Loop
+ -> HashAggregate
+ Group Key: t1_4.b
+ -> Hash Semi Join
+ Hash Cond: (t1_4.b = ((t1_7.a + t1_7.b) / 2))
+ -> Seq Scan on prt2_p2 t1_4
+ -> Hash
+ -> Seq Scan on prt1_e_p2 t1_7
+ Filter: (c = 0)
+ -> Index Scan using iprt1_p2_a on prt1_p2 t1_1
+ Index Cond: (a = t1_4.b)
+ Filter: (b = 0)
+ -> Nested Loop
+ -> Unique
+ -> Sort
+ Sort Key: t1_5.b
+ -> Hash Semi Join
+ Hash Cond: (t1_5.b = ((t1_8.a + t1_8.b) / 2))
+ -> Seq Scan on prt2_p3 t1_5
+ -> Hash
+ -> Seq Scan on prt1_e_p3 t1_8
+ Filter: (c = 0)
+ -> Index Scan using iprt1_p3_a on prt1_p3 t1_2
+ Index Cond: (a = t1_5.b)
+ Filter: (b = 0)
+(40 rows)
+
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+ a | b | c
+-----+---+------
+ 0 | 0 | 0000
+ 150 | 0 | 0150
+ 300 | 0 | 0300
+ 450 | 0 | 0450
+(4 rows)
+
+-- test merge joins
+SET enable_hashjoin TO off;
+SET enable_nestloop TO off;
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+ QUERY PLAN
+----------------------------------------------------------------
+ Merge Append
+ Sort Key: t1.a
+ -> Merge Semi Join
+ Merge Cond: (t1.a = t1_3.b)
+ -> Sort
+ Sort Key: t1.a
+ -> Seq Scan on prt1_p1 t1
+ Filter: (b = 0)
+ -> Merge Semi Join
+ Merge Cond: (t1_3.b = (((t1_6.a + t1_6.b) / 2)))
+ -> Sort
+ Sort Key: t1_3.b
+ -> Seq Scan on prt2_p1 t1_3
+ -> Sort
+ Sort Key: (((t1_6.a + t1_6.b) / 2))
+ -> Seq Scan on prt1_e_p1 t1_6
+ Filter: (c = 0)
+ -> Merge Semi Join
+ Merge Cond: (t1_1.a = t1_4.b)
+ -> Sort
+ Sort Key: t1_1.a
+ -> Seq Scan on prt1_p2 t1_1
+ Filter: (b = 0)
+ -> Merge Semi Join
+ Merge Cond: (t1_4.b = (((t1_7.a + t1_7.b) / 2)))
+ -> Sort
+ Sort Key: t1_4.b
+ -> Seq Scan on prt2_p2 t1_4
+ -> Sort
+ Sort Key: (((t1_7.a + t1_7.b) / 2))
+ -> Seq Scan on prt1_e_p2 t1_7
+ Filter: (c = 0)
+ -> Merge Semi Join
+ Merge Cond: (t1_2.a = t1_5.b)
+ -> Sort
+ Sort Key: t1_2.a
+ -> Seq Scan on prt1_p3 t1_2
+ Filter: (b = 0)
+ -> Merge Semi Join
+ Merge Cond: (t1_5.b = (((t1_8.a + t1_8.b) / 2)))
+ -> Sort
+ Sort Key: t1_5.b
+ -> Seq Scan on prt2_p3 t1_5
+ -> Sort
+ Sort Key: (((t1_8.a + t1_8.b) / 2))
+ -> Seq Scan on prt1_e_p3 t1_8
+ Filter: (c = 0)
+(47 rows)
+
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+ a | b | c
+-----+---+------
+ 0 | 0 | 0000
+ 150 | 0 | 0150
+ 300 | 0 | 0300
+ 450 | 0 | 0450
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a, t2.b, ((t3.a + t3.b))
+ -> Result
+ -> Append
+ -> Merge Left Join
+ Merge Cond: (t1.a = t2.b)
+ -> Sort
+ Sort Key: t1.a
+ -> Merge Left Join
+ Merge Cond: ((((t3.a + t3.b) / 2)) = t1.a)
+ -> Sort
+ Sort Key: (((t3.a + t3.b) / 2))
+ -> Seq Scan on prt1_e_p1 t3
+ Filter: (c = 0)
+ -> Sort
+ Sort Key: t1.a
+ -> Seq Scan on prt1_p1 t1
+ -> Sort
+ Sort Key: t2.b
+ -> Seq Scan on prt2_p1 t2
+ -> Merge Left Join
+ Merge Cond: (t1_1.a = t2_1.b)
+ -> Sort
+ Sort Key: t1_1.a
+ -> Merge Left Join
+ Merge Cond: ((((t3_1.a + t3_1.b) / 2)) = t1_1.a)
+ -> Sort
+ Sort Key: (((t3_1.a + t3_1.b) / 2))
+ -> Seq Scan on prt1_e_p2 t3_1
+ Filter: (c = 0)
+ -> Sort
+ Sort Key: t1_1.a
+ -> Seq Scan on prt1_p2 t1_1
+ -> Sort
+ Sort Key: t2_1.b
+ -> Seq Scan on prt2_p2 t2_1
+ -> Merge Left Join
+ Merge Cond: (t1_2.a = t2_2.b)
+ -> Sort
+ Sort Key: t1_2.a
+ -> Merge Left Join
+ Merge Cond: ((((t3_2.a + t3_2.b) / 2)) = t1_2.a)
+ -> Sort
+ Sort Key: (((t3_2.a + t3_2.b) / 2))
+ -> Seq Scan on prt1_e_p3 t3_2
+ Filter: (c = 0)
+ -> Sort
+ Sort Key: t1_2.a
+ -> Seq Scan on prt1_p3 t1_2
+ -> Sort
+ Sort Key: t2_2.b
+ -> Seq Scan on prt2_p3 t2_2
+(52 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+ a | c | b | c | ?column? | c
+-----+------+-----+------+----------+---
+ 0 | 0000 | 0 | 0000 | 0 | 0
+ 50 | 0050 | | | 100 | 0
+ 100 | 0100 | | | 200 | 0
+ 150 | 0150 | 150 | 0150 | 300 | 0
+ 200 | 0200 | | | 400 | 0
+ 250 | 0250 | | | 500 | 0
+ 300 | 0300 | 300 | 0300 | 600 | 0
+ 350 | 0350 | | | 700 | 0
+ 400 | 0400 | | | 800 | 0
+ 450 | 0450 | 450 | 0450 | 900 | 0
+ 500 | 0500 | | | 1000 | 0
+ 550 | 0550 | | | 1100 | 0
+(12 rows)
+
+-- MergeAppend on nullable column
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+-----------------------------------------------------------
+ Sort
+ Sort Key: prt1_p1.a, b
+ -> Append
+ -> Merge Left Join
+ Merge Cond: (prt1_p1.a = b)
+ -> Sort
+ Sort Key: prt1_p1.a
+ -> Seq Scan on prt1_p1
+ Filter: ((a < 450) AND (b = 0))
+ -> Sort
+ Sort Key: b
+ -> Result
+ One-Time Filter: false
+ -> Merge Left Join
+ Merge Cond: (prt1_p2.a = prt2_p2.b)
+ -> Sort
+ Sort Key: prt1_p2.a
+ -> Seq Scan on prt1_p2
+ Filter: ((a < 450) AND (b = 0))
+ -> Sort
+ Sort Key: prt2_p2.b
+ -> Seq Scan on prt2_p2
+ Filter: (b > 250)
+(23 rows)
+
+SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+ a | b
+-----+-----
+ 0 |
+ 50 |
+ 100 |
+ 150 |
+ 200 |
+ 250 |
+ 300 | 300
+ 350 |
+ 400 |
+(9 rows)
+
+RESET enable_hashjoin;
+RESET enable_nestloop;
+--
+-- partitioned by multiple columns
+--
+CREATE TABLE prt1_m (a int, b int, c int) PARTITION BY RANGE(a, ((a + b)/2));
+CREATE TABLE prt1_m_p1 PARTITION OF prt1_m FOR VALUES FROM (0, 0) TO (250, 250);
+CREATE TABLE prt1_m_p2 PARTITION OF prt1_m FOR VALUES FROM (250, 250) TO (500, 500);
+CREATE TABLE prt1_m_p3 PARTITION OF prt1_m FOR VALUES FROM (500, 500) TO (600, 600);
+INSERT INTO prt1_m SELECT i, i, i % 25 FROM generate_series(0, 599, 2) i;
+ANALYZE prt1_m;
+CREATE TABLE prt2_m (a int, b int, c int) PARTITION BY RANGE(((b + a)/2), b);
+CREATE TABLE prt2_m_p1 PARTITION OF prt2_m FOR VALUES FROM (0, 0) TO (250, 250);
+CREATE TABLE prt2_m_p2 PARTITION OF prt2_m FOR VALUES FROM (250, 250) TO (500, 500);
+CREATE TABLE prt2_m_p3 PARTITION OF prt2_m FOR VALUES FROM (500, 500) TO (600, 600);
+INSERT INTO prt2_m SELECT i, i, i % 25 FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_m;
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_m WHERE prt1_m.c = 0) t1 FULL JOIN (SELECT * FROM prt2_m WHERE prt2_m.c = 0) t2 ON (t1.a = (t2.b + t2.a)/2 AND t2.b = (t1.a + t1.b)/2) ORDER BY t1.a, t2.b;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: prt1_m_p1.a, prt2_m_p1.b
+ -> Append
+ -> Hash Full Join
+ Hash Cond: ((prt1_m_p1.a = ((prt2_m_p1.b + prt2_m_p1.a) / 2)) AND (((prt1_m_p1.a + prt1_m_p1.b) / 2) = prt2_m_p1.b))
+ -> Seq Scan on prt1_m_p1
+ Filter: (c = 0)
+ -> Hash
+ -> Seq Scan on prt2_m_p1
+ Filter: (c = 0)
+ -> Hash Full Join
+ Hash Cond: ((prt1_m_p2.a = ((prt2_m_p2.b + prt2_m_p2.a) / 2)) AND (((prt1_m_p2.a + prt1_m_p2.b) / 2) = prt2_m_p2.b))
+ -> Seq Scan on prt1_m_p2
+ Filter: (c = 0)
+ -> Hash
+ -> Seq Scan on prt2_m_p2
+ Filter: (c = 0)
+ -> Hash Full Join
+ Hash Cond: ((prt1_m_p3.a = ((prt2_m_p3.b + prt2_m_p3.a) / 2)) AND (((prt1_m_p3.a + prt1_m_p3.b) / 2) = prt2_m_p3.b))
+ -> Seq Scan on prt1_m_p3
+ Filter: (c = 0)
+ -> Hash
+ -> Seq Scan on prt2_m_p3
+ Filter: (c = 0)
+(24 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_m WHERE prt1_m.c = 0) t1 FULL JOIN (SELECT * FROM prt2_m WHERE prt2_m.c = 0) t2 ON (t1.a = (t2.b + t2.a)/2 AND t2.b = (t1.a + t1.b)/2) ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+---+-----+---
+ 0 | 0 | 0 | 0
+ 50 | 0 | |
+ 100 | 0 | |
+ 150 | 0 | 150 | 0
+ 200 | 0 | |
+ 250 | 0 | |
+ 300 | 0 | 300 | 0
+ 350 | 0 | |
+ 400 | 0 | |
+ 450 | 0 | 450 | 0
+ 500 | 0 | |
+ 550 | 0 | |
+ | | 75 | 0
+ | | 225 | 0
+ | | 375 | 0
+ | | 525 | 0
+(16 rows)
+
+--
+-- tests for list partitioned tables.
+--
+CREATE TABLE plt1 (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE plt1_p1 PARTITION OF plt1 FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt1_p2 PARTITION OF plt1 FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt1_p3 PARTITION OF plt1 FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE plt1;
+CREATE TABLE plt2 (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE plt2_p1 PARTITION OF plt2 FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt2_p2 PARTITION OF plt2 FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt2_p3 PARTITION OF plt2 FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 3) i;
+ANALYZE plt2;
+--
+-- list partitioned by expression
+--
+CREATE TABLE plt1_e (a int, b int, c text) PARTITION BY LIST(ltrim(c, 'A'));
+CREATE TABLE plt1_e_p1 PARTITION OF plt1_e FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt1_e_p2 PARTITION OF plt1_e FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt1_e_p3 PARTITION OF plt1_e FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt1_e SELECT i, i, 'A' || to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE plt1_e;
+-- test partition matching with N-way join
+EXPLAIN (COSTS OFF)
+SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, plt2 t2, plt1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c;
+ QUERY PLAN
+--------------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.c, t3.c
+ -> HashAggregate
+ Group Key: t1.c, t2.c, t3.c
+ -> Result
+ -> Append
+ -> Hash Join
+ Hash Cond: (t1.c = t2.c)
+ -> Seq Scan on plt1_p1 t1
+ -> Hash
+ -> Hash Join
+ Hash Cond: (t2.c = ltrim(t3.c, 'A'::text))
+ -> Seq Scan on plt2_p1 t2
+ -> Hash
+ -> Seq Scan on plt1_e_p1 t3
+ -> Hash Join
+ Hash Cond: (t1_1.c = t2_1.c)
+ -> Seq Scan on plt1_p2 t1_1
+ -> Hash
+ -> Hash Join
+ Hash Cond: (t2_1.c = ltrim(t3_1.c, 'A'::text))
+ -> Seq Scan on plt2_p2 t2_1
+ -> Hash
+ -> Seq Scan on plt1_e_p2 t3_1
+ -> Hash Join
+ Hash Cond: (t1_2.c = t2_2.c)
+ -> Seq Scan on plt1_p3 t1_2
+ -> Hash
+ -> Hash Join
+ Hash Cond: (t2_2.c = ltrim(t3_2.c, 'A'::text))
+ -> Seq Scan on plt2_p3 t2_2
+ -> Hash
+ -> Seq Scan on plt1_e_p3 t3_2
+(33 rows)
+
+SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, plt2 t2, plt1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c;
+ avg | avg | avg | c | c | c
+----------------------+----------------------+-----------------------+------+------+-------
+ 24.0000000000000000 | 24.0000000000000000 | 48.0000000000000000 | 0000 | 0000 | A0000
+ 74.0000000000000000 | 75.0000000000000000 | 148.0000000000000000 | 0001 | 0001 | A0001
+ 124.0000000000000000 | 124.5000000000000000 | 248.0000000000000000 | 0002 | 0002 | A0002
+ 174.0000000000000000 | 174.0000000000000000 | 348.0000000000000000 | 0003 | 0003 | A0003
+ 224.0000000000000000 | 225.0000000000000000 | 448.0000000000000000 | 0004 | 0004 | A0004
+ 274.0000000000000000 | 274.5000000000000000 | 548.0000000000000000 | 0005 | 0005 | A0005
+ 324.0000000000000000 | 324.0000000000000000 | 648.0000000000000000 | 0006 | 0006 | A0006
+ 374.0000000000000000 | 375.0000000000000000 | 748.0000000000000000 | 0007 | 0007 | A0007
+ 424.0000000000000000 | 424.5000000000000000 | 848.0000000000000000 | 0008 | 0008 | A0008
+ 474.0000000000000000 | 474.0000000000000000 | 948.0000000000000000 | 0009 | 0009 | A0009
+ 524.0000000000000000 | 525.0000000000000000 | 1048.0000000000000000 | 0010 | 0010 | A0010
+ 574.0000000000000000 | 574.5000000000000000 | 1148.0000000000000000 | 0011 | 0011 | A0011
+(12 rows)
+
+-- joins where one of the relations is proven empty
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a = 1 AND t1.a = 2;
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 LEFT JOIN prt2 t2 ON t1.a = t2.b;
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+--------------------------------------------
+ Sort
+ Sort Key: a, t2.b
+ -> Hash Left Join
+ Hash Cond: (t2.b = a)
+ -> Append
+ -> Seq Scan on prt2_p1 t2
+ Filter: (a = 0)
+ -> Seq Scan on prt2_p2 t2_1
+ Filter: (a = 0)
+ -> Seq Scan on prt2_p3 t2_2
+ Filter: (a = 0)
+ -> Hash
+ -> Result
+ One-Time Filter: false
+(14 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 FULL JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+--------------------------------------------
+ Sort
+ Sort Key: a, t2.b
+ -> Hash Left Join
+ Hash Cond: (t2.b = a)
+ -> Append
+ -> Seq Scan on prt2_p1 t2
+ Filter: (a = 0)
+ -> Seq Scan on prt2_p2 t2_1
+ Filter: (a = 0)
+ -> Seq Scan on prt2_p3 t2_2
+ Filter: (a = 0)
+ -> Hash
+ -> Result
+ One-Time Filter: false
+(14 rows)
+
+--
+-- multiple levels of partitioning
+--
+CREATE TABLE prt1_l (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE prt1_l_p1 PARTITION OF prt1_l FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_l_p2 PARTITION OF prt1_l FOR VALUES FROM (250) TO (500) PARTITION BY LIST (c);
+CREATE TABLE prt1_l_p2_p1 PARTITION OF prt1_l_p2 FOR VALUES IN ('0000', '0001');
+CREATE TABLE prt1_l_p2_p2 PARTITION OF prt1_l_p2 FOR VALUES IN ('0002', '0003');
+CREATE TABLE prt1_l_p3 PARTITION OF prt1_l FOR VALUES FROM (500) TO (600) PARTITION BY RANGE (b);
+CREATE TABLE prt1_l_p3_p1 PARTITION OF prt1_l_p3 FOR VALUES FROM (0) TO (13);
+CREATE TABLE prt1_l_p3_p2 PARTITION OF prt1_l_p3 FOR VALUES FROM (13) TO (25);
+INSERT INTO prt1_l SELECT i, i % 25, to_char(i % 4, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt1_l;
+CREATE TABLE prt2_l (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE prt2_l_p1 PARTITION OF prt2_l FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_l_p2 PARTITION OF prt2_l FOR VALUES FROM (250) TO (500) PARTITION BY LIST (c);
+CREATE TABLE prt2_l_p2_p1 PARTITION OF prt2_l_p2 FOR VALUES IN ('0000', '0001');
+CREATE TABLE prt2_l_p2_p2 PARTITION OF prt2_l_p2 FOR VALUES IN ('0002', '0003');
+CREATE TABLE prt2_l_p3 PARTITION OF prt2_l FOR VALUES FROM (500) TO (600) PARTITION BY RANGE (a);
+CREATE TABLE prt2_l_p3_p1 PARTITION OF prt2_l_p3 FOR VALUES FROM (0) TO (13);
+CREATE TABLE prt2_l_p3_p2 PARTITION OF prt2_l_p3 FOR VALUES FROM (13) TO (25);
+INSERT INTO prt2_l SELECT i % 25, i, to_char(i % 4, 'FM0000') FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_l;
+-- inner join, qual covering only top-level partitions
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1, prt2_l t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+-------------------------------------------------------------
+ Sort
+ Sort Key: t1.a
+ -> Append
+ -> Hash Join
+ Hash Cond: (t2.b = t1.a)
+ -> Seq Scan on prt2_l_p1 t2
+ -> Hash
+ -> Seq Scan on prt1_l_p1 t1
+ Filter: (b = 0)
+ -> Hash Join
+ Hash Cond: (t2_1.b = t1_1.a)
+ -> Append
+ -> Seq Scan on prt2_l_p2_p1 t2_1
+ -> Seq Scan on prt2_l_p2_p2 t2_2
+ -> Hash
+ -> Append
+ -> Seq Scan on prt1_l_p2_p1 t1_1
+ Filter: (b = 0)
+ -> Seq Scan on prt1_l_p2_p2 t1_2
+ Filter: (b = 0)
+ -> Hash Join
+ Hash Cond: (t2_3.b = t1_3.a)
+ -> Append
+ -> Seq Scan on prt2_l_p3_p1 t2_3
+ -> Seq Scan on prt2_l_p3_p2 t2_4
+ -> Hash
+ -> Append
+ -> Seq Scan on prt1_l_p3_p1 t1_3
+ Filter: (b = 0)
+(29 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1, prt2_l t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+------+-----+------
+ 0 | 0000 | 0 | 0000
+ 150 | 0002 | 150 | 0002
+ 300 | 0000 | 300 | 0000
+ 450 | 0002 | 450 | 0002
+(4 rows)
+
+-- left join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 LEFT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+------------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a, t2.b
+ -> Append
+ -> Hash Right Join
+ Hash Cond: ((t2.b = t1.a) AND ((t2.c)::text = (t1.c)::text))
+ -> Seq Scan on prt2_l_p1 t2
+ -> Hash
+ -> Seq Scan on prt1_l_p1 t1
+ Filter: (b = 0)
+ -> Hash Right Join
+ Hash Cond: ((t2_1.b = t1_1.a) AND ((t2_1.c)::text = (t1_1.c)::text))
+ -> Seq Scan on prt2_l_p2_p1 t2_1
+ -> Hash
+ -> Seq Scan on prt1_l_p2_p1 t1_1
+ Filter: (b = 0)
+ -> Hash Right Join
+ Hash Cond: ((t2_2.b = t1_2.a) AND ((t2_2.c)::text = (t1_2.c)::text))
+ -> Seq Scan on prt2_l_p2_p2 t2_2
+ -> Hash
+ -> Seq Scan on prt1_l_p2_p2 t1_2
+ Filter: (b = 0)
+ -> Hash Right Join
+ Hash Cond: ((t2_3.b = t1_3.a) AND ((t2_3.c)::text = (t1_3.c)::text))
+ -> Append
+ -> Seq Scan on prt2_l_p3_p1 t2_3
+ -> Seq Scan on prt2_l_p3_p2 t2_4
+ -> Hash
+ -> Append
+ -> Seq Scan on prt1_l_p3_p1 t1_3
+ Filter: (b = 0)
+(30 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 LEFT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+------+-----+------
+ 0 | 0000 | 0 | 0000
+ 50 | 0002 | |
+ 100 | 0000 | |
+ 150 | 0002 | 150 | 0002
+ 200 | 0000 | |
+ 250 | 0002 | |
+ 300 | 0000 | 300 | 0000
+ 350 | 0002 | |
+ 400 | 0000 | |
+ 450 | 0002 | 450 | 0002
+ 500 | 0000 | |
+ 550 | 0002 | |
+(12 rows)
+
+-- right join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+ QUERY PLAN
+------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a, t2.b
+ -> Result
+ -> Append
+ -> Hash Right Join
+ Hash Cond: ((t1.a = t2.b) AND ((t1.c)::text = (t2.c)::text))
+ -> Seq Scan on prt1_l_p1 t1
+ -> Hash
+ -> Seq Scan on prt2_l_p1 t2
+ Filter: (a = 0)
+ -> Hash Right Join
+ Hash Cond: ((t1_1.a = t2_1.b) AND ((t1_1.c)::text = (t2_1.c)::text))
+ -> Seq Scan on prt1_l_p2_p1 t1_1
+ -> Hash
+ -> Seq Scan on prt2_l_p2_p1 t2_1
+ Filter: (a = 0)
+ -> Hash Right Join
+ Hash Cond: ((t1_2.a = t2_2.b) AND ((t1_2.c)::text = (t2_2.c)::text))
+ -> Seq Scan on prt1_l_p2_p2 t1_2
+ -> Hash
+ -> Seq Scan on prt2_l_p2_p2 t2_2
+ Filter: (a = 0)
+ -> Hash Right Join
+ Hash Cond: ((t1_3.a = t2_3.b) AND ((t1_3.c)::text = (t2_3.c)::text))
+ -> Append
+ -> Seq Scan on prt1_l_p3_p1 t1_3
+ -> Seq Scan on prt1_l_p3_p2 t1_4
+ -> Hash
+ -> Append
+ -> Seq Scan on prt2_l_p3_p1 t2_3
+ Filter: (a = 0)
+(31 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+------+-----+------
+ 0 | 0000 | 0 | 0000
+ 150 | 0002 | 150 | 0002
+ 300 | 0000 | 300 | 0000
+ 450 | 0002 | 450 | 0002
+ | | 75 | 0003
+ | | 225 | 0001
+ | | 375 | 0003
+ | | 525 | 0001
+(8 rows)
+
+-- full join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE prt1_l.b = 0) t1 FULL JOIN (SELECT * FROM prt2_l WHERE prt2_l.a = 0) t2 ON (t1.a = t2.b AND t1.c = t2.c) ORDER BY t1.a, t2.b;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: prt1_l_p1.a, prt2_l_p1.b
+ -> Append
+ -> Hash Full Join
+ Hash Cond: ((prt1_l_p1.a = prt2_l_p1.b) AND ((prt1_l_p1.c)::text = (prt2_l_p1.c)::text))
+ -> Seq Scan on prt1_l_p1
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_l_p1
+ Filter: (a = 0)
+ -> Hash Full Join
+ Hash Cond: ((prt1_l_p2_p1.a = prt2_l_p2_p1.b) AND ((prt1_l_p2_p1.c)::text = (prt2_l_p2_p1.c)::text))
+ -> Seq Scan on prt1_l_p2_p1
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_l_p2_p1
+ Filter: (a = 0)
+ -> Hash Full Join
+ Hash Cond: ((prt1_l_p2_p2.a = prt2_l_p2_p2.b) AND ((prt1_l_p2_p2.c)::text = (prt2_l_p2_p2.c)::text))
+ -> Seq Scan on prt1_l_p2_p2
+ Filter: (b = 0)
+ -> Hash
+ -> Seq Scan on prt2_l_p2_p2
+ Filter: (a = 0)
+ -> Hash Full Join
+ Hash Cond: ((prt1_l_p3_p1.a = prt2_l_p3_p1.b) AND ((prt1_l_p3_p1.c)::text = (prt2_l_p3_p1.c)::text))
+ -> Append
+ -> Seq Scan on prt1_l_p3_p1
+ Filter: (b = 0)
+ -> Hash
+ -> Append
+ -> Seq Scan on prt2_l_p3_p1
+ Filter: (a = 0)
+(33 rows)
+
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE prt1_l.b = 0) t1 FULL JOIN (SELECT * FROM prt2_l WHERE prt2_l.a = 0) t2 ON (t1.a = t2.b AND t1.c = t2.c) ORDER BY t1.a, t2.b;
+ a | c | b | c
+-----+------+-----+------
+ 0 | 0000 | 0 | 0000
+ 50 | 0002 | |
+ 100 | 0000 | |
+ 150 | 0002 | 150 | 0002
+ 200 | 0000 | |
+ 250 | 0002 | |
+ 300 | 0000 | 300 | 0000
+ 350 | 0002 | |
+ 400 | 0000 | |
+ 450 | 0002 | 450 | 0002
+ 500 | 0000 | |
+ 550 | 0002 | |
+ | | 75 | 0003
+ | | 225 | 0001
+ | | 375 | 0003
+ | | 525 | 0001
+(16 rows)
+
+-- lateral partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
+ ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.a
+ -> Result
+ -> Append
+ -> Nested Loop Left Join
+ -> Seq Scan on prt1_l_p1 t1
+ Filter: (b = 0)
+ -> Hash Join
+ Hash Cond: ((t3.b = t2.a) AND ((t3.c)::text = (t2.c)::text))
+ -> Seq Scan on prt2_l_p1 t3
+ -> Hash
+ -> Seq Scan on prt1_l_p1 t2
+ Filter: ((t1.a = a) AND ((t1.c)::text = (c)::text))
+ -> Nested Loop Left Join
+ -> Seq Scan on prt1_l_p2_p1 t1_1
+ Filter: (b = 0)
+ -> Hash Join
+ Hash Cond: ((t3_1.b = t2_1.a) AND ((t3_1.c)::text = (t2_1.c)::text))
+ -> Seq Scan on prt2_l_p2_p1 t3_1
+ -> Hash
+ -> Seq Scan on prt1_l_p2_p1 t2_1
+ Filter: ((t1_1.a = a) AND ((t1_1.c)::text = (c)::text))
+ -> Nested Loop Left Join
+ -> Seq Scan on prt1_l_p2_p2 t1_2
+ Filter: (b = 0)
+ -> Hash Join
+ Hash Cond: ((t3_2.b = t2_2.a) AND ((t3_2.c)::text = (t2_2.c)::text))
+ -> Seq Scan on prt2_l_p2_p2 t3_2
+ -> Hash
+ -> Seq Scan on prt1_l_p2_p2 t2_2
+ Filter: ((t1_2.a = a) AND ((t1_2.c)::text = (c)::text))
+ -> Nested Loop Left Join
+ -> Append
+ -> Seq Scan on prt1_l_p3_p1 t1_3
+ Filter: (b = 0)
+ -> Hash Join
+ Hash Cond: ((t3_3.b = t2_3.a) AND ((t3_3.c)::text = (t2_3.c)::text))
+ -> Append
+ -> Seq Scan on prt2_l_p3_p1 t3_3
+ -> Seq Scan on prt2_l_p3_p2 t3_4
+ -> Hash
+ -> Append
+ -> Seq Scan on prt1_l_p3_p1 t2_3
+ Filter: ((t1_3.a = a) AND ((t1_3.c)::text = (c)::text))
+ -> Seq Scan on prt1_l_p3_p2 t2_4
+ Filter: ((t1_3.a = a) AND ((t1_3.c)::text = (c)::text))
+(46 rows)
+
+SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
+ ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
+ a | b | c | t2a | t2c | t2b | t3b | least
+-----+---+------+-----+------+-----+-----+-------
+ 0 | 0 | 0000 | 0 | 0000 | 0 | 0 | 0
+ 50 | 0 | 0002 | | | | |
+ 100 | 0 | 0000 | | | | |
+ 150 | 0 | 0002 | 150 | 0002 | 0 | 150 | 150
+ 200 | 0 | 0000 | | | | |
+ 250 | 0 | 0002 | | | | |
+ 300 | 0 | 0000 | 300 | 0000 | 0 | 300 | 300
+ 350 | 0 | 0002 | | | | |
+ 400 | 0 | 0000 | | | | |
+ 450 | 0 | 0002 | 450 | 0002 | 0 | 450 | 450
+ 500 | 0 | 0000 | | | | |
+ 550 | 0 | 0002 | | | | |
+(12 rows)
+
+-- join with one side empty
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;
+ QUERY PLAN
+-------------------------------------------------------------------------
+ Hash Left Join
+ Hash Cond: ((t2.b = a) AND (t2.a = b) AND ((t2.c)::text = (c)::text))
+ -> Append
+ -> Seq Scan on prt2_l_p1 t2
+ -> Seq Scan on prt2_l_p2_p1 t2_1
+ -> Seq Scan on prt2_l_p2_p2 t2_2
+ -> Seq Scan on prt2_l_p3_p1 t2_3
+ -> Seq Scan on prt2_l_p3_p2 t2_4
+ -> Hash
+ -> Result
+ One-Time Filter: false
+(11 rows)
+
+--
+-- negative testcases
+--
+CREATE TABLE prt1_n (a int, b int, c varchar) PARTITION BY RANGE(c);
+CREATE TABLE prt1_n_p1 PARTITION OF prt1_n FOR VALUES FROM ('0000') TO ('0250');
+CREATE TABLE prt1_n_p2 PARTITION OF prt1_n FOR VALUES FROM ('0250') TO ('0500');
+INSERT INTO prt1_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(0, 499, 2) i;
+ANALYZE prt1_n;
+CREATE TABLE prt2_n (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE prt2_n_p1 PARTITION OF prt2_n FOR VALUES IN ('0000', '0003', '0004', '0010', '0006', '0007');
+CREATE TABLE prt2_n_p2 PARTITION OF prt2_n FOR VALUES IN ('0001', '0005', '0002', '0009', '0008', '0011');
+INSERT INTO prt2_n SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt2_n;
+CREATE TABLE prt3_n (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE prt3_n_p1 PARTITION OF prt3_n FOR VALUES IN ('0000', '0004', '0006', '0007');
+CREATE TABLE prt3_n_p2 PARTITION OF prt3_n FOR VALUES IN ('0001', '0002', '0008', '0010');
+CREATE TABLE prt3_n_p3 PARTITION OF prt3_n FOR VALUES IN ('0003', '0005', '0009', '0011');
+INSERT INTO prt2_n SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt3_n;
+CREATE TABLE prt4_n (a int, b int, c text) PARTITION BY RANGE(a);
+CREATE TABLE prt4_n_p1 PARTITION OF prt4_n FOR VALUES FROM (0) TO (300);
+CREATE TABLE prt4_n_p2 PARTITION OF prt4_n FOR VALUES FROM (300) TO (500);
+CREATE TABLE prt4_n_p3 PARTITION OF prt4_n FOR VALUES FROM (500) TO (600);
+INSERT INTO prt4_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt4_n;
+-- partition-wise join can not be applied if the partition ranges differ
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt4_n t2 WHERE t1.a = t2.a;
+ QUERY PLAN
+----------------------------------------------
+ Hash Join
+ Hash Cond: (t1.a = t2.a)
+ -> Append
+ -> Seq Scan on prt1_p1 t1
+ -> Seq Scan on prt1_p2 t1_1
+ -> Seq Scan on prt1_p3 t1_2
+ -> Hash
+ -> Append
+ -> Seq Scan on prt4_n_p1 t2
+ -> Seq Scan on prt4_n_p2 t2_1
+ -> Seq Scan on prt4_n_p3 t2_2
+(11 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt4_n t2, prt2 t3 WHERE t1.a = t2.a and t1.a = t3.b;
+ QUERY PLAN
+--------------------------------------------------------
+ Hash Join
+ Hash Cond: (t2.a = t1.a)
+ -> Append
+ -> Seq Scan on prt4_n_p1 t2
+ -> Seq Scan on prt4_n_p2 t2_1
+ -> Seq Scan on prt4_n_p3 t2_2
+ -> Hash
+ -> Append
+ -> Hash Join
+ Hash Cond: (t1.a = t3.b)
+ -> Seq Scan on prt1_p1 t1
+ -> Hash
+ -> Seq Scan on prt2_p1 t3
+ -> Hash Join
+ Hash Cond: (t1_1.a = t3_1.b)
+ -> Seq Scan on prt1_p2 t1_1
+ -> Hash
+ -> Seq Scan on prt2_p2 t3_1
+ -> Hash Join
+ Hash Cond: (t1_2.a = t3_2.b)
+ -> Seq Scan on prt1_p3 t1_2
+ -> Hash
+ -> Seq Scan on prt2_p3 t3_2
+(23 rows)
+
+-- partition-wise join can not be applied if there are no equi-join conditions
+-- between partition keys
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 LEFT JOIN prt2 t2 ON (t1.a < t2.b);
+ QUERY PLAN
+---------------------------------------------------------
+ Nested Loop Left Join
+ -> Append
+ -> Seq Scan on prt1_p1 t1
+ -> Seq Scan on prt1_p2 t1_1
+ -> Seq Scan on prt1_p3 t1_2
+ -> Append
+ -> Index Scan using iprt2_p1_b on prt2_p1 t2
+ Index Cond: (t1.a < b)
+ -> Index Scan using iprt2_p2_b on prt2_p2 t2_1
+ Index Cond: (t1.a < b)
+ -> Index Scan using iprt2_p3_b on prt2_p3 t2_2
+ Index Cond: (t1.a < b)
+(12 rows)
+
+-- equi-join with join condition on partial keys does not qualify for
+-- partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1, prt2_m t2 WHERE t1.a = (t2.b + t2.a)/2;
+ QUERY PLAN
+----------------------------------------------
+ Hash Join
+ Hash Cond: (((t2.b + t2.a) / 2) = t1.a)
+ -> Append
+ -> Seq Scan on prt2_m_p1 t2
+ -> Seq Scan on prt2_m_p2 t2_1
+ -> Seq Scan on prt2_m_p3 t2_2
+ -> Hash
+ -> Append
+ -> Seq Scan on prt1_m_p1 t1
+ -> Seq Scan on prt1_m_p2 t1_1
+ -> Seq Scan on prt1_m_p3 t1_2
+(11 rows)
+
+-- equi-join between out-of-order partition key columns does not qualify for
+-- partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1 LEFT JOIN prt2_m t2 ON t1.a = t2.b;
+ QUERY PLAN
+----------------------------------------------
+ Hash Left Join
+ Hash Cond: (t1.a = t2.b)
+ -> Append
+ -> Seq Scan on prt1_m_p1 t1
+ -> Seq Scan on prt1_m_p2 t1_1
+ -> Seq Scan on prt1_m_p3 t1_2
+ -> Hash
+ -> Append
+ -> Seq Scan on prt2_m_p1 t2
+ -> Seq Scan on prt2_m_p2 t2_1
+ -> Seq Scan on prt2_m_p3 t2_2
+(11 rows)
+
+-- equi-join between non-key columns does not qualify for partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1 LEFT JOIN prt2_m t2 ON t1.c = t2.c;
+ QUERY PLAN
+----------------------------------------------
+ Hash Left Join
+ Hash Cond: (t1.c = t2.c)
+ -> Append
+ -> Seq Scan on prt1_m_p1 t1
+ -> Seq Scan on prt1_m_p2 t1_1
+ -> Seq Scan on prt1_m_p3 t1_2
+ -> Hash
+ -> Append
+ -> Seq Scan on prt2_m_p1 t2
+ -> Seq Scan on prt2_m_p2 t2_1
+ -> Seq Scan on prt2_m_p3 t2_2
+(11 rows)
+
+-- partition-wise join can not be applied between tables with different
+-- partition lists
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 LEFT JOIN prt2_n t2 ON (t1.c = t2.c);
+ QUERY PLAN
+----------------------------------------------
+ Hash Right Join
+ Hash Cond: (t2.c = (t1.c)::text)
+ -> Append
+ -> Seq Scan on prt2_n_p1 t2
+ -> Seq Scan on prt2_n_p2 t2_1
+ -> Hash
+ -> Append
+ -> Seq Scan on prt1_n_p1 t1
+ -> Seq Scan on prt1_n_p2 t1_1
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 JOIN prt2_n t2 ON (t1.c = t2.c) JOIN plt1 t3 ON (t1.c = t3.c);
+ QUERY PLAN
+----------------------------------------------------------
+ Hash Join
+ Hash Cond: (t2.c = (t1.c)::text)
+ -> Append
+ -> Seq Scan on prt2_n_p1 t2
+ -> Seq Scan on prt2_n_p2 t2_1
+ -> Hash
+ -> Hash Join
+ Hash Cond: (t3.c = (t1.c)::text)
+ -> Append
+ -> Seq Scan on plt1_p1 t3
+ -> Seq Scan on plt1_p2 t3_1
+ -> Seq Scan on plt1_p3 t3_2
+ -> Hash
+ -> Append
+ -> Seq Scan on prt1_n_p1 t1
+ -> Seq Scan on prt1_n_p2 t1_1
+(16 rows)
+
+-- partition-wise join can not be applied for a join between list and range
+-- partitioned table
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 FULL JOIN prt1 t2 ON (t1.c = t2.c);
+ QUERY PLAN
+----------------------------------------------
+ Hash Full Join
+ Hash Cond: ((t2.c)::text = (t1.c)::text)
+ -> Append
+ -> Seq Scan on prt1_p1 t2
+ -> Seq Scan on prt1_p2 t2_1
+ -> Seq Scan on prt1_p3 t2_2
+ -> Hash
+ -> Append
+ -> Seq Scan on prt1_n_p1 t1
+ -> Seq Scan on prt1_n_p2 t1_1
+(10 rows)
+
-- This is to record the prevailing planner enable_foo settings during
-- a regression test run.
select name, setting from pg_settings where name like 'enable%';
- name | setting
-----------------------+---------
- enable_bitmapscan | on
- enable_gathermerge | on
- enable_hashagg | on
- enable_hashjoin | on
- enable_indexonlyscan | on
- enable_indexscan | on
- enable_material | on
- enable_mergejoin | on
- enable_nestloop | on
- enable_seqscan | on
- enable_sort | on
- enable_tidscan | on
-(12 rows)
+ name | setting
+----------------------------+---------
+ enable_bitmapscan | on
+ enable_gathermerge | on
+ enable_hashagg | on
+ enable_hashjoin | on
+ enable_indexonlyscan | on
+ enable_indexscan | on
+ enable_material | on
+ enable_mergejoin | on
+ enable_nestloop | on
+ enable_partition_wise_join | off
+ enable_seqscan | on
+ enable_sort | on
+ enable_tidscan | on
+(13 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail
# ----------
# Another group of parallel tests
# ----------
-test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock json jsonb json_encoding indirect_toast equivclass
+test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock json jsonb json_encoding indirect_toast equivclass partition_join
+
# ----------
# Another group of parallel tests
# NB: temp.sql does a reconnect which transiently uses 2 connections,
test: xml
test: event_trigger
test: stats
+test: partition_join
--- /dev/null
+--
+-- PARTITION_JOIN
+-- Test partition-wise join between partitioned tables
+--
+
+-- Enable partition-wise join, which by default is disabled.
+SET enable_partition_wise_join to true;
+
+--
+-- partitioned by a single column
+--
+CREATE TABLE prt1 (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE prt1_p1 PARTITION OF prt1 FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_p3 PARTITION OF prt1 FOR VALUES FROM (500) TO (600);
+CREATE TABLE prt1_p2 PARTITION OF prt1 FOR VALUES FROM (250) TO (500);
+INSERT INTO prt1 SELECT i, i % 25, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 2 = 0;
+CREATE INDEX iprt1_p1_a on prt1_p1(a);
+CREATE INDEX iprt1_p2_a on prt1_p2(a);
+CREATE INDEX iprt1_p3_a on prt1_p3(a);
+ANALYZE prt1;
+
+CREATE TABLE prt2 (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE prt2_p1 PARTITION OF prt2 FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_p2 PARTITION OF prt2 FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt2_p3 PARTITION OF prt2 FOR VALUES FROM (500) TO (600);
+INSERT INTO prt2 SELECT i % 25, i, to_char(i, 'FM0000') FROM generate_series(0, 599) i WHERE i % 3 = 0;
+CREATE INDEX iprt2_p1_b on prt2_p1(b);
+CREATE INDEX iprt2_p2_b on prt2_p2(b);
+CREATE INDEX iprt2_p3_b on prt2_p3(b);
+ANALYZE prt2;
+
+-- inner join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+
+-- left outer join, with whole-row reference
+EXPLAIN (COSTS OFF)
+SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+
+-- right outer join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+
+-- full outer join, with placeholder vars
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b) WHERE t1.phv = t1.a OR t2.phv = t2.b ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b) WHERE t1.phv = t1.a OR t2.phv = t2.b ORDER BY t1.a, t2.b;
+
+-- Join with pruned partitions from joining relations
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a < 450 AND t2.b > 250 AND t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a < 450 AND t2.b > 250 AND t1.b = 0 ORDER BY t1.a, t2.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 FULL JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 OR t2.a = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 FULL JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 OR t2.a = 0 ORDER BY t1.a, t2.b;
+
+-- Semi-join
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t2.b FROM prt2 t2 WHERE t2.a = 0) AND t1.b = 0 ORDER BY t1.a;
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t2.b FROM prt2 t2 WHERE t2.a = 0) AND t1.b = 0 ORDER BY t1.a;
+
+-- Anti-join with aggregates
+EXPLAIN (COSTS OFF)
+SELECT sum(t1.a), avg(t1.a), sum(t1.b), avg(t1.b) FROM prt1 t1 WHERE NOT EXISTS (SELECT 1 FROM prt2 t2 WHERE t1.a = t2.b);
+SELECT sum(t1.a), avg(t1.a), sum(t1.b), avg(t1.b) FROM prt1 t1 WHERE NOT EXISTS (SELECT 1 FROM prt2 t2 WHERE t1.a = t2.b);
+
+-- lateral reference
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1 t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t3.a AS t3a, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+ ON t1.a = ss.t2a WHERE t1.b = 0 ORDER BY t1.a;
+SELECT * FROM prt1 t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t3.a AS t3a, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+ ON t1.a = ss.t2a WHERE t1.b = 0 ORDER BY t1.a;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+ ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
+SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
+ ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
+
+--
+-- partitioned by expression
+--
+CREATE TABLE prt1_e (a int, b int, c int) PARTITION BY RANGE(((a + b)/2));
+CREATE TABLE prt1_e_p1 PARTITION OF prt1_e FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_e_p2 PARTITION OF prt1_e FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt1_e_p3 PARTITION OF prt1_e FOR VALUES FROM (500) TO (600);
+INSERT INTO prt1_e SELECT i, i, i % 25 FROM generate_series(0, 599, 2) i;
+CREATE INDEX iprt1_e_p1_ab2 on prt1_e_p1(((a+b)/2));
+CREATE INDEX iprt1_e_p2_ab2 on prt1_e_p2(((a+b)/2));
+CREATE INDEX iprt1_e_p3_ab2 on prt1_e_p3(((a+b)/2));
+ANALYZE prt1_e;
+
+CREATE TABLE prt2_e (a int, b int, c int) PARTITION BY RANGE(((b + a)/2));
+CREATE TABLE prt2_e_p1 PARTITION OF prt2_e FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_e_p2 PARTITION OF prt2_e FOR VALUES FROM (250) TO (500);
+CREATE TABLE prt2_e_p3 PARTITION OF prt2_e FOR VALUES FROM (500) TO (600);
+INSERT INTO prt2_e SELECT i, i, i % 25 FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_e;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_e t1, prt2_e t2 WHERE (t1.a + t1.b)/2 = (t2.b + t2.a)/2 AND t1.c = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_e t1, prt2_e t2 WHERE (t1.a + t1.b)/2 = (t2.b + t2.a)/2 AND t1.c = 0 ORDER BY t1.a, t2.b;
+
+--
+-- N-way join
+--
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM prt1 t1, prt2 t2, prt1_e t3 WHERE t1.a = t2.b AND t1.a = (t3.a + t3.b)/2 AND t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM prt1 t1, prt2 t2, prt1_e t3 WHERE t1.a = t2.b AND t1.a = (t3.a + t3.b)/2 AND t1.b = 0 ORDER BY t1.a, t2.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) LEFT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.b = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) LEFT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.b = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+
+-- Cases with non-nullable expressions in subquery results;
+-- make sure these go to null as expected
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b)) FULL JOIN (SELECT 50 phv, * FROM prt1_e WHERE prt1_e.c = 0) t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.a = t1.phv OR t2.b = t2.phv OR (t3.a + t3.b)/2 = t3.phv ORDER BY t1.a, t2.b, t3.a + t3.b;
+SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b)) FULL JOIN (SELECT 50 phv, * FROM prt1_e WHERE prt1_e.c = 0) t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.a = t1.phv OR t2.b = t2.phv OR (t3.a + t3.b)/2 = t3.phv ORDER BY t1.a, t2.b, t3.a + t3.b;
+
+-- Semi-join
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHERE t1.a = 0 AND t1.b = (t2.a + t2.b)/2) AND t1.b = 0 ORDER BY t1.a;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+
+-- test merge joins
+SET enable_hashjoin TO off;
+SET enable_nestloop TO off;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
+
+-- MergeAppend on nullable column
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+
+RESET enable_hashjoin;
+RESET enable_nestloop;
+
+--
+-- partitioned by multiple columns
+--
+CREATE TABLE prt1_m (a int, b int, c int) PARTITION BY RANGE(a, ((a + b)/2));
+CREATE TABLE prt1_m_p1 PARTITION OF prt1_m FOR VALUES FROM (0, 0) TO (250, 250);
+CREATE TABLE prt1_m_p2 PARTITION OF prt1_m FOR VALUES FROM (250, 250) TO (500, 500);
+CREATE TABLE prt1_m_p3 PARTITION OF prt1_m FOR VALUES FROM (500, 500) TO (600, 600);
+INSERT INTO prt1_m SELECT i, i, i % 25 FROM generate_series(0, 599, 2) i;
+ANALYZE prt1_m;
+
+CREATE TABLE prt2_m (a int, b int, c int) PARTITION BY RANGE(((b + a)/2), b);
+CREATE TABLE prt2_m_p1 PARTITION OF prt2_m FOR VALUES FROM (0, 0) TO (250, 250);
+CREATE TABLE prt2_m_p2 PARTITION OF prt2_m FOR VALUES FROM (250, 250) TO (500, 500);
+CREATE TABLE prt2_m_p3 PARTITION OF prt2_m FOR VALUES FROM (500, 500) TO (600, 600);
+INSERT INTO prt2_m SELECT i, i, i % 25 FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_m;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_m WHERE prt1_m.c = 0) t1 FULL JOIN (SELECT * FROM prt2_m WHERE prt2_m.c = 0) t2 ON (t1.a = (t2.b + t2.a)/2 AND t2.b = (t1.a + t1.b)/2) ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_m WHERE prt1_m.c = 0) t1 FULL JOIN (SELECT * FROM prt2_m WHERE prt2_m.c = 0) t2 ON (t1.a = (t2.b + t2.a)/2 AND t2.b = (t1.a + t1.b)/2) ORDER BY t1.a, t2.b;
+
+--
+-- tests for list partitioned tables.
+--
+CREATE TABLE plt1 (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE plt1_p1 PARTITION OF plt1 FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt1_p2 PARTITION OF plt1 FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt1_p3 PARTITION OF plt1 FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE plt1;
+
+CREATE TABLE plt2 (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE plt2_p1 PARTITION OF plt2 FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt2_p2 PARTITION OF plt2 FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt2_p3 PARTITION OF plt2 FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 3) i;
+ANALYZE plt2;
+
+--
+-- list partitioned by expression
+--
+CREATE TABLE plt1_e (a int, b int, c text) PARTITION BY LIST(ltrim(c, 'A'));
+CREATE TABLE plt1_e_p1 PARTITION OF plt1_e FOR VALUES IN ('0000', '0003', '0004', '0010');
+CREATE TABLE plt1_e_p2 PARTITION OF plt1_e FOR VALUES IN ('0001', '0005', '0002', '0009');
+CREATE TABLE plt1_e_p3 PARTITION OF plt1_e FOR VALUES IN ('0006', '0007', '0008', '0011');
+INSERT INTO plt1_e SELECT i, i, 'A' || to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE plt1_e;
+
+-- test partition matching with N-way join
+EXPLAIN (COSTS OFF)
+SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, plt2 t2, plt1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c;
+SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, plt2 t2, plt1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c;
+
+-- joins where one of the relations is proven empty
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a = 1 AND t1.a = 2;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 LEFT JOIN prt2 t2 ON t1.a = t2.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 FULL JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+
+--
+-- multiple levels of partitioning
+--
+CREATE TABLE prt1_l (a int, b int, c varchar) PARTITION BY RANGE(a);
+CREATE TABLE prt1_l_p1 PARTITION OF prt1_l FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt1_l_p2 PARTITION OF prt1_l FOR VALUES FROM (250) TO (500) PARTITION BY LIST (c);
+CREATE TABLE prt1_l_p2_p1 PARTITION OF prt1_l_p2 FOR VALUES IN ('0000', '0001');
+CREATE TABLE prt1_l_p2_p2 PARTITION OF prt1_l_p2 FOR VALUES IN ('0002', '0003');
+CREATE TABLE prt1_l_p3 PARTITION OF prt1_l FOR VALUES FROM (500) TO (600) PARTITION BY RANGE (b);
+CREATE TABLE prt1_l_p3_p1 PARTITION OF prt1_l_p3 FOR VALUES FROM (0) TO (13);
+CREATE TABLE prt1_l_p3_p2 PARTITION OF prt1_l_p3 FOR VALUES FROM (13) TO (25);
+INSERT INTO prt1_l SELECT i, i % 25, to_char(i % 4, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt1_l;
+
+CREATE TABLE prt2_l (a int, b int, c varchar) PARTITION BY RANGE(b);
+CREATE TABLE prt2_l_p1 PARTITION OF prt2_l FOR VALUES FROM (0) TO (250);
+CREATE TABLE prt2_l_p2 PARTITION OF prt2_l FOR VALUES FROM (250) TO (500) PARTITION BY LIST (c);
+CREATE TABLE prt2_l_p2_p1 PARTITION OF prt2_l_p2 FOR VALUES IN ('0000', '0001');
+CREATE TABLE prt2_l_p2_p2 PARTITION OF prt2_l_p2 FOR VALUES IN ('0002', '0003');
+CREATE TABLE prt2_l_p3 PARTITION OF prt2_l FOR VALUES FROM (500) TO (600) PARTITION BY RANGE (a);
+CREATE TABLE prt2_l_p3_p1 PARTITION OF prt2_l_p3 FOR VALUES FROM (0) TO (13);
+CREATE TABLE prt2_l_p3_p2 PARTITION OF prt2_l_p3 FOR VALUES FROM (13) TO (25);
+INSERT INTO prt2_l SELECT i % 25, i, to_char(i % 4, 'FM0000') FROM generate_series(0, 599, 3) i;
+ANALYZE prt2_l;
+
+-- inner join, qual covering only top-level partitions
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1, prt2_l t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1, prt2_l t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
+
+-- left join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 LEFT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 LEFT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+
+-- right join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.c = t2.c WHERE t2.a = 0 ORDER BY t1.a, t2.b;
+
+-- full join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE prt1_l.b = 0) t1 FULL JOIN (SELECT * FROM prt2_l WHERE prt2_l.a = 0) t2 ON (t1.a = t2.b AND t1.c = t2.c) ORDER BY t1.a, t2.b;
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE prt1_l.b = 0) t1 FULL JOIN (SELECT * FROM prt2_l WHERE prt2_l.a = 0) t2 ON (t1.a = t2.b AND t1.c = t2.c) ORDER BY t1.a, t2.b;
+
+-- lateral partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
+ ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
+SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
+ (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
+ ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
+
+-- join with one side empty
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;
+
+--
+-- negative testcases
+--
+CREATE TABLE prt1_n (a int, b int, c varchar) PARTITION BY RANGE(c);
+CREATE TABLE prt1_n_p1 PARTITION OF prt1_n FOR VALUES FROM ('0000') TO ('0250');
+CREATE TABLE prt1_n_p2 PARTITION OF prt1_n FOR VALUES FROM ('0250') TO ('0500');
+INSERT INTO prt1_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(0, 499, 2) i;
+ANALYZE prt1_n;
+
+CREATE TABLE prt2_n (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE prt2_n_p1 PARTITION OF prt2_n FOR VALUES IN ('0000', '0003', '0004', '0010', '0006', '0007');
+CREATE TABLE prt2_n_p2 PARTITION OF prt2_n FOR VALUES IN ('0001', '0005', '0002', '0009', '0008', '0011');
+INSERT INTO prt2_n SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt2_n;
+
+CREATE TABLE prt3_n (a int, b int, c text) PARTITION BY LIST(c);
+CREATE TABLE prt3_n_p1 PARTITION OF prt3_n FOR VALUES IN ('0000', '0004', '0006', '0007');
+CREATE TABLE prt3_n_p2 PARTITION OF prt3_n FOR VALUES IN ('0001', '0002', '0008', '0010');
+CREATE TABLE prt3_n_p3 PARTITION OF prt3_n FOR VALUES IN ('0003', '0005', '0009', '0011');
+INSERT INTO prt2_n SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt3_n;
+
+CREATE TABLE prt4_n (a int, b int, c text) PARTITION BY RANGE(a);
+CREATE TABLE prt4_n_p1 PARTITION OF prt4_n FOR VALUES FROM (0) TO (300);
+CREATE TABLE prt4_n_p2 PARTITION OF prt4_n FOR VALUES FROM (300) TO (500);
+CREATE TABLE prt4_n_p3 PARTITION OF prt4_n FOR VALUES FROM (500) TO (600);
+INSERT INTO prt4_n SELECT i, i, to_char(i, 'FM0000') FROM generate_series(0, 599, 2) i;
+ANALYZE prt4_n;
+
+-- partition-wise join can not be applied if the partition ranges differ
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt4_n t2 WHERE t1.a = t2.a;
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt4_n t2, prt2 t3 WHERE t1.a = t2.a and t1.a = t3.b;
+
+-- partition-wise join can not be applied if there are no equi-join conditions
+-- between partition keys
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1 LEFT JOIN prt2 t2 ON (t1.a < t2.b);
+
+-- equi-join with join condition on partial keys does not qualify for
+-- partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1, prt2_m t2 WHERE t1.a = (t2.b + t2.a)/2;
+
+-- equi-join between out-of-order partition key columns does not qualify for
+-- partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1 LEFT JOIN prt2_m t2 ON t1.a = t2.b;
+
+-- equi-join between non-key columns does not qualify for partition-wise join
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_m t1 LEFT JOIN prt2_m t2 ON t1.c = t2.c;
+
+-- partition-wise join can not be applied between tables with different
+-- partition lists
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 LEFT JOIN prt2_n t2 ON (t1.c = t2.c);
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 JOIN prt2_n t2 ON (t1.c = t2.c) JOIN plt1 t3 ON (t1.c = t3.c);
+
+-- partition-wise join can not be applied for a join between list and range
+-- partitioned table
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_n t1 FULL JOIN prt1 t2 ON (t1.c = t2.c);