ALTER TABLE fprt2_p2 SET (autovacuum_enabled = 'false');
INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
-CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250)
+CREATE FOREIGN TABLE ftprt2_p1 (b int, c varchar, a int)
SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
+ALTER TABLE fprt2 ATTACH PARTITION ftprt2_p1 FOR VALUES FROM (0) TO (250);
CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
ANALYZE fprt2;
8 | |
(5 rows)
--- with whole-row reference
+-- with whole-row reference; partitionwise join does not apply
EXPLAIN (COSTS OFF)
-SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
- QUERY PLAN
----------------------------------------------------------------------------------
+SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2;
+ QUERY PLAN
+--------------------------------------------------------
Sort
Sort Key: ((t1.*)::fprt1), ((t2.*)::fprt2)
- -> Append
- -> Foreign Scan
- Relations: (public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)
- -> Foreign Scan
- Relations: (public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)
-(7 rows)
+ -> Hash Full Join
+ Hash Cond: (t1.a = t2.b)
+ -> Append
+ -> Foreign Scan on ftprt1_p1 t1
+ -> Foreign Scan on ftprt1_p2 t1_1
+ -> Hash
+ -> Append
+ -> Foreign Scan on ftprt2_p1 t2
+ -> Foreign Scan on ftprt2_p2 t2_1
+(11 rows)
-SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
- t1 | t2
+SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2;
+ wr | wr
----------------+----------------
(0,0,0000) | (0,0,0000)
+ (50,50,0001) |
+ (100,100,0002) |
(150,150,0003) | (150,150,0003)
+ (200,200,0004) |
(250,250,0005) | (250,250,0005)
+ (300,300,0006) |
+ (350,350,0007) |
(400,400,0008) | (400,400,0008)
-(4 rows)
+ (450,450,0009) |
+ | (75,75,0001)
+ | (225,225,0004)
+ | (325,325,0006)
+ | (475,475,0009)
+(14 rows)
-- join with lateral reference
EXPLAIN (COSTS OFF)
| | 475 | t2_phv
(14 rows)
+-- test FOR UPDATE; partitionwise join does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
+ QUERY PLAN
+--------------------------------------------------------------
+ LockRows
+ -> Sort
+ Sort Key: t1.a
+ -> Hash Join
+ Hash Cond: (t2.b = t1.a)
+ -> Append
+ -> Foreign Scan on ftprt2_p1 t2
+ -> Foreign Scan on ftprt2_p2 t2_1
+ -> Hash
+ -> Append
+ -> Foreign Scan on ftprt1_p1 t1
+ -> Foreign Scan on ftprt1_p2 t1_1
+(12 rows)
+
+SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
+ a | b
+-----+-----
+ 0 | 0
+ 150 | 150
+ 250 | 250
+ 400 | 400
+(4 rows)
+
RESET enable_partitionwise_join;
-- ===================================================================
-- test partitionwise aggregates
ALTER TABLE fprt2_p2 SET (autovacuum_enabled = 'false');
INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
-CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250)
+CREATE FOREIGN TABLE ftprt2_p1 (b int, c varchar, a int)
SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
+ALTER TABLE fprt2 ATTACH PARTITION ftprt2_p1 FOR VALUES FROM (0) TO (250);
CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
ANALYZE fprt2;
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
--- with whole-row reference
+-- with whole-row reference; partitionwise join does not apply
EXPLAIN (COSTS OFF)
-SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
-SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
+SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2;
+SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2;
-- join with lateral reference
EXPLAIN (COSTS OFF)
SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE a % 25 = 0) t1 FULL JOIN (SELECT 't2_phv' phv, * FROM fprt2 WHERE b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY t1.a, t2.b;
SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE a % 25 = 0) t1 FULL JOIN (SELECT 't2_phv' phv, * FROM fprt2 WHERE b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY t1.a, t2.b;
+-- test FOR UPDATE; partitionwise join does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
+SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
+
RESET enable_partitionwise_join;
WRITE_UINT_FIELD(baserestrict_min_security);
WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins);
+ WRITE_BOOL_FIELD(consider_partitionwise_join);
WRITE_BITMAPSET_FIELD(top_parent_relids);
WRITE_NODE_FIELD(partitioned_child_rels);
}
did_pruning = true;
}
+ /*
+ * If this is a partitioned baserel, set the consider_partitionwise_join
+ * flag; currently, we only consider partitionwise joins with the baserel
+ * if its targetlist doesn't contain a whole-row Var.
+ */
+ if (enable_partitionwise_join &&
+ rel->reloptkind == RELOPT_BASEREL &&
+ rte->relkind == RELKIND_PARTITIONED_TABLE &&
+ rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL)
+ rel->consider_partitionwise_join = true;
+
/*
* Initialize to compute size estimates for whole append relation.
*
childrel = find_base_rel(root, childRTindex);
Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
- if (rel->part_scheme)
- {
- AttrNumber attno;
-
- /*
- * We need attr_needed data for building targetlist of a join
- * relation representing join between matching partitions for
- * partitionwise join. A given attribute of a child will be needed
- * in the same highest joinrel where the corresponding attribute
- * of parent is needed. Hence it suffices to use the same Relids
- * set for parent and child.
- */
- for (attno = rel->min_attr; attno <= rel->max_attr; attno++)
- {
- int index = attno - rel->min_attr;
- Relids attr_needed = rel->attr_needed[index];
-
- /* System attributes do not need translation. */
- if (attno <= 0)
- {
- Assert(rel->min_attr == childrel->min_attr);
- childrel->attr_needed[index] = attr_needed;
- }
- else
- {
- Var *var = list_nth_node(Var,
- appinfo->translated_vars,
- attno - 1);
- int child_index;
-
- /*
- * Ignore any column dropped from the parent.
- * Corresponding Var won't have any translation. It won't
- * have attr_needed information, since it can not be
- * referenced in the query.
- */
- if (var == NULL)
- {
- Assert(attr_needed == NULL);
- continue;
- }
-
- child_index = var->varattno - childrel->min_attr;
- childrel->attr_needed[child_index] = attr_needed;
- }
- }
- }
-
/*
* Copy/Modify targetlist. Even if this child is deemed empty, we need
* its targetlist in case it falls on nullable side in a child-join
(Node *) rel->joininfo,
1, &appinfo);
+ /*
+ * Note: we could compute appropriate attr_needed data for the child's
+ * variables, by transforming the parent's attr_needed through the
+ * translated_vars mapping. However, currently there's no need
+ * because attr_needed is only examined for base relations not
+ * otherrels. So we just leave the child's attr_needed empty.
+ */
+
+ /*
+ * If we consider partitionwise joins with the parent rel, do the same
+ * for partitioned child rels.
+ */
+ if (rel->consider_partitionwise_join &&
+ childRTE->relkind == RELKIND_PARTITIONED_TABLE)
+ childrel->consider_partitionwise_join = true;
+
/*
* If parallelism is allowable for this query in general, see whether
* it's allowable for this childrel in particular. But if we've
if (!IS_PARTITIONED_REL(rel))
return;
+ /* The relation should have consider_partitionwise_join set. */
+ Assert(rel->consider_partitionwise_join);
+
/* Guard against stack overflow due to overly deep partition hierarchy. */
check_stack_depth();
if (!IS_PARTITIONED_REL(joinrel))
return;
+ /* The join relation should have consider_partitionwise_join set. */
+ Assert(joinrel->consider_partitionwise_join);
+
/*
* Since this join relation is partitioned, all the base relations
* participating in this join must be partitioned and so are all the
Assert(IS_PARTITIONED_REL(rel1) && IS_PARTITIONED_REL(rel2));
Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2));
+ /* The joining relations should have consider_partitionwise_join set. */
+ Assert(rel1->consider_partitionwise_join &&
+ rel2->consider_partitionwise_join);
+
/*
* The partition scheme of the join relation should match that of the
* joining relations.
int num_vars; /* number of plain Var tlist entries */
bool has_ph_vars; /* are there PlaceHolderVar entries? */
bool has_non_vars; /* are there other entries? */
- bool has_conv_whole_rows; /* are there ConvertRowtypeExpr
- * entries encapsulating a whole-row
- * Var? */
tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER]; /* has num_vars entries */
} indexed_tlist;
int rtoffset);
static bool extract_query_dependencies_walker(Node *node,
PlannerInfo *context);
-static bool is_converted_whole_row_reference(Node *node);
/*****************************************************************************
*
itlist->tlist = tlist;
itlist->has_ph_vars = false;
itlist->has_non_vars = false;
- itlist->has_conv_whole_rows = false;
/* Find the Vars and fill in the index array */
vinfo = itlist->vars;
}
else if (tle->expr && IsA(tle->expr, PlaceHolderVar))
itlist->has_ph_vars = true;
- else if (is_converted_whole_row_reference((Node *) tle->expr))
- itlist->has_conv_whole_rows = true;
else
itlist->has_non_vars = true;
}
* This is like build_tlist_index, but we only index tlist entries that
* are Vars belonging to some rel other than the one specified. We will set
* has_ph_vars (allowing PlaceHolderVars to be matched), but not has_non_vars
- * (so nothing other than Vars and PlaceHolderVars can be matched). In case of
- * DML, where this function will be used, returning lists from child relations
- * will be appended similar to a simple append relation. That does not require
- * fixing ConvertRowtypeExpr references. So, those are not considered here.
+ * (so nothing other than Vars and PlaceHolderVars can be matched).
*/
static indexed_tlist *
build_tlist_index_other_vars(List *tlist, Index ignore_rel)
itlist->tlist = tlist;
itlist->has_ph_vars = false;
itlist->has_non_vars = false;
- itlist->has_conv_whole_rows = false;
/* Find the desired Vars and fill in the index array */
vinfo = itlist->vars;
fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
{
Var *newvar;
- bool converted_whole_row;
if (node == NULL)
return NULL;
}
if (IsA(node, Param))
return fix_param_node(context->root, (Param *) node);
-
/* Try matching more complex expressions too, if tlists have any */
- converted_whole_row = is_converted_whole_row_reference(node);
- if (context->outer_itlist &&
- (context->outer_itlist->has_non_vars ||
- (context->outer_itlist->has_conv_whole_rows && converted_whole_row)))
+ if (context->outer_itlist && context->outer_itlist->has_non_vars)
{
newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->outer_itlist,
if (newvar)
return (Node *) newvar;
}
- if (context->inner_itlist &&
- (context->inner_itlist->has_non_vars ||
- (context->inner_itlist->has_conv_whole_rows && converted_whole_row)))
+ if (context->inner_itlist && context->inner_itlist->has_non_vars)
{
newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->inner_itlist,
/* If no match, just fall through to process it normally */
}
/* Try matching more complex expressions too, if tlist has any */
- if (context->subplan_itlist->has_non_vars ||
- (context->subplan_itlist->has_conv_whole_rows &&
- is_converted_whole_row_reference(node)))
+ if (context->subplan_itlist->has_non_vars)
{
newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->subplan_itlist,
return expression_tree_walker(node, extract_query_dependencies_walker,
(void *) context);
}
-
-/*
- * is_converted_whole_row_reference
- * If the given node is a ConvertRowtypeExpr encapsulating a whole-row
- * reference as implicit cast, return true. Otherwise return false.
- */
-static bool
-is_converted_whole_row_reference(Node *node)
-{
- ConvertRowtypeExpr *convexpr;
-
- if (!node || !IsA(node, ConvertRowtypeExpr))
- return false;
-
- /* Traverse nested ConvertRowtypeExpr's. */
- convexpr = castNode(ConvertRowtypeExpr, node);
- while (convexpr->convertformat == COERCE_IMPLICIT_CAST &&
- IsA(convexpr->arg, ConvertRowtypeExpr))
- convexpr = castNode(ConvertRowtypeExpr, convexpr->arg);
-
- if (IsA(convexpr->arg, Var))
- {
- Var *var = castNode(Var, convexpr->arg);
-
- if (var->varattno == 0)
- return true;
- }
-
- return false;
-}
#include "optimizer/pathnode.h"
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
-#include "optimizer/prep.h"
#include "optimizer/var.h"
#include "utils/lsyscache.h"
Relids relids = joinrel->relids;
ListCell *lc;
- /* This function is called only on the parent relations. */
- Assert(!IS_OTHER_REL(joinrel) && !IS_OTHER_REL(outer_rel) &&
- !IS_OTHER_REL(inner_rel));
-
foreach(lc, root->placeholder_list)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
}
}
}
-
-/*
- * add_placeholders_to_child_joinrel
- * Translate the PHVs in parent's targetlist and add them to the child's
- * targetlist. Also adjust the cost
- */
-void
-add_placeholders_to_child_joinrel(PlannerInfo *root, RelOptInfo *childrel,
- RelOptInfo *parentrel)
-{
- ListCell *lc;
- AppendRelInfo **appinfos;
- int nappinfos;
-
- Assert(IS_JOIN_REL(childrel) && IS_JOIN_REL(parentrel));
- Assert(IS_OTHER_REL(childrel));
-
- /* Nothing to do if no PHVs. */
- if (root->placeholder_list == NIL)
- return;
-
- appinfos = find_appinfos_by_relids(root, childrel->relids, &nappinfos);
- foreach(lc, parentrel->reltarget->exprs)
- {
- PlaceHolderVar *phv = lfirst(lc);
-
- if (IsA(phv, PlaceHolderVar))
- {
- /*
- * In case the placeholder Var refers to any of the parent
- * relations, translate it to refer to the corresponding child.
- */
- if (bms_overlap(phv->phrels, parentrel->relids) &&
- childrel->reloptkind == RELOPT_OTHER_JOINREL)
- {
- phv = (PlaceHolderVar *) adjust_appendrel_attrs(root,
- (Node *) phv,
- nappinfos,
- appinfos);
- }
-
- childrel->reltarget->exprs = lappend(childrel->reltarget->exprs,
- phv);
- }
- }
-
- /* Adjust the cost and width of child targetlist. */
- childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup;
- childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple;
- childrel->reltarget->width = parentrel->reltarget->width;
-
- pfree(appinfos);
-}
static void build_joinrel_partition_info(RelOptInfo *joinrel,
RelOptInfo *outer_rel, RelOptInfo *inner_rel,
List *restrictlist, JoinType jointype);
+static void build_child_join_reltarget(PlannerInfo *root,
+ RelOptInfo *parentrel,
+ RelOptInfo *childrel,
+ int nappinfos,
+ AppendRelInfo **appinfos);
/*
rel->baserestrict_min_security = UINT_MAX;
rel->joininfo = NIL;
rel->has_eclass_joins = false;
+ rel->consider_partitionwise_join = false; /* might get changed later */
rel->part_scheme = NULL;
rel->nparts = 0;
rel->boundinfo = NULL;
joinrel->baserestrict_min_security = UINT_MAX;
joinrel->joininfo = NIL;
joinrel->has_eclass_joins = false;
+ joinrel->consider_partitionwise_join = false; /* might get changed later */
joinrel->top_parent_relids = NULL;
joinrel->part_scheme = NULL;
joinrel->nparts = 0;
/* Only joins between "other" relations land here. */
Assert(IS_OTHER_REL(outer_rel) && IS_OTHER_REL(inner_rel));
+ /* The parent joinrel should have consider_partitionwise_join set. */
+ Assert(parent_joinrel->consider_partitionwise_join);
+
joinrel->reloptkind = RELOPT_OTHER_JOINREL;
joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids);
joinrel->rows = 0;
joinrel->baserestrictcost.per_tuple = 0;
joinrel->joininfo = NIL;
joinrel->has_eclass_joins = false;
+ joinrel->consider_partitionwise_join = false; /* might get changed later */
joinrel->top_parent_relids = NULL;
joinrel->part_scheme = NULL;
joinrel->nparts = 0;
/* Compute information relevant to foreign relations. */
set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
- /* Build targetlist */
- build_joinrel_tlist(root, joinrel, outer_rel);
- build_joinrel_tlist(root, joinrel, inner_rel);
- /* Add placeholder variables. */
- add_placeholders_to_child_joinrel(root, joinrel, parent_joinrel);
+ appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
+
+ /* Set up reltarget struct */
+ build_child_join_reltarget(root, parent_joinrel, joinrel,
+ nappinfos, appinfos);
/* Construct joininfo list. */
- appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
joinrel->joininfo = (List *) adjust_appendrel_attrs(root,
(Node *) parent_joinrel->joininfo,
nappinfos,
/* Child joinrel is parallel safe if parent is parallel safe. */
joinrel->consider_parallel = parent_joinrel->consider_parallel;
-
/* Set estimates of the child-joinrel's size. */
set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
sjinfo, restrictlist);
build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *input_rel)
{
- Relids relids;
+ Relids relids = joinrel->relids;
ListCell *vars;
- /* attrs_needed refers to parent relids and not those of a child. */
- if (joinrel->top_parent_relids)
- relids = joinrel->top_parent_relids;
- else
- relids = joinrel->relids;
-
foreach(vars, input_rel->reltarget->exprs)
{
Var *var = (Var *) lfirst(vars);
/*
* Otherwise, anything in a baserel or joinrel targetlist ought to be
- * a Var. Children of a partitioned table may have ConvertRowtypeExpr
- * translating whole-row Var of a child to that of the parent.
- * Children of an inherited table or subquery child rels can not
- * directly participate in a join, so other kinds of nodes here.
+ * a Var. (More general cases can only appear in appendrel child
+ * rels, which will never be seen here.)
*/
- if (IsA(var, Var))
- {
- baserel = find_base_rel(root, var->varno);
- ndx = var->varattno - baserel->min_attr;
- }
- else if (IsA(var, ConvertRowtypeExpr))
- {
- ConvertRowtypeExpr *child_expr = (ConvertRowtypeExpr *) var;
- Var *childvar = (Var *) child_expr->arg;
-
- /*
- * Child's whole-row references are converted to look like those
- * of parent using ConvertRowtypeExpr. There can be as many
- * ConvertRowtypeExpr decorations as the depth of partition tree.
- * The argument to the deepest ConvertRowtypeExpr is expected to
- * be a whole-row reference of the child.
- */
- while (IsA(childvar, ConvertRowtypeExpr))
- {
- child_expr = (ConvertRowtypeExpr *) childvar;
- childvar = (Var *) child_expr->arg;
- }
- Assert(IsA(childvar, Var) &&childvar->varattno == 0);
-
- baserel = find_base_rel(root, childvar->varno);
- ndx = 0 - baserel->min_attr;
- }
- else
+ if (!IsA(var, Var))
elog(ERROR, "unexpected node type in rel targetlist: %d",
(int) nodeTag(var));
+ /* Get the Var's original base rel */
+ baserel = find_base_rel(root, var->varno);
- /* Is the target expression still needed above this joinrel? */
+ /* Is it still needed above this joinrel? */
+ ndx = var->varattno - baserel->min_attr;
if (bms_nonempty_difference(baserel->attr_needed[ndx], relids))
{
/* Yup, add it to the output */
joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, var);
-
- /*
- * Vars have cost zero, so no need to adjust reltarget->cost. Even
- * if it's a ConvertRowtypeExpr, it will be computed only for the
- * base relation, costing nothing for a join.
- */
+ /* Vars have cost zero, so no need to adjust reltarget->cost */
joinrel->reltarget->width += baserel->attr_widths[ndx];
}
}
/*
* We can only consider this join as an input to further partitionwise
- * joins if (a) the input relations are partitioned, (b) the partition
- * schemes match, and (c) we can identify an equi-join between the
- * partition keys. Note that if it were possible for
- * have_partkey_equi_join to return different answers for the same joinrel
- * depending on which join ordering we try first, this logic would break.
- * That shouldn't happen, though, because of the way the query planner
- * deduces implied equalities and reorders the joins. Please see
- * optimizer/README for details.
+ * joins if (a) the input relations are partitioned and have
+ * consider_partitionwise_join=true, (b) the partition schemes match, and
+ * (c) we can identify an equi-join between the partition keys. Note that
+ * if it were possible for have_partkey_equi_join to return different
+ * answers for the same joinrel depending on which join ordering we try
+ * first, this logic would break. That shouldn't happen, though, because
+ * of the way the query planner deduces implied equalities and reorders
+ * the joins. Please see optimizer/README for details.
*/
if (!IS_PARTITIONED_REL(outer_rel) || !IS_PARTITIONED_REL(inner_rel) ||
+ !outer_rel->consider_partitionwise_join ||
+ !inner_rel->consider_partitionwise_join ||
outer_rel->part_scheme != inner_rel->part_scheme ||
!have_partkey_equi_join(joinrel, outer_rel, inner_rel,
jointype, restrictlist))
joinrel->part_rels =
(RelOptInfo **) palloc0(sizeof(RelOptInfo *) * joinrel->nparts);
+ /*
+ * Set the consider_partitionwise_join flag.
+ */
+ Assert(outer_rel->consider_partitionwise_join);
+ Assert(inner_rel->consider_partitionwise_join);
+ joinrel->consider_partitionwise_join = true;
/*
* Construct partition keys for the join.
joinrel->nullable_partexprs[cnt] = nullable_partexpr;
}
}
+
+/*
+ * build_child_join_reltarget
+ * Set up a child-join relation's reltarget from a parent-join relation.
+ */
+static void
+build_child_join_reltarget(PlannerInfo *root,
+ RelOptInfo *parentrel,
+ RelOptInfo *childrel,
+ int nappinfos,
+ AppendRelInfo **appinfos)
+{
+ /* Build the targetlist */
+ childrel->reltarget->exprs = (List *)
+ adjust_appendrel_attrs(root,
+ (Node *) parentrel->reltarget->exprs,
+ nappinfos, appinfos);
+
+ /* Set the cost and width fields */
+ childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup;
+ childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple;
+ childrel->reltarget->width = parentrel->reltarget->width;
+}
* involving this rel */
bool has_eclass_joins; /* T means joininfo is incomplete */
- /* used by "other" relations */
- Relids top_parent_relids; /* Relids of topmost parents */
+ /* used by partitionwise joins: */
+ bool consider_partitionwise_join; /* consider partitionwise
+ * join paths? (if
+ * partitioned rel) */
+ Relids top_parent_relids; /* Relids of topmost parents (if "other"
+ * rel) */
/* used for partitioned relations */
PartitionScheme part_scheme; /* Partitioning scheme. */
24 | 900 | 100
(5 rows)
+-- Check with whole-row reference; partitionwise aggregation does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
+ QUERY PLAN
+-------------------------------------------------------------
+ Sort
+ Sort Key: t1.x, (sum(t1.y)), (count(((t1.*)::pagg_tab1)))
+ -> HashAggregate
+ Group Key: t1.x
+ -> Hash Join
+ Hash Cond: (t1.x = t2.y)
+ -> Append
+ -> Seq Scan on pagg_tab1_p1 t1
+ -> Seq Scan on pagg_tab1_p2 t1_1
+ -> Seq Scan on pagg_tab1_p3 t1_2
+ -> Hash
+ -> Append
+ -> Seq Scan on pagg_tab2_p1 t2
+ -> Seq Scan on pagg_tab2_p2 t2_1
+ -> Seq Scan on pagg_tab2_p3 t2_2
+(15 rows)
+
+SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
+ x | sum | count
+----+------+-------
+ 0 | 500 | 100
+ 6 | 1100 | 100
+ 12 | 700 | 100
+ 18 | 1300 | 100
+ 24 | 900 | 100
+(5 rows)
+
-- GROUP BY having other matching key
EXPLAIN (COSTS OFF)
SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3;
450 | 0450 | 450 | 0450
(4 rows)
--- left outer join, with whole-row reference
+-- left outer join, with whole-row reference; partitionwise join does not apply
EXPLAIN (COSTS OFF)
SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
QUERY PLAN
--------------------------------------------------
Sort
Sort Key: t1.a, t2.b
- -> Append
- -> Hash Right Join
- Hash Cond: (t2.b = t1.a)
+ -> Hash Right Join
+ Hash Cond: (t2.b = t1.a)
+ -> Append
-> Seq Scan on prt2_p1 t2
- -> Hash
+ -> Seq Scan on prt2_p2 t2_1
+ -> Seq Scan on prt2_p3 t2_2
+ -> Hash
+ -> Append
-> Seq Scan on prt1_p1 t1
Filter: (b = 0)
- -> Hash Right Join
- Hash Cond: (t2_1.b = t1_1.a)
- -> Seq Scan on prt2_p2 t2_1
- -> Hash
-> Seq Scan on prt1_p2 t1_1
Filter: (b = 0)
- -> Hash Right Join
- Hash Cond: (t2_2.b = t1_2.a)
- -> Seq Scan on prt2_p3 t2_2
- -> Hash
-> Seq Scan on prt1_p3 t1_2
Filter: (b = 0)
-(21 rows)
+(16 rows)
SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
t1 | t2
400 |
(9 rows)
+-- merge join when expression with whole-row reference needs to be sorted;
+-- partitionwise join does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------
+ Merge Join
+ Merge Cond: ((t1.a = t2.b) AND (((((t1.*)::prt1))::text) = ((((t2.*)::prt2))::text)))
+ -> Sort
+ Sort Key: t1.a, ((((t1.*)::prt1))::text)
+ -> Result
+ -> Append
+ -> Seq Scan on prt1_p1 t1
+ -> Seq Scan on prt1_p2 t1_1
+ -> Seq Scan on prt1_p3 t1_2
+ -> Sort
+ Sort Key: t2.b, ((((t2.*)::prt2))::text)
+ -> Result
+ -> Append
+ -> Seq Scan on prt2_p1 t2
+ -> Seq Scan on prt2_p2 t2_1
+ -> Seq Scan on prt2_p3 t2_2
+(16 rows)
+
+SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
+ a | b
+----+----
+ 0 | 0
+ 6 | 6
+ 12 | 12
+ 18 | 18
+ 24 | 24
+(5 rows)
+
RESET enable_hashjoin;
RESET enable_nestloop;
--
SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
+-- Check with whole-row reference; partitionwise aggregation does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
+SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
+
-- GROUP BY having other matching key
EXPLAIN (COSTS OFF)
SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3;
SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
--- left outer join, with whole-row reference
+-- left outer join, with whole-row reference; partitionwise join does not apply
EXPLAIN (COSTS OFF)
SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+-- merge join when expression with whole-row reference needs to be sorted;
+-- partitionwise join does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
+SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
+
RESET enable_hashjoin;
RESET enable_nestloop;