From b30fb56b07a885f3476fe05920249f4832ca8da5 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 16 Mar 2017 13:34:59 -0400 Subject: [PATCH] postgres_fdw: Push down FULL JOINs with restriction clauses. The previous deparsing logic wasn't smart enough to produce subqueries when deparsing; make it smart enough to do that. However, we only do it that way when necessary, because it generates more complicated SQL which will be harder for any humans reading the queries to understand. Etsuro Fujita, reviewed by Ashutosh Bapat Discussion: http://postgr.es/m/c449261a-b033-dc02-9254-2fe5b7044795@lab.ntt.co.jp --- contrib/postgres_fdw/deparse.c | 272 +++++++++++++++++- .../postgres_fdw/expected/postgres_fdw.out | 160 +++++++++-- contrib/postgres_fdw/postgres_fdw.c | 67 ++++- contrib/postgres_fdw/postgres_fdw.h | 16 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 23 ++ 5 files changed, 498 insertions(+), 40 deletions(-) diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index d2b94aaf3b..1d7ec28806 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -109,6 +109,8 @@ typedef struct deparse_expr_cxt /* Handy macro to add relation name qualification */ #define ADD_REL_QUALIFIER(buf, varno) \ appendStringInfo((buf), "%s%d.", REL_ALIAS_PREFIX, (varno)) +#define SUBQUERY_REL_ALIAS_PREFIX "s" +#define SUBQUERY_COL_ALIAS_PREFIX "c" /* * Functions to determine whether an expression can be evaluated safely on @@ -132,6 +134,7 @@ static void deparseTargetList(StringInfo buf, List **retrieved_attrs); static void deparseExplicitTargetList(List *tlist, List **retrieved_attrs, deparse_expr_cxt *context); +static void deparseSubqueryTargetList(deparse_expr_cxt *context); static void deparseReturningList(StringInfo buf, PlannerInfo *root, Index rtindex, Relation rel, bool trig_after_row, @@ -159,7 +162,7 @@ static void printRemoteParam(int paramindex, Oid paramtype, int32 paramtypmod, deparse_expr_cxt *context); static void printRemotePlaceholder(Oid paramtype, int32 paramtypmod, deparse_expr_cxt *context); -static void deparseSelectSql(List *tlist, List **retrieved_attrs, +static void deparseSelectSql(List *tlist, bool is_subquery, List **retrieved_attrs, deparse_expr_cxt *context); static void deparseLockingClause(deparse_expr_cxt *context); static void appendOrderByClause(List *pathkeys, deparse_expr_cxt *context); @@ -167,6 +170,9 @@ static void appendConditions(List *exprs, deparse_expr_cxt *context); static void deparseFromExprForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *joinrel, bool use_alias, List **params_list); static void deparseFromExpr(List *quals, deparse_expr_cxt *context); +static void deparseRangeTblRef(StringInfo buf, PlannerInfo *root, + RelOptInfo *foreignrel, bool make_subquery, + List **params_list); static void deparseAggref(Aggref *node, deparse_expr_cxt *context); static void appendGroupByClause(List *tlist, deparse_expr_cxt *context); static void appendAggOrderBy(List *orderList, List *targetList, @@ -175,6 +181,14 @@ static void appendFunctionName(Oid funcid, deparse_expr_cxt *context); static Node *deparseSortGroupClause(Index ref, List *tlist, deparse_expr_cxt *context); +/* + * Helper functions + */ +static bool is_subquery_var(Var *node, RelOptInfo *foreignrel, + int *relno, int *colno); +static void get_relation_column_alias_ids(Var *node, RelOptInfo *foreignrel, + int *relno, int *colno); + /* * Examine each qual clause in input_conds, and classify them into two groups, @@ -896,12 +910,16 @@ build_tlist_to_deparse(RelOptInfo *foreignrel) * * pathkeys is the list of pathkeys to order the result by. * + * is_subquery is the flag to indicate whether to deparse the specified + * relation as a subquery. + * * List of columns selected is returned in retrieved_attrs. */ extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel, List *tlist, List *remote_conds, List *pathkeys, - List **retrieved_attrs, List **params_list) + bool is_subquery, List **retrieved_attrs, + List **params_list) { deparse_expr_cxt context; PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) rel->fdw_private; @@ -925,7 +943,7 @@ deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel, context.params_list = params_list; /* Construct SELECT clause */ - deparseSelectSql(tlist, retrieved_attrs, &context); + deparseSelectSql(tlist, is_subquery, retrieved_attrs, &context); /* * For upper relations, the WHERE clause is built from the remote @@ -972,13 +990,16 @@ deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel, * contains just "SELECT ... ". * * We also create an integer List of the columns being retrieved, which is - * returned to *retrieved_attrs. + * returned to *retrieved_attrs, unless we deparse the specified relation + * as a subquery. * - * tlist is the list of desired columns. Read prologue of - * deparseSelectStmtForRel() for details. + * tlist is the list of desired columns. is_subquery is the flag to + * indicate whether to deparse the specified relation as a subquery. + * Read prologue of deparseSelectStmtForRel() for details. */ static void -deparseSelectSql(List *tlist, List **retrieved_attrs, deparse_expr_cxt *context) +deparseSelectSql(List *tlist, bool is_subquery, List **retrieved_attrs, + deparse_expr_cxt *context) { StringInfo buf = context->buf; RelOptInfo *foreignrel = context->foreignrel; @@ -990,10 +1011,22 @@ deparseSelectSql(List *tlist, List **retrieved_attrs, deparse_expr_cxt *context) */ appendStringInfoString(buf, "SELECT "); - if (foreignrel->reloptkind == RELOPT_JOINREL || - foreignrel->reloptkind == RELOPT_UPPER_REL) + if (is_subquery) + { + /* + * For a relation that is deparsed as a subquery, emit expressions + * specified in the relation's reltarget. Note that since this is + * for the subquery, no need to care about *retrieved_attrs. + */ + deparseSubqueryTargetList(context); + } + else if (foreignrel->reloptkind == RELOPT_JOINREL || + foreignrel->reloptkind == RELOPT_UPPER_REL) { - /* For a join relation use the input tlist */ + /* + * For a join or upper relation the input tlist gives the list of + * columns required to be fetched from the foreign server. + */ deparseExplicitTargetList(tlist, retrieved_attrs, context); } else @@ -1155,10 +1188,18 @@ deparseLockingClause(deparse_expr_cxt *context) StringInfo buf = context->buf; PlannerInfo *root = context->root; RelOptInfo *rel = context->scanrel; + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) rel->fdw_private; int relid = -1; while ((relid = bms_next_member(rel->relids, relid)) >= 0) { + /* + * Ignore relation if it appears in a lower subquery. Locking clause + * for such a relation is included in the subquery if necessary. + */ + if (bms_is_member(relid, fpinfo->lower_subquery_rels)) + continue; + /* * Add FOR UPDATE/SHARE if appropriate. We apply locking during the * initial row fetch, rather than later on as is done for local @@ -1329,6 +1370,40 @@ deparseExplicitTargetList(List *tlist, List **retrieved_attrs, appendStringInfoString(buf, "NULL"); } +/* + * Emit expressions specified in the given relation's reltarget. + * + * This is used for deparsing the given relation as a subquery. + */ +static void +deparseSubqueryTargetList(deparse_expr_cxt *context) +{ + StringInfo buf = context->buf; + RelOptInfo *foreignrel = context->foreignrel; + bool first; + ListCell *lc; + + /* Should only be called in these cases. */ + Assert(foreignrel->reloptkind == RELOPT_BASEREL || + foreignrel->reloptkind == RELOPT_JOINREL); + + first = true; + foreach(lc, foreignrel->reltarget->exprs) + { + Node *node = (Node *) lfirst(lc); + + if (!first) + appendStringInfoString(buf, ", "); + first = false; + + deparseExpr((Expr *) node, context); + } + + /* Don't generate bad syntax if no expressions */ + if (first) + appendStringInfoString(buf, "NULL"); +} + /* * Construct FROM clause for given relation * @@ -1344,18 +1419,18 @@ deparseFromExprForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, if (foreignrel->reloptkind == RELOPT_JOINREL) { - RelOptInfo *rel_o = fpinfo->outerrel; - RelOptInfo *rel_i = fpinfo->innerrel; StringInfoData join_sql_o; StringInfoData join_sql_i; /* Deparse outer relation */ initStringInfo(&join_sql_o); - deparseFromExprForRel(&join_sql_o, root, rel_o, true, params_list); + deparseRangeTblRef(&join_sql_o, root, fpinfo->outerrel, + fpinfo->make_outerrel_subquery, params_list); /* Deparse inner relation */ initStringInfo(&join_sql_i); - deparseFromExprForRel(&join_sql_i, root, rel_i, true, params_list); + deparseRangeTblRef(&join_sql_i, root, fpinfo->innerrel, + fpinfo->make_innerrel_subquery, params_list); /* * For a join relation FROM clause entry is deparsed as @@ -1410,6 +1485,63 @@ deparseFromExprForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, } } +/* + * Append FROM clause entry for the given relation into buf. + */ +static void +deparseRangeTblRef(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, + bool make_subquery, List **params_list) +{ + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; + + /* Should only be called in these cases. */ + Assert(foreignrel->reloptkind == RELOPT_BASEREL || + foreignrel->reloptkind == RELOPT_JOINREL); + + Assert(fpinfo->local_conds == NIL); + + /* If make_subquery is true, deparse the relation as a subquery. */ + if (make_subquery) + { + List *retrieved_attrs; + int ncols; + + /* Deparse the subquery representing the relation. */ + appendStringInfoChar(buf, '('); + deparseSelectStmtForRel(buf, root, foreignrel, NIL, + fpinfo->remote_conds, NIL, true, + &retrieved_attrs, params_list); + appendStringInfoChar(buf, ')'); + + /* Append the relation alias. */ + appendStringInfo(buf, " %s%d", SUBQUERY_REL_ALIAS_PREFIX, + fpinfo->relation_index); + + /* + * Append the column aliases if needed. Note that the subquery emits + * expressions specified in the relation's reltarget (see + * deparseSubqueryTargetList). + */ + ncols = list_length(foreignrel->reltarget->exprs); + if (ncols > 0) + { + int i; + + appendStringInfoChar(buf, '('); + for (i = 1; i <= ncols; i++) + { + if (i > 1) + appendStringInfoString(buf, ", "); + + appendStringInfo(buf, "%s%d", SUBQUERY_COL_ALIAS_PREFIX, i); + } + appendStringInfoChar(buf, ')'); + } + } + else + deparseFromExprForRel(buf, root, foreignrel, true, params_list); +} + /* * deparse remote INSERT statement * @@ -2054,10 +2186,25 @@ static void deparseVar(Var *node, deparse_expr_cxt *context) { Relids relids = context->scanrel->relids; + int relno; + int colno; /* Qualify columns when multiple relations are involved. */ bool qualify_col = (bms_num_members(relids) > 1); + /* + * If the Var belongs to the foreign relation that is deparsed as a + * subquery, use the relation and column alias to the Var provided + * by the subquery, instead of the remote name. + */ + if (is_subquery_var(node, context->scanrel, &relno, &colno)) + { + appendStringInfo(context->buf, "%s%d.%s%d", + SUBQUERY_REL_ALIAS_PREFIX, relno, + SUBQUERY_COL_ALIAS_PREFIX, colno); + return; + } + if (bms_is_member(node->varno, relids) && node->varlevelsup == 0) deparseColumnRef(context->buf, node->varno, node->varattno, context->root, qualify_col); @@ -2935,3 +3082,100 @@ deparseSortGroupClause(Index ref, List *tlist, deparse_expr_cxt *context) return (Node *) expr; } + + +/* + * Returns true if given Var is deparsed as a subquery output column, in + * which case, *relno and *colno are set to the IDs for the relation and + * column alias to the Var provided by the subquery. + */ +static bool +is_subquery_var(Var *node, RelOptInfo *foreignrel, int *relno, int *colno) +{ + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; + RelOptInfo *outerrel = fpinfo->outerrel; + RelOptInfo *innerrel = fpinfo->innerrel; + + /* Should only be called in these cases. */ + Assert(foreignrel->reloptkind == RELOPT_BASEREL || + foreignrel->reloptkind == RELOPT_JOINREL || + foreignrel->reloptkind == RELOPT_OTHER_MEMBER_REL); + + /* + * If the given relation isn't a join relation, it doesn't have any lower + * subqueries, so the Var isn't a subquery output column. + */ + if (foreignrel->reloptkind != RELOPT_JOINREL) + return false; + + /* + * If the Var doesn't belong to any lower subqueries, it isn't a subquery + * output column. + */ + if (!bms_is_member(node->varno, fpinfo->lower_subquery_rels)) + return false; + + if (bms_is_member(node->varno, outerrel->relids)) + { + /* + * If outer relation is deparsed as a subquery, the Var is an output + * column of the subquery; get the IDs for the relation/column alias. + */ + if (fpinfo->make_outerrel_subquery) + { + get_relation_column_alias_ids(node, outerrel, relno, colno); + return true; + } + + /* Otherwise, recurse into the outer relation. */ + return is_subquery_var(node, outerrel, relno, colno); + } + else + { + Assert(bms_is_member(node->varno, innerrel->relids)); + + /* + * If inner relation is deparsed as a subquery, the Var is an output + * column of the subquery; get the IDs for the relation/column alias. + */ + if (fpinfo->make_innerrel_subquery) + { + get_relation_column_alias_ids(node, innerrel, relno, colno); + return true; + } + + /* Otherwise, recurse into the inner relation. */ + return is_subquery_var(node, innerrel, relno, colno); + } +} + +/* + * Get the IDs for the relation and column alias to given Var belonging to + * given relation, which are returned into *relno and *colno. + */ +static void +get_relation_column_alias_ids(Var *node, RelOptInfo *foreignrel, + int *relno, int *colno) +{ + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private; + int i; + ListCell *lc; + + /* Get the relation alias ID */ + *relno = fpinfo->relation_index; + + /* Get the column alias ID */ + i = 1; + foreach(lc, foreignrel->reltarget->exprs) + { + if (equal(lfirst(lc), (Node *) node)) + { + *colno = i; + return; + } + i++; + } + + /* Shouldn't get here */ + elog(ERROR, "unexpected expression in subquery output"); +} diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 0b9e3e4537..059c5c309a 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -1217,25 +1217,16 @@ SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1. (10 rows) -- full outer join with restrictions on the joining relations +-- a. the joining relations are both base relations EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1; - QUERY PLAN ------------------------------------------------------------------------------------------------- - Sort + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan Output: ft4.c1, ft5.c1 - Sort Key: ft4.c1, ft5.c1 - -> Hash Full Join - Output: ft4.c1, ft5.c1 - Hash Cond: (ft4.c1 = ft5.c1) - -> Foreign Scan on public.ft4 - Output: ft4.c1, ft4.c2, ft4.c3 - Remote SQL: SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60)) - -> Hash - Output: ft5.c1 - -> Foreign Scan on public.ft5 - Output: ft5.c1 - Remote SQL: SELECT c1 FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60)) -(14 rows) + Relations: (public.ft4) FULL JOIN (public.ft5) + Remote SQL: SELECT s4.c1, s5.c1 FROM ((SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s4(c1) FULL JOIN (SELECT c1 FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s5(c1) ON (((s4.c1 = s5.c1)))) ORDER BY s4.c1 ASC NULLS LAST, s5.c1 ASC NULLS LAST +(4 rows) SELECT t1.c1, t2.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1; c1 | c1 @@ -1250,6 +1241,125 @@ SELECT t1.c1, t2.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL | 57 (8 rows) +EXPLAIN (VERBOSE, COSTS OFF) +SELECT 1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (TRUE) OFFSET 10 LIMIT 10; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + Output: 1 + -> Foreign Scan + Output: 1 + Relations: (public.ft4) FULL JOIN (public.ft5) + Remote SQL: SELECT NULL FROM ((SELECT NULL FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s4 FULL JOIN (SELECT NULL FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s5 ON (TRUE)) +(6 rows) + +SELECT 1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (TRUE) OFFSET 10 LIMIT 10; + ?column? +---------- + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 +(10 rows) + +-- b. one of the joining relations is a base relation and the other is a join +-- relation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM ft4 t2 LEFT JOIN ft5 t3 ON (t2.c1 = t3.c1) WHERE (t2.c1 between 50 and 60)) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: ft4.c1, t2.c1, t3.c1 + Relations: (public.ft4) FULL JOIN ((public.ft4 t2) LEFT JOIN (public.ft5 t3)) + Remote SQL: SELECT s4.c1, s8.c1, s8.c2 FROM ((SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s4(c1) FULL JOIN (SELECT r5.c1, r6.c1 FROM ("S 1"."T 3" r5 LEFT JOIN "S 1"."T 4" r6 ON (((r5.c1 = r6.c1)))) WHERE ((r5.c1 >= 50)) AND ((r5.c1 <= 60))) s8(c1, c2) ON (((s4.c1 = s8.c1)))) ORDER BY s4.c1 ASC NULLS LAST, s8.c1 ASC NULLS LAST, s8.c2 ASC NULLS LAST +(4 rows) + +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM ft4 t2 LEFT JOIN ft5 t3 ON (t2.c1 = t3.c1) WHERE (t2.c1 between 50 and 60)) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b; + c1 | a | b +----+----+---- + 50 | 50 | + 52 | 52 | + 54 | 54 | 54 + 56 | 56 | + 58 | 58 | + 60 | 60 | 60 +(6 rows) + +-- c. test deparsing the remote query as nested subqueries +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: ft4.c1, ft4_1.c1, ft5.c1 + Relations: (public.ft4) FULL JOIN ((public.ft4) FULL JOIN (public.ft5)) + Remote SQL: SELECT s4.c1, s10.c1, s10.c2 FROM ((SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s4(c1) FULL JOIN (SELECT s8.c1, s9.c1 FROM ((SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s8(c1) FULL JOIN (SELECT c1 FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s9(c1) ON (((s8.c1 = s9.c1)))) WHERE (((s8.c1 IS NULL) OR (s8.c1 IS NOT NULL)))) s10(c1, c2) ON (((s4.c1 = s10.c1)))) ORDER BY s4.c1 ASC NULLS LAST, s10.c1 ASC NULLS LAST, s10.c2 ASC NULLS LAST +(4 rows) + +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b; + c1 | a | b +----+----+---- + 50 | 50 | + 52 | 52 | + 54 | 54 | 54 + 56 | 56 | + 58 | 58 | + 60 | 60 | 60 + | | 51 + | | 57 +(8 rows) + +-- d. test deparsing rowmarked relations as subqueries +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + LockRows + Output: "T 3".c1, ft4.c1, ft5.c1, "T 3".ctid, ft4.*, ft5.* + -> Nested Loop + Output: "T 3".c1, ft4.c1, ft5.c1, "T 3".ctid, ft4.*, ft5.* + -> Foreign Scan + Output: ft4.c1, ft4.*, ft5.c1, ft5.* + Relations: (public.ft4) FULL JOIN (public.ft5) + Remote SQL: SELECT s8.c1, s8.c2, s9.c1, s9.c2 FROM ((SELECT c1, ROW(c1, c2, c3) FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s8(c1, c2) FULL JOIN (SELECT c1, ROW(c1, c2, c3) FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s9(c1, c2) ON (((s8.c1 = s9.c1)))) WHERE (((s8.c1 IS NULL) OR (s8.c1 IS NOT NULL))) ORDER BY s8.c1 ASC NULLS LAST, s9.c1 ASC NULLS LAST + -> Hash Full Join + Output: ft4.c1, ft4.*, ft5.c1, ft5.* + Hash Cond: (ft4.c1 = ft5.c1) + Filter: ((ft4.c1 IS NULL) OR (ft4.c1 IS NOT NULL)) + -> Foreign Scan on public.ft4 + Output: ft4.c1, ft4.* + Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60)) + -> Hash + Output: ft5.c1, ft5.* + -> Foreign Scan on public.ft5 + Output: ft5.c1, ft5.* + Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60)) + -> Materialize + Output: "T 3".c1, "T 3".ctid + -> Seq Scan on "S 1"."T 3" + Output: "T 3".c1, "T 3".ctid + Filter: ("T 3".c1 = 50) +(25 rows) + +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1; + c1 | a | b +----+----+---- + 50 | 50 | + 50 | 52 | + 50 | 54 | 54 + 50 | 56 | + 50 | 58 | + 50 | 60 | 60 + 50 | | 51 + 50 | | 57 +(8 rows) + -- full outer join + inner join EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c1, t3.c1 FROM ft4 t1 INNER JOIN ft5 t2 ON (t1.c1 = t2.c1 + 1 and t1.c1 between 50 and 60) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) ORDER BY t1.c1, t2.c1, t3.c1 LIMIT 10; @@ -3062,6 +3172,24 @@ select avg(t1.c1), sum(t2.c1) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) gr | 9 (3 rows) +-- Aggregate over FULL join needing to deparse the joining relations as +-- subqueries. +explain (verbose, costs off) +select count(*), sum(t1.c1), avg(t2.c1) from (select c1 from ft4 where c1 between 50 and 60) t1 full join (select c1 from ft5 where c1 between 50 and 60) t2 on (t1.c1 = t2.c1); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Foreign Scan + Output: (count(*)), (sum(ft4.c1)), (avg(ft5.c1)) + Relations: Aggregate on ((public.ft4) FULL JOIN (public.ft5)) + Remote SQL: SELECT count(*), sum(s4.c1), avg(s5.c1) FROM ((SELECT c1 FROM "S 1"."T 3" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s4(c1) FULL JOIN (SELECT c1 FROM "S 1"."T 4" WHERE ((c1 >= 50)) AND ((c1 <= 60))) s5(c1) ON (((s4.c1 = s5.c1)))) +(4 rows) + +select count(*), sum(t1.c1), avg(t2.c1) from (select c1 from ft4 where c1 between 50 and 60) t1 full join (select c1 from ft5 where c1 between 50 and 60) t2 on (t1.c1 = t2.c1); + count | sum | avg +-------+-----+--------------------- + 8 | 330 | 55.5000000000000000 +(1 row) + -- ORDER BY expression is part of the target list but not pushed down to -- foreign server. explain (verbose, costs off) diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 990313a597..e8cb2d0b4d 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -669,6 +669,13 @@ postgresGetForeignRelSize(PlannerInfo *root, if (*refname && strcmp(refname, relname) != 0) appendStringInfo(fpinfo->relation_name, " %s", quote_identifier(rte->eref->aliasname)); + + /* No outer and inner relations. */ + fpinfo->make_outerrel_subquery = false; + fpinfo->make_innerrel_subquery = false; + fpinfo->lower_subquery_rels = NULL; + /* Set the relation index. */ + fpinfo->relation_index = baserel->relid; } /* @@ -1238,7 +1245,7 @@ postgresGetForeignPlan(PlannerInfo *root, initStringInfo(&sql); deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist, remote_conds, best_path->path.pathkeys, - &retrieved_attrs, ¶ms_list); + false, &retrieved_attrs, ¶ms_list); /* * Build the fdw_private list that will be available to the executor. @@ -2550,8 +2557,8 @@ estimate_path_cost_size(PlannerInfo *root, initStringInfo(&sql); appendStringInfoString(&sql, "EXPLAIN "); deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist, - remote_conds, pathkeys, &retrieved_attrs, - NULL); + remote_conds, pathkeys, false, + &retrieved_attrs, NULL); /* Get the remote estimate */ conn = GetConnection(fpinfo->user, false); @@ -4149,11 +4156,23 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, fpinfo->innerrel = innerrel; fpinfo->jointype = jointype; + /* + * By default, both the input relations are not required to be deparsed + * as subqueries, but there might be some relations covered by the input + * relations that are required to be deparsed as subqueries, so save the + * relids of those relations for later use by the deparser. + */ + fpinfo->make_outerrel_subquery = false; + fpinfo->make_innerrel_subquery = false; + Assert(bms_is_subset(fpinfo_o->lower_subquery_rels, outerrel->relids)); + Assert(bms_is_subset(fpinfo_i->lower_subquery_rels, innerrel->relids)); + fpinfo->lower_subquery_rels = bms_union(fpinfo_o->lower_subquery_rels, + fpinfo_i->lower_subquery_rels); + /* * Pull the other remote conditions from the joining relations into join * clauses or other remote clauses (remote_conds) of this relation - * wherever possible. This avoids building subqueries at every join step, - * which is not currently supported by the deparser logic. + * wherever possible. This avoids building subqueries at every join step. * * For an inner join, clauses from both the relations are added to the * other remote clauses. For LEFT and RIGHT OUTER join, the clauses from @@ -4164,8 +4183,7 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, * * For a FULL OUTER JOIN, the other clauses from either relation can not * be added to the joinclauses or remote_conds, since each relation acts - * as an outer relation for the other. Consider such full outer join as - * unshippable because of the reasons mentioned above in this comment. + * as an outer relation for the other. * * The joining sides can not have local conditions, thus no need to test * shippability of the clauses being pulled up. @@ -4194,8 +4212,29 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, break; case JOIN_FULL: - if (fpinfo_i->remote_conds || fpinfo_o->remote_conds) - return false; + + /* + * In this case, if any of the input relations has conditions, + * we need to deparse that relation as a subquery so that the + * conditions can be evaluated before the join. Remember it in + * the fpinfo of this relation so that the deparser can take + * appropriate action. Also, save the relids of base relations + * covered by that relation for later use by the deparser. + */ + if (fpinfo_o->remote_conds) + { + fpinfo->make_outerrel_subquery = true; + fpinfo->lower_subquery_rels = + bms_add_members(fpinfo->lower_subquery_rels, + outerrel->relids); + } + if (fpinfo_i->remote_conds) + { + fpinfo->make_innerrel_subquery = true; + fpinfo->lower_subquery_rels = + bms_add_members(fpinfo->lower_subquery_rels, + innerrel->relids); + } break; default: @@ -4276,6 +4315,16 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, get_jointype_name(fpinfo->jointype), fpinfo_i->relation_name->data); + /* + * Set the relation index. This is defined as the position of this + * joinrel in the join_rel_list list plus the length of the rtable list. + * Note that since this joinrel is at the end of the join_rel_list list + * when we are called, we can get the position by list_length. + */ + Assert(fpinfo->relation_index == 0); /* shouldn't be set yet */ + fpinfo->relation_index = + list_length(root->parse->rtable) + list_length(root->join_rel_list); + return true; } diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 46cac55e98..57dbb798fd 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -95,6 +95,20 @@ typedef struct PgFdwRelationInfo /* Grouping information */ List *grouped_tlist; + + /* Subquery information */ + bool make_outerrel_subquery; /* do we deparse outerrel as a + * subquery? */ + bool make_innerrel_subquery; /* do we deparse innerrel as a + * subquery? */ + Relids lower_subquery_rels; /* all relids appearing in lower + * subqueries */ + + /* + * Index of the relation. It is used to create an alias to a subquery + * representing the relation. + */ + int relation_index; } PgFdwRelationInfo; /* in postgres_fdw.c */ @@ -161,7 +175,7 @@ extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); extern List *build_tlist_to_deparse(RelOptInfo *foreignrel); extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel, List *tlist, - List *remote_conds, List *pathkeys, + List *remote_conds, List *pathkeys, bool is_subquery, List **retrieved_attrs, List **params_list); /* in shippable.c */ diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 56b01d0490..8f3edc13e1 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -391,9 +391,26 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10; SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10; -- full outer join with restrictions on the joining relations +-- a. the joining relations are both base relations EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1; SELECT t1.c1, t2.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT 1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (TRUE) OFFSET 10 LIMIT 10; +SELECT 1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (TRUE) OFFSET 10 LIMIT 10; +-- b. one of the joining relations is a base relation and the other is a join +-- relation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM ft4 t2 LEFT JOIN ft5 t3 ON (t2.c1 = t3.c1) WHERE (t2.c1 between 50 and 60)) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b; +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM ft4 t2 LEFT JOIN ft5 t3 ON (t2.c1 = t3.c1) WHERE (t2.c1 between 50 and 60)) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b; +-- c. test deparsing the remote query as nested subqueries +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b; +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b; +-- d. test deparsing rowmarked relations as subqueries +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1; +SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1; -- full outer join + inner join EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c1, t3.c1 FROM ft4 t1 INNER JOIN ft5 t2 ON (t1.c1 = t2.c1 + 1 and t1.c1 between 50 and 60) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) ORDER BY t1.c1, t2.c1, t3.c1 LIMIT 10; @@ -793,6 +810,12 @@ explain (verbose, costs off) select avg(t1.c1), sum(t2.c1) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) group by t2.c1 having (avg(t1.c1) is null and sum(t2.c1) < 10) or sum(t2.c1) is null order by 1 nulls last, 2; select avg(t1.c1), sum(t2.c1) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) group by t2.c1 having (avg(t1.c1) is null and sum(t2.c1) < 10) or sum(t2.c1) is null order by 1 nulls last, 2; +-- Aggregate over FULL join needing to deparse the joining relations as +-- subqueries. +explain (verbose, costs off) +select count(*), sum(t1.c1), avg(t2.c1) from (select c1 from ft4 where c1 between 50 and 60) t1 full join (select c1 from ft5 where c1 between 50 and 60) t2 on (t1.c1 = t2.c1); +select count(*), sum(t1.c1), avg(t2.c1) from (select c1 from ft4 where c1 between 50 and 60) t1 full join (select c1 from ft5 where c1 between 50 and 60) t2 on (t1.c1 = t2.c1); + -- ORDER BY expression is part of the target list but not pushed down to -- foreign server. explain (verbose, costs off) -- 2.40.0