From 34ea1ab7fd305afe1124a6e73ada0ebae04b6ebb Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 7 Feb 2019 12:59:47 -0500 Subject: [PATCH] Split create_foreignscan_path() into three functions. Up to now postgres_fdw has been using create_foreignscan_path() to generate not only base-relation paths, but also paths for foreign joins and foreign upperrels. This is wrong, because create_foreignscan_path() calls get_baserel_parampathinfo() which will only do the right thing for baserels. It accidentally fails to fail for unparameterized paths, which are the only ones postgres_fdw (thought it) was handling, but we really need different APIs for the baserel and join cases. In HEAD, the best thing to do seems to be to split up the baserel, joinrel, and upperrel cases into three functions so that they can have different APIs. I haven't actually given create_foreign_join_path a different API in this commit: we should spend a bit of time thinking about just what we want to do there, since perhaps FDWs would want to do something different from the build-up-a-join-pairwise approach that get_joinrel_parampathinfo expects. In the meantime, since postgres_fdw isn't prepared to generate parameterized joins anyway, just give it a defense against trying to plan joins with lateral refs. In addition (and this is what triggered this whole mess) fix bug #15613 from Srinivasan S A, by teaching file_fdw and postgres_fdw that plain baserel foreign paths still have outer refs if the relation has lateral_relids. Add some assertions in relnode.c to catch future occurrences of the same error --- in particular, to catch other FDWs doing that, but also as backstop against core-code mistakes like the one fixed by commit bdd9a99aa. Bug #15613 also needs to be fixed in the back branches, but the appropriate fix will look quite a bit different there, since we don't want to assume that existing FDWs get the word right away. Discussion: https://postgr.es/m/15613-092be1be9576c728@postgresql.org --- contrib/file_fdw/file_fdw.c | 6 +- .../postgres_fdw/expected/postgres_fdw.out | 56 +++++++++ contrib/postgres_fdw/postgres_fdw.c | 90 ++++++++------ contrib/postgres_fdw/sql/postgres_fdw.sql | 26 ++++ doc/src/sgml/fdwhandler.sgml | 8 +- src/backend/optimizer/util/pathnode.c | 111 ++++++++++++++++-- src/backend/optimizer/util/relnode.c | 9 ++ src/include/optimizer/pathnode.h | 13 ++ 8 files changed, 275 insertions(+), 44 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index be626bef6e..391cd0d778 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -556,6 +556,10 @@ fileGetForeignPaths(PlannerInfo *root, * Create a ForeignPath node and add it as only possible path. We use the * fdw_private list of the path to carry the convert_selectively option; * it will be propagated into the fdw_private list of the Plan node. + * + * We don't support pushing join clauses into the quals of this path, but + * it could still have required parameterization due to LATERAL refs in + * its tlist. */ add_path(baserel, (Path *) create_foreignscan_path(root, baserel, @@ -564,7 +568,7 @@ fileGetForeignPaths(PlannerInfo *root, startup_cost, total_cost, NIL, /* no pathkeys */ - NULL, /* no outer rel either */ + baserel->lateral_relids, NULL, /* no extra plan */ coptions)); diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index b3894d0760..029552b152 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -3429,6 +3429,62 @@ select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum fr (2 rows) reset enable_hashagg; +-- bug #15613: bad plan for foreign table scan with lateral reference +EXPLAIN (VERBOSE, COSTS OFF) +SELECT ref_0.c2, subq_1.* +FROM + "S 1"."T 1" AS ref_0, + LATERAL ( + SELECT ref_0."C 1" c1, subq_0.* + FROM (SELECT ref_0.c2, ref_1.c3 + FROM ft1 AS ref_1) AS subq_0 + RIGHT JOIN ft2 AS ref_3 ON (subq_0.c3 = ref_3.c3) + ) AS subq_1 +WHERE ref_0."C 1" < 10 AND subq_1.c3 = '00001' +ORDER BY ref_0."C 1"; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Nested Loop + Output: ref_0.c2, ref_0."C 1", (ref_0.c2), ref_1.c3, ref_0."C 1" + -> Nested Loop + Output: ref_0.c2, ref_0."C 1", ref_1.c3, (ref_0.c2) + -> Index Scan using t1_pkey on "S 1"."T 1" ref_0 + Output: ref_0."C 1", ref_0.c2, ref_0.c3, ref_0.c4, ref_0.c5, ref_0.c6, ref_0.c7, ref_0.c8 + Index Cond: (ref_0."C 1" < 10) + -> Foreign Scan on public.ft1 ref_1 + Output: ref_1.c3, ref_0.c2 + Remote SQL: SELECT c3 FROM "S 1"."T 1" WHERE ((c3 = '00001'::text)) + -> Materialize + Output: ref_3.c3 + -> Foreign Scan on public.ft2 ref_3 + Output: ref_3.c3 + Remote SQL: SELECT c3 FROM "S 1"."T 1" WHERE ((c3 = '00001'::text)) +(15 rows) + +SELECT ref_0.c2, subq_1.* +FROM + "S 1"."T 1" AS ref_0, + LATERAL ( + SELECT ref_0."C 1" c1, subq_0.* + FROM (SELECT ref_0.c2, ref_1.c3 + FROM ft1 AS ref_1) AS subq_0 + RIGHT JOIN ft2 AS ref_3 ON (subq_0.c3 = ref_3.c3) + ) AS subq_1 +WHERE ref_0."C 1" < 10 AND subq_1.c3 = '00001' +ORDER BY ref_0."C 1"; + c2 | c1 | c2 | c3 +----+----+----+------- + 1 | 1 | 1 | 00001 + 2 | 2 | 2 | 00001 + 3 | 3 | 3 | 00001 + 4 | 4 | 4 | 00001 + 5 | 5 | 5 | 00001 + 6 | 6 | 6 | 00001 + 7 | 7 | 7 | 00001 + 8 | 8 | 8 | 00001 + 9 | 9 | 9 | 00001 +(9 rows) + -- Check with placeHolderVars explain (verbose, costs off) select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.c1) from ft1 right join ft2 on (ft1.c1 = ft2.c1)) q(a, b, c) on (ft4.c1 <= q.b); diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 7fcac81e2e..994cec50ce 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -937,6 +937,9 @@ postgresGetForeignPaths(PlannerInfo *root, * baserestrict conditions we were able to send to remote, there might * actually be an indexscan happening there). We already did all the work * to estimate cost and size of this path. + * + * Although this path uses no join clauses, it could still have required + * parameterization due to LATERAL refs in its tlist. */ path = create_foreignscan_path(root, baserel, NULL, /* default pathtarget */ @@ -944,7 +947,7 @@ postgresGetForeignPaths(PlannerInfo *root, fpinfo->startup_cost, fpinfo->total_cost, NIL, /* no pathkeys */ - NULL, /* no outer rel either */ + baserel->lateral_relids, NULL, /* no extra plan */ NIL); /* no fdw_private list */ add_path(baserel, (Path *) path); @@ -3295,7 +3298,7 @@ execute_foreign_modify(EState *estate, TupleTableSlot *planSlot) { PgFdwModifyState *fmstate = (PgFdwModifyState *) resultRelInfo->ri_FdwState; - ItemPointer ctid = NULL; + ItemPointer ctid = NULL; const char **p_values; PGresult *res; int n_rows; @@ -3936,8 +3939,9 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate, ExecStoreVirtualTuple(resultSlot); /* - * If we have any system columns to return, materialize a heap tuple in the - * slot from column values set above and install system columns in that tuple. + * If we have any system columns to return, materialize a heap tuple in + * the slot from column values set above and install system columns in + * that tuple. */ if (dmstate->hasSystemCols) { @@ -4943,16 +4947,28 @@ add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel, useful_pathkeys, -1.0); - add_path(rel, (Path *) - create_foreignscan_path(root, rel, - NULL, - rows, - startup_cost, - total_cost, - useful_pathkeys, - NULL, - sorted_epq_path, - NIL)); + if (IS_SIMPLE_REL(rel)) + add_path(rel, (Path *) + create_foreignscan_path(root, rel, + NULL, + rows, + startup_cost, + total_cost, + useful_pathkeys, + rel->lateral_relids, + sorted_epq_path, + NIL)); + else + add_path(rel, (Path *) + create_foreign_join_path(root, rel, + NULL, + rows, + startup_cost, + total_cost, + useful_pathkeys, + rel->lateral_relids, + sorted_epq_path, + NIL)); } } @@ -5087,6 +5103,13 @@ postgresGetForeignJoinPaths(PlannerInfo *root, if (joinrel->fdw_private) return; + /* + * This code does not work for joins with lateral references, since those + * must have parameterized paths, which we don't generate yet. + */ + if (!bms_is_empty(joinrel->lateral_relids)) + return; + /* * Create unfinished PgFdwRelationInfo entry which is used to indicate * that the join relation is already considered, so that we won't waste @@ -5171,16 +5194,16 @@ postgresGetForeignJoinPaths(PlannerInfo *root, * Create a new join path and add it to the joinrel which represents a * join between foreign tables. */ - joinpath = create_foreignscan_path(root, - joinrel, - NULL, /* default pathtarget */ - rows, - startup_cost, - total_cost, - NIL, /* no pathkeys */ - NULL, /* no required_outer */ - epq_path, - NIL); /* no fdw_private */ + joinpath = create_foreign_join_path(root, + joinrel, + NULL, /* default pathtarget */ + rows, + startup_cost, + total_cost, + NIL, /* no pathkeys */ + joinrel->lateral_relids, + epq_path, + NIL); /* no fdw_private */ /* Add generated path into joinrel by add_path(). */ add_path(joinrel, (Path *) joinpath); @@ -5515,16 +5538,15 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, fpinfo->total_cost = total_cost; /* Create and add foreign path to the grouping relation. */ - grouppath = create_foreignscan_path(root, - grouped_rel, - grouped_rel->reltarget, - rows, - startup_cost, - total_cost, - NIL, /* no pathkeys */ - NULL, /* no required_outer */ - NULL, - NIL); /* no fdw_private */ + grouppath = create_foreign_upper_path(root, + grouped_rel, + grouped_rel->reltarget, + rows, + startup_cost, + total_cost, + NIL, /* no pathkeys */ + NULL, + NIL); /* no fdw_private */ /* Add generated path into grouped_rel by add_path(). */ add_path(grouped_rel, (Path *) grouppath); diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index f438165650..bab8479cf6 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -879,6 +879,32 @@ select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum fr select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum from ft2 t2 group by t2.c1) qry where t1.c2 * 2 = qry.sum and t1.c2 < 3 and t1."C 1" < 100 order by 1; reset enable_hashagg; +-- bug #15613: bad plan for foreign table scan with lateral reference +EXPLAIN (VERBOSE, COSTS OFF) +SELECT ref_0.c2, subq_1.* +FROM + "S 1"."T 1" AS ref_0, + LATERAL ( + SELECT ref_0."C 1" c1, subq_0.* + FROM (SELECT ref_0.c2, ref_1.c3 + FROM ft1 AS ref_1) AS subq_0 + RIGHT JOIN ft2 AS ref_3 ON (subq_0.c3 = ref_3.c3) + ) AS subq_1 +WHERE ref_0."C 1" < 10 AND subq_1.c3 = '00001' +ORDER BY ref_0."C 1"; + +SELECT ref_0.c2, subq_1.* +FROM + "S 1"."T 1" AS ref_0, + LATERAL ( + SELECT ref_0."C 1" c1, subq_0.* + FROM (SELECT ref_0.c2, ref_1.c3 + FROM ft1 AS ref_1) AS subq_0 + RIGHT JOIN ft2 AS ref_3 ON (subq_0.c3 = ref_3.c3) + ) AS subq_1 +WHERE ref_0."C 1" < 10 AND subq_1.c3 = '00001' +ORDER BY ref_0."C 1"; + -- Check with placeHolderVars explain (verbose, costs off) select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.c1) from ft1 right join ft2 on (ft1.c1 = ft2.c1)) q(a, b, c) on (ft4.c1 <= q.b); diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml index 452b776b9e..77038f53f9 100644 --- a/doc/src/sgml/fdwhandler.sgml +++ b/doc/src/sgml/fdwhandler.sgml @@ -309,7 +309,9 @@ GetForeignJoinPaths(PlannerInfo *root, function is called during query planning. As with GetForeignPaths, this function should generate ForeignPath path(s) for the - supplied joinrel, and call add_path to add these + supplied joinrel + (use create_foreign_join_path to build them), + and call add_path to add these paths to the set of paths considered for the join. But unlike GetForeignPaths, it is not necessary that this function succeed in creating at least one path, since paths involving local @@ -369,7 +371,9 @@ GetForeignUpperPaths(PlannerInfo *root, called only if all base relation(s) involved in the query belong to the same FDW. This function should generate ForeignPath path(s) for any post-scan/join processing that the FDW knows how to - perform remotely, and call add_path to add these paths to + perform remotely + (use create_foreign_upper_path to build them), + and call add_path to add these paths to the indicated upper relation. As with GetForeignJoinPaths, it is not necessary that this function succeed in creating any paths, since paths involving local processing are always possible. diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index b57de6b4c6..08133a28fd 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2079,15 +2079,14 @@ create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel, /* * create_foreignscan_path - * Creates a path corresponding to a scan of a foreign table, foreign join, - * or foreign upper-relation processing, returning the pathnode. + * Creates a path corresponding to a scan of a foreign base table, + * returning the pathnode. * * This function is never called from core Postgres; rather, it's expected - * to be called by the GetForeignPaths, GetForeignJoinPaths, or - * GetForeignUpperPaths function of a foreign data wrapper. We make the FDW - * supply all fields of the path, since we do not have any way to calculate - * them in core. However, there is a usually-sane default for the pathtarget - * (rel->reltarget), so we let a NULL for "target" select that. + * to be called by the GetForeignPaths function of a foreign data wrapper. + * We make the FDW supply all fields of the path, since we do not have any way + * to calculate them in core. However, there is a usually-sane default for + * the pathtarget (rel->reltarget), so we let a NULL for "target" select that. */ ForeignPath * create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel, @@ -2100,6 +2099,9 @@ create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel, { ForeignPath *pathnode = makeNode(ForeignPath); + /* Historically some FDWs were confused about when to use this */ + Assert(IS_SIMPLE_REL(rel)); + pathnode->path.pathtype = T_ForeignScan; pathnode->path.parent = rel; pathnode->path.pathtarget = target ? target : rel->reltarget; @@ -2119,6 +2121,101 @@ create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel, return pathnode; } +/* + * create_foreign_join_path + * Creates a path corresponding to a scan of a foreign join, + * returning the pathnode. + * + * This function is never called from core Postgres; rather, it's expected + * to be called by the GetForeignJoinPaths function of a foreign data wrapper. + * We make the FDW supply all fields of the path, since we do not have any way + * to calculate them in core. However, there is a usually-sane default for + * the pathtarget (rel->reltarget), so we let a NULL for "target" select that. + */ +ForeignPath * +create_foreign_join_path(PlannerInfo *root, RelOptInfo *rel, + PathTarget *target, + double rows, Cost startup_cost, Cost total_cost, + List *pathkeys, + Relids required_outer, + Path *fdw_outerpath, + List *fdw_private) +{ + ForeignPath *pathnode = makeNode(ForeignPath); + + /* + * We should use get_joinrel_parampathinfo to handle parameterized paths, + * but the API of this function doesn't support it, and existing + * extensions aren't yet trying to build such paths anyway. For the + * moment just throw an error if someone tries it; eventually we should + * revisit this. + */ + if (!bms_is_empty(required_outer) || !bms_is_empty(rel->lateral_relids)) + elog(ERROR, "parameterized foreign joins are not supported yet"); + + pathnode->path.pathtype = T_ForeignScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target ? target : rel->reltarget; + pathnode->path.param_info = NULL; /* XXX see above */ + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.rows = rows; + pathnode->path.startup_cost = startup_cost; + pathnode->path.total_cost = total_cost; + pathnode->path.pathkeys = pathkeys; + + pathnode->fdw_outerpath = fdw_outerpath; + pathnode->fdw_private = fdw_private; + + return pathnode; +} + +/* + * create_foreign_upper_path + * Creates a path corresponding to an upper relation that's computed + * directly by an FDW, returning the pathnode. + * + * This function is never called from core Postgres; rather, it's expected to + * be called by the GetForeignUpperPaths function of a foreign data wrapper. + * We make the FDW supply all fields of the path, since we do not have any way + * to calculate them in core. However, there is a usually-sane default for + * the pathtarget (rel->reltarget), so we let a NULL for "target" select that. + */ +ForeignPath * +create_foreign_upper_path(PlannerInfo *root, RelOptInfo *rel, + PathTarget *target, + double rows, Cost startup_cost, Cost total_cost, + List *pathkeys, + Path *fdw_outerpath, + List *fdw_private) +{ + ForeignPath *pathnode = makeNode(ForeignPath); + + /* + * Upper relations should never have any lateral references, since joining + * is complete. + */ + Assert(bms_is_empty(rel->lateral_relids)); + + pathnode->path.pathtype = T_ForeignScan; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target ? target : rel->reltarget; + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel; + pathnode->path.parallel_workers = 0; + pathnode->path.rows = rows; + pathnode->path.startup_cost = startup_cost; + pathnode->path.total_cost = total_cost; + pathnode->path.pathkeys = pathkeys; + + pathnode->fdw_outerpath = fdw_outerpath; + pathnode->fdw_private = fdw_private; + + return pathnode; +} + /* * calc_nestloop_required_outer * Compute the required_outer set for a nestloop join path diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index f04c6b76f4..4130514952 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -1225,6 +1225,9 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel, double rows; ListCell *lc; + /* If rel has LATERAL refs, every path for it should account for them */ + Assert(bms_is_subset(baserel->lateral_relids, required_outer)); + /* Unparameterized paths have no ParamPathInfo */ if (bms_is_empty(required_outer)) return NULL; @@ -1320,6 +1323,9 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel, double rows; ListCell *lc; + /* If rel has LATERAL refs, every path for it should account for them */ + Assert(bms_is_subset(joinrel->lateral_relids, required_outer)); + /* Unparameterized paths have no ParamPathInfo or extra join clauses */ if (bms_is_empty(required_outer)) return NULL; @@ -1511,6 +1517,9 @@ get_appendrel_parampathinfo(RelOptInfo *appendrel, Relids required_outer) { ParamPathInfo *ppi; + /* If rel has LATERAL refs, every path for it should account for them */ + Assert(bms_is_subset(appendrel->lateral_relids, required_outer)); + /* Unparameterized paths have no ParamPathInfo */ if (bms_is_empty(required_outer)) return NULL; diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index d0c8f99d0a..ef2c9b4728 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -118,6 +118,19 @@ extern ForeignPath *create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer, Path *fdw_outerpath, List *fdw_private); +extern ForeignPath *create_foreign_join_path(PlannerInfo *root, RelOptInfo *rel, + PathTarget *target, + double rows, Cost startup_cost, Cost total_cost, + List *pathkeys, + Relids required_outer, + Path *fdw_outerpath, + List *fdw_private); +extern ForeignPath *create_foreign_upper_path(PlannerInfo *root, RelOptInfo *rel, + PathTarget *target, + double rows, Cost startup_cost, Cost total_cost, + List *pathkeys, + Path *fdw_outerpath, + List *fdw_private); extern Relids calc_nestloop_required_outer(Relids outerrelids, Relids outer_paramrels, -- 2.40.0