From f18c944b6137329ac4a6b2dce5745c5dc21a8578 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 3 Nov 2015 12:46:06 -0500 Subject: [PATCH] postgres_fdw: Add ORDER BY to some remote SQL queries. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If the join problem's entire ORDER BY clause can be pushed to the remote server, consider a path that adds this ORDER BY clause. If use_remote_estimate is on, we cost this path using an additional remote EXPLAIN. If not, we just estimate that the path costs 20% more, which is intended to be large enough that we won't request a remote sort when it's not helpful, but small enough that we'll have the remote side do the sort when in doubt. In some cases, the remote sort might actually be free, because the remote query plan might happen to produce output that is ordered the way we need, but without remote estimates we have no way of knowing that. It might also be useful to request sorted output from the remote side if it enables an efficient merge join, but this patch doesn't attempt to handle that case. Ashutosh Bapat with revisions by me. Also reviewed by Fabrízio de Royes Mello and Jeevan Chalke. --- contrib/postgres_fdw/deparse.c | 56 ++++++++- .../postgres_fdw/expected/postgres_fdw.out | 64 ++++++---- contrib/postgres_fdw/postgres_fdw.c | 116 +++++++++++++++++- contrib/postgres_fdw/postgres_fdw.h | 3 + contrib/postgres_fdw/sql/postgres_fdw.sql | 13 +- 5 files changed, 222 insertions(+), 30 deletions(-) diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 697de60dfe..3cb728fa69 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -193,9 +193,12 @@ is_foreign_expr(PlannerInfo *root, if (!foreign_expr_walker((Node *) expr, &glob_cxt, &loc_cxt)) return false; - /* Expressions examined here should be boolean, ie noncollatable */ - Assert(loc_cxt.collation == InvalidOid); - Assert(loc_cxt.state == FDW_COLLATE_NONE); + /* + * If the expression has a valid collation that does not arise from a + * foreign var, the expression can not be sent over. + */ + if (loc_cxt.state == FDW_COLLATE_UNSAFE) + return false; /* * An expression which includes any mutable functions can't be sent over @@ -1877,3 +1880,50 @@ printRemotePlaceholder(Oid paramtype, int32 paramtypmod, appendStringInfo(buf, "((SELECT null::%s)::%s)", ptypename, ptypename); } + +/* + * Deparse ORDER BY clause according to the given pathkeys for given base + * relation. From given pathkeys expressions belonging entirely to the given + * base relation are obtained and deparsed. + */ +void +appendOrderByClause(StringInfo buf, PlannerInfo *root, RelOptInfo *baserel, + List *pathkeys) +{ + ListCell *lcell; + deparse_expr_cxt context; + int nestlevel; + char *delim = " "; + + /* Set up context struct for recursion */ + context.root = root; + context.foreignrel = baserel; + context.buf = buf; + context.params_list = NULL; + + /* Make sure any constants in the exprs are printed portably */ + nestlevel = set_transmission_modes(); + + appendStringInfo(buf, " ORDER BY"); + foreach(lcell, pathkeys) + { + PathKey *pathkey = lfirst(lcell); + Expr *em_expr; + + em_expr = find_em_expr_for_rel(pathkey->pk_eclass, baserel); + Assert(em_expr != NULL); + + appendStringInfoString(buf, delim); + deparseExpr(em_expr, &context); + if (pathkey->pk_strategy == BTLessStrategyNumber) + appendStringInfoString(buf, " ASC"); + else + appendStringInfoString(buf, " DESC"); + + if (pathkey->pk_nulls_first) + appendStringInfoString(buf, " NULLS FIRST"); + + delim = ", "; + } + reset_transmission_modes(nestlevel); +} diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 65ea6e8d7d..0159bf52ad 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -134,15 +134,13 @@ ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true'); -- =================================================================== -- simple queries -- =================================================================== --- single table, with/without alias +-- single table without alias EXPLAIN (COSTS false) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10; - QUERY PLAN ---------------------------------- + QUERY PLAN +--------------------------- Limit - -> Sort - Sort Key: c3, c1 - -> Foreign Scan on ft1 -(4 rows) + -> Foreign Scan on ft1 +(2 rows) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 @@ -159,20 +157,21 @@ SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10; 110 | 0 | 00110 | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0 | 0 | foo (10 rows) -EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +-- single table with alias - also test that tableoid sort is not pushed to remote side +EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10; QUERY PLAN ------------------------------------------------------------------------------------- Limit - Output: c1, c2, c3, c4, c5, c6, c7, c8 + Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid -> Sort - Output: c1, c2, c3, c4, c5, c6, c7, c8 - Sort Key: t1.c3, t1.c1 + Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid + Sort Key: t1.c3, t1.c1, t1.tableoid -> Foreign Scan on public.ft1 t1 - Output: c1, c2, c3, c4, c5, c6, c7, c8 + Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" (8 rows) -SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 -----+----+-------+------------------------------+--------------------------+----+------------+----- 101 | 1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo @@ -189,17 +188,14 @@ SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; -- whole-row reference EXPLAIN (VERBOSE, COSTS false) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; - QUERY PLAN -------------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------------------------- Limit Output: t1.*, c3, c1 - -> Sort + -> Foreign Scan on public.ft1 t1 Output: t1.*, c3, c1 - Sort Key: t1.c3, t1.c1 - -> Foreign Scan on public.ft1 t1 - Output: t1.*, c3, c1 - Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" -(8 rows) + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY c3 ASC, "C 1" ASC +(5 rows) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; t1 @@ -650,6 +646,32 @@ SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5)); 4 | 4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4 | 4 | foo (4 rows) +-- we should not push order by clause with volatile expressions or unsafe +-- collations +EXPLAIN (VERBOSE, COSTS false) + SELECT * FROM ft2 ORDER BY ft2.c1, random(); + QUERY PLAN +------------------------------------------------------------------------------- + Sort + Output: c1, c2, c3, c4, c5, c6, c7, c8, (random()) + Sort Key: ft2.c1, (random()) + -> Foreign Scan on public.ft2 + Output: c1, c2, c3, c4, c5, c6, c7, c8, random() + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" +(6 rows) + +EXPLAIN (VERBOSE, COSTS false) + SELECT * FROM ft2 ORDER BY ft2.c1, ft2.c3 collate "C"; + QUERY PLAN +------------------------------------------------------------------------------- + Sort + Output: c1, c2, c3, c4, c5, c6, c7, c8, ((c3)::text) + Sort Key: ft2.c1, ft2.c3 COLLATE "C" + -> Foreign Scan on public.ft2 + Output: c1, c2, c3, c4, c5, c6, c7, c8, c3 + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" +(6 rows) + -- =================================================================== -- parameterized queries -- =================================================================== diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 1902f1f4ea..914e6704c2 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -47,6 +47,9 @@ PG_MODULE_MAGIC; /* Default CPU cost to process 1 row (above and beyond cpu_tuple_cost). */ #define DEFAULT_FDW_TUPLE_COST 0.01 +/* If no remote estimates, assume a sort costs 20% extra */ +#define DEFAULT_FDW_SORT_MULTIPLIER 1.2 + /* * FDW-specific planner information kept in RelOptInfo.fdw_private for a * foreign table. This information is collected by postgresGetForeignRelSize. @@ -296,6 +299,7 @@ static List *postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, static void estimate_path_cost_size(PlannerInfo *root, RelOptInfo *baserel, List *join_conds, + List *pathkeys, double *p_rows, int *p_width, Cost *p_startup_cost, Cost *p_total_cost); static void get_remote_estimate(const char *sql, @@ -497,7 +501,7 @@ postgresGetForeignRelSize(PlannerInfo *root, * values in fpinfo so we don't need to do it again to generate the * basic foreign path. */ - estimate_path_cost_size(root, baserel, NIL, + estimate_path_cost_size(root, baserel, NIL, NIL, &fpinfo->rows, &fpinfo->width, &fpinfo->startup_cost, &fpinfo->total_cost); @@ -527,7 +531,7 @@ postgresGetForeignRelSize(PlannerInfo *root, set_baserel_size_estimates(root, baserel); /* Fill in basically-bogus cost estimates for use later. */ - estimate_path_cost_size(root, baserel, NIL, + estimate_path_cost_size(root, baserel, NIL, NIL, &fpinfo->rows, &fpinfo->width, &fpinfo->startup_cost, &fpinfo->total_cost); } @@ -546,6 +550,7 @@ postgresGetForeignPaths(PlannerInfo *root, ForeignPath *path; List *ppi_list; ListCell *lc; + List *usable_pathkeys = NIL; /* * Create simplest ForeignScan path node and add it to baserel. This path @@ -563,6 +568,60 @@ postgresGetForeignPaths(PlannerInfo *root, NIL); /* no fdw_private list */ add_path(baserel, (Path *) path); + /* + * Determine whether we can potentially push query pathkeys to the remote + * side, avoiding a local sort. + */ + foreach(lc, root->query_pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; + Expr *em_expr; + + /* + * is_foreign_expr would detect volatile expressions as well, but + * ec_has_volatile saves some cycles. + */ + if (!pathkey_ec->ec_has_volatile && + (em_expr = find_em_expr_for_rel(pathkey_ec, baserel)) && + is_foreign_expr(root, baserel, em_expr)) + usable_pathkeys = lappend(usable_pathkeys, pathkey); + else + { + /* + * The planner and executor don't have any clever strategy for + * taking data sorted by a prefix of the query's pathkeys and + * getting it to be sorted by all of those pathekeys. We'll just + * end up resorting the entire data set. So, unless we can push + * down all of the query pathkeys, forget it. + */ + list_free(usable_pathkeys); + usable_pathkeys = NIL; + break; + } + } + + /* Create a path with useful pathkeys, if we found one. */ + if (usable_pathkeys != NULL) + { + double rows; + int width; + Cost startup_cost; + Cost total_cost; + + estimate_path_cost_size(root, baserel, NIL, usable_pathkeys, + &rows, &width, &startup_cost, &total_cost); + + add_path(baserel, (Path *) + create_foreignscan_path(root, baserel, + rows, + startup_cost, + total_cost, + usable_pathkeys, + NULL, + NIL)); + } + /* * If we're not using remote estimates, stop here. We have no way to * estimate whether any join clauses would be worth sending across, so @@ -710,7 +769,7 @@ postgresGetForeignPaths(PlannerInfo *root, /* Get a cost estimate from the remote */ estimate_path_cost_size(root, baserel, - param_info->ppi_clauses, + param_info->ppi_clauses, NIL, &rows, &width, &startup_cost, &total_cost); @@ -811,6 +870,10 @@ postgresGetForeignPlan(PlannerInfo *root, appendWhereClause(&sql, root, baserel, remote_conds, true, ¶ms_list); + /* Add ORDER BY clause if we found any useful pathkeys */ + if (best_path->path.pathkeys) + appendOrderByClause(&sql, root, baserel, best_path->path.pathkeys); + /* * Add FOR UPDATE/SHARE if appropriate. We apply locking during the * initial row fetch, rather than later on as is done for local tables. @@ -1728,6 +1791,7 @@ static void estimate_path_cost_size(PlannerInfo *root, RelOptInfo *baserel, List *join_conds, + List *pathkeys, double *p_rows, int *p_width, Cost *p_startup_cost, Cost *p_total_cost) { @@ -1780,6 +1844,9 @@ estimate_path_cost_size(PlannerInfo *root, appendWhereClause(&sql, root, baserel, remote_join_conds, (fpinfo->remote_conds == NIL), NULL); + if (pathkeys) + appendOrderByClause(&sql, root, baserel, pathkeys); + /* Get the remote estimate */ conn = GetConnection(fpinfo->server, fpinfo->user, false); get_remote_estimate(sql.data, conn, &rows, &width, @@ -1837,6 +1904,21 @@ estimate_path_cost_size(PlannerInfo *root, cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; run_cost += cpu_per_tuple * baserel->tuples; + /* + * Without remote estimates, we have no real way to estimate the cost + * of generating sorted output. It could be free if the query plan + * the remote side would have chosen generates properly-sorted output + * anyway, but in most cases it will cost something. Estimate a value + * high enough that we won't pick the sorted path when the ordering + * isn't locally useful, but low enough that we'll err on the side of + * pushing down the ORDER BY clause when it's useful to do so. + */ + if (pathkeys != NIL) + { + startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER; + run_cost *= DEFAULT_FDW_SORT_MULTIPLIER; + } + total_cost = startup_cost + run_cost; } @@ -3002,3 +3084,31 @@ conversion_error_callback(void *arg) NameStr(tupdesc->attrs[errpos->cur_attno - 1]->attname), RelationGetRelationName(errpos->rel)); } + +/* + * Find an equivalence class member expression, all of whose Vars, come from + * the indicated relation. + */ +extern Expr * +find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) +{ + ListCell *lc_em; + + foreach(lc_em, ec->ec_members) + { + EquivalenceMember *em = lfirst(lc_em); + + if (bms_equal(em->em_relids, rel->relids)) + { + /* + * If there is more than one equivalence member whose Vars are + * taken entirely from this relation, we'll be content to choose + * any one of those. + */ + return em->em_expr; + } + } + + /* We didn't find any suitable equivalence class expression */ + return NULL; +} diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 3835ddb79a..8956cd2cf1 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -74,5 +74,8 @@ extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel); extern void deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs); extern void deparseStringLiteral(StringInfo buf, const char *val); +extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); +extern void appendOrderByClause(StringInfo buf, PlannerInfo *root, + RelOptInfo *baserel, List *pathkeys); #endif /* POSTGRES_FDW_H */ diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 11160f8245..a87a63fe84 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -138,11 +138,12 @@ ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true'); -- =================================================================== -- simple queries -- =================================================================== --- single table, with/without alias +-- single table without alias EXPLAIN (COSTS false) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10; SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10; -EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; -SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; +-- single table with alias - also test that tableoid sort is not pushed to remote side +EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10; +SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10; -- whole-row reference EXPLAIN (VERBOSE, COSTS false) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; @@ -214,6 +215,12 @@ WHERE a.c2 = 6 AND b.c1 = a.c1 AND a.c8 = 'foo' AND b.c7 = upper(a.c7); -- bug before 9.3.5 due to sloppy handling of remote-estimate parameters SELECT * FROM ft1 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft2 WHERE c1 < 5)); SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5)); +-- we should not push order by clause with volatile expressions or unsafe +-- collations +EXPLAIN (VERBOSE, COSTS false) + SELECT * FROM ft2 ORDER BY ft2.c1, random(); +EXPLAIN (VERBOSE, COSTS false) + SELECT * FROM ft2 ORDER BY ft2.c1, ft2.c3 collate "C"; -- =================================================================== -- parameterized queries -- 2.40.0