From: Tom Lane Date: Sat, 16 Feb 2019 21:11:12 +0000 (-0500) Subject: Allow user control of CTE materialization, and change the default behavior. X-Git-Tag: REL_12_BETA1~728 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=608b167f9f9c4553c35bb1ec0eab9ddae643989b;p=postgresql Allow user control of CTE materialization, and change the default behavior. Historically we've always materialized the full output of a CTE query, treating WITH as an optimization fence (so that, for example, restrictions from the outer query cannot be pushed into it). This is appropriate when the CTE query is INSERT/UPDATE/DELETE, or is recursive; but when the CTE query is non-recursive and side-effect-free, there's no hazard of changing the query results by pushing restrictions down. Another argument for materialization is that it can avoid duplicate computation of an expensive WITH query --- but that only applies if the WITH query is called more than once in the outer query. Even then it could still be a net loss, if each call has restrictions that would allow just a small part of the WITH query to be computed. Hence, let's change the behavior for WITH queries that are non-recursive and side-effect-free. By default, we will inline them into the outer query (removing the optimization fence) if they are called just once. If they are called more than once, we will keep the old behavior by default, but the user can override this and force inlining by specifying NOT MATERIALIZED. Lastly, the user can force the old behavior by specifying MATERIALIZED; this would mainly be useful when the query had deliberately been employing WITH as an optimization fence to prevent a poor choice of plan. Andreas Karlsson, Andrew Gierth, David Fetter Discussion: https://postgr.es/m/87sh48ffhb.fsf@news-spur.riddles.org.uk --- diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index ea2e4bc242..9905593661 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2927,6 +2927,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) /* we store the string name because RTE_CTE RTEs need it */ APP_JUMB_STRING(cte->ctename); + APP_JUMB(cte->ctematerialized); JumbleQuery(jstate, castNode(Query, cte->ctequery)); } break; diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 029552b152..42108bd3d4 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -1888,7 +1888,7 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t -- join in CTE EXPLAIN (VERBOSE, COSTS OFF) -WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; +WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------- Limit @@ -1905,7 +1905,7 @@ WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 Output: t.c1_1, t.c2_1, t.c1_3 (12 rows) -WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; +WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; c1_1 | c2_1 ------+------ 101 | 101 diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index bab8479cf6..eb9d1ad59d 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -493,8 +493,8 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE; -- join in CTE EXPLAIN (VERBOSE, COSTS OFF) -WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; -WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; +WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; +WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10; -- ctid with whole-row reference EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml index 88bc189646..22252556be 100644 --- a/doc/src/sgml/queries.sgml +++ b/doc/src/sgml/queries.sgml @@ -2199,22 +2199,94 @@ SELECT n FROM t LIMIT 100; - A useful property of WITH queries is that they are evaluated - only once per execution of the parent query, even if they are referred to - more than once by the parent query or sibling WITH queries. + A useful property of WITH queries is that they are + normally evaluated only once per execution of the parent query, even if + they are referred to more than once by the parent query or + sibling WITH queries. Thus, expensive calculations that are needed in multiple places can be placed within a WITH query to avoid redundant work. Another possible application is to prevent unwanted multiple evaluations of functions with side-effects. - However, the other side of this coin is that the optimizer is less able to - push restrictions from the parent query down into a WITH query - than an ordinary subquery. The WITH query will generally be + However, the other side of this coin is that the optimizer is not able to + push restrictions from the parent query down into a multiply-referenced + WITH query, since that might affect all uses of the + WITH query's output when it should affect only one. + The multiply-referenced WITH query will be evaluated as written, without suppression of rows that the parent query might discard afterwards. (But, as mentioned above, evaluation might stop early if the reference(s) to the query demand only a limited number of rows.) + + However, if a WITH query is non-recursive and + side-effect-free (that is, it is a SELECT containing + no volatile functions) then it can be folded into the parent query, + allowing joint optimization of the two query levels. By default, this + happens if the parent query references the WITH query + just once, but not if it references the WITH query + more than once. You can override that decision by + specifying MATERIALIZED to force separate calculation + of the WITH query, or by specifying NOT + MATERIALIZED to force it to be merged into the parent query. + The latter choice risks duplicate computation of + the WITH query, but it can still give a net savings if + each usage of the WITH query needs only a small part + of the WITH query's full output. + + + + A simple example of these rules is + +WITH w AS ( + SELECT * FROM big_table +) +SELECT * FROM w WHERE key = 123; + + This WITH query will be folded, producing the same + execution plan as + +SELECT * FROM big_table WHERE key = 123; + + In particular, if there's an index on key, + it will probably be used to fetch just the rows having key = + 123. On the other hand, in + +WITH w AS ( + SELECT * FROM big_table +) +SELECT * FROM w AS w1 JOIN w AS w2 ON w1.key = w2.ref +WHERE w2.key = 123; + + the WITH query will be materialized, producing a + temporary copy of big_table that is then + joined with itself — without benefit of any index. This query + will be executed much more efficiently if written as + +WITH w AS NOT MATERIALIZED ( + SELECT * FROM big_table +) +SELECT * FROM w AS w1 JOIN w AS w2 ON w1.key = w2.ref +WHERE w2.key = 123; + + so that the parent query's restrictions can be applied directly + to scans of big_table. + + + + An example where NOT MATERIALIZED could be + undesirable is + +WITH w AS ( + SELECT key, very_expensive_function(val) as f FROM some_table +) +SELECT * FROM w AS w1 JOIN w AS w2 ON w1.f = w2.f; + + Here, materialization of the WITH query ensures + that very_expensive_function is evaluated only + once per table row, not twice. + + The examples above only show WITH being used with SELECT, but it can be attached in the same way to diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 4db8142afa..06d611b64c 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -72,7 +72,7 @@ SELECT [ ALL | DISTINCT [ ON ( expressionand with_query is: - with_query_name [ ( column_name [, ...] ) ] AS ( select | values | insert | update | delete ) + with_query_name [ ( column_name [, ...] ) ] AS [ [ NOT ] MATERIALIZED ] ( select | values | insert | update | delete ) TABLE [ ONLY ] table_name [ * ] @@ -93,7 +93,8 @@ TABLE [ ONLY ] table_name [ * ] These effectively serve as temporary tables that can be referenced in the FROM list. A WITH query that is referenced more than once in FROM is - computed only once. + computed only once, + unless specified otherwise with NOT MATERIALIZED. (See below.) @@ -272,9 +273,18 @@ TABLE [ ONLY ] table_name [ * ] that are earlier in the WITH list. + + The primary query and the WITH queries are all + (notionally) executed at the same time. This implies that the effects of + a data-modifying statement in WITH cannot be seen from + other parts of the query, other than by reading its RETURNING + output. If two such data-modifying statements attempt to modify the same + row, the results are unspecified. + + A key property of WITH queries is that they - are evaluated only once per execution of the primary query, + are normally evaluated only once per execution of the primary query, even if the primary query refers to them more than once. In particular, data-modifying statements are guaranteed to be executed once and only once, regardless of whether the primary query @@ -282,12 +292,35 @@ TABLE [ ONLY ] table_name [ * ] - The primary query and the WITH queries are all - (notionally) executed at the same time. This implies that the effects of - a data-modifying statement in WITH cannot be seen from - other parts of the query, other than by reading its RETURNING - output. If two such data-modifying statements attempt to modify the same - row, the results are unspecified. + However, a WITH query can be marked + NOT MATERIALIZED to remove this guarantee. In that + case, the WITH query can be folded into the primary + query much as though it were a simple sub-SELECT in + the primary query's FROM clause. This results in + duplicate computations if the primary query refers to + that WITH query more than once; but if each such use + requires only a few rows of the WITH query's total + output, NOT MATERIALIZED can provide a net savings by + allowing the queries to be optimized jointly. + NOT MATERIALIZED is ignored if it is attached to + a WITH query that is recursive or is not + side-effect-free (i.e., is not a plain SELECT + containing no volatile functions). + + + + By default, a side-effect-free WITH query is folded + into the primary query if it is used exactly once in the primary + query's FROM clause. This allows joint optimization + of the two query levels in situations where that should be semantically + invisible. However, such folding can be prevented by marking the + WITH query as MATERIALIZED. + That might be useful, for example, if the WITH query + is being used as an optimization fence to prevent the planner from + choosing a bad plan. + PostgreSQL versions before v12 never did + such folding, so queries written for older versions might rely on + WITH to act as an optimization fence. @@ -2087,6 +2120,12 @@ SELECT distributors.* WHERE distributors.name = 'Westward'; ROWS FROM( ... ) is an extension of the SQL standard. + + + The MATERIALIZED and NOT + MATERIALIZED options of WITH are extensions + of the SQL standard. + diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index b44ead269f..e15724bb0e 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2536,6 +2536,7 @@ _copyCommonTableExpr(const CommonTableExpr *from) COPY_STRING_FIELD(ctename); COPY_NODE_FIELD(aliascolnames); + COPY_SCALAR_FIELD(ctematerialized); COPY_NODE_FIELD(ctequery); COPY_LOCATION_FIELD(location); COPY_SCALAR_FIELD(cterecursive); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 1e169e0b9c..31499eb798 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2791,6 +2791,7 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b) { COMPARE_STRING_FIELD(ctename); COMPARE_NODE_FIELD(aliascolnames); + COMPARE_SCALAR_FIELD(ctematerialized); COMPARE_NODE_FIELD(ctequery); COMPARE_LOCATION_FIELD(location); COMPARE_SCALAR_FIELD(cterecursive); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 10038a22cf..65302fe65b 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2989,6 +2989,7 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node) WRITE_STRING_FIELD(ctename); WRITE_NODE_FIELD(aliascolnames); + WRITE_ENUM_FIELD(ctematerialized, CTEMaterialize); WRITE_NODE_FIELD(ctequery); WRITE_LOCATION_FIELD(location); WRITE_BOOL_FIELD(cterecursive); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 3b002778ad..5aa42242a9 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -418,6 +418,7 @@ _readCommonTableExpr(void) READ_STRING_FIELD(ctename); READ_NODE_FIELD(aliascolnames); + READ_ENUM_FIELD(ctematerialized, CTEMaterialize); READ_NODE_FIELD(ctequery); READ_LOCATION_FIELD(location); READ_BOOL_FIELD(cterecursive); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index ddb86bd0c3..4e52c42721 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -646,8 +646,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->partColsUpdated = false; /* - * If there is a WITH list, process each WITH query and build an initplan - * SubPlan structure for it. + * If there is a WITH list, process each WITH query and either convert it + * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it. */ if (parse->cteList) SS_process_ctes(root); diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 915c6d0cf6..c721054787 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -57,6 +57,14 @@ typedef struct finalize_primnode_context Bitmapset *paramids; /* Non-local PARAM_EXEC paramids found */ } finalize_primnode_context; +typedef struct inline_cte_walker_context +{ + const char *ctename; /* name and relative level of target CTE */ + int levelsup; + int refcount; /* number of remaining references */ + Query *ctequery; /* query to substitute */ +} inline_cte_walker_context; + static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, List *plan_params, @@ -75,6 +83,10 @@ static Node *convert_testexpr_mutator(Node *node, static bool subplan_is_hashable(Plan *plan); static bool testexpr_is_hashable(Node *testexpr); static bool hash_ok_operator(OpExpr *expr); +static bool contain_dml(Node *node); +static bool contain_dml_walker(Node *node, void *context); +static void inline_cte(PlannerInfo *root, CommonTableExpr *cte); +static bool inline_cte_walker(Node *node, inline_cte_walker_context *context); static bool simplify_EXISTS_query(PlannerInfo *root, Query *query); static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, Node **testexpr, List **paramIds); @@ -804,10 +816,13 @@ hash_ok_operator(OpExpr *expr) /* * SS_process_ctes: process a query's WITH list * - * We plan each interesting WITH item and convert it to an initplan. + * Consider each CTE in the WITH list and either ignore it (if it's an + * unreferenced SELECT), "inline" it to create a regular sub-SELECT-in-FROM, + * or convert it to an initplan. + * * A side effect is to fill in root->cte_plan_ids with a list that * parallels root->parse->cteList and provides the subplan ID for - * each CTE's initplan. + * each CTE's initplan, or a dummy ID (-1) if we didn't make an initplan. */ void SS_process_ctes(PlannerInfo *root) @@ -838,6 +853,44 @@ SS_process_ctes(PlannerInfo *root) continue; } + /* + * Consider inlining the CTE (creating RTE_SUBQUERY RTE(s)) instead of + * implementing it as a separately-planned CTE. + * + * We cannot inline if any of these conditions hold: + * + * 1. The user said not to (the CTEMaterializeAlways option). + * + * 2. The CTE is recursive. + * + * 3. The CTE has side-effects; this includes either not being a plain + * SELECT, or containing volatile functions. Inlining might change + * the side-effects, which would be bad. + * + * Otherwise, we have an option whether to inline or not. That should + * always be a win if there's just a single reference, but if the CTE + * is multiply-referenced then it's unclear: inlining adds duplicate + * computations, but the ability to absorb restrictions from the outer + * query level could outweigh that. We do not have nearly enough + * information at this point to tell whether that's true, so we let + * the user express a preference. Our default behavior is to inline + * only singly-referenced CTEs, but a CTE marked CTEMaterializeNever + * will be inlined even if multiply referenced. + */ + if ((cte->ctematerialized == CTEMaterializeNever || + (cte->ctematerialized == CTEMaterializeDefault && + cte->cterefcount == 1)) && + !cte->cterecursive && + cmdType == CMD_SELECT && + !contain_dml(cte->ctequery) && + !contain_volatile_functions(cte->ctequery)) + { + inline_cte(root, cte); + /* Make a dummy entry in cte_plan_ids */ + root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1); + continue; + } + /* * Copy the source Query node. Probably not necessary, but let's keep * this similar to make_subplan. @@ -934,6 +987,127 @@ SS_process_ctes(PlannerInfo *root) } } +/* + * contain_dml: is any subquery not a plain SELECT? + * + * We reject SELECT FOR UPDATE/SHARE as well as INSERT etc. + */ +static bool +contain_dml(Node *node) +{ + return contain_dml_walker(node, NULL); +} + +static bool +contain_dml_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, Query)) + { + Query *query = (Query *) node; + + if (query->commandType != CMD_SELECT || + query->rowMarks != NIL) + return true; + + return query_tree_walker(query, contain_dml_walker, context, 0); + } + return expression_tree_walker(node, contain_dml_walker, context); +} + +/* + * inline_cte: convert RTE_CTE references to given CTE into RTE_SUBQUERYs + */ +static void +inline_cte(PlannerInfo *root, CommonTableExpr *cte) +{ + struct inline_cte_walker_context context; + + context.ctename = cte->ctename; + /* Start at levelsup = -1 because we'll immediately increment it */ + context.levelsup = -1; + context.refcount = cte->cterefcount; + context.ctequery = castNode(Query, cte->ctequery); + + (void) inline_cte_walker((Node *) root->parse, &context); + + /* Assert we replaced all references */ + Assert(context.refcount == 0); +} + +static bool +inline_cte_walker(Node *node, inline_cte_walker_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Query)) + { + Query *query = (Query *) node; + + context->levelsup++; + + /* + * Visit the query's RTE nodes after their contents; otherwise + * query_tree_walker would descend into the newly inlined CTE query, + * which we don't want. + */ + (void) query_tree_walker(query, inline_cte_walker, context, + QTW_EXAMINE_RTES_AFTER); + + context->levelsup--; + + return false; + } + else if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rte = (RangeTblEntry *) node; + + if (rte->rtekind == RTE_CTE && + strcmp(rte->ctename, context->ctename) == 0 && + rte->ctelevelsup == context->levelsup) + { + /* + * Found a reference to replace. Generate a copy of the CTE query + * with appropriate level adjustment for outer references (e.g., + * to other CTEs). + */ + Query *newquery = copyObject(context->ctequery); + + if (context->levelsup > 0) + IncrementVarSublevelsUp((Node *) newquery, context->levelsup, 1); + + /* + * Convert the RTE_CTE RTE into a RTE_SUBQUERY. + * + * Historically, a FOR UPDATE clause has been treated as extending + * into views and subqueries, but not into CTEs. We preserve this + * distinction by not trying to push rowmarks into the new + * subquery. + */ + rte->rtekind = RTE_SUBQUERY; + rte->subquery = newquery; + rte->security_barrier = false; + + /* Zero out CTE-specific fields */ + rte->ctename = NULL; + rte->ctelevelsup = 0; + rte->self_reference = false; + rte->coltypes = NIL; + rte->coltypmods = NIL; + rte->colcollations = NIL; + + /* Count the number of replacements we've done */ + context->refcount--; + } + + return false; + } + + return expression_tree_walker(node, inline_cte_walker, context); +} + + /* * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join * diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 7bb2b0b2a5..a68f78e0e0 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -479,7 +479,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type row explicit_row implicit_row type_list array_expr_list %type case_expr case_arg when_clause case_default %type when_clause_list -%type sub_type +%type sub_type opt_materialized %type NumericOnly %type NumericOnly_list %type alias_clause opt_alias_clause @@ -11344,17 +11344,24 @@ cte_list: | cte_list ',' common_table_expr { $$ = lappend($1, $3); } ; -common_table_expr: name opt_name_list AS '(' PreparableStmt ')' +common_table_expr: name opt_name_list AS opt_materialized '(' PreparableStmt ')' { CommonTableExpr *n = makeNode(CommonTableExpr); n->ctename = $1; n->aliascolnames = $2; - n->ctequery = $5; + n->ctematerialized = $4; + n->ctequery = $6; n->location = @1; $$ = (Node *) n; } ; +opt_materialized: + MATERIALIZED { $$ = CTEMaterializeAlways; } + | NOT MATERIALIZED { $$ = CTEMaterializeNever; } + | /*EMPTY*/ { $$ = CTEMaterializeDefault; } + ; + opt_with_clause: with_clause { $$ = $1; } | /*EMPTY*/ { $$ = NULL; } @@ -16237,6 +16244,7 @@ makeRecursiveViewSelect(char *relname, List *aliases, Node *query) /* create common table expression */ cte->ctename = relname; cte->aliascolnames = aliases; + cte->ctematerialized = CTEMaterializeDefault; cte->ctequery = query; cte->location = -1; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 9fd1ebf3e5..1258092dc8 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -5181,7 +5181,19 @@ get_with_clause(Query *query, deparse_context *context) } appendStringInfoChar(buf, ')'); } - appendStringInfoString(buf, " AS ("); + appendStringInfoString(buf, " AS "); + switch (cte->ctematerialized) + { + case CTEMaterializeDefault: + break; + case CTEMaterializeAlways: + appendStringInfoString(buf, "MATERIALIZED "); + break; + case CTEMaterializeNever: + appendStringInfoString(buf, "NOT MATERIALIZED "); + break; + } + appendStringInfoChar(buf, '('); if (PRETTY_INDENT(context)) appendContextKeyword(context, "", 0, 0, 0); get_query_def((Query *) cte->ctequery, buf, context->namespaces, NULL, diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 3f4e199762..4887d322a3 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201902112 +#define CATALOG_VERSION_NO 201902161 #endif diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 2fe14d7db2..6d035a072e 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1402,11 +1402,19 @@ typedef struct OnConflictClause * * We don't currently support the SEARCH or CYCLE clause. */ +typedef enum CTEMaterialize +{ + CTEMaterializeDefault, /* no option specified */ + CTEMaterializeAlways, /* MATERIALIZED */ + CTEMaterializeNever /* NOT MATERIALIZED */ +} CTEMaterialize; + typedef struct CommonTableExpr { NodeTag type; char *ctename; /* query name (never qualified) */ List *aliascolnames; /* optional list of column names */ + CTEMaterialize ctematerialized; /* is this an optimization fence? */ /* SelectStmt/InsertStmt/etc before parse analysis, Query afterwards: */ Node *ctequery; /* the CTE's subquery */ int location; /* token location, or -1 if unknown */ diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 1d12b01068..2e170497c9 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -2179,7 +2179,7 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test; Filter: (((a % 2) = 0) AND f_leak(b)) (2 rows) -PREPARE plancache_test2 AS WITH q AS (SELECT * FROM z1 WHERE f_leak(b)) SELECT * FROM q,z2; +PREPARE plancache_test2 AS WITH q AS MATERIALIZED (SELECT * FROM z1 WHERE f_leak(b)) SELECT * FROM q,z2; EXPLAIN (COSTS OFF) EXECUTE plancache_test2; QUERY PLAN ------------------------------------------------- @@ -2192,7 +2192,7 @@ EXPLAIN (COSTS OFF) EXECUTE plancache_test2; -> Seq Scan on z2 (7 rows) -PREPARE plancache_test3 AS WITH q AS (SELECT * FROM z2) SELECT * FROM q,z1 WHERE f_leak(z1.b); +PREPARE plancache_test3 AS WITH q AS MATERIALIZED (SELECT * FROM z2) SELECT * FROM q,z1 WHERE f_leak(z1.b); EXPLAIN (COSTS OFF) EXECUTE plancache_test3; QUERY PLAN ----------------------------------------------------- @@ -2826,7 +2826,7 @@ ALTER TABLE t1 ENABLE ROW LEVEL SECURITY; GRANT ALL ON t1 TO regress_rls_bob; INSERT INTO t1 (SELECT x, md5(x::text) FROM generate_series(0,20) x); SET SESSION AUTHORIZATION regress_rls_bob; -WITH cte1 AS (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; +WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; NOTICE: f_leak => cfcd208495d565ef66e7dff9f98764da NOTICE: f_leak => c81e728d9d4c2f636f067f89cc14862c NOTICE: f_leak => a87ff679a2f3e71d9181a67b7542122c @@ -2853,7 +2853,8 @@ NOTICE: f_leak => 98f13708210194c475687be6106a3b84 20 | 98f13708210194c475687be6106a3b84 (11 rows) -EXPLAIN (COSTS OFF) WITH cte1 AS (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; +EXPLAIN (COSTS OFF) +WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; QUERY PLAN ------------------------------------------------- CTE Scan on cte1 diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out index 6ff2fd3274..ffccaa5c9d 100644 --- a/src/test/regress/expected/rowtypes.out +++ b/src/test/regress/expected/rowtypes.out @@ -1147,7 +1147,7 @@ from (values (1,row(1,2)), (1,row(null,null)), (1,null), (6 rows) explain (verbose, costs off) -with r(a,b) as +with r(a,b) as materialized (values (1,row(1,2)), (1,row(null,null)), (1,null), (null,row(1,2)), (null,row(null,null)), (null,null) ) select r, r is null as isnull, r is not null as isnotnull from r; @@ -1160,7 +1160,7 @@ select r, r is null as isnull, r is not null as isnotnull from r; Output: "*VALUES*".column1, "*VALUES*".column2 (5 rows) -with r(a,b) as +with r(a,b) as materialized (values (1,row(1,2)), (1,row(null,null)), (1,null), (null,row(1,2)), (null,row(null,null)), (null,null) ) select r, r is null as isnull, r is not null as isnotnull from r; diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index b31594a7b5..95b8a95fa2 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -3110,7 +3110,7 @@ explain (costs off) INSERT INTO hats VALUES ('h8', 'forbidden') RETURNING *; (5 rows) -- ensure upserting into a rule, with a CTE (different offsets!) works -WITH data(hat_name, hat_color) AS ( +WITH data(hat_name, hat_color) AS MATERIALIZED ( VALUES ('h8', 'green'), ('h9', 'blue'), ('h7', 'forbidden') @@ -3124,7 +3124,8 @@ RETURNING *; h9 | blue (2 rows) -EXPLAIN (costs off) WITH data(hat_name, hat_color) AS ( +EXPLAIN (costs off) +WITH data(hat_name, hat_color) AS MATERIALIZED ( VALUES ('h8', 'green'), ('h9', 'blue'), ('h7', 'forbidden') diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 6e238e88b3..cc3f5f3737 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1154,3 +1154,159 @@ fetch backward all in c1; (2 rows) commit; +-- +-- Tests for CTE inlining behavior +-- +-- Basic subquery that can be inlined +explain (verbose, costs off) +with x as (select * from (select f1 from subselect_tbl) ss) +select * from x where f1 = 1; + QUERY PLAN +---------------------------------- + Seq Scan on public.subselect_tbl + Output: subselect_tbl.f1 + Filter: (subselect_tbl.f1 = 1) +(3 rows) + +-- Explicitly request materialization +explain (verbose, costs off) +with x as materialized (select * from (select f1 from subselect_tbl) ss) +select * from x where f1 = 1; + QUERY PLAN +------------------------------------------ + CTE Scan on x + Output: x.f1 + Filter: (x.f1 = 1) + CTE x + -> Seq Scan on public.subselect_tbl + Output: subselect_tbl.f1 +(6 rows) + +-- Stable functions are safe to inline +explain (verbose, costs off) +with x as (select * from (select f1, now() from subselect_tbl) ss) +select * from x where f1 = 1; + QUERY PLAN +----------------------------------- + Seq Scan on public.subselect_tbl + Output: subselect_tbl.f1, now() + Filter: (subselect_tbl.f1 = 1) +(3 rows) + +-- Volatile functions prevent inlining +explain (verbose, costs off) +with x as (select * from (select f1, random() from subselect_tbl) ss) +select * from x where f1 = 1; + QUERY PLAN +---------------------------------------------- + CTE Scan on x + Output: x.f1, x.random + Filter: (x.f1 = 1) + CTE x + -> Seq Scan on public.subselect_tbl + Output: subselect_tbl.f1, random() +(6 rows) + +-- SELECT FOR UPDATE cannot be inlined +explain (verbose, costs off) +with x as (select * from (select f1 from subselect_tbl for update) ss) +select * from x where f1 = 1; + QUERY PLAN +-------------------------------------------------------------------- + CTE Scan on x + Output: x.f1 + Filter: (x.f1 = 1) + CTE x + -> Subquery Scan on ss + Output: ss.f1 + -> LockRows + Output: subselect_tbl.f1, subselect_tbl.ctid + -> Seq Scan on public.subselect_tbl + Output: subselect_tbl.f1, subselect_tbl.ctid +(10 rows) + +-- Multiply-referenced CTEs are inlined only when requested +explain (verbose, costs off) +with x as (select * from (select f1, now() as n from subselect_tbl) ss) +select * from x, x x2 where x.n = x2.n; + QUERY PLAN +------------------------------------------- + Merge Join + Output: x.f1, x.n, x2.f1, x2.n + Merge Cond: (x.n = x2.n) + CTE x + -> Seq Scan on public.subselect_tbl + Output: subselect_tbl.f1, now() + -> Sort + Output: x.f1, x.n + Sort Key: x.n + -> CTE Scan on x + Output: x.f1, x.n + -> Sort + Output: x2.f1, x2.n + Sort Key: x2.n + -> CTE Scan on x x2 + Output: x2.f1, x2.n +(16 rows) + +explain (verbose, costs off) +with x as not materialized (select * from (select f1, now() as n from subselect_tbl) ss) +select * from x, x x2 where x.n = x2.n; + QUERY PLAN +---------------------------------------------------------------------------- + Result + Output: subselect_tbl.f1, now(), subselect_tbl_1.f1, now() + One-Time Filter: (now() = now()) + -> Nested Loop + Output: subselect_tbl.f1, subselect_tbl_1.f1 + -> Seq Scan on public.subselect_tbl + Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3 + -> Materialize + Output: subselect_tbl_1.f1 + -> Seq Scan on public.subselect_tbl subselect_tbl_1 + Output: subselect_tbl_1.f1 +(11 rows) + +-- Check handling of outer references +explain (verbose, costs off) +with x as (select * from int4_tbl) +select * from (with y as (select * from x) select * from y) ss; + QUERY PLAN +----------------------------- + Seq Scan on public.int4_tbl + Output: int4_tbl.f1 +(2 rows) + +explain (verbose, costs off) +with x as materialized (select * from int4_tbl) +select * from (with y as (select * from x) select * from y) ss; + QUERY PLAN +------------------------------------- + CTE Scan on x + Output: x.f1 + CTE x + -> Seq Scan on public.int4_tbl + Output: int4_tbl.f1 +(5 rows) + +-- Ensure that we inline the currect CTE when there are +-- multiple CTEs with the same name +explain (verbose, costs off) +with x as (select 1 as y) +select * from (with x as (select 2 as y) select * from x) ss; + QUERY PLAN +------------- + Result + Output: 2 +(2 rows) + +-- Row marks are not pushed into CTEs +explain (verbose, costs off) +with x as (select * from subselect_tbl) +select * from x for update; + QUERY PLAN +---------------------------------------------------------------- + Seq Scan on public.subselect_tbl + Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3 +(2 rows) + diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql index 38e9b38bc4..52da276b88 100644 --- a/src/test/regress/sql/rowsecurity.sql +++ b/src/test/regress/sql/rowsecurity.sql @@ -840,10 +840,10 @@ EXPLAIN (COSTS OFF) SELECT * FROM z1 WHERE f_leak(b); PREPARE plancache_test AS SELECT * FROM z1 WHERE f_leak(b); EXPLAIN (COSTS OFF) EXECUTE plancache_test; -PREPARE plancache_test2 AS WITH q AS (SELECT * FROM z1 WHERE f_leak(b)) SELECT * FROM q,z2; +PREPARE plancache_test2 AS WITH q AS MATERIALIZED (SELECT * FROM z1 WHERE f_leak(b)) SELECT * FROM q,z2; EXPLAIN (COSTS OFF) EXECUTE plancache_test2; -PREPARE plancache_test3 AS WITH q AS (SELECT * FROM z2) SELECT * FROM q,z1 WHERE f_leak(z1.b); +PREPARE plancache_test3 AS WITH q AS MATERIALIZED (SELECT * FROM z2) SELECT * FROM q,z1 WHERE f_leak(z1.b); EXPLAIN (COSTS OFF) EXECUTE plancache_test3; SET ROLE regress_rls_group1; @@ -1071,8 +1071,9 @@ INSERT INTO t1 (SELECT x, md5(x::text) FROM generate_series(0,20) x); SET SESSION AUTHORIZATION regress_rls_bob; -WITH cte1 AS (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; -EXPLAIN (COSTS OFF) WITH cte1 AS (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; +WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; +EXPLAIN (COSTS OFF) +WITH cte1 AS MATERIALIZED (SELECT * FROM t1 WHERE f_leak(b)) SELECT * FROM cte1; WITH cte1 AS (UPDATE t1 SET a = a + 1 RETURNING *) SELECT * FROM cte1; --fail WITH cte1 AS (UPDATE t1 SET a = a RETURNING *) SELECT * FROM cte1; --ok diff --git a/src/test/regress/sql/rowtypes.sql b/src/test/regress/sql/rowtypes.sql index ea93347caf..45f60e8625 100644 --- a/src/test/regress/sql/rowtypes.sql +++ b/src/test/regress/sql/rowtypes.sql @@ -461,12 +461,12 @@ from (values (1,row(1,2)), (1,row(null,null)), (1,null), (null,row(1,2)), (null,row(null,null)), (null,null) ) r(a,b); explain (verbose, costs off) -with r(a,b) as +with r(a,b) as materialized (values (1,row(1,2)), (1,row(null,null)), (1,null), (null,row(1,2)), (null,row(null,null)), (null,null) ) select r, r is null as isnull, r is not null as isnotnull from r; -with r(a,b) as +with r(a,b) as materialized (values (1,row(1,2)), (1,row(null,null)), (1,null), (null,row(1,2)), (null,row(null,null)), (null,null) ) select r, r is null as isnull, r is not null as isnotnull from r; diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql index f4ee30ec8f..6666951dfe 100644 --- a/src/test/regress/sql/rules.sql +++ b/src/test/regress/sql/rules.sql @@ -1132,7 +1132,7 @@ SELECT tablename, rulename, definition FROM pg_rules explain (costs off) INSERT INTO hats VALUES ('h8', 'forbidden') RETURNING *; -- ensure upserting into a rule, with a CTE (different offsets!) works -WITH data(hat_name, hat_color) AS ( +WITH data(hat_name, hat_color) AS MATERIALIZED ( VALUES ('h8', 'green'), ('h9', 'blue'), ('h7', 'forbidden') @@ -1140,7 +1140,8 @@ WITH data(hat_name, hat_color) AS ( INSERT INTO hats SELECT * FROM data RETURNING *; -EXPLAIN (costs off) WITH data(hat_name, hat_color) AS ( +EXPLAIN (costs off) +WITH data(hat_name, hat_color) AS MATERIALIZED ( VALUES ('h8', 'green'), ('h9', 'blue'), ('h7', 'forbidden') diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index ccbe8a1df5..8bca1f5d55 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -625,3 +625,61 @@ move forward all in c1; fetch backward all in c1; commit; + +-- +-- Tests for CTE inlining behavior +-- + +-- Basic subquery that can be inlined +explain (verbose, costs off) +with x as (select * from (select f1 from subselect_tbl) ss) +select * from x where f1 = 1; + +-- Explicitly request materialization +explain (verbose, costs off) +with x as materialized (select * from (select f1 from subselect_tbl) ss) +select * from x where f1 = 1; + +-- Stable functions are safe to inline +explain (verbose, costs off) +with x as (select * from (select f1, now() from subselect_tbl) ss) +select * from x where f1 = 1; + +-- Volatile functions prevent inlining +explain (verbose, costs off) +with x as (select * from (select f1, random() from subselect_tbl) ss) +select * from x where f1 = 1; + +-- SELECT FOR UPDATE cannot be inlined +explain (verbose, costs off) +with x as (select * from (select f1 from subselect_tbl for update) ss) +select * from x where f1 = 1; + +-- Multiply-referenced CTEs are inlined only when requested +explain (verbose, costs off) +with x as (select * from (select f1, now() as n from subselect_tbl) ss) +select * from x, x x2 where x.n = x2.n; + +explain (verbose, costs off) +with x as not materialized (select * from (select f1, now() as n from subselect_tbl) ss) +select * from x, x x2 where x.n = x2.n; + +-- Check handling of outer references +explain (verbose, costs off) +with x as (select * from int4_tbl) +select * from (with y as (select * from x) select * from y) ss; + +explain (verbose, costs off) +with x as materialized (select * from int4_tbl) +select * from (with y as (select * from x) select * from y) ss; + +-- Ensure that we inline the currect CTE when there are +-- multiple CTEs with the same name +explain (verbose, costs off) +with x as (select 1 as y) +select * from (with x as (select 2 as y) select * from x) ss; + +-- Row marks are not pushed into CTEs +explain (verbose, costs off) +with x as (select * from subselect_tbl) +select * from x for update;