#include "utils/lsyscache.h"
+/* results of subquery_is_pushdown_safe */
+typedef struct pushdown_safety_info
+{
+ bool *unsafeColumns; /* which output columns are unsafe to use */
+ bool unsafeVolatile; /* don't push down volatile quals */
+ bool unsafeLeaky; /* don't push down leaky quals */
+} pushdown_safety_info;
+
/* These parameters are set by GUC */
bool enable_geqo = false; /* just in case GUC doesn't set it */
int geqo_threshold;
RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
- bool *unsafeColumns);
+ pushdown_safety_info *safetyInfo);
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
- bool *unsafeColumns);
-static void check_output_expressions(Query *subquery, bool *unsafeColumns);
+ pushdown_safety_info *safetyInfo);
+static void check_output_expressions(Query *subquery,
+ pushdown_safety_info *safetyInfo);
static void compare_tlist_datatypes(List *tlist, List *colTypes,
- bool *unsafeColumns);
+ pushdown_safety_info *safetyInfo);
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
- bool *unsafeColumns);
+ pushdown_safety_info *safetyInfo);
static void subquery_push_qual(Query *subquery,
RangeTblEntry *rte, Index rti, Node *qual);
static void recurse_push_qual(Node *setOp, Query *topquery,
Query *parse = root->parse;
Query *subquery = rte->subquery;
Relids required_outer;
- bool *unsafeColumns;
+ pushdown_safety_info safetyInfo;
double tuple_fraction;
PlannerInfo *subroot;
List *pathkeys;
required_outer = rel->lateral_relids;
/*
- * We need a workspace for keeping track of unsafe-to-reference columns.
- * unsafeColumns[i] is set TRUE if we've found that output column i of the
- * subquery is unsafe to use in a pushed-down qual.
+ * Zero out result area for subquery_is_pushdown_safe, so that it can set
+ * flags as needed while recursing. In particular, we need a workspace
+ * for keeping track of unsafe-to-reference columns. unsafeColumns[i]
+ * will be set TRUE if we find that output column i of the subquery is
+ * unsafe to use in a pushed-down qual.
*/
- unsafeColumns = (bool *)
+ memset(&safetyInfo, 0, sizeof(safetyInfo));
+ safetyInfo.unsafeColumns = (bool *)
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
+ /*
+ * If the subquery has the "security_barrier" flag, it means the subquery
+ * originated from a view that must enforce row-level security. Then we
+ * must not push down quals that contain leaky functions. (Ideally this
+ * would be checked inside subquery_is_pushdown_safe, but since we don't
+ * currently pass the RTE to that function, we must do it here.)
+ */
+ safetyInfo.unsafeLeaky = rte->security_barrier;
+
/*
* If there are any restriction clauses that have been attached to the
* subquery relation, consider pushing them down to become WHERE or HAVING
* pseudoconstant clauses; better to have the gating node above the
* subquery.
*
- * Also, if the sub-query has the "security_barrier" flag, it means the
- * sub-query originated from a view that must enforce row-level security.
- * Then we must not push down quals that contain leaky functions.
- *
* Non-pushed-down clauses will get evaluated as qpquals of the
* SubqueryScan node.
*
* push down a pushable qual, because it'd result in a worse plan?
*/
if (rel->baserestrictinfo != NIL &&
- subquery_is_pushdown_safe(subquery, subquery, unsafeColumns))
+ subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
{
/* OK to consider pushing down individual quals */
List *upperrestrictlist = NIL;
Node *clause = (Node *) rinfo->clause;
if (!rinfo->pseudoconstant &&
- (!rte->security_barrier ||
- !contain_leaky_functions(clause)) &&
- qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns))
+ qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
{
/* Push it down */
subquery_push_qual(subquery, rte, rti, clause);
rel->baserestrictinfo = upperrestrictlist;
}
- pfree(unsafeColumns);
+ pfree(safetyInfo.unsafeColumns);
/*
* The upper query might not use all the subquery's output columns; if
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
* quals into it, because that could change the results.
*
- * In addition, we make several checks on the subquery's output columns
- * to see if it is safe to reference them in pushed-down quals. If output
- * column k is found to be unsafe to reference, we set unsafeColumns[k] to
- * TRUE, but we don't reject the subquery overall since column k might
- * not be referenced by some/all quals. The unsafeColumns[] array will be
- * consulted later by qual_is_pushdown_safe(). It's better to do it this
- * way than to make the checks directly in qual_is_pushdown_safe(), because
- * when the subquery involves set operations we have to check the output
+ * 4. If the subquery uses DISTINCT, we cannot push volatile quals into it.
+ * This is because upper-level quals should semantically be evaluated only
+ * once per distinct row, not once per original row, and if the qual is
+ * volatile then extra evaluations could change the results. (This issue
+ * does not apply to other forms of aggregation such as GROUP BY, because
+ * when those are present we push into HAVING not WHERE, so that the quals
+ * are still applied after aggregation.)
+ *
+ * In addition, we make several checks on the subquery's output columns to see
+ * if it is safe to reference them in pushed-down quals. If output column k
+ * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k]
+ * to TRUE, but we don't reject the subquery overall since column k might not
+ * be referenced by some/all quals. The unsafeColumns[] array will be
+ * consulted later by qual_is_pushdown_safe(). It's better to do it this way
+ * than to make the checks directly in qual_is_pushdown_safe(), because when
+ * the subquery involves set operations we have to check the output
* expressions in each arm of the set op.
+ *
+ * Note: pushing quals into a DISTINCT subquery is theoretically dubious:
+ * we're effectively assuming that the quals cannot distinguish values that
+ * the DISTINCT's equality operator sees as equal, yet there are many
+ * counterexamples to that assumption. However use of such a qual with a
+ * DISTINCT subquery would be unsafe anyway, since there's no guarantee which
+ * "equal" value will be chosen as the output value by the DISTINCT operation.
+ * So we don't worry too much about that. Another objection is that if the
+ * qual is expensive to evaluate, running it for each original row might cost
+ * more than we save by eliminating rows before the DISTINCT step. But it
+ * would be very hard to estimate that at this stage, and in practice pushdown
+ * seldom seems to make things worse, so we ignore that problem too.
*/
static bool
subquery_is_pushdown_safe(Query *subquery, Query *topquery,
- bool *unsafeColumns)
+ pushdown_safety_info *safetyInfo)
{
SetOperationStmt *topop;
if (subquery->hasWindowFuncs)
return false;
+ /* Check point 4 */
+ if (subquery->distinctClause)
+ safetyInfo->unsafeVolatile = true;
+
/*
* If we're at a leaf query, check for unsafe expressions in its target
* list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
* them.)
*/
if (subquery->setOperations == NULL)
- check_output_expressions(subquery, unsafeColumns);
+ check_output_expressions(subquery, safetyInfo);
/* Are we at top level, or looking at a setop component? */
if (subquery == topquery)
/* Top level, so check any component queries */
if (subquery->setOperations != NULL)
if (!recurse_pushdown_safe(subquery->setOperations, topquery,
- unsafeColumns))
+ safetyInfo))
return false;
}
else
Assert(topop && IsA(topop, SetOperationStmt));
compare_tlist_datatypes(subquery->targetList,
topop->colTypes,
- unsafeColumns);
+ safetyInfo);
}
return true;
}
*/
static bool
recurse_pushdown_safe(Node *setOp, Query *topquery,
- bool *unsafeColumns)
+ pushdown_safety_info *safetyInfo)
{
if (IsA(setOp, RangeTblRef))
{
Query *subquery = rte->subquery;
Assert(subquery != NULL);
- return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns);
+ return subquery_is_pushdown_safe(subquery, topquery, safetyInfo);
}
else if (IsA(setOp, SetOperationStmt))
{
if (op->op == SETOP_EXCEPT)
return false;
/* Else recurse */
- if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns))
+ if (!recurse_pushdown_safe(op->larg, topquery, safetyInfo))
return false;
- if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns))
+ if (!recurse_pushdown_safe(op->rarg, topquery, safetyInfo))
return false;
}
else
* 3. If the subquery uses DISTINCT ON, we must not push down any quals that
* refer to non-DISTINCT output columns, because that could change the set
* of rows returned. (This condition is vacuous for DISTINCT, because then
- * there are no non-DISTINCT output columns, so we needn't check. But note
- * we are assuming that the qual can't distinguish values that the DISTINCT
- * operator sees as equal. This is a bit shaky but we have no way to test
- * for the case, and it's unlikely enough that we shouldn't refuse the
- * optimization just because it could theoretically happen.)
+ * there are no non-DISTINCT output columns, so we needn't check. Note that
+ * subquery_is_pushdown_safe already reported that we can't use volatile
+ * quals if there's DISTINCT or DISTINCT ON.)
*/
static void
-check_output_expressions(Query *subquery, bool *unsafeColumns)
+check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
{
ListCell *lc;
continue; /* ignore resjunk columns */
/* We need not check further if output col is already known unsafe */
- if (unsafeColumns[tle->resno])
+ if (safetyInfo->unsafeColumns[tle->resno])
continue;
/* Functions returning sets are unsafe (point 1) */
if (expression_returns_set((Node *) tle->expr))
{
- unsafeColumns[tle->resno] = true;
+ safetyInfo->unsafeColumns[tle->resno] = true;
continue;
}
/* Volatile functions are unsafe (point 2) */
if (contain_volatile_functions((Node *) tle->expr))
{
- unsafeColumns[tle->resno] = true;
+ safetyInfo->unsafeColumns[tle->resno] = true;
continue;
}
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
{
/* non-DISTINCT column, so mark it unsafe */
- unsafeColumns[tle->resno] = true;
+ safetyInfo->unsafeColumns[tle->resno] = true;
continue;
}
}
*
* tlist is a subquery tlist.
* colTypes is an OID list of the top-level setop's output column types.
- * unsafeColumns[] is the result array.
+ * safetyInfo->unsafeColumns[] is the result array.
*/
static void
compare_tlist_datatypes(List *tlist, List *colTypes,
- bool *unsafeColumns)
+ pushdown_safety_info *safetyInfo)
{
ListCell *l;
ListCell *colType = list_head(colTypes);
if (colType == NULL)
elog(ERROR, "wrong number of tlist entries");
if (exprType((Node *) tle->expr) != lfirst_oid(colType))
- unsafeColumns[tle->resno] = true;
+ safetyInfo->unsafeColumns[tle->resno] = true;
colType = lnext(colType);
}
if (colType != NULL)
* it will work correctly: sublinks will already have been transformed into
* subplans in the qual, but not in the subquery).
*
- * 2. The qual must not refer to the whole-row output of the subquery
+ * 2. If unsafeVolatile is set, the qual must not contain any volatile
+ * functions.
+ *
+ * 3. If unsafeLeaky is set, the qual must not contain any leaky functions.
+ *
+ * 4. The qual must not refer to the whole-row output of the subquery
* (since there is no easy way to name that within the subquery itself).
*
- * 3. The qual must not refer to any subquery output columns that were
+ * 5. The qual must not refer to any subquery output columns that were
* found to be unsafe to reference by subquery_is_pushdown_safe().
*/
static bool
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
- bool *unsafeColumns)
+ pushdown_safety_info *safetyInfo)
{
bool safe = true;
List *vars;
if (contain_subplans(qual))
return false;
+ /* Refuse volatile quals if we found they'd be unsafe (point 2) */
+ if (safetyInfo->unsafeVolatile &&
+ contain_volatile_functions(qual))
+ return false;
+
+ /* Refuse leaky quals if told to (point 3) */
+ if (safetyInfo->unsafeLeaky &&
+ contain_leaky_functions(qual))
+ return false;
+
/*
* It would be unsafe to push down window function calls, but at least for
* the moment we could never see any in a qual anyhow. (The same applies
Assert(var->varno == rti);
Assert(var->varattno >= 0);
- /* Check point 2 */
+ /* Check point 4 */
if (var->varattno == 0)
{
safe = false;
break;
}
- /* Check point 3 */
- if (unsafeColumns[var->varattno])
+ /* Check point 5 */
+ if (safetyInfo->unsafeColumns[var->varattno])
{
safe = false;
break;