</sect1>
+ <sect1 id="planner-stats-security">
+ <title>Planner Statistics and Security</title>
+
+ <para>
+ Access to the table <structname>pg_statistic</structname> is restricted to
+ superusers, so that ordinary users cannot learn about the contents of the
+ tables of other users from it. Some selectivity estimation functions will
+ use a user-provided operator (either the operator appearing in the query or
+ a related operator) to analyze the stored statistics. For example, in order
+ to determine whether a stored most common value is applicable, the
+ selectivity estimator will have to run the appropriate <literal>=</literal>
+ operator to compare the constant in the query to the stored value.
+ Thus the data in <structname>pg_statistic</structname> is potentially
+ passed to user-defined operators. An appropriately crafted operator can
+ intentionally leak the passed operands (for example, by logging them
+ or writing them to a different table), or accidentally leak them by showing
+ their values in error messages, in either case possibly exposing data from
+ <structname>pg_statistic</structname> to a user who should not be able to
+ see it.
+ </para>
+
+ <para>
+ In order to prevent this, the following applies to all built-in selectivity
+ estimation functions. When planning a query, in order to be able to use
+ stored statistics, either the current user must
+ have <literal>SELECT</literal> privilege on the table or the involved
+ columns, or the operator used must be <literal>LEAKPROOF</literal> (more
+ accurately, the function that the operator is based on). If not, then the
+ selectivity estimator will behave as if no statistics are available, and
+ the planner will proceed with default or fall-back assumptions.
+ </para>
+
+ <para>
+ If a user does not have the required privilege on the table or columns,
+ then in many cases the query will ultimately receive a permission-denied
+ error, in which case this mechanism is invisible in practice. But if the
+ user is reading from a security-barrier view, then the planner might wish
+ to check the statistics of an underlying table that is otherwise
+ inaccessible to the user. In that case, the operator should be leak-proof
+ or the statistics will not be used. There is no direct feedback about
+ that, except that the plan might be suboptimal. If one suspects that this
+ is the case, one could try running the query as a more privileged user,
+ to see if a different plan results.
+ </para>
+
+ <para>
+ This restriction applies only to cases where the planner would need to
+ execute a user-defined operator on one or more values
+ from <structname>pg_statistic</structname>. Thus the planner is permitted
+ to use generic statistical information, such as the fraction of null values
+ or the number of distinct values in a column, regardless of access
+ privileges.
+ </para>
+
+ <para>
+ Selectivity estimation functions contained in third-party extensions that
+ potentially apply user-defined operators to stored statistics should follow
+ the same security rules. Consult the <productname>PostgreSQL</productname>
+ source code for guidance.
+ </para>
+ </sect1>
</chapter>
useOr = !useOr;
/* Get array element stats for var, if available */
- if (HeapTupleIsValid(vardata.statsTuple))
+ if (HeapTupleIsValid(vardata.statsTuple) &&
+ statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
{
Form_pg_statistic stats;
Datum *values;
*/
array = DatumGetArrayTypeP(constval);
- if (HeapTupleIsValid(vardata->statsTuple))
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata, cmpfunc->fn_oid))
{
Form_pg_statistic stats;
Datum *values;
if (nnumbers != 1)
elog(ERROR, "invalid empty fraction statistic"); /* shouldn't happen */
empty_frac = numbers[0];
+ free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
}
else
{
bool empty;
double hist_selec;
+ /* Can't use the histogram with insecure range support functions */
+ if (!statistic_proc_security_check(vardata,
+ typcache->rng_cmp_proc_finfo.fn_oid))
+ return -1;
+ if (OidIsValid(typcache->rng_subdiff_finfo.fn_oid) &&
+ !statistic_proc_security_check(vardata,
+ typcache->rng_subdiff_finfo.fn_oid))
+ return -1;
+
/* Try to get histogram of ranges */
if (!(HeapTupleIsValid(vardata->statsTuple) &&
get_attstatsslot(vardata->statsTuple,
NULL,
&length_hist_values, &length_nhist,
NULL, NULL)))
+ {
+ free_attstatsslot(vardata->atttype, hist_values, nhist, NULL, 0);
return -1.0;
+ }
/* check that it's a histogram, not just a dummy entry */
if (length_nhist < 2)
+ {
+ free_attstatsslot(vardata->atttype,
+ length_hist_values, length_nhist, NULL, 0);
+ free_attstatsslot(vardata->atttype, hist_values, nhist, NULL, 0);
return -1.0;
+ }
}
/* Extract the bounds of the constant value. */
break;
}
+ free_attstatsslot(vardata->atttype,
+ length_hist_values, length_nhist, NULL, 0);
+ free_attstatsslot(vardata->atttype, hist_values, nhist, NULL, 0);
+
return hist_selec;
}
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "mb/pg_wchar.h"
+#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
+#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/date.h"
{
double selec;
bool isdefault;
+ Oid opfuncoid;
/*
* If the constant is NULL, assume operator is strict and return zero, ie,
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
return 1.0 / vardata->rel->tuples;
- if (HeapTupleIsValid(vardata->statsTuple))
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata,
+ (opfuncoid = get_opcode(operator))))
{
Form_pg_statistic stats;
Datum *values;
{
FmgrInfo eqproc;
- fmgr_info(get_opcode(operator), &eqproc);
+ fmgr_info(opfuncoid, &eqproc);
for (i = 0; i < nvalues; i++)
{
sumcommon = 0.0;
if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata, opproc->fn_oid) &&
get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_MCV, InvalidOid,
Assert(min_hist_size > 2 * n_skip);
if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata, opproc->fn_oid) &&
get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_HISTOGRAM, InvalidOid,
* the reverse way if isgt is TRUE.
*/
if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata, opproc->fn_oid) &&
get_attstatsslot(vardata->statsTuple,
vardata->atttype, vardata->atttypmod,
STATISTIC_KIND_HISTOGRAM, InvalidOid,
double nd2;
bool isdefault1;
bool isdefault2;
+ Oid opfuncoid;
Form_pg_statistic stats1 = NULL;
Form_pg_statistic stats2 = NULL;
bool have_mcvs1 = false;
nd1 = get_variable_numdistinct(vardata1, &isdefault1);
nd2 = get_variable_numdistinct(vardata2, &isdefault2);
+ opfuncoid = get_opcode(operator);
+
if (HeapTupleIsValid(vardata1->statsTuple))
{
+ /* note we allow use of nullfrac regardless of security check */
stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
- have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
- vardata1->atttype,
- vardata1->atttypmod,
- STATISTIC_KIND_MCV,
- InvalidOid,
- NULL,
- &values1, &nvalues1,
- &numbers1, &nnumbers1);
+ if (statistic_proc_security_check(vardata1, opfuncoid))
+ have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
+ vardata1->atttype,
+ vardata1->atttypmod,
+ STATISTIC_KIND_MCV,
+ InvalidOid,
+ NULL,
+ &values1, &nvalues1,
+ &numbers1, &nnumbers1);
}
if (HeapTupleIsValid(vardata2->statsTuple))
{
+ /* note we allow use of nullfrac regardless of security check */
stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
- have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
- vardata2->atttype,
- vardata2->atttypmod,
- STATISTIC_KIND_MCV,
- InvalidOid,
- NULL,
- &values2, &nvalues2,
- &numbers2, &nnumbers2);
+ if (statistic_proc_security_check(vardata2, opfuncoid))
+ have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
+ vardata2->atttype,
+ vardata2->atttypmod,
+ STATISTIC_KIND_MCV,
+ InvalidOid,
+ NULL,
+ &values2, &nvalues2,
+ &numbers2, &nnumbers2);
}
if (have_mcvs1 && have_mcvs2)
int i,
nmatches;
- fmgr_info(get_opcode(operator), &eqproc);
+ fmgr_info(opfuncoid, &eqproc);
hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
*
* (Also used for anti join, which we are supposed to estimate the same way.)
* Caller has ensured that vardata1 is the LHS variable.
+ * Unlike eqjoinsel_inner, we have to cope with operator being InvalidOid.
*/
static double
eqjoinsel_semi(Oid operator,
double nd2;
bool isdefault1;
bool isdefault2;
+ Oid opfuncoid;
Form_pg_statistic stats1 = NULL;
bool have_mcvs1 = false;
Datum *values1 = NULL;
nd1 = get_variable_numdistinct(vardata1, &isdefault1);
nd2 = get_variable_numdistinct(vardata2, &isdefault2);
+ opfuncoid = OidIsValid(operator) ? get_opcode(operator) : InvalidOid;
+
/*
* We clamp nd2 to be not more than what we estimate the inner relation's
* size to be. This is intuitively somewhat reasonable since obviously
if (HeapTupleIsValid(vardata1->statsTuple))
{
+ /* note we allow use of nullfrac regardless of security check */
stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
- have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
- vardata1->atttype,
- vardata1->atttypmod,
- STATISTIC_KIND_MCV,
- InvalidOid,
- NULL,
- &values1, &nvalues1,
- &numbers1, &nnumbers1);
+ if (statistic_proc_security_check(vardata1, opfuncoid))
+ have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
+ vardata1->atttype,
+ vardata1->atttypmod,
+ STATISTIC_KIND_MCV,
+ InvalidOid,
+ NULL,
+ &values1, &nvalues1,
+ &numbers1, &nnumbers1);
}
- if (HeapTupleIsValid(vardata2->statsTuple))
+ if (HeapTupleIsValid(vardata2->statsTuple) &&
+ statistic_proc_security_check(vardata2, opfuncoid))
{
have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
vardata2->atttype,
*/
clamped_nvalues2 = Min(nvalues2, nd2);
- fmgr_info(get_opcode(operator), &eqproc);
+ fmgr_info(opfuncoid, &eqproc);
hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
* this query. (Caution: this should be trusted for statistical
* purposes only, since we do not check indimmediate nor verify that
* the exact same definition of equality applies.)
+ * acl_ok: TRUE if current user has permission to read the column(s)
+ * underlying the pg_statistic entry. This is consulted by
+ * statistic_proc_security_check().
*
* Caller is responsible for doing ReleaseVariableStats() before exiting.
*/
Int16GetDatum(pos + 1),
BoolGetDatum(false));
vardata->freefunc = ReleaseSysCache;
+
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ /* Get index's table for permission check */
+ RangeTblEntry *rte;
+
+ rte = planner_rt_fetch(index->rel->relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+
+ /*
+ * For simplicity, we insist on the whole
+ * table being selectable, rather than trying
+ * to identify which column(s) the index
+ * depends on.
+ */
+ vardata->acl_ok =
+ (pg_class_aclcheck(rte->relid, GetUserId(),
+ ACL_SELECT) == ACLCHECK_OK);
+ }
+ else
+ {
+ /* suppress leakproofness checks later */
+ vardata->acl_ok = true;
+ }
}
if (vardata->statsTuple)
break;
Int16GetDatum(var->varattno),
BoolGetDatum(rte->inh));
vardata->freefunc = ReleaseSysCache;
+
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ /* check if user has permission to read this column */
+ vardata->acl_ok =
+ (pg_class_aclcheck(rte->relid, GetUserId(),
+ ACL_SELECT) == ACLCHECK_OK) ||
+ (pg_attribute_aclcheck(rte->relid, var->varattno, GetUserId(),
+ ACL_SELECT) == ACLCHECK_OK);
+ }
+ else
+ {
+ /* suppress any possible leakproofness checks later */
+ vardata->acl_ok = true;
+ }
}
else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
{
}
}
+/*
+ * Check whether it is permitted to call func_oid passing some of the
+ * pg_statistic data in vardata. We allow this either if the user has SELECT
+ * privileges on the table or column underlying the pg_statistic data or if
+ * the function is marked leak-proof.
+ *
+ * vardata: statistics descriptor whose acl_ok flag holds the result of the
+ * ACL check on the underlying table or column.
+ * func_oid: OID of the function that would be applied to the statistics
+ * values; callers may pass InvalidOid when no function is known.
+ *
+ * Returns true if the statistics may be passed to the function, false if
+ * the caller must proceed as though no such statistics were available.
+ */
+bool
+statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
+{
+ /* Privilege check already passed: any function may see the data. */
+ if (vardata->acl_ok)
+ return true;
+
+ /* No function to inspect, so leakproofness cannot be established. */
+ if (!OidIsValid(func_oid))
+ return false;
+
+ /* A leak-proof function is acceptable even without SELECT privilege. */
+ if (get_func_leakproof(func_oid))
+ return true;
+
+ /*
+ * Refuse, and leave a DEBUG2 trace naming the function so that the
+ * reason statistics were ignored can be found in the log.
+ */
+ ereport(DEBUG2,
+ (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
+ get_func_name(func_oid))));
+ return false;
+}
+
/*
* get_variable_numdistinct
* Estimate the number of distinct values of a variable.
bool have_data = false;
int16 typLen;
bool typByVal;
+ Oid opfuncoid;
Datum *values;
int nvalues;
int i;
return false;
}
+ /*
+ * If we can't apply the sortop to the stats data, just fail. In
+ * principle, if there's a histogram and no MCVs, we could return the
+ * histogram endpoints without ever applying the sortop ... but it's
+ * probably not worth trying, because whatever the caller wants to do with
+ * the endpoints would likely fail the security check too.
+ */
+ if (!statistic_proc_security_check(vardata,
+ (opfuncoid = get_opcode(sortop))))
+ return false;
+
get_typlenbyval(vardata->atttype, &typLen, &typByVal);
/*
bool tmax_is_mcv = false;
FmgrInfo opproc;
- fmgr_info(get_opcode(sortop), &opproc);
+ fmgr_info(opfuncoid, &opproc);
for (i = 0; i < nvalues; i++)
{
Oid atttype; /* type to pass to get_attstatsslot */
int32 atttypmod; /* typmod to pass to get_attstatsslot */
bool isunique; /* matches unique index or DISTINCT clause */
+ bool acl_ok; /* result of ACL check on table or column */
} VariableStatData;
#define ReleaseVariableStats(vardata) \
extern void examine_variable(PlannerInfo *root, Node *node, int varRelid,
VariableStatData *vardata);
+extern bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid);
extern bool get_restriction_variable(PlannerInfo *root, List *args,
int varRelid,
VariableStatData *vardata, Node **other,
1 | two
(2 rows)
+-- test leaky-function protections in selfuncs
+-- regress_user1 will own a table and provide a view for it.
+SET SESSION AUTHORIZATION regress_user1;
+CREATE TABLE atest12 as
+ SELECT x AS a, 10001 - x AS b FROM generate_series(1,10000) x;
+CREATE INDEX ON atest12 (a);
+CREATE INDEX ON atest12 (abs(a));
+VACUUM ANALYZE atest12;
+CREATE FUNCTION leak(integer,integer) RETURNS boolean
+ AS $$begin return $1 < $2; end$$
+ LANGUAGE plpgsql immutable;
+CREATE OPERATOR <<< (procedure = leak, leftarg = integer, rightarg = integer,
+ restrict = scalarltsel);
+-- view with leaky operator
+CREATE VIEW atest12v AS
+ SELECT * FROM atest12 WHERE b <<< 5;
+GRANT SELECT ON atest12v TO PUBLIC;
+-- This plan should use nestloop, knowing that few rows will be selected.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop
+ -> Seq Scan on atest12 atest12_1
+ Filter: (b <<< 5)
+ -> Index Scan using atest12_a_idx on atest12
+ Index Cond: (a = atest12_1.b)
+ Filter: (b <<< 5)
+(6 rows)
+
+-- And this one.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12 x, atest12 y
+ WHERE x.a = y.b and abs(y.a) <<< 5;
+ QUERY PLAN
+---------------------------------------------------
+ Nested Loop
+ -> Seq Scan on atest12 y
+ Filter: (abs(a) <<< 5)
+ -> Index Scan using atest12_a_idx on atest12 x
+ Index Cond: (a = y.b)
+(5 rows)
+
+-- Check if regress_user2 can break security.
+SET SESSION AUTHORIZATION regress_user2;
+CREATE FUNCTION leak2(integer,integer) RETURNS boolean
+ AS $$begin raise notice 'leak % %', $1, $2; return $1 > $2; end$$
+ LANGUAGE plpgsql immutable;
+CREATE OPERATOR >>> (procedure = leak2, leftarg = integer, rightarg = integer,
+ restrict = scalargtsel);
+-- This should not show any "leak" notices before failing.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12 WHERE a >>> 0;
+ERROR: permission denied for relation atest12
+-- This plan should use hashjoin, as it will expect many rows to be selected.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
+ QUERY PLAN
+-------------------------------------------
+ Hash Join
+ Hash Cond: (atest12.a = atest12_1.b)
+ -> Seq Scan on atest12
+ Filter: (b <<< 5)
+ -> Hash
+ -> Seq Scan on atest12 atest12_1
+ Filter: (b <<< 5)
+(7 rows)
+
+-- Now regress_user1 grants sufficient access to regress_user2.
+SET SESSION AUTHORIZATION regress_user1;
+GRANT SELECT (a, b) ON atest12 TO PUBLIC;
+SET SESSION AUTHORIZATION regress_user2;
+-- Now regress_user2 will also get a good row estimate.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
+ QUERY PLAN
+-------------------------------------------------
+ Nested Loop
+ -> Seq Scan on atest12 atest12_1
+ Filter: (b <<< 5)
+ -> Index Scan using atest12_a_idx on atest12
+ Index Cond: (a = atest12_1.b)
+ Filter: (b <<< 5)
+(6 rows)
+
+-- But not for this, due to lack of table-wide permissions needed
+-- to make use of the expression index's statistics.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12 x, atest12 y
+ WHERE x.a = y.b and abs(y.a) <<< 5;
+ QUERY PLAN
+--------------------------------------
+ Hash Join
+ Hash Cond: (x.a = y.b)
+ -> Seq Scan on atest12 x
+ -> Hash
+ -> Seq Scan on atest12 y
+ Filter: (abs(a) <<< 5)
+(6 rows)
+
+-- clean up (regress_user1's objects are all dropped later)
+DROP FUNCTION leak2(integer, integer) CASCADE;
+NOTICE: drop cascades to operator >>>(integer,integer)
-- groups
SET SESSION AUTHORIZATION regress_user3;
CREATE TABLE atest3 (one int, two int, three int);
SELECT * FROM atest1; -- ok
+-- test leaky-function protections in selfuncs
+
+-- regress_user1 will own a table and provide a view for it.
+SET SESSION AUTHORIZATION regress_user1;
+
+CREATE TABLE atest12 as
+ SELECT x AS a, 10001 - x AS b FROM generate_series(1,10000) x;
+CREATE INDEX ON atest12 (a);
+CREATE INDEX ON atest12 (abs(a));
+VACUUM ANALYZE atest12;
+
+CREATE FUNCTION leak(integer,integer) RETURNS boolean
+ AS $$begin return $1 < $2; end$$
+ LANGUAGE plpgsql immutable;
+CREATE OPERATOR <<< (procedure = leak, leftarg = integer, rightarg = integer,
+ restrict = scalarltsel);
+
+-- view with leaky operator
+CREATE VIEW atest12v AS
+ SELECT * FROM atest12 WHERE b <<< 5;
+GRANT SELECT ON atest12v TO PUBLIC;
+
+-- This plan should use nestloop, knowing that few rows will be selected.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
+
+-- And this one.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12 x, atest12 y
+ WHERE x.a = y.b and abs(y.a) <<< 5;
+
+-- Check if regress_user2 can break security.
+SET SESSION AUTHORIZATION regress_user2;
+
+CREATE FUNCTION leak2(integer,integer) RETURNS boolean
+ AS $$begin raise notice 'leak % %', $1, $2; return $1 > $2; end$$
+ LANGUAGE plpgsql immutable;
+CREATE OPERATOR >>> (procedure = leak2, leftarg = integer, rightarg = integer,
+ restrict = scalargtsel);
+
+-- This should not show any "leak" notices before failing.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12 WHERE a >>> 0;
+
+-- This plan should use hashjoin, as it will expect many rows to be selected.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
+
+-- Now regress_user1 grants sufficient access to regress_user2.
+SET SESSION AUTHORIZATION regress_user1;
+GRANT SELECT (a, b) ON atest12 TO PUBLIC;
+SET SESSION AUTHORIZATION regress_user2;
+
+-- Now regress_user2 will also get a good row estimate.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
+
+-- But not for this, due to lack of table-wide permissions needed
+-- to make use of the expression index's statistics.
+EXPLAIN (COSTS OFF) SELECT * FROM atest12 x, atest12 y
+ WHERE x.a = y.b and abs(y.a) <<< 5;
+
+-- clean up (regress_user1's objects are all dropped later)
+DROP FUNCTION leak2(integer, integer) CASCADE;
+
+
-- groups
SET SESSION AUTHORIZATION regress_user3;