Commit e2d4ef8d authored by Peter Eisentraut

Add security checks to selectivity estimation functions

Some selectivity estimation functions run user-supplied operators over
data obtained from pg_statistic without security checks, which allows
those operators to leak pg_statistic data without having privileges on
the underlying tables.  Fix by checking that one of the following is
satisfied: (1) the user has table or column privileges on the table
underlying the pg_statistic data, or (2) the function implementing the
user-supplied operator is leak-proof.  If neither is satisfied, planning
will proceed as if there are no statistics available.

At least one of these is satisfied in most cases in practice.  The only
situations that are negatively impacted are user-defined or
not-leak-proof operators on a security-barrier view.

Reported-by: Robert Haas <robertmhaas@gmail.com>
Author: Peter Eisentraut <peter_e@gmx.net>
Author: Tom Lane <tgl@sss.pgh.pa.us>

Security: CVE-2017-7484
parent eb61136d
......@@ -582,4 +582,65 @@ EXPLAIN (ANALYZE, TIMING OFF) SELECT COUNT(*) FROM t GROUP BY a, b;
</sect2>
</sect1>
<sect1 id="planner-stats-security">
<title>Planner Statistics and Security</title>
<para>
Access to the table <structname>pg_statistic</structname> is restricted to
superusers, so that ordinary users cannot learn about the contents of the
tables of other users from it. Some selectivity estimation functions will
use a user-provided operator (either the operator appearing in the query or
a related operator) to analyze the stored statistics. For example, in order
to determine whether a stored most common value is applicable, the
selectivity estimator will have to run the appropriate <literal>=</literal>
operator to compare the constant in the query to the stored value.
Thus the data in <structname>pg_statistic</structname> is potentially
passed to user-defined operators. An appropriately crafted operator can
intentionally leak the passed operands (for example, by logging them
or writing them to a different table), or accidentally leak them by showing
their values in error messages, in either case possibly exposing data from
<structname>pg_statistic</structname> to a user who should not be able to
see it.
</para>
<para>
In order to prevent this, the following applies to all built-in selectivity
estimation functions. When planning a query, in order to be able to use
stored statistics, either the current user must
have <literal>SELECT</literal> privilege on the table or the involved
columns, or the operator used must be <literal>LEAKPROOF</literal> (more
accurately, the function that the operator is based on must be). If neither
condition holds, then the selectivity estimator will behave as if no
statistics are available, and the planner will proceed with default or
fall-back assumptions.
</para>
<para>
If a user does not have the required privilege on the table or columns,
then in many cases the query will ultimately receive a permission-denied
error, in which case this mechanism is invisible in practice. But if the
user is reading from a security-barrier view, then the planner might wish
to check the statistics of an underlying table that is otherwise
inaccessible to the user. In that case, the operator should be leak-proof
or the statistics will not be used. There is no direct feedback about
that, except that the plan might be suboptimal. If one suspects that this
is the case, one could try running the query as a more privileged user,
to see if a different plan results.
</para>
<para>
This restriction applies only to cases where the planner would need to
execute a user-defined operator on one or more values
from <structname>pg_statistic</structname>. Thus the planner is permitted
to use generic statistical information, such as the fraction of null values
or the number of distinct values in a column, regardless of access
privileges.
</para>
<para>
Selectivity estimation functions contained in third-party extensions that
potentially operate on statistics with user-defined operators should follow
the same security rules. Consult the PostgreSQL source code for guidance.
</para>
</sect1>
</chapter>
......@@ -133,7 +133,8 @@ scalararraysel_containment(PlannerInfo *root,
useOr = !useOr;
/* Get array element stats for var, if available */
if (HeapTupleIsValid(vardata.statsTuple))
if (HeapTupleIsValid(vardata.statsTuple) &&
statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
{
Form_pg_statistic stats;
Datum *values;
......@@ -364,7 +365,8 @@ calc_arraycontsel(VariableStatData *vardata, Datum constval,
*/
array = DatumGetArrayTypeP(constval);
if (HeapTupleIsValid(vardata->statsTuple))
if (HeapTupleIsValid(vardata->statsTuple) &&
statistic_proc_security_check(vardata, cmpfunc->fn_oid))
{
Form_pg_statistic stats;
Datum *values;
......
......@@ -255,6 +255,7 @@ calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
if (nnumbers != 1)
elog(ERROR, "invalid empty fraction statistic"); /* shouldn't happen */
empty_frac = numbers[0];
free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
}
else
{
......@@ -383,6 +384,15 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
bool empty;
double hist_selec;
/* Can't use the histogram with insecure range support functions */
if (!statistic_proc_security_check(vardata,
typcache->rng_cmp_proc_finfo.fn_oid))
return -1;
if (OidIsValid(typcache->rng_subdiff_finfo.fn_oid) &&
!statistic_proc_security_check(vardata,
typcache->rng_subdiff_finfo.fn_oid))
return -1;
/* Try to get histogram of ranges */
if (!(HeapTupleIsValid(vardata->statsTuple) &&
get_attstatsslot(vardata->statsTuple,
......@@ -420,11 +430,19 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
NULL,
&length_hist_values, &length_nhist,
NULL, NULL)))
{
free_attstatsslot(vardata->atttype, hist_values, nhist, NULL, 0);
return -1.0;
}
/* check that it's a histogram, not just a dummy entry */
if (length_nhist < 2)
{
free_attstatsslot(vardata->atttype,
length_hist_values, length_nhist, NULL, 0);
free_attstatsslot(vardata->atttype, hist_values, nhist, NULL, 0);
return -1.0;
}
}
/* Extract the bounds of the constant value. */
......@@ -560,6 +578,10 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
break;
}
free_attstatsslot(vardata->atttype,
length_hist_values, length_nhist, NULL, 0);
free_attstatsslot(vardata->atttype, hist_values, nhist, NULL, 0);
return hist_selec;
}
......
This diff is collapsed.
......@@ -75,6 +75,7 @@ typedef struct VariableStatData
Oid atttype; /* type to pass to get_attstatsslot */
int32 atttypmod; /* typmod to pass to get_attstatsslot */
bool isunique; /* matches unique index or DISTINCT clause */
bool acl_ok; /* result of ACL check on table or column */
} VariableStatData;
#define ReleaseVariableStats(vardata) \
......@@ -153,6 +154,7 @@ extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook;
extern void examine_variable(PlannerInfo *root, Node *node, int varRelid,
VariableStatData *vardata);
extern bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid);
extern bool get_restriction_variable(PlannerInfo *root, List *args,
int varRelid,
VariableStatData *vardata, Node **other,
......
......@@ -184,6 +184,103 @@ SELECT * FROM atest1; -- ok
1 | two
(2 rows)
-- test leaky-function protections in selfuncs
-- regress_user1 will own a table and provide a view for it.
SET SESSION AUTHORIZATION regress_user1;
CREATE TABLE atest12 as
SELECT x AS a, 10001 - x AS b FROM generate_series(1,10000) x;
CREATE INDEX ON atest12 (a);
CREATE INDEX ON atest12 (abs(a));
VACUUM ANALYZE atest12;
CREATE FUNCTION leak(integer,integer) RETURNS boolean
AS $$begin return $1 < $2; end$$
LANGUAGE plpgsql immutable;
CREATE OPERATOR <<< (procedure = leak, leftarg = integer, rightarg = integer,
restrict = scalarltsel);
-- view with leaky operator
CREATE VIEW atest12v AS
SELECT * FROM atest12 WHERE b <<< 5;
GRANT SELECT ON atest12v TO PUBLIC;
-- This plan should use nestloop, knowing that few rows will be selected.
EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
QUERY PLAN
-------------------------------------------------
Nested Loop
-> Seq Scan on atest12 atest12_1
Filter: (b <<< 5)
-> Index Scan using atest12_a_idx on atest12
Index Cond: (a = atest12_1.b)
Filter: (b <<< 5)
(6 rows)
-- And this one.
EXPLAIN (COSTS OFF) SELECT * FROM atest12 x, atest12 y
WHERE x.a = y.b and abs(y.a) <<< 5;
QUERY PLAN
---------------------------------------------------
Nested Loop
-> Seq Scan on atest12 y
Filter: (abs(a) <<< 5)
-> Index Scan using atest12_a_idx on atest12 x
Index Cond: (a = y.b)
(5 rows)
-- Check if regress_user2 can break security.
SET SESSION AUTHORIZATION regress_user2;
CREATE FUNCTION leak2(integer,integer) RETURNS boolean
AS $$begin raise notice 'leak % %', $1, $2; return $1 > $2; end$$
LANGUAGE plpgsql immutable;
CREATE OPERATOR >>> (procedure = leak2, leftarg = integer, rightarg = integer,
restrict = scalargtsel);
-- This should not show any "leak" notices before failing.
EXPLAIN (COSTS OFF) SELECT * FROM atest12 WHERE a >>> 0;
ERROR: permission denied for relation atest12
-- This plan should use hashjoin, as it will expect many rows to be selected.
EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
QUERY PLAN
-------------------------------------------
Hash Join
Hash Cond: (atest12.a = atest12_1.b)
-> Seq Scan on atest12
Filter: (b <<< 5)
-> Hash
-> Seq Scan on atest12 atest12_1
Filter: (b <<< 5)
(7 rows)
-- Now regress_user1 grants sufficient access to regress_user2.
SET SESSION AUTHORIZATION regress_user1;
GRANT SELECT (a, b) ON atest12 TO PUBLIC;
SET SESSION AUTHORIZATION regress_user2;
-- Now regress_user2 will also get a good row estimate.
EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
QUERY PLAN
-------------------------------------------------
Nested Loop
-> Seq Scan on atest12 atest12_1
Filter: (b <<< 5)
-> Index Scan using atest12_a_idx on atest12
Index Cond: (a = atest12_1.b)
Filter: (b <<< 5)
(6 rows)
-- But not for this, due to lack of table-wide permissions needed
-- to make use of the expression index's statistics.
EXPLAIN (COSTS OFF) SELECT * FROM atest12 x, atest12 y
WHERE x.a = y.b and abs(y.a) <<< 5;
QUERY PLAN
--------------------------------------
Hash Join
Hash Cond: (x.a = y.b)
-> Seq Scan on atest12 x
-> Hash
-> Seq Scan on atest12 y
Filter: (abs(a) <<< 5)
(6 rows)
-- clean up (regress_user1's objects are all dropped later)
DROP FUNCTION leak2(integer, integer) CASCADE;
NOTICE: drop cascades to operator >>>(integer,integer)
-- groups
SET SESSION AUTHORIZATION regress_user3;
CREATE TABLE atest3 (one int, two int, three int);
......
......@@ -127,6 +127,67 @@ bar true
SELECT * FROM atest1; -- ok
-- test leaky-function protections in selfuncs
-- regress_user1 will own a table and provide a view for it.
SET SESSION AUTHORIZATION regress_user1;
CREATE TABLE atest12 as
SELECT x AS a, 10001 - x AS b FROM generate_series(1,10000) x;
CREATE INDEX ON atest12 (a);
CREATE INDEX ON atest12 (abs(a));
VACUUM ANALYZE atest12;
CREATE FUNCTION leak(integer,integer) RETURNS boolean
AS $$begin return $1 < $2; end$$
LANGUAGE plpgsql immutable;
CREATE OPERATOR <<< (procedure = leak, leftarg = integer, rightarg = integer,
restrict = scalarltsel);
-- view with leaky operator
CREATE VIEW atest12v AS
SELECT * FROM atest12 WHERE b <<< 5;
GRANT SELECT ON atest12v TO PUBLIC;
-- This plan should use nestloop, knowing that few rows will be selected.
EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
-- And this one.
EXPLAIN (COSTS OFF) SELECT * FROM atest12 x, atest12 y
WHERE x.a = y.b and abs(y.a) <<< 5;
-- Check if regress_user2 can break security.
SET SESSION AUTHORIZATION regress_user2;
CREATE FUNCTION leak2(integer,integer) RETURNS boolean
AS $$begin raise notice 'leak % %', $1, $2; return $1 > $2; end$$
LANGUAGE plpgsql immutable;
CREATE OPERATOR >>> (procedure = leak2, leftarg = integer, rightarg = integer,
restrict = scalargtsel);
-- This should not show any "leak" notices before failing.
EXPLAIN (COSTS OFF) SELECT * FROM atest12 WHERE a >>> 0;
-- This plan should use hashjoin, as it will expect many rows to be selected.
EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
-- Now regress_user1 grants sufficient access to regress_user2.
SET SESSION AUTHORIZATION regress_user1;
GRANT SELECT (a, b) ON atest12 TO PUBLIC;
SET SESSION AUTHORIZATION regress_user2;
-- Now regress_user2 will also get a good row estimate.
EXPLAIN (COSTS OFF) SELECT * FROM atest12v x, atest12v y WHERE x.a = y.b;
-- But not for this, due to lack of table-wide permissions needed
-- to make use of the expression index's statistics.
EXPLAIN (COSTS OFF) SELECT * FROM atest12 x, atest12 y
WHERE x.a = y.b and abs(y.a) <<< 5;
-- clean up (regress_user1's objects are all dropped later)
DROP FUNCTION leak2(integer, integer) CASCADE;
-- groups
SET SESSION AUTHORIZATION regress_user3;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment