]> granicus.if.org Git - postgresql/commitdiff
Add pg_stats_ext view for extended statistics
authorTomas Vondra <tomas.vondra@postgresql.org>
Thu, 13 Jun 2019 15:25:04 +0000 (17:25 +0200)
committerTomas Vondra <tomas.vondra@postgresql.org>
Sat, 15 Jun 2019 23:20:39 +0000 (01:20 +0200)
Regular per-column statistics are stored in pg_statistics catalog, which
is however rather difficult to read, so we also have pg_stats view with
a human-reablable version of the data.

For extended statistic the catalog was fairly easy to read, so we did
not have such human-readable view so far.  Commit 9b6babfa2d however did
split the catalog into two, which makes querying harder.  Furthermore,
we want to show the multi-column MCV list in a way similar to per-column
stats (and not as a bytea value).

This commit introduces pg_stats_ext view, joining the two catalogs and
massaging the data to produce human-readable output similar to pg_stats.
It also considers RLS and access privileges - the data is shown only when
the user has access to all columns the extended statistic is defined on.

Bumped CATVERSION due to adding new system view.

Author: Dean Rasheed, with improvements by me
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAEZATCUhT9rt7Ui%3DVdx4N%3D%3DVV5XOK5dsXfnGgVOz_JhAicB%3DZA%40mail.gmail.com

doc/src/sgml/catalogs.sgml
src/backend/catalog/system_views.sql
src/include/catalog/catversion.h
src/test/regress/expected/rules.out

index ef4345524ad4e601d1f8eedf9efed6cb9287b0cc..1300c7bbaa31857696c19f5c6dce5f10d4e6b19c 100644 (file)
@@ -6373,6 +6373,28 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
    about those tables that are readable by the current user.
   </para>
 
+  <para>
+   <structname>pg_statistic</structname> should not be readable by the
+   public, since even statistical information about a table's contents
+   might be considered sensitive.  (Example: minimum and maximum values
+   of a salary column might be quite interesting.)
+   <link linkend="view-pg-stats"><structname>pg_stats</structname></link>
+   is a publicly readable view on
+   <structname>pg_statistic</structname> that only exposes information
+   about those tables that are readable by the current user.
+  </para>
+
+  <para>
+   Similarly, <structname>pg_statistic_ext_data</structname> should not be
+   readable by the public, since the contents might be considered sensitive.
+   (Example: most common combination of values in columns might be quite
+   interesting.)
+   <link linkend="view-pg-stats-ext"><structname>pg_stats_ext</structname></link>
+   is a publicly readable view on <structname>pg_statistic_ext_data</structname>
+   (after joining with <structname>pg_statistic_ext</structname>) that only exposes
+   information about those tables and columns that are readable by the current user.
+  </para>
+
   <table>
    <title><structname>pg_statistic</structname> Columns</title>
 
@@ -8343,6 +8365,11 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
       <entry>planner statistics</entry>
      </row>
 
+     <row>
+      <entry><link linkend="view-pg-stats-ext"><structname>pg_stats_ext</structname></link></entry>
+      <entry>extended planner statistics</entry>
+     </row>
+
      <row>
       <entry><link linkend="view-pg-tables"><structname>pg_tables</structname></link></entry>
       <entry>tables</entry>
@@ -10922,6 +10949,171 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
 
  </sect1>
 
+ <sect1 id="view-pg-stats-ext">
+  <title><structname>pg_stats_ext</structname></title>
+
+  <indexterm zone="view-pg-stats-ext">
+   <primary>pg_stats_ext</primary>
+  </indexterm>
+
+  <para>
+   The view <structname>pg_stats_ext</structname> provides access to
+   the information stored in the <link
+   linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link>
+   and <link linkend="catalog-pg-statistic-ext-data"><structname>pg_statistic_ext_data</structname></link>
+   catalogs.  This view allows access only to rows of
+   <structname>pg_statistic_ext</structname> and <structname>pg_statistic_ext_data</structname>
+   that correspond to tables the user has permission to read, and therefore
+   it is safe to allow public read access to this view.
+  </para>
+
+  <para>
+   <structname>pg_stats_ext</structname> is also designed to present the
+   information in a more readable format than the underlying catalog
+   &mdash; at the cost that its schema must be extended whenever new types
+   of extended statistics are added to <structname>pg_statistic_ext</structname>.
+  </para>
+
+  <table>
+   <title><structname>pg_stats_ext</structname> Columns</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry><structfield>schemaname</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.nspname</literal></entry>
+      <entry>Name of schema containing table</entry>
+     </row>
+
+     <row>
+      <entry><structfield>tablename</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.relname</literal></entry>
+      <entry>Name of table</entry>
+     </row>
+
+     <row>
+      <entry><structfield>statistics_schemaname</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.nspname</literal></entry>
+      <entry>Name of schema containing extended statistic</entry>
+     </row>
+
+     <row>
+      <entry><structfield>statistics_name</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry><literal><link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link>.stxname</literal></entry>
+      <entry>Name of extended statistics</entry>
+     </row>
+
+     <row>
+      <entry><structfield>statistics_owner</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.oid</literal></entry>
+      <entry>Owner of the extended statistics</entry>
+     </row>
+
+     <row>
+      <entry><structfield>attnames</structfield></entry>
+      <entry><type>name[]</type></entry>
+      <entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attname</literal></entry>
+      <entry>Names of the column the extended statistics is defined on</entry>
+     </row>
+
+     <row>
+      <entry><structfield>kinds</structfield></entry>
+      <entry><type>text[]</type></entry>
+      <entry></entry>
+      <entry>Types of exdended statistics enabled for this record</entry>
+     </row>
+
+     <row>
+      <entry><structfield>n_distinct</structfield></entry>
+      <entry><type>pg_ndistinct</type></entry>
+      <entry></entry>
+      <entry>N-distinct counts for combinations of columns. If greater than
+       zero, the estimated number of distinct values in the combination.  If
+       less than zero, the negative of the number of distinct values divided
+       by the number of rows.
+       (The negated form is used when <command>ANALYZE</command> believes that
+       the number of distinct values is likely to increase as the table grows;
+       the positive form is used when the column seems to have a fixed number
+       of possible values.)  For example, -1 indicates a unique combination of
+       columns in which the number of distinct combinations is the same as the
+       number of rows.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>dependencies</structfield></entry>
+      <entry><type>pg_dependencies</type></entry>
+      <entry></entry>
+      <entry>Functional dependency statistics</entry>
+     </row>
+
+     <row>
+      <entry><structfield>most_common_vals</structfield></entry>
+      <entry><type>anyarray</type></entry>
+      <entry></entry>
+      <entry>
+       A list of the most common combinations in the columns. (Null if
+       no values seem to be more common than any others.)
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>most_common_val_nulls</structfield></entry>
+      <entry><type>anyarray</type></entry>
+      <entry></entry>
+      <entry>
+       A list of NULL flags for the most common combinations of values.
+       (Null when <structfield>most_common_vals</structfield> is.)
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>most_common_freqs</structfield></entry>
+      <entry><type>real[]</type></entry>
+      <entry></entry>
+      <entry>
+       A list of the frequencies of the most common combinations,
+       i.e., number of occurrences of each divided by total number of rows.
+       (Null when <structfield>most_common_vals</structfield> is.)
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>most_common_base_freqs</structfield></entry>
+      <entry><type>real[]</type></entry>
+      <entry></entry>
+      <entry>
+       A list of the base frequencies of the most common combinations,
+       i.e., product of per-value frequencies.
+       (Null when <structfield>most_common_vals</structfield> is.)
+      </entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   The maximum number of entries in the array fields can be controlled on a
+   column-by-column basis using the <command>ALTER TABLE SET STATISTICS</command>
+   command, or globally by setting the
+   <xref linkend="guc-default-statistics-target"/> run-time parameter.
+  </para>
+
+ </sect1>
+
  <sect1 id="view-pg-tables">
   <title><structname>pg_tables</structname></title>
 
index 78a103cdb95a426af9db4bf3aa271ae86cc5c477..c8898901181fb20625265d1c298a643c0d1f2518 100644 (file)
@@ -253,6 +253,47 @@ CREATE VIEW pg_stats WITH (security_barrier) AS
 
 REVOKE ALL on pg_statistic FROM public;
 
+CREATE VIEW pg_stats_ext WITH (security_barrier) AS
+    SELECT cn.nspname AS schemaname,
+           c.relname AS tablename,
+           sn.nspname AS statistics_schemaname,
+           s.stxname AS statistics_name,
+           pg_get_userbyid(s.stxowner) AS statistics_owner,
+           ( SELECT array_agg(a.attname ORDER BY a.attnum)
+             FROM unnest(s.stxkeys) k
+                  JOIN pg_attribute a
+                       ON (a.attrelid = s.stxrelid AND a.attnum = k)
+           ) AS attnames,
+           s.stxkind AS kinds,
+           sd.stxdndistinct AS n_distinct,
+           sd.stxddependencies AS dependencies,
+           m.most_common_vals,
+           m.most_common_val_nulls,
+           m.most_common_freqs,
+           m.most_common_base_freqs
+    FROM pg_statistic_ext s JOIN pg_class c ON (c.oid = s.stxrelid)
+         JOIN pg_statistic_ext_data sd ON (s.oid = sd.stxoid)
+         LEFT JOIN pg_namespace cn ON (cn.oid = c.relnamespace)
+         LEFT JOIN pg_namespace sn ON (sn.oid = s.stxnamespace)
+         LEFT JOIN LATERAL
+                   ( SELECT array_agg(values) AS most_common_vals,
+                            array_agg(nulls) AS most_common_val_nulls,
+                            array_agg(frequency) AS most_common_freqs,
+                            array_agg(base_frequency) AS most_common_base_freqs
+                     FROM pg_mcv_list_items(sd.stxdmcv)
+                   ) m ON sd.stxdmcv IS NOT NULL
+    WHERE NOT EXISTS
+              ( SELECT 1
+                FROM unnest(stxkeys) k
+                     JOIN pg_attribute a
+                          ON (a.attrelid = s.stxrelid AND a.attnum = k)
+                WHERE NOT has_column_privilege(c.oid, a.attnum, 'select') )
+    AND (c.relrowsecurity = false OR NOT row_security_active(c.oid));
+
+REVOKE ALL on pg_statistic_ext FROM public;
+GRANT SELECT (oid, stxrelid, stxname, stxnamespace, stxowner, stxkeys, stxkind)
+    ON pg_statistic_ext TO public;
+
 CREATE VIEW pg_publication_tables AS
     SELECT
         P.pubname AS pubname,
index e034506751da87b01852d4aac6083290144fe1ee..ed20a4faaffe680d21c1b1c97bbd190c2e3f253d 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201906151
+#define CATALOG_VERSION_NO     201906152
 
 #endif
index 7d365c48d1298de1cc8699d6cbf6b2313100ec90..210e9cd146cf3a9b9d54fb9b06abbbd365356b5b 100644 (file)
@@ -2284,6 +2284,35 @@ pg_stats| SELECT n.nspname AS schemaname,
      JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum))))
      LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
   WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid))));
+pg_stats_ext| SELECT cn.nspname AS schemaname,
+    c.relname AS tablename,
+    sn.nspname AS statistics_schemaname,
+    s.stxname AS statistics_name,
+    pg_get_userbyid(s.stxowner) AS statistics_owner,
+    ( SELECT array_agg(a.attname ORDER BY a.attnum) AS array_agg
+           FROM (unnest(s.stxkeys) k(k)
+             JOIN pg_attribute a ON (((a.attrelid = s.stxrelid) AND (a.attnum = k.k))))) AS attnames,
+    s.stxkind AS kinds,
+    sd.stxdndistinct AS n_distinct,
+    sd.stxddependencies AS dependencies,
+    m.most_common_vals,
+    m.most_common_val_nulls,
+    m.most_common_freqs,
+    m.most_common_base_freqs
+   FROM (((((pg_statistic_ext s
+     JOIN pg_class c ON ((c.oid = s.stxrelid)))
+     JOIN pg_statistic_ext_data sd ON ((s.oid = sd.stxoid)))
+     LEFT JOIN pg_namespace cn ON ((cn.oid = c.relnamespace)))
+     LEFT JOIN pg_namespace sn ON ((sn.oid = s.stxnamespace)))
+     LEFT JOIN LATERAL ( SELECT array_agg(pg_mcv_list_items."values") AS most_common_vals,
+            array_agg(pg_mcv_list_items.nulls) AS most_common_val_nulls,
+            array_agg(pg_mcv_list_items.frequency) AS most_common_freqs,
+            array_agg(pg_mcv_list_items.base_frequency) AS most_common_base_freqs
+           FROM pg_mcv_list_items(sd.stxdmcv) pg_mcv_list_items(index, "values", nulls, frequency, base_frequency)) m ON ((sd.stxdmcv IS NOT NULL)))
+  WHERE ((NOT (EXISTS ( SELECT 1
+           FROM (unnest(s.stxkeys) k(k)
+             JOIN pg_attribute a ON (((a.attrelid = s.stxrelid) AND (a.attnum = k.k))))
+          WHERE (NOT has_column_privilege(c.oid, a.attnum, 'select'::text))))) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid))));
 pg_tables| SELECT n.nspname AS schemaname,
     c.relname AS tablename,
     pg_get_userbyid(c.relowner) AS tableowner,