]> granicus.if.org Git - postgresql/commitdiff
Implement multivariate n-distinct coefficients
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Fri, 24 Mar 2017 17:06:10 +0000 (14:06 -0300)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Fri, 24 Mar 2017 17:06:10 +0000 (14:06 -0300)
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns.  Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too.  All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.

This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table.  This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve.  A new
special pseudo-type pg_ndistinct is used.

(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)

Author: Tomas Vondra.  Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
    Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
    https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql

77 files changed:
doc/src/sgml/catalogs.sgml
doc/src/sgml/func.sgml
doc/src/sgml/ref/allfiles.sgml
doc/src/sgml/ref/alter_statistics.sgml [new file with mode: 0644]
doc/src/sgml/ref/alter_table.sgml
doc/src/sgml/ref/comment.sgml
doc/src/sgml/ref/create_statistics.sgml [new file with mode: 0644]
doc/src/sgml/ref/drop_statistics.sgml [new file with mode: 0644]
doc/src/sgml/reference.sgml
src/backend/Makefile
src/backend/catalog/Makefile
src/backend/catalog/aclchk.c
src/backend/catalog/dependency.c
src/backend/catalog/heap.c
src/backend/catalog/namespace.c
src/backend/catalog/objectaddress.c
src/backend/catalog/pg_shdepend.c
src/backend/catalog/system_views.sql
src/backend/commands/Makefile
src/backend/commands/alter.c
src/backend/commands/analyze.c
src/backend/commands/dropcmds.c
src/backend/commands/event_trigger.c
src/backend/commands/statscmds.c [new file with mode: 0644]
src/backend/nodes/copyfuncs.c
src/backend/nodes/equalfuncs.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/util/plancat.c
src/backend/parser/gram.y
src/backend/statistics/Makefile [new file with mode: 0644]
src/backend/statistics/README [new file with mode: 0644]
src/backend/statistics/extended_stats.c [new file with mode: 0644]
src/backend/statistics/mvdistinct.c [new file with mode: 0644]
src/backend/tcop/utility.c
src/backend/utils/adt/ruleutils.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/cache/relcache.c
src/backend/utils/cache/syscache.c
src/bin/pg_dump/common.c
src/bin/pg_dump/pg_backup_archiver.c
src/bin/pg_dump/pg_dump.c
src/bin/pg_dump/pg_dump.h
src/bin/pg_dump/pg_dump_sort.c
src/bin/psql/describe.c
src/include/catalog/catversion.h
src/include/catalog/dependency.h
src/include/catalog/heap.h
src/include/catalog/indexing.h
src/include/catalog/namespace.h
src/include/catalog/pg_cast.h
src/include/catalog/pg_proc.h
src/include/catalog/pg_statistic_ext.h [new file with mode: 0644]
src/include/catalog/pg_type.h
src/include/catalog/toasting.h
src/include/commands/defrem.h
src/include/nodes/nodes.h
src/include/nodes/parsenodes.h
src/include/nodes/relation.h
src/include/statistics/extended_stats_internal.h [new file with mode: 0644]
src/include/statistics/statistics.h [new file with mode: 0644]
src/include/utils/acl.h
src/include/utils/rel.h
src/include/utils/relcache.h
src/include/utils/syscache.h
src/test/regress/expected/alter_generic.out
src/test/regress/expected/object_address.out
src/test/regress/expected/opr_sanity.out
src/test/regress/expected/rules.out
src/test/regress/expected/sanity_check.out
src/test/regress/expected/stats_ext.out [new file with mode: 0644]
src/test/regress/expected/type_sanity.out
src/test/regress/parallel_schedule
src/test/regress/serial_schedule
src/test/regress/sql/alter_generic.sql
src/test/regress/sql/object_address.sql
src/test/regress/sql/stats_ext.sql [new file with mode: 0644]
src/test/regress/sql/type_sanity.sql

index c531c73aac9f661cad9e3c03e0f16197932edfd4..ac39c639edcbbc05b3b18c31cc5ab6b99b93efeb 100644 (file)
       <entry>planner statistics</entry>
      </row>
 
+     <row>
+      <entry><link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link></entry>
+      <entry>extended planner statistics</entry>
+     </row>
+
      <row>
       <entry><link linkend="catalog-pg-subscription"><structname>pg_subscription</structname></link></entry>
       <entry>logical replication subscriptions</entry>
   </table>
  </sect1>
 
+ <sect1 id="catalog-pg-statistic-ext">
+  <title><structname>pg_statistic_ext</structname></title>
+
+  <indexterm zone="catalog-pg-statistic-ext">
+   <primary>pg_statistic_ext</primary>
+  </indexterm>
+
+  <para>
+   The catalog <structname>pg_statistic_ext</structname>
+   holds extended planner statistics.
+  </para>
+
+  <table>
+   <title><structname>pg_statistic_ext</> Columns</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+
+     <row>
+      <entry><structfield>starelid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
+      <entry>The table that the described columns belongs to</entry>
+     </row>
+
+     <row>
+      <entry><structfield>staname</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry></entry>
+      <entry>Name of the statistic.</entry>
+     </row>
+
+     <row>
+      <entry><structfield>stanamespace</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.oid</literal></entry>
+      <entry>
+       The OID of the namespace that contains this statistic
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>staowner</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.oid</literal></entry>
+      <entry>Owner of the statistic</entry>
+     </row>
+
+     <row>
+      <entry><structfield>staenabled</structfield></entry>
+      <entry><type>char[]</type></entry>
+      <entry></entry>
+      <entry>
+        An array with the modes of the enabled statistic types, encoded as
+        <literal>d</literal> for ndistinct coefficients.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>stakeys</structfield></entry>
+      <entry><type>int2vector</type></entry>
+      <entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
+      <entry>
+       This is an array of values that indicate which table columns this
+       statistic covers. For example a value of <literal>1 3</literal> would
+       mean that the first and the third table columns make up the statistic key.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>standistinct</structfield></entry>
+      <entry><type>pg_ndistinct</type></entry>
+      <entry></entry>
+      <entry>
+       N-distinct coefficients, serialized as <structname>pg_ndistinct</> type.
+      </entry>
+     </row>
+
+    </tbody>
+   </tgroup>
+  </table>
+ </sect1>
+
  <sect1 id="catalog-pg-namespace">
   <title><structname>pg_namespace</structname></title>
 
index 3f0f7363b9bc7456cf65f5e33ed36cdb4c92d8e5..ba6f8dd8d2db5f82d155d6c98f6e1bd2d1e07188 100644 (file)
@@ -16720,6 +16720,10 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
     <primary>pg_get_serial_sequence</primary>
    </indexterm>
 
+   <indexterm>
+    <primary>pg_get_statisticsextdef</primary>
+   </indexterm>
+
    <indexterm>
     <primary>pg_get_triggerdef</primary>
    </indexterm>
@@ -16889,6 +16893,11 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
        <entry>get name of the sequence that a <type>serial</type>, <type>smallserial</type> or <type>bigserial</type> column
        uses</entry>
       </row>
+      <row>
+       <entry><literal><function>pg_get_statisticsextdef(<parameter>statext_oid</parameter>)</function></literal></entry>
+       <entry><type>text</type></entry>
+       <entry>get <command>CREATE STATISTICS</> command for extended statistics objects</entry>
+      </row>
       <row>
        <entry><function>pg_get_triggerdef</function>(<parameter>trigger_oid</parameter>)</entry>
        <entry><type>text</type></entry>
@@ -17034,19 +17043,20 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
   <para>
    <function>pg_get_constraintdef</function>,
    <function>pg_get_indexdef</function>, <function>pg_get_ruledef</function>,
-   and <function>pg_get_triggerdef</function>, respectively reconstruct the
-   creating command for a constraint, index, rule, or trigger. (Note that this
-   is a decompiled reconstruction, not the original text of the command.)
-   <function>pg_get_expr</function> decompiles the internal form of an
-   individual expression, such as the default value for a column.  It can be
-   useful when examining the contents of system catalogs.  If the expression
-   might contain Vars, specify the OID of the relation they refer to as the
-   second parameter; if no Vars are expected, zero is sufficient.
-   <function>pg_get_viewdef</function> reconstructs the <command>SELECT</>
-   query that defines a view. Most of these functions come in two variants,
-   one of which can optionally <quote>pretty-print</> the result.  The
-   pretty-printed format is more readable, but the default format is more
-   likely to be interpreted the same way by future versions of
+   <function>pg_get_statisticsextdef</function>, and
+   <function>pg_get_triggerdef</function>, respectively reconstruct the
+   creating command for a constraint, index, rule, extended statistics object,
+   or trigger. (Note that this is a decompiled reconstruction, not the
+   original text of the command.) <function>pg_get_expr</function> decompiles
+   the internal form of an individual expression, such as the default value
+   for a column.  It can be useful when examining the contents of system
+   catalogs.  If the expression might contain Vars, specify the OID of the
+   relation they refer to as the second parameter; if no Vars are expected,
+   zero is sufficient. <function>pg_get_viewdef</function> reconstructs the
+   <command>SELECT</> query that defines a view. Most of these functions come
+   in two variants, one of which can optionally <quote>pretty-print</> the
+   result.  The pretty-printed format is more readable, but the default format
+   is more likely to be interpreted the same way by future versions of
    <productname>PostgreSQL</>; avoid using pretty-printed output for dump
    purposes.  Passing <literal>false</> for the pretty-print parameter yields
    the same result as the variant that does not have the parameter at all.
index 974e1b74e42f786f7bedb86e91d378c6401f609b..01acc2ef9dad1986af16c7b4e46afbeb22f4c6c8 100644 (file)
@@ -34,6 +34,7 @@ Complete list of usable sgml source files in this directory.
 <!ENTITY alterSequence      SYSTEM "alter_sequence.sgml">
 <!ENTITY alterSubscription  SYSTEM "alter_subscription.sgml">
 <!ENTITY alterSystem        SYSTEM "alter_system.sgml">
+<!ENTITY alterStatistics    SYSTEM "alter_statistics.sgml">
 <!ENTITY alterTable         SYSTEM "alter_table.sgml">
 <!ENTITY alterTableSpace    SYSTEM "alter_tablespace.sgml">
 <!ENTITY alterTSConfig      SYSTEM "alter_tsconfig.sgml">
@@ -80,6 +81,7 @@ Complete list of usable sgml source files in this directory.
 <!ENTITY createSchema       SYSTEM "create_schema.sgml">
 <!ENTITY createSequence     SYSTEM "create_sequence.sgml">
 <!ENTITY createServer       SYSTEM "create_server.sgml">
+<!ENTITY createStatistics   SYSTEM "create_statistics.sgml">
 <!ENTITY createSubscription SYSTEM "create_subscription.sgml">
 <!ENTITY createTable        SYSTEM "create_table.sgml">
 <!ENTITY createTableAs      SYSTEM "create_table_as.sgml">
@@ -126,6 +128,7 @@ Complete list of usable sgml source files in this directory.
 <!ENTITY dropSchema         SYSTEM "drop_schema.sgml">
 <!ENTITY dropSequence       SYSTEM "drop_sequence.sgml">
 <!ENTITY dropServer         SYSTEM "drop_server.sgml">
+<!ENTITY dropStatistics     SYSTEM "drop_statistics.sgml">
 <!ENTITY dropSubscription   SYSTEM "drop_subscription.sgml">
 <!ENTITY dropTable          SYSTEM "drop_table.sgml">
 <!ENTITY dropTableSpace     SYSTEM "drop_tablespace.sgml">
diff --git a/doc/src/sgml/ref/alter_statistics.sgml b/doc/src/sgml/ref/alter_statistics.sgml
new file mode 100644 (file)
index 0000000..3e4d286
--- /dev/null
@@ -0,0 +1,115 @@
+<!--
+doc/src/sgml/ref/alter_statistics.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-ALTERSTATISTICS">
+ <indexterm zone="sql-alterstatistics">
+  <primary>ALTER STATISTICS</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>ALTER STATISTICS</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>ALTER STATISTICS</refname>
+  <refpurpose>
+   change the definition of a extended statistics
+  </refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+ALTER STATISTICS <replaceable class="parameter">name</replaceable> OWNER TO { <replaceable class="PARAMETER">new_owner</replaceable> | CURRENT_USER | SESSION_USER }
+ALTER STATISTICS <replaceable class="parameter">name</replaceable> RENAME TO <replaceable class="parameter">new_name</replaceable>
+ALTER STATISTICS <replaceable class="parameter">name</replaceable> SET SCHEMA <replaceable class="parameter">new_schema</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>ALTER STATISTICS</command> changes the parameters of an existing
+   extended statistics.  Any parameters not specifically set in the
+   <command>ALTER STATISTICS</command> command retain their prior settings.
+  </para>
+
+  <para>
+   You must own the statistics to use <command>ALTER STATISTICS</>.
+   To change a statistics' schema, you must also have <literal>CREATE</>
+   privilege on the new schema.
+   To alter the owner, you must also be a direct or indirect member of the new
+   owning role, and that role must have <literal>CREATE</literal> privilege on
+   the statistics' schema.  (These restrictions enforce that altering the owner
+   doesn't do anything you couldn't do by dropping and recreating the statistics.
+   However, a superuser can alter ownership of any statistics anyway.)
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+   <para>
+    <variablelist>
+     <varlistentry>
+      <term><replaceable class="parameter">name</replaceable></term>
+      <listitem>
+       <para>
+        The name (optionally schema-qualified) of the statistics to be altered.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><replaceable class="PARAMETER">new_owner</replaceable></term>
+      <listitem>
+       <para>
+        The user name of the new owner of the statistics.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><replaceable class="parameter">new_name</replaceable></term>
+      <listitem>
+       <para>
+        The new name for the statistics.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><replaceable class="parameter">new_schema</replaceable></term>
+      <listitem>
+       <para>
+        The new schema for the statistics.
+       </para>
+      </listitem>
+     </varlistentry>
+
+    </variablelist>
+   </para>
+  </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   There's no <command>ALTER STATISTICS</command> command in the SQL standard.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-createstatistics"></member>
+   <member><xref linkend="sql-dropstatistics"></member>
+  </simplelist>
+ </refsect1>
+
+</refentry>
index 767ea321da7701371d138044bfb5a4ec3df26b4f..75de2262539a116cc6f270e05ff8f2a4abb63cbf 100644 (file)
@@ -119,9 +119,12 @@ ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
      <para>
       This form drops a column from a table.  Indexes and
       table constraints involving the column will be automatically
-      dropped as well.  You will need to say <literal>CASCADE</> if
-      anything outside the table depends on the column, for example,
-      foreign key references or views.
+      dropped as well.
+      Multivariate statistics referencing the dropped column will also be
+      removed if the removal of the column would cause the statistics to
+      contain data for only a single column.
+      You will need to say <literal>CASCADE</> if anything outside the table
+      depends on the column, for example, foreign key references or views.
       If <literal>IF EXISTS</literal> is specified and the column
       does not exist, no error is thrown. In this case a notice
       is issued instead.
index 7483c8c03fce741dc699c0de1d86ea6d8e05f607..8fe17a5767d7e70d0b548345378ea901c7e7d7c7 100644 (file)
@@ -51,6 +51,7 @@ COMMENT ON
   SCHEMA <replaceable class="PARAMETER">object_name</replaceable> |
   SEQUENCE <replaceable class="PARAMETER">object_name</replaceable> |
   SERVER <replaceable class="PARAMETER">object_name</replaceable> |
+  STATISTICS <replaceable class="PARAMETER">object_name</replaceable> |
   TABLE <replaceable class="PARAMETER">object_name</replaceable> |
   TABLESPACE <replaceable class="PARAMETER">object_name</replaceable> |
   TEXT SEARCH CONFIGURATION <replaceable class="PARAMETER">object_name</replaceable> |
@@ -125,8 +126,8 @@ COMMENT ON
       The name of the object to be commented.  Names of tables,
       aggregates, collations, conversions, domains, foreign tables, functions,
       indexes, operators, operator classes, operator families, sequences,
-      text search objects, types, and views can be schema-qualified.
-      When commenting on a column,
+      statistics, text search objects, types, and views can be
+      schema-qualified. When commenting on a column,
       <replaceable class="parameter">relation_name</replaceable> must refer
       to a table, view, composite type, or foreign table.
      </para>
@@ -327,6 +328,7 @@ COMMENT ON RULE my_rule ON my_table IS 'Logs updates of employee records';
 COMMENT ON SCHEMA my_schema IS 'Departmental data';
 COMMENT ON SEQUENCE my_sequence IS 'Used to generate primary keys';
 COMMENT ON SERVER myserver IS 'my foreign server';
+COMMENT ON STATISTICS my_statistics IS 'Improves planner row estimations';
 COMMENT ON TABLE my_schema.my_table IS 'Employee Information';
 COMMENT ON TABLESPACE my_tablespace IS 'Tablespace for indexes';
 COMMENT ON TEXT SEARCH CONFIGURATION my_config IS 'Special word filtering';
diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml
new file mode 100644 (file)
index 0000000..60184a3
--- /dev/null
@@ -0,0 +1,155 @@
+<!--
+doc/src/sgml/ref/create_statistics.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-CREATESTATISTICS">
+ <indexterm zone="sql-createstatistics">
+  <primary>CREATE STATISTICS</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>CREATE STATISTICS</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>CREATE STATISTICS</refname>
+  <refpurpose>define extended statistics</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_name</replaceable> ON (
+  <replaceable class="PARAMETER">column_name</replaceable>, <replaceable class="PARAMETER">column_name</replaceable> [, ...])
+  FROM <replaceable class="PARAMETER">table_name</replaceable>
+</synopsis>
+
+ </refsynopsisdiv>
+
+ <refsect1 id="SQL-CREATESTATISTICS-description">
+  <title>Description</title>
+
+  <para>
+   <command>CREATE STATISTICS</command> will create a new extended statistics
+   object on the specified table.
+   The statistics will be created in the current database and
+   will be owned by the user issuing the command.
+  </para>
+
+  <para>
+   If a schema name is given (for example, <literal>CREATE STATISTICS
+   myschema.mystat ...</>) then the statistics is created in the specified
+   schema.  Otherwise it is created in the current schema.  The name of
+   the statistics must be distinct from the name of any other statistics in the
+   same schema.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+
+   <varlistentry>
+    <term><literal>IF NOT EXISTS</></term>
+    <listitem>
+     <para>
+      Do not throw an error if a statistics with the same name already exists.
+      A notice is issued in this case.  Note that only the name of the
+      statistics object is considered here. The definition of the statistics is
+      not considered.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="PARAMETER">statistics_name</replaceable></term>
+    <listitem>
+     <para>
+      The name (optionally schema-qualified) of the statistics to be created.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="PARAMETER">column_name</replaceable></term>
+    <listitem>
+     <para>
+      The name of a column to be included in the statistics.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="PARAMETER">table_name</replaceable></term>
+    <listitem>
+     <para>
+      The name (optionally schema-qualified) of the table the statistics should
+      be created on.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   You must be the owner of a table to create or change statistics on it.
+  </para>
+ </refsect1>
+
+ <refsect1 id="SQL-CREATESTATISTICS-examples">
+  <title>Examples</title>
+
+  <para>
+   Create table <structname>t1</> with two functionally dependent columns, i.e.
+   knowledge of a value in the first column is sufficient for determining the
+   value in the other column. Then functional dependencies are built on those
+   columns:
+
+<programlisting>
+CREATE TABLE t1 (
+    a   int,
+    b   int
+);
+
+INSERT INTO t1 SELECT i/100, i/500
+                 FROM generate_series(1,1000000) s(i);
+
+CREATE STATISTICS s1 ON (a, b) FROM t1;
+
+ANALYZE t1;
+
+-- valid combination of values
+EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
+
+-- invalid combination of values
+EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1);
+</programlisting>
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   There's no <command>CREATE STATISTICS</command> command in the SQL standard.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-alterstatistics"></member>
+   <member><xref linkend="sql-dropstatistics"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
diff --git a/doc/src/sgml/ref/drop_statistics.sgml b/doc/src/sgml/ref/drop_statistics.sgml
new file mode 100644 (file)
index 0000000..98c3381
--- /dev/null
@@ -0,0 +1,98 @@
+<!--
+doc/src/sgml/ref/drop_statistics.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-DROPSTATISTICS">
+ <indexterm zone="sql-dropstatistics">
+  <primary>DROP STATISTICS</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>DROP STATISTICS</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>DROP STATISTICS</refname>
+  <refpurpose>remove extended statistics</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+DROP STATISTICS [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> [, ...]
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>DROP STATISTICS</command> removes statistics from the database.
+   Only the statistics owner, the schema owner, and superuser can drop a
+   statistics.
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><literal>IF EXISTS</literal></term>
+    <listitem>
+     <para>
+      Do not throw an error if the statistics do not exist. A notice is
+      issued in this case.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="PARAMETER">name</replaceable></term>
+    <listitem>
+     <para>
+      The name (optionally schema-qualified) of the statistics to drop.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   To destroy two statistics objects on different schemas, without failing
+   if they don't exist:
+
+<programlisting>
+DROP STATISTICS IF EXISTS
+    accounting.users_uid_creation,
+    public.grants_user_role;
+</programlisting>
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   There's no <command>DROP STATISTICS</command> command in the SQL standard.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-alterstatistics"></member>
+   <member><xref linkend="sql-createstatistics"></member>
+  </simplelist>
+ </refsect1>
+
+</refentry>
index 3d8ad232fa3e5155d978b5a882782b9212d3f1d0..9000b3aaaa716e54b1969077bad8138e17e3e4c3 100644 (file)
@@ -60,6 +60,7 @@
    &alterSchema;
    &alterSequence;
    &alterServer;
+   &alterStatistics;
    &alterSubscription;
    &alterSystem;
    &alterTable;
    &createSchema;
    &createSequence;
    &createServer;
+   &createStatistics;
    &createSubscription;
    &createTable;
    &createTableAs;
    &dropSchema;
    &dropSequence;
    &dropServer;
+   &dropStatistics;
    &dropSubscription;
    &dropTable;
    &dropTableSpace;
index fffb0d95bad3ab27c1f06f3c7fbd8157b0f5f82c..bce9d2c3ebb09a06f2e7e1a39a097ef37ce749d0 100644 (file)
@@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global
 
 SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \
        main nodes optimizer port postmaster regex replication rewrite \
-       storage tcop tsearch utils $(top_builddir)/src/timezone
+       statistics storage tcop tsearch utils $(top_builddir)/src/timezone
 
 include $(srcdir)/common.mk
 
index 159cab5c18c0552d19526db06727a8d8cc1e21ee..fd33426bad15164500bb8189f65f808758107f27 100644 (file)
@@ -33,6 +33,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
        pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
        pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
        pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
+       pg_statistic_ext.h \
        pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
        pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
        pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \
index be86d76a59ecf250876cc9bdf0065bc39acc08b1..d01930f4a80d0fdb8da8e98baaa1d0d764160881 100644 (file)
@@ -48,6 +48,7 @@
 #include "catalog/pg_operator.h"
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_type.h"
@@ -3302,6 +3303,8 @@ static const char *const no_priv_msg[MAX_ACL_KIND] =
        gettext_noop("permission denied for collation %s"),
        /* ACL_KIND_CONVERSION */
        gettext_noop("permission denied for conversion %s"),
+       /* ACL_KIND_STATISTICS */
+       gettext_noop("permission denied for statistics %s"),
        /* ACL_KIND_TABLESPACE */
        gettext_noop("permission denied for tablespace %s"),
        /* ACL_KIND_TSDICTIONARY */
@@ -3352,6 +3355,8 @@ static const char *const not_owner_msg[MAX_ACL_KIND] =
        gettext_noop("must be owner of collation %s"),
        /* ACL_KIND_CONVERSION */
        gettext_noop("must be owner of conversion %s"),
+       /* ACL_KIND_STATISTICS */
+       gettext_noop("must be owner of statistics %s"),
        /* ACL_KIND_TABLESPACE */
        gettext_noop("must be owner of tablespace %s"),
        /* ACL_KIND_TSDICTIONARY */
@@ -3467,6 +3472,10 @@ pg_aclmask(AclObjectKind objkind, Oid table_oid, AttrNumber attnum, Oid roleid,
                                                                                                   mask, how, NULL);
                case ACL_KIND_NAMESPACE:
                        return pg_namespace_aclmask(table_oid, roleid, mask, how);
+               case ACL_KIND_STATISTICS:
+                       elog(ERROR, "grantable rights not supported for statistics");
+                       /* not reached, but keep compiler quiet */
+                       return ACL_NO_RIGHTS;
                case ACL_KIND_TABLESPACE:
                        return pg_tablespace_aclmask(table_oid, roleid, mask, how);
                case ACL_KIND_FDW:
@@ -5103,6 +5112,32 @@ pg_subscription_ownercheck(Oid sub_oid, Oid roleid)
        return has_privs_of_role(roleid, ownerId);
 }
 
+/*
+ * Ownership check for a extended statistics (specified by OID).
+ */
+bool
+pg_statistics_ownercheck(Oid stat_oid, Oid roleid)
+{
+       HeapTuple       tuple;
+       Oid                     ownerId;
+
+       /* Superusers bypass all permission checking. */
+       if (superuser_arg(roleid))
+               return true;
+
+       tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stat_oid));
+       if (!HeapTupleIsValid(tuple))
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("statistics with OID %u do not exist", stat_oid)));
+
+       ownerId = ((Form_pg_statistic_ext) GETSTRUCT(tuple))->staowner;
+
+       ReleaseSysCache(tuple);
+
+       return has_privs_of_role(roleid, ownerId);
+}
+
 /*
  * Check whether specified role has CREATEROLE privilege (or is a superuser)
  *
index fc088b216589ccbb70f3201a128d59e8345b913c..ee27cae7df7b27aae61a593fa6f687f4e0079478 100644 (file)
@@ -51,6 +51,7 @@
 #include "catalog/pg_publication.h"
 #include "catalog/pg_publication_rel.h"
 #include "catalog/pg_rewrite.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_transform.h"
@@ -154,6 +155,7 @@ static const Oid object_classes[] = {
        RewriteRelationId,                      /* OCLASS_REWRITE */
        TriggerRelationId,                      /* OCLASS_TRIGGER */
        NamespaceRelationId,            /* OCLASS_SCHEMA */
+       StatisticExtRelationId,         /* OCLASS_STATISTIC_EXT */
        TSParserRelationId,                     /* OCLASS_TSPARSER */
        TSDictionaryRelationId,         /* OCLASS_TSDICT */
        TSTemplateRelationId,           /* OCLASS_TSTEMPLATE */
@@ -1263,6 +1265,10 @@ doDeletion(const ObjectAddress *object, int flags)
                        DropTransformById(object->objectId);
                        break;
 
+               case OCLASS_STATISTIC_EXT:
+                       RemoveStatisticsById(object->objectId);
+                       break;
+
                default:
                        elog(ERROR, "unrecognized object class: %u",
                                 object->classId);
@@ -2377,6 +2383,9 @@ getObjectClass(const ObjectAddress *object)
                case NamespaceRelationId:
                        return OCLASS_SCHEMA;
 
+               case StatisticExtRelationId:
+                       return OCLASS_STATISTIC_EXT;
+
                case TSParserRelationId:
                        return OCLASS_TSPARSER;
 
index d49dcdc015d9ca905ef15f6abc04992890e8a4d6..eee5e2f6caf378ca94f9f2f6b80786148a0b2bde 100644 (file)
@@ -52,6 +52,7 @@
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_partitioned_table.h"
 #include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_subscription_rel.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_type.h"
@@ -1609,7 +1610,10 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
        heap_close(attr_rel, RowExclusiveLock);
 
        if (attnum > 0)
+       {
                RemoveStatistics(relid, attnum);
+               RemoveStatisticsExt(relid, attnum);
+       }
 
        relation_close(rel, NoLock);
 }
@@ -1860,6 +1864,7 @@ heap_drop_with_catalog(Oid relid)
         * delete statistics
         */
        RemoveStatistics(relid, 0);
+       RemoveStatisticsExt(relid, 0);
 
        /*
         * delete attribute tuples
@@ -2771,6 +2776,75 @@ RemoveStatistics(Oid relid, AttrNumber attnum)
 }
 
 
+/*
+ * RemoveStatisticsExt --- remove entries in pg_statistic_ext for a relation
+ *
+ * If attnum is zero, remove all entries for rel; else remove only the
+ * one(s) involving that column.
+ */
+void
+RemoveStatisticsExt(Oid relid, AttrNumber attnum)
+{
+       Relation        pgstatisticext;
+       SysScanDesc scan;
+       ScanKeyData key;
+       HeapTuple       tuple;
+
+       /*
+        * Scan pg_statistic_ext to delete relevant tuples
+        */
+       pgstatisticext = heap_open(StatisticExtRelationId, RowExclusiveLock);
+
+       ScanKeyInit(&key,
+                               Anum_pg_statistic_ext_starelid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(relid));
+
+       scan = systable_beginscan(pgstatisticext,
+                                                         StatisticExtRelidIndexId,
+                                                         true, NULL, 1, &key);
+
+       while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+       {
+               bool            delete = false;
+
+               if (attnum == 0)
+                       delete = true;
+               else if (attnum != 0)
+               {
+                       Form_pg_statistic_ext   staForm;
+                       int                     i;
+
+                       /*
+                        * Decode the stakeys array and delete any stats that involve the
+                        * specified column.
+                        */
+                       staForm = (Form_pg_statistic_ext) GETSTRUCT(tuple);
+                       for (i = 0; i < staForm->stakeys.dim1; i++)
+                       {
+                               if (staForm->stakeys.values[i] == attnum)
+                               {
+                                       delete = true;
+                                       break;
+                               }
+                       }
+               }
+
+               if (delete)
+               {
+                       CatalogTupleDelete(pgstatisticext, &tuple->t_self);
+                       deleteDependencyRecordsFor(StatisticExtRelationId,
+                                                                          HeapTupleGetOid(tuple),
+                                                                          false);
+               }
+       }
+
+       systable_endscan(scan);
+
+       heap_close(pgstatisticext, RowExclusiveLock);
+}
+
+
 /*
  * RelationTruncateIndexes - truncate all indexes associated
  * with the heap relation to zero tuples.
index a38da3047ff45b8d6ea82a85019a9ff7ebac0849..e521bd908a22ff24a25a91554d5689b30cdf4029 100644 (file)
@@ -2085,6 +2085,62 @@ ConversionIsVisible(Oid conid)
        return visible;
 }
 
+/*
+ * get_statistics_oid - find a statistics by possibly qualified name
+ *
+ * If not found, returns InvalidOid if missing_ok, else throws error
+ */
+Oid
+get_statistics_oid(List *names, bool missing_ok)
+{
+       char       *schemaname;
+       char       *stats_name;
+       Oid                     namespaceId;
+       Oid                     stats_oid = InvalidOid;
+       ListCell   *l;
+
+       /* deconstruct the name list */
+       DeconstructQualifiedName(names, &schemaname, &stats_name);
+
+       if (schemaname)
+       {
+               /* use exact schema given */
+               namespaceId = LookupExplicitNamespace(schemaname, missing_ok);
+               if (missing_ok && !OidIsValid(namespaceId))
+                       stats_oid = InvalidOid;
+               else
+                       stats_oid = GetSysCacheOid2(STATEXTNAMENSP,
+                                                                               PointerGetDatum(stats_name),
+                                                                               ObjectIdGetDatum(namespaceId));
+       }
+       else
+       {
+               /* search for it in search path */
+               recomputeNamespacePath();
+
+               foreach(l, activeSearchPath)
+               {
+                       namespaceId = lfirst_oid(l);
+
+                       if (namespaceId == myTempNamespace)
+                               continue;               /* do not look in temp namespace */
+                       stats_oid = GetSysCacheOid2(STATEXTNAMENSP,
+                                                                               PointerGetDatum(stats_name),
+                                                                               ObjectIdGetDatum(namespaceId));
+                       if (OidIsValid(stats_oid))
+                               break;
+               }
+       }
+
+       if (!OidIsValid(stats_oid) && !missing_ok)
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("statistics \"%s\" do not exist",
+                                               NameListToString(names))));
+
+       return stats_oid;
+}
+
 /*
  * get_ts_parser_oid - find a TS parser by possibly qualified name
  *
index 61a831b4036c3c7efef567ca08810b4d5cb1828e..2948d64fa73a6ab7108e8d4b3730669d98777c0e 100644 (file)
@@ -48,6 +48,7 @@
 #include "catalog/pg_publication.h"
 #include "catalog/pg_publication_rel.h"
 #include "catalog/pg_rewrite.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_transform.h"
@@ -478,6 +479,18 @@ static const ObjectPropertyType ObjectProperty[] =
                InvalidAttrNumber,
                ACL_KIND_SUBSCRIPTION,
                true
+       },
+       {
+               StatisticExtRelationId,
+               StatisticExtOidIndexId,
+               STATEXTOID,
+               STATEXTNAMENSP,
+               Anum_pg_statistic_ext_staname,
+               Anum_pg_statistic_ext_stanamespace,
+               Anum_pg_statistic_ext_staowner,
+               InvalidAttrNumber,              /* no ACL (same as relation) */
+               ACL_KIND_STATISTICS,
+               true
        }
 };
 
@@ -696,6 +709,10 @@ static const struct object_type_map
        /* OCLASS_TRANSFORM */
        {
                "transform", OBJECT_TRANSFORM
+       },
+       /* OBJECT_STATISTIC_EXT */
+       {
+               "statistics", OBJECT_STATISTIC_EXT
        }
 };
 
@@ -974,6 +991,12 @@ get_object_address(ObjectType objtype, Node *object,
                                address = get_object_address_defacl(castNode(List, object),
                                                                                                        missing_ok);
                                break;
+                       case OBJECT_STATISTIC_EXT:
+                               address.classId = StatisticExtRelationId;
+                               address.objectId = get_statistics_oid(castNode(List, object),
+                                                                                                         missing_ok);
+                               address.objectSubId = 0;
+                               break;
                        default:
                                elog(ERROR, "unrecognized objtype: %d", (int) objtype);
                                /* placate compiler, in case it thinks elog might return */
@@ -2083,6 +2106,7 @@ pg_get_object_address(PG_FUNCTION_ARGS)
                case OBJECT_ATTRIBUTE:
                case OBJECT_COLLATION:
                case OBJECT_CONVERSION:
+               case OBJECT_STATISTIC_EXT:
                case OBJECT_TSPARSER:
                case OBJECT_TSDICTIONARY:
                case OBJECT_TSTEMPLATE:
@@ -2370,6 +2394,10 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address,
                                                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                                                 errmsg("must be superuser")));
                        break;
+               case OBJECT_STATISTIC_EXT:
+                       if (!pg_statistics_ownercheck(address.objectId, roleid))
+                               aclcheck_error_type(ACLCHECK_NOT_OWNER, address.objectId);
+                       break;
                default:
                        elog(ERROR, "unrecognized object type: %d",
                                 (int) objtype);
@@ -3857,6 +3885,10 @@ getObjectTypeDescription(const ObjectAddress *object)
                        appendStringInfoString(&buffer, "subscription");
                        break;
 
+               case OCLASS_STATISTIC_EXT:
+                       appendStringInfoString(&buffer, "statistics");
+                       break;
+
                default:
                        appendStringInfo(&buffer, "unrecognized %u", object->classId);
                        break;
@@ -4880,6 +4912,29 @@ getObjectIdentityParts(const ObjectAddress *object,
                                break;
                        }
 
+               case OCLASS_STATISTIC_EXT:
+                       {
+                               HeapTuple       tup;
+                               Form_pg_statistic_ext formStatistic;
+                               char       *schema;
+
+                               tup = SearchSysCache1(STATEXTOID,
+                                                                         ObjectIdGetDatum(object->objectId));
+                               if (!HeapTupleIsValid(tup))
+                                       elog(ERROR, "cache lookup failed for statistics %u",
+                                                object->objectId);
+                               formStatistic = (Form_pg_statistic_ext) GETSTRUCT(tup);
+                               schema = get_namespace_name_or_temp(formStatistic->stanamespace);
+                               appendStringInfoString(&buffer,
+                                                                          quote_qualified_identifier(schema,
+                                                                                  NameStr(formStatistic->staname)));
+                               if (objname)
+                                       *objname = list_make2(schema,
+                                                                  pstrdup(NameStr(formStatistic->staname)));
+                               ReleaseSysCache(tup);
+                       }
+                       break;
+
                default:
                        appendStringInfo(&buffer, "unrecognized object %u %u %d",
                                                         object->classId,
index 8d946ff44ccb341172fe4fa562a49ec46382f46a..d28a8afb47dac78739e4a3649e8957bb82b35ed8 100644 (file)
@@ -39,6 +39,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_shdepend.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_ts_config.h"
@@ -1415,6 +1416,7 @@ shdepReassignOwned(List *roleids, Oid newrole)
                                case OperatorFamilyRelationId:
                                case OperatorClassRelationId:
                                case ExtensionRelationId:
+                               case StatisticExtRelationId:
                                case TableSpaceRelationId:
                                case DatabaseRelationId:
                                case TSConfigRelationId:
index 80d14296de22bac8073bdcb00ec1832dbebf38de..b41882aa5210cda567f85f6db520008ca97b59fb 100644 (file)
@@ -186,6 +186,16 @@ CREATE OR REPLACE VIEW pg_sequences AS
     WHERE NOT pg_is_other_temp_schema(N.oid)
           AND relkind = 'S';
 
+CREATE VIEW pg_stats_ext AS
+    SELECT
+        N.nspname AS schemaname,
+        C.relname AS tablename,
+        S.staname AS staname,
+        S.stakeys AS attnums,
+        length(s.standistinct) AS ndistbytes
+    FROM (pg_statistic_ext S JOIN pg_class C ON (C.oid = S.starelid))
+        LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace);
+
 CREATE VIEW pg_stats WITH (security_barrier) AS
     SELECT
         nspname AS schemaname,
index e0fab38cbe19af855403be312d5495165d9f0533..4a6c99e09081a533f099d5f175f0806048b6ae9c 100644 (file)
@@ -18,8 +18,8 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \
        event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \
        indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \
        policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \
-       schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \
-       tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \
-       vacuumlazy.o variable.o view.o
+       schemacmds.o seclabel.o sequence.o statscmds.o subscriptioncmds.o \
+       tablecmds.o tablespace.o trigger.o tsearchcmds.o typecmds.o user.o \
+       vacuum.o vacuumlazy.o variable.o view.o
 
 include $(top_srcdir)/src/backend/common.mk
index cf1391c2e6ba31f722835a0ed0bebd54a0cafd2c..2c6435b75980482671752142d03727c9a0d0e883 100644 (file)
@@ -33,6 +33,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_subscription.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_ts_config.h"
 #include "catalog/pg_ts_dict.h"
 #include "catalog/pg_ts_parser.h"
@@ -120,6 +121,10 @@ report_namespace_conflict(Oid classId, const char *name, Oid nspOid)
                        Assert(OidIsValid(nspOid));
                        msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\"");
                        break;
+               case StatisticExtRelationId:
+                       Assert(OidIsValid(nspOid));
+                       msgfmt = gettext_noop("statistics \"%s\" already exists in schema \"%s\"");
+                       break;
                case TSParserRelationId:
                        Assert(OidIsValid(nspOid));
                        msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\"");
@@ -373,6 +378,7 @@ ExecRenameStmt(RenameStmt *stmt)
                case OBJECT_OPCLASS:
                case OBJECT_OPFAMILY:
                case OBJECT_LANGUAGE:
+               case OBJECT_STATISTIC_EXT:
                case OBJECT_TSCONFIGURATION:
                case OBJECT_TSDICTIONARY:
                case OBJECT_TSPARSER:
@@ -489,6 +495,7 @@ ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt,
                case OBJECT_OPERATOR:
                case OBJECT_OPCLASS:
                case OBJECT_OPFAMILY:
+               case OBJECT_STATISTIC_EXT:
                case OBJECT_TSCONFIGURATION:
                case OBJECT_TSDICTIONARY:
                case OBJECT_TSPARSER:
@@ -803,6 +810,7 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt)
                case OBJECT_OPERATOR:
                case OBJECT_OPCLASS:
                case OBJECT_OPFAMILY:
+               case OBJECT_STATISTIC_EXT:
                case OBJECT_TABLESPACE:
                case OBJECT_TSDICTIONARY:
                case OBJECT_TSCONFIGURATION:
index 055338fdff2c623ba56a02c6c020a973bc203918..c5b5c54babf0994567ad6bf729f49bf6ba888d52 100644 (file)
@@ -17,6 +17,7 @@
 #include <math.h>
 
 #include "access/multixact.h"
+#include "access/sysattr.h"
 #include "access/transam.h"
 #include "access/tupconvert.h"
 #include "access/tuptoaster.h"
@@ -28,6 +29,7 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_inherits_fn.h"
 #include "catalog/pg_namespace.h"
+#include "catalog/pg_statistic_ext.h"
 #include "commands/dbcommands.h"
 #include "commands/tablecmds.h"
 #include "commands/vacuum.h"
 #include "parser/parse_relation.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
 #include "utils/acl.h"
 #include "utils/attoptcache.h"
+#include "utils/builtins.h"
 #include "utils/datum.h"
+#include "utils/fmgroids.h"
 #include "utils/guc.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
@@ -566,6 +572,10 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
                        update_attstats(RelationGetRelid(Irel[ind]), false,
                                                        thisdata->attr_cnt, thisdata->vacattrstats);
                }
+
+               /* Build extended statistics (if there are any). */
+               BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt,
+                                                                  vacattrstats);
        }
 
        /*
@@ -1681,19 +1691,6 @@ ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
 /*
  * Extra information used by the default analysis routines
  */
-typedef struct
-{
-       Oid                     eqopr;                  /* '=' operator for datatype, if any */
-       Oid                     eqfunc;                 /* and associated function */
-       Oid                     ltopr;                  /* '<' operator for datatype, if any */
-} StdAnalyzeData;
-
-typedef struct
-{
-       Datum           value;                  /* a data value */
-       int                     tupno;                  /* position index for tuple it came from */
-} ScalarItem;
-
 typedef struct
 {
        int                     count;                  /* # of duplicates */
index ab73fbf961be2469107b71341cf3078d2f08df0b..cb948f0204d5723b1c9e4b097fd36a2b7e072e28 100644 (file)
@@ -286,6 +286,13 @@ does_not_exist_skipping(ObjectType objtype, Node *object)
                        msg = gettext_noop("schema \"%s\" does not exist, skipping");
                        name = strVal((Value *) object);
                        break;
+               case OBJECT_STATISTIC_EXT:
+                       if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
+                       {
+                               msg = gettext_noop("extended statistics \"%s\" do not exist, skipping");
+                               name = NameListToString(castNode(List, object));
+                       }
+                       break;
                case OBJECT_TSPARSER:
                        if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
                        {
index 346b347ae17654e9c4e5155f5d1a2e6a294d9a7e..7366fc74bec757ab727ffc81e0c8857ca831c333 100644 (file)
@@ -112,6 +112,7 @@ static event_trigger_support_data event_trigger_support[] = {
        {"SCHEMA", true},
        {"SEQUENCE", true},
        {"SERVER", true},
+       {"STATISTICS", true},
        {"SUBSCRIPTION", true},
        {"TABLE", true},
        {"TABLESPACE", false},
@@ -1108,6 +1109,7 @@ EventTriggerSupportsObjectType(ObjectType obtype)
                case OBJECT_SCHEMA:
                case OBJECT_SEQUENCE:
                case OBJECT_SUBSCRIPTION:
+               case OBJECT_STATISTIC_EXT:
                case OBJECT_TABCONSTRAINT:
                case OBJECT_TABLE:
                case OBJECT_TRANSFORM:
@@ -1173,6 +1175,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass)
                case OCLASS_PUBLICATION:
                case OCLASS_PUBLICATION_REL:
                case OCLASS_SUBSCRIPTION:
+               case OCLASS_STATISTIC_EXT:
                        return true;
        }
 
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
new file mode 100644 (file)
index 0000000..4163091
--- /dev/null
@@ -0,0 +1,296 @@
+/*-------------------------------------------------------------------------
+ *
+ * statscmds.c
+ *       Commands for creating and altering extended statistics
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *       src/backend/commands/statscmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_statistic_ext.h"
+#include "commands/defrem.h"
+#include "miscadmin.h"
+#include "statistics/statistics.h"
+#include "utils/builtins.h"
+#include "utils/inval.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+
+/* used for sorting the attnums in CreateStatistics */
+static int
+compare_int16(const void *a, const void *b)
+{
+       return memcmp(a, b, sizeof(int16));
+}
+
+/*
+ *             CREATE STATISTICS
+ */
+ObjectAddress
+CreateStatistics(CreateStatsStmt *stmt)
+{
+       int                     i;
+       ListCell   *l;
+       int16           attnums[STATS_MAX_DIMENSIONS];
+       int                     numcols = 0;
+       ObjectAddress address = InvalidObjectAddress;
+       char       *namestr;
+       NameData        staname;
+       Oid                     statoid;
+       Oid                     namespaceId;
+       HeapTuple       htup;
+       Datum           values[Natts_pg_statistic_ext];
+       bool            nulls[Natts_pg_statistic_ext];
+       int2vector *stakeys;
+       Relation        statrel;
+       Relation        rel;
+       Oid                     relid;
+       ObjectAddress parentobject,
+                               childobject;
+       Datum           types[1];               /* only ndistinct defined now */
+       int                     ntypes;
+       ArrayType  *staenabled;
+       bool            build_ndistinct;
+       bool            requested_type = false;
+
+       Assert(IsA(stmt, CreateStatsStmt));
+
+       /* resolve the pieces of the name (namespace etc.) */
+       namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
+       namestrcpy(&staname, namestr);
+
+       /*
+        * If if_not_exists was given and the statistics already exists, bail out.
+        */
+       if (SearchSysCacheExists2(STATEXTNAMENSP,
+                                                         PointerGetDatum(&staname),
+                                                         ObjectIdGetDatum(namespaceId)))
+       {
+               if (stmt->if_not_exists)
+               {
+                       ereport(NOTICE,
+                                       (errcode(ERRCODE_DUPLICATE_OBJECT),
+                                        errmsg("statistics \"%s\" already exist, skipping",
+                                                       namestr)));
+                       return InvalidObjectAddress;
+               }
+
+               ereport(ERROR,
+                               (errcode(ERRCODE_DUPLICATE_OBJECT),
+                                errmsg("statistics \"%s\" already exist", namestr)));
+       }
+
+       rel = heap_openrv(stmt->relation, AccessExclusiveLock);
+       relid = RelationGetRelid(rel);
+
+       if (rel->rd_rel->relkind != RELKIND_RELATION &&
+               rel->rd_rel->relkind != RELKIND_MATVIEW)
+               ereport(ERROR,
+                               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                errmsg("relation \"%s\" is not a table or materialized view",
+                                               RelationGetRelationName(rel))));
+
+       /*
+        * Transform column names to array of attnums. While at it, enforce some
+        * constraints.
+        */
+       foreach(l, stmt->keys)
+       {
+               char       *attname = strVal(lfirst(l));
+               HeapTuple       atttuple;
+               Form_pg_attribute attForm;
+               TypeCacheEntry *type;
+
+               atttuple = SearchSysCacheAttName(relid, attname);
+               if (!HeapTupleIsValid(atttuple))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_UNDEFINED_COLUMN),
+                         errmsg("column \"%s\" referenced in statistics does not exist",
+                                        attname)));
+               attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+               /* Disallow use of system attributes in extended stats */
+               if (attForm->attnum < 0)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("statistic creation on system columns is not supported")));
+
+               /* Disallow data types without a less-than operator */
+               type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
+               if (type->lt_opr == InvalidOid)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("only scalar types can be used in extended statistics")));
+
+               /* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
+               if (numcols >= STATS_MAX_DIMENSIONS)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_TOO_MANY_COLUMNS),
+                                        errmsg("cannot have more than %d keys in statistics",
+                                                       STATS_MAX_DIMENSIONS)));
+
+               attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum;
+               ReleaseSysCache(atttuple);
+               numcols++;
+       }
+
+       /*
+        * Check that at least two columns were specified in the statement. The
+        * upper bound was already checked in the loop above.
+        */
+       if (numcols < 2)
+               ereport(ERROR,
+                               (errcode(ERRCODE_TOO_MANY_COLUMNS),
+                                errmsg("statistics require at least 2 columns")));
+
+       /*
+        * Sort the attnums, which makes detecting duplicies somewhat easier, and
+        * it does not hurt (it does not affect the efficiency, unlike for
+        * indexes, for example).
+        */
+       qsort(attnums, numcols, sizeof(int16), compare_int16);
+
+       /*
+        * Look for duplicities in the list of columns. The attnums are sorted so
+        * just check consecutive elements.
+        */
+       for (i = 1; i < numcols; i++)
+               if (attnums[i] == attnums[i - 1])
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_UNDEFINED_COLUMN),
+                                 errmsg("duplicate column name in statistics definition")));
+
+       stakeys = buildint2vector(attnums, numcols);
+
+       /*
+        * Parse the statistics options.  Currently only statistics types are
+        * recognized.
+        */
+       build_ndistinct = false;
+       foreach(l, stmt->options)
+       {
+               DefElem    *opt = (DefElem *) lfirst(l);
+
+               if (strcmp(opt->defname, "ndistinct") == 0)
+               {
+                       build_ndistinct = defGetBoolean(opt);
+                       requested_type = true;
+               }
+               else
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("unrecognized STATISTICS option \"%s\"",
+                                                       opt->defname)));
+       }
+       /* If no statistic type was specified, build them all. */
+       if (!requested_type)
+               build_ndistinct = true;
+
+       /* construct the char array of enabled statistic types */
+       ntypes = 0;
+       if (build_ndistinct)
+               types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
+       Assert(ntypes > 0);
+       staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c');
+
+       /*
+        * Everything seems fine, so let's build the pg_statistic_ext tuple.
+        */
+       memset(values, 0, sizeof(values));
+       memset(nulls, false, sizeof(nulls));
+       values[Anum_pg_statistic_ext_starelid - 1] = ObjectIdGetDatum(relid);
+       values[Anum_pg_statistic_ext_staname - 1] = NameGetDatum(&staname);
+       values[Anum_pg_statistic_ext_stanamespace - 1] = ObjectIdGetDatum(namespaceId);
+       values[Anum_pg_statistic_ext_staowner - 1] = ObjectIdGetDatum(GetUserId());
+       values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(stakeys);
+       values[Anum_pg_statistic_ext_staenabled - 1] = PointerGetDatum(staenabled);
+
+       /* no statistics build yet */
+       nulls[Anum_pg_statistic_ext_standistinct - 1] = true;
+
+       /* insert it into pg_statistic_ext */
+       statrel = heap_open(StatisticExtRelationId, RowExclusiveLock);
+       htup = heap_form_tuple(statrel->rd_att, values, nulls);
+       CatalogTupleInsert(statrel, htup);
+       statoid = HeapTupleGetOid(htup);
+       heap_freetuple(htup);
+       heap_close(statrel, RowExclusiveLock);
+       relation_close(rel, NoLock);
+
+       /*
+        * Add a dependency on a table, so that stats get dropped on DROP TABLE.
+        */
+       ObjectAddressSet(parentobject, RelationRelationId, relid);
+       ObjectAddressSet(childobject, StatisticExtRelationId, statoid);
+       recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
+
+       /*
+        * Also add dependency on the schema.  This is required to ensure that we
+        * drop the statistics on DROP SCHEMA.  This is not handled automatically
+        * by DROP TABLE because the statistics are not an object in the table's
+        * schema.
+        */
+       ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
+       recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
+
+       ObjectAddressSet(address, StatisticExtRelationId, statoid);
+
+       /*
+        * Invalidate relcache so that others see the new statistics.
+        */
+       CacheInvalidateRelcache(rel);
+
+       return address;
+}
+
+/*
+ * Guts of statistics deletion.
+ */
+void
+RemoveStatisticsById(Oid statsOid)
+{
+       Relation        relation;
+       Oid                     relid;
+       Relation        rel;
+       HeapTuple       tup;
+       Form_pg_statistic_ext statext;
+
+       /*
+        * Delete the pg_statistic_ext tuple.
+        */
+       relation = heap_open(StatisticExtRelationId, RowExclusiveLock);
+
+       tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
+
+       if (!HeapTupleIsValid(tup)) /* should not happen */
+               elog(ERROR, "cache lookup failed for statistics %u", statsOid);
+
+       statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
+       relid = statext->starelid;
+
+       rel = heap_open(relid, AccessExclusiveLock);
+
+       simple_heap_delete(relation, &tup->t_self);
+
+       CacheInvalidateRelcache(rel);
+
+       ReleaseSysCache(tup);
+
+       heap_close(relation, RowExclusiveLock);
+       heap_close(rel, NoLock);
+}
index 93bda42715310b260b741766ffad51efb2e790f0..c23d5c52851eea5bf29bb194709b19973d034bdf 100644 (file)
@@ -3337,6 +3337,20 @@ _copyIndexStmt(const IndexStmt *from)
        return newnode;
 }
 
+static CreateStatsStmt *
+_copyCreateStatsStmt(const CreateStatsStmt *from)
+{
+       CreateStatsStmt *newnode = makeNode(CreateStatsStmt);
+
+       COPY_NODE_FIELD(defnames);
+       COPY_NODE_FIELD(relation);
+       COPY_NODE_FIELD(keys);
+       COPY_NODE_FIELD(options);
+       COPY_SCALAR_FIELD(if_not_exists);
+
+       return newnode;
+}
+
 static CreateFunctionStmt *
 _copyCreateFunctionStmt(const CreateFunctionStmt *from)
 {
@@ -5050,6 +5064,9 @@ copyObject(const void *from)
                case T_IndexStmt:
                        retval = _copyIndexStmt(from);
                        break;
+               case T_CreateStatsStmt:
+                       retval = _copyCreateStatsStmt(from);
+                       break;
                case T_CreateFunctionStmt:
                        retval = _copyCreateFunctionStmt(from);
                        break;
index 0d12636d92cf86a2255af7bb4b890a45a15d238f..5941b7a2bfbe4c18426d8a35de11a1c240150d32 100644 (file)
@@ -1334,6 +1334,18 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b)
        return true;
 }
 
+static bool
+_equalCreateStatsStmt(const CreateStatsStmt *a, const CreateStatsStmt *b)
+{
+       COMPARE_NODE_FIELD(defnames);
+       COMPARE_NODE_FIELD(relation);
+       COMPARE_NODE_FIELD(keys);
+       COMPARE_NODE_FIELD(options);
+       COMPARE_SCALAR_FIELD(if_not_exists);
+
+       return true;
+}
+
 static bool
 _equalCreateFunctionStmt(const CreateFunctionStmt *a, const CreateFunctionStmt *b)
 {
@@ -3236,6 +3248,9 @@ equal(const void *a, const void *b)
                case T_IndexStmt:
                        retval = _equalIndexStmt(a, b);
                        break;
+               case T_CreateStatsStmt:
+                       retval = _equalCreateStatsStmt(a, b);
+                       break;
                case T_CreateFunctionStmt:
                        retval = _equalCreateFunctionStmt(a, b);
                        break;
index 1b9005fa537c985c4e112dc581dcfc9940525500..541af029353382d50b19884a178fbe74ad76dcc2 100644 (file)
@@ -2202,6 +2202,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
        WRITE_NODE_FIELD(lateral_vars);
        WRITE_BITMAPSET_FIELD(lateral_referencers);
        WRITE_NODE_FIELD(indexlist);
+       WRITE_NODE_FIELD(statlist);
        WRITE_UINT_FIELD(pages);
        WRITE_FLOAT_FIELD(tuples, "%.0f");
        WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
@@ -2274,6 +2275,18 @@ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node)
                appendStringInfo(str, " %d", list_length(node->rinfos[i]));
 }
 
+static void
+_outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node)
+{
+       WRITE_NODE_TYPE("STATISTICEXTINFO");
+
+       /* NB: this isn't a complete set of fields */
+       WRITE_OID_FIELD(statOid);
+       /* don't write rel, leads to infinite recursion in plan tree dump */
+       WRITE_CHAR_FIELD(kind);
+       WRITE_BITMAPSET_FIELD(keys);
+}
+
 static void
 _outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
 {
@@ -2577,6 +2590,18 @@ _outIndexStmt(StringInfo str, const IndexStmt *node)
        WRITE_BOOL_FIELD(if_not_exists);
 }
 
+static void
+_outCreateStatsStmt(StringInfo str, const CreateStatsStmt *node)
+{
+       WRITE_NODE_TYPE("CREATESTATSSTMT");
+
+       WRITE_NODE_FIELD(defnames);
+       WRITE_NODE_FIELD(relation);
+       WRITE_NODE_FIELD(keys);
+       WRITE_NODE_FIELD(options);
+       WRITE_BOOL_FIELD(if_not_exists);
+}
+
 static void
 _outNotifyStmt(StringInfo str, const NotifyStmt *node)
 {
@@ -3936,6 +3961,9 @@ outNode(StringInfo str, const void *obj)
                        case T_PlannerParamItem:
                                _outPlannerParamItem(str, obj);
                                break;
+                       case T_StatisticExtInfo:
+                               _outStatisticExtInfo(str, obj);
+                               break;
 
                        case T_ExtensibleNode:
                                _outExtensibleNode(str, obj);
@@ -3953,6 +3981,9 @@ outNode(StringInfo str, const void *obj)
                        case T_IndexStmt:
                                _outIndexStmt(str, obj);
                                break;
+                       case T_CreateStatsStmt:
+                               _outCreateStatsStmt(str, obj);
+                               break;
                        case T_NotifyStmt:
                                _outNotifyStmt(str, obj);
                                break;
index 463f806467827f77181c69eca5bf5604edb72d38..cc88dcc28e4a837ed6e6e075ae2bc1f00a9d26a7 100644 (file)
@@ -29,6 +29,7 @@
 #include "catalog/heap.h"
 #include "catalog/partition.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_statistic_ext.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "parser/parse_relation.h"
 #include "parser/parsetree.h"
 #include "rewrite/rewriteManip.h"
+#include "statistics/statistics.h"
 #include "storage/bufmgr.h"
+#include "utils/builtins.h"
 #include "utils/lsyscache.h"
+#include "utils/syscache.h"
 #include "utils/rel.h"
 #include "utils/snapmgr.h"
 
@@ -63,7 +67,7 @@ static List *get_relation_constraints(PlannerInfo *root,
                                                 bool include_notnull);
 static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
                                  Relation heapRelation);
-
+static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
 
 /*
  * get_relation_info -
@@ -398,6 +402,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 
        rel->indexlist = indexinfos;
 
+       rel->statlist = get_relation_statistics(rel, relation);
+
        /* Grab foreign-table info using the relcache, while we have it */
        if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
        {
@@ -1251,6 +1257,65 @@ get_relation_constraints(PlannerInfo *root,
        return result;
 }
 
+/*
+ * get_relation_statistics
+ *             Retrieve extended statistics defined on the table.
+ *
+ * Returns a List (possibly empty) of StatisticExtInfo objects describing
+ * the statistics.  Note that this doesn't load the actual statistics data,
+ * just the identifying metadata.  Only stats actually built are considered.
+ */
+static List *
+get_relation_statistics(RelOptInfo *rel, Relation relation)
+{
+       List       *statoidlist;
+       List       *stainfos = NIL;
+       ListCell   *l;
+
+       statoidlist = RelationGetStatExtList(relation);
+
+       foreach(l, statoidlist)
+       {
+               Oid                     statOid = lfirst_oid(l);
+               Form_pg_statistic_ext staForm;
+               HeapTuple       htup;
+               Bitmapset  *keys = NULL;
+               int                     i;
+
+               htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
+               if (!htup)
+                       elog(ERROR, "cache lookup failed for statistics %u", statOid);
+               staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
+
+               /*
+                * First, build the array of columns covered.  This is ultimately
+                * wasted if no stats are actually built, but it doesn't seem worth
+                * troubling over that case.
+                */
+               for (i = 0; i < staForm->stakeys.dim1; i++)
+                       keys = bms_add_member(keys, staForm->stakeys.values[i]);
+
+               /* add one StatisticExtInfo for each kind built */
+               if (statext_is_kind_built(htup, STATS_EXT_NDISTINCT))
+               {
+                       StatisticExtInfo *info = makeNode(StatisticExtInfo);
+
+                       info->statOid = statOid;
+                       info->rel = rel;
+                       info->kind = STATS_EXT_NDISTINCT;
+                       info->keys = bms_copy(keys);
+
+                       stainfos = lcons(info, stainfos);
+               }
+
+               ReleaseSysCache(htup);
+               bms_free(keys);
+       }
+
+       list_free(statoidlist);
+
+       return stainfos;
+}
 
 /*
  * relation_excluded_by_constraints
index 82844a0399d7ad6da9fd7df51cb32877eef9d2a4..bbcfc1fb4fdb50791efbf8f3b353b4dc16923065 100644 (file)
@@ -257,7 +257,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
                ConstraintsSetStmt CopyStmt CreateAsStmt CreateCastStmt
                CreateDomainStmt CreateExtensionStmt CreateGroupStmt CreateOpClassStmt
                CreateOpFamilyStmt AlterOpFamilyStmt CreatePLangStmt
-               CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt
+               CreateSchemaStmt CreateSeqStmt CreateStmt CreateStatsStmt CreateTableSpaceStmt
                CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt
                CreateAssertStmt CreateTransformStmt CreateTrigStmt CreateEventTrigStmt
                CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatePolicyStmt
@@ -874,6 +874,7 @@ stmt :
                        | CreateSeqStmt
                        | CreateStmt
                        | CreateSubscriptionStmt
+                       | CreateStatsStmt
                        | CreateTableSpaceStmt
                        | CreateTransformStmt
                        | CreateTrigStmt
@@ -3747,6 +3748,35 @@ OptConsTableSpace:   USING INDEX TABLESPACE name { $$ = $4; }
 ExistingIndex:   USING INDEX index_name                                { $$ = $3; }
                ;
 
+/*****************************************************************************
+ *
+ *             QUERY :
+ *                             CREATE STATISTICS stats_name WITH (options) ON (columns) FROM relname
+ *
+ *****************************************************************************/
+
+
+CreateStatsStmt:       CREATE STATISTICS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name
+                                               {
+                                                       CreateStatsStmt *n = makeNode(CreateStatsStmt);
+                                                       n->defnames = $3;
+                                                       n->relation = $10;
+                                                       n->keys = $7;
+                                                       n->options = $4;
+                                                       n->if_not_exists = false;
+                                                       $$ = (Node *)n;
+                                               }
+                                       | CREATE STATISTICS IF_P NOT EXISTS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name
+                                               {
+                                                       CreateStatsStmt *n = makeNode(CreateStatsStmt);
+                                                       n->defnames = $6;
+                                                       n->relation = $13;
+                                                       n->keys = $10;
+                                                       n->options = $7;
+                                                       n->if_not_exists = true;
+                                                       $$ = (Node *)n;
+                                               }
+                       ;
 
 /*****************************************************************************
  *
@@ -6042,6 +6072,7 @@ drop_type_any_name:
                        | FOREIGN TABLE                                                 { $$ = OBJECT_FOREIGN_TABLE; }
                        | COLLATION                                                             { $$ = OBJECT_COLLATION; }
                        | CONVERSION_P                                                  { $$ = OBJECT_CONVERSION; }
+                       | STATISTICS                                                    { $$ = OBJECT_STATISTIC_EXT; }
                        | TEXT_P SEARCH PARSER                                  { $$ = OBJECT_TSPARSER; }
                        | TEXT_P SEARCH DICTIONARY                              { $$ = OBJECT_TSDICTIONARY; }
                        | TEXT_P SEARCH TEMPLATE                                { $$ = OBJECT_TSTEMPLATE; }
@@ -6119,7 +6150,7 @@ opt_restart_seqs:
  *                 EXTENSION | EVENT TRIGGER | FOREIGN DATA WRAPPER |
  *                 FOREIGN TABLE | INDEX | [PROCEDURAL] LANGUAGE |
  *                 MATERIALIZED VIEW | POLICY | ROLE | SCHEMA | SEQUENCE |
- *                 SERVER | TABLE | TABLESPACE |
+ *                 SERVER | STATISTICS | TABLE | TABLESPACE |
  *                 TEXT SEARCH CONFIGURATION | TEXT SEARCH DICTIONARY |
  *                 TEXT SEARCH PARSER | TEXT SEARCH TEMPLATE | TYPE |
  *                 VIEW] <objname> |
@@ -6288,6 +6319,7 @@ comment_type_any_name:
                        COLUMN                                                          { $$ = OBJECT_COLUMN; }
                        | INDEX                                                         { $$ = OBJECT_INDEX; }
                        | SEQUENCE                                                      { $$ = OBJECT_SEQUENCE; }
+                       | STATISTICS                                            { $$ = OBJECT_STATISTIC_EXT; }
                        | TABLE                                                         { $$ = OBJECT_TABLE; }
                        | VIEW                                                          { $$ = OBJECT_VIEW; }
                        | MATERIALIZED VIEW                                     { $$ = OBJECT_MATVIEW; }
@@ -8428,6 +8460,15 @@ RenameStmt: ALTER AGGREGATE aggregate_with_argtypes RENAME TO name
                                        n->missing_ok = false;
                                        $$ = (Node *)n;
                                }
+                       | ALTER STATISTICS any_name RENAME TO name
+                               {
+                                       RenameStmt *n = makeNode(RenameStmt);
+                                       n->renameType = OBJECT_STATISTIC_EXT;
+                                       n->object = (Node *) $3;
+                                       n->newname = $6;
+                                       n->missing_ok = false;
+                                       $$ = (Node *)n;
+                               }
                        | ALTER TEXT_P SEARCH PARSER any_name RENAME TO name
                                {
                                        RenameStmt *n = makeNode(RenameStmt);
@@ -8643,6 +8684,15 @@ AlterObjectSchemaStmt:
                                        n->missing_ok = true;
                                        $$ = (Node *)n;
                                }
+                       | ALTER STATISTICS any_name SET SCHEMA name
+                               {
+                                       AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt);
+                                       n->objectType = OBJECT_STATISTIC_EXT;
+                                       n->object = (Node *) $3;
+                                       n->newschema = $6;
+                                       n->missing_ok = false;
+                                       $$ = (Node *)n;
+                               }
                        | ALTER TEXT_P SEARCH PARSER any_name SET SCHEMA name
                                {
                                        AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt);
@@ -8906,6 +8956,14 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec
                                        n->newowner = $6;
                                        $$ = (Node *)n;
                                }
+                       | ALTER STATISTICS any_name OWNER TO RoleSpec
+                               {
+                                       AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
+                                       n->objectType = OBJECT_STATISTIC_EXT;
+                                       n->object = (Node *) $3;
+                                       n->newowner = $6;
+                                       $$ = (Node *)n;
+                               }
                        | ALTER TEXT_P SEARCH DICTIONARY any_name OWNER TO RoleSpec
                                {
                                        AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile
new file mode 100644 (file)
index 0000000..b3615bd
--- /dev/null
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for statistics
+#
+# IDENTIFICATION
+#    src/backend/statistics/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/statistics
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = extended_stats.o mvdistinct.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/statistics/README b/src/backend/statistics/README
new file mode 100644 (file)
index 0000000..beb7c24
--- /dev/null
@@ -0,0 +1,34 @@
+Extended statistics
+===================
+
+When estimating various quantities (e.g. condition selectivities) the default
+approach relies on the assumption of independence. In practice that's often
+not true, resulting in estimation errors.
+
+Extended statistics track different types of dependencies between the columns,
+hopefully improving the estimates and producing better plans.
+
+Currently we only have one type of extended statistics - ndistinct
+coefficients, and we use it to improve estimates of grouping queries. See
+README.ndistinct for details.
+
+
+Size of sample in ANALYZE
+-------------------------
+When performing ANALYZE, the number of rows to sample is determined as
+
+    (300 * statistics_target)
+
+That works reasonably well for statistics on individual columns, but perhaps
+it's not enough for extended statistics. Papers analyzing estimation errors
+all use samples proportional to the table (usually finding that 1-3% of the
+table is enough to build accurate stats).
+
+The requested accuracy (number of MCV items or histogram bins) should also
+be considered when determining the sample size, and in extended statistics
+those are not necessarily limited by statistics_target.
+
+This however merits further discussion, because collecting the sample is quite
+expensive and increasing it further would make ANALYZE even more painful.
+Judging by the experiments with the current implementation, the fixed size
+seems to work reasonably well for now, so we leave this as a future work.
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
new file mode 100644 (file)
index 0000000..d2b9f6a
--- /dev/null
@@ -0,0 +1,389 @@
+/*-------------------------------------------------------------------------
+ *
+ * extended_stats.c
+ *       POSTGRES extended statistics
+ *
+ * Generic code supporting statistic objects created via CREATE STATISTICS.
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/backend/statistics/extended_stats.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_statistic_ext.h"
+#include "nodes/relation.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+/*
+ * Used internally to refer to an individual pg_statistic_ext entry.
+ */
+typedef struct StatExtEntry
+{
+       Oid                     statOid;        /* OID of pg_statistic_ext entry */
+       Bitmapset  *columns;    /* attribute numbers covered by the statistics */
+       List       *types;              /* 'char' list of enabled statistic kinds */
+} StatExtEntry;
+
+
+static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
+static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+                                         int natts, VacAttrStats **vacattrstats);
+static void statext_store(Relation pg_stext, Oid relid,
+                         MVNDistinct *ndistinct,
+                         VacAttrStats **stats);
+
+
+/*
+ * Compute requested extended stats, using the rows sampled for the plain
+ * (single-column) stats.
+ *
+ * This fetches a list of stats from pg_statistic_ext, computes the stats
+ * and serializes them back into the catalog (as bytea values).
+ */
+void
+BuildRelationExtStatistics(Relation onerel, double totalrows,
+                                                  int numrows, HeapTuple *rows,
+                                                  int natts, VacAttrStats **vacattrstats)
+{
+       Relation        pg_stext;
+       ListCell   *lc;
+       List       *stats;
+
+       pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
+       stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
+
+       foreach(lc, stats)
+       {
+               StatExtEntry   *stat = (StatExtEntry *) lfirst(lc);
+               MVNDistinct        *ndistinct = NULL;
+               VacAttrStats  **stats;
+               ListCell           *lc2;
+
+               /* filter only the interesting vacattrstats records */
+               stats = lookup_var_attr_stats(onerel, stat->columns,
+                                                                         natts, vacattrstats);
+
+               /* check allowed number of dimensions */
+               Assert(bms_num_members(stat->columns) >= 2 &&
+                          bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
+
+               /* compute statistic of each type */
+               foreach(lc2, stat->types)
+               {
+                       char    t = (char) lfirst_int(lc2);
+
+                       if (t == STATS_EXT_NDISTINCT)
+                               ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
+                                                                                                       stat->columns, stats);
+               }
+
+               /* store the statistics in the catalog */
+               statext_store(pg_stext, stat->statOid, ndistinct, stats);
+       }
+
+       heap_close(pg_stext, RowExclusiveLock);
+}
+
+/*
+ * statext_is_kind_built
+ *             Is this stat kind built in the given pg_statistic_ext tuple?
+ */
+bool
+statext_is_kind_built(HeapTuple htup, char type)
+{
+       AttrNumber  attnum;
+
+       switch (type)
+       {
+               case STATS_EXT_NDISTINCT:
+                       attnum = Anum_pg_statistic_ext_standistinct;
+                       break;
+
+               default:
+                       elog(ERROR, "unexpected statistics type requested: %d", type);
+       }
+
+       return !heap_attisnull(htup, attnum);
+}
+
+/*
+ * Return a list (of StatExtEntry) of statistics for the given relation.
+ */
+static List *
+fetch_statentries_for_relation(Relation pg_statext, Oid relid)
+{
+       SysScanDesc scan;
+       ScanKeyData skey;
+       HeapTuple   htup;
+       List       *result = NIL;
+
+       /*
+        * Prepare to scan pg_statistic_ext for entries having indrelid = this
+        * rel.
+        */
+       ScanKeyInit(&skey,
+                               Anum_pg_statistic_ext_starelid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(relid));
+
+       scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
+                                                         NULL, 1, &skey);
+
+       while (HeapTupleIsValid(htup = systable_getnext(scan)))
+       {
+               StatExtEntry *entry;
+               Datum           datum;
+               bool            isnull;
+               int                     i;
+               ArrayType  *arr;
+               char       *enabled;
+               Form_pg_statistic_ext staForm;
+
+               entry = palloc0(sizeof(StatExtEntry));
+               entry->statOid = HeapTupleGetOid(htup);
+               staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
+               for (i = 0; i < staForm->stakeys.dim1; i++)
+               {
+                       entry->columns = bms_add_member(entry->columns,
+                                                                                       staForm->stakeys.values[i]);
+               }
+
+               /* decode the staenabled char array into a list of chars */
+               datum = SysCacheGetAttr(STATEXTOID, htup,
+                                                               Anum_pg_statistic_ext_staenabled, &isnull);
+               Assert(!isnull);
+               arr = DatumGetArrayTypeP(datum);
+               if (ARR_NDIM(arr) != 1 ||
+                       ARR_HASNULL(arr) ||
+                       ARR_ELEMTYPE(arr) != CHAROID)
+                       elog(ERROR, "staenabled is not a 1-D char array");
+               enabled = (char *) ARR_DATA_PTR(arr);
+               for (i = 0; i < ARR_DIMS(arr)[0]; i++)
+               {
+                       Assert(enabled[i] == STATS_EXT_NDISTINCT);
+                       entry->types = lappend_int(entry->types, (int) enabled[i]);
+               }
+
+               result = lappend(result, entry);
+       }
+
+       systable_endscan(scan);
+
+       return result;
+}
+
+/*
+ * Using 'vacattrstats' of size 'natts' as input data, return a newly built
+ * VacAttrStats array which includes only the items corresponding to attributes
+ * indicated by 'attrs'.
+ */
+static VacAttrStats **
+lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts,
+                                         VacAttrStats **vacattrstats)
+{
+       int                     i = 0;
+       int                     x = -1;
+       VacAttrStats **stats;
+       Bitmapset  *matched = NULL;
+
+       stats = (VacAttrStats **)
+               palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
+
+       /* lookup VacAttrStats info for the requested columns (same attnum) */
+       while ((x = bms_next_member(attrs, x)) >= 0)
+       {
+               int             j;
+
+               stats[i] = NULL;
+               for (j = 0; j < natts; j++)
+               {
+                       if (x == vacattrstats[j]->tupattnum)
+                       {
+                               stats[i] = vacattrstats[j];
+                               break;
+                       }
+               }
+
+               if (!stats[i])
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("extended statistics could not be collected for column \"%s\" of relation %s.%s",
+                                                       NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname),
+                                                       get_namespace_name(rel->rd_rel->relnamespace),
+                                                       RelationGetRelationName(rel)),
+                                        errhint("Consider ALTER TABLE \"%s\".\"%s\" ALTER \"%s\" SET STATISTICS -1",
+                                                        get_namespace_name(rel->rd_rel->relnamespace),
+                                                        RelationGetRelationName(rel),
+                                                        NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname))));
+
+               /*
+                * Check that we found a non-dropped column and that the attnum
+                * matches.
+                */
+               Assert(!stats[i]->attr->attisdropped);
+               matched = bms_add_member(matched, stats[i]->tupattnum);
+
+               i++;
+       }
+       if (bms_subset_compare(matched, attrs) != BMS_EQUAL)
+               elog(ERROR, "could not find all attributes in attribute stats array");
+       bms_free(matched);
+
+       return stats;
+}
+
+/*
+ * statext_store
+ *     Serializes the statistics and stores them into the pg_statistic_ext tuple.
+ */
+static void
+statext_store(Relation pg_stext, Oid statOid,
+                         MVNDistinct *ndistinct,
+                         VacAttrStats **stats)
+{
+       HeapTuple       stup,
+                               oldtup;
+       Datum           values[Natts_pg_statistic_ext];
+       bool            nulls[Natts_pg_statistic_ext];
+       bool            replaces[Natts_pg_statistic_ext];
+
+       memset(nulls, 1, Natts_pg_statistic_ext * sizeof(bool));
+       memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool));
+       memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum));
+
+       /*
+        * Construct a new pg_statistic_ext tuple, replacing the calculated stats.
+        */
+       if (ndistinct != NULL)
+       {
+               bytea      *data = statext_ndistinct_serialize(ndistinct);
+
+               nulls[Anum_pg_statistic_ext_standistinct - 1] = (data == NULL);
+               values[Anum_pg_statistic_ext_standistinct - 1] = PointerGetDatum(data);
+       }
+
+       /* always replace the value (either by bytea or NULL) */
+       replaces[Anum_pg_statistic_ext_standistinct - 1] = true;
+
+       /* there should already be a pg_statistic_ext tuple */
+       oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
+       if (!HeapTupleIsValid(oldtup))
+               elog(ERROR, "cache lookup failed for extended statistics %u", statOid);
+
+       /* replace it */
+       stup = heap_modify_tuple(oldtup,
+                                                        RelationGetDescr(pg_stext),
+                                                        values,
+                                                        nulls,
+                                                        replaces);
+       ReleaseSysCache(oldtup);
+       CatalogTupleUpdate(pg_stext, &stup->t_self, stup);
+
+       heap_freetuple(stup);
+}
+
+/* initialize multi-dimensional sort */
+MultiSortSupport
+multi_sort_init(int ndims)
+{
+       MultiSortSupport mss;
+
+       Assert(ndims >= 2);
+
+       mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
+                                                                        +sizeof(SortSupportData) * ndims);
+
+       mss->ndims = ndims;
+
+       return mss;
+}
+
+/*
+ * Prepare sort support info using the given sort operator
+ * at the position 'sortdim'
+ */
+void
+multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
+{
+       SortSupport             ssup = &mss->ssup[sortdim];
+
+       ssup->ssup_cxt = CurrentMemoryContext;
+       ssup->ssup_collation = DEFAULT_COLLATION_OID;
+       ssup->ssup_nulls_first = false;
+       ssup->ssup_cxt = CurrentMemoryContext;
+
+       PrepareSortSupportFromOrderingOp(oper, ssup);
+}
+
+/* compare all the dimensions in the selected order */
+int
+multi_sort_compare(const void *a, const void *b, void *arg)
+{
+       MultiSortSupport mss = (MultiSortSupport) arg;
+       SortItem   *ia = (SortItem *) a;
+       SortItem   *ib = (SortItem *) b;
+       int                     i;
+
+       for (i = 0; i < mss->ndims; i++)
+       {
+               int                     compare;
+
+               compare = ApplySortComparator(ia->values[i], ia->isnull[i],
+                                                                         ib->values[i], ib->isnull[i],
+                                                                         &mss->ssup[i]);
+
+               if (compare != 0)
+                       return compare;
+       }
+
+       /* equal by default */
+       return 0;
+}
+
+/* compare selected dimension */
+int
+multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
+                                          MultiSortSupport mss)
+{
+       return ApplySortComparator(a->values[dim], a->isnull[dim],
+                                                          b->values[dim], b->isnull[dim],
+                                                          &mss->ssup[dim]);
+}
+
+int
+multi_sort_compare_dims(int start, int end,
+                                               const SortItem *a, const SortItem *b,
+                                               MultiSortSupport mss)
+{
+       int                     dim;
+
+       for (dim = start; dim <= end; dim++)
+       {
+               int                     r = ApplySortComparator(a->values[dim], a->isnull[dim],
+                                                                                       b->values[dim], b->isnull[dim],
+                                                                                       &mss->ssup[dim]);
+
+               if (r != 0)
+                       return r;
+       }
+
+       return 0;
+}
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
new file mode 100644 (file)
index 0000000..5df4e29
--- /dev/null
@@ -0,0 +1,671 @@
+/*-------------------------------------------------------------------------
+ *
+ * mvdistinct.c
+ *       POSTGRES multivariate ndistinct coefficients
+ *
+ * Estimating number of groups in a combination of columns (e.g. for GROUP BY)
+ * is tricky, and the estimation error is often significant.
+
+ * The multivariate ndistinct coefficients address this by storing ndistinct
+ * estimates for combinations of the user-specified columns.  So for example
+ * given a statistics object on three columns (a,b,c), this module estimates
+ * and store n-distinct for (a,b), (a,c), (b,c) and (a,b,c).  The per-column
+ * estimates are already available in pg_statistic.
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/backend/statistics/mvdistinct.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_statistic_ext.h"
+#include "utils/fmgrprotos.h"
+#include "utils/lsyscache.h"
+#include "lib/stringinfo.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
+
+
+static double ndistinct_for_combination(double totalrows, int numrows,
+                                       HeapTuple *rows, VacAttrStats **stats,
+                                       int k, int *combination);
+static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
+static int     n_choose_k(int n, int k);
+static int     num_combinations(int n);
+
+/* Combination generator API */
+
+/* internal state for generator of k-combinations of n elements */
+typedef struct CombinationGenerator
+{
+       int             k;                              /* size of the combination */
+       int             n;                              /* total number of elements */
+       int             current;                /* index of the next combination to return */
+       int             ncombinations;  /* number of combinations (size of array) */
+       int        *combinations;       /* array of pre-built combinations */
+} CombinationGenerator;
+
+static CombinationGenerator *generator_init(int n, int k);
+static void generator_free(CombinationGenerator *state);
+static int *generator_next(CombinationGenerator *state);
+static void generate_combinations(CombinationGenerator *state);
+
+
+/*
+ * statext_ndistinct_build
+ *             Compute ndistinct coefficient for the combination of attributes.
+ *
+ * This computes the ndistinct estimate using the same estimator used
+ * in analyze.c and then computes the coefficient.
+ */
+MVNDistinct *
+statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
+                                               Bitmapset *attrs, VacAttrStats **stats)
+{
+       MVNDistinct *result;
+       int                     k;
+       int                     itemcnt;
+       int                     numattrs = bms_num_members(attrs);
+       int                     numcombs = num_combinations(numattrs);
+
+       result = palloc(offsetof(MVNDistinct, items) +
+                                       numcombs * sizeof(MVNDistinctItem));
+       result->magic = STATS_NDISTINCT_MAGIC;
+       result->type = STATS_NDISTINCT_TYPE_BASIC;
+       result->nitems = numcombs;
+
+       itemcnt = 0;
+       for (k = 2; k <= numattrs; k++)
+       {
+               int        *combination;
+               CombinationGenerator *generator;
+
+               /* generate combinations of K out of N elements */
+               generator = generator_init(numattrs, k);
+
+               while ((combination = generator_next(generator)))
+               {
+                       MVNDistinctItem *item = &result->items[itemcnt];
+                       int             j;
+
+                       item->attrs = NULL;
+                       for (j = 0; j < k; j++)
+                               item->attrs = bms_add_member(item->attrs,
+                                                                                        stats[combination[j]]->attr->attnum);
+                       item->ndistinct =
+                               ndistinct_for_combination(totalrows, numrows, rows,
+                                                                                 stats, k, combination);
+
+                       itemcnt++;
+                       Assert(itemcnt <= result->nitems);
+               }
+
+               generator_free(generator);
+       }
+
+       /* must consume exactly the whole output array */
+       Assert(itemcnt == result->nitems);
+
+       return result;
+}
+
+/*
+ * statext_ndistinct_load
+ *             Load the ndistinct value for the indicated pg_statistic_ext tuple
+ */
+MVNDistinct *
+statext_ndistinct_load(Oid mvoid)
+{
+       bool            isnull = false;
+       Datum           ndist;
+       HeapTuple       htup;
+
+       htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid));
+       if (!htup)
+               elog(ERROR, "cache lookup failed for statistics %u", mvoid);
+
+       ndist = SysCacheGetAttr(STATEXTOID, htup,
+                                                       Anum_pg_statistic_ext_standistinct, &isnull);
+       if (isnull)
+               elog(ERROR,
+                        "requested statistic kind %c not yet built for statistics %u",
+                        STATS_EXT_NDISTINCT, mvoid);
+
+       ReleaseSysCache(htup);
+
+       return statext_ndistinct_deserialize(DatumGetByteaP(ndist));
+}
+
+/*
+ * statext_ndistinct_serialize
+ *             serialize ndistinct to the on-disk bytea format
+ */
+bytea *
+statext_ndistinct_serialize(MVNDistinct *ndistinct)
+{
+       int                     i;
+       bytea      *output;
+       char       *tmp;
+       Size            len;
+
+       Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
+       Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
+
+       /*
+        * Base size is base struct size, plus one base struct for each items,
+        * including number of items for each.
+        */
+       len = VARHDRSZ + offsetof(MVNDistinct, items) +
+               ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int));
+
+       /* and also include space for the actual attribute numbers */
+       for (i = 0; i < ndistinct->nitems; i++)
+       {
+               int             nmembers;
+
+               nmembers = bms_num_members(ndistinct->items[i].attrs);
+               Assert(nmembers >= 2);
+               len += sizeof(AttrNumber) * nmembers;
+       }
+
+       output = (bytea *) palloc(len);
+       SET_VARSIZE(output, len);
+
+       tmp = VARDATA(output);
+
+       /* Store the base struct values */
+       memcpy(tmp, ndistinct, offsetof(MVNDistinct, items));
+       tmp += offsetof(MVNDistinct, items);
+
+       /*
+        * store number of attributes and attribute numbers for each ndistinct
+        * entry
+        */
+       for (i = 0; i < ndistinct->nitems; i++)
+       {
+               MVNDistinctItem item = ndistinct->items[i];
+               int             nmembers = bms_num_members(item.attrs);
+               int             x;
+
+               memcpy(tmp, &item.ndistinct, sizeof(double));
+               tmp += sizeof(double);
+               memcpy(tmp, &nmembers, sizeof(int));
+               tmp += sizeof(int);
+
+               x = -1;
+               while ((x = bms_next_member(item.attrs, x)) >= 0)
+               {
+                       AttrNumber      value = (AttrNumber) x;
+
+                       memcpy(tmp, &value, sizeof(AttrNumber));
+                       tmp += sizeof(AttrNumber);
+               }
+
+               Assert(tmp <= ((char *) output + len));
+       }
+
+       return output;
+}
+
+/*
+ * statext_ndistinct_deserialize
+ *             Read an on-disk bytea format MVNDistinct to in-memory format
+ */
+MVNDistinct *
+statext_ndistinct_deserialize(bytea *data)
+{
+       int                     i;
+       Size            expected_size;
+       MVNDistinct *ndistinct;
+       char       *tmp;
+
+       if (data == NULL)
+               return NULL;
+
+       if (VARSIZE_ANY_EXHDR(data) < offsetof(MVNDistinct, items))
+               elog(ERROR, "invalid MVNDistinct size %ld (expected at least %ld)",
+                        VARSIZE_ANY_EXHDR(data), offsetof(MVNDistinct, items));
+
+       /* read the MVNDistinct header */
+       ndistinct = (MVNDistinct *) palloc(sizeof(MVNDistinct));
+
+       /* initialize pointer to the data part (skip the varlena header) */
+       tmp = VARDATA_ANY(data);
+
+       /* get the header and perform basic sanity checks */
+       memcpy(ndistinct, tmp, offsetof(MVNDistinct, items));
+       tmp += offsetof(MVNDistinct, items);
+
+       if (ndistinct->magic != STATS_NDISTINCT_MAGIC)
+               elog(ERROR, "invalid ndistinct magic %d (expected %d)",
+                        ndistinct->magic, STATS_NDISTINCT_MAGIC);
+
+       if (ndistinct->type != STATS_NDISTINCT_TYPE_BASIC)
+               elog(ERROR, "invalid ndistinct type %d (expected %d)",
+                        ndistinct->type, STATS_NDISTINCT_TYPE_BASIC);
+
+       Assert(ndistinct->nitems > 0);
+
+       /* what minimum bytea size do we expect for those parameters */
+       expected_size = offsetof(MVNDistinct, items) +
+               ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) +
+                                                        sizeof(AttrNumber) * 2);
+
+       if (VARSIZE_ANY_EXHDR(data) < expected_size)
+               elog(ERROR, "invalid dependencies size %ld (expected at least %ld)",
+                        VARSIZE_ANY_EXHDR(data), expected_size);
+
+       /* allocate space for the ndistinct items */
+       ndistinct = repalloc(ndistinct, offsetof(MVNDistinct, items) +
+                                                (ndistinct->nitems * sizeof(MVNDistinctItem)));
+
+       for (i = 0; i < ndistinct->nitems; i++)
+       {
+               MVNDistinctItem *item = &ndistinct->items[i];
+               int                     nelems;
+
+               item->attrs = NULL;
+
+               /* ndistinct value */
+               memcpy(&item->ndistinct, tmp, sizeof(double));
+               tmp += sizeof(double);
+
+               /* number of attributes */
+               memcpy(&nelems, tmp, sizeof(int));
+               tmp += sizeof(int);
+               Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS));
+
+               while (nelems-- > 0)
+               {
+                       AttrNumber      attno;
+
+                       memcpy(&attno, tmp, sizeof(AttrNumber));
+                       tmp += sizeof(AttrNumber);
+                       item->attrs = bms_add_member(item->attrs, attno);
+               }
+
+               /* still within the bytea */
+               Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
+       }
+
+       /* we should have consumed the whole bytea exactly */
+       Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
+
+       return ndistinct;
+}
+
+/*
+ * pg_ndistinct_in
+ *             input routine for type pg_ndistinct
+ *
+ * pg_ndistinct is real enough to be a table column, but it has no
+ * operations of its own, and disallows input (jus like pg_node_tree).
+ */
+Datum
+pg_ndistinct_in(PG_FUNCTION_ARGS)
+{
+       ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("cannot accept a value of type %s", "pg_ndistinct")));
+
+       PG_RETURN_VOID();                       /* keep compiler quiet */
+}
+
+/*
+ * pg_ndistinct
+ *             output routine for type pg_ndistinct
+ *
+ * Produces a human-readable representation of the value.
+ */
+Datum
+pg_ndistinct_out(PG_FUNCTION_ARGS)
+{
+       bytea      *data = PG_GETARG_BYTEA_PP(0);
+       MVNDistinct *ndist = statext_ndistinct_deserialize(data);
+       int                     i;
+       StringInfoData str;
+
+       initStringInfo(&str);
+       appendStringInfoChar(&str, '[');
+
+       for (i = 0; i < ndist->nitems; i++)
+       {
+               MVNDistinctItem item = ndist->items[i];
+
+               if (i > 0)
+                       appendStringInfoString(&str, ", ");
+
+               appendStringInfoChar(&str, '{');
+               outBitmapset(&str, item.attrs);
+               appendStringInfo(&str, ", %f}", item.ndistinct);
+       }
+
+       appendStringInfoChar(&str, ']');
+
+       PG_RETURN_CSTRING(str.data);
+}
+
+/*
+ * pg_ndistinct_recv
+ *             binary input routine for type pg_ndistinct
+ */
+Datum
+pg_ndistinct_recv(PG_FUNCTION_ARGS)
+{
+       ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("cannot accept a value of type %s", "pg_ndistinct")));
+
+       PG_RETURN_VOID();                       /* keep compiler quiet */
+}
+
+/*
+ * pg_ndistinct_send
+ *             binary output routine for type pg_ndistinct
+ *
+ * n-distinct is serialized into a bytea value, so let's send that.
+ */
+Datum
+pg_ndistinct_send(PG_FUNCTION_ARGS)
+{
+       return byteasend(fcinfo);
+}
+
+/*
+ * ndistinct_for_combination
+ *             Estimates number of distinct values in a combination of columns.
+ *
+ * This uses the same ndistinct estimator as compute_scalar_stats() in
+ * ANALYZE, i.e.,
+ *             n*d / (n - f1 + f1*n/N)
+ *
+ * except that instead of values in a single column we are dealing with
+ * combination of multiple columns.
+ */
+static double
+ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
+                                                 VacAttrStats **stats, int k, int *combination)
+{
+       int                     i,
+                               j;
+       int                     f1,
+                               cnt,
+                               d;
+       bool       *isnull;
+       Datum      *values;
+       SortItem   *items;
+       MultiSortSupport mss;
+
+       mss = multi_sort_init(k);
+
+       /*
+        * In order to determine the number of distinct elements, create separate
+        * values[]/isnull[] arrays with all the data we have, then sort them
+        * using the specified column combination as dimensions.  We could try to
+        * sort in place, but it'd probably be more complex and bug-prone.
+        */
+       items = (SortItem *) palloc(numrows * sizeof(SortItem));
+       values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
+       isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
+
+       for (i = 0; i < numrows; i++)
+       {
+               items[i].values = &values[i * k];
+               items[i].isnull = &isnull[i * k];
+       }
+
+       /*
+        * For each dimension, set up sort-support and fill in the values from
+        * the sample data.
+        */
+       for (i = 0; i < k; i++)
+       {
+               VacAttrStats   *colstat = stats[combination[i]];
+               TypeCacheEntry *type;
+
+               type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
+               if (type->lt_opr == InvalidOid)         /* shouldn't happen */
+                       elog(ERROR, "cache lookup failed for ordering operator for type %u",
+                                colstat->attrtypid);
+
+               /* prepare the sort function for this dimension */
+               multi_sort_add_dimension(mss, i, type->lt_opr);
+
+               /* accumulate all the data for this dimension into the arrays */
+               for (j = 0; j < numrows; j++)
+               {
+                       items[j].values[i] =
+                               heap_getattr(rows[j],
+                                                        colstat->attr->attnum,
+                                                        colstat->tupDesc,
+                                                        &items[j].isnull[i]);
+               }
+       }
+
+       /* We can sort the array now ... */
+       qsort_arg((void *) items, numrows, sizeof(SortItem),
+                         multi_sort_compare, mss);
+
+       /* ... and count the number of distinct combinations */
+
+       f1 = 0;
+       cnt = 1;
+       d = 1;
+       for (i = 1; i < numrows; i++)
+       {
+               if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
+               {
+                       if (cnt == 1)
+                               f1 += 1;
+
+                       d++;
+                       cnt = 0;
+               }
+
+               cnt += 1;
+       }
+
+       if (cnt == 1)
+               f1 += 1;
+
+       return estimate_ndistinct(totalrows, numrows, d, f1);
+}
+
+/* The Duj1 estimator (already used in analyze.c). */
+static double
+estimate_ndistinct(double totalrows, int numrows, int d, int f1)
+{
+       double          numer,
+                               denom,
+                               ndistinct;
+
+       numer = (double) numrows * (double) d;
+
+       denom = (double) (numrows - f1) +
+               (double) f1 *(double) numrows / totalrows;
+
+       ndistinct = numer / denom;
+
+       /* Clamp to sane range in case of roundoff error */
+       if (ndistinct < (double) d)
+               ndistinct = (double) d;
+
+       if (ndistinct > totalrows)
+               ndistinct = totalrows;
+
+       return floor(ndistinct + 0.5);
+}
+
+/*
+ * n_choose_k
+ *             computes binomial coefficients using an algorithm that is both
+ *             efficient and prevents overflows
+ */
+static int
+n_choose_k(int n, int k)
+{
+       int                     d,
+                               r;
+
+       Assert((k > 0) && (n >= k));
+
+       /* use symmetry of the binomial coefficients */
+       k = Min(k, n - k);
+
+       r = 1;
+       for (d = 1; d <= k; ++d)
+       {
+               r *= n--;
+               r /= d;
+       }
+
+       return r;
+}
+
+/*
+ * num_combinations
+ *             number of combinations, excluding single-value combinations
+ */
+static int
+num_combinations(int n)
+{
+       int                     k;
+       int                     ncombs = 1;
+
+       for (k = 1; k <= n; k++)
+               ncombs *= 2;
+
+       ncombs -= (n + 1);
+
+       return ncombs;
+}
+
+/*
+ * generator_init
+ *             initialize the generator of combinations
+ *
+ * The generator produces combinations of K elements in the interval (0..N).
+ * We prebuild all the combinations in this method, which is simpler than
+ * generating them on the fly.
+ */
+static CombinationGenerator *
+generator_init(int n, int k)
+{
+       CombinationGenerator *state;
+
+       Assert((n >= k) && (k > 0));
+
+       /* allocate the generator state as a single chunk of memory */
+       state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
+
+       state->ncombinations = n_choose_k(n, k);
+
+       /* pre-allocate space for all combinations*/
+       state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
+
+       state->current = 0;
+       state->k = k;
+       state->n = n;
+
+       /* now actually pre-generate all the combinations of K elements */
+       generate_combinations(state);
+
+       /* make sure we got the expected number of combinations */
+       Assert(state->current == state->ncombinations);
+
+       /* reset the number, so we start with the first one */
+       state->current = 0;
+
+       return state;
+}
+
+/*
+ * generator_next
+ *             returns the next combination from the prebuilt list
+ *
+ * Returns a combination of K array indexes (0 .. N), as specified to
+ * generator_init), or NULL when there are no more combination.
+ */
+static int *
+generator_next(CombinationGenerator *state)
+{
+       if (state->current == state->ncombinations)
+               return NULL;
+
+       return &state->combinations[state->k * state->current++];
+}
+
+/*
+ * generator_free
+ *             free the internal state of the generator
+ *
+ * Releases the generator internal state (pre-built combinations).
+ */
+static void
+generator_free(CombinationGenerator *state)
+{
+       pfree(state->combinations);
+       pfree(state);
+}
+
+/*
+ * generate_combinations_recurse
+ *             given a prefix, generate all possible combinations
+ *
+ * Given a prefix (first few elements of the combination), generate following
+ * elements recursively. We generate the combinations in lexicographic order,
+ * which eliminates permutations of the same combination.
+ */
+static void
+generate_combinations_recurse(CombinationGenerator *state,
+                                                         int index, int start, int *current)
+{
+       /* If we haven't filled all the elements, simply recurse. */
+       if (index < state->k)
+       {
+               int             i;
+
+               /*
+                * The values have to be in ascending order, so make sure we start
+                * with the value passed by parameter.
+                */
+
+               for (i = start; i < state->n; i++)
+               {
+                       current[index] = i;
+                       generate_combinations_recurse(state, (index + 1), (i + 1), current);
+               }
+
+               return;
+       }
+       else
+       {
+               /* we got a valid combination, add it to the array */
+               memcpy(&state->combinations[(state->k * state->current)],
+                          current, state->k * sizeof(int));
+               state->current++;
+       }
+}
+
+/*
+ * generate_combinations
+ *             generate all k-combinations of N elements
+ */
+static void
+generate_combinations(CombinationGenerator *state)
+{
+       int        *current = (int *) palloc0(sizeof(int) * state->k);
+
+       generate_combinations_recurse(state, 0, 0, current);
+
+       pfree(current);
+}
index c8d20fffeafc4ce301136815cbe11f0acca29e2e..b59821bf97835a83c76c67621af070f4f497e343 100644 (file)
@@ -1623,6 +1623,10 @@ ProcessUtilitySlow(ParseState *pstate,
                                commandCollected = true;
                                break;
 
+                       case T_CreateStatsStmt:
+                               address = CreateStatistics((CreateStatsStmt *) parsetree);
+                               break;
+
                        case T_AlterCollationStmt:
                                address = AlterCollation((AlterCollationStmt *) parsetree);
                                break;
@@ -1992,6 +1996,8 @@ AlterObjectTypeCommandTag(ObjectType objtype)
                        break;
                case OBJECT_SUBSCRIPTION:
                        tag = "ALTER SUBSCRIPTION";
+               case OBJECT_STATISTIC_EXT:
+                       tag = "ALTER STATISTICS";
                        break;
                default:
                        tag = "???";
@@ -2286,6 +2292,8 @@ CreateCommandTag(Node *parsetree)
                                        break;
                                case OBJECT_PUBLICATION:
                                        tag = "DROP PUBLICATION";
+                               case OBJECT_STATISTIC_EXT:
+                                       tag = "DROP STATISTICS";
                                        break;
                                default:
                                        tag = "???";
@@ -2689,6 +2697,10 @@ CreateCommandTag(Node *parsetree)
                        tag = "EXECUTE";
                        break;
 
+               case T_CreateStatsStmt:
+                       tag = "CREATE STATISTICS";
+                       break;
+
                case T_DeallocateStmt:
                        {
                                DeallocateStmt *stmt = (DeallocateStmt *) parsetree;
index 5c823250bc21d4b054c205b76c7cf265ae8240ad..81c91039e40925a155c5b4794e5b61516a2523b4 100644 (file)
@@ -35,6 +35,7 @@
 #include "catalog/pg_operator.h"
 #include "catalog/pg_partitioned_table.h"
 #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_trigger.h"
 #include "catalog/pg_type.h"
 #include "commands/defrem.h"
@@ -317,6 +318,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
                                           const Oid *excludeOps,
                                           bool attrsOnly, bool showTblSpc,
                                           int prettyFlags, bool missing_ok);
+static char *pg_get_statisticsext_worker(Oid statextid, bool missing_ok);
 static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
                                                 bool attrsOnly);
 static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
@@ -1421,6 +1423,85 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
        return buf.data;
 }
 
+/*
+ * pg_get_statisticsextdef
+ *             Get the definition of an extended statistics object
+ */
+Datum
+pg_get_statisticsextdef(PG_FUNCTION_ARGS)
+{
+       Oid                     statextid = PG_GETARG_OID(0);
+       char       *res;
+
+       res = pg_get_statisticsext_worker(statextid, true);
+
+       if (res == NULL)
+               PG_RETURN_NULL();
+
+       PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+/*
+ * Internal workhorse to decompile an extended statistics object.
+ */
+static char *
+pg_get_statisticsext_worker(Oid statextid, bool missing_ok)
+{
+       Form_pg_statistic_ext   statextrec;
+       Form_pg_class                   pgclassrec;
+       HeapTuple       statexttup;
+       HeapTuple       pgclasstup;
+       StringInfoData buf;
+       int                     colno;
+
+       statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
+
+       if (!HeapTupleIsValid(statexttup))
+       {
+               if (missing_ok)
+                       return NULL;
+               elog(ERROR, "cache lookup failed for extended statistics %u", statextid);
+       }
+
+       statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
+
+       pgclasstup = SearchSysCache1(RELOID, ObjectIdGetDatum(statextrec->starelid));
+
+       if (!HeapTupleIsValid(statexttup))
+       {
+               ReleaseSysCache(statexttup);
+               elog(ERROR, "cache lookup failed for relation %u", statextrec->starelid);
+       }
+
+       pgclassrec = (Form_pg_class) GETSTRUCT(pgclasstup);
+
+       initStringInfo(&buf);
+
+       appendStringInfo(&buf, "CREATE STATISTICS %s ON (",
+                                                       quote_identifier(NameStr(statextrec->staname)));
+
+       for (colno = 0; colno < statextrec->stakeys.dim1; colno++)
+       {
+               AttrNumber      attnum = statextrec->stakeys.values[colno];
+               char       *attname;
+
+               if (colno > 0)
+                       appendStringInfoString(&buf, ", ");
+
+               attname = get_relid_attribute_name(statextrec->starelid, attnum);
+
+               appendStringInfoString(&buf, quote_identifier(attname));
+       }
+
+       appendStringInfo(&buf, ") FROM %s",
+                                                       quote_identifier(NameStr(pgclassrec->relname)));
+
+       ReleaseSysCache(statexttup);
+       ReleaseSysCache(pgclasstup);
+
+       return buf.data;
+}
+
 /*
  * pg_get_partkeydef
  *
index f8b28fe0e612da3376cd95f6848f84de25fc5d39..cc24c8aeb56ef087bacb271f6fc0cc7257ad2d76 100644 (file)
 #include "catalog/pg_operator.h"
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_type.h"
 #include "executor/executor.h"
 #include "mb/pg_wchar.h"
 #include "parser/parse_clause.h"
 #include "parser/parse_coerce.h"
 #include "parser/parsetree.h"
+#include "statistics/statistics.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
 #include "utils/date.h"
@@ -164,6 +166,8 @@ static double eqjoinsel_inner(Oid operator,
 static double eqjoinsel_semi(Oid operator,
                           VariableStatData *vardata1, VariableStatData *vardata2,
                           RelOptInfo *inner_rel);
+static bool estimate_multivariate_ndistinct(PlannerInfo *root,
+                          RelOptInfo *rel, List **varinfos, double *ndistinct);
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                                  Datum lobound, Datum hibound, Oid boundstypid,
                                  double *scaledlobound, double *scaledhibound);
@@ -3398,25 +3402,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
        {
                GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
                RelOptInfo *rel = varinfo1->rel;
-               double          reldistinct = varinfo1->ndistinct;
+               double          reldistinct = 1;
                double          relmaxndistinct = reldistinct;
                int                     relvarcount = 1;
                List       *newvarinfos = NIL;
+               List       *relvarinfos = NIL;
 
                /*
-                * Get the product of numdistinct estimates of the Vars for this rel.
-                * Also, construct new varinfos list of remaining Vars.
+                * Split the list of varinfos in two - one for the current rel,
+                * one for remaining Vars on other rels.
                 */
+               relvarinfos = lcons(varinfo1, relvarinfos);
                for_each_cell(l, lnext(list_head(varinfos)))
                {
                        GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
 
                        if (varinfo2->rel == varinfo1->rel)
                        {
-                               reldistinct *= varinfo2->ndistinct;
-                               if (relmaxndistinct < varinfo2->ndistinct)
-                                       relmaxndistinct = varinfo2->ndistinct;
-                               relvarcount++;
+                               /* varinfos on current rel */
+                               relvarinfos = lcons(varinfo2, relvarinfos);
                        }
                        else
                        {
@@ -3425,6 +3429,43 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                        }
                }
 
+               /*
+                * Get the numdistinct estimate for the Vars of this rel.  We
+                * iteratively search for multivariate n-distinct with maximum number
+                * of vars; assuming that each var group is independent of the others,
+                * we multiply them together.  Any remaining relvarinfos after
+                * no more multivariate matches are found are assumed independent too,
+                * so their individual ndistinct estimates are multiplied also.
+                */
+               while (relvarinfos)
+               {
+                       double          mvndistinct;
+
+                       if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
+                                                                                               &mvndistinct))
+                       {
+                               reldistinct *= mvndistinct;
+                               if (relmaxndistinct < mvndistinct)
+                                       relmaxndistinct = mvndistinct;
+                               relvarcount++;  /* inaccurate, but doesn't matter */
+                       }
+                       else
+                       {
+                               foreach (l, relvarinfos)
+                               {
+                                       GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+
+                                       reldistinct *= varinfo2->ndistinct;
+                                       if (relmaxndistinct < varinfo2->ndistinct)
+                                               relmaxndistinct = varinfo2->ndistinct;
+                                       relvarcount++;
+                               }
+
+                               /* we're done with this relation */
+                               relvarinfos = NIL;
+                       }
+               }
+
                /*
                 * Sanity check --- don't divide by zero if empty relation.
                 */
@@ -3667,6 +3708,132 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
  *-------------------------------------------------------------------------
  */
 
+/*
+ * Find applicable ndistinct statistics for the given list of VarInfos (which
+ * must all belong to the given rel), and update *ndistinct to the estimate of
+ * the MVNDistinctItem that best matches.  If a match it found, *varinfos is
+ * updated to remove the list of matched varinfos.
+ *
+ * Varinfos that aren't for simple Vars are ignored.
+ *
+ * Return TRUE if we're able to find a match, FALSE otherwise.
+ */
+static bool
+estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
+                                                               List **varinfos, double *ndistinct)
+{
+       ListCell   *lc;
+       Bitmapset  *attnums = NULL;
+       int                     nmatches;
+       Oid                     statOid = InvalidOid;
+       MVNDistinct *stats;
+       Bitmapset  *matched = NULL;
+
+       /* bail out immediately if the table has no extended statistics */
+       if (!rel->statlist)
+               return false;
+
+       /* Determine the attnums we're looking for */
+       foreach(lc, *varinfos)
+       {
+               GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+
+               Assert(varinfo->rel == rel);
+
+               if (IsA(varinfo->var, Var))
+               {
+                       attnums = bms_add_member(attnums,
+                                                                        ((Var *) varinfo->var)->varattno);
+               }
+       }
+
+       /* look for the ndistinct statistics matching the most vars */
+       nmatches = 1; /* we require at least two matches */
+       foreach(lc, rel->statlist)
+       {
+               StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+               Bitmapset  *shared;
+
+               /* skip statistics of other kinds */
+               if (info->kind != STATS_EXT_NDISTINCT)
+                       continue;
+
+               /* compute attnums shared by the vars and the statistic */
+               shared = bms_intersect(info->keys, attnums);
+
+               /*
+                * Does this statistics matches more columns than the currently
+                * best statistic?  If so, use this one instead.
+                *
+                * XXX This should break ties using name of the statistic, or
+                * something like that, to make the outcome stable.
+                */
+               if (bms_num_members(shared) > nmatches)
+               {
+                       statOid = info->statOid;
+                       nmatches = bms_num_members(shared);
+                       matched = shared;
+               }
+       }
+
+       /* No match? */
+       if (statOid == InvalidOid)
+               return false;
+       Assert(nmatches > 1 && matched != NULL);
+
+       stats = statext_ndistinct_load(statOid);
+
+       /*
+        * If we have a match, search it for the specific item that matches (there
+        * must be one), and construct the output values.
+        */
+       if (stats)
+       {
+               int             i;
+               List   *newlist = NIL;
+               MVNDistinctItem *item = NULL;
+
+               /* Find the specific item that exactly matches the combination */
+               for (i = 0; i < stats->nitems; i++)
+               {
+                       MVNDistinctItem *tmpitem = &stats->items[i];
+
+                       if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
+                       {
+                               item = tmpitem;
+                               break;
+                       }
+               }
+
+               /* make sure we found an item */
+               if (!item)
+                       elog(ERROR, "corrupt MVNDistinct entry");
+
+               /* Form the output varinfo list, keeping only unmatched ones */
+               foreach(lc, *varinfos)
+               {
+                       GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+                       AttrNumber      attnum;
+
+                       if (!IsA(varinfo->var, Var))
+                       {
+                               newlist = lappend(newlist, varinfo);
+                               continue;
+                       }
+
+                       attnum = ((Var *) varinfo->var)->varattno;
+                       if (!bms_is_member(attnum, matched))
+                               newlist = lappend(newlist, varinfo);
+               }
+
+               *varinfos = newlist;
+               *ndistinct = item->ndistinct;
+               return true;
+       }
+
+       return false;
+}
+
 /*
  * convert_to_scalar
  *       Convert non-NULL values of the indicated types to the comparison
index ce55fc5277746a54399fb955b568894e4548f1b6..a6b60c67caaf391784935443dbef6103c9958e12 100644 (file)
@@ -56,6 +56,7 @@
 #include "catalog/pg_publication.h"
 #include "catalog/pg_rewrite.h"
 #include "catalog/pg_shseclabel.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_trigger.h"
@@ -4451,6 +4452,82 @@ RelationGetIndexList(Relation relation)
        return result;
 }
 
+/*
+ * RelationGetStatExtList
+ *             get a list of OIDs of extended statistics on this relation
+ *
+ * The statistics list is created only if someone requests it, in a way
+ * similar to RelationGetIndexList().  We scan pg_statistic_ext to find
+ * relevant statistics, and add the list to the relcache entry so that we
+ * won't have to compute it again.  Note that shared cache inval of a
+ * relcache entry will delete the old list and set rd_statvalid to 0,
+ * so that we must recompute the statistics list on next request.  This
+ * handles creation or deletion of a statistic.
+ *
+ * The returned list is guaranteed to be sorted in order by OID, although
+ * this is not currently needed.
+ *
+ * Since shared cache inval causes the relcache's copy of the list to go away,
+ * we return a copy of the list palloc'd in the caller's context.  The caller
+ * may list_free() the returned list after scanning it. This is necessary
+ * since the caller will typically be doing syscache lookups on the relevant
+ * statistics, and syscache lookup could cause SI messages to be processed!
+ */
+List *
+RelationGetStatExtList(Relation relation)
+{
+       Relation        indrel;
+       SysScanDesc indscan;
+       ScanKeyData skey;
+       HeapTuple       htup;
+       List       *result;
+       List       *oldlist;
+       MemoryContext oldcxt;
+
+       /* Quick exit if we already computed the list. */
+       if (relation->rd_statvalid != 0)
+               return list_copy(relation->rd_statlist);
+
+       /*
+        * We build the list we intend to return (in the caller's context) while
+        * doing the scan.  After successfully completing the scan, we copy that
+        * list into the relcache entry.  This avoids cache-context memory leakage
+        * if we get some sort of error partway through.
+        */
+       result = NIL;
+
+       /* Prepare to scan pg_statistic_ext for entries having starelid = this rel. */
+       ScanKeyInit(&skey,
+                               Anum_pg_statistic_ext_starelid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(RelationGetRelid(relation)));
+
+       indrel = heap_open(StatisticExtRelationId, AccessShareLock);
+       indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
+                                                                NULL, 1, &skey);
+
+       while (HeapTupleIsValid(htup = systable_getnext(indscan)))
+               /* TODO maybe include only already built statistics? */
+               result = insert_ordered_oid(result, HeapTupleGetOid(htup));
+
+       systable_endscan(indscan);
+
+       heap_close(indrel, AccessShareLock);
+
+       /* Now save a copy of the completed list in the relcache entry. */
+       oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+       oldlist = relation->rd_statlist;
+       relation->rd_statlist = list_copy(result);
+
+       relation->rd_statvalid = true;
+       MemoryContextSwitchTo(oldcxt);
+
+       /* Don't leak the old list, if there is one */
+       list_free(oldlist);
+
+       return result;
+}
+
 /*
  * insert_ordered_oid
  *             Insert a new Oid into a sorted list of Oids, preserving ordering
@@ -5560,6 +5637,8 @@ load_relcache_init_file(bool shared)
                rel->rd_pkattr = NULL;
                rel->rd_idattr = NULL;
                rel->rd_pubactions = NULL;
+               rel->rd_statvalid = false;
+               rel->rd_statlist = NIL;
                rel->rd_createSubid = InvalidSubTransactionId;
                rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
                rel->rd_amcache = NULL;
index d5a376406fee37810d1817dee0cb87151146d7cf..d8c823f42b529fbf9ced2964fa7ff01ddbca7be9 100644 (file)
@@ -61,6 +61,7 @@
 #include "catalog/pg_shseclabel.h"
 #include "catalog/pg_replication_origin.h"
 #include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_subscription.h"
 #include "catalog/pg_subscription_rel.h"
 #include "catalog/pg_tablespace.h"
@@ -726,6 +727,28 @@ static const struct cachedesc cacheinfo[] = {
                },
                32
        },
+       {StatisticExtRelationId,        /* STATEXTNAMENSP */
+               StatisticExtNameIndexId,
+               2,
+               {
+                       Anum_pg_statistic_ext_staname,
+                       Anum_pg_statistic_ext_stanamespace,
+                       0,
+                       0
+               },
+               4
+       },
+       {StatisticExtRelationId,        /* STATEXTOID */
+               StatisticExtOidIndexId,
+               1,
+               {
+                       ObjectIdAttributeNumber,
+                       0,
+                       0,
+                       0
+               },
+               4
+       },
        {StatisticRelationId,           /* STATRELATTINH */
                StatisticRelidAttnumInhIndexId,
                3,
index 89530a9f0fbe6cdeb06979a7e5ab192e69f7c454..e2bc3576dc34fe1b91abc66b01bdf3beb9d5835a 100644 (file)
@@ -272,6 +272,10 @@ getSchemaData(Archive *fout, int *numTablesPtr)
                write_msg(NULL, "reading indexes\n");
        getIndexes(fout, tblinfo, numTables);
 
+       if (g_verbose)
+               write_msg(NULL, "reading extended statistics\n");
+       getExtendedStatistics(fout, tblinfo, numTables);
+
        if (g_verbose)
                write_msg(NULL, "reading constraints\n");
        getConstraints(fout, tblinfo, numTables);
index dd0892539a3ccf00634901b3c2a1e918e4068255..f77581d6ec9b9879465501d405f590f4866c3db9 100644 (file)
@@ -3540,7 +3540,8 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass)
                                 strcmp(te->desc, "TRIGGER") == 0 ||
                                 strcmp(te->desc, "ROW SECURITY") == 0 ||
                                 strcmp(te->desc, "POLICY") == 0 ||
-                                strcmp(te->desc, "USER MAPPING") == 0)
+                                strcmp(te->desc, "USER MAPPING") == 0 ||
+                                strcmp(te->desc, "STATISTICS") == 0)
                {
                        /* these object types don't have separate owners */
                }
index b3d95d7f6ee3a3fa50a900b8131f8727f3900fd1..ba34cc163e97a1534499c5864a50289c88c97c68 100644 (file)
@@ -192,6 +192,7 @@ static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo);
 static void dumpSequence(Archive *fout, TableInfo *tbinfo);
 static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo);
 static void dumpIndex(Archive *fout, IndxInfo *indxinfo);
+static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo);
 static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo);
 static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo);
 static void dumpTSParser(Archive *fout, TSParserInfo *prsinfo);
@@ -6582,6 +6583,99 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
        destroyPQExpBuffer(query);
 }
 
+/*
+ * getExtendedStatistics
+ *       get information about extended statistics on a dumpable table
+ *       or materialized view.
+ *
+ * Note: extended statistics data is not returned directly to the caller, but
+ * it does get entered into the DumpableObject tables.
+ */
+void
+getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables)
+{
+       int                             i,
+                                       j;
+       PQExpBuffer             query;
+       PGresult           *res;
+       StatsExtInfo   *statsextinfo;
+       int                             ntups;
+       int                             i_tableoid;
+       int                             i_oid;
+       int                             i_staname;
+       int                             i_stadef;
+
+       /* Extended statistics were new in v10 */
+       if (fout->remoteVersion < 100000)
+               return;
+
+       query = createPQExpBuffer();
+
+       for (i = 0; i < numTables; i++)
+       {
+               TableInfo  *tbinfo = &tblinfo[i];
+
+               /* Only plain tables and materialized views can have extended statistics. */
+               if (tbinfo->relkind != RELKIND_RELATION &&
+                       tbinfo->relkind != RELKIND_MATVIEW)
+                       continue;
+
+               /*
+                * Ignore extended statistics of tables whose definitions are not to
+                * be dumped.
+                */
+               if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION))
+                       continue;
+
+               if (g_verbose)
+                       write_msg(NULL, "reading extended statistics for table \"%s.%s\"\n",
+                                         tbinfo->dobj.namespace->dobj.name,
+                                         tbinfo->dobj.name);
+
+               /* Make sure we are in proper schema so stadef is right */
+               selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name);
+
+               resetPQExpBuffer(query);
+
+               appendPQExpBuffer(query,
+                                                 "SELECT "
+                                                       "tableoid, "
+                                                       "oid, "
+                                                       "staname, "
+                                                 "pg_catalog.pg_get_statisticsextdef(oid) AS stadef "
+                                                 "FROM pg_statistic_ext "
+                                                 "WHERE starelid = '%u' "
+                                                 "ORDER BY staname", tbinfo->dobj.catId.oid);
+
+               res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+               ntups = PQntuples(res);
+
+               i_tableoid = PQfnumber(res, "tableoid");
+               i_oid = PQfnumber(res, "oid");
+               i_staname = PQfnumber(res, "staname");
+               i_stadef = PQfnumber(res, "stadef");
+
+               statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo));
+
+               for (j = 0; j < ntups; j++)
+               {
+                       statsextinfo[j].dobj.objType = DO_STATSEXT;
+                       statsextinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid));
+                       statsextinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid));
+                       AssignDumpId(&statsextinfo[j].dobj);
+                       statsextinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_staname));
+                       statsextinfo[j].dobj.namespace = tbinfo->dobj.namespace;
+                       statsextinfo[j].statsexttable = tbinfo;
+                       statsextinfo[j].statsextdef = pg_strdup(PQgetvalue(res, j, i_stadef));
+               }
+
+               PQclear(res);
+       }
+
+       destroyPQExpBuffer(query);
+}
+
 /*
  * getConstraints
  *
@@ -9234,6 +9328,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
                case DO_INDEX:
                        dumpIndex(fout, (IndxInfo *) dobj);
                        break;
+               case DO_STATSEXT:
+                       dumpStatisticsExt(fout, (StatsExtInfo *) dobj);
+                       break;
                case DO_REFRESH_MATVIEW:
                        refreshMatViewData(fout, (TableDataInfo *) dobj);
                        break;
@@ -15728,6 +15825,61 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo)
        destroyPQExpBuffer(labelq);
 }
 
+/*
+ * dumpStatisticsExt
+ *       write out to fout an extended statistics object
+ */
+static void
+dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo)
+{
+       DumpOptions *dopt = fout->dopt;
+       TableInfo  *tbinfo = statsextinfo->statsexttable;
+       PQExpBuffer q;
+       PQExpBuffer delq;
+       PQExpBuffer labelq;
+
+       if (dopt->dataOnly)
+               return;
+
+       q = createPQExpBuffer();
+       delq = createPQExpBuffer();
+       labelq = createPQExpBuffer();
+
+       appendPQExpBuffer(labelq, "STATISTICS %s",
+                                         fmtId(statsextinfo->dobj.name));
+
+       appendPQExpBuffer(q, "%s;\n", statsextinfo->statsextdef);
+
+       appendPQExpBuffer(delq, "DROP STATISTICS %s.",
+                                                 fmtId(tbinfo->dobj.namespace->dobj.name));
+       appendPQExpBuffer(delq, "%s;\n",
+                                                 fmtId(statsextinfo->dobj.name));
+
+       if (statsextinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)
+                       ArchiveEntry(fout, statsextinfo->dobj.catId,
+                                                statsextinfo->dobj.dumpId,
+                                                statsextinfo->dobj.name,
+                                                tbinfo->dobj.namespace->dobj.name,
+                                                NULL,
+                                                tbinfo->rolname, false,
+                                                "STATISTICS", SECTION_POST_DATA,
+                                                q->data, delq->data, NULL,
+                                                NULL, 0,
+                                                NULL, NULL);
+
+       /* Dump Statistics Comments */
+       if (statsextinfo->dobj.dump & DUMP_COMPONENT_COMMENT)
+               dumpComment(fout, labelq->data,
+                                       tbinfo->dobj.namespace->dobj.name,
+                                       tbinfo->rolname,
+                                       statsextinfo->dobj.catId, 0,
+                                       statsextinfo->dobj.dumpId);
+
+       destroyPQExpBuffer(q);
+       destroyPQExpBuffer(delq);
+       destroyPQExpBuffer(labelq);
+}
+
 /*
  * dumpConstraint
  *       write out to fout a user-defined constraint
@@ -17266,6 +17418,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
                                addObjectDependency(postDataBound, dobj->dumpId);
                                break;
                        case DO_INDEX:
+                       case DO_STATSEXT:
                        case DO_REFRESH_MATVIEW:
                        case DO_TRIGGER:
                        case DO_EVENT_TRIGGER:
index a466527ec685dba205761bc5b13e5fa3f3c3f1e2..cb22f63bd6ac7309e9542d58083d4158fb66d459 100644 (file)
@@ -56,6 +56,7 @@ typedef enum
        DO_TABLE,
        DO_ATTRDEF,
        DO_INDEX,
+       DO_STATSEXT,
        DO_RULE,
        DO_TRIGGER,
        DO_CONSTRAINT,
@@ -362,6 +363,13 @@ typedef struct _indxInfo
        int                     relpages;               /* relpages of the underlying table */
 } IndxInfo;
 
+typedef struct _statsExtInfo
+{
+       DumpableObject dobj;
+       TableInfo  *statsexttable;      /* link to table the stats ext is for */
+       char       *statsextdef;
+} StatsExtInfo;
+
 typedef struct _ruleInfo
 {
        DumpableObject dobj;
@@ -682,6 +690,7 @@ extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables);
 extern InhInfo *getInherits(Archive *fout, int *numInherits);
 extern PartInfo *getPartitions(Archive *fout, int *numPartitions);
 extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables);
+extern void getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables);
 extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables);
 extern RuleInfo *getRules(Archive *fout, int *numRules);
 extern void getTriggers(Archive *fout, TableInfo tblinfo[], int numTables);
index e555de885721c88af38b4f2dc045ae14f0f0cf5c..5c19b05ca486730f83018ed405507b19b10a9363 100644 (file)
@@ -53,10 +53,11 @@ static const int dbObjectTypePriority[] =
        18,                                                     /* DO_TABLE */
        20,                                                     /* DO_ATTRDEF */
        28,                                                     /* DO_INDEX */
-       29,                                                     /* DO_RULE */
-       30,                                                     /* DO_TRIGGER */
+       29,                                                     /* DO_STATSEXT */
+       30,                                                     /* DO_RULE */
+       31,                                                     /* DO_TRIGGER */
        27,                                                     /* DO_CONSTRAINT */
-       31,                                                     /* DO_FK_CONSTRAINT */
+       32,                                                     /* DO_FK_CONSTRAINT */
        2,                                                      /* DO_PROCLANG */
        10,                                                     /* DO_CAST */
        23,                                                     /* DO_TABLE_DATA */
@@ -1291,6 +1292,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize)
                                         "INDEX %s  (ID %d OID %u)",
                                         obj->name, obj->dumpId, obj->catId.oid);
                        return;
+               case DO_STATSEXT:
+                       snprintf(buf, bufsize,
+                                        "STATISTICS %s  (ID %d OID %u)",
+                                        obj->name, obj->dumpId, obj->catId.oid);
+                       return;
                case DO_REFRESH_MATVIEW:
                        snprintf(buf, bufsize,
                                         "REFRESH MATERIALIZED VIEW %s  (ID %d OID %u)",
index 8c583127fdd351aa0acf166a46cfac47b91560f1..3cf1742020f7a62a6caafb82aa3a315ed3a6c150 100644 (file)
@@ -2320,6 +2320,57 @@ describeOneTableDetails(const char *schemaname,
                        PQclear(result);
                }
 
+               /* print any extended statistics */
+               if (pset.sversion >= 100000)
+               {
+                       printfPQExpBuffer(&buf,
+                                                         "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n"
+                                                         "  (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(attname::text),', ') \n"
+                                                  "    FROM ((SELECT pg_catalog.unnest(stakeys) AS attnum) s\n"
+                          "         JOIN pg_catalog.pg_attribute a ON (starelid = a.attrelid AND\n"
+                                                         "a.attnum = s.attnum AND not attisdropped))) AS columns,\n"
+                                                         "  (staenabled::char[] @> '{d}'::char[]) AS ndist_enabled\n"
+                         "FROM pg_catalog.pg_statistic_ext stat WHERE starelid  = '%s'\n"
+                         "ORDER BY 1;",
+                                                         oid);
+
+                       result = PSQLexec(buf.data);
+                       if (!result)
+                               goto error_return;
+                       else
+                               tuples = PQntuples(result);
+
+                       if (tuples > 0)
+                       {
+                               printTableAddFooter(&cont, _("Statistics:"));
+
+                               for (i = 0; i < tuples; i++)
+                               {
+                                       int             cnt = 0;
+
+                                       printfPQExpBuffer(&buf, "    ");
+
+                                       /* statistics name (qualified with namespace) */
+                                       appendPQExpBuffer(&buf, "\"%s.%s\" WITH (",
+                                                                         PQgetvalue(result, i, 1),
+                                                                         PQgetvalue(result, i, 2));
+
+                                       /* options */
+                                       if (strcmp(PQgetvalue(result, i, 5), "t") == 0)
+                                       {
+                                               appendPQExpBufferStr(&buf, "ndistinct");
+                                               cnt++;
+                                       }
+
+                                       appendPQExpBuffer(&buf, ") ON (%s)",
+                                                                         PQgetvalue(result, i, 4));
+
+                                       printTableAddFooter(&cont, buf.data);
+                               }
+                       }
+                       PQclear(result);
+               }
+
                /* print rules */
                if (tableinfo.hasrules && tableinfo.relkind != RELKIND_MATVIEW)
                {
index c9c9a18777466d8815b2a89435bf2baf555dbe7f..b8fa18ae2ea024842202fb5bf278d1daf40350ab 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201703241
+#define CATALOG_VERSION_NO     201703242
 
 #endif
index 10759c7c58d0b02be095653e68ffa17b72e57361..9effbce2f115242342ced72955b0ba06b96ca7c4 100644 (file)
@@ -147,6 +147,7 @@ typedef enum ObjectClass
        OCLASS_REWRITE,                         /* pg_rewrite */
        OCLASS_TRIGGER,                         /* pg_trigger */
        OCLASS_SCHEMA,                          /* pg_namespace */
+       OCLASS_STATISTIC_EXT,           /* pg_statistic_ext */
        OCLASS_TSPARSER,                        /* pg_ts_parser */
        OCLASS_TSDICT,                          /* pg_ts_dict */
        OCLASS_TSTEMPLATE,                      /* pg_ts_template */
index 1187797fd9ef3a90e951432ee1475053b9496a46..473fe177ba4778998bc76a2a61869c7e316c74b7 100644 (file)
@@ -119,6 +119,7 @@ extern void RemoveAttrDefault(Oid relid, AttrNumber attnum,
                                  DropBehavior behavior, bool complain, bool internal);
 extern void RemoveAttrDefaultById(Oid attrdefId);
 extern void RemoveStatistics(Oid relid, AttrNumber attnum);
+extern void RemoveStatisticsExt(Oid relid, AttrNumber attnum);
 
 extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno,
                                                  bool relhasoids);
index 5d4190c05eba824dde8ee3a06b42612d4374951e..a7266860ceb0923cccf9e36918af6e8c36072cae 100644 (file)
@@ -182,6 +182,13 @@ DECLARE_UNIQUE_INDEX(pg_largeobject_loid_pn_index, 2683, on pg_largeobject using
 DECLARE_UNIQUE_INDEX(pg_largeobject_metadata_oid_index, 2996, on pg_largeobject_metadata using btree(oid oid_ops));
 #define LargeObjectMetadataOidIndexId  2996
 
+DECLARE_UNIQUE_INDEX(pg_statistic_ext_oid_index, 3380, on pg_statistic_ext using btree(oid oid_ops));
+#define StatisticExtOidIndexId 3380
+DECLARE_UNIQUE_INDEX(pg_statistic_ext_name_index, 3997, on pg_statistic_ext using btree(staname name_ops, stanamespace oid_ops));
+#define StatisticExtNameIndexId 3997
+DECLARE_INDEX(pg_statistic_ext_relid_index, 3379, on pg_statistic_ext using btree(starelid oid_ops));
+#define StatisticExtRelidIndexId 3379
+
 DECLARE_UNIQUE_INDEX(pg_namespace_nspname_index, 2684, on pg_namespace using btree(nspname name_ops));
 #define NamespaceNameIndexId  2684
 DECLARE_UNIQUE_INDEX(pg_namespace_oid_index, 2685, on pg_namespace using btree(oid oid_ops));
index dbeb25b1ac17b65257a6cd2bcce09d2f0678afc2..35e0e2b089b7811c69daa60346a489d3e4dcc962 100644 (file)
@@ -141,6 +141,8 @@ extern Oid  get_collation_oid(List *collname, bool missing_ok);
 extern Oid     get_conversion_oid(List *conname, bool missing_ok);
 extern Oid     FindDefaultConversionProc(int32 for_encoding, int32 to_encoding);
 
+extern Oid     get_statistics_oid(List *names, bool missing_ok);
+
 /* initialization & transaction cleanup code */
 extern void InitializeSearchPath(void);
 extern void AtEOXact_Namespace(bool isCommit, bool parallel);
index ce8dc59e5af709bc01c5c16fe0657fd66b719dc0..bc5d28a4fac6f27540ec4a47f817654013e15a3d 100644 (file)
@@ -254,6 +254,10 @@ DATA(insert (      23       18   78 e f ));
 /* pg_node_tree can be coerced to, but not from, text */
 DATA(insert (  194      25    0 i b ));
 
+/* pg_ndistinct can be coerced to, but not from, bytea and text */
+DATA(insert (  3361  17    0 i b ));
+DATA(insert (  3361  25    0 i i ));
+
 /*
  * Datetime category
  */
index a66d04510034a8466013fd3026dd9f1bbd0cf806..ee67459c32efb2bff87600e171a33f7666724702 100644 (file)
@@ -1983,6 +1983,8 @@ DESCR("select statement of a view");
 DATA(insert OID = 1642 (  pg_get_userbyid         PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 19 "26" _null_ _null_ _null_ _null_ _null_ pg_get_userbyid _null_ _null_ _null_ ));
 DESCR("role name by OID (with fallback)");
 DATA(insert OID = 1643 (  pg_get_indexdef         PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_indexdef _null_ _null_ _null_ ));
+DESCR("extended statistics description");
+DATA(insert OID = 3415 (  pg_get_statisticsextdef         PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_statisticsextdef _null_ _null_ _null_ ));
 DESCR("index description");
 DATA(insert OID = 3352 (  pg_get_partkeydef    PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_partkeydef _null_ _null_ _null_ ));
 DESCR("partition key description");
@@ -2758,6 +2760,15 @@ DESCR("current user privilege on any column by rel name");
 DATA(insert OID = 3029 (  has_any_column_privilege        PGNSP PGUID 12 10 0 0 0 f f f f t f s s 2 0 16 "26 25" _null_ _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ ));
 DESCR("current user privilege on any column by rel oid");
 
+DATA(insert OID = 3355 (  pg_ndistinct_in      PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3361 "2275" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3356 (  pg_ndistinct_out     PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_out _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3357 (  pg_ndistinct_recv PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 3361 "2281" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_recv _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3358 (  pg_ndistinct_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_send _null_ _null_ _null_ ));
+DESCR("I/O");
+
 DATA(insert OID = 1928 (  pg_stat_get_numscans                 PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
 DESCR("statistics: number of scans done for table/index");
 DATA(insert OID = 1929 (  pg_stat_get_tuples_returned  PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ ));
diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h
new file mode 100644 (file)
index 0000000..5f67fe7
--- /dev/null
@@ -0,0 +1,75 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_statistic_ext.h
+ *       definition of the system "extended statistic" relation (pg_statistic_ext)
+ *       along with the relation's initial contents.
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/pg_statistic_ext.h
+ *
+ * NOTES
+ *       the genbki.pl script reads this file and generates .bki
+ *       information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_STATISTIC_EXT_H
+#define PG_STATISTIC_EXT_H
+
+#include "catalog/genbki.h"
+
+/* ----------------
+ *             pg_statistic_ext definition.  cpp turns this into
+ *             typedef struct FormData_pg_statistic_ext
+ * ----------------
+ */
+#define StatisticExtRelationId 3381
+
+CATALOG(pg_statistic_ext,3381)
+{
+       /* These fields form the unique key for the entry: */
+       Oid                     starelid;               /* relation containing attributes */
+       NameData        staname;                /* statistics name */
+       Oid                     stanamespace;   /* OID of namespace containing this statistics */
+       Oid                     staowner;               /* statistics owner */
+
+       /*
+        * variable-length fields start here, but we allow direct access to
+        * stakeys
+        */
+       int2vector      stakeys;                /* array of column keys */
+
+#ifdef CATALOG_VARLEN
+       char            staenabled[1] BKI_FORCE_NOT_NULL;       /* statistic types
+                                                                                                        * requested to build */
+       pg_ndistinct standistinct;      /* ndistinct coefficients (serialized) */
+#endif
+
+} FormData_pg_statistic_ext;
+
+/* ----------------
+ *             Form_pg_statistic_ext corresponds to a pointer to a tuple with
+ *             the format of pg_statistic_ext relation.
+ * ----------------
+ */
+typedef FormData_pg_statistic_ext *Form_pg_statistic_ext;
+
+/* ----------------
+ *             compiler constants for pg_statistic_ext
+ * ----------------
+ */
+#define Natts_pg_statistic_ext                                 7
+#define Anum_pg_statistic_ext_starelid                 1
+#define Anum_pg_statistic_ext_staname                  2
+#define Anum_pg_statistic_ext_stanamespace             3
+#define Anum_pg_statistic_ext_staowner                 4
+#define Anum_pg_statistic_ext_stakeys                  5
+#define Anum_pg_statistic_ext_staenabled               6
+#define Anum_pg_statistic_ext_standistinct             7
+
+#define STATS_EXT_NDISTINCT            'd'
+
+#endif   /* PG_STATISTIC_EXT_H */
index 9f61238179bf766aae54390646b38e1c09bc77e5..9ad67258fef4d08d6f00219f919a637cef82ee5b 100644 (file)
@@ -364,6 +364,10 @@ DATA(insert OID = 194 ( pg_node_tree       PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node
 DESCR("string representing an internal node tree");
 #define PGNODETREEOID  194
 
+DATA(insert OID = 3361 ( pg_ndistinct          PGNSP PGUID -1 f b S f t \054 0 0 0 pg_ndistinct_in pg_ndistinct_out pg_ndistinct_recv pg_ndistinct_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ ));
+DESCR("multivariate ndistinct coefficients");
+#define PGNDISTINCTOID 3361
+
 DATA(insert OID = 32 ( pg_ddl_command  PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ ));
 DESCR("internal type for passing CollectedCommand");
 #define PGDDLCOMMANDOID 32
index db7f145b5f3fe74fc0cd6eec4f6214cdcf5c514f..00d0a8326f3dbbf9f7be44c3c9a64449ad5b6218 100644 (file)
@@ -53,6 +53,7 @@ DECLARE_TOAST(pg_proc, 2836, 2837);
 DECLARE_TOAST(pg_rewrite, 2838, 2839);
 DECLARE_TOAST(pg_seclabel, 3598, 3599);
 DECLARE_TOAST(pg_statistic, 2840, 2841);
+DECLARE_TOAST(pg_statistic_ext, 3439, 3440);
 DECLARE_TOAST(pg_trigger, 2336, 2337);
 
 /* shared catalogs */
index 8740cee94407f362da270e0d7c2c61defd7d5159..c323e81e6c5011e6d2b0bf0ffaf3f28904b51991 100644 (file)
@@ -77,6 +77,10 @@ extern ObjectAddress DefineOperator(List *names, List *parameters);
 extern void RemoveOperatorById(Oid operOid);
 extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt);
 
+/* commands/statscmds.c */
+extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt);
+extern void RemoveStatisticsById(Oid statsOid);
+
 /* commands/aggregatecmds.c */
 extern ObjectAddress DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle,
                                List *parameters);
index b2d8514f895fe96a53b5b3a889d2c02d78bda71e..fc883a6f3ec0744da50bebfb2eade049aa53a446 100644 (file)
@@ -279,6 +279,7 @@ typedef enum NodeTag
        T_PlaceHolderInfo,
        T_MinMaxAggInfo,
        T_PlannerParamItem,
+       T_StatisticExtInfo,
 
        /*
         * TAGS FOR MEMORY NODES (memnodes.h)
@@ -424,6 +425,7 @@ typedef enum NodeTag
        T_CreateSubscriptionStmt,
        T_AlterSubscriptionStmt,
        T_DropSubscriptionStmt,
+       T_CreateStatsStmt,
        T_AlterCollationStmt,
 
        /*
index f3773ca9294ec58f3b8dd4b2bf470f4c55e564c8..3a71dd5b37d1674585e6fd63faee5b0b7e066d5e 100644 (file)
@@ -1593,6 +1593,7 @@ typedef enum ObjectType
        OBJECT_SCHEMA,
        OBJECT_SEQUENCE,
        OBJECT_SUBSCRIPTION,
+       OBJECT_STATISTIC_EXT,
        OBJECT_TABCONSTRAINT,
        OBJECT_TABLE,
        OBJECT_TABLESPACE,
@@ -2656,6 +2657,20 @@ typedef struct IndexStmt
        bool            if_not_exists;  /* just do nothing if index already exists? */
 } IndexStmt;
 
+/* ----------------------
+ *             Create Statistics Statement
+ * ----------------------
+ */
+typedef struct CreateStatsStmt
+{
+       NodeTag         type;
+       List       *defnames;           /* qualified name (list of Value strings) */
+       RangeVar   *relation;           /* relation to build statistics on */
+       List       *keys;                       /* String nodes naming referenced columns */
+       List       *options;            /* list of DefElem */
+       bool            if_not_exists;  /* do nothing if statistics already exists */
+} CreateStatsStmt;
+
 /* ----------------------
  *             Create Function Statement
  * ----------------------
index 1c88a79a2172e1d2c5684701c65be4549e9c92b7..0a5187cef3ba823ec3b67fb58cc2040ea2ca846a 100644 (file)
@@ -529,6 +529,7 @@ typedef struct RelOptInfo
        List       *lateral_vars;       /* LATERAL Vars and PHVs referenced by rel */
        Relids          lateral_referencers;    /* rels that reference me laterally */
        List       *indexlist;          /* list of IndexOptInfo */
+       List       *statlist;           /* list of StatisticExtInfo */
        BlockNumber pages;                      /* size estimates derived from pg_class */
        double          tuples;
        double          allvisfrac;
@@ -668,6 +669,24 @@ typedef struct ForeignKeyOptInfo
        List       *rinfos[INDEX_MAX_KEYS];
 } ForeignKeyOptInfo;
 
+/*
+ * StatisticExtInfo
+ *             Information about extended statistics for planning/optimization
+ *
+ * This contains information about which columns are covered by the
+ * statistics (stakeys), which options were requested while adding the
+ * statistics (*_enabled), and which kinds of statistics were actually
+ * built and are available for the optimizer (*_built).
+ */
+typedef struct StatisticExtInfo
+{
+       NodeTag         type;
+
+       Oid                     statOid;                /* OID of the statistics row */
+       RelOptInfo *rel;                        /* back-link to index's table */
+       char            kind;                   /* statistic kind of this entry */
+       Bitmapset  *keys;                       /* attnums of the columns covered */
+} StatisticExtInfo;
 
 /*
  * EquivalenceClasses
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
new file mode 100644 (file)
index 0000000..961f1f7
--- /dev/null
@@ -0,0 +1,64 @@
+/*-------------------------------------------------------------------------
+ *
+ * extended_stats_internal.h
+ *       POSTGRES extended statistics internal declarations
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/include/statistics/extended_stats_internal.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef EXTENDED_STATS_INTERNAL_H
+#define EXTENDED_STATS_INTERNAL_H
+
+#include "utils/sortsupport.h"
+#include "statistics/statistics.h"
+
+
+typedef struct
+{
+       Oid                     eqopr;                  /* '=' operator for datatype, if any */
+       Oid                     eqfunc;                 /* and associated function */
+       Oid                     ltopr;                  /* '<' operator for datatype, if any */
+} StdAnalyzeData;
+
+typedef struct
+{
+       Datum           value;                  /* a data value */
+       int                     tupno;                  /* position index for tuple it came from */
+} ScalarItem;
+
+/* multi-sort */
+typedef struct MultiSortSupportData
+{
+       int                     ndims;                  /* number of dimensions supported by the */
+       SortSupportData ssup[1];        /* sort support data for each dimension */
+} MultiSortSupportData;
+
+typedef MultiSortSupportData *MultiSortSupport;
+
+typedef struct SortItem
+{
+       Datum      *values;
+       bool       *isnull;
+} SortItem;
+
+extern MVNDistinct *statext_ndistinct_build(double totalrows,
+                                               int numrows, HeapTuple *rows,
+                                               Bitmapset *attrs, VacAttrStats **stats);
+extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
+extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
+
+extern MultiSortSupport multi_sort_init(int ndims);
+extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
+                                                Oid oper);
+extern int     multi_sort_compare(const void *a, const void *b, void *arg);
+extern int multi_sort_compare_dim(int dim, const SortItem * a,
+                                          const SortItem * b, MultiSortSupport mss);
+extern int multi_sort_compare_dims(int start, int end, const SortItem * a,
+                                               const SortItem * b, MultiSortSupport mss);
+
+#endif   /* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
new file mode 100644 (file)
index 0000000..a15e39e
--- /dev/null
@@ -0,0 +1,47 @@
+/*-------------------------------------------------------------------------
+ *
+ * statistics.h
+ *       Extended statistics and selectivity estimation functions.
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/statistics/statistics.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef STATISTICS_H
+#define STATISTICS_H
+
+#include "commands/vacuum.h"
+
+#define STATS_MAX_DIMENSIONS   8               /* max number of attributes */
+
+/* Multivariate distinct coefficients */
+#define STATS_NDISTINCT_MAGIC          0xA352BFA4      /* struct identifier */
+#define STATS_NDISTINCT_TYPE_BASIC     1       /* struct version */
+
+/* MVDistinctItem represents a single combination of columns */
+typedef struct MVNDistinctItem
+{
+       double          ndistinct;              /* ndistinct value for this combination */
+       Bitmapset  *attrs;                      /* attr numbers of items */
+} MVNDistinctItem;
+
+/* A MVNDistinct object, comprising all possible combinations of columns */
+typedef struct MVNDistinct
+{
+       uint32          magic;                  /* magic constant marker */
+       uint32          type;                   /* type of ndistinct (BASIC) */
+       uint32          nitems;                 /* number of items in the statistic */
+       MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER];
+} MVNDistinct;
+
+extern MVNDistinct *statext_ndistinct_load(Oid mvoid);
+
+extern void BuildRelationExtStatistics(Relation onerel, double totalrows,
+                                                  int numrows, HeapTuple *rows,
+                                                  int natts, VacAttrStats **vacattrstats);
+extern bool statext_is_kind_built(HeapTuple htup, char kind);
+
+#endif   /* STATISTICS_H */
index 0d118525c9cb3dd1df43df82ff4c3ed24c10c4c1..c957d8e17011fa925faf68ed1e70b0bb2d24267d 100644 (file)
@@ -192,6 +192,7 @@ typedef enum AclObjectKind
        ACL_KIND_OPFAMILY,                      /* pg_opfamily */
        ACL_KIND_COLLATION,                     /* pg_collation */
        ACL_KIND_CONVERSION,            /* pg_conversion */
+       ACL_KIND_STATISTICS,            /* pg_statistic_ext */
        ACL_KIND_TABLESPACE,            /* pg_tablespace */
        ACL_KIND_TSDICTIONARY,          /* pg_ts_dict */
        ACL_KIND_TSCONFIGURATION,       /* pg_ts_config */
@@ -326,6 +327,7 @@ extern bool pg_event_trigger_ownercheck(Oid et_oid, Oid roleid);
 extern bool pg_extension_ownercheck(Oid ext_oid, Oid roleid);
 extern bool pg_publication_ownercheck(Oid pub_oid, Oid roleid);
 extern bool pg_subscription_ownercheck(Oid sub_oid, Oid roleid);
+extern bool pg_statistics_ownercheck(Oid stat_oid, Oid roleid);
 extern bool has_createrole_privilege(Oid roleid);
 extern bool has_bypassrls_privilege(Oid roleid);
 
index a617a7cf5661438713d4d4ab548c4042c883f2f5..ab875bb9d776f3ceac1e138648658ee590544cfb 100644 (file)
@@ -92,6 +92,7 @@ typedef struct RelationData
        bool            rd_isvalid;             /* relcache entry is valid */
        char            rd_indexvalid;  /* state of rd_indexlist: 0 = not valid, 1 =
                                                                 * valid, 2 = temporarily forced */
+       bool            rd_statvalid;   /* is rd_statlist valid? */
 
        /*
         * rd_createSubid is the ID of the highest subtransaction the rel has
@@ -136,6 +137,9 @@ typedef struct RelationData
        Oid                     rd_pkindex;             /* OID of primary key, if any */
        Oid                     rd_replidindex; /* OID of replica identity index, if any */
 
+       /* data managed by RelationGetStatExtList: */
+       List       *rd_statlist;        /* list of OIDs of extended stats */
+
        /* data managed by RelationGetIndexAttrBitmap: */
        Bitmapset  *rd_indexattr;       /* identifies columns used in indexes */
        Bitmapset  *rd_keyattr;         /* cols that can be ref'd by foreign keys */
index da36b6774fc002d5dd5cb7c962be38a9eae530e3..81af3aebb8de34e3a8e7a08d8268502a498524ae 100644 (file)
@@ -39,6 +39,7 @@ extern void RelationClose(Relation relation);
  */
 extern List *RelationGetFKeyList(Relation relation);
 extern List *RelationGetIndexList(Relation relation);
+extern List *RelationGetStatExtList(Relation relation);
 extern Oid     RelationGetOidIndex(Relation relation);
 extern Oid     RelationGetPrimaryKeyIndex(Relation relation);
 extern Oid     RelationGetReplicaIndex(Relation relation);
index b35faf81b9e42ae9dfed00f30ad7b100d56e0c48..36805ebefbad5d5cda9285a0e89630fecce966f0 100644 (file)
@@ -86,6 +86,8 @@ enum SysCacheIdentifier
        PUBLICATIONRELMAP,
        RULERELNAME,
        SEQRELID,
+       STATEXTNAMENSP,
+       STATEXTOID,
        STATRELATTINH,
        SUBSCRIPTIONOID,
        SUBSCRIPTIONNAME,
index b01be59bbbd5bb431d816d24922f44bf85e882d6..ce581bb93d5e3f6ec17d50b5979fea08b15a72bf 100644 (file)
@@ -496,6 +496,48 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD
 ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4);
 ERROR:  function 2(integer,integer) does not exist in operator family "alt_opf18"
 DROP OPERATOR FAMILY alt_opf18 USING btree;
+--
+-- Statistics
+--
+SET SESSION AUTHORIZATION regress_alter_user1;
+CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER);
+CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
+CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat2;   -- failed (name conflict)
+ERROR:  statistics "alt_stat2" already exists in schema "alt_nsp1"
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat3;   -- failed (name conflict)
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2;  -- failed (no role membership)
+ERROR:  must be member of role "regress_alter_user2"
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3;  -- OK
+ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2;    -- OK
+SET SESSION AUTHORIZATION regress_alter_user2;
+CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
+CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
+ALTER STATISTICS alt_stat3 RENAME TO alt_stat4;    -- failed (not owner)
+ERROR:  must be owner of statistics alt_stat3
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat4;    -- OK
+ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner)
+ERROR:  must be owner of statistics alt_stat3
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership)
+ERROR:  must be member of role "regress_alter_user3"
+ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2;                -- failed (not owner)
+ERROR:  must be owner of statistics alt_stat3
+ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2;                -- failed (name conflict)
+ERROR:  statistics "alt_stat2" already exists in schema "alt_nsp2"
+RESET SESSION AUTHORIZATION;
+SELECT nspname, staname, rolname
+  FROM pg_statistic_ext s, pg_namespace n, pg_authid a
+ WHERE s.stanamespace = n.oid AND s.staowner = a.oid
+   AND n.nspname in ('alt_nsp1', 'alt_nsp2')
+ ORDER BY nspname, staname;
+ nspname  |  staname  |       rolname       
+----------+-----------+---------------------
+ alt_nsp1 | alt_stat2 | regress_alter_user2
+ alt_nsp1 | alt_stat3 | regress_alter_user1
+ alt_nsp1 | alt_stat4 | regress_alter_user2
+ alt_nsp2 | alt_stat2 | regress_alter_user3
+(4 rows)
+
 --
 -- Text Search Dictionary
 --
@@ -639,7 +681,7 @@ DROP LANGUAGE alt_lang3 CASCADE;
 DROP LANGUAGE alt_lang4 CASCADE;
 ERROR:  language "alt_lang4" does not exist
 DROP SCHEMA alt_nsp1 CASCADE;
-NOTICE:  drop cascades to 26 other objects
+NOTICE:  drop cascades to 27 other objects
 DETAIL:  drop cascades to function alt_func3(integer)
 drop cascades to function alt_agg3(integer)
 drop cascades to function alt_func4(integer)
@@ -656,6 +698,7 @@ drop cascades to operator family alt_opc1 for access method hash
 drop cascades to operator family alt_opc2 for access method hash
 drop cascades to operator family alt_opf4 for access method hash
 drop cascades to operator family alt_opf2 for access method hash
+drop cascades to table alt_regress_1
 drop cascades to text search dictionary alt_ts_dict3
 drop cascades to text search dictionary alt_ts_dict4
 drop cascades to text search dictionary alt_ts_dict2
index 978d9a9a0f89914154ee612eef3aa4d547794c7c..814e05e4ef124c350c436a7f9fbe657ccb0c3c9c 100644 (file)
@@ -39,6 +39,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL (
 CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable;
 CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT);
 WARNING:  tables were not subscribed, you will have to run ALTER SUBSCRIPTION ... REFRESH PUBLICATION to subscribe the tables
+CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable;
 -- test some error cases
 SELECT pg_get_object_address('stone', '{}', '{}');
 ERROR:  unrecognized object type "stone"
@@ -409,7 +410,8 @@ WITH objects (type, name, args) AS (VALUES
                                ('access method', '{btree}', '{}'),
                                ('publication', '{addr_pub}', '{}'),
                                ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'),
-                               ('subscription', '{addr_sub}', '{}')
+                               ('subscription', '{addr_sub}', '{}'),
+                               ('statistics', '{addr_nsp, gentable_stat}', '{}')
         )
 SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
        -- test roundtrip through pg_identify_object_as_address
@@ -457,6 +459,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
  trigger                   |            |                   | t on addr_nsp.gentable                                               | t
  operator family           | pg_catalog | integer_ops       | pg_catalog.integer_ops USING btree                                   | t
  policy                    |            |                   | genpol on addr_nsp.gentable                                          | t
+ statistics                | addr_nsp   | gentable_stat     | addr_nsp.gentable_stat                                               | t
  collation                 | pg_catalog | "default"         | pg_catalog."default"                                                 | t
  transform                 |            |                   | for integer on language sql                                          | t
  text search dictionary    | addr_nsp   | addr_ts_dict      | addr_nsp.addr_ts_dict                                                | t
@@ -466,7 +469,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
  subscription              |            | addr_sub          | addr_sub                                                             | t
  publication               |            | addr_pub          | addr_pub                                                             | t
  publication relation      |            |                   | gentable in publication addr_pub                                     | t
-(45 rows)
+(46 rows)
 
 ---
 --- Cleanup resources
index 64d9dd605fdc4c90b6f191705156ca327eb327d4..262036ac4ff503ff79424c33f59a8dea4f84d74d 100644 (file)
@@ -823,11 +823,12 @@ WHERE c.castmethod = 'b' AND
  text              | character         |        0 | i
  character varying | character         |        0 | i
  pg_node_tree      | text              |        0 | i
+ pg_ndistinct      | bytea             |        0 | i
  cidr              | inet              |        0 | i
  xml               | text              |        0 | a
  xml               | character varying |        0 | a
  xml               | character         |        0 | a
-(7 rows)
+(8 rows)
 
 -- **************** pg_conversion ****************
 -- Look for illegal values in pg_conversion fields.
index c4c8450b830511f144d222ea46adc8d1548d2b4c..7f04c7a7cc8dabf41ef9066453dcac1386a9c44d 100644 (file)
@@ -2164,6 +2164,14 @@ pg_stats| SELECT n.nspname AS schemaname,
      JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum))))
      LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
   WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid))));
+pg_stats_ext| SELECT n.nspname AS schemaname,
+    c.relname AS tablename,
+    s.staname,
+    s.stakeys AS attnums,
+    length((s.standistinct)::text) AS ndistbytes
+   FROM ((pg_statistic_ext s
+     JOIN pg_class c ON ((c.oid = s.starelid)))
+     LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)));
 pg_tables| SELECT n.nspname AS schemaname,
     c.relname AS tablename,
     pg_get_userbyid(c.relowner) AS tableowner,
index 8e3028edaa265757eda78767de8df504eb2145fe..753ad81e43f5904a5497d6c65a800e20af8a22e5 100644 (file)
@@ -142,6 +142,7 @@ pg_shdepend|t
 pg_shdescription|t
 pg_shseclabel|t
 pg_statistic|t
+pg_statistic_ext|t
 pg_subscription|t
 pg_subscription_rel|t
 pg_tablespace|t
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
new file mode 100644 (file)
index 0000000..83d70bf
--- /dev/null
@@ -0,0 +1,155 @@
+-- Generic extended statistics support
+-- Ensure stats are dropped sanely
+CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+DROP STATISTICS ab1_a_b_stats;
+CREATE SCHEMA regress_schema_2;
+CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
+DROP STATISTICS regress_schema_2.ab1_a_b_stats;
+-- Ensure statistics are dropped when columns are
+CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
+CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+ALTER TABLE ab1 DROP COLUMN a;
+\d ab1
+                Table "public.ab1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ b      | integer |           |          | 
+ c      | integer |           |          | 
+Statistics:
+    "public.ab1_b_c_stats" WITH (ndistinct) ON (b, c)
+
+DROP TABLE ab1;
+-- Ensure things work sanely with SET STATISTICS 0
+CREATE TABLE ab1 (a INTEGER, b INTEGER);
+ALTER TABLE ab1 ALTER a SET STATISTICS 0;
+INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+ANALYZE ab1;
+ERROR:  extended statistics could not be collected for column "a" of relation public.ab1
+HINT:  Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1
+ALTER TABLE ab1 ALTER a SET STATISTICS -1;
+ANALYZE ab1;
+DROP TABLE ab1;
+-- n-distinct tests
+CREATE TABLE ndistinct (
+    filler1 TEXT,
+    filler2 NUMERIC,
+    a INT,
+    b INT,
+    filler3 DATE,
+    c INT,
+    d INT
+);
+-- unknown column
+CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
+ERROR:  column "unknown_column" referenced in statistics does not exist
+-- single column
+CREATE STATISTICS s10 ON (a) FROM ndistinct;
+ERROR:  statistics require at least 2 columns
+-- single column, duplicated
+CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
+ERROR:  duplicate column name in statistics definition
+-- two columns, one duplicated
+CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
+ERROR:  duplicate column name in statistics definition
+-- correct command
+CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
+-- perfectly correlated groups
+INSERT INTO ndistinct (a, b, c, filler1)
+     SELECT i/100, i/100, i/100, cash_words(i::money)
+       FROM generate_series(1,10000) s(i);
+ANALYZE ndistinct;
+SELECT staenabled, standistinct
+  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
+ staenabled |                                          standistinct                                          
+------------+------------------------------------------------------------------------------------------------
+ {d}        | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
+   Group Key: a, b
+   ->  Seq Scan on ndistinct
+(3 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
+   Group Key: a, b, c
+   ->  Seq Scan on ndistinct
+(3 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
+   Group Key: a, b, c, d
+   ->  Seq Scan on ndistinct
+(3 rows)
+
+TRUNCATE TABLE ndistinct;
+-- partially correlated groups
+INSERT INTO ndistinct (a, b, c)
+     SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE ndistinct;
+SELECT staenabled, standistinct
+  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
+ staenabled |                                          standistinct                                          
+------------+------------------------------------------------------------------------------------------------
+ {d}        | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
+(1 row)
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ HashAggregate  (cost=230.00..232.01 rows=201 width=16)
+   Group Key: a, b
+   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=8)
+(3 rows)
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ HashAggregate  (cost=255.00..257.01 rows=201 width=20)
+   Group Key: a, b, c
+   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=12)
+(3 rows)
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ HashAggregate  (cost=280.00..290.00 rows=1000 width=24)
+   Group Key: a, b, c, d
+   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=16)
+(3 rows)
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ HashAggregate  (cost=255.00..265.00 rows=1000 width=20)
+   Group Key: b, c, d
+   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=12)
+(3 rows)
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ HashAggregate  (cost=230.00..240.00 rows=1000 width=16)
+   Group Key: a, d
+   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=8)
+(3 rows)
+
+DROP TABLE ndistinct;
index 8d75bbfab31b2a7de2330c99140d24f9daa41b43..84022f6a29867e9d5ade1bf6db1e876beedb0f0d 100644 (file)
@@ -59,7 +59,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR
 -- Look for types that should have an array type according to their typtype,
 -- but don't.  We exclude composites here because we have not bothered to
 -- make array types corresponding to the system catalogs' rowtypes.
--- NOTE: as of v10, this check finds pg_node_tree and smgr.
+-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr.
 SELECT p1.oid, p1.typname
 FROM pg_type as p1
 WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
@@ -67,11 +67,12 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
     (SELECT 1 FROM pg_type as p2
      WHERE p2.typname = ('_' || p1.typname)::name AND
            p2.typelem = p1.oid and p1.typarray = p2.oid);
- oid |   typname    
------+--------------
- 194 | pg_node_tree
- 210 | smgr
-(2 rows)
+ oid  |   typname    
+------+--------------
+  194 | pg_node_tree
+ 3361 | pg_ndistinct
+  210 | smgr
+(3 rows)
 
 -- Make sure typarray points to a varlena array type of our own base
 SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype,
index 38743d98c345b76514e7f349a0ae9cf066c55106..c283bdcb3721d85a54ac6b5c3567e3b29a2a1d85 100644 (file)
@@ -89,7 +89,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview
 # ----------
 # Another group of parallel tests
 # ----------
-test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan
+test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan stats_ext
 
 # rules cannot run concurrently with any test that creates a view
 test: rules psql_crosstab amutils
index d9f64c28738d1dec7de66adc3b2aa84cb030de4a..3a0d536a2ba9bb13ded2fb5be7773a73467870be 100644 (file)
@@ -130,6 +130,7 @@ test: misc_functions
 test: sysviews
 test: tsrf
 test: tidscan
+test: stats_ext
 test: rules
 test: psql_crosstab
 test: select_parallel
index c9ea47996725b8bed1c89ff4408787eb85e157c6..f6fa8d8bfdb8421059226419562b467b905dddd0 100644 (file)
@@ -433,6 +433,37 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD
 ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4);
 DROP OPERATOR FAMILY alt_opf18 USING btree;
 
+--
+-- Statistics
+--
+SET SESSION AUTHORIZATION regress_alter_user1;
+CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER);
+CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
+CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
+
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat2;   -- failed (name conflict)
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat3;   -- failed (name conflict)
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2;  -- failed (no role membership)
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3;  -- OK
+ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2;    -- OK
+
+SET SESSION AUTHORIZATION regress_alter_user2;
+CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
+CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
+
+ALTER STATISTICS alt_stat3 RENAME TO alt_stat4;    -- failed (not owner)
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat4;    -- OK
+ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner)
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership)
+ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2;                -- failed (not owner)
+ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2;                -- failed (name conflict)
+
+RESET SESSION AUTHORIZATION;
+SELECT nspname, staname, rolname
+  FROM pg_statistic_ext s, pg_namespace n, pg_authid a
+ WHERE s.stanamespace = n.oid AND s.staowner = a.oid
+   AND n.nspname in ('alt_nsp1', 'alt_nsp2')
+ ORDER BY nspname, staname;
 
 --
 -- Text Search Dictionary
index 28476daff18b07c97c61ed906cd4d051cd5bfe62..c9219e47c4afe008a31c50532047f482d493bda8 100644 (file)
@@ -41,6 +41,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL (
        TO SQL WITH FUNCTION int4recv(internal));
 CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable;
 CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT);
+CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable;
 
 -- test some error cases
 SELECT pg_get_object_address('stone', '{}', '{}');
@@ -185,7 +186,8 @@ WITH objects (type, name, args) AS (VALUES
                                ('access method', '{btree}', '{}'),
                                ('publication', '{addr_pub}', '{}'),
                                ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'),
-                               ('subscription', '{addr_sub}', '{}')
+                               ('subscription', '{addr_sub}', '{}'),
+                               ('statistics', '{addr_nsp, gentable_stat}', '{}')
         )
 SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
        -- test roundtrip through pg_identify_object_as_address
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
new file mode 100644 (file)
index 0000000..946cb84
--- /dev/null
@@ -0,0 +1,102 @@
+-- Generic extended statistics support
+
+-- Ensure stats are dropped sanely
+CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+DROP STATISTICS ab1_a_b_stats;
+
+CREATE SCHEMA regress_schema_2;
+CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
+DROP STATISTICS regress_schema_2.ab1_a_b_stats;
+
+-- Ensure statistics are dropped when columns are
+CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
+CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+ALTER TABLE ab1 DROP COLUMN a;
+\d ab1
+DROP TABLE ab1;
+
+-- Ensure things work sanely with SET STATISTICS 0
+CREATE TABLE ab1 (a INTEGER, b INTEGER);
+ALTER TABLE ab1 ALTER a SET STATISTICS 0;
+INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+ANALYZE ab1;
+ALTER TABLE ab1 ALTER a SET STATISTICS -1;
+ANALYZE ab1;
+DROP TABLE ab1;
+
+
+-- n-distinct tests
+CREATE TABLE ndistinct (
+    filler1 TEXT,
+    filler2 NUMERIC,
+    a INT,
+    b INT,
+    filler3 DATE,
+    c INT,
+    d INT
+);
+
+-- unknown column
+CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
+
+-- single column
+CREATE STATISTICS s10 ON (a) FROM ndistinct;
+
+-- single column, duplicated
+CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
+
+-- two columns, one duplicated
+CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
+
+-- correct command
+CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
+
+-- perfectly correlated groups
+INSERT INTO ndistinct (a, b, c, filler1)
+     SELECT i/100, i/100, i/100, cash_words(i::money)
+       FROM generate_series(1,10000) s(i);
+
+ANALYZE ndistinct;
+
+SELECT staenabled, standistinct
+  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+
+TRUNCATE TABLE ndistinct;
+
+-- partially correlated groups
+INSERT INTO ndistinct (a, b, c)
+     SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE ndistinct;
+
+SELECT staenabled, standistinct
+  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
+
+DROP TABLE ndistinct;
index 0a31249f5d5b46f0b5cf4b2a8fb652847a0f8cf7..4c658140081503d873aefd65d198fbb53bd485dd 100644 (file)
@@ -53,7 +53,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR
 -- Look for types that should have an array type according to their typtype,
 -- but don't.  We exclude composites here because we have not bothered to
 -- make array types corresponding to the system catalogs' rowtypes.
--- NOTE: as of v10, this check finds pg_node_tree and smgr.
+-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr.
 
 SELECT p1.oid, p1.typname
 FROM pg_type as p1