Implement multivariate n-distinct coefficients

author Alvaro Herrera <alvherre@alvh.no-ip.org>

Fri, 24 Mar 2017 17:06:10 +0000 (14:06 -0300)

committer Alvaro Herrera <alvherre@alvh.no-ip.org>

Fri, 24 Mar 2017 17:06:10 +0000 (14:06 -0300)
author Alvaro Herrera <alvherre@alvh.no-ip.org>
Fri, 24 Mar 2017 17:06:10 +0000 (14:06 -0300)
committer Alvaro Herrera <alvherre@alvh.no-ip.org>
Fri, 24 Mar 2017 17:06:10 +0000 (14:06 -0300)
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml

index c531c73aac9f661cad9e3c03e0f16197932edfd4..ac39c639edcbbc05b3b18c31cc5ab6b99b93efeb 100644 (file)
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -295,6 +295,11 @@
        <entry>planner statistics</entry>
       </row>
  
+     <row>
+      <entry><link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link></entry>
+      <entry>extended planner statistics</entry>
+     </row>
+
       <row>
        <entry><link linkend="catalog-pg-subscription"><structname>pg_subscription</structname></link></entry>
        <entry>logical replication subscriptions</entry>
@@ -4247,6 +4252,98 @@
    </table>
   </sect1>
  
+ <sect1 id="catalog-pg-statistic-ext">
+  <title><structname>pg_statistic_ext</structname></title>
+
+  <indexterm zone="catalog-pg-statistic-ext">
+   <primary>pg_statistic_ext</primary>
+  </indexterm>
+
+  <para>
+   The catalog <structname>pg_statistic_ext</structname>
+   holds extended planner statistics.
+  </para>
+
+  <table>
+   <title><structname>pg_statistic_ext</> Columns</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+
+     <row>
+      <entry><structfield>starelid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
+      <entry>The table that the described columns belong to</entry>
+     </row>
+
+     <row>
+      <entry><structfield>staname</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry></entry>
+      <entry>Name of the statistic.</entry>
+     </row>
+
+     <row>
+      <entry><structfield>stanamespace</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.oid</literal></entry>
+      <entry>
+       The OID of the namespace that contains this statistic
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>staowner</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.oid</literal></entry>
+      <entry>Owner of the statistic</entry>
+     </row>
+
+     <row>
+      <entry><structfield>staenabled</structfield></entry>
+      <entry><type>char[]</type></entry>
+      <entry></entry>
+      <entry>
+        An array with the modes of the enabled statistic types, encoded as
+        <literal>d</literal> for ndistinct coefficients.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>stakeys</structfield></entry>
+      <entry><type>int2vector</type></entry>
+      <entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
+      <entry>
+       This is an array of values that indicate which table columns this
+       statistic covers. For example, a value of <literal>1 3</literal> would
+       mean that the first and the third table columns make up the statistic key.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>standistinct</structfield></entry>
+      <entry><type>pg_ndistinct</type></entry>
+      <entry></entry>
+      <entry>
+       N-distinct coefficients, serialized as <structname>pg_ndistinct</> type.
+      </entry>
+     </row>
+
+    </tbody>
+   </tgroup>
+  </table>
+ </sect1>
+
   <sect1 id="catalog-pg-namespace">
    <title><structname>pg_namespace</structname></title>
  
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml

index 3f0f7363b9bc7456cf65f5e33ed36cdb4c92d8e5..ba6f8dd8d2db5f82d155d6c98f6e1bd2d1e07188 100644 (file)
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -16720,6 +16720,10 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
      <primary>pg_get_serial_sequence</primary>
     </indexterm>
  
+   <indexterm>
+    <primary>pg_get_statisticsextdef</primary>
+   </indexterm>
+
     <indexterm>
      <primary>pg_get_triggerdef</primary>
     </indexterm>
@@ -16889,6 +16893,11 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
         <entry>get name of the sequence that a <type>serial</type>, <type>smallserial</type> or <type>bigserial</type> column
         uses</entry>
        </row>
+      <row>
+       <entry><literal><function>pg_get_statisticsextdef(<parameter>statext_oid</parameter>)</function></literal></entry>
+       <entry><type>text</type></entry>
+       <entry>get <command>CREATE STATISTICS</> command for extended statistics objects</entry>
+      </row>
        <row>
         <entry><function>pg_get_triggerdef</function>(<parameter>trigger_oid</parameter>)</entry>
         <entry><type>text</type></entry>
@@ -17034,19 +17043,20 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
    <para>
     <function>pg_get_constraintdef</function>,
     <function>pg_get_indexdef</function>, <function>pg_get_ruledef</function>,
-   and <function>pg_get_triggerdef</function>, respectively reconstruct the
-   creating command for a constraint, index, rule, or trigger. (Note that this
-   is a decompiled reconstruction, not the original text of the command.)
-   <function>pg_get_expr</function> decompiles the internal form of an
-   individual expression, such as the default value for a column.  It can be
-   useful when examining the contents of system catalogs.  If the expression
-   might contain Vars, specify the OID of the relation they refer to as the
-   second parameter; if no Vars are expected, zero is sufficient.
-   <function>pg_get_viewdef</function> reconstructs the <command>SELECT</>
-   query that defines a view. Most of these functions come in two variants,
-   one of which can optionally <quote>pretty-print</> the result.  The
-   pretty-printed format is more readable, but the default format is more
-   likely to be interpreted the same way by future versions of
+   <function>pg_get_statisticsextdef</function>, and
+   <function>pg_get_triggerdef</function>, respectively reconstruct the
+   creating command for a constraint, index, rule, extended statistics object,
+   or trigger. (Note that this is a decompiled reconstruction, not the
+   original text of the command.) <function>pg_get_expr</function> decompiles
+   the internal form of an individual expression, such as the default value
+   for a column.  It can be useful when examining the contents of system
+   catalogs.  If the expression might contain Vars, specify the OID of the
+   relation they refer to as the second parameter; if no Vars are expected,
+   zero is sufficient. <function>pg_get_viewdef</function> reconstructs the
+   <command>SELECT</> query that defines a view. Most of these functions come
+   in two variants, one of which can optionally <quote>pretty-print</> the
+   result.  The pretty-printed format is more readable, but the default format
+   is more likely to be interpreted the same way by future versions of
     <productname>PostgreSQL</>; avoid using pretty-printed output for dump
     purposes.  Passing <literal>false</> for the pretty-print parameter yields
     the same result as the variant that does not have the parameter at all.
diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml

index 974e1b74e42f786f7bedb86e91d378c6401f609b..01acc2ef9dad1986af16c7b4e46afbeb22f4c6c8 100644 (file)
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -34,6 +34,7 @@ Complete list of usable sgml source files in this directory.
  <!ENTITY alterSequence      SYSTEM "alter_sequence.sgml">
  <!ENTITY alterSubscription  SYSTEM "alter_subscription.sgml">
  <!ENTITY alterSystem        SYSTEM "alter_system.sgml">
+<!ENTITY alterStatistics    SYSTEM "alter_statistics.sgml">
  <!ENTITY alterTable         SYSTEM "alter_table.sgml">
  <!ENTITY alterTableSpace    SYSTEM "alter_tablespace.sgml">
  <!ENTITY alterTSConfig      SYSTEM "alter_tsconfig.sgml">
@@ -80,6 +81,7 @@ Complete list of usable sgml source files in this directory.
  <!ENTITY createSchema       SYSTEM "create_schema.sgml">
  <!ENTITY createSequence     SYSTEM "create_sequence.sgml">
  <!ENTITY createServer       SYSTEM "create_server.sgml">
+<!ENTITY createStatistics   SYSTEM "create_statistics.sgml">
  <!ENTITY createSubscription SYSTEM "create_subscription.sgml">
  <!ENTITY createTable        SYSTEM "create_table.sgml">
  <!ENTITY createTableAs      SYSTEM "create_table_as.sgml">
@@ -126,6 +128,7 @@ Complete list of usable sgml source files in this directory.
  <!ENTITY dropSchema         SYSTEM "drop_schema.sgml">
  <!ENTITY dropSequence       SYSTEM "drop_sequence.sgml">
  <!ENTITY dropServer         SYSTEM "drop_server.sgml">
+<!ENTITY dropStatistics     SYSTEM "drop_statistics.sgml">
  <!ENTITY dropSubscription   SYSTEM "drop_subscription.sgml">
  <!ENTITY dropTable          SYSTEM "drop_table.sgml">
  <!ENTITY dropTableSpace     SYSTEM "drop_tablespace.sgml">
diff --git a/doc/src/sgml/ref/alter_statistics.sgml b/doc/src/sgml/ref/alter_statistics.sgml

new file mode 100644 (file)

index 0000000..3e4d286
--- /dev/null
+++ b/doc/src/sgml/ref/alter_statistics.sgml
@@ -0,0 +1,115 @@
+<!--
+doc/src/sgml/ref/alter_statistics.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-ALTERSTATISTICS">
+ <indexterm zone="sql-alterstatistics">
+  <primary>ALTER STATISTICS</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>ALTER STATISTICS</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>ALTER STATISTICS</refname>
+  <refpurpose>
+   change the definition of an extended statistics object
+  </refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+ALTER STATISTICS <replaceable class="parameter">name</replaceable> OWNER TO { <replaceable class="PARAMETER">new_owner</replaceable> | CURRENT_USER | SESSION_USER }
+ALTER STATISTICS <replaceable class="parameter">name</replaceable> RENAME TO <replaceable class="parameter">new_name</replaceable>
+ALTER STATISTICS <replaceable class="parameter">name</replaceable> SET SCHEMA <replaceable class="parameter">new_schema</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>ALTER STATISTICS</command> changes the parameters of an existing
+   extended statistics object.  Any parameters not specifically set in the
+   <command>ALTER STATISTICS</command> command retain their prior settings.
+  </para>
+
+  <para>
+   You must own the statistics to use <command>ALTER STATISTICS</>.
+   To change a statistics' schema, you must also have <literal>CREATE</>
+   privilege on the new schema.
+   To alter the owner, you must also be a direct or indirect member of the new
+   owning role, and that role must have <literal>CREATE</literal> privilege on
+   the statistics' schema.  (These restrictions enforce that altering the owner
+   doesn't do anything you couldn't do by dropping and recreating the statistics.
+   However, a superuser can alter ownership of any statistics anyway.)
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+   <para>
+    <variablelist>
+     <varlistentry>
+      <term><replaceable class="parameter">name</replaceable></term>
+      <listitem>
+       <para>
+        The name (optionally schema-qualified) of the statistics to be altered.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><replaceable class="PARAMETER">new_owner</replaceable></term>
+      <listitem>
+       <para>
+        The user name of the new owner of the statistics.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><replaceable class="parameter">new_name</replaceable></term>
+      <listitem>
+       <para>
+        The new name for the statistics.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><replaceable class="parameter">new_schema</replaceable></term>
+      <listitem>
+       <para>
+        The new schema for the statistics.
+       </para>
+      </listitem>
+     </varlistentry>
+
+    </variablelist>
+   </para>
+  </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   There's no <command>ALTER STATISTICS</command> command in the SQL standard.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-createstatistics"></member>
+   <member><xref linkend="sql-dropstatistics"></member>
+  </simplelist>
+ </refsect1>
+
+</refentry>
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml

index 767ea321da7701371d138044bfb5a4ec3df26b4f..75de2262539a116cc6f270e05ff8f2a4abb63cbf 100644 (file)
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -119,9 +119,12 @@ ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
       <para>
        This form drops a column from a table.  Indexes and
        table constraints involving the column will be automatically
-      dropped as well.  You will need to say <literal>CASCADE</> if
-      anything outside the table depends on the column, for example,
-      foreign key references or views.
+      dropped as well.
+      Multivariate statistics referencing the dropped column will also be
+      removed if the removal of the column would cause the statistics to
+      contain data for only a single column.
+      You will need to say <literal>CASCADE</> if anything outside the table
+      depends on the column, for example, foreign key references or views.
        If <literal>IF EXISTS</literal> is specified and the column
        does not exist, no error is thrown. In this case a notice
        is issued instead.
diff --git a/doc/src/sgml/ref/comment.sgml b/doc/src/sgml/ref/comment.sgml

index 7483c8c03fce741dc699c0de1d86ea6d8e05f607..8fe17a5767d7e70d0b548345378ea901c7e7d7c7 100644 (file)
--- a/doc/src/sgml/ref/comment.sgml
+++ b/doc/src/sgml/ref/comment.sgml
@@ -51,6 +51,7 @@ COMMENT ON
    SCHEMA <replaceable class="PARAMETER">object_name</replaceable> |
    SEQUENCE <replaceable class="PARAMETER">object_name</replaceable> |
    SERVER <replaceable class="PARAMETER">object_name</replaceable> |
+  STATISTICS <replaceable class="PARAMETER">object_name</replaceable> |
    TABLE <replaceable class="PARAMETER">object_name</replaceable> |
    TABLESPACE <replaceable class="PARAMETER">object_name</replaceable> |
    TEXT SEARCH CONFIGURATION <replaceable class="PARAMETER">object_name</replaceable> |
@@ -125,8 +126,8 @@ COMMENT ON
        The name of the object to be commented.  Names of tables,
        aggregates, collations, conversions, domains, foreign tables, functions,
        indexes, operators, operator classes, operator families, sequences,
-      text search objects, types, and views can be schema-qualified.
-      When commenting on a column,
+      statistics, text search objects, types, and views can be
+      schema-qualified. When commenting on a column,
        <replaceable class="parameter">relation_name</replaceable> must refer
        to a table, view, composite type, or foreign table.
       </para>
@@ -327,6 +328,7 @@ COMMENT ON RULE my_rule ON my_table IS 'Logs updates of employee records';
  COMMENT ON SCHEMA my_schema IS 'Departmental data';
  COMMENT ON SEQUENCE my_sequence IS 'Used to generate primary keys';
  COMMENT ON SERVER myserver IS 'my foreign server';
+COMMENT ON STATISTICS my_statistics IS 'Improves planner row estimations';
  COMMENT ON TABLE my_schema.my_table IS 'Employee Information';
  COMMENT ON TABLESPACE my_tablespace IS 'Tablespace for indexes';
  COMMENT ON TEXT SEARCH CONFIGURATION my_config IS 'Special word filtering';
diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml

new file mode 100644 (file)

index 0000000..60184a3
--- /dev/null
+++ b/doc/src/sgml/ref/create_statistics.sgml
@@ -0,0 +1,155 @@
+<!--
+doc/src/sgml/ref/create_statistics.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-CREATESTATISTICS">
+ <indexterm zone="sql-createstatistics">
+  <primary>CREATE STATISTICS</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>CREATE STATISTICS</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>CREATE STATISTICS</refname>
+  <refpurpose>define extended statistics</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_name</replaceable> ON (
+  <replaceable class="PARAMETER">column_name</replaceable>, <replaceable class="PARAMETER">column_name</replaceable> [, ...])
+  FROM <replaceable class="PARAMETER">table_name</replaceable>
+</synopsis>
+
+ </refsynopsisdiv>
+
+ <refsect1 id="SQL-CREATESTATISTICS-description">
+  <title>Description</title>
+
+  <para>
+   <command>CREATE STATISTICS</command> will create a new extended statistics
+   object on the specified table.
+   The statistics will be created in the current database and
+   will be owned by the user issuing the command.
+  </para>
+
+  <para>
+   If a schema name is given (for example, <literal>CREATE STATISTICS
+   myschema.mystat ...</>) then the statistics is created in the specified
+   schema.  Otherwise it is created in the current schema.  The name of
+   the statistics must be distinct from the name of any other statistics in the
+   same schema.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+
+   <varlistentry>
+    <term><literal>IF NOT EXISTS</></term>
+    <listitem>
+     <para>
+      Do not throw an error if a statistics object with the same name already exists.
+      A notice is issued in this case.  Note that only the name of the
+      statistics object is considered here. The definition of the statistics is
+      not considered.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="PARAMETER">statistics_name</replaceable></term>
+    <listitem>
+     <para>
+      The name (optionally schema-qualified) of the statistics to be created.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="PARAMETER">column_name</replaceable></term>
+    <listitem>
+     <para>
+      The name of a column to be included in the statistics.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="PARAMETER">table_name</replaceable></term>
+    <listitem>
+     <para>
+      The name (optionally schema-qualified) of the table the statistics should
+      be created on.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   You must be the owner of a table to create or change statistics on it.
+  </para>
+ </refsect1>
+
+ <refsect1 id="SQL-CREATESTATISTICS-examples">
+  <title>Examples</title>
+
+  <para>
+   Create table <structname>t1</> with two functionally dependent columns, i.e.
+   knowledge of a value in the first column is sufficient for determining the
+   value in the other column. Then extended statistics are built on those
+   columns:
+
+<programlisting>
+CREATE TABLE t1 (
+    a   int,
+    b   int
+);
+
+INSERT INTO t1 SELECT i/100, i/500
+                 FROM generate_series(1,1000000) s(i);
+
+CREATE STATISTICS s1 ON (a, b) FROM t1;
+
+ANALYZE t1;
+
+-- valid combination of values
+EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
+
+-- invalid combination of values
+EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1);
+</programlisting>
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   There's no <command>CREATE STATISTICS</command> command in the SQL standard.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-alterstatistics"></member>
+   <member><xref linkend="sql-dropstatistics"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
diff --git a/doc/src/sgml/ref/drop_statistics.sgml b/doc/src/sgml/ref/drop_statistics.sgml

new file mode 100644 (file)

index 0000000..98c3381
--- /dev/null
+++ b/doc/src/sgml/ref/drop_statistics.sgml
@@ -0,0 +1,98 @@
+<!--
+doc/src/sgml/ref/drop_statistics.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-DROPSTATISTICS">
+ <indexterm zone="sql-dropstatistics">
+  <primary>DROP STATISTICS</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>DROP STATISTICS</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>DROP STATISTICS</refname>
+  <refpurpose>remove extended statistics</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+DROP STATISTICS [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> [, ...]
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>DROP STATISTICS</command> removes statistics from the database.
+   Only the statistics object's owner, the schema owner, or a superuser can drop a
+   statistics object.
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><literal>IF EXISTS</literal></term>
+    <listitem>
+     <para>
+      Do not throw an error if the statistics do not exist. A notice is
+      issued in this case.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="PARAMETER">name</replaceable></term>
+    <listitem>
+     <para>
+      The name (optionally schema-qualified) of the statistics to drop.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   To destroy two statistics objects in different schemas, without failing
+   if they don't exist:
+
+<programlisting>
+DROP STATISTICS IF EXISTS
+    accounting.users_uid_creation,
+    public.grants_user_role;
+</programlisting>
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   There's no <command>DROP STATISTICS</command> command in the SQL standard.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-alterstatistics"></member>
+   <member><xref linkend="sql-createstatistics"></member>
+  </simplelist>
+ </refsect1>
+
+</refentry>
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml

index 3d8ad232fa3e5155d978b5a882782b9212d3f1d0..9000b3aaaa716e54b1969077bad8138e17e3e4c3 100644 (file)
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -60,6 +60,7 @@
     &alterSchema;
     &alterSequence;
     &alterServer;
+   &alterStatistics;
     &alterSubscription;
     &alterSystem;
     &alterTable;
@@ -108,6 +109,7 @@
     &createSchema;
     &createSequence;
     &createServer;
+   &createStatistics;
     &createSubscription;
     &createTable;
     &createTableAs;
@@ -154,6 +156,7 @@
     &dropSchema;
     &dropSequence;
     &dropServer;
+   &dropStatistics;
     &dropSubscription;
     &dropTable;
     &dropTableSpace;
diff --git a/src/backend/Makefile b/src/backend/Makefile

index fffb0d95bad3ab27c1f06f3c7fbd8157b0f5f82c..bce9d2c3ebb09a06f2e7e1a39a097ef37ce749d0 100644 (file)
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global
  
  SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \
         main nodes optimizer port postmaster regex replication rewrite \
-       storage tcop tsearch utils $(top_builddir)/src/timezone
+       statistics storage tcop tsearch utils $(top_builddir)/src/timezone
  
  include $(srcdir)/common.mk
  
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile

index 159cab5c18c0552d19526db06727a8d8cc1e21ee..fd33426bad15164500bb8189f65f808758107f27 100644 (file)
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -33,6 +33,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
         pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
         pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
         pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
+       pg_statistic_ext.h \
         pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
         pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
         pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \
diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c

index be86d76a59ecf250876cc9bdf0065bc39acc08b1..d01930f4a80d0fdb8da8e98baaa1d0d764160881 100644 (file)
--- a/src/backend/catalog/aclchk.c
+++ b/src/backend/catalog/aclchk.c
@@ -48,6 +48,7 @@
  #include "catalog/pg_operator.h"
  #include "catalog/pg_opfamily.h"
  #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_subscription.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_type.h"
@@ -3302,6 +3303,8 @@ static const char *const no_priv_msg[MAX_ACL_KIND] =
         gettext_noop("permission denied for collation %s"),
         /* ACL_KIND_CONVERSION */
         gettext_noop("permission denied for conversion %s"),
+       /* ACL_KIND_STATISTICS */
+       gettext_noop("permission denied for statistics %s"),
         /* ACL_KIND_TABLESPACE */
         gettext_noop("permission denied for tablespace %s"),
         /* ACL_KIND_TSDICTIONARY */
@@ -3352,6 +3355,8 @@ static const char *const not_owner_msg[MAX_ACL_KIND] =
         gettext_noop("must be owner of collation %s"),
         /* ACL_KIND_CONVERSION */
         gettext_noop("must be owner of conversion %s"),
+       /* ACL_KIND_STATISTICS */
+       gettext_noop("must be owner of statistics %s"),
         /* ACL_KIND_TABLESPACE */
         gettext_noop("must be owner of tablespace %s"),
         /* ACL_KIND_TSDICTIONARY */
@@ -3467,6 +3472,10 @@ pg_aclmask(AclObjectKind objkind, Oid table_oid, AttrNumber attnum, Oid roleid,
                                                                                                    mask, how, NULL);
                 case ACL_KIND_NAMESPACE:
                         return pg_namespace_aclmask(table_oid, roleid, mask, how);
+               case ACL_KIND_STATISTICS:
+                       elog(ERROR, "grantable rights not supported for statistics");
+                       /* not reached, but keep compiler quiet */
+                       return ACL_NO_RIGHTS;
                 case ACL_KIND_TABLESPACE:
                         return pg_tablespace_aclmask(table_oid, roleid, mask, how);
                 case ACL_KIND_FDW:
@@ -5103,6 +5112,32 @@ pg_subscription_ownercheck(Oid sub_oid, Oid roleid)
         return has_privs_of_role(roleid, ownerId);
  }
  
+/*
+ * Ownership check for an extended statistics object (specified by OID).
+ */
+bool
+pg_statistics_ownercheck(Oid stat_oid, Oid roleid)
+{
+       HeapTuple       tuple;
+       Oid                     ownerId;
+
+       /* Superusers bypass all permission checking. */
+       if (superuser_arg(roleid))
+               return true;
+
+       tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stat_oid));
+       if (!HeapTupleIsValid(tuple))
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("statistics with OID %u do not exist", stat_oid)));
+
+       ownerId = ((Form_pg_statistic_ext) GETSTRUCT(tuple))->staowner;
+
+       ReleaseSysCache(tuple);
+
+       return has_privs_of_role(roleid, ownerId);
+}
+
  /*
   * Check whether specified role has CREATEROLE privilege (or is a superuser)
   *
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c

index fc088b216589ccbb70f3201a128d59e8345b913c..ee27cae7df7b27aae61a593fa6f687f4e0079478 100644 (file)
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -51,6 +51,7 @@
  #include "catalog/pg_publication.h"
  #include "catalog/pg_publication_rel.h"
  #include "catalog/pg_rewrite.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_subscription.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_transform.h"
@@ -154,6 +155,7 @@ static const Oid object_classes[] = {
         RewriteRelationId,                      /* OCLASS_REWRITE */
         TriggerRelationId,                      /* OCLASS_TRIGGER */
         NamespaceRelationId,            /* OCLASS_SCHEMA */
+       StatisticExtRelationId,         /* OCLASS_STATISTIC_EXT */
         TSParserRelationId,                     /* OCLASS_TSPARSER */
         TSDictionaryRelationId,         /* OCLASS_TSDICT */
         TSTemplateRelationId,           /* OCLASS_TSTEMPLATE */
@@ -1263,6 +1265,10 @@ doDeletion(const ObjectAddress *object, int flags)
                         DropTransformById(object->objectId);
                         break;
  
+               case OCLASS_STATISTIC_EXT:
+                       RemoveStatisticsById(object->objectId);
+                       break;
+
                 default:
                         elog(ERROR, "unrecognized object class: %u",
                                  object->classId);
@@ -2377,6 +2383,9 @@ getObjectClass(const ObjectAddress *object)
                 case NamespaceRelationId:
                         return OCLASS_SCHEMA;
  
+               case StatisticExtRelationId:
+                       return OCLASS_STATISTIC_EXT;
+
                 case TSParserRelationId:
                         return OCLASS_TSPARSER;
  
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c

index d49dcdc015d9ca905ef15f6abc04992890e8a4d6..eee5e2f6caf378ca94f9f2f6b80786148a0b2bde 100644 (file)
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -52,6 +52,7 @@
  #include "catalog/pg_opclass.h"
  #include "catalog/pg_partitioned_table.h"
  #include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_subscription_rel.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_type.h"
@@ -1609,7 +1610,10 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
         heap_close(attr_rel, RowExclusiveLock);
  
         if (attnum > 0)
+       {
                 RemoveStatistics(relid, attnum);
+               RemoveStatisticsExt(relid, attnum);
+       }
  
         relation_close(rel, NoLock);
  }
@@ -1860,6 +1864,7 @@ heap_drop_with_catalog(Oid relid)
          * delete statistics
          */
         RemoveStatistics(relid, 0);
+       RemoveStatisticsExt(relid, 0);
  
         /*
          * delete attribute tuples
@@ -2771,6 +2776,75 @@ RemoveStatistics(Oid relid, AttrNumber attnum)
  }
  
  
+/*
+ * RemoveStatisticsExt --- drop pg_statistic_ext rows attached to a relation
+ *
+ * With attnum == 0 every row for relid is removed; otherwise only the rows
+ * whose stakeys vector contains that column number are removed.
+ */
+void
+RemoveStatisticsExt(Oid relid, AttrNumber attnum)
+{
+       Relation        statext_rel;
+       SysScanDesc sscan;
+       ScanKeyData skey;
+       HeapTuple       htup;
+
+       /*
+        * Open pg_statistic_ext and visit the entries whose starelid is relid
+        */
+       statext_rel = heap_open(StatisticExtRelationId, RowExclusiveLock);
+
+       ScanKeyInit(&skey,
+                               Anum_pg_statistic_ext_starelid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(relid));
+
+       sscan = systable_beginscan(statext_rel,
+                                                          StatisticExtRelidIndexId,
+                                                          true, NULL, 1, &skey);
+
+       for (;;)
+       {
+               /* attnum == 0 selects every entry found by the scan */
+               bool            matches = (attnum == 0);
+
+               htup = systable_getnext(sscan);
+               if (!HeapTupleIsValid(htup))
+                       break;
+
+               if (!matches)
+               {
+                       Form_pg_statistic_ext   extForm;
+                       int                     k = 0;
+
+                       /*
+                        * Look for the requested column number in stakeys.
+                        */
+                       extForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
+                       while (!matches && k < extForm->stakeys.dim1)
+                       {
+                               matches = (extForm->stakeys.values[k] == attnum);
+                               k++;
+                       }
+               }
+
+               if (!matches)
+                       continue;
+
+               /* Delete the catalog row, then the dependency records for it */
+               CatalogTupleDelete(statext_rel, &htup->t_self);
+               deleteDependencyRecordsFor(StatisticExtRelationId,
+                                                                  HeapTupleGetOid(htup),
+                                                                  false);
+       }
+
+       systable_endscan(sscan);
+
+       heap_close(statext_rel, RowExclusiveLock);
+}
+
+
  /*
   * RelationTruncateIndexes - truncate all indexes associated
   * with the heap relation to zero tuples.
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c

index a38da3047ff45b8d6ea82a85019a9ff7ebac0849..e521bd908a22ff24a25a91554d5689b30cdf4029 100644 (file)
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -2085,6 +2085,62 @@ ConversionIsVisible(Oid conid)
         return visible;
  }
  
+/*
+ * get_statistics_oid - look up a statistics object by (maybe qualified) name
+ *
+ * Returns its OID; when nothing matches, the result is InvalidOid if
+ * missing_ok is set, and an error is raised otherwise.
+ */
+Oid
+get_statistics_oid(List *names, bool missing_ok)
+{
+       char       *nspname;
+       char       *objname;
+       Oid                     nspoid;
+       Oid                     result = InvalidOid;
+       ListCell   *lc;
+
+       /* split the name list into its schema part and its object part */
+       DeconstructQualifiedName(names, &nspname, &objname);
+
+       if (nspname == NULL)
+       {
+               /* unqualified: probe the schemas of the search path in order */
+               recomputeNamespacePath();
+
+               foreach(lc, activeSearchPath)
+               {
+                       nspoid = lfirst_oid(lc);
+                       /* the temp namespace is never searched */
+                       if (nspoid != myTempNamespace)
+                       {
+                               result = GetSysCacheOid2(STATEXTNAMENSP,
+                                                                                PointerGetDatum(objname),
+                                                                                ObjectIdGetDatum(nspoid));
+                               if (OidIsValid(result))
+                                       break;
+                       }
+               }
+       }
+       else
+       {
+               /* qualified: only the schema that was named is consulted */
+               nspoid = LookupExplicitNamespace(nspname, missing_ok);
+               if (OidIsValid(nspoid) || !missing_ok)
+                       result = GetSysCacheOid2(STATEXTNAMENSP,
+                                                                        PointerGetDatum(objname),
+                                                                        ObjectIdGetDatum(nspoid));
+       }
+
+       if (!missing_ok && !OidIsValid(result))
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("statistics \"%s\" do not exist",
+                                               NameListToString(names))));
+
+       return result;
+}
+
  /*
   * get_ts_parser_oid - find a TS parser by possibly qualified name
   *
diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c

index 61a831b4036c3c7efef567ca08810b4d5cb1828e..2948d64fa73a6ab7108e8d4b3730669d98777c0e 100644 (file)
--- a/src/backend/catalog/objectaddress.c
+++ b/src/backend/catalog/objectaddress.c
@@ -48,6 +48,7 @@
  #include "catalog/pg_publication.h"
  #include "catalog/pg_publication_rel.h"
  #include "catalog/pg_rewrite.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_subscription.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_transform.h"
@@ -478,6 +479,18 @@ static const ObjectPropertyType ObjectProperty[] =
                 InvalidAttrNumber,
                 ACL_KIND_SUBSCRIPTION,
                 true
+       },
+       {
+               StatisticExtRelationId,
+               StatisticExtOidIndexId,
+               STATEXTOID,
+               STATEXTNAMENSP,
+               Anum_pg_statistic_ext_staname,
+               Anum_pg_statistic_ext_stanamespace,
+               Anum_pg_statistic_ext_staowner,
+               InvalidAttrNumber,              /* no ACL (same as relation) */
+               ACL_KIND_STATISTICS,
+               true
         }
  };
  
@@ -696,6 +709,10 @@ static const struct object_type_map
         /* OCLASS_TRANSFORM */
         {
                 "transform", OBJECT_TRANSFORM
+       },
+       /* OCLASS_STATISTIC_EXT */
+       {
+               "statistics", OBJECT_STATISTIC_EXT
         }
  };
  
@@ -974,6 +991,12 @@ get_object_address(ObjectType objtype, Node *object,
                                 address = get_object_address_defacl(castNode(List, object),
                                                                                                         missing_ok);
                                 break;
+                       case OBJECT_STATISTIC_EXT:
+                               address.classId = StatisticExtRelationId;
+                               address.objectId = get_statistics_oid(castNode(List, object),
+                                                                                                         missing_ok);
+                               address.objectSubId = 0;
+                               break;
                         default:
                                 elog(ERROR, "unrecognized objtype: %d", (int) objtype);
                                 /* placate compiler, in case it thinks elog might return */
@@ -2083,6 +2106,7 @@ pg_get_object_address(PG_FUNCTION_ARGS)
                 case OBJECT_ATTRIBUTE:
                 case OBJECT_COLLATION:
                 case OBJECT_CONVERSION:
+               case OBJECT_STATISTIC_EXT:
                 case OBJECT_TSPARSER:
                 case OBJECT_TSDICTIONARY:
                 case OBJECT_TSTEMPLATE:
@@ -2370,6 +2394,10 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address,
                                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                                                  errmsg("must be superuser")));
                         break;
+               case OBJECT_STATISTIC_EXT:
+                       if (!pg_statistics_ownercheck(address.objectId, roleid))
+                               aclcheck_error_type(ACLCHECK_NOT_OWNER, address.objectId);
+                       break;
                 default:
                         elog(ERROR, "unrecognized object type: %d",
                                  (int) objtype);
@@ -3857,6 +3885,10 @@ getObjectTypeDescription(const ObjectAddress *object)
                         appendStringInfoString(&buffer, "subscription");
                         break;
  
+               case OCLASS_STATISTIC_EXT:
+                       appendStringInfoString(&buffer, "statistics");
+                       break;
+
                 default:
                         appendStringInfo(&buffer, "unrecognized %u", object->classId);
                         break;
@@ -4880,6 +4912,29 @@ getObjectIdentityParts(const ObjectAddress *object,
                                 break;
                         }
  
+               case OCLASS_STATISTIC_EXT:
+                       {
+                               HeapTuple       tup;
+                               Form_pg_statistic_ext formStatistic;
+                               char       *schema;
+
+                               tup = SearchSysCache1(STATEXTOID,
+                                                                         ObjectIdGetDatum(object->objectId));
+                               if (!HeapTupleIsValid(tup))
+                                       elog(ERROR, "cache lookup failed for statistics %u",
+                                                object->objectId);
+                               formStatistic = (Form_pg_statistic_ext) GETSTRUCT(tup);
+                               schema = get_namespace_name_or_temp(formStatistic->stanamespace);
+                               appendStringInfoString(&buffer,
+                                                                          quote_qualified_identifier(schema,
+                                                                                  NameStr(formStatistic->staname)));
+                               if (objname)
+                                       *objname = list_make2(schema,
+                                                                  pstrdup(NameStr(formStatistic->staname)));
+                               ReleaseSysCache(tup);
+                       }
+                       break;
+
                 default:
                         appendStringInfo(&buffer, "unrecognized object %u %u %d",
                                                          object->classId,
diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c

index 8d946ff44ccb341172fe4fa562a49ec46382f46a..d28a8afb47dac78739e4a3649e8957bb82b35ed8 100644 (file)
--- a/src/backend/catalog/pg_shdepend.c
+++ b/src/backend/catalog/pg_shdepend.c
@@ -39,6 +39,7 @@
  #include "catalog/pg_opfamily.h"
  #include "catalog/pg_proc.h"
  #include "catalog/pg_shdepend.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_subscription.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_ts_config.h"
@@ -1415,6 +1416,7 @@ shdepReassignOwned(List *roleids, Oid newrole)
                                 case OperatorFamilyRelationId:
                                 case OperatorClassRelationId:
                                 case ExtensionRelationId:
+                               case StatisticExtRelationId:
                                 case TableSpaceRelationId:
                                 case DatabaseRelationId:
                                 case TSConfigRelationId:
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql

index 80d14296de22bac8073bdcb00ec1832dbebf38de..b41882aa5210cda567f85f6db520008ca97b59fb 100644 (file)
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -186,6 +186,16 @@ CREATE OR REPLACE VIEW pg_sequences AS
      WHERE NOT pg_is_other_temp_schema(N.oid)
            AND relkind = 'S';
  
+-- Extended planner statistics, labelled with schema and table names
+CREATE VIEW pg_stats_ext AS
+    SELECT N.nspname AS schemaname,
+           C.relname AS tablename,
+           S.staname AS staname,
+           S.stakeys AS attnums,
+           length(S.standistinct) AS ndistbytes
+    FROM pg_statistic_ext AS S INNER JOIN pg_class AS C ON (C.oid = S.starelid)
+        LEFT JOIN pg_namespace AS N ON (N.oid = C.relnamespace);
+
  CREATE VIEW pg_stats WITH (security_barrier) AS
      SELECT
          nspname AS schemaname,
diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile

index e0fab38cbe19af855403be312d5495165d9f0533..4a6c99e09081a533f099d5f175f0806048b6ae9c 100644 (file)
--- a/src/backend/commands/Makefile
+++ b/src/backend/commands/Makefile
@@ -18,8 +18,8 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \
         event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \
         indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \
         policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \
-       schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \
-       tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \
-       vacuumlazy.o variable.o view.o
+       schemacmds.o seclabel.o sequence.o statscmds.o subscriptioncmds.o \
+       tablecmds.o tablespace.o trigger.o tsearchcmds.o typecmds.o user.o \
+       vacuum.o vacuumlazy.o variable.o view.o
  
  include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c

index cf1391c2e6ba31f722835a0ed0bebd54a0cafd2c..2c6435b75980482671752142d03727c9a0d0e883 100644 (file)
--- a/src/backend/commands/alter.c
+++ b/src/backend/commands/alter.c
@@ -33,6 +33,7 @@
  #include "catalog/pg_opfamily.h"
  #include "catalog/pg_proc.h"
  #include "catalog/pg_subscription.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_ts_config.h"
  #include "catalog/pg_ts_dict.h"
  #include "catalog/pg_ts_parser.h"
@@ -120,6 +121,10 @@ report_namespace_conflict(Oid classId, const char *name, Oid nspOid)
                         Assert(OidIsValid(nspOid));
                         msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\"");
                         break;
+               case StatisticExtRelationId:
+                       Assert(OidIsValid(nspOid));
+                       msgfmt = gettext_noop("statistics \"%s\" already exists in schema \"%s\"");
+                       break;
                 case TSParserRelationId:
                         Assert(OidIsValid(nspOid));
                         msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\"");
@@ -373,6 +378,7 @@ ExecRenameStmt(RenameStmt *stmt)
                 case OBJECT_OPCLASS:
                 case OBJECT_OPFAMILY:
                 case OBJECT_LANGUAGE:
+               case OBJECT_STATISTIC_EXT:
                 case OBJECT_TSCONFIGURATION:
                 case OBJECT_TSDICTIONARY:
                 case OBJECT_TSPARSER:
@@ -489,6 +495,7 @@ ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt,
                 case OBJECT_OPERATOR:
                 case OBJECT_OPCLASS:
                 case OBJECT_OPFAMILY:
+               case OBJECT_STATISTIC_EXT:
                 case OBJECT_TSCONFIGURATION:
                 case OBJECT_TSDICTIONARY:
                 case OBJECT_TSPARSER:
@@ -803,6 +810,7 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt)
                 case OBJECT_OPERATOR:
                 case OBJECT_OPCLASS:
                 case OBJECT_OPFAMILY:
+               case OBJECT_STATISTIC_EXT:
                 case OBJECT_TABLESPACE:
                 case OBJECT_TSDICTIONARY:
                 case OBJECT_TSCONFIGURATION:
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 055338fdff2c623ba56a02c6c020a973bc203918..c5b5c54babf0994567ad6bf729f49bf6ba888d52 100644 (file)
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -17,6 +17,7 @@
  #include <math.h>
  
  #include "access/multixact.h"
+#include "access/sysattr.h"
  #include "access/transam.h"
  #include "access/tupconvert.h"
  #include "access/tuptoaster.h"
@@ -28,6 +29,7 @@
  #include "catalog/pg_collation.h"
  #include "catalog/pg_inherits_fn.h"
  #include "catalog/pg_namespace.h"
+#include "catalog/pg_statistic_ext.h"
  #include "commands/dbcommands.h"
  #include "commands/tablecmds.h"
  #include "commands/vacuum.h"
@@ -39,13 +41,17 @@
  #include "parser/parse_relation.h"
  #include "pgstat.h"
  #include "postmaster/autovacuum.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
  #include "storage/bufmgr.h"
  #include "storage/lmgr.h"
  #include "storage/proc.h"
  #include "storage/procarray.h"
  #include "utils/acl.h"
  #include "utils/attoptcache.h"
+#include "utils/builtins.h"
  #include "utils/datum.h"
+#include "utils/fmgroids.h"
  #include "utils/guc.h"
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
@@ -566,6 +572,10 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
                         update_attstats(RelationGetRelid(Irel[ind]), false,
                                                         thisdata->attr_cnt, thisdata->vacattrstats);
                 }
+
+               /* Build extended statistics (if there are any). */
+               BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt,
+                                                                  vacattrstats);
         }
  
         /*
@@ -1681,19 +1691,6 @@ ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
  /*
   * Extra information used by the default analysis routines
   */
-typedef struct
-{
-       Oid                     eqopr;                  /* '=' operator for datatype, if any */
-       Oid                     eqfunc;                 /* and associated function */
-       Oid                     ltopr;                  /* '<' operator for datatype, if any */
-} StdAnalyzeData;
-
-typedef struct
-{
-       Datum           value;                  /* a data value */
-       int                     tupno;                  /* position index for tuple it came from */
-} ScalarItem;
-
  typedef struct
  {
         int                     count;                  /* # of duplicates */
diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c

index ab73fbf961be2469107b71341cf3078d2f08df0b..cb948f0204d5723b1c9e4b097fd36a2b7e072e28 100644 (file)
--- a/src/backend/commands/dropcmds.c
+++ b/src/backend/commands/dropcmds.c
@@ -286,6 +286,13 @@ does_not_exist_skipping(ObjectType objtype, Node *object)
                         msg = gettext_noop("schema \"%s\" does not exist, skipping");
                         name = strVal((Value *) object);
                         break;
+               case OBJECT_STATISTIC_EXT:
+                       if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
+                       {
+                               msg = gettext_noop("extended statistics \"%s\" do not exist, skipping");
+                               name = NameListToString(castNode(List, object));
+                       }
+                       break;
                 case OBJECT_TSPARSER:
                         if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
                         {
diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c

index 346b347ae17654e9c4e5155f5d1a2e6a294d9a7e..7366fc74bec757ab727ffc81e0c8857ca831c333 100644 (file)
--- a/src/backend/commands/event_trigger.c
+++ b/src/backend/commands/event_trigger.c
@@ -112,6 +112,7 @@ static event_trigger_support_data event_trigger_support[] = {
         {"SCHEMA", true},
         {"SEQUENCE", true},
         {"SERVER", true},
+       {"STATISTICS", true},
         {"SUBSCRIPTION", true},
         {"TABLE", true},
         {"TABLESPACE", false},
@@ -1108,6 +1109,7 @@ EventTriggerSupportsObjectType(ObjectType obtype)
                 case OBJECT_SCHEMA:
                 case OBJECT_SEQUENCE:
                 case OBJECT_SUBSCRIPTION:
+               case OBJECT_STATISTIC_EXT:
                 case OBJECT_TABCONSTRAINT:
                 case OBJECT_TABLE:
                 case OBJECT_TRANSFORM:
@@ -1173,6 +1175,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass)
                 case OCLASS_PUBLICATION:
                 case OCLASS_PUBLICATION_REL:
                 case OCLASS_SUBSCRIPTION:
+               case OCLASS_STATISTIC_EXT:
                         return true;
         }
  
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c

new file mode 100644 (file)

index 0000000..4163091
--- /dev/null
+++ b/src/backend/commands/statscmds.c
@@ -0,0 +1,296 @@
+/*-------------------------------------------------------------------------
+ *
+ * statscmds.c
+ *       Commands for creating and altering extended statistics
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *       src/backend/commands/statscmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_statistic_ext.h"
+#include "commands/defrem.h"
+#include "miscadmin.h"
+#include "statistics/statistics.h"
+#include "utils/builtins.h"
+#include "utils/inval.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+
+/* qsort comparator for CreateStatistics: orders int16 attnums by numeric value */
+static int
+compare_int16(const void *a, const void *b)
+{
+       return (int) *(const int16 *) a - (int) *(const int16 *) b;
+}
+
+/*
+ * CREATE STATISTICS
+ *
+ * Validates the statement, inserts a pg_statistic_ext row whose standistinct
+ * is still NULL, records its dependencies and returns the new object's
+ * address (InvalidObjectAddress when IF NOT EXISTS skips the creation).
+ */
+ObjectAddress
+CreateStatistics(CreateStatsStmt *stmt)
+{
+       int                     i;
+       ListCell   *l;
+       int16           attnums[STATS_MAX_DIMENSIONS];
+       int                     numcols = 0;
+       ObjectAddress address = InvalidObjectAddress;
+       char       *namestr;
+       NameData        staname;
+       Oid                     statoid;
+       Oid                     namespaceId;
+       HeapTuple       htup;
+       Datum           values[Natts_pg_statistic_ext];
+       bool            nulls[Natts_pg_statistic_ext];
+       int2vector *stakeys;
+       Relation        statrel;
+       Relation        rel;
+       Oid                     relid;
+       ObjectAddress parentobject, childobject;
+       Datum           types[1];               /* only ndistinct defined now */
+       int                     ntypes = 0;
+       ArrayType  *staenabled;
+       bool            build_ndistinct;
+       bool            requested_type = false;
+
+       Assert(IsA(stmt, CreateStatsStmt));
+
+       /* resolve the pieces of the name (namespace etc.) */
+       namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
+       namestrcpy(&staname, namestr);
+
+       /* Name already taken: skip under IF NOT EXISTS, otherwise fail. */
+       if (SearchSysCacheExists2(STATEXTNAMENSP,
+                                                         PointerGetDatum(&staname),
+                                                         ObjectIdGetDatum(namespaceId)))
+       {
+               if (stmt->if_not_exists)
+               {
+                       ereport(NOTICE,
+                                       (errcode(ERRCODE_DUPLICATE_OBJECT),
+                                        errmsg("statistics \"%s\" already exist, skipping",
+                                                       namestr)));
+                       return InvalidObjectAddress;
+               }
+
+               ereport(ERROR,
+                               (errcode(ERRCODE_DUPLICATE_OBJECT),
+                                errmsg("statistics \"%s\" already exist", namestr)));
+       }
+
+       rel = heap_openrv(stmt->relation, AccessExclusiveLock);
+       relid = RelationGetRelid(rel);
+
+       if (rel->rd_rel->relkind != RELKIND_RELATION &&
+               rel->rd_rel->relkind != RELKIND_MATVIEW)
+               ereport(ERROR,
+                               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                errmsg("relation \"%s\" is not a table or materialized view",
+                                               RelationGetRelationName(rel))));
+
+       /*
+        * Transform column names to array of attnums. While at it, enforce some
+        * constraints.
+        */
+       foreach(l, stmt->keys)
+       {
+               char       *attname = strVal(lfirst(l));
+               HeapTuple       atttuple;
+               Form_pg_attribute attForm;
+               TypeCacheEntry *type;
+
+               atttuple = SearchSysCacheAttName(relid, attname);
+               if (!HeapTupleIsValid(atttuple))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_UNDEFINED_COLUMN),
+                         errmsg("column \"%s\" referenced in statistics does not exist",
+                                        attname)));
+               attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+               /* Disallow use of system attributes in extended stats */
+               if (attForm->attnum < 0)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("statistic creation on system columns is not supported")));
+
+               /* Disallow data types without a less-than operator */
+               type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
+               if (type->lt_opr == InvalidOid)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("only scalar types can be used in extended statistics")));
+
+               /* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
+               if (numcols >= STATS_MAX_DIMENSIONS)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_TOO_MANY_COLUMNS),
+                                        errmsg("cannot have more than %d keys in statistics",
+                                                       STATS_MAX_DIMENSIONS)));
+
+               attnums[numcols++] = attForm->attnum;
+               ReleaseSysCache(atttuple);
+       }
+
+       /*
+        * Check that at least two columns were specified in the statement. The
+        * upper bound was already checked in the loop above.
+        */
+       if (numcols < 2)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                errmsg("statistics require at least 2 columns")));
+
+       /*
+        * Sort the attnums, which makes detecting duplicates somewhat easier, and
+        * it does not hurt (it does not affect the efficiency, unlike for
+        * indexes, for example).
+        */
+       qsort(attnums, numcols, sizeof(int16), compare_int16);
+
+       /*
+        * Look for duplicates in the list of columns. The attnums are sorted so
+        * just check consecutive elements.
+        */
+       for (i = 1; i < numcols; i++)
+               if (attnums[i] == attnums[i - 1])
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DUPLICATE_COLUMN),
+                                 errmsg("duplicate column name in statistics definition")));
+
+       stakeys = buildint2vector(attnums, numcols);
+
+       /*
+        * Parse the statistics options.  Currently only statistics types are
+        * recognized.
+        */
+       build_ndistinct = false;
+       foreach(l, stmt->options)
+       {
+               DefElem    *opt = (DefElem *) lfirst(l);
+
+               if (strcmp(opt->defname, "ndistinct") == 0)
+               {
+                       build_ndistinct = defGetBoolean(opt);
+                       requested_type = true;
+               }
+               else
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("unrecognized STATISTICS option \"%s\"",
+                                                       opt->defname)));
+       }
+       /* If no statistic type was specified, build them all. */
+       if (!requested_type)
+               build_ndistinct = true;
+
+       /* construct the char array of enabled statistic types */
+       if (build_ndistinct)
+               types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
+       Assert(ntypes > 0);
+       staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c');
+
+       /*
+        * Everything seems fine, so let's build the pg_statistic_ext tuple.
+        */
+       memset(values, 0, sizeof(values));
+       memset(nulls, false, sizeof(nulls));
+       values[Anum_pg_statistic_ext_starelid - 1] = ObjectIdGetDatum(relid);
+       values[Anum_pg_statistic_ext_staname - 1] = NameGetDatum(&staname);
+       values[Anum_pg_statistic_ext_stanamespace - 1] = ObjectIdGetDatum(namespaceId);
+       values[Anum_pg_statistic_ext_staowner - 1] = ObjectIdGetDatum(GetUserId());
+       values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(stakeys);
+       values[Anum_pg_statistic_ext_staenabled - 1] = PointerGetDatum(staenabled);
+
+       /* no statistics built yet */
+       nulls[Anum_pg_statistic_ext_standistinct - 1] = true;
+
+       /* insert it into pg_statistic_ext */
+       statrel = heap_open(StatisticExtRelationId, RowExclusiveLock);
+       htup = heap_form_tuple(statrel->rd_att, values, nulls);
+       CatalogTupleInsert(statrel, htup);
+       statoid = HeapTupleGetOid(htup);
+       heap_freetuple(htup);
+       heap_close(statrel, RowExclusiveLock);
+
+       /*
+        * Add a dependency on a table, so that stats get dropped on DROP TABLE.
+        */
+       ObjectAddressSet(parentobject, RelationRelationId, relid);
+       ObjectAddressSet(childobject, StatisticExtRelationId, statoid);
+       recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
+
+       /*
+        * Also add dependency on the schema.  This is required to ensure that we
+        * drop the statistics on DROP SCHEMA.  This is not handled automatically
+        * by DROP TABLE because the statistics are not an object in the table's
+        * schema.
+        */
+       ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
+       recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
+
+       ObjectAddressSet(address, StatisticExtRelationId, statoid);
+
+       /*
+        * Invalidate relcache so that others see the new statistics.  This must
+        * happen while rel is still open, so close it (keeping the lock) after.
+        */
+       CacheInvalidateRelcache(rel);
+       relation_close(rel, NoLock);
+
+       return address;
+}
+
+/*
+ * Guts of statistics deletion.
+ *
+ * Deletes the pg_statistic_ext row identified by statsOid and invalidates
+ * the relcache entry of the relation the statistics were defined on.
+ */
+void
+RemoveStatisticsById(Oid statsOid)
+{
+       Relation        relation;
+       Relation        rel;
+       HeapTuple       tup;
+       Form_pg_statistic_ext statext;
+
+       /*
+        * Delete the pg_statistic_ext tuple.
+        */
+       relation = heap_open(StatisticExtRelationId, RowExclusiveLock);
+
+       tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
+
+       if (!HeapTupleIsValid(tup)) /* should not happen */
+               elog(ERROR, "cache lookup failed for statistics %u", statsOid);
+
+       /* Open (and lock) the relation named by starelid before deleting */
+       statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
+       rel = heap_open(statext->starelid, AccessExclusiveLock);
+
+       CatalogTupleDelete(relation, &tup->t_self);
+       CacheInvalidateRelcache(rel);
+
+       ReleaseSysCache(tup);
+
+       heap_close(relation, RowExclusiveLock);
+       heap_close(rel, NoLock);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index 93bda42715310b260b741766ffad51efb2e790f0..c23d5c52851eea5bf29bb194709b19973d034bdf 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3337,6 +3337,20 @@ _copyIndexStmt(const IndexStmt *from)
         return newnode;
  }
  
+/*
+ * _copyCreateStatsStmt
+ *             Build a new CreateStatsStmt node from 'from', copying its defnames,
+ *             relation, keys, options and if_not_exists fields.
+ */
+static CreateStatsStmt *
+_copyCreateStatsStmt(const CreateStatsStmt *from)
+{
+       CreateStatsStmt *newnode = makeNode(CreateStatsStmt);
+
+       COPY_NODE_FIELD(defnames);
+       COPY_NODE_FIELD(relation);
+       COPY_NODE_FIELD(keys);
+       COPY_NODE_FIELD(options);
+       COPY_SCALAR_FIELD(if_not_exists);
+
+       return newnode;
+}
+
  static CreateFunctionStmt *
  _copyCreateFunctionStmt(const CreateFunctionStmt *from)
  {
@@ -5050,6 +5064,9 @@ copyObject(const void *from)
                 case T_IndexStmt:
                         retval = _copyIndexStmt(from);
                         break;
+               case T_CreateStatsStmt:
+                       retval = _copyCreateStatsStmt(from);
+                       break;
                 case T_CreateFunctionStmt:
                         retval = _copyCreateFunctionStmt(from);
                         break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 0d12636d92cf86a2255af7bb4b890a45a15d238f..5941b7a2bfbe4c18426d8a35de11a1c240150d32 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -1334,6 +1334,18 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b)
         return true;
  }
  
+/*
+ * _equalCreateStatsStmt
+ *             Compare two CreateStatsStmt nodes field by field (defnames, relation,
+ *             keys, options, if_not_exists).
+ *
+ * NOTE(review): the COMPARE_* macros are defined outside this hunk;
+ * presumably each one returns false on the first mismatch, so reaching the
+ * final statement means all fields matched -- confirm against the macros.
+ */
+static bool
+_equalCreateStatsStmt(const CreateStatsStmt *a, const CreateStatsStmt *b)
+{
+       COMPARE_NODE_FIELD(defnames);
+       COMPARE_NODE_FIELD(relation);
+       COMPARE_NODE_FIELD(keys);
+       COMPARE_NODE_FIELD(options);
+       COMPARE_SCALAR_FIELD(if_not_exists);
+
+       return true;
+}
+
  static bool
  _equalCreateFunctionStmt(const CreateFunctionStmt *a, const CreateFunctionStmt *b)
  {
@@ -3236,6 +3248,9 @@ equal(const void *a, const void *b)
                 case T_IndexStmt:
                         retval = _equalIndexStmt(a, b);
                         break;
+               case T_CreateStatsStmt:
+                       retval = _equalCreateStatsStmt(a, b);
+                       break;
                 case T_CreateFunctionStmt:
                         retval = _equalCreateFunctionStmt(a, b);
                         break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 1b9005fa537c985c4e112dc581dcfc9940525500..541af029353382d50b19884a178fbe74ad76dcc2 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2202,6 +2202,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
         WRITE_NODE_FIELD(lateral_vars);
         WRITE_BITMAPSET_FIELD(lateral_referencers);
         WRITE_NODE_FIELD(indexlist);
+       WRITE_NODE_FIELD(statlist);
         WRITE_UINT_FIELD(pages);
         WRITE_FLOAT_FIELD(tuples, "%.0f");
         WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
@@ -2274,6 +2275,18 @@ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node)
                 appendStringInfo(str, " %d", list_length(node->rinfos[i]));
  }
  
+/*
+ * _outStatisticExtInfo
+ *             Write a StatisticExtInfo node to 'str' under the label
+ *             STATISTICEXTINFO.
+ *
+ * Only statOid, kind and keys are written, in that order; the 'rel'
+ * back-pointer is deliberately left out.
+ */
+static void
+_outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node)
+{
+       WRITE_NODE_TYPE("STATISTICEXTINFO");
+
+       /* NB: this isn't a complete set of fields */
+       WRITE_OID_FIELD(statOid);
+       /* don't write rel, leads to infinite recursion in plan tree dump */
+       WRITE_CHAR_FIELD(kind);
+       WRITE_BITMAPSET_FIELD(keys);
+}
+
  static void
  _outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
  {
@@ -2577,6 +2590,18 @@ _outIndexStmt(StringInfo str, const IndexStmt *node)
         WRITE_BOOL_FIELD(if_not_exists);
  }
  
+/*
+ * _outCreateStatsStmt
+ *             Write a CreateStatsStmt node to 'str' under the label
+ *             CREATESTATSSTMT, emitting defnames, relation, keys, options and
+ *             if_not_exists in that order.
+ */
+static void
+_outCreateStatsStmt(StringInfo str, const CreateStatsStmt *node)
+{
+       WRITE_NODE_TYPE("CREATESTATSSTMT");
+
+       WRITE_NODE_FIELD(defnames);
+       WRITE_NODE_FIELD(relation);
+       WRITE_NODE_FIELD(keys);
+       WRITE_NODE_FIELD(options);
+       WRITE_BOOL_FIELD(if_not_exists);
+}
+
  static void
  _outNotifyStmt(StringInfo str, const NotifyStmt *node)
  {
@@ -3936,6 +3961,9 @@ outNode(StringInfo str, const void *obj)
                         case T_PlannerParamItem:
                                 _outPlannerParamItem(str, obj);
                                 break;
+                       case T_StatisticExtInfo:
+                               _outStatisticExtInfo(str, obj);
+                               break;
  
                         case T_ExtensibleNode:
                                 _outExtensibleNode(str, obj);
@@ -3953,6 +3981,9 @@ outNode(StringInfo str, const void *obj)
                         case T_IndexStmt:
                                 _outIndexStmt(str, obj);
                                 break;
+                       case T_CreateStatsStmt:
+                               _outCreateStatsStmt(str, obj);
+                               break;
                         case T_NotifyStmt:
                                 _outNotifyStmt(str, obj);
                                 break;
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c

index 463f806467827f77181c69eca5bf5604edb72d38..cc88dcc28e4a837ed6e6e075ae2bc1f00a9d26a7 100644 (file)
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -29,6 +29,7 @@
  #include "catalog/heap.h"
  #include "catalog/partition.h"
  #include "catalog/pg_am.h"
+#include "catalog/pg_statistic_ext.h"
  #include "foreign/fdwapi.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
@@ -40,8 +41,11 @@
  #include "parser/parse_relation.h"
  #include "parser/parsetree.h"
  #include "rewrite/rewriteManip.h"
+#include "statistics/statistics.h"
  #include "storage/bufmgr.h"
+#include "utils/builtins.h"
  #include "utils/lsyscache.h"
+#include "utils/syscache.h"
  #include "utils/rel.h"
  #include "utils/snapmgr.h"
  
@@ -63,7 +67,7 @@ static List *get_relation_constraints(PlannerInfo *root,
                                                  bool include_notnull);
  static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
                                   Relation heapRelation);
-
+static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
  
  /*
   * get_relation_info -
@@ -398,6 +402,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
  
         rel->indexlist = indexinfos;
  
+       rel->statlist = get_relation_statistics(rel, relation);
+
         /* Grab foreign-table info using the relcache, while we have it */
         if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
         {
@@ -1251,6 +1257,65 @@ get_relation_constraints(PlannerInfo *root,
         return result;
  }
  
+/*
+ * get_relation_statistics
+ *             Collect planner metadata for the extended statistics of a table.
+ *
+ * The result is a List of StatisticExtInfo nodes (NIL when there are none).
+ * Only identifying metadata is filled in; the statistics data itself is not
+ * loaded.  A node is produced only for a statistics kind that has actually
+ * been built.
+ */
+static List *
+get_relation_statistics(RelOptInfo *rel, Relation relation)
+{
+       List       *result = NIL;
+       List       *oids = RelationGetStatExtList(relation);
+       ListCell   *cell;
+
+       foreach(cell, oids)
+       {
+               Oid                     statOid = lfirst_oid(cell);
+               HeapTuple       tuple;
+               Form_pg_statistic_ext form;
+               Bitmapset  *attnums = NULL;
+               int                     k;
+
+               tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
+               if (tuple == NULL)
+                       elog(ERROR, "cache lookup failed for statistics %u", statOid);
+               form = (Form_pg_statistic_ext) GETSTRUCT(tuple);
+
+               /*
+                * Gather the covered column numbers up front.  The work is thrown
+                * away when nothing has been built for this object, which is not
+                * considered worth optimizing.
+                */
+               k = 0;
+               while (k < form->stakeys.dim1)
+               {
+                       attnums = bms_add_member(attnums, form->stakeys.values[k]);
+                       k++;
+               }
+
+               /* emit one StatisticExtInfo per kind that has been built */
+               if (statext_is_kind_built(tuple, STATS_EXT_NDISTINCT))
+               {
+                       StatisticExtInfo *info = makeNode(StatisticExtInfo);
+
+                       info->kind = STATS_EXT_NDISTINCT;
+                       info->statOid = statOid;
+                       info->keys = bms_copy(attnums);
+                       info->rel = rel;
+
+                       result = lcons(info, result);
+               }
+
+               /* each node owns its own copy of the key set */
+               bms_free(attnums);
+               ReleaseSysCache(tuple);
+       }
+
+       list_free(oids);
+
+       return result;
+}
  
  /*
   * relation_excluded_by_constraints
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index 82844a0399d7ad6da9fd7df51cb32877eef9d2a4..bbcfc1fb4fdb50791efbf8f3b353b4dc16923065 100644 (file)
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -257,7 +257,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
                 ConstraintsSetStmt CopyStmt CreateAsStmt CreateCastStmt
                 CreateDomainStmt CreateExtensionStmt CreateGroupStmt CreateOpClassStmt
                 CreateOpFamilyStmt AlterOpFamilyStmt CreatePLangStmt
-               CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt
+               CreateSchemaStmt CreateSeqStmt CreateStmt CreateStatsStmt CreateTableSpaceStmt
                 CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt
                 CreateAssertStmt CreateTransformStmt CreateTrigStmt CreateEventTrigStmt
                 CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatePolicyStmt
@@ -874,6 +874,7 @@ stmt :
                         | CreateSeqStmt
                         | CreateStmt
                         | CreateSubscriptionStmt
+                       | CreateStatsStmt
                         | CreateTableSpaceStmt
                         | CreateTransformStmt
                         | CreateTrigStmt
@@ -3747,6 +3748,35 @@ OptConsTableSpace:   USING INDEX TABLESPACE name { $$ = $4; }
  ExistingIndex:   USING INDEX index_name                                { $$ = $3; }
                 ;
  
+/*****************************************************************************
+ *
+ *             QUERY :
+ *                             CREATE STATISTICS [IF NOT EXISTS] stats_name WITH (options)
+ *                                     ON (columns) FROM relname
+ *
+ *             The two productions below differ only in the IF NOT EXISTS
+ *             clause, which is recorded in the node's if_not_exists flag.
+ *
+ *****************************************************************************/
+
+
+CreateStatsStmt:       CREATE STATISTICS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name
+                                               {
+                                                       CreateStatsStmt *n = makeNode(CreateStatsStmt);
+                                                       n->defnames = $3;
+                                                       n->relation = $10;
+                                                       n->keys = $7;
+                                                       n->options = $4;
+                                                       n->if_not_exists = false;
+                                                       $$ = (Node *)n;
+                                               }
+                                       | CREATE STATISTICS IF_P NOT EXISTS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name
+                                               {
+                                                       CreateStatsStmt *n = makeNode(CreateStatsStmt);
+                                                       n->defnames = $6;
+                                                       n->relation = $13;
+                                                       n->keys = $10;
+                                                       n->options = $7;
+                                                       n->if_not_exists = true;
+                                                       $$ = (Node *)n;
+                                               }
+                       ;
  
  /*****************************************************************************
   *
@@ -6042,6 +6072,7 @@ drop_type_any_name:
                         | FOREIGN TABLE                                                 { $$ = OBJECT_FOREIGN_TABLE; }
                         | COLLATION                                                             { $$ = OBJECT_COLLATION; }
                         | CONVERSION_P                                                  { $$ = OBJECT_CONVERSION; }
+                       | STATISTICS                                                    { $$ = OBJECT_STATISTIC_EXT; }
                         | TEXT_P SEARCH PARSER                                  { $$ = OBJECT_TSPARSER; }
                         | TEXT_P SEARCH DICTIONARY                              { $$ = OBJECT_TSDICTIONARY; }
                         | TEXT_P SEARCH TEMPLATE                                { $$ = OBJECT_TSTEMPLATE; }
@@ -6119,7 +6150,7 @@ opt_restart_seqs:
   *                 EXTENSION | EVENT TRIGGER | FOREIGN DATA WRAPPER |
   *                 FOREIGN TABLE | INDEX | [PROCEDURAL] LANGUAGE |
   *                 MATERIALIZED VIEW | POLICY | ROLE | SCHEMA | SEQUENCE |
- *                 SERVER | TABLE | TABLESPACE |
+ *                 SERVER | STATISTICS | TABLE | TABLESPACE |
   *                 TEXT SEARCH CONFIGURATION | TEXT SEARCH DICTIONARY |
   *                 TEXT SEARCH PARSER | TEXT SEARCH TEMPLATE | TYPE |
   *                 VIEW] <objname> |
@@ -6288,6 +6319,7 @@ comment_type_any_name:
                         COLUMN                                                          { $$ = OBJECT_COLUMN; }
                         | INDEX                                                         { $$ = OBJECT_INDEX; }
                         | SEQUENCE                                                      { $$ = OBJECT_SEQUENCE; }
+                       | STATISTICS                                            { $$ = OBJECT_STATISTIC_EXT; }
                         | TABLE                                                         { $$ = OBJECT_TABLE; }
                         | VIEW                                                          { $$ = OBJECT_VIEW; }
                         | MATERIALIZED VIEW                                     { $$ = OBJECT_MATVIEW; }
@@ -8428,6 +8460,15 @@ RenameStmt: ALTER AGGREGATE aggregate_with_argtypes RENAME TO name
                                         n->missing_ok = false;
                                         $$ = (Node *)n;
                                 }
+                       | ALTER STATISTICS any_name RENAME TO name
+                               {
+                                       RenameStmt *n = makeNode(RenameStmt);
+                                       n->renameType = OBJECT_STATISTIC_EXT;
+                                       n->object = (Node *) $3;
+                                       n->newname = $6;
+                                       n->missing_ok = false;
+                                       $$ = (Node *)n;
+                               }
                         | ALTER TEXT_P SEARCH PARSER any_name RENAME TO name
                                 {
                                         RenameStmt *n = makeNode(RenameStmt);
@@ -8643,6 +8684,15 @@ AlterObjectSchemaStmt:
                                         n->missing_ok = true;
                                         $$ = (Node *)n;
                                 }
+                       | ALTER STATISTICS any_name SET SCHEMA name
+                               {
+                                       AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt);
+                                       n->objectType = OBJECT_STATISTIC_EXT;
+                                       n->object = (Node *) $3;
+                                       n->newschema = $6;
+                                       n->missing_ok = false;
+                                       $$ = (Node *)n;
+                               }
                         | ALTER TEXT_P SEARCH PARSER any_name SET SCHEMA name
                                 {
                                         AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt);
@@ -8906,6 +8956,14 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec
                                         n->newowner = $6;
                                         $$ = (Node *)n;
                                 }
+                       | ALTER STATISTICS any_name OWNER TO RoleSpec
+                               {
+                                       AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
+                                       n->objectType = OBJECT_STATISTIC_EXT;
+                                       n->object = (Node *) $3;
+                                       n->newowner = $6;
+                                       $$ = (Node *)n;
+                               }
                         | ALTER TEXT_P SEARCH DICTIONARY any_name OWNER TO RoleSpec
                                 {
                                         AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile

new file mode 100644 (file)

index 0000000..b3615bd
--- /dev/null
+++ b/src/backend/statistics/Makefile
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for statistics
+#
+# IDENTIFICATION
+#    src/backend/statistics/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/statistics
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = extended_stats.o mvdistinct.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/statistics/README b/src/backend/statistics/README

new file mode 100644 (file)

index 0000000..beb7c24
--- /dev/null
+++ b/src/backend/statistics/README
@@ -0,0 +1,34 @@
+Extended statistics
+===================
+
+When estimating various quantities (e.g. condition selectivities) the default
+approach relies on the assumption of independence. In practice that's often
+not true, resulting in estimation errors.
+
+Extended statistics track different types of dependencies between the columns,
+hopefully improving the estimates and producing better plans.
+
+Currently we only have one type of extended statistics - ndistinct
+coefficients, and we use it to improve estimates of grouping queries. See
+README.ndistinct for details.
+
+
+Size of sample in ANALYZE
+-------------------------
+When performing ANALYZE, the number of rows to sample is determined as
+
+    (300 * statistics_target)
+
+That works reasonably well for statistics on individual columns, but perhaps
+it's not enough for extended statistics. Papers analyzing estimation errors
+all use samples proportional to the table (usually finding that 1-3% of the
+table is enough to build accurate stats).
+
+The requested accuracy (number of MCV items or histogram bins) should also
+be considered when determining the sample size, and in extended statistics
+those are not necessarily limited by statistics_target.
+
+This, however, merits further discussion, because collecting the sample is
+quite expensive and increasing it further would make ANALYZE even more
+painful.  Judging by experiments with the current implementation, the fixed
+size seems to work reasonably well for now, so we leave this as future work.
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c

new file mode 100644 (file)

index 0000000..d2b9f6a
--- /dev/null
+++ b/src/backend/statistics/extended_stats.c
@@ -0,0 +1,389 @@
+/*-------------------------------------------------------------------------
+ *
+ * extended_stats.c
+ *       POSTGRES extended statistics
+ *
+ * Generic code supporting statistic objects created via CREATE STATISTICS.
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/backend/statistics/extended_stats.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_statistic_ext.h"
+#include "nodes/relation.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+/*
+ * Used internally to refer to an individual pg_statistic_ext entry.
+ *
+ * One of these is built per catalog row by fetch_statentries_for_relation
+ * and consumed by BuildRelationExtStatistics.
+ */
+typedef struct StatExtEntry
+{
+       Oid                     statOid;        /* OID of pg_statistic_ext entry */
+       Bitmapset  *columns;    /* attribute numbers covered by the statistics */
+       List       *types;              /* enabled statistic kinds: the 'char' kind
+                                                        * codes, kept as an integer list */
+} StatExtEntry;
+
+
+/*
+ * Forward declarations of file-local helpers.  Note that statext_store is
+ * keyed by the OID of the pg_statistic_ext row, not by a relation OID.
+ */
+static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
+static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+                                         int natts, VacAttrStats **vacattrstats);
+static void statext_store(Relation pg_stext, Oid statOid,
+                         MVNDistinct *ndistinct,
+                         VacAttrStats **stats);
+
+
+/*
+ * BuildRelationExtStatistics
+ *             Compute requested extended stats, using the rows sampled for the
+ *             plain (single-column) stats.
+ *
+ * This fetches the list of statistics objects defined on 'onerel' from
+ * pg_statistic_ext, builds each enabled kind from the sample and stores the
+ * result back into the catalog (as bytea values).
+ *
+ * totalrows, numrows and rows are handed through to the per-kind build
+ * routine.  vacattrstats (of length natts) is the array of per-column
+ * VacAttrStats from which the entries matching each statistics object's
+ * columns are picked.
+ */
+void
+BuildRelationExtStatistics(Relation onerel, double totalrows,
+                                                  int numrows, HeapTuple *rows,
+                                                  int natts, VacAttrStats **vacattrstats)
+{
+       Relation        pg_stext;
+       ListCell   *lc;
+       List       *statentries;
+
+       pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
+       statentries = fetch_statentries_for_relation(pg_stext,
+                                                                                                RelationGetRelid(onerel));
+
+       foreach(lc, statentries)
+       {
+               StatExtEntry   *stat = (StatExtEntry *) lfirst(lc);
+               MVNDistinct        *ndistinct = NULL;
+               VacAttrStats  **colstats;
+               ListCell           *lc2;
+
+               /*
+                * Filter only the interesting vacattrstats records.  This is kept
+                * in its own variable so it does not shadow the list being walked.
+                */
+               colstats = lookup_var_attr_stats(onerel, stat->columns,
+                                                                                natts, vacattrstats);
+
+               /* check allowed number of dimensions */
+               Assert(bms_num_members(stat->columns) >= 2 &&
+                          bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
+
+               /* compute statistic of each type */
+               foreach(lc2, stat->types)
+               {
+                       char    t = (char) lfirst_int(lc2);
+
+                       if (t == STATS_EXT_NDISTINCT)
+                               ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
+                                                                                                       stat->columns, colstats);
+               }
+
+               /* store the statistics in the catalog */
+               statext_store(pg_stext, stat->statOid, ndistinct, colstats);
+       }
+
+       heap_close(pg_stext, RowExclusiveLock);
+}
+
+/*
+ * statext_is_kind_built
+ *             Report whether the given pg_statistic_ext tuple holds built
+ *             statistics of kind 'type'.
+ *
+ * A kind counts as built when its catalog column is not NULL.  Only
+ * STATS_EXT_NDISTINCT is recognized; any other kind raises an error.
+ */
+bool
+statext_is_kind_built(HeapTuple htup, char type)
+{
+       if (type != STATS_EXT_NDISTINCT)
+               elog(ERROR, "unexpected statistics type requested: %d", type);
+
+       return !heap_attisnull(htup, Anum_pg_statistic_ext_standistinct);
+}
+
+/*
+ * fetch_statentries_for_relation
+ *             Build a list of StatExtEntry, one per pg_statistic_ext row defined
+ *             on the relation 'relid'.
+ *
+ * 'pg_statext' is the already-opened pg_statistic_ext catalog.  Each entry
+ * carries the row's OID, the set of attribute numbers from stakeys and the
+ * enabled kinds decoded from staenabled.
+ */
+static List *
+fetch_statentries_for_relation(Relation pg_statext, Oid relid)
+{
+       List       *entries = NIL;
+       SysScanDesc sscan;
+       ScanKeyData key;
+       HeapTuple   tuple;
+
+       /* Scan pg_statistic_ext for the rows whose starelid is this relation. */
+       ScanKeyInit(&key,
+                               Anum_pg_statistic_ext_starelid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(relid));
+       sscan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
+                                                          NULL, 1, &key);
+
+       for (;;)
+       {
+               Form_pg_statistic_ext form;
+               StatExtEntry *item;
+               ArrayType  *kinds;
+               char       *kindchars;
+               Datum           kindsdatum;
+               bool            kindsnull;
+               int                     n;
+
+               tuple = systable_getnext(sscan);
+               if (!HeapTupleIsValid(tuple))
+                       break;
+
+               item = palloc0(sizeof(StatExtEntry));
+               item->statOid = HeapTupleGetOid(tuple);
+
+               /* collect the attribute numbers listed in stakeys */
+               form = (Form_pg_statistic_ext) GETSTRUCT(tuple);
+               for (n = 0; n < form->stakeys.dim1; n++)
+                       item->columns = bms_add_member(item->columns,
+                                                                                  form->stakeys.values[n]);
+
+               /* turn the staenabled char array into an integer list of kinds */
+               kindsdatum = SysCacheGetAttr(STATEXTOID, tuple,
+                                                                        Anum_pg_statistic_ext_staenabled,
+                                                                        &kindsnull);
+               Assert(!kindsnull);
+               kinds = DatumGetArrayTypeP(kindsdatum);
+               if (ARR_NDIM(kinds) != 1 ||
+                       ARR_HASNULL(kinds) ||
+                       ARR_ELEMTYPE(kinds) != CHAROID)
+                       elog(ERROR, "staenabled is not a 1-D char array");
+               kindchars = (char *) ARR_DATA_PTR(kinds);
+               for (n = 0; n < ARR_DIMS(kinds)[0]; n++)
+               {
+                       Assert(kindchars[n] == STATS_EXT_NDISTINCT);
+                       item->types = lappend_int(item->types, (int) kindchars[n]);
+               }
+
+               entries = lappend(entries, item);
+       }
+
+       systable_endscan(sscan);
+
+       return entries;
+}
+
+/*
+ * lookup_var_attr_stats
+ *             Pick, out of 'vacattrstats' (of length 'natts'), the entries whose
+ *             tupattnum is a member of 'attrs'.
+ *
+ * Returns a newly allocated array with one slot per member of 'attrs', in
+ * the order bms_next_member yields them.  An error is raised when a
+ * requested column has no entry in 'vacattrstats'.
+ */
+static VacAttrStats **
+lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts,
+                                         VacAttrStats **vacattrstats)
+{
+       VacAttrStats **result;
+       Bitmapset  *seen = NULL;
+       int                     nfound = 0;
+       int                     attnum;
+
+       result = (VacAttrStats **)
+               palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
+
+       /* find the VacAttrStats with the same attnum for every requested column */
+       for (attnum = bms_next_member(attrs, -1);
+                attnum >= 0;
+                attnum = bms_next_member(attrs, attnum))
+       {
+               VacAttrStats *found = NULL;
+               int                     idx;
+
+               for (idx = 0; idx < natts && found == NULL; idx++)
+               {
+                       if (vacattrstats[idx]->tupattnum == attnum)
+                               found = vacattrstats[idx];
+               }
+
+               if (found == NULL)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("extended statistics could not be collected for column \"%s\" of relation %s.%s",
+                                                       NameStr(RelationGetDescr(rel)->attrs[attnum - 1]->attname),
+                                                       get_namespace_name(rel->rd_rel->relnamespace),
+                                                       RelationGetRelationName(rel)),
+                                        errhint("Consider ALTER TABLE \"%s\".\"%s\" ALTER \"%s\" SET STATISTICS -1",
+                                                        get_namespace_name(rel->rd_rel->relnamespace),
+                                                        RelationGetRelationName(rel),
+                                                        NameStr(RelationGetDescr(rel)->attrs[attnum - 1]->attname))));
+
+               /* the match must be a live column; remember its attnum */
+               Assert(!found->attr->attisdropped);
+               result[nfound++] = found;
+               seen = bms_add_member(seen, found->tupattnum);
+       }
+
+       /* cross-check: the matched set must equal the requested set */
+       if (bms_subset_compare(seen, attrs) != BMS_EQUAL)
+               elog(ERROR, "could not find all attributes in attribute stats array");
+       bms_free(seen);
+
+       return result;
+}
+
+/*
+ * statext_store
+ *             Write freshly built statistics into an existing pg_statistic_ext row.
+ *
+ * The ndistinct column of the row identified by statOid is always replaced:
+ * with the serialized form of 'ndistinct' when one was built, with NULL
+ * otherwise.  No other column is replaced.  'stats' is accepted but is not
+ * referenced by the body.
+ */
+static void
+statext_store(Relation pg_stext, Oid statOid,
+                         MVNDistinct *ndistinct,
+                         VacAttrStats **stats)
+{
+       const int       ndcol = Anum_pg_statistic_ext_standistinct - 1;
+       Datum           newvalues[Natts_pg_statistic_ext];
+       bool            newnulls[Natts_pg_statistic_ext];
+       bool            doreplace[Natts_pg_statistic_ext];
+       HeapTuple       current;
+       HeapTuple       updated;
+
+       /* start from "every column NULL, nothing replaced" */
+       memset(newvalues, 0, sizeof(newvalues));
+       memset(newnulls, 1, sizeof(newnulls));
+       memset(doreplace, 0, sizeof(doreplace));
+
+       /* the ndistinct column is overwritten unconditionally */
+       doreplace[ndcol] = true;
+       if (ndistinct != NULL)
+       {
+               bytea      *serialized = statext_ndistinct_serialize(ndistinct);
+
+               newvalues[ndcol] = PointerGetDatum(serialized);
+               newnulls[ndcol] = (serialized == NULL);
+       }
+
+       /* the row to modify must already exist */
+       current = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
+       if (!HeapTupleIsValid(current))
+               elog(ERROR, "cache lookup failed for extended statistics %u", statOid);
+
+       /* build the modified copy, then write it out */
+       updated = heap_modify_tuple(current,
+                                                               RelationGetDescr(pg_stext),
+                                                               newvalues,
+                                                               newnulls,
+                                                               doreplace);
+       ReleaseSysCache(current);
+       CatalogTupleUpdate(pg_stext, &updated->t_self, updated);
+
+       heap_freetuple(updated);
+}
+
+/*
+ * multi_sort_init
+ *             Allocate a zeroed MultiSortSupport with room for 'ndims' sort
+ *             dimensions and record the dimension count in it.
+ */
+MultiSortSupport
+multi_sort_init(int ndims)
+{
+       MultiSortSupport result;
+       Size            size;
+
+       Assert(ndims >= 2);
+
+       /* header up to the ssup array, plus one SortSupportData per dimension */
+       size = offsetof(MultiSortSupportData, ssup)
+               + sizeof(SortSupportData) * ndims;
+
+       result = (MultiSortSupport) palloc0(size);
+       result->ndims = ndims;
+
+       return result;
+}
+
+/*
+ * multi_sort_add_dimension
+ *             Prepare sort support info for the dimension at position 'sortdim',
+ *             using the given sort operator.
+ *
+ * The dimension is set up with the current memory context, the default
+ * collation and NULLs not sorting first; 'oper' and the prepared entry are
+ * then passed to PrepareSortSupportFromOrderingOp.
+ */
+void
+multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
+{
+       SortSupport             ssup = &mss->ssup[sortdim];
+
+       ssup->ssup_cxt = CurrentMemoryContext;
+       ssup->ssup_collation = DEFAULT_COLLATION_OID;
+       ssup->ssup_nulls_first = false;
+
+       PrepareSortSupportFromOrderingOp(oper, ssup);
+}
+
+/*
+ * multi_sort_compare
+ *             Compare two SortItems over all dimensions, in dimension order.
+ *
+ * 'arg' is the MultiSortSupport describing the dimensions.  The result of
+ * the first dimension that does not compare equal is returned; 0 means
+ * every dimension compared equal.
+ */
+int
+multi_sort_compare(const void *a, const void *b, void *arg)
+{
+       MultiSortSupport mss = (MultiSortSupport) arg;
+       const SortItem *lhs = (const SortItem *) a;
+       const SortItem *rhs = (const SortItem *) b;
+       int                     result = 0;
+       int                     dim = 0;
+
+       /* stop at the first dimension that decides the ordering */
+       while (result == 0 && dim < mss->ndims)
+       {
+               result = ApplySortComparator(lhs->values[dim], lhs->isnull[dim],
+                                                                        rhs->values[dim], rhs->isnull[dim],
+                                                                        &mss->ssup[dim]);
+               dim++;
+       }
+
+       return result;
+}
+
+/*
+ * multi_sort_compare_dim
+ *             Compare two SortItems on the single dimension 'dim', using that
+ *             dimension's sort support from 'mss'.
+ */
+int
+multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
+                                          MultiSortSupport mss)
+{
+       SortSupport             dimsup = &mss->ssup[dim];
+
+       return ApplySortComparator(a->values[dim], a->isnull[dim],
+                                                          b->values[dim], b->isnull[dim],
+                                                          dimsup);
+}
+
+/*
+ * multi_sort_compare_dims
+ *             Compare two SortItems on the dimensions 'start' through 'end'
+ *             (both inclusive), in increasing dimension order.
+ *
+ * Returns the result of the first dimension that does not compare equal,
+ * or 0 when all of them compare equal.
+ */
+int
+multi_sort_compare_dims(int start, int end,
+                                               const SortItem *a, const SortItem *b,
+                                               MultiSortSupport mss)
+{
+       int                     result = 0;
+       int                     cur = start;
+
+       while (result == 0 && cur <= end)
+       {
+               result = ApplySortComparator(a->values[cur], a->isnull[cur],
+                                                                        b->values[cur], b->isnull[cur],
+                                                                        &mss->ssup[cur]);
+               cur++;
+       }
+
+       return result;
+}
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c

new file mode 100644 (file)

index 0000000..5df4e29
--- /dev/null
+++ b/src/backend/statistics/mvdistinct.c
@@ -0,0 +1,671 @@
+/*-------------------------------------------------------------------------
+ *
+ * mvdistinct.c
+ *       POSTGRES multivariate ndistinct coefficients
+ *
+ * Estimating the number of groups in a combination of columns (e.g. for
+ * GROUP BY) is tricky, and the estimation error is often significant.
+ *
+ * The multivariate ndistinct coefficients address this by storing ndistinct
+ * estimates for combinations of the user-specified columns.  So for example
+ * given a statistics object on three columns (a,b,c), this module estimates
+ * and stores n-distinct for (a,b), (a,c), (b,c) and (a,b,c).  The per-column
+ * estimates are already available in pg_statistic.
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/backend/statistics/mvdistinct.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_statistic_ext.h"
+#include "utils/fmgrprotos.h"
+#include "utils/lsyscache.h"
+#include "lib/stringinfo.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
+
+
+static double ndistinct_for_combination(double totalrows, int numrows,
+                                       HeapTuple *rows, VacAttrStats **stats,
+                                       int k, int *combination);
+static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
+static int     n_choose_k(int n, int k);
+static int     num_combinations(int n);
+
+/* Combination generator API */
+
+/* internal state for generator of k-combinations of n elements (all pre-built up front) */
+typedef struct CombinationGenerator
+{
+       int             k;                              /* size of the combination */
+       int             n;                              /* total number of elements */
+       int             current;                /* index of the next combination to return (fill cursor while building) */
+       int             ncombinations;  /* number of combinations (size of array) */
+       int        *combinations;       /* flat array, k ints per combination; combination i starts at i * k */
+} CombinationGenerator;
+
+static CombinationGenerator *generator_init(int n, int k);
+static void generator_free(CombinationGenerator *state);
+static int *generator_next(CombinationGenerator *state);
+static void generate_combinations(CombinationGenerator *state);
+
+
+/*
+ * statext_ndistinct_build
+ *             Compute ndistinct estimates for all combinations of the attributes.
+ *
+ * Enumerates every combination of 2 .. numattrs attributes and stores, for
+ * each one, its set of attnums and the result of ndistinct_for_combination().
+ */
+MVNDistinct *
+statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
+                                               Bitmapset *attrs, VacAttrStats **stats)
+{
+       MVNDistinct *result;
+       int                     k;
+       int                     itemcnt;
+       int                     numattrs = bms_num_members(attrs);
+       int                     numcombs = num_combinations(numattrs);
+
+       result = palloc(offsetof(MVNDistinct, items) +
+                                       numcombs * sizeof(MVNDistinctItem));
+       result->magic = STATS_NDISTINCT_MAGIC;
+       result->type = STATS_NDISTINCT_TYPE_BASIC;
+       result->nitems = numcombs;
+
+       itemcnt = 0;
+       for (k = 2; k <= numattrs; k++)
+       {
+               int        *combination;
+               CombinationGenerator *generator;
+
+               /* generate combinations of K out of N elements (indexes into stats[]) */
+               generator = generator_init(numattrs, k);
+
+               while ((combination = generator_next(generator)))
+               {
+                       MVNDistinctItem *item = &result->items[itemcnt];
+                       int             j;
+
+                       item->attrs = NULL;
+                       for (j = 0; j < k; j++)
+                               item->attrs = bms_add_member(item->attrs,
+                                                                                        stats[combination[j]]->attr->attnum);
+                       item->ndistinct =
+                               ndistinct_for_combination(totalrows, numrows, rows,
+                                                                                 stats, k, combination);
+
+                       itemcnt++;
+                       Assert(itemcnt <= result->nitems);
+               }
+
+               generator_free(generator);
+       }
+
+       /* must consume exactly the whole output array (numcombs items) */
+       Assert(itemcnt == result->nitems);
+
+       return result;
+}
+
+/*
+ * statext_ndistinct_load
+ *             Load the ndistinct value for the indicated pg_statistic_ext tuple
+ */
+MVNDistinct *
+statext_ndistinct_load(Oid mvoid)
+{
+       bool            isnull = false;
+       Datum           ndist;
+       HeapTuple       htup;
+       MVNDistinct *result;
+
+       htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid));
+       if (!htup)
+               elog(ERROR, "cache lookup failed for statistics %u", mvoid);
+
+       ndist = SysCacheGetAttr(STATEXTOID, htup,
+                                                       Anum_pg_statistic_ext_standistinct, &isnull);
+       if (isnull)
+               elog(ERROR, "requested statistic kind %c not yet built for statistics %u",
+                        STATS_EXT_NDISTINCT, mvoid);
+
+       result = statext_ndistinct_deserialize(DatumGetByteaP(ndist));
+       ReleaseSysCache(htup);          /* only now: ndist may point into htup */
+       return result;
+}
+
+/*
+ * statext_ndistinct_serialize
+ *             serialize ndistinct to the on-disk bytea format (a newly palloc'd bytea)
+ */
+bytea *
+statext_ndistinct_serialize(MVNDistinct *ndistinct)
+{
+       int                     i;
+       bytea      *output;
+       char       *tmp;
+       Size            len;
+
+       Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
+       Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
+
+       /*
+        * Base size is the varlena header plus the fixed MVNDistinct header, plus
+        * for each item the part of MVNDistinctItem before attrs and an int count.
+        */
+       len = VARHDRSZ + offsetof(MVNDistinct, items) +
+               ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int));
+
+       /* and also include space for the actual attribute numbers */
+       for (i = 0; i < ndistinct->nitems; i++)
+       {
+               int             nmembers;
+
+               nmembers = bms_num_members(ndistinct->items[i].attrs);
+               Assert(nmembers >= 2);
+               len += sizeof(AttrNumber) * nmembers;
+       }
+
+       output = (bytea *) palloc(len);
+       SET_VARSIZE(output, len);
+
+       tmp = VARDATA(output);
+
+       /* Store the base struct values (everything before the items array) */
+       memcpy(tmp, ndistinct, offsetof(MVNDistinct, items));
+       tmp += offsetof(MVNDistinct, items);
+
+       /*
+        * For each item store: ndistinct (double), number of attributes (int),
+        * then the attribute numbers (one AttrNumber each).
+        */
+       for (i = 0; i < ndistinct->nitems; i++)
+       {
+               MVNDistinctItem item = ndistinct->items[i];
+               int             nmembers = bms_num_members(item.attrs);
+               int             x;
+
+               memcpy(tmp, &item.ndistinct, sizeof(double));
+               tmp += sizeof(double);
+               memcpy(tmp, &nmembers, sizeof(int));
+               tmp += sizeof(int);
+
+               x = -1;
+               while ((x = bms_next_member(item.attrs, x)) >= 0)
+               {
+                       AttrNumber      value = (AttrNumber) x;
+
+                       memcpy(tmp, &value, sizeof(AttrNumber));
+                       tmp += sizeof(AttrNumber);
+               }
+
+               Assert(tmp <= ((char *) output + len));
+       }
+
+       return output;
+}
+
+/*
+ * statext_ndistinct_deserialize
+ *             Read an on-disk bytea format MVNDistinct to in-memory format.
+ *             Returns NULL for NULL input; errors out on a malformed header.
+ */
+MVNDistinct *
+statext_ndistinct_deserialize(bytea *data)
+{
+       int                     i;
+       Size            expected_size;
+       MVNDistinct *ndistinct;
+       char       *tmp;
+
+       if (data == NULL)
+               return NULL;
+
+       if (VARSIZE_ANY_EXHDR(data) < offsetof(MVNDistinct, items))
+               elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
+                        (Size) VARSIZE_ANY_EXHDR(data), offsetof(MVNDistinct, items));
+
+       /* room for the fixed header; grown below once nitems is known */
+       ndistinct = (MVNDistinct *) palloc(sizeof(MVNDistinct));
+
+       /* copy the header from the data part (past the varlena header) */
+       tmp = VARDATA_ANY(data);
+       memcpy(ndistinct, tmp, offsetof(MVNDistinct, items));
+       tmp += offsetof(MVNDistinct, items);
+
+       /* basic sanity checks on the header */
+       if (ndistinct->magic != STATS_NDISTINCT_MAGIC)
+               elog(ERROR, "invalid ndistinct magic %d (expected %d)",
+                        ndistinct->magic, STATS_NDISTINCT_MAGIC);
+
+       if (ndistinct->type != STATS_NDISTINCT_TYPE_BASIC)
+               elog(ERROR, "invalid ndistinct type %d (expected %d)",
+                        ndistinct->type, STATS_NDISTINCT_TYPE_BASIC);
+
+       Assert(ndistinct->nitems > 0);
+
+       /* minimum size: each item has its fixed part, an int count and >= 2 attnums */
+       expected_size = offsetof(MVNDistinct, items) +
+               ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) +
+                                                        sizeof(int) + sizeof(AttrNumber) * 2);
+
+       if (VARSIZE_ANY_EXHDR(data) < expected_size)
+               elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
+                        (Size) VARSIZE_ANY_EXHDR(data), expected_size);
+
+       /* allocate space for the ndistinct items */
+       ndistinct = repalloc(ndistinct, offsetof(MVNDistinct, items) +
+                                                (ndistinct->nitems * sizeof(MVNDistinctItem)));
+
+       for (i = 0; i < ndistinct->nitems; i++)
+       {
+               MVNDistinctItem *item = &ndistinct->items[i];
+               int                     nelems;
+
+               item->attrs = NULL;
+
+               /* ndistinct value */
+               memcpy(&item->ndistinct, tmp, sizeof(double));
+               tmp += sizeof(double);
+
+               /* number of attributes */
+               memcpy(&nelems, tmp, sizeof(int));
+               tmp += sizeof(int);
+               Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS));
+
+               /* the attribute numbers themselves, rebuilt into a bitmapset */
+               while (nelems-- > 0)
+               {
+                       AttrNumber      attno;
+
+                       memcpy(&attno, tmp, sizeof(AttrNumber));
+                       tmp += sizeof(AttrNumber);
+                       item->attrs = bms_add_member(item->attrs, attno);
+               }
+
+               /* still within the bytea */
+               Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
+       }
+
+       /* we should have consumed the whole bytea exactly */
+       Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
+
+       return ndistinct;
+}
+
+/*
+ * pg_ndistinct_in
+ *             input routine for type pg_ndistinct
+ *
+ * pg_ndistinct is real enough to be a table column, but it has no
+ * operations of its own, and disallows input (just like pg_node_tree).
+ */
+Datum
+pg_ndistinct_in(PG_FUNCTION_ARGS)
+{
+       ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("cannot accept a value of type %s", "pg_ndistinct")));
+
+       PG_RETURN_VOID();                       /* keep compiler quiet */
+}
+
+/*
+ * pg_ndistinct_out
+ *             output routine for type pg_ndistinct
+ *
+ * Produces a human-readable list of the form [{attrs, ndistinct}, ...].
+ */
+Datum
+pg_ndistinct_out(PG_FUNCTION_ARGS)
+{
+       bytea      *data = PG_GETARG_BYTEA_PP(0);
+       MVNDistinct *ndist = statext_ndistinct_deserialize(data);
+       int                     i;
+       StringInfoData str;
+
+       initStringInfo(&str);
+       appendStringInfoChar(&str, '[');
+
+       for (i = 0; i < ndist->nitems; i++)
+       {
+               MVNDistinctItem item = ndist->items[i];
+
+               if (i > 0)
+                       appendStringInfoString(&str, ", ");
+
+               appendStringInfoChar(&str, '{');
+               outBitmapset(&str, item.attrs);
+               appendStringInfo(&str, ", %f}", item.ndistinct);
+       }
+
+       appendStringInfoChar(&str, ']');
+
+       PG_RETURN_CSTRING(str.data);
+}
+
+/*
+ * pg_ndistinct_recv
+ *             binary input routine for type pg_ndistinct; always raises an error
+ */
+Datum
+pg_ndistinct_recv(PG_FUNCTION_ARGS)
+{
+       ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("cannot accept a value of type %s", "pg_ndistinct")));
+
+       PG_RETURN_VOID();                       /* keep compiler quiet */
+}
+
+/*
+ * pg_ndistinct_send
+ *             binary output routine for type pg_ndistinct
+ *
+ * The value is a serialized bytea, so simply hand it over to byteasend().
+ */
+Datum
+pg_ndistinct_send(PG_FUNCTION_ARGS)
+{
+       return byteasend(fcinfo);
+}
+
+/*
+ * ndistinct_for_combination
+ *             Estimates number of distinct values in a combination of columns.
+ *
+ * 'combination' holds k indexes into stats[].  The sample rows are sorted on
+ * those columns and fed to the estimator used by compute_scalar_stats():
+ *             n*d / (n - f1 + f1*n/N)
+ *
+ * where n = numrows, N = totalrows, d = distinct combinations in the sample
+ * and f1 = combinations that occur exactly once in the sample.
+ */
+static double
+ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
+                                                 VacAttrStats **stats, int k, int *combination)
+{
+       int                     i,
+                               j;
+       int                     f1,
+                               cnt,
+                               d;
+       bool       *isnull;
+       Datum      *values;
+       SortItem   *items;
+       MultiSortSupport mss;
+
+       mss = multi_sort_init(k);
+
+       /*
+        * In order to determine the number of distinct elements, create separate
+        * values[]/isnull[] arrays with all the data we have, then sort them
+        * using the specified column combination as dimensions.  We could try to
+        * sort in place, but it'd probably be more complex and bug-prone.
+        */
+       items = (SortItem *) palloc(numrows * sizeof(SortItem));
+       values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
+       isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
+
+       for (i = 0; i < numrows; i++)
+       {
+               items[i].values = &values[i * k];
+               items[i].isnull = &isnull[i * k];
+       }
+
+       /*
+        * For each dimension, set up sort-support and fill in the values from
+        * the sample data.
+        */
+       for (i = 0; i < k; i++)
+       {
+               VacAttrStats   *colstat = stats[combination[i]];
+               TypeCacheEntry *type;
+
+               type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
+               if (type->lt_opr == InvalidOid)         /* shouldn't happen */
+                       elog(ERROR, "cache lookup failed for ordering operator for type %u",
+                                colstat->attrtypid);
+
+               /* prepare the sort function for this dimension */
+               multi_sort_add_dimension(mss, i, type->lt_opr);
+
+               /* accumulate all the data for this dimension into the arrays */
+               for (j = 0; j < numrows; j++)
+               {
+                       items[j].values[i] =
+                               heap_getattr(rows[j],
+                                                        colstat->attr->attnum,
+                                                        colstat->tupDesc,
+                                                        &items[j].isnull[i]);
+               }
+       }
+
+       /* We can sort the array now, comparing on all k dimensions ... */
+       qsort_arg((void *) items, numrows, sizeof(SortItem),
+                         multi_sort_compare, mss);
+
+       /* ... and count distinct groups (d) and groups of exactly one row (f1) */
+
+       f1 = 0;
+       cnt = 1;
+       d = 1;
+       for (i = 1; i < numrows; i++)
+       {
+               if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
+               {
+                       if (cnt == 1)
+                               f1 += 1;
+
+                       d++;
+                       cnt = 0;
+               }
+
+               cnt += 1;
+       }
+
+       if (cnt == 1)
+               f1 += 1;
+
+       return estimate_ndistinct(totalrows, numrows, d, f1);
+}
+
+/*
+ * The Duj1 estimator (already used in analyze.c):
+ *             n*d / (n - f1 + f1*n/N)
+ * with n = numrows, N = totalrows; the result is rounded to a whole number.
+ */
+static double
+estimate_ndistinct(double totalrows, int numrows, int d, int f1)
+{
+       const double sample_rows = (double) numrows;
+       const double sample_distinct = (double) d;
+       const double singletons = (double) f1;
+       double          estimate;
+
+       estimate = (sample_rows * sample_distinct) /
+               ((double) (numrows - f1) + singletons * sample_rows / totalrows);
+
+       /* Clamp into [d, totalrows] in case of roundoff error */
+       if (estimate < sample_distinct)
+               estimate = sample_distinct;
+       if (estimate > totalrows)
+               estimate = totalrows;
+
+       return floor(estimate + 0.5);
+}
+
+/*
+ * n_choose_k
+ *             Returns the binomial coefficient C(n, k).  Multiplication and
+ *             division are interleaved to keep intermediate values small.
+ */
+static int
+n_choose_k(int n, int k)
+{
+       int                     result = 1;
+       int                     step = 1;
+
+       Assert((k > 0) && (n >= k));
+
+       /* C(n, k) == C(n, n - k), so iterate over the smaller of the two */
+       k = Min(k, n - k);
+
+       while (step <= k)
+       {
+               result = (result * n) / step;
+               n--;
+               step++;
+       }
+
+       return result;
+}
+
+/*
+ * num_combinations
+ *             count the subsets of n elements that have at least two members
+ */
+static int
+num_combinations(int n)
+{
+       int                     remaining = n;
+       int                     total = 1;
+
+       /* total = 2^n, i.e. the number of all subsets of n elements */
+       while (remaining-- > 0)
+               total *= 2;
+
+       /* leave out the empty subset and the n single-element subsets */
+       return total - (n + 1);
+}
+
+/*
+ * generator_init
+ *             initialize the generator of combinations
+ *
+ * The generator produces combinations of K elements in the range 0 .. N-1.
+ * We prebuild all the combinations in this method, which is simpler than
+ * generating them on the fly.
+ */
+static CombinationGenerator *
+generator_init(int n, int k)
+{
+       CombinationGenerator *state;
+
+       Assert((n >= k) && (k > 0));
+
+       /* allocate the generator state (the combinations array comes separately) */
+       state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
+
+       state->ncombinations = n_choose_k(n, k);
+
+       /* pre-allocate space for all combinations, k ints each */
+       state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
+
+       state->current = 0;
+       state->k = k;
+       state->n = n;
+
+       /* now actually pre-generate all the combinations of K elements */
+       generate_combinations(state);
+
+       /* make sure we got the expected number of combinations */
+       Assert(state->current == state->ncombinations);
+
+       /* reset the number, so we start with the first one */
+       state->current = 0;
+
+       return state;
+}
+
+/*
+ * generator_next
+ *             hand out the next prebuilt combination
+ *
+ * Returns K array indexes (each in 0 .. N-1), or NULL when none are left.
+ */
+static int *
+generator_next(CombinationGenerator *state)
+{
+       int        *next = NULL;
+
+       if (state->current != state->ncombinations)
+               next = state->combinations + state->k * state->current++;
+       return next;
+}
+
+/*
+ * generator_free
+ *             free the internal state of the generator
+ *
+ * Frees the pre-built combinations array and then the state struct itself.
+ */
+static void
+generator_free(CombinationGenerator *state)
+{
+       pfree(state->combinations);
+       pfree(state);
+}
+
+/*
+ * generate_combinations_recurse
+ *             given a prefix, generate all possible combinations
+ *
+ * current[0 .. index-1] holds the prefix; position 'index' is tried with every
+ * value from 'start' to n-1.  Values are strictly ascending within a
+ * combination, which eliminates permutations of the same combination.
+ */
+static void
+generate_combinations_recurse(CombinationGenerator *state,
+                                                         int index, int start, int *current)
+{
+       /* If we haven't filled all k elements yet, simply recurse. */
+       if (index < state->k)
+       {
+               int             i;
+
+               /*
+                * The values have to be in ascending order, so make sure we start
+                * with the value passed by parameter.
+                */
+
+               for (i = start; i < state->n; i++)
+               {
+                       current[index] = i;
+                       generate_combinations_recurse(state, (index + 1), (i + 1), current);
+               }
+
+               return;
+       }
+       else
+       {
+               /* we got a valid combination, append it at slot state->current */
+               memcpy(&state->combinations[(state->k * state->current)],
+                          current, state->k * sizeof(int));
+               state->current++;
+       }
+}
+
+/*
+ * generate_combinations
+ *             generate all k-combinations of N elements into state->combinations
+ */
+static void
+generate_combinations(CombinationGenerator *state)
+{
+       int        *current = (int *) palloc0(sizeof(int) * state->k);
+
+       generate_combinations_recurse(state, 0, 0, current);
+
+       pfree(current);
+}
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c

index c8d20fffeafc4ce301136815cbe11f0acca29e2e..b59821bf97835a83c76c67621af070f4f497e343 100644 (file)
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1623,6 +1623,10 @@ ProcessUtilitySlow(ParseState *pstate,
                                 commandCollected = true;
                                 break;
  
+                       case T_CreateStatsStmt:
+                               address = CreateStatistics((CreateStatsStmt *) parsetree);
+                               break;
+
                         case T_AlterCollationStmt:
                                 address = AlterCollation((AlterCollationStmt *) parsetree);
                                 break;
@@ -1992,6 +1996,9 @@ AlterObjectTypeCommandTag(ObjectType objtype)
                         break;
                 case OBJECT_SUBSCRIPTION:
                         tag = "ALTER SUBSCRIPTION";
                         break;
+               case OBJECT_STATISTIC_EXT:
+                       tag = "ALTER STATISTICS";
+                       break;
                 default:
                         tag = "???";
@@ -2286,6 +2292,9 @@ CreateCommandTag(Node *parsetree)
                                         break;
                                 case OBJECT_PUBLICATION:
                                         tag = "DROP PUBLICATION";
                                         break;
+                               case OBJECT_STATISTIC_EXT:
+                                       tag = "DROP STATISTICS";
+                                       break;
                                 default:
                                         tag = "???";
@@ -2689,6 +2697,10 @@ CreateCommandTag(Node *parsetree)
                         tag = "EXECUTE";
                         break;
  
+               case T_CreateStatsStmt:
+                       tag = "CREATE STATISTICS";
+                       break;
+
                 case T_DeallocateStmt:
                         {
                                 DeallocateStmt *stmt = (DeallocateStmt *) parsetree;
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c

index 5c823250bc21d4b054c205b76c7cf265ae8240ad..81c91039e40925a155c5b4794e5b61516a2523b4 100644 (file)
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -35,6 +35,7 @@
  #include "catalog/pg_operator.h"
  #include "catalog/pg_partitioned_table.h"
  #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_trigger.h"
  #include "catalog/pg_type.h"
  #include "commands/defrem.h"
@@ -317,6 +318,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
                                            const Oid *excludeOps,
                                            bool attrsOnly, bool showTblSpc,
                                            int prettyFlags, bool missing_ok);
+static char *pg_get_statisticsext_worker(Oid statextid, bool missing_ok);
  static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
                                                  bool attrsOnly);
  static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
@@ -1421,6 +1423,85 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
         return buf.data;
  }
  
+/*
+ * pg_get_statisticsextdef
+ *             Get the definition of an extended statistics object (SQL NULL if not found)
+ */
+Datum
+pg_get_statisticsextdef(PG_FUNCTION_ARGS)
+{
+       Oid                     statextid = PG_GETARG_OID(0);
+       char       *res;
+
+       res = pg_get_statisticsext_worker(statextid, true);
+
+       if (res == NULL)
+               PG_RETURN_NULL();
+
+       PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+/*
+ * Workhorse to decompile an extended statistics object (NULL if missing_ok and not found).
+ */
+static char *
+pg_get_statisticsext_worker(Oid statextid, bool missing_ok)
+{
+       Form_pg_statistic_ext   statextrec;
+       Form_pg_class                   pgclassrec;
+       HeapTuple       statexttup;
+       HeapTuple       pgclasstup;
+       StringInfoData buf;
+       int                     colno;
+
+       statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
+
+       if (!HeapTupleIsValid(statexttup))
+       {
+               if (missing_ok)
+                       return NULL;
+               elog(ERROR, "cache lookup failed for extended statistics %u", statextid);
+       }
+
+       statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
+
+       pgclasstup = SearchSysCache1(RELOID, ObjectIdGetDatum(statextrec->starelid));
+
+       if (!HeapTupleIsValid(pgclasstup))
+       {
+               /* statextrec points into statexttup, so report without releasing it */
+               elog(ERROR, "cache lookup failed for relation %u", statextrec->starelid);
+       }
+
+       pgclassrec = (Form_pg_class) GETSTRUCT(pgclasstup);
+
+       initStringInfo(&buf);
+
+       appendStringInfo(&buf, "CREATE STATISTICS %s ON (",
+                                                       quote_identifier(NameStr(statextrec->staname)));
+
+       /* emit the column list in stakeys order, comma-separated */
+       for (colno = 0; colno < statextrec->stakeys.dim1; colno++)
+       {
+               AttrNumber      attnum = statextrec->stakeys.values[colno];
+               char       *attname;
+
+               if (colno > 0)
+                       appendStringInfoString(&buf, ", ");
+
+               attname = get_relid_attribute_name(statextrec->starelid, attnum);
+
+               appendStringInfoString(&buf, quote_identifier(attname));
+       }
+
+       appendStringInfo(&buf, ") FROM %s",
+                                                       quote_identifier(NameStr(pgclassrec->relname)));
+
+       ReleaseSysCache(statexttup);
+       ReleaseSysCache(pgclasstup);
+
+       return buf.data;
+}
+
  /*
   * pg_get_partkeydef
   *
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index f8b28fe0e612da3376cd95f6848f84de25fc5d39..cc24c8aeb56ef087bacb271f6fc0cc7257ad2d76 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -110,6 +110,7 @@
  #include "catalog/pg_operator.h"
  #include "catalog/pg_opfamily.h"
  #include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_type.h"
  #include "executor/executor.h"
  #include "mb/pg_wchar.h"
@@ -126,6 +127,7 @@
  #include "parser/parse_clause.h"
  #include "parser/parse_coerce.h"
  #include "parser/parsetree.h"
+#include "statistics/statistics.h"
  #include "utils/builtins.h"
  #include "utils/bytea.h"
  #include "utils/date.h"
@@ -164,6 +166,8 @@ static double eqjoinsel_inner(Oid operator,
  static double eqjoinsel_semi(Oid operator,
                            VariableStatData *vardata1, VariableStatData *vardata2,
                            RelOptInfo *inner_rel);
+static bool estimate_multivariate_ndistinct(PlannerInfo *root,
+                          RelOptInfo *rel, List **varinfos, double *ndistinct);
  static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                                   Datum lobound, Datum hibound, Oid boundstypid,
                                   double *scaledlobound, double *scaledhibound);
@@ -3398,25 +3402,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
         {
                 GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
                 RelOptInfo *rel = varinfo1->rel;
-               double          reldistinct = varinfo1->ndistinct;
+               double          reldistinct = 1;
                 double          relmaxndistinct = reldistinct;
                 int                     relvarcount = 1;
                 List       *newvarinfos = NIL;
+               List       *relvarinfos = NIL;
  
                 /*
-                * Get the product of numdistinct estimates of the Vars for this rel.
-                * Also, construct new varinfos list of remaining Vars.
+                * Split the list of varinfos in two - one for the current rel,
+                * one for remaining Vars on other rels.
                  */
+               relvarinfos = lcons(varinfo1, relvarinfos);
                 for_each_cell(l, lnext(list_head(varinfos)))
                 {
                         GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
  
                         if (varinfo2->rel == varinfo1->rel)
                         {
-                               reldistinct *= varinfo2->ndistinct;
-                               if (relmaxndistinct < varinfo2->ndistinct)
-                                       relmaxndistinct = varinfo2->ndistinct;
-                               relvarcount++;
+                               /* varinfos on current rel */
+                               relvarinfos = lcons(varinfo2, relvarinfos);
                         }
                         else
                         {
@@ -3425,6 +3429,43 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
                         }
                 }
  
+               /*
+                * Get the numdistinct estimate for the Vars of this rel.  We
+                * iteratively search for multivariate n-distinct with maximum number
+                * of vars; assuming that each var group is independent of the others,
+                * we multiply them together.  Any remaining relvarinfos after
+                * no more multivariate matches are found are assumed independent too,
+                * so their individual ndistinct estimates are multiplied also.
+                */
+               while (relvarinfos)
+               {
+                       double          mvndistinct;
+
+                       if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
+                                                                                               &mvndistinct))
+                       {
+                               reldistinct *= mvndistinct;
+                               if (relmaxndistinct < mvndistinct)
+                                       relmaxndistinct = mvndistinct;
+                               relvarcount++;  /* inaccurate, but doesn't matter */
+                       }
+                       else
+                       {
+                               foreach (l, relvarinfos)
+                               {
+                                       GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+
+                                       reldistinct *= varinfo2->ndistinct;
+                                       if (relmaxndistinct < varinfo2->ndistinct)
+                                               relmaxndistinct = varinfo2->ndistinct;
+                                       relvarcount++;
+                               }
+
+                               /* we're done with this relation */
+                               relvarinfos = NIL;
+                       }
+               }
+
                 /*
                  * Sanity check --- don't divide by zero if empty relation.
                  */
@@ -3667,6 +3708,132 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
   *-------------------------------------------------------------------------
   */
  
+/*
+ * estimate_multivariate_ndistinct
+ *             Estimate the number of distinct combinations for (a subset of)
+ *             the given grouping Vars from multivariate ndistinct statistics.
+ *
+ * *varinfos is a list of GroupVarInfos that must all belong to the given rel.
+ * Among the STATS_EXT_NDISTINCT entries in rel->statlist we pick the one whose
+ * keys overlap the largest number of those Vars (at least two), look up the
+ * MVNDistinctItem whose attribute set is exactly that overlap, and store its
+ * estimate in *ndistinct.  If a match is found, *varinfos is replaced by a new
+ * list holding only the varinfos that were not matched, so the caller can
+ * call us again for the remainder.
+ *
+ * Varinfos that aren't simple Vars with a positive attribute number (that is,
+ * expressions, system columns and whole-row references) are never matched;
+ * they are always carried over to the output list.
+ *
+ * Return TRUE if we're able to find a match, FALSE otherwise.  On FALSE,
+ * neither *varinfos nor *ndistinct is modified.
+ */
+static bool
+estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
+                                                               List **varinfos, double *ndistinct)
+{
+       ListCell   *lc;
+       Bitmapset  *attnums = NULL;
+       int                     nmatches;
+       Oid                     statOid = InvalidOid;
+       MVNDistinct *stats;
+       Bitmapset  *matched = NULL;
+
+       /* bail out immediately if the table has no extended statistics */
+       if (!rel->statlist)
+               return false;
+
+       /* Determine the attnums we're looking for */
+       foreach(lc, *varinfos)
+       {
+               GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+               AttrNumber      attnum;
+
+               Assert(varinfo->rel == rel);
+
+               if (!IsA(varinfo->var, Var))
+                       continue;
+
+               /*
+                * System columns have negative attnums, which a Bitmapset cannot
+                * hold (bms_add_member raises an error for them), and attnum 0 is
+                * a whole-row reference rather than a column.  Leave both out of
+                * the search set; they fall back to per-Var estimates.
+                */
+               attnum = ((Var *) varinfo->var)->varattno;
+               if (attnum <= 0)
+                       continue;
+
+               attnums = bms_add_member(attnums, attnum);
+       }
+
+       /* look for the ndistinct statistics matching the most vars */
+       nmatches = 1; /* we require at least two matches */
+       foreach(lc, rel->statlist)
+       {
+               StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+               Bitmapset  *shared;
+               int                     nshared;
+
+               /* skip statistics of other kinds */
+               if (info->kind != STATS_EXT_NDISTINCT)
+                       continue;
+
+               /* compute attnums shared by the vars and the statistic */
+               shared = bms_intersect(info->keys, attnums);
+               nshared = bms_num_members(shared);
+
+               /*
+                * Does this statistic match more columns than the currently
+                * best statistic?  If so, use this one instead.
+                *
+                * XXX This should break ties using name of the statistic, or
+                * something like that, to make the outcome stable.
+                */
+               if (nshared > nmatches)
+               {
+                       statOid = info->statOid;
+                       nmatches = nshared;
+                       matched = shared;
+               }
+       }
+
+       /* No match? */
+       if (statOid == InvalidOid)
+               return false;
+       Assert(nmatches > 1 && matched != NULL);
+
+       stats = statext_ndistinct_load(statOid);
+
+       /*
+        * If we have a match, search it for the specific item that matches (there
+        * must be one), and construct the output values.
+        */
+       if (stats)
+       {
+               int             i;
+               List   *newlist = NIL;
+               MVNDistinctItem *item = NULL;
+
+               /* Find the specific item that exactly matches the combination */
+               for (i = 0; i < stats->nitems; i++)
+               {
+                       MVNDistinctItem *tmpitem = &stats->items[i];
+
+                       if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
+                       {
+                               item = tmpitem;
+                               break;
+                       }
+               }
+
+               /* make sure we found an item */
+               if (!item)
+                       elog(ERROR, "corrupt MVNDistinct entry");
+
+               /* Form the output varinfo list, keeping only unmatched ones */
+               foreach(lc, *varinfos)
+               {
+                       GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+                       AttrNumber      attnum;
+
+                       if (!IsA(varinfo->var, Var))
+                       {
+                               newlist = lappend(newlist, varinfo);
+                               continue;
+                       }
+
+                       attnum = ((Var *) varinfo->var)->varattno;
+
+                       /*
+                        * Non-positive attnums were never added to the search set,
+                        * and bms_is_member() rejects negative values, so keep such
+                        * varinfos without consulting the bitmap.
+                        */
+                       if (attnum <= 0 || !bms_is_member(attnum, matched))
+                               newlist = lappend(newlist, varinfo);
+               }
+
+               *varinfos = newlist;
+               *ndistinct = item->ndistinct;
+               return true;
+       }
+
+       /* statext_ndistinct_load() gave us nothing to work with */
+       return false;
+}
+
+
  /*
   * convert_to_scalar
   *       Convert non-NULL values of the indicated types to the comparison
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c

index ce55fc5277746a54399fb955b568894e4548f1b6..a6b60c67caaf391784935443dbef6103c9958e12 100644 (file)
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -56,6 +56,7 @@
  #include "catalog/pg_publication.h"
  #include "catalog/pg_rewrite.h"
  #include "catalog/pg_shseclabel.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_subscription.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_trigger.h"
@@ -4451,6 +4452,82 @@ RelationGetIndexList(Relation relation)
         return result;
  }
  
+/*
+ * RelationGetStatExtList
+ *             return the OIDs of all extended statistics defined on a relation
+ *
+ * Like RelationGetIndexList(), the list is built lazily: the first request
+ * scans pg_statistic_ext for rows whose starelid is this relation, and the
+ * result is remembered in the relcache entry (rd_statlist / rd_statvalid) so
+ * later requests need no catalog access.  A shared cache inval of the
+ * relcache entry discards the remembered list and clears rd_statvalid, which
+ * forces a rescan on the next request; that is how creation or removal of a
+ * statistic is noticed.
+ *
+ * The list is kept sorted by OID, although nothing depends on that today.
+ *
+ * What the caller receives is always a private copy, palloc'd in the
+ * caller's memory context, which it may list_free() when done.  Handing out
+ * the relcache's own list would be unsafe: callers typically do syscache
+ * lookups on the statistics, and those can process SI messages that make the
+ * relcache's copy disappear.
+ */
+List *
+RelationGetStatExtList(Relation relation)
+{
+       Relation        statrel;
+       SysScanDesc scan;
+       ScanKeyData key;
+       HeapTuple       tup;
+       List       *statoids = NIL;
+       List       *stale;
+       MemoryContext callercxt;
+
+       /* Fast path: the relcache entry already holds a valid list. */
+       if (relation->rd_statvalid != 0)
+               return list_copy(relation->rd_statlist);
+
+       /*
+        * The list to be returned (statoids) is accumulated in the caller's
+        * context during the scan, and only copied into the relcache entry once
+        * the scan has finished.  An error partway through therefore cannot
+        * leak memory in the cache context.
+        */
+
+       /* Scan key: pg_statistic_ext rows with starelid = this relation. */
+       ScanKeyInit(&key,
+                               Anum_pg_statistic_ext_starelid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(RelationGetRelid(relation)));
+
+       statrel = heap_open(StatisticExtRelationId, AccessShareLock);
+       scan = systable_beginscan(statrel, StatisticExtRelidIndexId, true,
+                                                         NULL, 1, &key);
+
+       for (;;)
+       {
+               tup = systable_getnext(scan);
+               if (!HeapTupleIsValid(tup))
+                       break;
+
+               /* TODO maybe include only already built statistics? */
+               statoids = insert_ordered_oid(statoids, HeapTupleGetOid(tup));
+       }
+
+       systable_endscan(scan);
+       heap_close(statrel, AccessShareLock);
+
+       /* Stash a cache-context copy of the finished list in the relcache entry. */
+       callercxt = MemoryContextSwitchTo(CacheMemoryContext);
+       stale = relation->rd_statlist;
+       relation->rd_statlist = list_copy(statoids);
+       relation->rd_statvalid = true;
+       MemoryContextSwitchTo(callercxt);
+
+       /* Release whatever list the entry held before, if any. */
+       list_free(stale);
+
+       return statoids;
+}
+
+
  /*
   * insert_ordered_oid
   *             Insert a new Oid into a sorted list of Oids, preserving ordering
@@ -5560,6 +5637,8 @@ load_relcache_init_file(bool shared)
                 rel->rd_pkattr = NULL;
                 rel->rd_idattr = NULL;
                 rel->rd_pubactions = NULL;
+               rel->rd_statvalid = false;
+               rel->rd_statlist = NIL;
                 rel->rd_createSubid = InvalidSubTransactionId;
                 rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
                 rel->rd_amcache = NULL;
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c

index d5a376406fee37810d1817dee0cb87151146d7cf..d8c823f42b529fbf9ced2964fa7ff01ddbca7be9 100644 (file)
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -61,6 +61,7 @@
  #include "catalog/pg_shseclabel.h"
  #include "catalog/pg_replication_origin.h"
  #include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
  #include "catalog/pg_subscription.h"
  #include "catalog/pg_subscription_rel.h"
  #include "catalog/pg_tablespace.h"
@@ -726,6 +727,28 @@ static const struct cachedesc cacheinfo[] = {
                 },
                 32
         },
+       {StatisticExtRelationId,        /* STATEXTNAMENSP */
+               StatisticExtNameIndexId,
+               2,
+               {
+                       Anum_pg_statistic_ext_staname,
+                       Anum_pg_statistic_ext_stanamespace,
+                       0,
+                       0
+               },
+               4
+       },
+       {StatisticExtRelationId,        /* STATEXTOID */
+               StatisticExtOidIndexId,
+               1,
+               {
+                       ObjectIdAttributeNumber,
+                       0,
+                       0,
+                       0
+               },
+               4
+       },
         {StatisticRelationId,           /* STATRELATTINH */
                 StatisticRelidAttnumInhIndexId,
                 3,
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c

index 89530a9f0fbe6cdeb06979a7e5ab192e69f7c454..e2bc3576dc34fe1b91abc66b01bdf3beb9d5835a 100644 (file)
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -272,6 +272,10 @@ getSchemaData(Archive *fout, int *numTablesPtr)
                 write_msg(NULL, "reading indexes\n");
         getIndexes(fout, tblinfo, numTables);
  
+       if (g_verbose)
+               write_msg(NULL, "reading extended statistics\n");
+       getExtendedStatistics(fout, tblinfo, numTables);
+
         if (g_verbose)
                 write_msg(NULL, "reading constraints\n");
         getConstraints(fout, tblinfo, numTables);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c

index dd0892539a3ccf00634901b3c2a1e918e4068255..f77581d6ec9b9879465501d405f590f4866c3db9 100644 (file)
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -3540,7 +3540,8 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass)
                                  strcmp(te->desc, "TRIGGER") == 0 ||
                                  strcmp(te->desc, "ROW SECURITY") == 0 ||
                                  strcmp(te->desc, "POLICY") == 0 ||
-                                strcmp(te->desc, "USER MAPPING") == 0)
+                                strcmp(te->desc, "USER MAPPING") == 0 ||
+                                strcmp(te->desc, "STATISTICS") == 0)
                 {
                         /* these object types don't have separate owners */
                 }
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c

index b3d95d7f6ee3a3fa50a900b8131f8727f3900fd1..ba34cc163e97a1534499c5864a50289c88c97c68 100644 (file)
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -192,6 +192,7 @@ static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo);
  static void dumpSequence(Archive *fout, TableInfo *tbinfo);
  static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo);
  static void dumpIndex(Archive *fout, IndxInfo *indxinfo);
+static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo);
  static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo);
  static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo);
  static void dumpTSParser(Archive *fout, TSParserInfo *prsinfo);
@@ -6582,6 +6583,99 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
         destroyPQExpBuffer(query);
  }
  
+/*
+ * getExtendedStatistics
+ *       collect the extended statistics objects defined on each dumpable
+ *       plain table or materialized view.
+ *
+ * Nothing is handed back to the caller: every statistics object found is
+ * set up as a DO_STATSEXT dumpable object and registered with
+ * AssignDumpId().  Against servers older than v10 this does nothing.
+ */
+void
+getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables)
+{
+       PQExpBuffer             sql;
+       int                             tblno;
+
+       /* Extended statistics were new in v10 */
+       if (fout->remoteVersion < 100000)
+               return;
+
+       sql = createPQExpBuffer();
+
+       for (tblno = 0; tblno < numTables; tblno++)
+       {
+               TableInfo  *tbinfo = &tblinfo[tblno];
+               PGresult           *res;
+               StatsExtInfo   *statsextinfo;
+               int                             nrows;
+               int                             row;
+               int                             col_tableoid;
+               int                             col_oid;
+               int                             col_staname;
+               int                             col_stadef;
+
+               /* Only plain tables and materialized views can have extended statistics. */
+               if (!(tbinfo->relkind == RELKIND_RELATION ||
+                         tbinfo->relkind == RELKIND_MATVIEW))
+                       continue;
+
+               /* Skip tables whose own definition is not going to be dumped. */
+               if ((tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION) == 0)
+                       continue;
+
+               if (g_verbose)
+                       write_msg(NULL, "reading extended statistics for table \"%s.%s\"\n",
+                                         tbinfo->dobj.namespace->dobj.name,
+                                         tbinfo->dobj.name);
+
+               /* Make sure we are in proper schema so stadef is right */
+               selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name);
+
+               /* One query per table, fetching its statistics in name order. */
+               resetPQExpBuffer(sql);
+               appendPQExpBuffer(sql,
+                                                 "SELECT "
+                                                       "tableoid, "
+                                                       "oid, "
+                                                       "staname, "
+                                                 "pg_catalog.pg_get_statisticsextdef(oid) AS stadef "
+                                                 "FROM pg_statistic_ext "
+                                                 "WHERE starelid = '%u' "
+                                                 "ORDER BY staname", tbinfo->dobj.catId.oid);
+
+               res = ExecuteSqlQuery(fout, sql->data, PGRES_TUPLES_OK);
+               nrows = PQntuples(res);
+
+               col_tableoid = PQfnumber(res, "tableoid");
+               col_oid = PQfnumber(res, "oid");
+               col_staname = PQfnumber(res, "staname");
+               col_stadef = PQfnumber(res, "stadef");
+
+               statsextinfo = (StatsExtInfo *) pg_malloc(nrows * sizeof(StatsExtInfo));
+
+               for (row = 0; row < nrows; row++)
+               {
+                       StatsExtInfo *cur = &statsextinfo[row];
+
+                       /* objType and catId are filled in before AssignDumpId() */
+                       cur->dobj.objType = DO_STATSEXT;
+                       cur->dobj.catId.tableoid = atooid(PQgetvalue(res, row, col_tableoid));
+                       cur->dobj.catId.oid = atooid(PQgetvalue(res, row, col_oid));
+                       AssignDumpId(&cur->dobj);
+
+                       /* the statistics object is filed under its table's schema */
+                       cur->dobj.name = pg_strdup(PQgetvalue(res, row, col_staname));
+                       cur->dobj.namespace = tbinfo->dobj.namespace;
+                       cur->statsexttable = tbinfo;
+                       cur->statsextdef = pg_strdup(PQgetvalue(res, row, col_stadef));
+               }
+
+               PQclear(res);
+       }
+
+       destroyPQExpBuffer(sql);
+}
+
+
  /*
   * getConstraints
   *
@@ -9234,6 +9328,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
                 case DO_INDEX:
                         dumpIndex(fout, (IndxInfo *) dobj);
                         break;
+               case DO_STATSEXT:
+                       dumpStatisticsExt(fout, (StatsExtInfo *) dobj);
+                       break;
                 case DO_REFRESH_MATVIEW:
                         refreshMatViewData(fout, (TableDataInfo *) dobj);
                         break;
@@ -15728,6 +15825,61 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo)
         destroyPQExpBuffer(labelq);
  }
  
+/*
+ * dumpStatisticsExt
+ *       emit the archive entries (definition and comment) for one extended
+ *       statistics object
+ *
+ * Does nothing in a data-only dump.
+ */
+static void
+dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo)
+{
+       TableInfo  *owningtbl = statsextinfo->statsexttable;
+       PQExpBuffer createcmd;
+       PQExpBuffer dropcmd;
+       PQExpBuffer tag;
+
+       if (fout->dopt->dataOnly)
+               return;
+
+       createcmd = createPQExpBuffer();
+       dropcmd = createPQExpBuffer();
+       tag = createPQExpBuffer();
+
+       /* The definition text was fetched from the server; just terminate it. */
+       appendPQExpBuffer(createcmd, "%s;\n", statsextinfo->statsextdef);
+
+       /*
+        * Build the DROP command with one fmtId() result per append call, as
+        * the original code does.  NOTE(review): presumably fmtId() reuses a
+        * single result buffer, so the two calls must not be merged into one
+        * format string -- confirm before changing.
+        */
+       appendPQExpBuffer(dropcmd, "DROP STATISTICS %s.",
+                                         fmtId(owningtbl->dobj.namespace->dobj.name));
+       appendPQExpBuffer(dropcmd, "%s;\n",
+                                         fmtId(statsextinfo->dobj.name));
+
+       /* Label used when dumping the object's comment. */
+       appendPQExpBuffer(tag, "STATISTICS %s",
+                                         fmtId(statsextinfo->dobj.name));
+
+       if ((statsextinfo->dobj.dump & DUMP_COMPONENT_DEFINITION) != 0)
+               ArchiveEntry(fout, statsextinfo->dobj.catId,
+                                        statsextinfo->dobj.dumpId,
+                                        statsextinfo->dobj.name,
+                                        owningtbl->dobj.namespace->dobj.name,
+                                        NULL,
+                                        owningtbl->rolname, false,
+                                        "STATISTICS", SECTION_POST_DATA,
+                                        createcmd->data, dropcmd->data, NULL,
+                                        NULL, 0,
+                                        NULL, NULL);
+
+       /* Dump Statistics Comments */
+       if ((statsextinfo->dobj.dump & DUMP_COMPONENT_COMMENT) != 0)
+               dumpComment(fout, tag->data,
+                                       owningtbl->dobj.namespace->dobj.name,
+                                       owningtbl->rolname,
+                                       statsextinfo->dobj.catId, 0,
+                                       statsextinfo->dobj.dumpId);
+
+       destroyPQExpBuffer(createcmd);
+       destroyPQExpBuffer(dropcmd);
+       destroyPQExpBuffer(tag);
+}
+
+
  /*
   * dumpConstraint
   *       write out to fout a user-defined constraint
@@ -17266,6 +17418,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
                                 addObjectDependency(postDataBound, dobj->dumpId);
                                 break;
                         case DO_INDEX:
+                       case DO_STATSEXT:
                         case DO_REFRESH_MATVIEW:
                         case DO_TRIGGER:
                         case DO_EVENT_TRIGGER:
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h

index a466527ec685dba205761bc5b13e5fa3f3c3f1e2..cb22f63bd6ac7309e9542d58083d4158fb66d459 100644 (file)
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -56,6 +56,7 @@ typedef enum
         DO_TABLE,
         DO_ATTRDEF,
         DO_INDEX,
+       DO_STATSEXT,
         DO_RULE,
         DO_TRIGGER,
         DO_CONSTRAINT,
@@ -362,6 +363,13 @@ typedef struct _indxInfo
         int                     relpages;               /* relpages of the underlying table */
  } IndxInfo;
  
+typedef struct _statsExtInfo   /* pg_dump's record of one extended statistics object */
+{
+       DumpableObject dobj;            /* objType is DO_STATSEXT; name is staname */
+       TableInfo  *statsexttable;      /* link to table the stats ext is for */
+       char       *statsextdef;        /* pg_get_statisticsextdef() output, used as the dumped definition */
+} StatsExtInfo;
+
  typedef struct _ruleInfo
  {
         DumpableObject dobj;
@@ -682,6 +690,7 @@ extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables);
  extern InhInfo *getInherits(Archive *fout, int *numInherits);
  extern PartInfo *getPartitions(Archive *fout, int *numPartitions);
  extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables);
+extern void getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables);
  extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables);
  extern RuleInfo *getRules(Archive *fout, int *numRules);
  extern void getTriggers(Archive *fout, TableInfo tblinfo[], int numTables);
diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c

index e555de885721c88af38b4f2dc045ae14f0f0cf5c..5c19b05ca486730f83018ed405507b19b10a9363 100644 (file)
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -53,10 +53,11 @@ static const int dbObjectTypePriority[] =
         18,                                                     /* DO_TABLE */
         20,                                                     /* DO_ATTRDEF */
         28,                                                     /* DO_INDEX */
-       29,                                                     /* DO_RULE */
-       30,                                                     /* DO_TRIGGER */
+       29,                                                     /* DO_STATSEXT */
+       30,                                                     /* DO_RULE */
+       31,                                                     /* DO_TRIGGER */
         27,                                                     /* DO_CONSTRAINT */
-       31,                                                     /* DO_FK_CONSTRAINT */
+       32,                                                     /* DO_FK_CONSTRAINT */
         2,                                                      /* DO_PROCLANG */
         10,                                                     /* DO_CAST */
         23,                                                     /* DO_TABLE_DATA */
@@ -1291,6 +1292,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize)
                                          "INDEX %s  (ID %d OID %u)",
                                          obj->name, obj->dumpId, obj->catId.oid);
                         return;
+               case DO_STATSEXT:
+                       snprintf(buf, bufsize,
+                                        "STATISTICS %s  (ID %d OID %u)",
+                                        obj->name, obj->dumpId, obj->catId.oid);
+                       return;
                 case DO_REFRESH_MATVIEW:
                         snprintf(buf, bufsize,
                                          "REFRESH MATERIALIZED VIEW %s  (ID %d OID %u)",
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c

index 8c583127fdd351aa0acf166a46cfac47b91560f1..3cf1742020f7a62a6caafb82aa3a315ed3a6c150 100644 (file)
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -2320,6 +2320,57 @@ describeOneTableDetails(const char *schemaname,
                         PQclear(result);
                 }
  
+               /* print any extended statistics */
+               if (pset.sversion >= 100000)
+               {
+                       printfPQExpBuffer(&buf,
+                                                         "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n"
+                                                         "  (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(attname::text),', ') \n"
+                                                  "    FROM ((SELECT pg_catalog.unnest(stakeys) AS attnum) s\n"
+                          "         JOIN pg_catalog.pg_attribute a ON (starelid = a.attrelid AND\n"
+                                                         "a.attnum = s.attnum AND not attisdropped))) AS columns,\n"
+                                                         "  (staenabled::char[] @> '{d}'::char[]) AS ndist_enabled\n"
+                         "FROM pg_catalog.pg_statistic_ext stat WHERE starelid  = '%s'\n"
+                         "ORDER BY 1;",
+                                                         oid);
+
+                       result = PSQLexec(buf.data);
+                       if (!result)
+                               goto error_return;
+                       else
+                               tuples = PQntuples(result);
+
+                       if (tuples > 0)
+                       {
+                               printTableAddFooter(&cont, _("Statistics:"));
+
+                               for (i = 0; i < tuples; i++)
+                               {
+                                       int             cnt = 0;
+
+                                       printfPQExpBuffer(&buf, "    ");
+
+                                       /* statistics name (qualified with namespace) */
+                                       appendPQExpBuffer(&buf, "\"%s.%s\" WITH (",
+                                                                         PQgetvalue(result, i, 1),
+                                                                         PQgetvalue(result, i, 2));
+
+                                       /* options */
+                                       if (strcmp(PQgetvalue(result, i, 5), "t") == 0)
+                                       {
+                                               appendPQExpBufferStr(&buf, "ndistinct");
+                                               cnt++;
+                                       }
+
+                                       appendPQExpBuffer(&buf, ") ON (%s)",
+                                                                         PQgetvalue(result, i, 4));
+
+                                       printTableAddFooter(&cont, buf.data);
+                               }
+                       }
+                       PQclear(result);
+               }
+
                 /* print rules */
                 if (tableinfo.hasrules && tableinfo.relkind != RELKIND_MATVIEW)
                 {
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index c9c9a18777466d8815b2a89435bf2baf555dbe7f..b8fa18ae2ea024842202fb5bf278d1daf40350ab 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201703241
+#define CATALOG_VERSION_NO     201703242
  
  #endif
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h

index 10759c7c58d0b02be095653e68ffa17b72e57361..9effbce2f115242342ced72955b0ba06b96ca7c4 100644 (file)
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -147,6 +147,7 @@ typedef enum ObjectClass
         OCLASS_REWRITE,                         /* pg_rewrite */
         OCLASS_TRIGGER,                         /* pg_trigger */
         OCLASS_SCHEMA,                          /* pg_namespace */
+       OCLASS_STATISTIC_EXT,           /* pg_statistic_ext */
         OCLASS_TSPARSER,                        /* pg_ts_parser */
         OCLASS_TSDICT,                          /* pg_ts_dict */
         OCLASS_TSTEMPLATE,                      /* pg_ts_template */
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h

index 1187797fd9ef3a90e951432ee1475053b9496a46..473fe177ba4778998bc76a2a61869c7e316c74b7 100644 (file)
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -119,6 +119,7 @@ extern void RemoveAttrDefault(Oid relid, AttrNumber attnum,
                                   DropBehavior behavior, bool complain, bool internal);
  extern void RemoveAttrDefaultById(Oid attrdefId);
  extern void RemoveStatistics(Oid relid, AttrNumber attnum);
+extern void RemoveStatisticsExt(Oid relid, AttrNumber attnum);
  
  extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno,
                                                   bool relhasoids);
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h

index 5d4190c05eba824dde8ee3a06b42612d4374951e..a7266860ceb0923cccf9e36918af6e8c36072cae 100644 (file)
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -182,6 +182,13 @@ DECLARE_UNIQUE_INDEX(pg_largeobject_loid_pn_index, 2683, on pg_largeobject using
  DECLARE_UNIQUE_INDEX(pg_largeobject_metadata_oid_index, 2996, on pg_largeobject_metadata using btree(oid oid_ops));
  #define LargeObjectMetadataOidIndexId  2996
  
+DECLARE_UNIQUE_INDEX(pg_statistic_ext_oid_index, 3380, on pg_statistic_ext using btree(oid oid_ops));
+#define StatisticExtOidIndexId 3380
+DECLARE_UNIQUE_INDEX(pg_statistic_ext_name_index, 3997, on pg_statistic_ext using btree(staname name_ops, stanamespace oid_ops));
+#define StatisticExtNameIndexId 3997
+DECLARE_INDEX(pg_statistic_ext_relid_index, 3379, on pg_statistic_ext using btree(starelid oid_ops));
+#define StatisticExtRelidIndexId 3379
+
  DECLARE_UNIQUE_INDEX(pg_namespace_nspname_index, 2684, on pg_namespace using btree(nspname name_ops));
  #define NamespaceNameIndexId  2684
  DECLARE_UNIQUE_INDEX(pg_namespace_oid_index, 2685, on pg_namespace using btree(oid oid_ops));
diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h

index dbeb25b1ac17b65257a6cd2bcce09d2f0678afc2..35e0e2b089b7811c69daa60346a489d3e4dcc962 100644 (file)
--- a/src/include/catalog/namespace.h
+++ b/src/include/catalog/namespace.h
@@ -141,6 +141,8 @@ extern Oid  get_collation_oid(List *collname, bool missing_ok);
  extern Oid     get_conversion_oid(List *conname, bool missing_ok);
  extern Oid     FindDefaultConversionProc(int32 for_encoding, int32 to_encoding);
  
+extern Oid     get_statistics_oid(List *names, bool missing_ok);
+
  /* initialization & transaction cleanup code */
  extern void InitializeSearchPath(void);
  extern void AtEOXact_Namespace(bool isCommit, bool parallel);
diff --git a/src/include/catalog/pg_cast.h b/src/include/catalog/pg_cast.h

index ce8dc59e5af709bc01c5c16fe0657fd66b719dc0..bc5d28a4fac6f27540ec4a47f817654013e15a3d 100644 (file)
--- a/src/include/catalog/pg_cast.h
+++ b/src/include/catalog/pg_cast.h
@@ -254,6 +254,10 @@ DATA(insert (      23       18   78 e f ));
  /* pg_node_tree can be coerced to, but not from, text */
  DATA(insert (  194      25    0 i b ));
  
+/* pg_ndistinct can be coerced to, but not from, bytea and text */
+DATA(insert (  3361  17    0 i b ));
+DATA(insert (  3361  25    0 i i ));
+
  /*
   * Datetime category
   */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index a66d04510034a8466013fd3026dd9f1bbd0cf806..ee67459c32efb2bff87600e171a33f7666724702 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -1983,6 +1983,8 @@ DESCR("select statement of a view");
  DATA(insert OID = 1642 (  pg_get_userbyid         PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 19 "26" _null_ _null_ _null_ _null_ _null_ pg_get_userbyid _null_ _null_ _null_ ));
  DESCR("role name by OID (with fallback)");
  DATA(insert OID = 1643 (  pg_get_indexdef         PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_indexdef _null_ _null_ _null_ ));
+DESCR("extended statistics description");
+DATA(insert OID = 3415 (  pg_get_statisticsextdef         PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_statisticsextdef _null_ _null_ _null_ ));
  DESCR("index description");
  DATA(insert OID = 3352 (  pg_get_partkeydef    PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_partkeydef _null_ _null_ _null_ ));
  DESCR("partition key description");
@@ -2758,6 +2760,15 @@ DESCR("current user privilege on any column by rel name");
  DATA(insert OID = 3029 (  has_any_column_privilege        PGNSP PGUID 12 10 0 0 0 f f f f t f s s 2 0 16 "26 25" _null_ _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ ));
  DESCR("current user privilege on any column by rel oid");
  
+DATA(insert OID = 3355 (  pg_ndistinct_in      PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3361 "2275" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3356 (  pg_ndistinct_out     PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_out _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3357 (  pg_ndistinct_recv PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 3361 "2281" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_recv _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3358 (  pg_ndistinct_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_send _null_ _null_ _null_ ));
+DESCR("I/O");
+
  DATA(insert OID = 1928 (  pg_stat_get_numscans                 PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
  DESCR("statistics: number of scans done for table/index");
  DATA(insert OID = 1929 (  pg_stat_get_tuples_returned  PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ ));
diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h

new file mode 100644 (file)

index 0000000..5f67fe7
--- /dev/null
+++ b/src/include/catalog/pg_statistic_ext.h
@@ -0,0 +1,75 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_statistic_ext.h
+ *       definition of the system "extended statistic" relation (pg_statistic_ext)
+ *       along with the relation's initial contents.
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/pg_statistic_ext.h
+ *
+ * NOTES
+ *       the genbki.pl script reads this file and generates .bki
+ *       information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_STATISTIC_EXT_H
+#define PG_STATISTIC_EXT_H
+
+#include "catalog/genbki.h"
+
+/* ----------------
+ *             pg_statistic_ext definition.  cpp turns this into
+ *             typedef struct FormData_pg_statistic_ext
+ * ----------------
+ */
+#define StatisticExtRelationId 3381
+
+CATALOG(pg_statistic_ext,3381)
+{
+       /* Identifying fields; (staname, stanamespace) is the unique key: */
+       Oid                     starelid;               /* relation containing attributes */
+       NameData        staname;                /* statistics name */
+       Oid                     stanamespace;   /* OID of namespace containing this statistics */
+       Oid                     staowner;               /* statistics owner */
+
+       /*
+        * variable-length fields start here, but we allow direct access to
+        * stakeys
+        */
+       int2vector      stakeys;                /* attribute numbers of covered columns */
+
+#ifdef CATALOG_VARLEN
+       char            staenabled[1] BKI_FORCE_NOT_NULL;       /* statistic types
+                                                                                                        * requested to build */
+       pg_ndistinct standistinct;      /* ndistinct coefficients (serialized) */
+#endif
+
+} FormData_pg_statistic_ext;
+
+/* ----------------
+ *             Form_pg_statistic_ext corresponds to a pointer to a tuple with
+ *             the format of pg_statistic_ext relation.
+ * ----------------
+ */
+typedef FormData_pg_statistic_ext *Form_pg_statistic_ext;
+
+/* ----------------
+ *             compiler constants for pg_statistic_ext
+ * ----------------
+ */
+#define Natts_pg_statistic_ext                                 7
+#define Anum_pg_statistic_ext_starelid                 1
+#define Anum_pg_statistic_ext_staname                  2
+#define Anum_pg_statistic_ext_stanamespace             3
+#define Anum_pg_statistic_ext_staowner                 4
+#define Anum_pg_statistic_ext_stakeys                  5
+#define Anum_pg_statistic_ext_staenabled               6
+#define Anum_pg_statistic_ext_standistinct             7
+
+#define STATS_EXT_NDISTINCT            'd'
+
+#endif   /* PG_STATISTIC_EXT_H */
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h

index 9f61238179bf766aae54390646b38e1c09bc77e5..9ad67258fef4d08d6f00219f919a637cef82ee5b 100644 (file)
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -364,6 +364,10 @@ DATA(insert OID = 194 ( pg_node_tree       PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node
  DESCR("string representing an internal node tree");
  #define PGNODETREEOID  194
  
+DATA(insert OID = 3361 ( pg_ndistinct          PGNSP PGUID -1 f b S f t \054 0 0 0 pg_ndistinct_in pg_ndistinct_out pg_ndistinct_recv pg_ndistinct_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ ));
+DESCR("multivariate ndistinct coefficients");
+#define PGNDISTINCTOID 3361
+
  DATA(insert OID = 32 ( pg_ddl_command  PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ ));
  DESCR("internal type for passing CollectedCommand");
  #define PGDDLCOMMANDOID 32
diff --git a/src/include/catalog/toasting.h b/src/include/catalog/toasting.h

index db7f145b5f3fe74fc0cd6eec4f6214cdcf5c514f..00d0a8326f3dbbf9f7be44c3c9a64449ad5b6218 100644 (file)
--- a/src/include/catalog/toasting.h
+++ b/src/include/catalog/toasting.h
@@ -53,6 +53,7 @@ DECLARE_TOAST(pg_proc, 2836, 2837);
  DECLARE_TOAST(pg_rewrite, 2838, 2839);
  DECLARE_TOAST(pg_seclabel, 3598, 3599);
  DECLARE_TOAST(pg_statistic, 2840, 2841);
+DECLARE_TOAST(pg_statistic_ext, 3439, 3440);
  DECLARE_TOAST(pg_trigger, 2336, 2337);
  
  /* shared catalogs */
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h

index 8740cee94407f362da270e0d7c2c61defd7d5159..c323e81e6c5011e6d2b0bf0ffaf3f28904b51991 100644 (file)
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -77,6 +77,10 @@ extern ObjectAddress DefineOperator(List *names, List *parameters);
  extern void RemoveOperatorById(Oid operOid);
  extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt);
  
+/* commands/statscmds.c */
+extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt);
+extern void RemoveStatisticsById(Oid statsOid);
+
  /* commands/aggregatecmds.c */
  extern ObjectAddress DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle,
                                 List *parameters);
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h

index b2d8514f895fe96a53b5b3a889d2c02d78bda71e..fc883a6f3ec0744da50bebfb2eade049aa53a446 100644 (file)
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -279,6 +279,7 @@ typedef enum NodeTag
         T_PlaceHolderInfo,
         T_MinMaxAggInfo,
         T_PlannerParamItem,
+       T_StatisticExtInfo,
  
         /*
          * TAGS FOR MEMORY NODES (memnodes.h)
@@ -424,6 +425,7 @@ typedef enum NodeTag
         T_CreateSubscriptionStmt,
         T_AlterSubscriptionStmt,
         T_DropSubscriptionStmt,
+       T_CreateStatsStmt,
         T_AlterCollationStmt,
  
         /*
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index f3773ca9294ec58f3b8dd4b2bf470f4c55e564c8..3a71dd5b37d1674585e6fd63faee5b0b7e066d5e 100644 (file)
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1593,6 +1593,7 @@ typedef enum ObjectType
         OBJECT_SCHEMA,
         OBJECT_SEQUENCE,
         OBJECT_SUBSCRIPTION,
+       OBJECT_STATISTIC_EXT,
         OBJECT_TABCONSTRAINT,
         OBJECT_TABLE,
         OBJECT_TABLESPACE,
@@ -2656,6 +2657,20 @@ typedef struct IndexStmt
         bool            if_not_exists;  /* just do nothing if index already exists? */
  } IndexStmt;
  
+/* ----------------------
+ *             Create Statistics Statement
+ * ----------------------
+ */
+typedef struct CreateStatsStmt
+{
+       NodeTag         type;
+       List       *defnames;           /* qualified name (list of Value strings) */
+       RangeVar   *relation;           /* relation to build statistics on */
+       List       *keys;                       /* String nodes naming referenced columns */
+       List       *options;            /* list of DefElem */
+       bool            if_not_exists;  /* do nothing if statistics already exists */
+} CreateStatsStmt;
+
  /* ----------------------
   *             Create Function Statement
   * ----------------------
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index 1c88a79a2172e1d2c5684701c65be4549e9c92b7..0a5187cef3ba823ec3b67fb58cc2040ea2ca846a 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -529,6 +529,7 @@ typedef struct RelOptInfo
         List       *lateral_vars;       /* LATERAL Vars and PHVs referenced by rel */
         Relids          lateral_referencers;    /* rels that reference me laterally */
         List       *indexlist;          /* list of IndexOptInfo */
+       List       *statlist;           /* list of StatisticExtInfo */
         BlockNumber pages;                      /* size estimates derived from pg_class */
         double          tuples;
         double          allvisfrac;
@@ -668,6 +669,24 @@ typedef struct ForeignKeyOptInfo
         List       *rinfos[INDEX_MAX_KEYS];
  } ForeignKeyOptInfo;
  
+/*
+ * StatisticExtInfo
+ *             Information about extended statistics for planning/optimization
+ *
+ * Each node describes one kind of statistic (kind) taken from the
+ * pg_statistic_ext row identified by statOid, together with the set of
+ * column attnums it covers (keys) and a back-link to the RelOptInfo of
+ * the table the statistics are defined on (rel).
+ */
+typedef struct StatisticExtInfo
+{
+       NodeTag         type;
+
+       Oid                     statOid;                /* OID of the statistics row */
+       RelOptInfo *rel;                        /* back-link to statistic's table */
+       char            kind;                   /* statistic kind of this entry */
+       Bitmapset  *keys;                       /* attnums of the columns covered */
+} StatisticExtInfo;
  
  /*
   * EquivalenceClasses
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h

new file mode 100644 (file)

index 0000000..961f1f7
--- /dev/null
+++ b/src/include/statistics/extended_stats_internal.h
@@ -0,0 +1,64 @@
+/*-------------------------------------------------------------------------
+ *
+ * extended_stats_internal.h
+ *       POSTGRES extended statistics internal declarations
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/include/statistics/extended_stats_internal.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef EXTENDED_STATS_INTERNAL_H
+#define EXTENDED_STATS_INTERNAL_H
+
+#include "utils/sortsupport.h"
+#include "statistics/statistics.h"
+
+
+typedef struct
+{
+       Oid                     eqopr;                  /* '=' operator for datatype, if any */
+       Oid                     eqfunc;                 /* and associated function */
+       Oid                     ltopr;                  /* '<' operator for datatype, if any */
+} StdAnalyzeData;
+
+typedef struct
+{
+       Datum           value;                  /* a data value */
+       int                     tupno;                  /* position index for tuple it came from */
+} ScalarItem;
+
+/* multi-sort: sort support state for comparing items on several dimensions */
+typedef struct MultiSortSupportData
+{
+       int                     ndims;                  /* number of dimensions, i.e. of ssup entries */
+       SortSupportData ssup[1];        /* sort support data for each dimension */
+} MultiSortSupportData;
+
+typedef MultiSortSupportData *MultiSortSupport;
+
+typedef struct SortItem
+{
+       Datum      *values;
+       bool       *isnull;
+} SortItem;
+
+extern MVNDistinct *statext_ndistinct_build(double totalrows,
+                                               int numrows, HeapTuple *rows,
+                                               Bitmapset *attrs, VacAttrStats **stats);
+extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
+extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
+
+extern MultiSortSupport multi_sort_init(int ndims);
+extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
+                                                Oid oper);
+extern int     multi_sort_compare(const void *a, const void *b, void *arg);
+extern int multi_sort_compare_dim(int dim, const SortItem * a,
+                                          const SortItem * b, MultiSortSupport mss);
+extern int multi_sort_compare_dims(int start, int end, const SortItem * a,
+                                               const SortItem * b, MultiSortSupport mss);
+
+#endif   /* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h

new file mode 100644 (file)

index 0000000..a15e39e
--- /dev/null
+++ b/src/include/statistics/statistics.h
@@ -0,0 +1,47 @@
+/*-------------------------------------------------------------------------
+ *
+ * statistics.h
+ *       Extended statistics and selectivity estimation functions.
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/statistics/statistics.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef STATISTICS_H
+#define STATISTICS_H
+
+#include "commands/vacuum.h"
+
+#define STATS_MAX_DIMENSIONS   8               /* max number of attributes */
+
+/* Multivariate distinct coefficients */
+#define STATS_NDISTINCT_MAGIC          0xA352BFA4      /* struct identifier */
+#define STATS_NDISTINCT_TYPE_BASIC     1       /* struct version */
+
+/* MVNDistinctItem represents a single combination of columns */
+typedef struct MVNDistinctItem
+{
+       double          ndistinct;              /* ndistinct value for this combination */
+       Bitmapset  *attrs;                      /* attr numbers of items */
+} MVNDistinctItem;
+
+/* A MVNDistinct object, comprising all possible combinations of columns */
+typedef struct MVNDistinct
+{
+       uint32          magic;                  /* magic constant marker */
+       uint32          type;                   /* type of ndistinct (BASIC) */
+       uint32          nitems;                 /* number of items in the statistic */
+       MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER];
+} MVNDistinct;
+
+extern MVNDistinct *statext_ndistinct_load(Oid mvoid);
+
+extern void BuildRelationExtStatistics(Relation onerel, double totalrows,
+                                                  int numrows, HeapTuple *rows,
+                                                  int natts, VacAttrStats **vacattrstats);
+extern bool statext_is_kind_built(HeapTuple htup, char kind);
+
+#endif   /* STATISTICS_H */
diff --git a/src/include/utils/acl.h b/src/include/utils/acl.h

index 0d118525c9cb3dd1df43df82ff4c3ed24c10c4c1..c957d8e17011fa925faf68ed1e70b0bb2d24267d 100644 (file)
--- a/src/include/utils/acl.h
+++ b/src/include/utils/acl.h
@@ -192,6 +192,7 @@ typedef enum AclObjectKind
         ACL_KIND_OPFAMILY,                      /* pg_opfamily */
         ACL_KIND_COLLATION,                     /* pg_collation */
         ACL_KIND_CONVERSION,            /* pg_conversion */
+       ACL_KIND_STATISTICS,            /* pg_statistic_ext */
         ACL_KIND_TABLESPACE,            /* pg_tablespace */
         ACL_KIND_TSDICTIONARY,          /* pg_ts_dict */
         ACL_KIND_TSCONFIGURATION,       /* pg_ts_config */
@@ -326,6 +327,7 @@ extern bool pg_event_trigger_ownercheck(Oid et_oid, Oid roleid);
  extern bool pg_extension_ownercheck(Oid ext_oid, Oid roleid);
  extern bool pg_publication_ownercheck(Oid pub_oid, Oid roleid);
  extern bool pg_subscription_ownercheck(Oid sub_oid, Oid roleid);
+extern bool pg_statistics_ownercheck(Oid stat_oid, Oid roleid);
  extern bool has_createrole_privilege(Oid roleid);
  extern bool has_bypassrls_privilege(Oid roleid);
  
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h

index a617a7cf5661438713d4d4ab548c4042c883f2f5..ab875bb9d776f3ceac1e138648658ee590544cfb 100644 (file)
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -92,6 +92,7 @@ typedef struct RelationData
         bool            rd_isvalid;             /* relcache entry is valid */
         char            rd_indexvalid;  /* state of rd_indexlist: 0 = not valid, 1 =
                                                                  * valid, 2 = temporarily forced */
+       bool            rd_statvalid;   /* is rd_statlist valid? */
  
         /*
          * rd_createSubid is the ID of the highest subtransaction the rel has
@@ -136,6 +137,9 @@ typedef struct RelationData
         Oid                     rd_pkindex;             /* OID of primary key, if any */
         Oid                     rd_replidindex; /* OID of replica identity index, if any */
  
+       /* data managed by RelationGetStatExtList: */
+       List       *rd_statlist;        /* list of OIDs of extended stats */
+
         /* data managed by RelationGetIndexAttrBitmap: */
         Bitmapset  *rd_indexattr;       /* identifies columns used in indexes */
         Bitmapset  *rd_keyattr;         /* cols that can be ref'd by foreign keys */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h

index da36b6774fc002d5dd5cb7c962be38a9eae530e3..81af3aebb8de34e3a8e7a08d8268502a498524ae 100644 (file)
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -39,6 +39,7 @@ extern void RelationClose(Relation relation);
   */
  extern List *RelationGetFKeyList(Relation relation);
  extern List *RelationGetIndexList(Relation relation);
+extern List *RelationGetStatExtList(Relation relation);
  extern Oid     RelationGetOidIndex(Relation relation);
  extern Oid     RelationGetPrimaryKeyIndex(Relation relation);
  extern Oid     RelationGetReplicaIndex(Relation relation);
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h

index b35faf81b9e42ae9dfed00f30ad7b100d56e0c48..36805ebefbad5d5cda9285a0e89630fecce966f0 100644 (file)
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -86,6 +86,8 @@ enum SysCacheIdentifier
         PUBLICATIONRELMAP,
         RULERELNAME,
         SEQRELID,
+       STATEXTNAMENSP,
+       STATEXTOID,
         STATRELATTINH,
         SUBSCRIPTIONOID,
         SUBSCRIPTIONNAME,
diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out

index b01be59bbbd5bb431d816d24922f44bf85e882d6..ce581bb93d5e3f6ec17d50b5979fea08b15a72bf 100644 (file)
--- a/src/test/regress/expected/alter_generic.out
+++ b/src/test/regress/expected/alter_generic.out
@@ -496,6 +496,48 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD
  ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4);
  ERROR:  function 2(integer,integer) does not exist in operator family "alt_opf18"
  DROP OPERATOR FAMILY alt_opf18 USING btree;
+--
+-- Statistics
+--
+SET SESSION AUTHORIZATION regress_alter_user1;
+CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER);
+CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
+CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat2;   -- failed (name conflict)
+ERROR:  statistics "alt_stat2" already exists in schema "alt_nsp1"
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat3;   -- failed (name conflict)
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2;  -- failed (no role membership)
+ERROR:  must be member of role "regress_alter_user2"
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3;  -- OK
+ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2;    -- OK
+SET SESSION AUTHORIZATION regress_alter_user2;
+CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
+CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
+ALTER STATISTICS alt_stat3 RENAME TO alt_stat4;    -- failed (not owner)
+ERROR:  must be owner of statistics alt_stat3
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat4;    -- OK
+ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner)
+ERROR:  must be owner of statistics alt_stat3
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership)
+ERROR:  must be member of role "regress_alter_user3"
+ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2;                -- failed (not owner)
+ERROR:  must be owner of statistics alt_stat3
+ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2;                -- failed (name conflict)
+ERROR:  statistics "alt_stat2" already exists in schema "alt_nsp2"
+RESET SESSION AUTHORIZATION;
+SELECT nspname, staname, rolname
+  FROM pg_statistic_ext s, pg_namespace n, pg_authid a
+ WHERE s.stanamespace = n.oid AND s.staowner = a.oid
+   AND n.nspname in ('alt_nsp1', 'alt_nsp2')
+ ORDER BY nspname, staname;
+ nspname  |  staname  |       rolname       
+----------+-----------+---------------------
+ alt_nsp1 | alt_stat2 | regress_alter_user2
+ alt_nsp1 | alt_stat3 | regress_alter_user1
+ alt_nsp1 | alt_stat4 | regress_alter_user2
+ alt_nsp2 | alt_stat2 | regress_alter_user3
+(4 rows)
+
  --
  -- Text Search Dictionary
  --
@@ -639,7 +681,7 @@ DROP LANGUAGE alt_lang3 CASCADE;
  DROP LANGUAGE alt_lang4 CASCADE;
  ERROR:  language "alt_lang4" does not exist
  DROP SCHEMA alt_nsp1 CASCADE;
-NOTICE:  drop cascades to 26 other objects
+NOTICE:  drop cascades to 27 other objects
  DETAIL:  drop cascades to function alt_func3(integer)
  drop cascades to function alt_agg3(integer)
  drop cascades to function alt_func4(integer)
@@ -656,6 +698,7 @@ drop cascades to operator family alt_opc1 for access method hash
  drop cascades to operator family alt_opc2 for access method hash
  drop cascades to operator family alt_opf4 for access method hash
  drop cascades to operator family alt_opf2 for access method hash
+drop cascades to table alt_regress_1
  drop cascades to text search dictionary alt_ts_dict3
  drop cascades to text search dictionary alt_ts_dict4
  drop cascades to text search dictionary alt_ts_dict2
diff --git a/src/test/regress/expected/object_address.out b/src/test/regress/expected/object_address.out

index 978d9a9a0f89914154ee612eef3aa4d547794c7c..814e05e4ef124c350c436a7f9fbe657ccb0c3c9c 100644 (file)
--- a/src/test/regress/expected/object_address.out
+++ b/src/test/regress/expected/object_address.out
@@ -39,6 +39,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL (
  CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable;
  CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT);
  WARNING:  tables were not subscribed, you will have to run ALTER SUBSCRIPTION ... REFRESH PUBLICATION to subscribe the tables
+CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable;
  -- test some error cases
  SELECT pg_get_object_address('stone', '{}', '{}');
  ERROR:  unrecognized object type "stone"
@@ -409,7 +410,8 @@ WITH objects (type, name, args) AS (VALUES
                                 ('access method', '{btree}', '{}'),
                                 ('publication', '{addr_pub}', '{}'),
                                 ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'),
-                               ('subscription', '{addr_sub}', '{}')
+                               ('subscription', '{addr_sub}', '{}'),
+                               ('statistics', '{addr_nsp, gentable_stat}', '{}')
          )
  SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
         -- test roundtrip through pg_identify_object_as_address
@@ -457,6 +459,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
   trigger                   |            |                   | t on addr_nsp.gentable                                               | t
   operator family           | pg_catalog | integer_ops       | pg_catalog.integer_ops USING btree                                   | t
   policy                    |            |                   | genpol on addr_nsp.gentable                                          | t
+ statistics                | addr_nsp   | gentable_stat     | addr_nsp.gentable_stat                                               | t
   collation                 | pg_catalog | "default"         | pg_catalog."default"                                                 | t
   transform                 |            |                   | for integer on language sql                                          | t
   text search dictionary    | addr_nsp   | addr_ts_dict      | addr_nsp.addr_ts_dict                                                | t
@@ -466,7 +469,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
   subscription              |            | addr_sub          | addr_sub                                                             | t
   publication               |            | addr_pub          | addr_pub                                                             | t
   publication relation      |            |                   | gentable in publication addr_pub                                     | t
-(45 rows)
+(46 rows)
  
  ---
  --- Cleanup resources
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index 64d9dd605fdc4c90b6f191705156ca327eb327d4..262036ac4ff503ff79424c33f59a8dea4f84d74d 100644 (file)
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -823,11 +823,12 @@ WHERE c.castmethod = 'b' AND
   text              | character         |        0 | i
   character varying | character         |        0 | i
   pg_node_tree      | text              |        0 | i
+ pg_ndistinct      | bytea             |        0 | i
   cidr              | inet              |        0 | i
   xml               | text              |        0 | a
   xml               | character varying |        0 | a
   xml               | character         |        0 | a
-(7 rows)
+(8 rows)
  
  -- **************** pg_conversion ****************
  -- Look for illegal values in pg_conversion fields.
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out

index c4c8450b830511f144d222ea46adc8d1548d2b4c..7f04c7a7cc8dabf41ef9066453dcac1386a9c44d 100644 (file)
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2164,6 +2164,14 @@ pg_stats| SELECT n.nspname AS schemaname,
       JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum))))
       LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
    WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid))));
+pg_stats_ext| SELECT n.nspname AS schemaname,
+    c.relname AS tablename,
+    s.staname,
+    s.stakeys AS attnums,
+    length((s.standistinct)::text) AS ndistbytes
+   FROM ((pg_statistic_ext s
+     JOIN pg_class c ON ((c.oid = s.starelid)))
+     LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)));
  pg_tables| SELECT n.nspname AS schemaname,
      c.relname AS tablename,
      pg_get_userbyid(c.relowner) AS tableowner,
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out

index 8e3028edaa265757eda78767de8df504eb2145fe..753ad81e43f5904a5497d6c65a800e20af8a22e5 100644 (file)
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -142,6 +142,7 @@ pg_shdepend|t
  pg_shdescription|t
  pg_shseclabel|t
  pg_statistic|t
+pg_statistic_ext|t
  pg_subscription|t
  pg_subscription_rel|t
  pg_tablespace|t
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out

new file mode 100644 (file)

index 0000000..83d70bf
--- /dev/null
+++ b/src/test/regress/expected/stats_ext.out
@@ -0,0 +1,155 @@
+-- Generic extended statistics support
+-- Ensure stats are dropped sanely
+CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+DROP STATISTICS ab1_a_b_stats;
+CREATE SCHEMA regress_schema_2;
+CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
+DROP STATISTICS regress_schema_2.ab1_a_b_stats;
+-- Ensure statistics are dropped when columns are
+CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
+CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+ALTER TABLE ab1 DROP COLUMN a;
+\d ab1
+                Table "public.ab1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ b      | integer |           |          | 
+ c      | integer |           |          | 
+Statistics:
+    "public.ab1_b_c_stats" WITH (ndistinct) ON (b, c)
+
+DROP TABLE ab1;
+-- Ensure things work sanely with SET STATISTICS 0
+CREATE TABLE ab1 (a INTEGER, b INTEGER);
+ALTER TABLE ab1 ALTER a SET STATISTICS 0;
+INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+ANALYZE ab1;
+ERROR:  extended statistics could not be collected for column "a" of relation public.ab1
+HINT:  Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1
+ALTER TABLE ab1 ALTER a SET STATISTICS -1;
+ANALYZE ab1;
+DROP TABLE ab1;
+-- n-distinct tests
+CREATE TABLE ndistinct (
+    filler1 TEXT,
+    filler2 NUMERIC,
+    a INT,
+    b INT,
+    filler3 DATE,
+    c INT,
+    d INT
+);
+-- unknown column
+CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
+ERROR:  column "unknown_column" referenced in statistics does not exist
+-- single column
+CREATE STATISTICS s10 ON (a) FROM ndistinct;
+ERROR:  statistics require at least 2 columns
+-- single column, duplicated
+CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
+ERROR:  duplicate column name in statistics definition
+-- two columns, one duplicated
+CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
+ERROR:  duplicate column name in statistics definition
+-- correct command
+CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
+-- perfectly correlated groups
+INSERT INTO ndistinct (a, b, c, filler1)
+     SELECT i/100, i/100, i/100, cash_words(i::money)
+       FROM generate_series(1,10000) s(i);
+ANALYZE ndistinct;
+SELECT staenabled, standistinct
+  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
+ staenabled |                                          standistinct                                          
+------------+------------------------------------------------------------------------------------------------
+ {d}        | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
+   Group Key: a, b
+   ->  Seq Scan on ndistinct
+(3 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
+   Group Key: a, b, c
+   ->  Seq Scan on ndistinct
+(3 rows)
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+         QUERY PLAN          
+-----------------------------
+ HashAggregate
+   Group Key: a, b, c, d
+   ->  Seq Scan on ndistinct
+(3 rows)
+
+TRUNCATE TABLE ndistinct;
+-- partially correlated groups
+INSERT INTO ndistinct (a, b, c)
+     SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE ndistinct;
+SELECT staenabled, standistinct
+  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
+ staenabled |                                          standistinct                                          
+------------+------------------------------------------------------------------------------------------------
+ {d}        | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
+(1 row)
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ HashAggregate  (cost=230.00..232.01 rows=201 width=16)
+   Group Key: a, b
+   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=8)
+(3 rows)
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ HashAggregate  (cost=255.00..257.01 rows=201 width=20)
+   Group Key: a, b, c
+   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=12)
+(3 rows)
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ HashAggregate  (cost=280.00..290.00 rows=1000 width=24)
+   Group Key: a, b, c, d
+   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=16)
+(3 rows)
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ HashAggregate  (cost=255.00..265.00 rows=1000 width=20)
+   Group Key: b, c, d
+   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=12)
+(3 rows)
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ HashAggregate  (cost=230.00..240.00 rows=1000 width=16)
+   Group Key: a, d
+   ->  Seq Scan on ndistinct  (cost=0.00..155.00 rows=10000 width=8)
+(3 rows)
+
+DROP TABLE ndistinct;
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out

index 8d75bbfab31b2a7de2330c99140d24f9daa41b43..84022f6a29867e9d5ade1bf6db1e876beedb0f0d 100644 (file)
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -59,7 +59,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR
  -- Look for types that should have an array type according to their typtype,
  -- but don't.  We exclude composites here because we have not bothered to
  -- make array types corresponding to the system catalogs' rowtypes.
--- NOTE: as of v10, this check finds pg_node_tree and smgr.
+-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr.
  SELECT p1.oid, p1.typname
  FROM pg_type as p1
  WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
@@ -67,11 +67,12 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
      (SELECT 1 FROM pg_type as p2
       WHERE p2.typname = ('_' || p1.typname)::name AND
             p2.typelem = p1.oid and p1.typarray = p2.oid);
- oid |   typname    
------+--------------
- 194 | pg_node_tree
- 210 | smgr
-(2 rows)
+ oid  |   typname    
+------+--------------
+  194 | pg_node_tree
+ 3361 | pg_ndistinct
+  210 | smgr
+(3 rows)
  
  -- Make sure typarray points to a varlena array type of our own base
  SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype,
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule

index 38743d98c345b76514e7f349a0ae9cf066c55106..c283bdcb3721d85a54ac6b5c3567e3b29a2a1d85 100644 (file)
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -89,7 +89,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview
  # ----------
  # Another group of parallel tests
  # ----------
-test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan
+test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan stats_ext
  
  # rules cannot run concurrently with any test that creates a view
  test: rules psql_crosstab amutils
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule

index d9f64c28738d1dec7de66adc3b2aa84cb030de4a..3a0d536a2ba9bb13ded2fb5be7773a73467870be 100644 (file)
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -130,6 +130,7 @@ test: misc_functions
  test: sysviews
  test: tsrf
  test: tidscan
+test: stats_ext
  test: rules
  test: psql_crosstab
  test: select_parallel
diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql

index c9ea47996725b8bed1c89ff4408787eb85e157c6..f6fa8d8bfdb8421059226419562b467b905dddd0 100644 (file)
--- a/src/test/regress/sql/alter_generic.sql
+++ b/src/test/regress/sql/alter_generic.sql
@@ -433,6 +433,37 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD
  ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4);
  DROP OPERATOR FAMILY alt_opf18 USING btree;
  
+--
+-- Statistics
+--
+SET SESSION AUTHORIZATION regress_alter_user1;
+CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER);
+CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
+CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
+
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat2;   -- failed (name conflict)
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat3;   -- OK
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2;  -- failed (no role membership)
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3;  -- OK
+ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2;    -- OK
+
+SET SESSION AUTHORIZATION regress_alter_user2;
+CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
+CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
+
+ALTER STATISTICS alt_stat3 RENAME TO alt_stat4;    -- failed (not owner)
+ALTER STATISTICS alt_stat1 RENAME TO alt_stat4;    -- OK
+ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner)
+ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership)
+ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2;                -- failed (not owner)
+ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2;                -- failed (name conflict)
+
+RESET SESSION AUTHORIZATION;
+SELECT nspname, staname, rolname
+  FROM pg_statistic_ext s, pg_namespace n, pg_authid a
+ WHERE s.stanamespace = n.oid AND s.staowner = a.oid
+   AND n.nspname in ('alt_nsp1', 'alt_nsp2')
+ ORDER BY nspname, staname;
  
  --
  -- Text Search Dictionary
diff --git a/src/test/regress/sql/object_address.sql b/src/test/regress/sql/object_address.sql

index 28476daff18b07c97c61ed906cd4d051cd5bfe62..c9219e47c4afe008a31c50532047f482d493bda8 100644 (file)
--- a/src/test/regress/sql/object_address.sql
+++ b/src/test/regress/sql/object_address.sql
@@ -41,6 +41,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL (
         TO SQL WITH FUNCTION int4recv(internal));
  CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable;
  CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT);
+CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable;
  
  -- test some error cases
  SELECT pg_get_object_address('stone', '{}', '{}');
@@ -185,7 +186,8 @@ WITH objects (type, name, args) AS (VALUES
                                 ('access method', '{btree}', '{}'),
                                 ('publication', '{addr_pub}', '{}'),
                                 ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'),
-                               ('subscription', '{addr_sub}', '{}')
+                               ('subscription', '{addr_sub}', '{}'),
+                               ('statistics', '{addr_nsp, gentable_stat}', '{}')
          )
  SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
         -- test roundtrip through pg_identify_object_as_address
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql

new file mode 100644 (file)

index 0000000..946cb84
--- /dev/null
+++ b/src/test/regress/sql/stats_ext.sql
@@ -0,0 +1,102 @@
+-- Generic extended statistics support
+
+-- Ensure stats are dropped sanely
+CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+DROP STATISTICS ab1_a_b_stats;
+
+CREATE SCHEMA regress_schema_2;
+CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
+DROP STATISTICS regress_schema_2.ab1_a_b_stats;
+
+-- Ensure statistics are dropped when columns are
+CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
+CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+ALTER TABLE ab1 DROP COLUMN a;
+\d ab1
+DROP TABLE ab1;
+
+-- Ensure things work sanely with SET STATISTICS 0
+CREATE TABLE ab1 (a INTEGER, b INTEGER);
+ALTER TABLE ab1 ALTER a SET STATISTICS 0;
+INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
+CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
+ANALYZE ab1;
+ALTER TABLE ab1 ALTER a SET STATISTICS -1;
+ANALYZE ab1;
+DROP TABLE ab1;
+
+
+-- n-distinct tests
+CREATE TABLE ndistinct (
+    filler1 TEXT,
+    filler2 NUMERIC,
+    a INT,
+    b INT,
+    filler3 DATE,
+    c INT,
+    d INT
+);
+
+-- unknown column
+CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
+
+-- single column
+CREATE STATISTICS s10 ON (a) FROM ndistinct;
+
+-- single column, duplicated
+CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
+
+-- two columns, one duplicated
+CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
+
+-- correct command
+CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
+
+-- perfectly correlated groups
+INSERT INTO ndistinct (a, b, c, filler1)
+     SELECT i/100, i/100, i/100, cash_words(i::money)
+       FROM generate_series(1,10000) s(i);
+
+ANALYZE ndistinct;
+
+SELECT staenabled, standistinct
+  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+
+EXPLAIN (COSTS off)
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+
+TRUNCATE TABLE ndistinct;
+
+-- partially correlated groups
+INSERT INTO ndistinct (a, b, c)
+     SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE ndistinct;
+
+SELECT staenabled, standistinct
+  FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+
+EXPLAIN
+ SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
+
+DROP TABLE ndistinct;
diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql

index 0a31249f5d5b46f0b5cf4b2a8fb652847a0f8cf7..4c658140081503d873aefd65d198fbb53bd485dd 100644 (file)
--- a/src/test/regress/sql/type_sanity.sql
+++ b/src/test/regress/sql/type_sanity.sql
@@ -53,7 +53,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR
  -- Look for types that should have an array type according to their typtype,
  -- but don't.  We exclude composites here because we have not bothered to
  -- make array types corresponding to the system catalogs' rowtypes.
--- NOTE: as of v10, this check finds pg_node_tree and smgr.
+-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr.
  
  SELECT p1.oid, p1.typname
  FROM pg_type as p1
author	Alvaro Herrera <alvherre@alvh.no-ip.org>
	Fri, 24 Mar 2017 17:06:10 +0000 (14:06 -0300)
committer	Alvaro Herrera <alvherre@alvh.no-ip.org>
	Fri, 24 Mar 2017 17:06:10 +0000 (14:06 -0300)
doc/src/sgml/catalogs.sgml		patch \| blob \| history
doc/src/sgml/func.sgml		patch \| blob \| history
doc/src/sgml/ref/allfiles.sgml		patch \| blob \| history
doc/src/sgml/ref/alter_statistics.sgml	[new file with mode: 0644]	patch \| blob
doc/src/sgml/ref/alter_table.sgml		patch \| blob \| history
doc/src/sgml/ref/comment.sgml		patch \| blob \| history
doc/src/sgml/ref/create_statistics.sgml	[new file with mode: 0644]	patch \| blob
doc/src/sgml/ref/drop_statistics.sgml	[new file with mode: 0644]	patch \| blob
doc/src/sgml/reference.sgml		patch \| blob \| history
src/backend/Makefile		patch \| blob \| history
src/backend/catalog/Makefile		patch \| blob \| history
src/backend/catalog/aclchk.c		patch \| blob \| history
src/backend/catalog/dependency.c		patch \| blob \| history
src/backend/catalog/heap.c		patch \| blob \| history
src/backend/catalog/namespace.c		patch \| blob \| history
src/backend/catalog/objectaddress.c		patch \| blob \| history
src/backend/catalog/pg_shdepend.c		patch \| blob \| history
src/backend/catalog/system_views.sql		patch \| blob \| history
src/backend/commands/Makefile		patch \| blob \| history
src/backend/commands/alter.c		patch \| blob \| history
src/backend/commands/analyze.c		patch \| blob \| history
src/backend/commands/dropcmds.c		patch \| blob \| history
src/backend/commands/event_trigger.c		patch \| blob \| history
src/backend/commands/statscmds.c	[new file with mode: 0644]	patch \| blob
src/backend/nodes/copyfuncs.c		patch \| blob \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| history
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/optimizer/util/plancat.c		patch \| blob \| history
src/backend/parser/gram.y		patch \| blob \| history
src/backend/statistics/Makefile	[new file with mode: 0644]	patch \| blob
src/backend/statistics/README	[new file with mode: 0644]	patch \| blob
src/backend/statistics/extended_stats.c	[new file with mode: 0644]	patch \| blob
src/backend/statistics/mvdistinct.c	[new file with mode: 0644]	patch \| blob
src/backend/tcop/utility.c		patch \| blob \| history
src/backend/utils/adt/ruleutils.c		patch \| blob \| history
src/backend/utils/adt/selfuncs.c		patch \| blob \| history
src/backend/utils/cache/relcache.c		patch \| blob \| history
src/backend/utils/cache/syscache.c		patch \| blob \| history
src/bin/pg_dump/common.c		patch \| blob \| history
src/bin/pg_dump/pg_backup_archiver.c		patch \| blob \| history
src/bin/pg_dump/pg_dump.c		patch \| blob \| history
src/bin/pg_dump/pg_dump.h		patch \| blob \| history
src/bin/pg_dump/pg_dump_sort.c		patch \| blob \| history
src/bin/psql/describe.c		patch \| blob \| history
src/include/catalog/catversion.h		patch \| blob \| history
src/include/catalog/dependency.h		patch \| blob \| history
src/include/catalog/heap.h		patch \| blob \| history
src/include/catalog/indexing.h		patch \| blob \| history
src/include/catalog/namespace.h		patch \| blob \| history
src/include/catalog/pg_cast.h		patch \| blob \| history
src/include/catalog/pg_proc.h		patch \| blob \| history
src/include/catalog/pg_statistic_ext.h	[new file with mode: 0644]	patch \| blob
src/include/catalog/pg_type.h		patch \| blob \| history
src/include/catalog/toasting.h		patch \| blob \| history
src/include/commands/defrem.h		patch \| blob \| history
src/include/nodes/nodes.h		patch \| blob \| history
src/include/nodes/parsenodes.h		patch \| blob \| history
src/include/nodes/relation.h		patch \| blob \| history
src/include/statistics/extended_stats_internal.h	[new file with mode: 0644]	patch \| blob
src/include/statistics/statistics.h	[new file with mode: 0644]	patch \| blob
src/include/utils/acl.h		patch \| blob \| history
src/include/utils/rel.h		patch \| blob \| history
src/include/utils/relcache.h		patch \| blob \| history
src/include/utils/syscache.h		patch \| blob \| history
src/test/regress/expected/alter_generic.out		patch \| blob \| history
src/test/regress/expected/object_address.out		patch \| blob \| history
src/test/regress/expected/opr_sanity.out		patch \| blob \| history
src/test/regress/expected/rules.out		patch \| blob \| history
src/test/regress/expected/sanity_check.out		patch \| blob \| history
src/test/regress/expected/stats_ext.out	[new file with mode: 0644]	patch \| blob
src/test/regress/expected/type_sanity.out		patch \| blob \| history
src/test/regress/parallel_schedule		patch \| blob \| history
src/test/regress/serial_schedule		patch \| blob \| history
src/test/regress/sql/alter_generic.sql		patch \| blob \| history
src/test/regress/sql/object_address.sql		patch \| blob \| history
src/test/regress/sql/stats_ext.sql	[new file with mode: 0644]	patch \| blob
src/test/regress/sql/type_sanity.sql		patch \| blob \| history