From 7b504eb282ca2f5104b5c00b4f05a3ef6bb1385b Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Fri, 24 Mar 2017 14:06:10 -0300 Subject: [PATCH] Implement multivariate n-distinct coefficients MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add support for explicitly declared statistic objects (CREATE STATISTICS), allowing collection of statistics on more complex combinations that individual table columns. Companion commands DROP STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are added too. All this DDL has been designed so that more statistic types can be added later on, such as multivariate most-common-values and multivariate histograms between columns of a single table, leaving room for permitting columns on multiple tables, too, as well as expressions. This commit only adds support for collection of n-distinct coefficient on user-specified sets of columns in a single table. This is useful to estimate number of distinct groups in GROUP BY and DISTINCT clauses; estimation errors there can cause over-allocation of memory in hashed aggregates, for instance, so it's a worthwhile problem to solve. A new special pseudo-type pg_ndistinct is used. (num-distinct estimation was deemed sufficiently useful by itself that this is worthwhile even if no further statistic types are added immediately; so much so that another version of essentially the same functionality was submitted by Kyotaro Horiguchi: https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp though this commit does not use that code.) Author: Tomas Vondra. Some code rework by Álvaro. Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes, Ideriha Takeshi Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql --- doc/src/sgml/catalogs.sgml | 97 +++ doc/src/sgml/func.sgml | 36 +- doc/src/sgml/ref/allfiles.sgml | 3 + doc/src/sgml/ref/alter_statistics.sgml | 115 +++ doc/src/sgml/ref/alter_table.sgml | 9 +- doc/src/sgml/ref/comment.sgml | 6 +- doc/src/sgml/ref/create_statistics.sgml | 155 ++++ doc/src/sgml/ref/drop_statistics.sgml | 98 +++ doc/src/sgml/reference.sgml | 3 + src/backend/Makefile | 2 +- src/backend/catalog/Makefile | 1 + src/backend/catalog/aclchk.c | 35 + src/backend/catalog/dependency.c | 9 + src/backend/catalog/heap.c | 74 ++ src/backend/catalog/namespace.c | 56 ++ src/backend/catalog/objectaddress.c | 55 ++ src/backend/catalog/pg_shdepend.c | 2 + src/backend/catalog/system_views.sql | 10 + src/backend/commands/Makefile | 6 +- src/backend/commands/alter.c | 8 + src/backend/commands/analyze.c | 23 +- src/backend/commands/dropcmds.c | 7 + src/backend/commands/event_trigger.c | 3 + src/backend/commands/statscmds.c | 296 ++++++++ src/backend/nodes/copyfuncs.c | 17 + src/backend/nodes/equalfuncs.c | 15 + src/backend/nodes/outfuncs.c | 31 + src/backend/optimizer/util/plancat.c | 67 +- src/backend/parser/gram.y | 62 +- src/backend/statistics/Makefile | 17 + src/backend/statistics/README | 34 + src/backend/statistics/extended_stats.c | 389 ++++++++++ src/backend/statistics/mvdistinct.c | 671 ++++++++++++++++++ src/backend/tcop/utility.c | 12 + src/backend/utils/adt/ruleutils.c | 81 +++ src/backend/utils/adt/selfuncs.c | 181 ++++- src/backend/utils/cache/relcache.c | 79 +++ src/backend/utils/cache/syscache.c | 23 + src/bin/pg_dump/common.c | 4 + src/bin/pg_dump/pg_backup_archiver.c | 3 +- src/bin/pg_dump/pg_dump.c | 153 ++++ src/bin/pg_dump/pg_dump.h | 9 + src/bin/pg_dump/pg_dump_sort.c | 12 +- src/bin/psql/describe.c | 51 ++ src/include/catalog/catversion.h | 2 +- src/include/catalog/dependency.h | 1 + src/include/catalog/heap.h | 1 + src/include/catalog/indexing.h | 7 + src/include/catalog/namespace.h | 2 + src/include/catalog/pg_cast.h | 4 + src/include/catalog/pg_proc.h | 11 + src/include/catalog/pg_statistic_ext.h | 75 ++ src/include/catalog/pg_type.h | 4 + src/include/catalog/toasting.h | 1 + src/include/commands/defrem.h | 4 + src/include/nodes/nodes.h | 2 + src/include/nodes/parsenodes.h | 15 + src/include/nodes/relation.h | 19 + .../statistics/extended_stats_internal.h | 64 ++ src/include/statistics/statistics.h | 47 ++ src/include/utils/acl.h | 2 + src/include/utils/rel.h | 4 + src/include/utils/relcache.h | 1 + src/include/utils/syscache.h | 2 + src/test/regress/expected/alter_generic.out | 45 +- src/test/regress/expected/object_address.out | 7 +- src/test/regress/expected/opr_sanity.out | 3 +- src/test/regress/expected/rules.out | 8 + src/test/regress/expected/sanity_check.out | 1 + src/test/regress/expected/stats_ext.out | 155 ++++ src/test/regress/expected/type_sanity.out | 13 +- src/test/regress/parallel_schedule | 2 +- src/test/regress/serial_schedule | 1 + src/test/regress/sql/alter_generic.sql | 31 + src/test/regress/sql/object_address.sql | 4 +- src/test/regress/sql/stats_ext.sql | 102 +++ src/test/regress/sql/type_sanity.sql | 2 +- 77 files changed, 3599 insertions(+), 63 deletions(-) create mode 100644 doc/src/sgml/ref/alter_statistics.sgml create mode 100644 doc/src/sgml/ref/create_statistics.sgml create mode 100644 doc/src/sgml/ref/drop_statistics.sgml create mode 100644 src/backend/commands/statscmds.c create mode 100644 src/backend/statistics/Makefile create mode 100644 src/backend/statistics/README create mode 100644 src/backend/statistics/extended_stats.c create mode 100644 src/backend/statistics/mvdistinct.c create mode 100644 src/include/catalog/pg_statistic_ext.h create mode 100644 src/include/statistics/extended_stats_internal.h create mode 100644 src/include/statistics/statistics.h create mode 100644 src/test/regress/expected/stats_ext.out create mode 100644 src/test/regress/sql/stats_ext.sql diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index c531c73aac..ac39c639ed 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -295,6 +295,11 @@ planner statistics + + pg_statistic_ext + extended planner statistics + + pg_subscription logical replication subscriptions @@ -4247,6 +4252,98 @@ + + <structname>pg_statistic_ext</structname> + + + pg_statistic_ext + + + + The catalog pg_statistic_ext + holds extended planner statistics. + + + + <structname>pg_statistic_ext</> Columns + + + + + Name + Type + References + Description + + + + + + + starelid + oid + pg_class.oid + The table that the described columns belongs to + + + + staname + name + + Name of the statistic. + + + + stanamespace + oid + pg_namespace.oid + + The OID of the namespace that contains this statistic + + + + + staowner + oid + pg_authid.oid + Owner of the statistic + + + + staenabled + char[] + + + An array with the modes of the enabled statistic types, encoded as + d for ndistinct coefficients. + + + + + stakeys + int2vector + pg_attribute.attnum + + This is an array of values that indicate which table columns this + statistic covers. For example a value of 1 3 would + mean that the first and the third table columns make up the statistic key. + + + + + standistinct + pg_ndistinct + + + N-distinct coefficients, serialized as pg_ndistinct type. + + + + + +
+
+ <structname>pg_namespace</structname> diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 3f0f7363b9..ba6f8dd8d2 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -16720,6 +16720,10 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); pg_get_serial_sequence + + pg_get_statisticsextdef + + pg_get_triggerdef @@ -16889,6 +16893,11 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); get name of the sequence that a serial, smallserial or bigserial column uses
+ + pg_get_statisticsextdef(statext_oid) + text + get CREATE STATISTICS command for extended statistics objects + pg_get_triggerdef(trigger_oid) text @@ -17034,19 +17043,20 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); pg_get_constraintdef, pg_get_indexdef, pg_get_ruledef, - and pg_get_triggerdef, respectively reconstruct the - creating command for a constraint, index, rule, or trigger. (Note that this - is a decompiled reconstruction, not the original text of the command.) - pg_get_expr decompiles the internal form of an - individual expression, such as the default value for a column. It can be - useful when examining the contents of system catalogs. If the expression - might contain Vars, specify the OID of the relation they refer to as the - second parameter; if no Vars are expected, zero is sufficient. - pg_get_viewdef reconstructs the SELECT - query that defines a view. Most of these functions come in two variants, - one of which can optionally pretty-print the result. The - pretty-printed format is more readable, but the default format is more - likely to be interpreted the same way by future versions of + pg_get_statisticsextdef, and + pg_get_triggerdef, respectively reconstruct the + creating command for a constraint, index, rule, extended statistics object, + or trigger. (Note that this is a decompiled reconstruction, not the + original text of the command.) pg_get_expr decompiles + the internal form of an individual expression, such as the default value + for a column. It can be useful when examining the contents of system + catalogs. If the expression might contain Vars, specify the OID of the + relation they refer to as the second parameter; if no Vars are expected, + zero is sufficient. pg_get_viewdef reconstructs the + SELECT query that defines a view. Most of these functions come + in two variants, one of which can optionally pretty-print the + result. The pretty-printed format is more readable, but the default format + is more likely to be interpreted the same way by future versions of PostgreSQL; avoid using pretty-printed output for dump purposes. Passing false for the pretty-print parameter yields the same result as the variant that does not have the parameter at all. diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml index 974e1b74e4..01acc2ef9d 100644 --- a/doc/src/sgml/ref/allfiles.sgml +++ b/doc/src/sgml/ref/allfiles.sgml @@ -34,6 +34,7 @@ Complete list of usable sgml source files in this directory. + @@ -80,6 +81,7 @@ Complete list of usable sgml source files in this directory. + @@ -126,6 +128,7 @@ Complete list of usable sgml source files in this directory. + diff --git a/doc/src/sgml/ref/alter_statistics.sgml b/doc/src/sgml/ref/alter_statistics.sgml new file mode 100644 index 0000000000..3e4d28614a --- /dev/null +++ b/doc/src/sgml/ref/alter_statistics.sgml @@ -0,0 +1,115 @@ + + + + + ALTER STATISTICS + + + + ALTER STATISTICS + 7 + SQL - Language Statements + + + + ALTER STATISTICS + + change the definition of a extended statistics + + + + + +ALTER STATISTICS name OWNER TO { new_owner | CURRENT_USER | SESSION_USER } +ALTER STATISTICS name RENAME TO new_name +ALTER STATISTICS name SET SCHEMA new_schema + + + + + Description + + + ALTER STATISTICS changes the parameters of an existing + extended statistics. Any parameters not specifically set in the + ALTER STATISTICS command retain their prior settings. + + + + You must own the statistics to use ALTER STATISTICS. + To change a statistics' schema, you must also have CREATE + privilege on the new schema. + To alter the owner, you must also be a direct or indirect member of the new + owning role, and that role must have CREATE privilege on + the statistics' schema. (These restrictions enforce that altering the owner + doesn't do anything you couldn't do by dropping and recreating the statistics. + However, a superuser can alter ownership of any statistics anyway.) + + + + + Parameters + + + + + name + + + The name (optionally schema-qualified) of the statistics to be altered. + + + + + + new_owner + + + The user name of the new owner of the statistics. + + + + + + new_name + + + The new name for the statistics. + + + + + + new_schema + + + The new schema for the statistics. + + + + + + + + + + Compatibility + + + There's no ALTER STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + + diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 767ea321da..75de226253 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -119,9 +119,12 @@ ALTER TABLE [ IF EXISTS ] name This form drops a column from a table. Indexes and table constraints involving the column will be automatically - dropped as well. You will need to say CASCADE if - anything outside the table depends on the column, for example, - foreign key references or views. + dropped as well. + Multivariate statistics referencing the dropped column will also be + removed if the removal of the column would cause the statistics to + contain data for only a single column. + You will need to say CASCADE if anything outside the table + depends on the column, for example, foreign key references or views. If IF EXISTS is specified and the column does not exist, no error is thrown. In this case a notice is issued instead. diff --git a/doc/src/sgml/ref/comment.sgml b/doc/src/sgml/ref/comment.sgml index 7483c8c03f..8fe17a5767 100644 --- a/doc/src/sgml/ref/comment.sgml +++ b/doc/src/sgml/ref/comment.sgml @@ -51,6 +51,7 @@ COMMENT ON SCHEMA object_name | SEQUENCE object_name | SERVER object_name | + STATISTICS object_name | TABLE object_name | TABLESPACE object_name | TEXT SEARCH CONFIGURATION object_name | @@ -125,8 +126,8 @@ COMMENT ON The name of the object to be commented. Names of tables, aggregates, collations, conversions, domains, foreign tables, functions, indexes, operators, operator classes, operator families, sequences, - text search objects, types, and views can be schema-qualified. - When commenting on a column, + statistics, text search objects, types, and views can be + schema-qualified. When commenting on a column, relation_name must refer to a table, view, composite type, or foreign table. @@ -327,6 +328,7 @@ COMMENT ON RULE my_rule ON my_table IS 'Logs updates of employee records'; COMMENT ON SCHEMA my_schema IS 'Departmental data'; COMMENT ON SEQUENCE my_sequence IS 'Used to generate primary keys'; COMMENT ON SERVER myserver IS 'my foreign server'; +COMMENT ON STATISTICS my_statistics IS 'Improves planner row estimations'; COMMENT ON TABLE my_schema.my_table IS 'Employee Information'; COMMENT ON TABLESPACE my_tablespace IS 'Tablespace for indexes'; COMMENT ON TEXT SEARCH CONFIGURATION my_config IS 'Special word filtering'; diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml new file mode 100644 index 0000000000..60184a347b --- /dev/null +++ b/doc/src/sgml/ref/create_statistics.sgml @@ -0,0 +1,155 @@ + + + + + CREATE STATISTICS + + + + CREATE STATISTICS + 7 + SQL - Language Statements + + + + CREATE STATISTICS + define extended statistics + + + + +CREATE STATISTICS [ IF NOT EXISTS ] statistics_name ON ( + column_name, column_name [, ...]) + FROM table_name + + + + + + Description + + + CREATE STATISTICS will create a new extended statistics + object on the specified table. + The statistics will be created in the current database and + will be owned by the user issuing the command. + + + + If a schema name is given (for example, CREATE STATISTICS + myschema.mystat ...) then the statistics is created in the specified + schema. Otherwise it is created in the current schema. The name of + the statistics must be distinct from the name of any other statistics in the + same schema. + + + + + Parameters + + + + + IF NOT EXISTS + + + Do not throw an error if a statistics with the same name already exists. + A notice is issued in this case. Note that only the name of the + statistics object is considered here. The definition of the statistics is + not considered. + + + + + + statistics_name + + + The name (optionally schema-qualified) of the statistics to be created. + + + + + + column_name + + + The name of a column to be included in the statistics. + + + + + + table_name + + + The name (optionally schema-qualified) of the table the statistics should + be created on. + + + + + + + + + + Notes + + + You must be the owner of a table to create or change statistics on it. + + + + + Examples + + + Create table t1 with two functionally dependent columns, i.e. + knowledge of a value in the first column is sufficient for determining the + value in the other column. Then functional dependencies are built on those + columns: + + +CREATE TABLE t1 ( + a int, + b int +); + +INSERT INTO t1 SELECT i/100, i/500 + FROM generate_series(1,1000000) s(i); + +CREATE STATISTICS s1 ON (a, b) FROM t1; + +ANALYZE t1; + +-- valid combination of values +EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0); + +-- invalid combination of values +EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1); + + + + + + + Compatibility + + + There's no CREATE STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + diff --git a/doc/src/sgml/ref/drop_statistics.sgml b/doc/src/sgml/ref/drop_statistics.sgml new file mode 100644 index 0000000000..98c338182b --- /dev/null +++ b/doc/src/sgml/ref/drop_statistics.sgml @@ -0,0 +1,98 @@ + + + + + DROP STATISTICS + + + + DROP STATISTICS + 7 + SQL - Language Statements + + + + DROP STATISTICS + remove extended statistics + + + + +DROP STATISTICS [ IF EXISTS ] name [, ...] + + + + + Description + + + DROP STATISTICS removes statistics from the database. + Only the statistics owner, the schema owner, and superuser can drop a + statistics. + + + + + + Parameters + + + + IF EXISTS + + + Do not throw an error if the statistics do not exist. A notice is + issued in this case. + + + + + + name + + + The name (optionally schema-qualified) of the statistics to drop. + + + + + + + + + Examples + + + To destroy two statistics objects on different schemas, without failing + if they don't exist: + + +DROP STATISTICS IF EXISTS + accounting.users_uid_creation, + public.grants_user_role; + + + + + + + Compatibility + + + There's no DROP STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + + diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml index 3d8ad232fa..9000b3aaaa 100644 --- a/doc/src/sgml/reference.sgml +++ b/doc/src/sgml/reference.sgml @@ -60,6 +60,7 @@ &alterSchema; &alterSequence; &alterServer; + &alterStatistics; &alterSubscription; &alterSystem; &alterTable; @@ -108,6 +109,7 @@ &createSchema; &createSequence; &createServer; + &createStatistics; &createSubscription; &createTable; &createTableAs; @@ -154,6 +156,7 @@ &dropSchema; &dropSequence; &dropServer; + &dropStatistics; &dropSubscription; &dropTable; &dropTableSpace; diff --git a/src/backend/Makefile b/src/backend/Makefile index fffb0d95ba..bce9d2c3eb 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \ main nodes optimizer port postmaster regex replication rewrite \ - storage tcop tsearch utils $(top_builddir)/src/timezone + statistics storage tcop tsearch utils $(top_builddir)/src/timezone include $(srcdir)/common.mk diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 159cab5c18..fd33426bad 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -33,6 +33,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\ pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \ pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \ pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \ + pg_statistic_ext.h \ pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \ pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \ pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \ diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index be86d76a59..d01930f4a8 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -48,6 +48,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" @@ -3302,6 +3303,8 @@ static const char *const no_priv_msg[MAX_ACL_KIND] = gettext_noop("permission denied for collation %s"), /* ACL_KIND_CONVERSION */ gettext_noop("permission denied for conversion %s"), + /* ACL_KIND_STATISTICS */ + gettext_noop("permission denied for statistics %s"), /* ACL_KIND_TABLESPACE */ gettext_noop("permission denied for tablespace %s"), /* ACL_KIND_TSDICTIONARY */ @@ -3352,6 +3355,8 @@ static const char *const not_owner_msg[MAX_ACL_KIND] = gettext_noop("must be owner of collation %s"), /* ACL_KIND_CONVERSION */ gettext_noop("must be owner of conversion %s"), + /* ACL_KIND_STATISTICS */ + gettext_noop("must be owner of statistics %s"), /* ACL_KIND_TABLESPACE */ gettext_noop("must be owner of tablespace %s"), /* ACL_KIND_TSDICTIONARY */ @@ -3467,6 +3472,10 @@ pg_aclmask(AclObjectKind objkind, Oid table_oid, AttrNumber attnum, Oid roleid, mask, how, NULL); case ACL_KIND_NAMESPACE: return pg_namespace_aclmask(table_oid, roleid, mask, how); + case ACL_KIND_STATISTICS: + elog(ERROR, "grantable rights not supported for statistics"); + /* not reached, but keep compiler quiet */ + return ACL_NO_RIGHTS; case ACL_KIND_TABLESPACE: return pg_tablespace_aclmask(table_oid, roleid, mask, how); case ACL_KIND_FDW: @@ -5103,6 +5112,32 @@ pg_subscription_ownercheck(Oid sub_oid, Oid roleid) return has_privs_of_role(roleid, ownerId); } +/* + * Ownership check for a extended statistics (specified by OID). + */ +bool +pg_statistics_ownercheck(Oid stat_oid, Oid roleid) +{ + HeapTuple tuple; + Oid ownerId; + + /* Superusers bypass all permission checking. */ + if (superuser_arg(roleid)) + return true; + + tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stat_oid)); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("statistics with OID %u do not exist", stat_oid))); + + ownerId = ((Form_pg_statistic_ext) GETSTRUCT(tuple))->staowner; + + ReleaseSysCache(tuple); + + return has_privs_of_role(roleid, ownerId); +} + /* * Check whether specified role has CREATEROLE privilege (or is a superuser) * diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index fc088b2165..ee27cae7df 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -51,6 +51,7 @@ #include "catalog/pg_publication.h" #include "catalog/pg_publication_rel.h" #include "catalog/pg_rewrite.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" @@ -154,6 +155,7 @@ static const Oid object_classes[] = { RewriteRelationId, /* OCLASS_REWRITE */ TriggerRelationId, /* OCLASS_TRIGGER */ NamespaceRelationId, /* OCLASS_SCHEMA */ + StatisticExtRelationId, /* OCLASS_STATISTIC_EXT */ TSParserRelationId, /* OCLASS_TSPARSER */ TSDictionaryRelationId, /* OCLASS_TSDICT */ TSTemplateRelationId, /* OCLASS_TSTEMPLATE */ @@ -1263,6 +1265,10 @@ doDeletion(const ObjectAddress *object, int flags) DropTransformById(object->objectId); break; + case OCLASS_STATISTIC_EXT: + RemoveStatisticsById(object->objectId); + break; + default: elog(ERROR, "unrecognized object class: %u", object->classId); @@ -2377,6 +2383,9 @@ getObjectClass(const ObjectAddress *object) case NamespaceRelationId: return OCLASS_SCHEMA; + case StatisticExtRelationId: + return OCLASS_STATISTIC_EXT; + case TSParserRelationId: return OCLASS_TSPARSER; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index d49dcdc015..eee5e2f6ca 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -52,6 +52,7 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription_rel.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" @@ -1609,7 +1610,10 @@ RemoveAttributeById(Oid relid, AttrNumber attnum) heap_close(attr_rel, RowExclusiveLock); if (attnum > 0) + { RemoveStatistics(relid, attnum); + RemoveStatisticsExt(relid, attnum); + } relation_close(rel, NoLock); } @@ -1860,6 +1864,7 @@ heap_drop_with_catalog(Oid relid) * delete statistics */ RemoveStatistics(relid, 0); + RemoveStatisticsExt(relid, 0); /* * delete attribute tuples @@ -2771,6 +2776,75 @@ RemoveStatistics(Oid relid, AttrNumber attnum) } +/* + * RemoveStatisticsExt --- remove entries in pg_statistic_ext for a relation + * + * If attnum is zero, remove all entries for rel; else remove only the + * one(s) involving that column. + */ +void +RemoveStatisticsExt(Oid relid, AttrNumber attnum) +{ + Relation pgstatisticext; + SysScanDesc scan; + ScanKeyData key; + HeapTuple tuple; + + /* + * Scan pg_statistic_ext to delete relevant tuples + */ + pgstatisticext = heap_open(StatisticExtRelationId, RowExclusiveLock); + + ScanKeyInit(&key, + Anum_pg_statistic_ext_starelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + scan = systable_beginscan(pgstatisticext, + StatisticExtRelidIndexId, + true, NULL, 1, &key); + + while (HeapTupleIsValid(tuple = systable_getnext(scan))) + { + bool delete = false; + + if (attnum == 0) + delete = true; + else if (attnum != 0) + { + Form_pg_statistic_ext staForm; + int i; + + /* + * Decode the stakeys array and delete any stats that involve the + * specified column. + */ + staForm = (Form_pg_statistic_ext) GETSTRUCT(tuple); + for (i = 0; i < staForm->stakeys.dim1; i++) + { + if (staForm->stakeys.values[i] == attnum) + { + delete = true; + break; + } + } + } + + if (delete) + { + CatalogTupleDelete(pgstatisticext, &tuple->t_self); + deleteDependencyRecordsFor(StatisticExtRelationId, + HeapTupleGetOid(tuple), + false); + } + } + + systable_endscan(scan); + + heap_close(pgstatisticext, RowExclusiveLock); +} + + /* * RelationTruncateIndexes - truncate all indexes associated * with the heap relation to zero tuples. diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index a38da3047f..e521bd908a 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -2085,6 +2085,62 @@ ConversionIsVisible(Oid conid) return visible; } +/* + * get_statistics_oid - find a statistics by possibly qualified name + * + * If not found, returns InvalidOid if missing_ok, else throws error + */ +Oid +get_statistics_oid(List *names, bool missing_ok) +{ + char *schemaname; + char *stats_name; + Oid namespaceId; + Oid stats_oid = InvalidOid; + ListCell *l; + + /* deconstruct the name list */ + DeconstructQualifiedName(names, &schemaname, &stats_name); + + if (schemaname) + { + /* use exact schema given */ + namespaceId = LookupExplicitNamespace(schemaname, missing_ok); + if (missing_ok && !OidIsValid(namespaceId)) + stats_oid = InvalidOid; + else + stats_oid = GetSysCacheOid2(STATEXTNAMENSP, + PointerGetDatum(stats_name), + ObjectIdGetDatum(namespaceId)); + } + else + { + /* search for it in search path */ + recomputeNamespacePath(); + + foreach(l, activeSearchPath) + { + namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + stats_oid = GetSysCacheOid2(STATEXTNAMENSP, + PointerGetDatum(stats_name), + ObjectIdGetDatum(namespaceId)); + if (OidIsValid(stats_oid)) + break; + } + } + + if (!OidIsValid(stats_oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("statistics \"%s\" do not exist", + NameListToString(names)))); + + return stats_oid; +} + /* * get_ts_parser_oid - find a TS parser by possibly qualified name * diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index 61a831b403..2948d64fa7 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -48,6 +48,7 @@ #include "catalog/pg_publication.h" #include "catalog/pg_publication_rel.h" #include "catalog/pg_rewrite.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" @@ -478,6 +479,18 @@ static const ObjectPropertyType ObjectProperty[] = InvalidAttrNumber, ACL_KIND_SUBSCRIPTION, true + }, + { + StatisticExtRelationId, + StatisticExtOidIndexId, + STATEXTOID, + STATEXTNAMENSP, + Anum_pg_statistic_ext_staname, + Anum_pg_statistic_ext_stanamespace, + Anum_pg_statistic_ext_staowner, + InvalidAttrNumber, /* no ACL (same as relation) */ + ACL_KIND_STATISTICS, + true } }; @@ -696,6 +709,10 @@ static const struct object_type_map /* OCLASS_TRANSFORM */ { "transform", OBJECT_TRANSFORM + }, + /* OBJECT_STATISTIC_EXT */ + { + "statistics", OBJECT_STATISTIC_EXT } }; @@ -974,6 +991,12 @@ get_object_address(ObjectType objtype, Node *object, address = get_object_address_defacl(castNode(List, object), missing_ok); break; + case OBJECT_STATISTIC_EXT: + address.classId = StatisticExtRelationId; + address.objectId = get_statistics_oid(castNode(List, object), + missing_ok); + address.objectSubId = 0; + break; default: elog(ERROR, "unrecognized objtype: %d", (int) objtype); /* placate compiler, in case it thinks elog might return */ @@ -2083,6 +2106,7 @@ pg_get_object_address(PG_FUNCTION_ARGS) case OBJECT_ATTRIBUTE: case OBJECT_COLLATION: case OBJECT_CONVERSION: + case OBJECT_STATISTIC_EXT: case OBJECT_TSPARSER: case OBJECT_TSDICTIONARY: case OBJECT_TSTEMPLATE: @@ -2370,6 +2394,10 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser"))); break; + case OBJECT_STATISTIC_EXT: + if (!pg_statistics_ownercheck(address.objectId, roleid)) + aclcheck_error_type(ACLCHECK_NOT_OWNER, address.objectId); + break; default: elog(ERROR, "unrecognized object type: %d", (int) objtype); @@ -3857,6 +3885,10 @@ getObjectTypeDescription(const ObjectAddress *object) appendStringInfoString(&buffer, "subscription"); break; + case OCLASS_STATISTIC_EXT: + appendStringInfoString(&buffer, "statistics"); + break; + default: appendStringInfo(&buffer, "unrecognized %u", object->classId); break; @@ -4880,6 +4912,29 @@ getObjectIdentityParts(const ObjectAddress *object, break; } + case OCLASS_STATISTIC_EXT: + { + HeapTuple tup; + Form_pg_statistic_ext formStatistic; + char *schema; + + tup = SearchSysCache1(STATEXTOID, + ObjectIdGetDatum(object->objectId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for statistics %u", + object->objectId); + formStatistic = (Form_pg_statistic_ext) GETSTRUCT(tup); + schema = get_namespace_name_or_temp(formStatistic->stanamespace); + appendStringInfoString(&buffer, + quote_qualified_identifier(schema, + NameStr(formStatistic->staname))); + if (objname) + *objname = list_make2(schema, + pstrdup(NameStr(formStatistic->staname))); + ReleaseSysCache(tup); + } + break; + default: appendStringInfo(&buffer, "unrecognized object %u %u %d", object->classId, diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c index 8d946ff44c..d28a8afb47 100644 --- a/src/backend/catalog/pg_shdepend.c +++ b/src/backend/catalog/pg_shdepend.c @@ -39,6 +39,7 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" #include "catalog/pg_shdepend.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_ts_config.h" @@ -1415,6 +1416,7 @@ shdepReassignOwned(List *roleids, Oid newrole) case OperatorFamilyRelationId: case OperatorClassRelationId: case ExtensionRelationId: + case StatisticExtRelationId: case TableSpaceRelationId: case DatabaseRelationId: case TSConfigRelationId: diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 80d14296de..b41882aa52 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -186,6 +186,16 @@ CREATE OR REPLACE VIEW pg_sequences AS WHERE NOT pg_is_other_temp_schema(N.oid) AND relkind = 'S'; +CREATE VIEW pg_stats_ext AS + SELECT + N.nspname AS schemaname, + C.relname AS tablename, + S.staname AS staname, + S.stakeys AS attnums, + length(s.standistinct) AS ndistbytes + FROM (pg_statistic_ext S JOIN pg_class C ON (C.oid = S.starelid)) + LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace); + CREATE VIEW pg_stats WITH (security_barrier) AS SELECT nspname AS schemaname, diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile index e0fab38cbe..4a6c99e090 100644 --- a/src/backend/commands/Makefile +++ b/src/backend/commands/Makefile @@ -18,8 +18,8 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \ event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \ indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \ policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \ - schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \ - tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \ - vacuumlazy.o variable.o view.o + schemacmds.o seclabel.o sequence.o statscmds.o subscriptioncmds.o \ + tablecmds.o tablespace.o trigger.o tsearchcmds.o typecmds.o user.o \ + vacuum.o vacuumlazy.o variable.o view.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index cf1391c2e6..2c6435b759 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -33,6 +33,7 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" #include "catalog/pg_subscription.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_dict.h" #include "catalog/pg_ts_parser.h" @@ -120,6 +121,10 @@ report_namespace_conflict(Oid classId, const char *name, Oid nspOid) Assert(OidIsValid(nspOid)); msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\""); break; + case StatisticExtRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("statistics \"%s\" already exists in schema \"%s\""); + break; case TSParserRelationId: Assert(OidIsValid(nspOid)); msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\""); @@ -373,6 +378,7 @@ ExecRenameStmt(RenameStmt *stmt) case OBJECT_OPCLASS: case OBJECT_OPFAMILY: case OBJECT_LANGUAGE: + case OBJECT_STATISTIC_EXT: case OBJECT_TSCONFIGURATION: case OBJECT_TSDICTIONARY: case OBJECT_TSPARSER: @@ -489,6 +495,7 @@ ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt, case OBJECT_OPERATOR: case OBJECT_OPCLASS: case OBJECT_OPFAMILY: + case OBJECT_STATISTIC_EXT: case OBJECT_TSCONFIGURATION: case OBJECT_TSDICTIONARY: case OBJECT_TSPARSER: @@ -803,6 +810,7 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt) case OBJECT_OPERATOR: case OBJECT_OPCLASS: case OBJECT_OPFAMILY: + case OBJECT_STATISTIC_EXT: case OBJECT_TABLESPACE: case OBJECT_TSDICTIONARY: case OBJECT_TSCONFIGURATION: diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 055338fdff..c5b5c54bab 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -17,6 +17,7 @@ #include #include "access/multixact.h" +#include "access/sysattr.h" #include "access/transam.h" #include "access/tupconvert.h" #include "access/tuptoaster.h" @@ -28,6 +29,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_namespace.h" +#include "catalog/pg_statistic_ext.h" #include "commands/dbcommands.h" #include "commands/tablecmds.h" #include "commands/vacuum.h" @@ -39,13 +41,17 @@ #include "parser/parse_relation.h" #include "pgstat.h" #include "postmaster/autovacuum.h" +#include "statistics/extended_stats_internal.h" +#include "statistics/statistics.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" #include "utils/acl.h" #include "utils/attoptcache.h" +#include "utils/builtins.h" #include "utils/datum.h" +#include "utils/fmgroids.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -566,6 +572,10 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, update_attstats(RelationGetRelid(Irel[ind]), false, thisdata->attr_cnt, thisdata->vacattrstats); } + + /* Build extended statistics (if there are any). */ + BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt, + vacattrstats); } /* @@ -1681,19 +1691,6 @@ ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) /* * Extra information used by the default analysis routines */ -typedef struct -{ - Oid eqopr; /* '=' operator for datatype, if any */ - Oid eqfunc; /* and associated function */ - Oid ltopr; /* '<' operator for datatype, if any */ -} StdAnalyzeData; - -typedef struct -{ - Datum value; /* a data value */ - int tupno; /* position index for tuple it came from */ -} ScalarItem; - typedef struct { int count; /* # of duplicates */ diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c index ab73fbf961..cb948f0204 100644 --- a/src/backend/commands/dropcmds.c +++ b/src/backend/commands/dropcmds.c @@ -286,6 +286,13 @@ does_not_exist_skipping(ObjectType objtype, Node *object) msg = gettext_noop("schema \"%s\" does not exist, skipping"); name = strVal((Value *) object); break; + case OBJECT_STATISTIC_EXT: + if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name)) + { + msg = gettext_noop("extended statistics \"%s\" do not exist, skipping"); + name = NameListToString(castNode(List, object)); + } + break; case OBJECT_TSPARSER: if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name)) { diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index 346b347ae1..7366fc74be 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -112,6 +112,7 @@ static event_trigger_support_data event_trigger_support[] = { {"SCHEMA", true}, {"SEQUENCE", true}, {"SERVER", true}, + {"STATISTICS", true}, {"SUBSCRIPTION", true}, {"TABLE", true}, {"TABLESPACE", false}, @@ -1108,6 +1109,7 @@ EventTriggerSupportsObjectType(ObjectType obtype) case OBJECT_SCHEMA: case OBJECT_SEQUENCE: case OBJECT_SUBSCRIPTION: + case OBJECT_STATISTIC_EXT: case OBJECT_TABCONSTRAINT: case OBJECT_TABLE: case OBJECT_TRANSFORM: @@ -1173,6 +1175,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass) case OCLASS_PUBLICATION: case OCLASS_PUBLICATION_REL: case OCLASS_SUBSCRIPTION: + case OCLASS_STATISTIC_EXT: return true; } diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c new file mode 100644 index 0000000000..416309106a --- /dev/null +++ b/src/backend/commands/statscmds.c @@ -0,0 +1,296 @@ +/*------------------------------------------------------------------------- + * + * statscmds.c + * Commands for creating and altering extended statistics + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/statscmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_statistic_ext.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "statistics/statistics.h" +#include "utils/builtins.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + + +/* used for sorting the attnums in CreateStatistics */ +static int +compare_int16(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(int16)); +} + +/* + * CREATE STATISTICS + */ +ObjectAddress +CreateStatistics(CreateStatsStmt *stmt) +{ + int i; + ListCell *l; + int16 attnums[STATS_MAX_DIMENSIONS]; + int numcols = 0; + ObjectAddress address = InvalidObjectAddress; + char *namestr; + NameData staname; + Oid statoid; + Oid namespaceId; + HeapTuple htup; + Datum values[Natts_pg_statistic_ext]; + bool nulls[Natts_pg_statistic_ext]; + int2vector *stakeys; + Relation statrel; + Relation rel; + Oid relid; + ObjectAddress parentobject, + childobject; + Datum types[1]; /* only ndistinct defined now */ + int ntypes; + ArrayType *staenabled; + bool build_ndistinct; + bool requested_type = false; + + Assert(IsA(stmt, CreateStatsStmt)); + + /* resolve the pieces of the name (namespace etc.) */ + namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr); + namestrcpy(&staname, namestr); + + /* + * If if_not_exists was given and the statistics already exists, bail out. + */ + if (SearchSysCacheExists2(STATEXTNAMENSP, + PointerGetDatum(&staname), + ObjectIdGetDatum(namespaceId))) + { + if (stmt->if_not_exists) + { + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("statistics \"%s\" already exist, skipping", + namestr))); + return InvalidObjectAddress; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("statistics \"%s\" already exist", namestr))); + } + + rel = heap_openrv(stmt->relation, AccessExclusiveLock); + relid = RelationGetRelid(rel); + + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" is not a table or materialized view", + RelationGetRelationName(rel)))); + + /* + * Transform column names to array of attnums. While at it, enforce some + * constraints. + */ + foreach(l, stmt->keys) + { + char *attname = strVal(lfirst(l)); + HeapTuple atttuple; + Form_pg_attribute attForm; + TypeCacheEntry *type; + + atttuple = SearchSysCacheAttName(relid, attname); + if (!HeapTupleIsValid(atttuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" referenced in statistics does not exist", + attname))); + attForm = (Form_pg_attribute) GETSTRUCT(atttuple); + + /* Disallow use of system attributes in extended stats */ + if (attForm->attnum < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("statistic creation on system columns is not supported"))); + + /* Disallow data types without a less-than operator */ + type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only scalar types can be used in extended statistics"))); + + /* Make sure no more than STATS_MAX_DIMENSIONS columns are used */ + if (numcols >= STATS_MAX_DIMENSIONS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot have more than %d keys in statistics", + STATS_MAX_DIMENSIONS))); + + attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum; + ReleaseSysCache(atttuple); + numcols++; + } + + /* + * Check that at least two columns were specified in the statement. The + * upper bound was already checked in the loop above. + */ + if (numcols < 2) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("statistics require at least 2 columns"))); + + /* + * Sort the attnums, which makes detecting duplicies somewhat easier, and + * it does not hurt (it does not affect the efficiency, unlike for + * indexes, for example). + */ + qsort(attnums, numcols, sizeof(int16), compare_int16); + + /* + * Look for duplicities in the list of columns. The attnums are sorted so + * just check consecutive elements. + */ + for (i = 1; i < numcols; i++) + if (attnums[i] == attnums[i - 1]) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("duplicate column name in statistics definition"))); + + stakeys = buildint2vector(attnums, numcols); + + /* + * Parse the statistics options. Currently only statistics types are + * recognized. + */ + build_ndistinct = false; + foreach(l, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(l); + + if (strcmp(opt->defname, "ndistinct") == 0) + { + build_ndistinct = defGetBoolean(opt); + requested_type = true; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized STATISTICS option \"%s\"", + opt->defname))); + } + /* If no statistic type was specified, build them all. */ + if (!requested_type) + build_ndistinct = true; + + /* construct the char array of enabled statistic types */ + ntypes = 0; + if (build_ndistinct) + types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT); + Assert(ntypes > 0); + staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c'); + + /* + * Everything seems fine, so let's build the pg_statistic_ext tuple. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + values[Anum_pg_statistic_ext_starelid - 1] = ObjectIdGetDatum(relid); + values[Anum_pg_statistic_ext_staname - 1] = NameGetDatum(&staname); + values[Anum_pg_statistic_ext_stanamespace - 1] = ObjectIdGetDatum(namespaceId); + values[Anum_pg_statistic_ext_staowner - 1] = ObjectIdGetDatum(GetUserId()); + values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(stakeys); + values[Anum_pg_statistic_ext_staenabled - 1] = PointerGetDatum(staenabled); + + /* no statistics build yet */ + nulls[Anum_pg_statistic_ext_standistinct - 1] = true; + + /* insert it into pg_statistic_ext */ + statrel = heap_open(StatisticExtRelationId, RowExclusiveLock); + htup = heap_form_tuple(statrel->rd_att, values, nulls); + CatalogTupleInsert(statrel, htup); + statoid = HeapTupleGetOid(htup); + heap_freetuple(htup); + heap_close(statrel, RowExclusiveLock); + relation_close(rel, NoLock); + + /* + * Add a dependency on a table, so that stats get dropped on DROP TABLE. + */ + ObjectAddressSet(parentobject, RelationRelationId, relid); + ObjectAddressSet(childobject, StatisticExtRelationId, statoid); + recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO); + + /* + * Also add dependency on the schema. This is required to ensure that we + * drop the statistics on DROP SCHEMA. This is not handled automatically + * by DROP TABLE because the statistics are not an object in the table's + * schema. + */ + ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId); + recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO); + + ObjectAddressSet(address, StatisticExtRelationId, statoid); + + /* + * Invalidate relcache so that others see the new statistics. + */ + CacheInvalidateRelcache(rel); + + return address; +} + +/* + * Guts of statistics deletion. + */ +void +RemoveStatisticsById(Oid statsOid) +{ + Relation relation; + Oid relid; + Relation rel; + HeapTuple tup; + Form_pg_statistic_ext statext; + + /* + * Delete the pg_statistic_ext tuple. + */ + relation = heap_open(StatisticExtRelationId, RowExclusiveLock); + + tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid)); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for statistics %u", statsOid); + + statext = (Form_pg_statistic_ext) GETSTRUCT(tup); + relid = statext->starelid; + + rel = heap_open(relid, AccessExclusiveLock); + + simple_heap_delete(relation, &tup->t_self); + + CacheInvalidateRelcache(rel); + + ReleaseSysCache(tup); + + heap_close(relation, RowExclusiveLock); + heap_close(rel, NoLock); +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 93bda42715..c23d5c5285 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -3337,6 +3337,20 @@ _copyIndexStmt(const IndexStmt *from) return newnode; } +static CreateStatsStmt * +_copyCreateStatsStmt(const CreateStatsStmt *from) +{ + CreateStatsStmt *newnode = makeNode(CreateStatsStmt); + + COPY_NODE_FIELD(defnames); + COPY_NODE_FIELD(relation); + COPY_NODE_FIELD(keys); + COPY_NODE_FIELD(options); + COPY_SCALAR_FIELD(if_not_exists); + + return newnode; +} + static CreateFunctionStmt * _copyCreateFunctionStmt(const CreateFunctionStmt *from) { @@ -5050,6 +5064,9 @@ copyObject(const void *from) case T_IndexStmt: retval = _copyIndexStmt(from); break; + case T_CreateStatsStmt: + retval = _copyCreateStatsStmt(from); + break; case T_CreateFunctionStmt: retval = _copyCreateFunctionStmt(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 0d12636d92..5941b7a2bf 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1334,6 +1334,18 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b) return true; } +static bool +_equalCreateStatsStmt(const CreateStatsStmt *a, const CreateStatsStmt *b) +{ + COMPARE_NODE_FIELD(defnames); + COMPARE_NODE_FIELD(relation); + COMPARE_NODE_FIELD(keys); + COMPARE_NODE_FIELD(options); + COMPARE_SCALAR_FIELD(if_not_exists); + + return true; +} + static bool _equalCreateFunctionStmt(const CreateFunctionStmt *a, const CreateFunctionStmt *b) { @@ -3236,6 +3248,9 @@ equal(const void *a, const void *b) case T_IndexStmt: retval = _equalIndexStmt(a, b); break; + case T_CreateStatsStmt: + retval = _equalCreateStatsStmt(a, b); + break; case T_CreateFunctionStmt: retval = _equalCreateFunctionStmt(a, b); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 1b9005fa53..541af02935 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2202,6 +2202,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) WRITE_NODE_FIELD(lateral_vars); WRITE_BITMAPSET_FIELD(lateral_referencers); WRITE_NODE_FIELD(indexlist); + WRITE_NODE_FIELD(statlist); WRITE_UINT_FIELD(pages); WRITE_FLOAT_FIELD(tuples, "%.0f"); WRITE_FLOAT_FIELD(allvisfrac, "%.6f"); @@ -2274,6 +2275,18 @@ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node) appendStringInfo(str, " %d", list_length(node->rinfos[i])); } +static void +_outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node) +{ + WRITE_NODE_TYPE("STATISTICEXTINFO"); + + /* NB: this isn't a complete set of fields */ + WRITE_OID_FIELD(statOid); + /* don't write rel, leads to infinite recursion in plan tree dump */ + WRITE_CHAR_FIELD(kind); + WRITE_BITMAPSET_FIELD(keys); +} + static void _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) { @@ -2577,6 +2590,18 @@ _outIndexStmt(StringInfo str, const IndexStmt *node) WRITE_BOOL_FIELD(if_not_exists); } +static void +_outCreateStatsStmt(StringInfo str, const CreateStatsStmt *node) +{ + WRITE_NODE_TYPE("CREATESTATSSTMT"); + + WRITE_NODE_FIELD(defnames); + WRITE_NODE_FIELD(relation); + WRITE_NODE_FIELD(keys); + WRITE_NODE_FIELD(options); + WRITE_BOOL_FIELD(if_not_exists); +} + static void _outNotifyStmt(StringInfo str, const NotifyStmt *node) { @@ -3936,6 +3961,9 @@ outNode(StringInfo str, const void *obj) case T_PlannerParamItem: _outPlannerParamItem(str, obj); break; + case T_StatisticExtInfo: + _outStatisticExtInfo(str, obj); + break; case T_ExtensibleNode: _outExtensibleNode(str, obj); @@ -3953,6 +3981,9 @@ outNode(StringInfo str, const void *obj) case T_IndexStmt: _outIndexStmt(str, obj); break; + case T_CreateStatsStmt: + _outCreateStatsStmt(str, obj); + break; case T_NotifyStmt: _outNotifyStmt(str, obj); break; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 463f806467..cc88dcc28e 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -29,6 +29,7 @@ #include "catalog/heap.h" #include "catalog/partition.h" #include "catalog/pg_am.h" +#include "catalog/pg_statistic_ext.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -40,8 +41,11 @@ #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" +#include "statistics/statistics.h" #include "storage/bufmgr.h" +#include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -63,7 +67,7 @@ static List *get_relation_constraints(PlannerInfo *root, bool include_notnull); static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); - +static List *get_relation_statistics(RelOptInfo *rel, Relation relation); /* * get_relation_info - @@ -398,6 +402,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, rel->indexlist = indexinfos; + rel->statlist = get_relation_statistics(rel, relation); + /* Grab foreign-table info using the relcache, while we have it */ if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { @@ -1251,6 +1257,65 @@ get_relation_constraints(PlannerInfo *root, return result; } +/* + * get_relation_statistics + * Retrieve extended statistics defined on the table. + * + * Returns a List (possibly empty) of StatisticExtInfo objects describing + * the statistics. Note that this doesn't load the actual statistics data, + * just the identifying metadata. Only stats actually built are considered. + */ +static List * +get_relation_statistics(RelOptInfo *rel, Relation relation) +{ + List *statoidlist; + List *stainfos = NIL; + ListCell *l; + + statoidlist = RelationGetStatExtList(relation); + + foreach(l, statoidlist) + { + Oid statOid = lfirst_oid(l); + Form_pg_statistic_ext staForm; + HeapTuple htup; + Bitmapset *keys = NULL; + int i; + + htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); + if (!htup) + elog(ERROR, "cache lookup failed for statistics %u", statOid); + staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); + + /* + * First, build the array of columns covered. This is ultimately + * wasted if no stats are actually built, but it doesn't seem worth + * troubling over that case. + */ + for (i = 0; i < staForm->stakeys.dim1; i++) + keys = bms_add_member(keys, staForm->stakeys.values[i]); + + /* add one StatisticExtInfo for each kind built */ + if (statext_is_kind_built(htup, STATS_EXT_NDISTINCT)) + { + StatisticExtInfo *info = makeNode(StatisticExtInfo); + + info->statOid = statOid; + info->rel = rel; + info->kind = STATS_EXT_NDISTINCT; + info->keys = bms_copy(keys); + + stainfos = lcons(info, stainfos); + } + + ReleaseSysCache(htup); + bms_free(keys); + } + + list_free(statoidlist); + + return stainfos; +} /* * relation_excluded_by_constraints diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 82844a0399..bbcfc1fb4f 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -257,7 +257,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); ConstraintsSetStmt CopyStmt CreateAsStmt CreateCastStmt CreateDomainStmt CreateExtensionStmt CreateGroupStmt CreateOpClassStmt CreateOpFamilyStmt AlterOpFamilyStmt CreatePLangStmt - CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt + CreateSchemaStmt CreateSeqStmt CreateStmt CreateStatsStmt CreateTableSpaceStmt CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt CreateAssertStmt CreateTransformStmt CreateTrigStmt CreateEventTrigStmt CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatePolicyStmt @@ -874,6 +874,7 @@ stmt : | CreateSeqStmt | CreateStmt | CreateSubscriptionStmt + | CreateStatsStmt | CreateTableSpaceStmt | CreateTransformStmt | CreateTrigStmt @@ -3747,6 +3748,35 @@ OptConsTableSpace: USING INDEX TABLESPACE name { $$ = $4; } ExistingIndex: USING INDEX index_name { $$ = $3; } ; +/***************************************************************************** + * + * QUERY : + * CREATE STATISTICS stats_name WITH (options) ON (columns) FROM relname + * + *****************************************************************************/ + + +CreateStatsStmt: CREATE STATISTICS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name + { + CreateStatsStmt *n = makeNode(CreateStatsStmt); + n->defnames = $3; + n->relation = $10; + n->keys = $7; + n->options = $4; + n->if_not_exists = false; + $$ = (Node *)n; + } + | CREATE STATISTICS IF_P NOT EXISTS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name + { + CreateStatsStmt *n = makeNode(CreateStatsStmt); + n->defnames = $6; + n->relation = $13; + n->keys = $10; + n->options = $7; + n->if_not_exists = true; + $$ = (Node *)n; + } + ; /***************************************************************************** * @@ -6042,6 +6072,7 @@ drop_type_any_name: | FOREIGN TABLE { $$ = OBJECT_FOREIGN_TABLE; } | COLLATION { $$ = OBJECT_COLLATION; } | CONVERSION_P { $$ = OBJECT_CONVERSION; } + | STATISTICS { $$ = OBJECT_STATISTIC_EXT; } | TEXT_P SEARCH PARSER { $$ = OBJECT_TSPARSER; } | TEXT_P SEARCH DICTIONARY { $$ = OBJECT_TSDICTIONARY; } | TEXT_P SEARCH TEMPLATE { $$ = OBJECT_TSTEMPLATE; } @@ -6119,7 +6150,7 @@ opt_restart_seqs: * EXTENSION | EVENT TRIGGER | FOREIGN DATA WRAPPER | * FOREIGN TABLE | INDEX | [PROCEDURAL] LANGUAGE | * MATERIALIZED VIEW | POLICY | ROLE | SCHEMA | SEQUENCE | - * SERVER | TABLE | TABLESPACE | + * SERVER | STATISTICS | TABLE | TABLESPACE | * TEXT SEARCH CONFIGURATION | TEXT SEARCH DICTIONARY | * TEXT SEARCH PARSER | TEXT SEARCH TEMPLATE | TYPE | * VIEW] | @@ -6288,6 +6319,7 @@ comment_type_any_name: COLUMN { $$ = OBJECT_COLUMN; } | INDEX { $$ = OBJECT_INDEX; } | SEQUENCE { $$ = OBJECT_SEQUENCE; } + | STATISTICS { $$ = OBJECT_STATISTIC_EXT; } | TABLE { $$ = OBJECT_TABLE; } | VIEW { $$ = OBJECT_VIEW; } | MATERIALIZED VIEW { $$ = OBJECT_MATVIEW; } @@ -8428,6 +8460,15 @@ RenameStmt: ALTER AGGREGATE aggregate_with_argtypes RENAME TO name n->missing_ok = false; $$ = (Node *)n; } + | ALTER STATISTICS any_name RENAME TO name + { + RenameStmt *n = makeNode(RenameStmt); + n->renameType = OBJECT_STATISTIC_EXT; + n->object = (Node *) $3; + n->newname = $6; + n->missing_ok = false; + $$ = (Node *)n; + } | ALTER TEXT_P SEARCH PARSER any_name RENAME TO name { RenameStmt *n = makeNode(RenameStmt); @@ -8643,6 +8684,15 @@ AlterObjectSchemaStmt: n->missing_ok = true; $$ = (Node *)n; } + | ALTER STATISTICS any_name SET SCHEMA name + { + AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt); + n->objectType = OBJECT_STATISTIC_EXT; + n->object = (Node *) $3; + n->newschema = $6; + n->missing_ok = false; + $$ = (Node *)n; + } | ALTER TEXT_P SEARCH PARSER any_name SET SCHEMA name { AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt); @@ -8906,6 +8956,14 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec n->newowner = $6; $$ = (Node *)n; } + | ALTER STATISTICS any_name OWNER TO RoleSpec + { + AlterOwnerStmt *n = makeNode(AlterOwnerStmt); + n->objectType = OBJECT_STATISTIC_EXT; + n->object = (Node *) $3; + n->newowner = $6; + $$ = (Node *)n; + } | ALTER TEXT_P SEARCH DICTIONARY any_name OWNER TO RoleSpec { AlterOwnerStmt *n = makeNode(AlterOwnerStmt); diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile new file mode 100644 index 0000000000..b3615bd69a --- /dev/null +++ b/src/backend/statistics/Makefile @@ -0,0 +1,17 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for statistics +# +# IDENTIFICATION +# src/backend/statistics/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/statistics +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = extended_stats.o mvdistinct.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/statistics/README b/src/backend/statistics/README new file mode 100644 index 0000000000..beb7c2485f --- /dev/null +++ b/src/backend/statistics/README @@ -0,0 +1,34 @@ +Extended statistics +=================== + +When estimating various quantities (e.g. condition selectivities) the default +approach relies on the assumption of independence. In practice that's often +not true, resulting in estimation errors. + +Extended statistics track different types of dependencies between the columns, +hopefully improving the estimates and producing better plans. + +Currently we only have one type of extended statistics - ndistinct +coefficients, and we use it to improve estimates of grouping queries. See +README.ndistinct for details. + + +Size of sample in ANALYZE +------------------------- +When performing ANALYZE, the number of rows to sample is determined as + + (300 * statistics_target) + +That works reasonably well for statistics on individual columns, but perhaps +it's not enough for extended statistics. Papers analyzing estimation errors +all use samples proportional to the table (usually finding that 1-3% of the +table is enough to build accurate stats). + +The requested accuracy (number of MCV items or histogram bins) should also +be considered when determining the sample size, and in extended statistics +those are not necessarily limited by statistics_target. + +This however merits further discussion, because collecting the sample is quite +expensive and increasing it further would make ANALYZE even more painful. +Judging by the experiments with the current implementation, the fixed size +seems to work reasonably well for now, so we leave this as a future work. diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c new file mode 100644 index 0000000000..d2b9f6a7c7 --- /dev/null +++ b/src/backend/statistics/extended_stats.c @@ -0,0 +1,389 @@ +/*------------------------------------------------------------------------- + * + * extended_stats.c + * POSTGRES extended statistics + * + * Generic code supporting statistic objects created via CREATE STATISTICS. + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/statistics/extended_stats.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "catalog/indexing.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_statistic_ext.h" +#include "nodes/relation.h" +#include "statistics/extended_stats_internal.h" +#include "statistics/statistics.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +/* + * Used internally to refer to an individual pg_statistic_ext entry. + */ +typedef struct StatExtEntry +{ + Oid statOid; /* OID of pg_statistic_ext entry */ + Bitmapset *columns; /* attribute numbers covered by the statistics */ + List *types; /* 'char' list of enabled statistic kinds */ +} StatExtEntry; + + +static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid); +static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, + int natts, VacAttrStats **vacattrstats); +static void statext_store(Relation pg_stext, Oid relid, + MVNDistinct *ndistinct, + VacAttrStats **stats); + + +/* + * Compute requested extended stats, using the rows sampled for the plain + * (single-column) stats. + * + * This fetches a list of stats from pg_statistic_ext, computes the stats + * and serializes them back into the catalog (as bytea values). + */ +void +BuildRelationExtStatistics(Relation onerel, double totalrows, + int numrows, HeapTuple *rows, + int natts, VacAttrStats **vacattrstats) +{ + Relation pg_stext; + ListCell *lc; + List *stats; + + pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock); + stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel)); + + foreach(lc, stats) + { + StatExtEntry *stat = (StatExtEntry *) lfirst(lc); + MVNDistinct *ndistinct = NULL; + VacAttrStats **stats; + ListCell *lc2; + + /* filter only the interesting vacattrstats records */ + stats = lookup_var_attr_stats(onerel, stat->columns, + natts, vacattrstats); + + /* check allowed number of dimensions */ + Assert(bms_num_members(stat->columns) >= 2 && + bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS); + + /* compute statistic of each type */ + foreach(lc2, stat->types) + { + char t = (char) lfirst_int(lc2); + + if (t == STATS_EXT_NDISTINCT) + ndistinct = statext_ndistinct_build(totalrows, numrows, rows, + stat->columns, stats); + } + + /* store the statistics in the catalog */ + statext_store(pg_stext, stat->statOid, ndistinct, stats); + } + + heap_close(pg_stext, RowExclusiveLock); +} + +/* + * statext_is_kind_built + * Is this stat kind built in the given pg_statistic_ext tuple? + */ +bool +statext_is_kind_built(HeapTuple htup, char type) +{ + AttrNumber attnum; + + switch (type) + { + case STATS_EXT_NDISTINCT: + attnum = Anum_pg_statistic_ext_standistinct; + break; + + default: + elog(ERROR, "unexpected statistics type requested: %d", type); + } + + return !heap_attisnull(htup, attnum); +} + +/* + * Return a list (of StatExtEntry) of statistics for the given relation. + */ +static List * +fetch_statentries_for_relation(Relation pg_statext, Oid relid) +{ + SysScanDesc scan; + ScanKeyData skey; + HeapTuple htup; + List *result = NIL; + + /* + * Prepare to scan pg_statistic_ext for entries having indrelid = this + * rel. + */ + ScanKeyInit(&skey, + Anum_pg_statistic_ext_starelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(scan))) + { + StatExtEntry *entry; + Datum datum; + bool isnull; + int i; + ArrayType *arr; + char *enabled; + Form_pg_statistic_ext staForm; + + entry = palloc0(sizeof(StatExtEntry)); + entry->statOid = HeapTupleGetOid(htup); + staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); + for (i = 0; i < staForm->stakeys.dim1; i++) + { + entry->columns = bms_add_member(entry->columns, + staForm->stakeys.values[i]); + } + + /* decode the staenabled char array into a list of chars */ + datum = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_staenabled, &isnull); + Assert(!isnull); + arr = DatumGetArrayTypeP(datum); + if (ARR_NDIM(arr) != 1 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != CHAROID) + elog(ERROR, "staenabled is not a 1-D char array"); + enabled = (char *) ARR_DATA_PTR(arr); + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + Assert(enabled[i] == STATS_EXT_NDISTINCT); + entry->types = lappend_int(entry->types, (int) enabled[i]); + } + + result = lappend(result, entry); + } + + systable_endscan(scan); + + return result; +} + +/* + * Using 'vacattrstats' of size 'natts' as input data, return a newly built + * VacAttrStats array which includes only the items corresponding to attributes + * indicated by 'attrs'. + */ +static VacAttrStats ** +lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts, + VacAttrStats **vacattrstats) +{ + int i = 0; + int x = -1; + VacAttrStats **stats; + Bitmapset *matched = NULL; + + stats = (VacAttrStats **) + palloc(bms_num_members(attrs) * sizeof(VacAttrStats *)); + + /* lookup VacAttrStats info for the requested columns (same attnum) */ + while ((x = bms_next_member(attrs, x)) >= 0) + { + int j; + + stats[i] = NULL; + for (j = 0; j < natts; j++) + { + if (x == vacattrstats[j]->tupattnum) + { + stats[i] = vacattrstats[j]; + break; + } + } + + if (!stats[i]) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("extended statistics could not be collected for column \"%s\" of relation %s.%s", + NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname), + get_namespace_name(rel->rd_rel->relnamespace), + RelationGetRelationName(rel)), + errhint("Consider ALTER TABLE \"%s\".\"%s\" ALTER \"%s\" SET STATISTICS -1", + get_namespace_name(rel->rd_rel->relnamespace), + RelationGetRelationName(rel), + NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname)))); + + /* + * Check that we found a non-dropped column and that the attnum + * matches. + */ + Assert(!stats[i]->attr->attisdropped); + matched = bms_add_member(matched, stats[i]->tupattnum); + + i++; + } + if (bms_subset_compare(matched, attrs) != BMS_EQUAL) + elog(ERROR, "could not find all attributes in attribute stats array"); + bms_free(matched); + + return stats; +} + +/* + * statext_store + * Serializes the statistics and stores them into the pg_statistic_ext tuple. + */ +static void +statext_store(Relation pg_stext, Oid statOid, + MVNDistinct *ndistinct, + VacAttrStats **stats) +{ + HeapTuple stup, + oldtup; + Datum values[Natts_pg_statistic_ext]; + bool nulls[Natts_pg_statistic_ext]; + bool replaces[Natts_pg_statistic_ext]; + + memset(nulls, 1, Natts_pg_statistic_ext * sizeof(bool)); + memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool)); + memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum)); + + /* + * Construct a new pg_statistic_ext tuple, replacing the calculated stats. + */ + if (ndistinct != NULL) + { + bytea *data = statext_ndistinct_serialize(ndistinct); + + nulls[Anum_pg_statistic_ext_standistinct - 1] = (data == NULL); + values[Anum_pg_statistic_ext_standistinct - 1] = PointerGetDatum(data); + } + + /* always replace the value (either by bytea or NULL) */ + replaces[Anum_pg_statistic_ext_standistinct - 1] = true; + + /* there should already be a pg_statistic_ext tuple */ + oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); + if (!HeapTupleIsValid(oldtup)) + elog(ERROR, "cache lookup failed for extended statistics %u", statOid); + + /* replace it */ + stup = heap_modify_tuple(oldtup, + RelationGetDescr(pg_stext), + values, + nulls, + replaces); + ReleaseSysCache(oldtup); + CatalogTupleUpdate(pg_stext, &stup->t_self, stup); + + heap_freetuple(stup); +} + +/* initialize multi-dimensional sort */ +MultiSortSupport +multi_sort_init(int ndims) +{ + MultiSortSupport mss; + + Assert(ndims >= 2); + + mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup) + +sizeof(SortSupportData) * ndims); + + mss->ndims = ndims; + + return mss; +} + +/* + * Prepare sort support info using the given sort operator + * at the position 'sortdim' + */ +void +multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper) +{ + SortSupport ssup = &mss->ssup[sortdim]; + + ssup->ssup_cxt = CurrentMemoryContext; + ssup->ssup_collation = DEFAULT_COLLATION_OID; + ssup->ssup_nulls_first = false; + ssup->ssup_cxt = CurrentMemoryContext; + + PrepareSortSupportFromOrderingOp(oper, ssup); +} + +/* compare all the dimensions in the selected order */ +int +multi_sort_compare(const void *a, const void *b, void *arg) +{ + MultiSortSupport mss = (MultiSortSupport) arg; + SortItem *ia = (SortItem *) a; + SortItem *ib = (SortItem *) b; + int i; + + for (i = 0; i < mss->ndims; i++) + { + int compare; + + compare = ApplySortComparator(ia->values[i], ia->isnull[i], + ib->values[i], ib->isnull[i], + &mss->ssup[i]); + + if (compare != 0) + return compare; + } + + /* equal by default */ + return 0; +} + +/* compare selected dimension */ +int +multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, + MultiSortSupport mss) +{ + return ApplySortComparator(a->values[dim], a->isnull[dim], + b->values[dim], b->isnull[dim], + &mss->ssup[dim]); +} + +int +multi_sort_compare_dims(int start, int end, + const SortItem *a, const SortItem *b, + MultiSortSupport mss) +{ + int dim; + + for (dim = start; dim <= end; dim++) + { + int r = ApplySortComparator(a->values[dim], a->isnull[dim], + b->values[dim], b->isnull[dim], + &mss->ssup[dim]); + + if (r != 0) + return r; + } + + return 0; +} diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c new file mode 100644 index 0000000000..5df4e29eec --- /dev/null +++ b/src/backend/statistics/mvdistinct.c @@ -0,0 +1,671 @@ +/*------------------------------------------------------------------------- + * + * mvdistinct.c + * POSTGRES multivariate ndistinct coefficients + * + * Estimating number of groups in a combination of columns (e.g. for GROUP BY) + * is tricky, and the estimation error is often significant. + + * The multivariate ndistinct coefficients address this by storing ndistinct + * estimates for combinations of the user-specified columns. So for example + * given a statistics object on three columns (a,b,c), this module estimates + * and store n-distinct for (a,b), (a,c), (b,c) and (a,b,c). The per-column + * estimates are already available in pg_statistic. + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/statistics/mvdistinct.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/htup_details.h" +#include "catalog/pg_statistic_ext.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "lib/stringinfo.h" +#include "utils/syscache.h" +#include "utils/typcache.h" +#include "statistics/extended_stats_internal.h" +#include "statistics/statistics.h" + + +static double ndistinct_for_combination(double totalrows, int numrows, + HeapTuple *rows, VacAttrStats **stats, + int k, int *combination); +static double estimate_ndistinct(double totalrows, int numrows, int d, int f1); +static int n_choose_k(int n, int k); +static int num_combinations(int n); + +/* Combination generator API */ + +/* internal state for generator of k-combinations of n elements */ +typedef struct CombinationGenerator +{ + int k; /* size of the combination */ + int n; /* total number of elements */ + int current; /* index of the next combination to return */ + int ncombinations; /* number of combinations (size of array) */ + int *combinations; /* array of pre-built combinations */ +} CombinationGenerator; + +static CombinationGenerator *generator_init(int n, int k); +static void generator_free(CombinationGenerator *state); +static int *generator_next(CombinationGenerator *state); +static void generate_combinations(CombinationGenerator *state); + + +/* + * statext_ndistinct_build + * Compute ndistinct coefficient for the combination of attributes. + * + * This computes the ndistinct estimate using the same estimator used + * in analyze.c and then computes the coefficient. + */ +MVNDistinct * +statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows, + Bitmapset *attrs, VacAttrStats **stats) +{ + MVNDistinct *result; + int k; + int itemcnt; + int numattrs = bms_num_members(attrs); + int numcombs = num_combinations(numattrs); + + result = palloc(offsetof(MVNDistinct, items) + + numcombs * sizeof(MVNDistinctItem)); + result->magic = STATS_NDISTINCT_MAGIC; + result->type = STATS_NDISTINCT_TYPE_BASIC; + result->nitems = numcombs; + + itemcnt = 0; + for (k = 2; k <= numattrs; k++) + { + int *combination; + CombinationGenerator *generator; + + /* generate combinations of K out of N elements */ + generator = generator_init(numattrs, k); + + while ((combination = generator_next(generator))) + { + MVNDistinctItem *item = &result->items[itemcnt]; + int j; + + item->attrs = NULL; + for (j = 0; j < k; j++) + item->attrs = bms_add_member(item->attrs, + stats[combination[j]]->attr->attnum); + item->ndistinct = + ndistinct_for_combination(totalrows, numrows, rows, + stats, k, combination); + + itemcnt++; + Assert(itemcnt <= result->nitems); + } + + generator_free(generator); + } + + /* must consume exactly the whole output array */ + Assert(itemcnt == result->nitems); + + return result; +} + +/* + * statext_ndistinct_load + * Load the ndistinct value for the indicated pg_statistic_ext tuple + */ +MVNDistinct * +statext_ndistinct_load(Oid mvoid) +{ + bool isnull = false; + Datum ndist; + HeapTuple htup; + + htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid)); + if (!htup) + elog(ERROR, "cache lookup failed for statistics %u", mvoid); + + ndist = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_standistinct, &isnull); + if (isnull) + elog(ERROR, + "requested statistic kind %c not yet built for statistics %u", + STATS_EXT_NDISTINCT, mvoid); + + ReleaseSysCache(htup); + + return statext_ndistinct_deserialize(DatumGetByteaP(ndist)); +} + +/* + * statext_ndistinct_serialize + * serialize ndistinct to the on-disk bytea format + */ +bytea * +statext_ndistinct_serialize(MVNDistinct *ndistinct) +{ + int i; + bytea *output; + char *tmp; + Size len; + + Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC); + Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC); + + /* + * Base size is base struct size, plus one base struct for each items, + * including number of items for each. + */ + len = VARHDRSZ + offsetof(MVNDistinct, items) + + ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int)); + + /* and also include space for the actual attribute numbers */ + for (i = 0; i < ndistinct->nitems; i++) + { + int nmembers; + + nmembers = bms_num_members(ndistinct->items[i].attrs); + Assert(nmembers >= 2); + len += sizeof(AttrNumber) * nmembers; + } + + output = (bytea *) palloc(len); + SET_VARSIZE(output, len); + + tmp = VARDATA(output); + + /* Store the base struct values */ + memcpy(tmp, ndistinct, offsetof(MVNDistinct, items)); + tmp += offsetof(MVNDistinct, items); + + /* + * store number of attributes and attribute numbers for each ndistinct + * entry + */ + for (i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem item = ndistinct->items[i]; + int nmembers = bms_num_members(item.attrs); + int x; + + memcpy(tmp, &item.ndistinct, sizeof(double)); + tmp += sizeof(double); + memcpy(tmp, &nmembers, sizeof(int)); + tmp += sizeof(int); + + x = -1; + while ((x = bms_next_member(item.attrs, x)) >= 0) + { + AttrNumber value = (AttrNumber) x; + + memcpy(tmp, &value, sizeof(AttrNumber)); + tmp += sizeof(AttrNumber); + } + + Assert(tmp <= ((char *) output + len)); + } + + return output; +} + +/* + * statext_ndistinct_deserialize + * Read an on-disk bytea format MVNDistinct to in-memory format + */ +MVNDistinct * +statext_ndistinct_deserialize(bytea *data) +{ + int i; + Size expected_size; + MVNDistinct *ndistinct; + char *tmp; + + if (data == NULL) + return NULL; + + if (VARSIZE_ANY_EXHDR(data) < offsetof(MVNDistinct, items)) + elog(ERROR, "invalid MVNDistinct size %ld (expected at least %ld)", + VARSIZE_ANY_EXHDR(data), offsetof(MVNDistinct, items)); + + /* read the MVNDistinct header */ + ndistinct = (MVNDistinct *) palloc(sizeof(MVNDistinct)); + + /* initialize pointer to the data part (skip the varlena header) */ + tmp = VARDATA_ANY(data); + + /* get the header and perform basic sanity checks */ + memcpy(ndistinct, tmp, offsetof(MVNDistinct, items)); + tmp += offsetof(MVNDistinct, items); + + if (ndistinct->magic != STATS_NDISTINCT_MAGIC) + elog(ERROR, "invalid ndistinct magic %d (expected %d)", + ndistinct->magic, STATS_NDISTINCT_MAGIC); + + if (ndistinct->type != STATS_NDISTINCT_TYPE_BASIC) + elog(ERROR, "invalid ndistinct type %d (expected %d)", + ndistinct->type, STATS_NDISTINCT_TYPE_BASIC); + + Assert(ndistinct->nitems > 0); + + /* what minimum bytea size do we expect for those parameters */ + expected_size = offsetof(MVNDistinct, items) + + ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + + sizeof(AttrNumber) * 2); + + if (VARSIZE_ANY_EXHDR(data) < expected_size) + elog(ERROR, "invalid dependencies size %ld (expected at least %ld)", + VARSIZE_ANY_EXHDR(data), expected_size); + + /* allocate space for the ndistinct items */ + ndistinct = repalloc(ndistinct, offsetof(MVNDistinct, items) + + (ndistinct->nitems * sizeof(MVNDistinctItem))); + + for (i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem *item = &ndistinct->items[i]; + int nelems; + + item->attrs = NULL; + + /* ndistinct value */ + memcpy(&item->ndistinct, tmp, sizeof(double)); + tmp += sizeof(double); + + /* number of attributes */ + memcpy(&nelems, tmp, sizeof(int)); + tmp += sizeof(int); + Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS)); + + while (nelems-- > 0) + { + AttrNumber attno; + + memcpy(&attno, tmp, sizeof(AttrNumber)); + tmp += sizeof(AttrNumber); + item->attrs = bms_add_member(item->attrs, attno); + } + + /* still within the bytea */ + Assert(tmp <= ((char *) data + VARSIZE_ANY(data))); + } + + /* we should have consumed the whole bytea exactly */ + Assert(tmp == ((char *) data + VARSIZE_ANY(data))); + + return ndistinct; +} + +/* + * pg_ndistinct_in + * input routine for type pg_ndistinct + * + * pg_ndistinct is real enough to be a table column, but it has no + * operations of its own, and disallows input (jus like pg_node_tree). + */ +Datum +pg_ndistinct_in(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_ndistinct"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_ndistinct + * output routine for type pg_ndistinct + * + * Produces a human-readable representation of the value. + */ +Datum +pg_ndistinct_out(PG_FUNCTION_ARGS) +{ + bytea *data = PG_GETARG_BYTEA_PP(0); + MVNDistinct *ndist = statext_ndistinct_deserialize(data); + int i; + StringInfoData str; + + initStringInfo(&str); + appendStringInfoChar(&str, '['); + + for (i = 0; i < ndist->nitems; i++) + { + MVNDistinctItem item = ndist->items[i]; + + if (i > 0) + appendStringInfoString(&str, ", "); + + appendStringInfoChar(&str, '{'); + outBitmapset(&str, item.attrs); + appendStringInfo(&str, ", %f}", item.ndistinct); + } + + appendStringInfoChar(&str, ']'); + + PG_RETURN_CSTRING(str.data); +} + +/* + * pg_ndistinct_recv + * binary input routine for type pg_ndistinct + */ +Datum +pg_ndistinct_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_ndistinct"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_ndistinct_send + * binary output routine for type pg_ndistinct + * + * n-distinct is serialized into a bytea value, so let's send that. + */ +Datum +pg_ndistinct_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} + +/* + * ndistinct_for_combination + * Estimates number of distinct values in a combination of columns. + * + * This uses the same ndistinct estimator as compute_scalar_stats() in + * ANALYZE, i.e., + * n*d / (n - f1 + f1*n/N) + * + * except that instead of values in a single column we are dealing with + * combination of multiple columns. + */ +static double +ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, + VacAttrStats **stats, int k, int *combination) +{ + int i, + j; + int f1, + cnt, + d; + bool *isnull; + Datum *values; + SortItem *items; + MultiSortSupport mss; + + mss = multi_sort_init(k); + + /* + * In order to determine the number of distinct elements, create separate + * values[]/isnull[] arrays with all the data we have, then sort them + * using the specified column combination as dimensions. We could try to + * sort in place, but it'd probably be more complex and bug-prone. + */ + items = (SortItem *) palloc(numrows * sizeof(SortItem)); + values = (Datum *) palloc0(sizeof(Datum) * numrows * k); + isnull = (bool *) palloc0(sizeof(bool) * numrows * k); + + for (i = 0; i < numrows; i++) + { + items[i].values = &values[i * k]; + items[i].isnull = &isnull[i * k]; + } + + /* + * For each dimension, set up sort-support and fill in the values from + * the sample data. + */ + for (i = 0; i < k; i++) + { + VacAttrStats *colstat = stats[combination[i]]; + TypeCacheEntry *type; + + type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for ordering operator for type %u", + colstat->attrtypid); + + /* prepare the sort function for this dimension */ + multi_sort_add_dimension(mss, i, type->lt_opr); + + /* accumulate all the data for this dimension into the arrays */ + for (j = 0; j < numrows; j++) + { + items[j].values[i] = + heap_getattr(rows[j], + colstat->attr->attnum, + colstat->tupDesc, + &items[j].isnull[i]); + } + } + + /* We can sort the array now ... */ + qsort_arg((void *) items, numrows, sizeof(SortItem), + multi_sort_compare, mss); + + /* ... and count the number of distinct combinations */ + + f1 = 0; + cnt = 1; + d = 1; + for (i = 1; i < numrows; i++) + { + if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0) + { + if (cnt == 1) + f1 += 1; + + d++; + cnt = 0; + } + + cnt += 1; + } + + if (cnt == 1) + f1 += 1; + + return estimate_ndistinct(totalrows, numrows, d, f1); +} + +/* The Duj1 estimator (already used in analyze.c). */ +static double +estimate_ndistinct(double totalrows, int numrows, int d, int f1) +{ + double numer, + denom, + ndistinct; + + numer = (double) numrows * (double) d; + + denom = (double) (numrows - f1) + + (double) f1 *(double) numrows / totalrows; + + ndistinct = numer / denom; + + /* Clamp to sane range in case of roundoff error */ + if (ndistinct < (double) d) + ndistinct = (double) d; + + if (ndistinct > totalrows) + ndistinct = totalrows; + + return floor(ndistinct + 0.5); +} + +/* + * n_choose_k + * computes binomial coefficients using an algorithm that is both + * efficient and prevents overflows + */ +static int +n_choose_k(int n, int k) +{ + int d, + r; + + Assert((k > 0) && (n >= k)); + + /* use symmetry of the binomial coefficients */ + k = Min(k, n - k); + + r = 1; + for (d = 1; d <= k; ++d) + { + r *= n--; + r /= d; + } + + return r; +} + +/* + * num_combinations + * number of combinations, excluding single-value combinations + */ +static int +num_combinations(int n) +{ + int k; + int ncombs = 1; + + for (k = 1; k <= n; k++) + ncombs *= 2; + + ncombs -= (n + 1); + + return ncombs; +} + +/* + * generator_init + * initialize the generator of combinations + * + * The generator produces combinations of K elements in the interval (0..N). + * We prebuild all the combinations in this method, which is simpler than + * generating them on the fly. + */ +static CombinationGenerator * +generator_init(int n, int k) +{ + CombinationGenerator *state; + + Assert((n >= k) && (k > 0)); + + /* allocate the generator state as a single chunk of memory */ + state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator)); + + state->ncombinations = n_choose_k(n, k); + + /* pre-allocate space for all combinations*/ + state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations); + + state->current = 0; + state->k = k; + state->n = n; + + /* now actually pre-generate all the combinations of K elements */ + generate_combinations(state); + + /* make sure we got the expected number of combinations */ + Assert(state->current == state->ncombinations); + + /* reset the number, so we start with the first one */ + state->current = 0; + + return state; +} + +/* + * generator_next + * returns the next combination from the prebuilt list + * + * Returns a combination of K array indexes (0 .. N), as specified to + * generator_init), or NULL when there are no more combination. + */ +static int * +generator_next(CombinationGenerator *state) +{ + if (state->current == state->ncombinations) + return NULL; + + return &state->combinations[state->k * state->current++]; +} + +/* + * generator_free + * free the internal state of the generator + * + * Releases the generator internal state (pre-built combinations). + */ +static void +generator_free(CombinationGenerator *state) +{ + pfree(state->combinations); + pfree(state); +} + +/* + * generate_combinations_recurse + * given a prefix, generate all possible combinations + * + * Given a prefix (first few elements of the combination), generate following + * elements recursively. We generate the combinations in lexicographic order, + * which eliminates permutations of the same combination. + */ +static void +generate_combinations_recurse(CombinationGenerator *state, + int index, int start, int *current) +{ + /* If we haven't filled all the elements, simply recurse. */ + if (index < state->k) + { + int i; + + /* + * The values have to be in ascending order, so make sure we start + * with the value passed by parameter. + */ + + for (i = start; i < state->n; i++) + { + current[index] = i; + generate_combinations_recurse(state, (index + 1), (i + 1), current); + } + + return; + } + else + { + /* we got a valid combination, add it to the array */ + memcpy(&state->combinations[(state->k * state->current)], + current, state->k * sizeof(int)); + state->current++; + } +} + +/* + * generate_combinations + * generate all k-combinations of N elements + */ +static void +generate_combinations(CombinationGenerator *state) +{ + int *current = (int *) palloc0(sizeof(int) * state->k); + + generate_combinations_recurse(state, 0, 0, current); + + pfree(current); +} diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index c8d20fffea..b59821bf97 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1623,6 +1623,10 @@ ProcessUtilitySlow(ParseState *pstate, commandCollected = true; break; + case T_CreateStatsStmt: + address = CreateStatistics((CreateStatsStmt *) parsetree); + break; + case T_AlterCollationStmt: address = AlterCollation((AlterCollationStmt *) parsetree); break; @@ -1992,6 +1996,8 @@ AlterObjectTypeCommandTag(ObjectType objtype) break; case OBJECT_SUBSCRIPTION: tag = "ALTER SUBSCRIPTION"; + case OBJECT_STATISTIC_EXT: + tag = "ALTER STATISTICS"; break; default: tag = "???"; @@ -2286,6 +2292,8 @@ CreateCommandTag(Node *parsetree) break; case OBJECT_PUBLICATION: tag = "DROP PUBLICATION"; + case OBJECT_STATISTIC_EXT: + tag = "DROP STATISTICS"; break; default: tag = "???"; @@ -2689,6 +2697,10 @@ CreateCommandTag(Node *parsetree) tag = "EXECUTE"; break; + case T_CreateStatsStmt: + tag = "CREATE STATISTICS"; + break; + case T_DeallocateStmt: { DeallocateStmt *stmt = (DeallocateStmt *) parsetree; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 5c823250bc..81c91039e4 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -35,6 +35,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" #include "commands/defrem.h" @@ -317,6 +318,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno, const Oid *excludeOps, bool attrsOnly, bool showTblSpc, int prettyFlags, bool missing_ok); +static char *pg_get_statisticsext_worker(Oid statextid, bool missing_ok); static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags, bool attrsOnly); static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand, @@ -1421,6 +1423,85 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, return buf.data; } +/* + * pg_get_statisticsextdef + * Get the definition of an extended statistics object + */ +Datum +pg_get_statisticsextdef(PG_FUNCTION_ARGS) +{ + Oid statextid = PG_GETARG_OID(0); + char *res; + + res = pg_get_statisticsext_worker(statextid, true); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +/* + * Internal workhorse to decompile an extended statistics object. + */ +static char * +pg_get_statisticsext_worker(Oid statextid, bool missing_ok) +{ + Form_pg_statistic_ext statextrec; + Form_pg_class pgclassrec; + HeapTuple statexttup; + HeapTuple pgclasstup; + StringInfoData buf; + int colno; + + statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid)); + + if (!HeapTupleIsValid(statexttup)) + { + if (missing_ok) + return NULL; + elog(ERROR, "cache lookup failed for extended statistics %u", statextid); + } + + statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup); + + pgclasstup = SearchSysCache1(RELOID, ObjectIdGetDatum(statextrec->starelid)); + + if (!HeapTupleIsValid(statexttup)) + { + ReleaseSysCache(statexttup); + elog(ERROR, "cache lookup failed for relation %u", statextrec->starelid); + } + + pgclassrec = (Form_pg_class) GETSTRUCT(pgclasstup); + + initStringInfo(&buf); + + appendStringInfo(&buf, "CREATE STATISTICS %s ON (", + quote_identifier(NameStr(statextrec->staname))); + + for (colno = 0; colno < statextrec->stakeys.dim1; colno++) + { + AttrNumber attnum = statextrec->stakeys.values[colno]; + char *attname; + + if (colno > 0) + appendStringInfoString(&buf, ", "); + + attname = get_relid_attribute_name(statextrec->starelid, attnum); + + appendStringInfoString(&buf, quote_identifier(attname)); + } + + appendStringInfo(&buf, ") FROM %s", + quote_identifier(NameStr(pgclassrec->relname))); + + ReleaseSysCache(statexttup); + ReleaseSysCache(pgclasstup); + + return buf.data; +} + /* * pg_get_partkeydef * diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index f8b28fe0e6..cc24c8aeb5 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -110,6 +110,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_type.h" #include "executor/executor.h" #include "mb/pg_wchar.h" @@ -126,6 +127,7 @@ #include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parsetree.h" +#include "statistics/statistics.h" #include "utils/builtins.h" #include "utils/bytea.h" #include "utils/date.h" @@ -164,6 +166,8 @@ static double eqjoinsel_inner(Oid operator, static double eqjoinsel_semi(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2, RelOptInfo *inner_rel); +static bool estimate_multivariate_ndistinct(PlannerInfo *root, + RelOptInfo *rel, List **varinfos, double *ndistinct); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); @@ -3398,25 +3402,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, { GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos); RelOptInfo *rel = varinfo1->rel; - double reldistinct = varinfo1->ndistinct; + double reldistinct = 1; double relmaxndistinct = reldistinct; int relvarcount = 1; List *newvarinfos = NIL; + List *relvarinfos = NIL; /* - * Get the product of numdistinct estimates of the Vars for this rel. - * Also, construct new varinfos list of remaining Vars. + * Split the list of varinfos in two - one for the current rel, + * one for remaining Vars on other rels. */ + relvarinfos = lcons(varinfo1, relvarinfos); for_each_cell(l, lnext(list_head(varinfos))) { GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); if (varinfo2->rel == varinfo1->rel) { - reldistinct *= varinfo2->ndistinct; - if (relmaxndistinct < varinfo2->ndistinct) - relmaxndistinct = varinfo2->ndistinct; - relvarcount++; + /* varinfos on current rel */ + relvarinfos = lcons(varinfo2, relvarinfos); } else { @@ -3425,6 +3429,43 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, } } + /* + * Get the numdistinct estimate for the Vars of this rel. We + * iteratively search for multivariate n-distinct with maximum number + * of vars; assuming that each var group is independent of the others, + * we multiply them together. Any remaining relvarinfos after + * no more multivariate matches are found are assumed independent too, + * so their individual ndistinct estimates are multiplied also. + */ + while (relvarinfos) + { + double mvndistinct; + + if (estimate_multivariate_ndistinct(root, rel, &relvarinfos, + &mvndistinct)) + { + reldistinct *= mvndistinct; + if (relmaxndistinct < mvndistinct) + relmaxndistinct = mvndistinct; + relvarcount++; /* inaccurate, but doesn't matter */ + } + else + { + foreach (l, relvarinfos) + { + GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); + + reldistinct *= varinfo2->ndistinct; + if (relmaxndistinct < varinfo2->ndistinct) + relmaxndistinct = varinfo2->ndistinct; + relvarcount++; + } + + /* we're done with this relation */ + relvarinfos = NIL; + } + } + /* * Sanity check --- don't divide by zero if empty relation. */ @@ -3667,6 +3708,132 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets) *------------------------------------------------------------------------- */ +/* + * Find applicable ndistinct statistics for the given list of VarInfos (which + * must all belong to the given rel), and update *ndistinct to the estimate of + * the MVNDistinctItem that best matches. If a match it found, *varinfos is + * updated to remove the list of matched varinfos. + * + * Varinfos that aren't for simple Vars are ignored. + * + * Return TRUE if we're able to find a match, FALSE otherwise. + */ +static bool +estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, + List **varinfos, double *ndistinct) +{ + ListCell *lc; + Bitmapset *attnums = NULL; + int nmatches; + Oid statOid = InvalidOid; + MVNDistinct *stats; + Bitmapset *matched = NULL; + + /* bail out immediately if the table has no extended statistics */ + if (!rel->statlist) + return false; + + /* Determine the attnums we're looking for */ + foreach(lc, *varinfos) + { + GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc); + + Assert(varinfo->rel == rel); + + if (IsA(varinfo->var, Var)) + { + attnums = bms_add_member(attnums, + ((Var *) varinfo->var)->varattno); + } + } + + /* look for the ndistinct statistics matching the most vars */ + nmatches = 1; /* we require at least two matches */ + foreach(lc, rel->statlist) + { + StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); + Bitmapset *shared; + + /* skip statistics of other kinds */ + if (info->kind != STATS_EXT_NDISTINCT) + continue; + + /* compute attnums shared by the vars and the statistic */ + shared = bms_intersect(info->keys, attnums); + + /* + * Does this statistics matches more columns than the currently + * best statistic? If so, use this one instead. + * + * XXX This should break ties using name of the statistic, or + * something like that, to make the outcome stable. + */ + if (bms_num_members(shared) > nmatches) + { + statOid = info->statOid; + nmatches = bms_num_members(shared); + matched = shared; + } + } + + /* No match? */ + if (statOid == InvalidOid) + return false; + Assert(nmatches > 1 && matched != NULL); + + stats = statext_ndistinct_load(statOid); + + /* + * If we have a match, search it for the specific item that matches (there + * must be one), and construct the output values. + */ + if (stats) + { + int i; + List *newlist = NIL; + MVNDistinctItem *item = NULL; + + /* Find the specific item that exactly matches the combination */ + for (i = 0; i < stats->nitems; i++) + { + MVNDistinctItem *tmpitem = &stats->items[i]; + + if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL) + { + item = tmpitem; + break; + } + } + + /* make sure we found an item */ + if (!item) + elog(ERROR, "corrupt MVNDistinct entry"); + + /* Form the output varinfo list, keeping only unmatched ones */ + foreach(lc, *varinfos) + { + GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc); + AttrNumber attnum; + + if (!IsA(varinfo->var, Var)) + { + newlist = lappend(newlist, varinfo); + continue; + } + + attnum = ((Var *) varinfo->var)->varattno; + if (!bms_is_member(attnum, matched)) + newlist = lappend(newlist, varinfo); + } + + *varinfos = newlist; + *ndistinct = item->ndistinct; + return true; + } + + return false; +} + /* * convert_to_scalar * Convert non-NULL values of the indicated types to the comparison diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index ce55fc5277..a6b60c67ca 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -56,6 +56,7 @@ #include "catalog/pg_publication.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_shseclabel.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_trigger.h" @@ -4451,6 +4452,82 @@ RelationGetIndexList(Relation relation) return result; } +/* + * RelationGetStatExtList + * get a list of OIDs of extended statistics on this relation + * + * The statistics list is created only if someone requests it, in a way + * similar to RelationGetIndexList(). We scan pg_statistic_ext to find + * relevant statistics, and add the list to the relcache entry so that we + * won't have to compute it again. Note that shared cache inval of a + * relcache entry will delete the old list and set rd_statvalid to 0, + * so that we must recompute the statistics list on next request. This + * handles creation or deletion of a statistic. + * + * The returned list is guaranteed to be sorted in order by OID, although + * this is not currently needed. + * + * Since shared cache inval causes the relcache's copy of the list to go away, + * we return a copy of the list palloc'd in the caller's context. The caller + * may list_free() the returned list after scanning it. This is necessary + * since the caller will typically be doing syscache lookups on the relevant + * statistics, and syscache lookup could cause SI messages to be processed! + */ +List * +RelationGetStatExtList(Relation relation) +{ + Relation indrel; + SysScanDesc indscan; + ScanKeyData skey; + HeapTuple htup; + List *result; + List *oldlist; + MemoryContext oldcxt; + + /* Quick exit if we already computed the list. */ + if (relation->rd_statvalid != 0) + return list_copy(relation->rd_statlist); + + /* + * We build the list we intend to return (in the caller's context) while + * doing the scan. After successfully completing the scan, we copy that + * list into the relcache entry. This avoids cache-context memory leakage + * if we get some sort of error partway through. + */ + result = NIL; + + /* Prepare to scan pg_statistic_ext for entries having starelid = this rel. */ + ScanKeyInit(&skey, + Anum_pg_statistic_ext_starelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + indrel = heap_open(StatisticExtRelationId, AccessShareLock); + indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(indscan))) + /* TODO maybe include only already built statistics? */ + result = insert_ordered_oid(result, HeapTupleGetOid(htup)); + + systable_endscan(indscan); + + heap_close(indrel, AccessShareLock); + + /* Now save a copy of the completed list in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + oldlist = relation->rd_statlist; + relation->rd_statlist = list_copy(result); + + relation->rd_statvalid = true; + MemoryContextSwitchTo(oldcxt); + + /* Don't leak the old list, if there is one */ + list_free(oldlist); + + return result; +} + /* * insert_ordered_oid * Insert a new Oid into a sorted list of Oids, preserving ordering @@ -5560,6 +5637,8 @@ load_relcache_init_file(bool shared) rel->rd_pkattr = NULL; rel->rd_idattr = NULL; rel->rd_pubactions = NULL; + rel->rd_statvalid = false; + rel->rd_statlist = NIL; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index d5a376406f..d8c823f42b 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -61,6 +61,7 @@ #include "catalog/pg_shseclabel.h" #include "catalog/pg_replication_origin.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_subscription_rel.h" #include "catalog/pg_tablespace.h" @@ -726,6 +727,28 @@ static const struct cachedesc cacheinfo[] = { }, 32 }, + {StatisticExtRelationId, /* STATEXTNAMENSP */ + StatisticExtNameIndexId, + 2, + { + Anum_pg_statistic_ext_staname, + Anum_pg_statistic_ext_stanamespace, + 0, + 0 + }, + 4 + }, + {StatisticExtRelationId, /* STATEXTOID */ + StatisticExtOidIndexId, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 4 + }, {StatisticRelationId, /* STATRELATTINH */ StatisticRelidAttnumInhIndexId, 3, diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index 89530a9f0f..e2bc3576dc 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -272,6 +272,10 @@ getSchemaData(Archive *fout, int *numTablesPtr) write_msg(NULL, "reading indexes\n"); getIndexes(fout, tblinfo, numTables); + if (g_verbose) + write_msg(NULL, "reading extended statistics\n"); + getExtendedStatistics(fout, tblinfo, numTables); + if (g_verbose) write_msg(NULL, "reading constraints\n"); getConstraints(fout, tblinfo, numTables); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index dd0892539a..f77581d6ec 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -3540,7 +3540,8 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass) strcmp(te->desc, "TRIGGER") == 0 || strcmp(te->desc, "ROW SECURITY") == 0 || strcmp(te->desc, "POLICY") == 0 || - strcmp(te->desc, "USER MAPPING") == 0) + strcmp(te->desc, "USER MAPPING") == 0 || + strcmp(te->desc, "STATISTICS") == 0) { /* these object types don't have separate owners */ } diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index b3d95d7f6e..ba34cc163e 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -192,6 +192,7 @@ static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo); static void dumpSequence(Archive *fout, TableInfo *tbinfo); static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, IndxInfo *indxinfo); +static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo); static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo); static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo); static void dumpTSParser(Archive *fout, TSParserInfo *prsinfo); @@ -6582,6 +6583,99 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) destroyPQExpBuffer(query); } +/* + * getExtendedStatistics + * get information about extended statistics on a dumpable table + * or materialized view. + * + * Note: extended statistics data is not returned directly to the caller, but + * it does get entered into the DumpableObject tables. + */ +void +getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables) +{ + int i, + j; + PQExpBuffer query; + PGresult *res; + StatsExtInfo *statsextinfo; + int ntups; + int i_tableoid; + int i_oid; + int i_staname; + int i_stadef; + + /* Extended statistics were new in v10 */ + if (fout->remoteVersion < 100000) + return; + + query = createPQExpBuffer(); + + for (i = 0; i < numTables; i++) + { + TableInfo *tbinfo = &tblinfo[i]; + + /* Only plain tables and materialized views can have extended statistics. */ + if (tbinfo->relkind != RELKIND_RELATION && + tbinfo->relkind != RELKIND_MATVIEW) + continue; + + /* + * Ignore extended statistics of tables whose definitions are not to + * be dumped. + */ + if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)) + continue; + + if (g_verbose) + write_msg(NULL, "reading extended statistics for table \"%s.%s\"\n", + tbinfo->dobj.namespace->dobj.name, + tbinfo->dobj.name); + + /* Make sure we are in proper schema so stadef is right */ + selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name); + + resetPQExpBuffer(query); + + appendPQExpBuffer(query, + "SELECT " + "tableoid, " + "oid, " + "staname, " + "pg_catalog.pg_get_statisticsextdef(oid) AS stadef " + "FROM pg_statistic_ext " + "WHERE starelid = '%u' " + "ORDER BY staname", tbinfo->dobj.catId.oid); + + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + ntups = PQntuples(res); + + i_tableoid = PQfnumber(res, "tableoid"); + i_oid = PQfnumber(res, "oid"); + i_staname = PQfnumber(res, "staname"); + i_stadef = PQfnumber(res, "stadef"); + + statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo)); + + for (j = 0; j < ntups; j++) + { + statsextinfo[j].dobj.objType = DO_STATSEXT; + statsextinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid)); + statsextinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid)); + AssignDumpId(&statsextinfo[j].dobj); + statsextinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_staname)); + statsextinfo[j].dobj.namespace = tbinfo->dobj.namespace; + statsextinfo[j].statsexttable = tbinfo; + statsextinfo[j].statsextdef = pg_strdup(PQgetvalue(res, j, i_stadef)); + } + + PQclear(res); + } + + destroyPQExpBuffer(query); +} + /* * getConstraints * @@ -9234,6 +9328,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) case DO_INDEX: dumpIndex(fout, (IndxInfo *) dobj); break; + case DO_STATSEXT: + dumpStatisticsExt(fout, (StatsExtInfo *) dobj); + break; case DO_REFRESH_MATVIEW: refreshMatViewData(fout, (TableDataInfo *) dobj); break; @@ -15728,6 +15825,61 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo) destroyPQExpBuffer(labelq); } +/* + * dumpStatisticsExt + * write out to fout an extended statistics object + */ +static void +dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo) +{ + DumpOptions *dopt = fout->dopt; + TableInfo *tbinfo = statsextinfo->statsexttable; + PQExpBuffer q; + PQExpBuffer delq; + PQExpBuffer labelq; + + if (dopt->dataOnly) + return; + + q = createPQExpBuffer(); + delq = createPQExpBuffer(); + labelq = createPQExpBuffer(); + + appendPQExpBuffer(labelq, "STATISTICS %s", + fmtId(statsextinfo->dobj.name)); + + appendPQExpBuffer(q, "%s;\n", statsextinfo->statsextdef); + + appendPQExpBuffer(delq, "DROP STATISTICS %s.", + fmtId(tbinfo->dobj.namespace->dobj.name)); + appendPQExpBuffer(delq, "%s;\n", + fmtId(statsextinfo->dobj.name)); + + if (statsextinfo->dobj.dump & DUMP_COMPONENT_DEFINITION) + ArchiveEntry(fout, statsextinfo->dobj.catId, + statsextinfo->dobj.dumpId, + statsextinfo->dobj.name, + tbinfo->dobj.namespace->dobj.name, + NULL, + tbinfo->rolname, false, + "STATISTICS", SECTION_POST_DATA, + q->data, delq->data, NULL, + NULL, 0, + NULL, NULL); + + /* Dump Statistics Comments */ + if (statsextinfo->dobj.dump & DUMP_COMPONENT_COMMENT) + dumpComment(fout, labelq->data, + tbinfo->dobj.namespace->dobj.name, + tbinfo->rolname, + statsextinfo->dobj.catId, 0, + statsextinfo->dobj.dumpId); + + destroyPQExpBuffer(q); + destroyPQExpBuffer(delq); + destroyPQExpBuffer(labelq); +} + /* * dumpConstraint * write out to fout a user-defined constraint @@ -17266,6 +17418,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, addObjectDependency(postDataBound, dobj->dumpId); break; case DO_INDEX: + case DO_STATSEXT: case DO_REFRESH_MATVIEW: case DO_TRIGGER: case DO_EVENT_TRIGGER: diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index a466527ec6..cb22f63bd6 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -56,6 +56,7 @@ typedef enum DO_TABLE, DO_ATTRDEF, DO_INDEX, + DO_STATSEXT, DO_RULE, DO_TRIGGER, DO_CONSTRAINT, @@ -362,6 +363,13 @@ typedef struct _indxInfo int relpages; /* relpages of the underlying table */ } IndxInfo; +typedef struct _statsExtInfo +{ + DumpableObject dobj; + TableInfo *statsexttable; /* link to table the stats ext is for */ + char *statsextdef; +} StatsExtInfo; + typedef struct _ruleInfo { DumpableObject dobj; @@ -682,6 +690,7 @@ extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables); extern InhInfo *getInherits(Archive *fout, int *numInherits); extern PartInfo *getPartitions(Archive *fout, int *numPartitions); extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables); +extern void getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables); extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables); extern RuleInfo *getRules(Archive *fout, int *numRules); extern void getTriggers(Archive *fout, TableInfo tblinfo[], int numTables); diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index e555de8857..5c19b05ca4 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -53,10 +53,11 @@ static const int dbObjectTypePriority[] = 18, /* DO_TABLE */ 20, /* DO_ATTRDEF */ 28, /* DO_INDEX */ - 29, /* DO_RULE */ - 30, /* DO_TRIGGER */ + 29, /* DO_STATSEXT */ + 30, /* DO_RULE */ + 31, /* DO_TRIGGER */ 27, /* DO_CONSTRAINT */ - 31, /* DO_FK_CONSTRAINT */ + 32, /* DO_FK_CONSTRAINT */ 2, /* DO_PROCLANG */ 10, /* DO_CAST */ 23, /* DO_TABLE_DATA */ @@ -1291,6 +1292,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize) "INDEX %s (ID %d OID %u)", obj->name, obj->dumpId, obj->catId.oid); return; + case DO_STATSEXT: + snprintf(buf, bufsize, + "STATISTICS %s (ID %d OID %u)", + obj->name, obj->dumpId, obj->catId.oid); + return; case DO_REFRESH_MATVIEW: snprintf(buf, bufsize, "REFRESH MATERIALIZED VIEW %s (ID %d OID %u)", diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 8c583127fd..3cf1742020 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2320,6 +2320,57 @@ describeOneTableDetails(const char *schemaname, PQclear(result); } + /* print any extended statistics */ + if (pset.sversion >= 100000) + { + printfPQExpBuffer(&buf, + "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n" + " (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(attname::text),', ') \n" + " FROM ((SELECT pg_catalog.unnest(stakeys) AS attnum) s\n" + " JOIN pg_catalog.pg_attribute a ON (starelid = a.attrelid AND\n" + "a.attnum = s.attnum AND not attisdropped))) AS columns,\n" + " (staenabled::char[] @> '{d}'::char[]) AS ndist_enabled\n" + "FROM pg_catalog.pg_statistic_ext stat WHERE starelid = '%s'\n" + "ORDER BY 1;", + oid); + + result = PSQLexec(buf.data); + if (!result) + goto error_return; + else + tuples = PQntuples(result); + + if (tuples > 0) + { + printTableAddFooter(&cont, _("Statistics:")); + + for (i = 0; i < tuples; i++) + { + int cnt = 0; + + printfPQExpBuffer(&buf, " "); + + /* statistics name (qualified with namespace) */ + appendPQExpBuffer(&buf, "\"%s.%s\" WITH (", + PQgetvalue(result, i, 1), + PQgetvalue(result, i, 2)); + + /* options */ + if (strcmp(PQgetvalue(result, i, 5), "t") == 0) + { + appendPQExpBufferStr(&buf, "ndistinct"); + cnt++; + } + + appendPQExpBuffer(&buf, ") ON (%s)", + PQgetvalue(result, i, 4)); + + printTableAddFooter(&cont, buf.data); + } + } + PQclear(result); + } + /* print rules */ if (tableinfo.hasrules && tableinfo.relkind != RELKIND_MATVIEW) { diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index c9c9a18777..b8fa18ae2e 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201703241 +#define CATALOG_VERSION_NO 201703242 #endif diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 10759c7c58..9effbce2f1 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -147,6 +147,7 @@ typedef enum ObjectClass OCLASS_REWRITE, /* pg_rewrite */ OCLASS_TRIGGER, /* pg_trigger */ OCLASS_SCHEMA, /* pg_namespace */ + OCLASS_STATISTIC_EXT, /* pg_statistic_ext */ OCLASS_TSPARSER, /* pg_ts_parser */ OCLASS_TSDICT, /* pg_ts_dict */ OCLASS_TSTEMPLATE, /* pg_ts_template */ diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index 1187797fd9..473fe177ba 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -119,6 +119,7 @@ extern void RemoveAttrDefault(Oid relid, AttrNumber attnum, DropBehavior behavior, bool complain, bool internal); extern void RemoveAttrDefaultById(Oid attrdefId); extern void RemoveStatistics(Oid relid, AttrNumber attnum); +extern void RemoveStatisticsExt(Oid relid, AttrNumber attnum); extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno, bool relhasoids); diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index 5d4190c05e..a7266860ce 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -182,6 +182,13 @@ DECLARE_UNIQUE_INDEX(pg_largeobject_loid_pn_index, 2683, on pg_largeobject using DECLARE_UNIQUE_INDEX(pg_largeobject_metadata_oid_index, 2996, on pg_largeobject_metadata using btree(oid oid_ops)); #define LargeObjectMetadataOidIndexId 2996 +DECLARE_UNIQUE_INDEX(pg_statistic_ext_oid_index, 3380, on pg_statistic_ext using btree(oid oid_ops)); +#define StatisticExtOidIndexId 3380 +DECLARE_UNIQUE_INDEX(pg_statistic_ext_name_index, 3997, on pg_statistic_ext using btree(staname name_ops, stanamespace oid_ops)); +#define StatisticExtNameIndexId 3997 +DECLARE_INDEX(pg_statistic_ext_relid_index, 3379, on pg_statistic_ext using btree(starelid oid_ops)); +#define StatisticExtRelidIndexId 3379 + DECLARE_UNIQUE_INDEX(pg_namespace_nspname_index, 2684, on pg_namespace using btree(nspname name_ops)); #define NamespaceNameIndexId 2684 DECLARE_UNIQUE_INDEX(pg_namespace_oid_index, 2685, on pg_namespace using btree(oid oid_ops)); diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h index dbeb25b1ac..35e0e2b089 100644 --- a/src/include/catalog/namespace.h +++ b/src/include/catalog/namespace.h @@ -141,6 +141,8 @@ extern Oid get_collation_oid(List *collname, bool missing_ok); extern Oid get_conversion_oid(List *conname, bool missing_ok); extern Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding); +extern Oid get_statistics_oid(List *names, bool missing_ok); + /* initialization & transaction cleanup code */ extern void InitializeSearchPath(void); extern void AtEOXact_Namespace(bool isCommit, bool parallel); diff --git a/src/include/catalog/pg_cast.h b/src/include/catalog/pg_cast.h index ce8dc59e5a..bc5d28a4fa 100644 --- a/src/include/catalog/pg_cast.h +++ b/src/include/catalog/pg_cast.h @@ -254,6 +254,10 @@ DATA(insert ( 23 18 78 e f )); /* pg_node_tree can be coerced to, but not from, text */ DATA(insert ( 194 25 0 i b )); +/* pg_ndistinct can be coerced to, but not from, bytea and text */ +DATA(insert ( 3361 17 0 i b )); +DATA(insert ( 3361 25 0 i i )); + /* * Datetime category */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index a66d045100..ee67459c32 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -1983,6 +1983,8 @@ DESCR("select statement of a view"); DATA(insert OID = 1642 ( pg_get_userbyid PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 19 "26" _null_ _null_ _null_ _null_ _null_ pg_get_userbyid _null_ _null_ _null_ )); DESCR("role name by OID (with fallback)"); DATA(insert OID = 1643 ( pg_get_indexdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_indexdef _null_ _null_ _null_ )); +DESCR("extended statistics description"); +DATA(insert OID = 3415 ( pg_get_statisticsextdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_statisticsextdef _null_ _null_ _null_ )); DESCR("index description"); DATA(insert OID = 3352 ( pg_get_partkeydef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_partkeydef _null_ _null_ _null_ )); DESCR("partition key description"); @@ -2758,6 +2760,15 @@ DESCR("current user privilege on any column by rel name"); DATA(insert OID = 3029 ( has_any_column_privilege PGNSP PGUID 12 10 0 0 0 f f f f t f s s 2 0 16 "26 25" _null_ _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ )); DESCR("current user privilege on any column by rel oid"); +DATA(insert OID = 3355 ( pg_ndistinct_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3361 "2275" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_in _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 3356 ( pg_ndistinct_out PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_out _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 3357 ( pg_ndistinct_recv PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 3361 "2281" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_recv _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 3358 ( pg_ndistinct_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_send _null_ _null_ _null_ )); +DESCR("I/O"); + DATA(insert OID = 1928 ( pg_stat_get_numscans PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ )); DESCR("statistics: number of scans done for table/index"); DATA(insert OID = 1929 ( pg_stat_get_tuples_returned PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ )); diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h new file mode 100644 index 0000000000..5f67fe7fc8 --- /dev/null +++ b/src/include/catalog/pg_statistic_ext.h @@ -0,0 +1,75 @@ +/*------------------------------------------------------------------------- + * + * pg_statistic_ext.h + * definition of the system "extended statistic" relation (pg_statistic_ext) + * along with the relation's initial contents. + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/catalog/pg_statistic_ext.h + * + * NOTES + * the genbki.pl script reads this file and generates .bki + * information from the DATA() statements. + * + *------------------------------------------------------------------------- + */ +#ifndef PG_STATISTIC_EXT_H +#define PG_STATISTIC_EXT_H + +#include "catalog/genbki.h" + +/* ---------------- + * pg_statistic_ext definition. cpp turns this into + * typedef struct FormData_pg_statistic_ext + * ---------------- + */ +#define StatisticExtRelationId 3381 + +CATALOG(pg_statistic_ext,3381) +{ + /* These fields form the unique key for the entry: */ + Oid starelid; /* relation containing attributes */ + NameData staname; /* statistics name */ + Oid stanamespace; /* OID of namespace containing this statistics */ + Oid staowner; /* statistics owner */ + + /* + * variable-length fields start here, but we allow direct access to + * stakeys + */ + int2vector stakeys; /* array of column keys */ + +#ifdef CATALOG_VARLEN + char staenabled[1] BKI_FORCE_NOT_NULL; /* statistic types + * requested to build */ + pg_ndistinct standistinct; /* ndistinct coefficients (serialized) */ +#endif + +} FormData_pg_statistic_ext; + +/* ---------------- + * Form_pg_statistic_ext corresponds to a pointer to a tuple with + * the format of pg_statistic_ext relation. + * ---------------- + */ +typedef FormData_pg_statistic_ext *Form_pg_statistic_ext; + +/* ---------------- + * compiler constants for pg_statistic_ext + * ---------------- + */ +#define Natts_pg_statistic_ext 7 +#define Anum_pg_statistic_ext_starelid 1 +#define Anum_pg_statistic_ext_staname 2 +#define Anum_pg_statistic_ext_stanamespace 3 +#define Anum_pg_statistic_ext_staowner 4 +#define Anum_pg_statistic_ext_stakeys 5 +#define Anum_pg_statistic_ext_staenabled 6 +#define Anum_pg_statistic_ext_standistinct 7 + +#define STATS_EXT_NDISTINCT 'd' + +#endif /* PG_STATISTIC_EXT_H */ diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index 9f61238179..9ad67258fe 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -364,6 +364,10 @@ DATA(insert OID = 194 ( pg_node_tree PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node DESCR("string representing an internal node tree"); #define PGNODETREEOID 194 +DATA(insert OID = 3361 ( pg_ndistinct PGNSP PGUID -1 f b S f t \054 0 0 0 pg_ndistinct_in pg_ndistinct_out pg_ndistinct_recv pg_ndistinct_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ )); +DESCR("multivariate ndistinct coefficients"); +#define PGNDISTINCTOID 3361 + DATA(insert OID = 32 ( pg_ddl_command PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("internal type for passing CollectedCommand"); #define PGDDLCOMMANDOID 32 diff --git a/src/include/catalog/toasting.h b/src/include/catalog/toasting.h index db7f145b5f..00d0a8326f 100644 --- a/src/include/catalog/toasting.h +++ b/src/include/catalog/toasting.h @@ -53,6 +53,7 @@ DECLARE_TOAST(pg_proc, 2836, 2837); DECLARE_TOAST(pg_rewrite, 2838, 2839); DECLARE_TOAST(pg_seclabel, 3598, 3599); DECLARE_TOAST(pg_statistic, 2840, 2841); +DECLARE_TOAST(pg_statistic_ext, 3439, 3440); DECLARE_TOAST(pg_trigger, 2336, 2337); /* shared catalogs */ diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 8740cee944..c323e81e6c 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -77,6 +77,10 @@ extern ObjectAddress DefineOperator(List *names, List *parameters); extern void RemoveOperatorById(Oid operOid); extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt); +/* commands/statscmds.c */ +extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt); +extern void RemoveStatisticsById(Oid statsOid); + /* commands/aggregatecmds.c */ extern ObjectAddress DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle, List *parameters); diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index b2d8514f89..fc883a6f3e 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -279,6 +279,7 @@ typedef enum NodeTag T_PlaceHolderInfo, T_MinMaxAggInfo, T_PlannerParamItem, + T_StatisticExtInfo, /* * TAGS FOR MEMORY NODES (memnodes.h) @@ -424,6 +425,7 @@ typedef enum NodeTag T_CreateSubscriptionStmt, T_AlterSubscriptionStmt, T_DropSubscriptionStmt, + T_CreateStatsStmt, T_AlterCollationStmt, /* diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index f3773ca929..3a71dd5b37 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1593,6 +1593,7 @@ typedef enum ObjectType OBJECT_SCHEMA, OBJECT_SEQUENCE, OBJECT_SUBSCRIPTION, + OBJECT_STATISTIC_EXT, OBJECT_TABCONSTRAINT, OBJECT_TABLE, OBJECT_TABLESPACE, @@ -2656,6 +2657,20 @@ typedef struct IndexStmt bool if_not_exists; /* just do nothing if index already exists? */ } IndexStmt; +/* ---------------------- + * Create Statistics Statement + * ---------------------- + */ +typedef struct CreateStatsStmt +{ + NodeTag type; + List *defnames; /* qualified name (list of Value strings) */ + RangeVar *relation; /* relation to build statistics on */ + List *keys; /* String nodes naming referenced columns */ + List *options; /* list of DefElem */ + bool if_not_exists; /* do nothing if statistics already exists */ +} CreateStatsStmt; + /* ---------------------- * Create Function Statement * ---------------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 1c88a79a21..0a5187cef3 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -529,6 +529,7 @@ typedef struct RelOptInfo List *lateral_vars; /* LATERAL Vars and PHVs referenced by rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ + List *statlist; /* list of StatisticExtInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; @@ -668,6 +669,24 @@ typedef struct ForeignKeyOptInfo List *rinfos[INDEX_MAX_KEYS]; } ForeignKeyOptInfo; +/* + * StatisticExtInfo + * Information about extended statistics for planning/optimization + * + * This contains information about which columns are covered by the + * statistics (stakeys), which options were requested while adding the + * statistics (*_enabled), and which kinds of statistics were actually + * built and are available for the optimizer (*_built). + */ +typedef struct StatisticExtInfo +{ + NodeTag type; + + Oid statOid; /* OID of the statistics row */ + RelOptInfo *rel; /* back-link to index's table */ + char kind; /* statistic kind of this entry */ + Bitmapset *keys; /* attnums of the columns covered */ +} StatisticExtInfo; /* * EquivalenceClasses diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h new file mode 100644 index 0000000000..961f1f78c0 --- /dev/null +++ b/src/include/statistics/extended_stats_internal.h @@ -0,0 +1,64 @@ +/*------------------------------------------------------------------------- + * + * extended_stats_internal.h + * POSTGRES extended statistics internal declarations + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/statistics/extended_stats_internal.h + * + *------------------------------------------------------------------------- + */ +#ifndef EXTENDED_STATS_INTERNAL_H +#define EXTENDED_STATS_INTERNAL_H + +#include "utils/sortsupport.h" +#include "statistics/statistics.h" + + +typedef struct +{ + Oid eqopr; /* '=' operator for datatype, if any */ + Oid eqfunc; /* and associated function */ + Oid ltopr; /* '<' operator for datatype, if any */ +} StdAnalyzeData; + +typedef struct +{ + Datum value; /* a data value */ + int tupno; /* position index for tuple it came from */ +} ScalarItem; + +/* multi-sort */ +typedef struct MultiSortSupportData +{ + int ndims; /* number of dimensions supported by the */ + SortSupportData ssup[1]; /* sort support data for each dimension */ +} MultiSortSupportData; + +typedef MultiSortSupportData *MultiSortSupport; + +typedef struct SortItem +{ + Datum *values; + bool *isnull; +} SortItem; + +extern MVNDistinct *statext_ndistinct_build(double totalrows, + int numrows, HeapTuple *rows, + Bitmapset *attrs, VacAttrStats **stats); +extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct); +extern MVNDistinct *statext_ndistinct_deserialize(bytea *data); + +extern MultiSortSupport multi_sort_init(int ndims); +extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, + Oid oper); +extern int multi_sort_compare(const void *a, const void *b, void *arg); +extern int multi_sort_compare_dim(int dim, const SortItem * a, + const SortItem * b, MultiSortSupport mss); +extern int multi_sort_compare_dims(int start, int end, const SortItem * a, + const SortItem * b, MultiSortSupport mss); + +#endif /* EXTENDED_STATS_INTERNAL_H */ diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h new file mode 100644 index 0000000000..a15e39e1a3 --- /dev/null +++ b/src/include/statistics/statistics.h @@ -0,0 +1,47 @@ +/*------------------------------------------------------------------------- + * + * statistics.h + * Extended statistics and selectivity estimation functions. + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/statistics/statistics.h + * + *------------------------------------------------------------------------- + */ +#ifndef STATISTICS_H +#define STATISTICS_H + +#include "commands/vacuum.h" + +#define STATS_MAX_DIMENSIONS 8 /* max number of attributes */ + +/* Multivariate distinct coefficients */ +#define STATS_NDISTINCT_MAGIC 0xA352BFA4 /* struct identifier */ +#define STATS_NDISTINCT_TYPE_BASIC 1 /* struct version */ + +/* MVDistinctItem represents a single combination of columns */ +typedef struct MVNDistinctItem +{ + double ndistinct; /* ndistinct value for this combination */ + Bitmapset *attrs; /* attr numbers of items */ +} MVNDistinctItem; + +/* A MVNDistinct object, comprising all possible combinations of columns */ +typedef struct MVNDistinct +{ + uint32 magic; /* magic constant marker */ + uint32 type; /* type of ndistinct (BASIC) */ + uint32 nitems; /* number of items in the statistic */ + MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]; +} MVNDistinct; + +extern MVNDistinct *statext_ndistinct_load(Oid mvoid); + +extern void BuildRelationExtStatistics(Relation onerel, double totalrows, + int numrows, HeapTuple *rows, + int natts, VacAttrStats **vacattrstats); +extern bool statext_is_kind_built(HeapTuple htup, char kind); + +#endif /* STATISTICS_H */ diff --git a/src/include/utils/acl.h b/src/include/utils/acl.h index 0d118525c9..c957d8e170 100644 --- a/src/include/utils/acl.h +++ b/src/include/utils/acl.h @@ -192,6 +192,7 @@ typedef enum AclObjectKind ACL_KIND_OPFAMILY, /* pg_opfamily */ ACL_KIND_COLLATION, /* pg_collation */ ACL_KIND_CONVERSION, /* pg_conversion */ + ACL_KIND_STATISTICS, /* pg_statistic_ext */ ACL_KIND_TABLESPACE, /* pg_tablespace */ ACL_KIND_TSDICTIONARY, /* pg_ts_dict */ ACL_KIND_TSCONFIGURATION, /* pg_ts_config */ @@ -326,6 +327,7 @@ extern bool pg_event_trigger_ownercheck(Oid et_oid, Oid roleid); extern bool pg_extension_ownercheck(Oid ext_oid, Oid roleid); extern bool pg_publication_ownercheck(Oid pub_oid, Oid roleid); extern bool pg_subscription_ownercheck(Oid sub_oid, Oid roleid); +extern bool pg_statistics_ownercheck(Oid stat_oid, Oid roleid); extern bool has_createrole_privilege(Oid roleid); extern bool has_bypassrls_privilege(Oid roleid); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index a617a7cf56..ab875bb9d7 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -92,6 +92,7 @@ typedef struct RelationData bool rd_isvalid; /* relcache entry is valid */ char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 = * valid, 2 = temporarily forced */ + bool rd_statvalid; /* is rd_statlist valid? */ /* * rd_createSubid is the ID of the highest subtransaction the rel has @@ -136,6 +137,9 @@ typedef struct RelationData Oid rd_pkindex; /* OID of primary key, if any */ Oid rd_replidindex; /* OID of replica identity index, if any */ + /* data managed by RelationGetStatExtList: */ + List *rd_statlist; /* list of OIDs of extended stats */ + /* data managed by RelationGetIndexAttrBitmap: */ Bitmapset *rd_indexattr; /* identifies columns used in indexes */ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index da36b6774f..81af3aebb8 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -39,6 +39,7 @@ extern void RelationClose(Relation relation); */ extern List *RelationGetFKeyList(Relation relation); extern List *RelationGetIndexList(Relation relation); +extern List *RelationGetStatExtList(Relation relation); extern Oid RelationGetOidIndex(Relation relation); extern Oid RelationGetPrimaryKeyIndex(Relation relation); extern Oid RelationGetReplicaIndex(Relation relation); diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index b35faf81b9..36805ebefb 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -86,6 +86,8 @@ enum SysCacheIdentifier PUBLICATIONRELMAP, RULERELNAME, SEQRELID, + STATEXTNAMENSP, + STATEXTOID, STATRELATTINH, SUBSCRIPTIONOID, SUBSCRIPTIONNAME, diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out index b01be59bbb..ce581bb93d 100644 --- a/src/test/regress/expected/alter_generic.out +++ b/src/test/regress/expected/alter_generic.out @@ -496,6 +496,48 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); ERROR: function 2(integer,integer) does not exist in operator family "alt_opf18" DROP OPERATOR FAMILY alt_opf18 USING btree; +-- +-- Statistics +-- +SET SESSION AUTHORIZATION regress_alter_user1; +CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER); +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; +ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict) +ERROR: statistics "alt_stat2" already exists in schema "alt_nsp1" +ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- failed (name conflict) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership) +ERROR: must be member of role "regress_alter_user2" +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK +SET SESSION AUTHORIZATION regress_alter_user2; +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; +ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK +ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership) +ERROR: must be member of role "regress_alter_user3" +ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict) +ERROR: statistics "alt_stat2" already exists in schema "alt_nsp2" +RESET SESSION AUTHORIZATION; +SELECT nspname, staname, rolname + FROM pg_statistic_ext s, pg_namespace n, pg_authid a + WHERE s.stanamespace = n.oid AND s.staowner = a.oid + AND n.nspname in ('alt_nsp1', 'alt_nsp2') + ORDER BY nspname, staname; + nspname | staname | rolname +----------+-----------+--------------------- + alt_nsp1 | alt_stat2 | regress_alter_user2 + alt_nsp1 | alt_stat3 | regress_alter_user1 + alt_nsp1 | alt_stat4 | regress_alter_user2 + alt_nsp2 | alt_stat2 | regress_alter_user3 +(4 rows) + -- -- Text Search Dictionary -- @@ -639,7 +681,7 @@ DROP LANGUAGE alt_lang3 CASCADE; DROP LANGUAGE alt_lang4 CASCADE; ERROR: language "alt_lang4" does not exist DROP SCHEMA alt_nsp1 CASCADE; -NOTICE: drop cascades to 26 other objects +NOTICE: drop cascades to 27 other objects DETAIL: drop cascades to function alt_func3(integer) drop cascades to function alt_agg3(integer) drop cascades to function alt_func4(integer) @@ -656,6 +698,7 @@ drop cascades to operator family alt_opc1 for access method hash drop cascades to operator family alt_opc2 for access method hash drop cascades to operator family alt_opf4 for access method hash drop cascades to operator family alt_opf2 for access method hash +drop cascades to table alt_regress_1 drop cascades to text search dictionary alt_ts_dict3 drop cascades to text search dictionary alt_ts_dict4 drop cascades to text search dictionary alt_ts_dict2 diff --git a/src/test/regress/expected/object_address.out b/src/test/regress/expected/object_address.out index 978d9a9a0f..814e05e4ef 100644 --- a/src/test/regress/expected/object_address.out +++ b/src/test/regress/expected/object_address.out @@ -39,6 +39,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL ( CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable; CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT); WARNING: tables were not subscribed, you will have to run ALTER SUBSCRIPTION ... REFRESH PUBLICATION to subscribe the tables +CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable; -- test some error cases SELECT pg_get_object_address('stone', '{}', '{}'); ERROR: unrecognized object type "stone" @@ -409,7 +410,8 @@ WITH objects (type, name, args) AS (VALUES ('access method', '{btree}', '{}'), ('publication', '{addr_pub}', '{}'), ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'), - ('subscription', '{addr_sub}', '{}') + ('subscription', '{addr_sub}', '{}'), + ('statistics', '{addr_nsp, gentable_stat}', '{}') ) SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, -- test roundtrip through pg_identify_object_as_address @@ -457,6 +459,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, trigger | | | t on addr_nsp.gentable | t operator family | pg_catalog | integer_ops | pg_catalog.integer_ops USING btree | t policy | | | genpol on addr_nsp.gentable | t + statistics | addr_nsp | gentable_stat | addr_nsp.gentable_stat | t collation | pg_catalog | "default" | pg_catalog."default" | t transform | | | for integer on language sql | t text search dictionary | addr_nsp | addr_ts_dict | addr_nsp.addr_ts_dict | t @@ -466,7 +469,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, subscription | | addr_sub | addr_sub | t publication | | addr_pub | addr_pub | t publication relation | | | gentable in publication addr_pub | t -(45 rows) +(46 rows) --- --- Cleanup resources diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 64d9dd605f..262036ac4f 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -823,11 +823,12 @@ WHERE c.castmethod = 'b' AND text | character | 0 | i character varying | character | 0 | i pg_node_tree | text | 0 | i + pg_ndistinct | bytea | 0 | i cidr | inet | 0 | i xml | text | 0 | a xml | character varying | 0 | a xml | character | 0 | a -(7 rows) +(8 rows) -- **************** pg_conversion **************** -- Look for illegal values in pg_conversion fields. diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index c4c8450b83..7f04c7a7cc 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2164,6 +2164,14 @@ pg_stats| SELECT n.nspname AS schemaname, JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid)))); +pg_stats_ext| SELECT n.nspname AS schemaname, + c.relname AS tablename, + s.staname, + s.stakeys AS attnums, + length((s.standistinct)::text) AS ndistbytes + FROM ((pg_statistic_ext s + JOIN pg_class c ON ((c.oid = s.starelid))) + LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))); pg_tables| SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index 8e3028edaa..753ad81e43 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -142,6 +142,7 @@ pg_shdepend|t pg_shdescription|t pg_shseclabel|t pg_statistic|t +pg_statistic_ext|t pg_subscription|t pg_subscription_rel|t pg_tablespace|t diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out new file mode 100644 index 0000000000..83d70bf9b9 --- /dev/null +++ b/src/test/regress/expected/stats_ext.out @@ -0,0 +1,155 @@ +-- Generic extended statistics support +-- Ensure stats are dropped sanely +CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS ab1_a_b_stats; +CREATE SCHEMA regress_schema_2; +CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS regress_schema_2.ab1_a_b_stats; +-- Ensure statistics are dropped when columns are +CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ALTER TABLE ab1 DROP COLUMN a; +\d ab1 + Table "public.ab1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | integer | | | + c | integer | | | +Statistics: + "public.ab1_b_c_stats" WITH (ndistinct) ON (b, c) + +DROP TABLE ab1; +-- Ensure things work sanely with SET STATISTICS 0 +CREATE TABLE ab1 (a INTEGER, b INTEGER); +ALTER TABLE ab1 ALTER a SET STATISTICS 0; +INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ANALYZE ab1; +ERROR: extended statistics could not be collected for column "a" of relation public.ab1 +HINT: Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1 +ALTER TABLE ab1 ALTER a SET STATISTICS -1; +ANALYZE ab1; +DROP TABLE ab1; +-- n-distinct tests +CREATE TABLE ndistinct ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b INT, + filler3 DATE, + c INT, + d INT +); +-- unknown column +CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct; +ERROR: column "unknown_column" referenced in statistics does not exist +-- single column +CREATE STATISTICS s10 ON (a) FROM ndistinct; +ERROR: statistics require at least 2 columns +-- single column, duplicated +CREATE STATISTICS s10 ON (a,a) FROM ndistinct; +ERROR: duplicate column name in statistics definition +-- two columns, one duplicated +CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct; +ERROR: duplicate column name in statistics definition +-- correct command +CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct; +-- perfectly correlated groups +INSERT INTO ndistinct (a, b, c, filler1) + SELECT i/100, i/100, i/100, cash_words(i::money) + FROM generate_series(1,10000) s(i); +ANALYZE ndistinct; +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + staenabled | standistinct +------------+------------------------------------------------------------------------------------------------ + {d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}] +(1 row) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on ndistinct +(3 rows) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b, c + -> Seq Scan on ndistinct +(3 rows) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b, c, d + -> Seq Scan on ndistinct +(3 rows) + +TRUNCATE TABLE ndistinct; +-- partially correlated groups +INSERT INTO ndistinct (a, b, c) + SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i); +ANALYZE ndistinct; +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + staenabled | standistinct +------------+------------------------------------------------------------------------------------------------ + {d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}] +(1 row) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate (cost=230.00..232.01 rows=201 width=16) + Group Key: a, b + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=255.00..257.01 rows=201 width=20) + Group Key: a, b, c + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=280.00..290.00 rows=1000 width=24) + Group Key: a, b, c, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=255.00..265.00 rows=1000 width=20) + Group Key: b, c, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, d; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate (cost=230.00..240.00 rows=1000 width=16) + Group Key: a, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8) +(3 rows) + +DROP TABLE ndistinct; diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index 8d75bbfab3..84022f6a29 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -59,7 +59,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR -- Look for types that should have an array type according to their typtype, -- but don't. We exclude composites here because we have not bothered to -- make array types corresponding to the system catalogs' rowtypes. --- NOTE: as of v10, this check finds pg_node_tree and smgr. +-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr. SELECT p1.oid, p1.typname FROM pg_type as p1 WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' @@ -67,11 +67,12 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' (SELECT 1 FROM pg_type as p2 WHERE p2.typname = ('_' || p1.typname)::name AND p2.typelem = p1.oid and p1.typarray = p2.oid); - oid | typname ------+-------------- - 194 | pg_node_tree - 210 | smgr -(2 rows) + oid | typname +------+-------------- + 194 | pg_node_tree + 3361 | pg_ndistinct + 210 | smgr +(3 rows) -- Make sure typarray points to a varlena array type of our own base SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype, diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 38743d98c3..c283bdcb37 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -89,7 +89,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview # ---------- # Another group of parallel tests # ---------- -test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan +test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan stats_ext # rules cannot run concurrently with any test that creates a view test: rules psql_crosstab amutils diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index d9f64c2873..3a0d536a2b 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -130,6 +130,7 @@ test: misc_functions test: sysviews test: tsrf test: tidscan +test: stats_ext test: rules test: psql_crosstab test: select_parallel diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql index c9ea479967..f6fa8d8bfd 100644 --- a/src/test/regress/sql/alter_generic.sql +++ b/src/test/regress/sql/alter_generic.sql @@ -433,6 +433,37 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); DROP OPERATOR FAMILY alt_opf18 USING btree; +-- +-- Statistics +-- +SET SESSION AUTHORIZATION regress_alter_user1; +CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER); +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; + +ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict) +ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- failed (name conflict) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK + +SET SESSION AUTHORIZATION regress_alter_user2; +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; + +ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner) +ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK +ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership) +ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner) +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict) + +RESET SESSION AUTHORIZATION; +SELECT nspname, staname, rolname + FROM pg_statistic_ext s, pg_namespace n, pg_authid a + WHERE s.stanamespace = n.oid AND s.staowner = a.oid + AND n.nspname in ('alt_nsp1', 'alt_nsp2') + ORDER BY nspname, staname; -- -- Text Search Dictionary diff --git a/src/test/regress/sql/object_address.sql b/src/test/regress/sql/object_address.sql index 28476daff1..c9219e47c4 100644 --- a/src/test/regress/sql/object_address.sql +++ b/src/test/regress/sql/object_address.sql @@ -41,6 +41,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL ( TO SQL WITH FUNCTION int4recv(internal)); CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable; CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT); +CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable; -- test some error cases SELECT pg_get_object_address('stone', '{}', '{}'); @@ -185,7 +186,8 @@ WITH objects (type, name, args) AS (VALUES ('access method', '{btree}', '{}'), ('publication', '{addr_pub}', '{}'), ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'), - ('subscription', '{addr_sub}', '{}') + ('subscription', '{addr_sub}', '{}'), + ('statistics', '{addr_nsp, gentable_stat}', '{}') ) SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, -- test roundtrip through pg_identify_object_as_address diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql new file mode 100644 index 0000000000..946cb84853 --- /dev/null +++ b/src/test/regress/sql/stats_ext.sql @@ -0,0 +1,102 @@ +-- Generic extended statistics support + +-- Ensure stats are dropped sanely +CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS ab1_a_b_stats; + +CREATE SCHEMA regress_schema_2; +CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS regress_schema_2.ab1_a_b_stats; + +-- Ensure statistics are dropped when columns are +CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ALTER TABLE ab1 DROP COLUMN a; +\d ab1 +DROP TABLE ab1; + +-- Ensure things work sanely with SET STATISTICS 0 +CREATE TABLE ab1 (a INTEGER, b INTEGER); +ALTER TABLE ab1 ALTER a SET STATISTICS 0; +INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ANALYZE ab1; +ALTER TABLE ab1 ALTER a SET STATISTICS -1; +ANALYZE ab1; +DROP TABLE ab1; + + +-- n-distinct tests +CREATE TABLE ndistinct ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b INT, + filler3 DATE, + c INT, + d INT +); + +-- unknown column +CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct; + +-- single column +CREATE STATISTICS s10 ON (a) FROM ndistinct; + +-- single column, duplicated +CREATE STATISTICS s10 ON (a,a) FROM ndistinct; + +-- two columns, one duplicated +CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct; + +-- correct command +CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct; + +-- perfectly correlated groups +INSERT INTO ndistinct (a, b, c, filler1) + SELECT i/100, i/100, i/100, cash_words(i::money) + FROM generate_series(1,10000) s(i); + +ANALYZE ndistinct; + +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + +TRUNCATE TABLE ndistinct; + +-- partially correlated groups +INSERT INTO ndistinct (a, b, c) + SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i); + +ANALYZE ndistinct; + +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, d; + +DROP TABLE ndistinct; diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index 0a31249f5d..4c65814008 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -53,7 +53,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR -- Look for types that should have an array type according to their typtype, -- but don't. We exclude composites here because we have not bothered to -- make array types corresponding to the system catalogs' rowtypes. --- NOTE: as of v10, this check finds pg_node_tree and smgr. +-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr. SELECT p1.oid, p1.typname FROM pg_type as p1 -- 2.40.0