From f0e44751d7175fa3394da2c8f85e3ceb3cdbfe63 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 7 Dec 2016 13:17:43 -0500 Subject: [PATCH] Implement table partitioning. Table partitioning is like table inheritance and reuses much of the existing infrastructure, but there are some important differences. The parent is called a partitioned table and is always empty; it may not have indexes or non-inherited constraints, since those make no sense for a relation with no data of its own. The children are called partitions and contain all of the actual data. Each partition has an implicit partitioning constraint. Multiple inheritance is not allowed, and partitioning and inheritance can't be mixed. Partitions can't have extra columns and may not allow nulls unless the parent does. Tuples inserted into the parent are automatically routed to the correct partition, so tuple-routing ON INSERT triggers are not needed. Tuple routing isn't yet supported for partitions which are foreign tables, and it doesn't handle updates that cross partition boundaries. Currently, tables can be range-partitioned or list-partitioned. List partitioning is limited to a single column, but range partitioning can involve multiple columns. A partitioning "column" can be an expression. Because table partitioning is less general than table inheritance, it is hoped that it will be easier to reason about properties of partitions, and therefore that this will serve as a better foundation for a variety of possible optimizations, including query planner optimizations. The tuple routing which this patch does based on the implicit partitioning constraints is an example of this, but it seems likely that many other useful optimizations are also possible. Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat, Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova, Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me. 
--- doc/src/sgml/catalogs.sgml | 129 +- doc/src/sgml/ref/alter_table.sgml | 117 +- doc/src/sgml/ref/create_foreign_table.sgml | 26 + doc/src/sgml/ref/create_table.sgml | 154 ++ src/backend/access/common/reloptions.c | 2 + src/backend/catalog/Makefile | 4 +- src/backend/catalog/aclchk.c | 2 + src/backend/catalog/dependency.c | 10 +- src/backend/catalog/heap.c | 270 ++- src/backend/catalog/index.c | 4 +- src/backend/catalog/objectaddress.c | 5 +- src/backend/catalog/partition.c | 1917 ++++++++++++++++++++ src/backend/catalog/pg_constraint.c | 2 +- src/backend/commands/analyze.c | 6 +- src/backend/commands/copy.c | 174 +- src/backend/commands/createas.c | 2 +- src/backend/commands/indexcmds.c | 24 +- src/backend/commands/lockcmds.c | 2 +- src/backend/commands/policy.c | 5 +- src/backend/commands/seclabel.c | 3 +- src/backend/commands/sequence.c | 5 +- src/backend/commands/tablecmds.c | 1567 ++++++++++++++-- src/backend/commands/trigger.c | 16 +- src/backend/commands/typecmds.c | 3 +- src/backend/commands/vacuum.c | 3 +- src/backend/commands/view.c | 3 +- src/backend/executor/execMain.c | 125 +- src/backend/executor/nodeModifyTable.c | 154 +- src/backend/nodes/copyfuncs.c | 81 + src/backend/nodes/equalfuncs.c | 70 + src/backend/nodes/nodeFuncs.c | 6 + src/backend/nodes/outfuncs.c | 55 + src/backend/nodes/readfuncs.c | 34 + src/backend/optimizer/util/plancat.c | 20 + src/backend/parser/analyze.c | 8 + src/backend/parser/gram.y | 347 +++- src/backend/parser/parse_agg.c | 10 + src/backend/parser/parse_expr.c | 5 + src/backend/parser/parse_func.c | 3 + src/backend/parser/parse_utilcmd.c | 326 +++- src/backend/rewrite/rewriteDefine.c | 3 +- src/backend/rewrite/rewriteHandler.c | 3 +- src/backend/rewrite/rowsecurity.c | 3 +- src/backend/tcop/utility.c | 6 +- src/backend/utils/adt/ruleutils.c | 241 +++ src/backend/utils/cache/relcache.c | 364 +++- src/backend/utils/cache/syscache.c | 12 + src/bin/pg_dump/common.c | 90 + src/bin/pg_dump/pg_dump.c | 186 +- 
src/bin/pg_dump/pg_dump.h | 14 + src/bin/psql/describe.c | 146 +- src/bin/psql/tab-complete.c | 6 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/dependency.h | 3 +- src/include/catalog/heap.h | 11 + src/include/catalog/indexing.h | 3 + src/include/catalog/partition.h | 83 + src/include/catalog/pg_class.h | 23 +- src/include/catalog/pg_partitioned_table.h | 76 + src/include/catalog/pg_proc.h | 2 + src/include/commands/defrem.h | 2 + src/include/commands/tablecmds.h | 2 +- src/include/executor/executor.h | 6 + src/include/nodes/execnodes.h | 14 + src/include/nodes/nodes.h | 5 + src/include/nodes/parsenodes.h | 79 +- src/include/parser/kwlist.h | 2 + src/include/parser/parse_node.h | 3 +- src/include/parser/parse_utilcmd.h | 2 + src/include/pg_config_manual.h | 5 + src/include/utils/builtins.h | 1 + src/include/utils/rel.h | 89 + src/include/utils/syscache.h | 1 + src/test/regress/expected/alter_table.out | 343 ++++ src/test/regress/expected/create_table.out | 413 +++++ src/test/regress/expected/inherit.out | 272 +++ src/test/regress/expected/insert.out | 140 ++ src/test/regress/expected/sanity_check.out | 1 + src/test/regress/expected/update.out | 27 + src/test/regress/sql/alter_table.sql | 294 +++ src/test/regress/sql/create_table.sql | 315 ++++ src/test/regress/sql/inherit.sql | 52 + src/test/regress/sql/insert.sql | 86 + src/test/regress/sql/update.sql | 21 + src/tools/pgindent/typedefs.list | 6 + 85 files changed, 8886 insertions(+), 271 deletions(-) create mode 100644 src/backend/catalog/partition.c create mode 100644 src/include/catalog/partition.h create mode 100644 src/include/catalog/pg_partitioned_table.h diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index c4246dcd86..9d2e89523d 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -225,6 +225,11 @@ template data for procedural languages + + pg_partitioned_table + information about partition key of tables + + pg_policy row-security policies @@ 
-1723,7 +1728,8 @@ char - r = ordinary table, i = index, + r = ordinary table, P = partitioned table, + i = index, S = sequence, v = view, m = materialized view, c = composite type, t = TOAST table, @@ -1839,6 +1845,13 @@ + + relispartition + bool + + True if table is a partition + + relfrozenxid xid @@ -1885,6 +1898,16 @@ Access-method-specific options, as keyword=value strings + + + relpartbound + pg_node_tree + + + If table is a partition (see relispartition), + internal representation of the partition bound + + @@ -4689,6 +4712,110 @@ + + <structname>pg_partitioned_table</structname> + + + pg_partitioned_table + + + + The catalog pg_partitioned_table stores + information about how tables are partitioned. + + + + <structname>pg_partitioned_table</> Columns + + + + + Name + Type + References + Description + + + + + + + partrelid + oid + pg_class.oid + The OID of the pg_class entry for this partitioned table + + + + partstrat + char + + + Partitioning strategy; l = list partitioned table, + r = range partitioned table + + + + + partnatts + int2 + + The number of columns in partition key + + + + partattrs + int2vector + pg_attribute.attnum + + This is an array of partnatts values that + indicate which table columns are part of the partition key. For + example, a value of 1 3 would mean that the first + and the third table columns make up the partition key. A zero in this + array indicates that the corresponding partition key column is an + expression, rather than a simple column reference. + + + + + partclass + oidvector + pg_opclass.oid + + For each column in the partition key, this contains the OID of the + operator class to use. See + pg_opclass for details. + + + + + partcollation + oidvector + pg_collation.oid + + For each column in the partition key, this contains the OID of + the collation to use for partitioning. 
+ + + + + partexprs + pg_node_tree + + + Expression trees (in nodeToString() + representation) for partition key columns that are not simple column + references. This is a list with one element for each zero + entry in partattrs. Null if all partition key columns + are simple references. + + + + + +
+
+ <structname>pg_policy</structname> diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index e48ccf21e4..a6a43c4b30 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -33,6 +33,10 @@ ALTER TABLE [ IF EXISTS ] name SET SCHEMA new_schema ALTER TABLE ALL IN TABLESPACE name [ OWNED BY role_name [, ... ] ] SET TABLESPACE new_tablespace [ NOWAIT ] +ALTER TABLE [ IF EXISTS ] name + ATTACH PARTITION partition_name FOR VALUES partition_bound_spec +ALTER TABLE [ IF EXISTS ] name + DETACH PARTITION partition_name where action is one of: @@ -166,6 +170,12 @@ ALTER TABLE ALL IN TABLESPACE name values or to reject null values. You can only use SET NOT NULL when the column contains no null values. + + + If this table is a partition, one cannot perform DROP NOT NULL + on a column if it is marked NOT NULL in the parent + table. + @@ -704,13 +714,63 @@ ALTER TABLE ALL IN TABLESPACE name + + ATTACH PARTITION partition_name partition_bound_spec + + + This form attaches an existing table (which might itself be partitioned) + as a partition of the target table using the same syntax for + partition_bound_spec as + . The partition bound specification + must correspond to the partitioning strategy and partition key of the + target table. The table to be attached must have all the same columns + as the target table and no more; moreover, the column types must also + match. Also, it must have all the NOT NULL and + CHECK constraints of the target table. Currently + UNIQUE, PRIMARY KEY, and + FOREIGN KEY constraints are not considered. + If any of the CHECK constraints of the table being + attached is marked NO INHERIT, the command will fail; + such a constraint must be recreated without the NO INHERIT + clause. + + + + A full table scan is performed on the table being attached to check that + no existing row in the table violates the partition constraint. 
It is + possible to avoid this scan by adding a valid CHECK + constraint to the table that would allow only the rows satisfying the + desired partition constraint before running this command. It will be + determined using such a constraint that the table need not be scanned + to validate the partition constraint. This does not work, however, if + any of the partition keys is an expression and the partition does not + accept NULL values. If attaching a list partition + that will not accept NULL values, also add + NOT NULL constraint to the partition key column, + unless it's an expression. + + + + + + DETACH PARTITION partition_name + + + This form detaches specified partition of the target table. The detached + partition continues to exist as a standalone table, but no longer has any + ties to the table from which it was detached. + + + + All the actions except RENAME, - SET TABLESPACE and SET SCHEMA - can be combined into + SET TABLESPACE, SET SCHEMA, + ATTACH PARTITION, and + DETACH PARTITION can be combined into a list of multiple alterations to apply in parallel. For example, it is possible to add several columns and/or alter the type of several columns in a single command. This is particularly useful with large @@ -721,8 +781,9 @@ ALTER TABLE ALL IN TABLESPACE name You must own the table to use ALTER TABLE. To change the schema or tablespace of a table, you must also have CREATE privilege on the new schema or tablespace. - To add the table as a new child of a parent table, you must own the - parent table as well. + To add the table as a new child of a parent table, you must own the parent + table as well. Also, to attach a table as a new partition of the table, + you must own the table being attached. To alter the owner, you must also be a direct or indirect member of the new owning role, and that role must have CREATE privilege on the table's schema. 
(These restrictions enforce that altering the owner @@ -938,6 +999,25 @@ ALTER TABLE ALL IN TABLESPACE name + + partition_name + + + The name of the table to attach as a new partition or to detach from this table. + + + + + + partition_bound_spec + + + The partition bound specification for a new partition. Refer to + for more details on the syntax of the same. + + + + @@ -977,6 +1057,11 @@ ALTER TABLE ALL IN TABLESPACE name but does not require a table rewrite. + + Similarly, when attaching a new partition it may be scanned to verify that + existing rows meet the partition constraint. + + The main reason for providing the option to specify multiple changes in a single ALTER TABLE is that multiple table scans or @@ -1047,6 +1132,9 @@ ALTER TABLE ALL IN TABLESPACE name COLUMN (i.e., ALTER TABLE ONLY ... DROP COLUMN) never removes any descendant columns, but instead marks them as independently defined rather than inherited. + A nonrecursive DROP COLUMN command will fail for a + partitioned table, because all partitions of a table must have the same + columns as the partitioning root. 
@@ -1233,6 +1321,27 @@ ALTER TABLE distributors DROP CONSTRAINT distributors_pkey, ADD CONSTRAINT distributors_pkey PRIMARY KEY USING INDEX dist_id_temp_idx; + + Attach a partition to a range partitioned table: + +ALTER TABLE measurement + ATTACH PARTITION measurement_y2016m07 FOR VALUES FROM ('2016-07-01') TO ('2016-08-01'); + + + + Attach a partition to a list partitioned table: + +ALTER TABLE cities + ATTACH PARTITION cities_west FOR VALUES IN ('Los Angeles', 'San Francisco'); + + + + Detach a partition from a partitioned table: + +ALTER TABLE measurement + DETACH PARTITION measurement_y2015m12; + + diff --git a/doc/src/sgml/ref/create_foreign_table.sgml b/doc/src/sgml/ref/create_foreign_table.sgml index 413b033cb5..5d0dcf567b 100644 --- a/doc/src/sgml/ref/create_foreign_table.sgml +++ b/doc/src/sgml/ref/create_foreign_table.sgml @@ -27,6 +27,15 @@ CREATE FOREIGN TABLE [ IF NOT EXISTS ] table_name SERVER server_name [ OPTIONS ( option 'value' [, ... ] ) ] +CREATE FOREIGN TABLE [ IF NOT EXISTS ] table_name + PARTITION OF parent_table [ ( + { column_name WITH OPTIONS [ column_constraint [ ... ] ] + | table_constraint } + [, ... ] +) ] partition_bound_spec + SERVER server_name +[ OPTIONS ( option 'value' [, ... ] ) ] + where column_constraint is: [ CONSTRAINT constraint_name ] @@ -67,6 +76,12 @@ CHECK ( expression ) [ NO INHERIT ] name as any existing data type in the same schema. + + If PARTITION OF clause is specified then the table is + created as a partition of parent_table with specified + bounds. 
+ + To be able to create a foreign table, you must have USAGE privilege on the foreign server, as well as USAGE @@ -314,6 +329,17 @@ CREATE FOREIGN TABLE films ( SERVER film_server; + + Create foreign table measurement_y2016m07, which will be + accessed through the server server_07, as a partition + of the range partitioned table measurement: + + +CREATE FOREIGN TABLE measurement_y2016m07 + PARTITION OF measurement FOR VALUES FROM ('2016-07-01') TO ('2016-08-01') + SERVER server_07; + + diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index bf2ad64d66..8bf8af302b 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -28,6 +28,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI [, ... ] ] ) [ INHERITS ( parent_table [, ... ] ) ] +[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] [ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -38,6 +39,18 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI | table_constraint } [, ... ] ) ] +[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] +[ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ] +[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] +[ TABLESPACE tablespace_name ] + +CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] table_name + PARTITION OF parent_table [ ( + { column_name [ column_constraint [ ... ] ] + | table_constraint } + [, ... ] +) ] FOR VALUES partition_bound_spec +[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ] [ WITH ( storage_parameter [= value] [, ... 
] ) | WITH OIDS | WITHOUT OIDS ] [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ] [ TABLESPACE tablespace_name ] @@ -70,6 +83,11 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI { INCLUDING | EXCLUDING } { DEFAULTS | CONSTRAINTS | INDEXES | STORAGE | COMMENTS | ALL } +and partition_bound_spec is: + +{ IN ( expression [, ...] ) | + FROM ( { expression | UNBOUNDED } [, ...] ) TO ( { expression | UNBOUNDED } [, ...] ) } + index_parameters in UNIQUE, PRIMARY KEY, and EXCLUDE constraints are: [ WITH ( storage_parameter [= value] [, ... ] ) ] @@ -229,6 +247,51 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI + + PARTITION OF parent_table + + + Creates the table as partition of the specified + parent table. + + + + The partition bound specification must correspond to the partitioning + method and partition key of the parent table, and must not overlap with + any existing partition of that parent. + + + + A partition cannot have columns other than those inherited from the + parent. That includes the oid column, which can be + specified using the WITH (OIDS) clause. + Defaults and constraints can optionally be specified for each of the + inherited columns. One can also specify table constraints in addition + to those inherited from the parent. If a check constraint with the name + matching one of the parent's constraint is specified, it is merged with + the latter, provided the specified condition is same. + + + + Rows inserted into a partitioned table will be automatically routed to + the correct partition. If no suitable partition exists, an error will + occur. + + + + A partition must have the same column names and types as the table of + which it is a partition. Therefore, modifications to the column names + or types of the partitioned table will automatically propagate to all + children, as will operations such as TRUNCATE which normally affect a + table and all of its inheritance children. 
It is also possible to + TRUNCATE a partition individually, just as for an inheritance child. + Note that dropping a partition with DROP TABLE + requires taking an ACCESS EXCLUSIVE lock on the + parent table. + + + + column_name @@ -313,6 +376,46 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI + + PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ opclass ] [, ...] ) + + + The optional PARTITION BY clause specifies a strategy + of partitioning the table. The table thus created is called a + partitioned table. The parenthesized list of + columns or expressions forms the partition key + for the table. When using range partitioning, the partition key can + include multiple columns or expressions, but for list partitioning, the + partition key must consist of a single column or expression. If no + btree operator class is specified when creating a partitioned table, + the default btree operator class for the datatype will be used. If + there is none, an error will be reported. + + + + A partitioned table is divided into sub-tables (called partitions), + which are created using separate CREATE TABLE commands. + The partitioned table is itself empty. A data row inserted into the + table is routed to a partition based on the value of columns or + expressions in the partition key. If no existing partition matches + the values in the new row, an error will be reported. + + + + Partitioned tables do not support UNIQUE, + PRIMARY KEY, EXCLUDE, or + FOREIGN KEY constraints; however, you can define + these constraints on individual partitions. + + + + When using range partitioning, a NOT NULL constraint + is added to each non-expression column in the partition key. + + + + + LIKE source_table [ like_option ... 
] @@ -1368,6 +1471,57 @@ CREATE TABLE employees OF employee_type ( PRIMARY KEY (name), salary WITH OPTIONS DEFAULT 1000 ); + + + + Create a range partitioned table: + +CREATE TABLE measurement ( + city_id int not null, + logdate date not null, + peaktemp int, + unitsales int +) PARTITION BY RANGE (logdate); + + + + Create a list partitioned table: + +CREATE TABLE cities ( + name text not null, + population int +) PARTITION BY LIST (initcap(name)); + + + + Create partition of a range partitioned table: + +CREATE TABLE measurement_y2016m07 + PARTITION OF measurement ( + unitsales WITH OPTIONS DEFAULT 0 +) FOR VALUES FROM ('2016-07-01') TO ('2016-08-01'); + + + + Create partition of a list partitioned table: + +CREATE TABLE cities_west + PARTITION OF cities ( + CONSTRAINT city_id_nonzero CHECK (city_id != 0) +) FOR VALUES IN ('Los Angeles', 'San Francisco'); + + + + Create partition of a list partitioned table that is itself further + partitioned and then add a partition to it: + +CREATE TABLE cities_west + PARTITION OF cities ( + CONSTRAINT city_id_nonzero CHECK (city_id != 0) +) FOR VALUES IN ('Los Angeles', 'San Francisco') PARTITION BY RANGE (population); + +CREATE TABLE cities_west_10000_to_100000 + PARTITION OF cities_west FOR VALUES FROM (10000) TO (100000); diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 83a97b06ab..34018cac7c 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -930,6 +930,7 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, case RELKIND_RELATION: case RELKIND_TOASTVALUE: case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: options = heap_reloptions(classForm->relkind, datum, false); break; case RELKIND_VIEW: @@ -1381,6 +1382,7 @@ heap_reloptions(char relkind, Datum reloptions, bool validate) return (bytea *) rdopts; case RELKIND_RELATION: case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: return default_reloptions(reloptions, 
validate, RELOPT_KIND_HEAP); default: /* other relkinds are not supported */ diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 1ce7610049..2d5ac09bec 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -11,7 +11,7 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \ - objectaccess.o objectaddress.o pg_aggregate.o pg_collation.o \ + objectaccess.o objectaddress.o partition.o pg_aggregate.o pg_collation.o \ pg_constraint.o pg_conversion.o \ pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \ pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \ @@ -41,7 +41,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\ pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \ pg_foreign_table.h pg_policy.h pg_replication_origin.h \ pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \ - pg_collation.h pg_range.h pg_transform.h \ + pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \ toasting.h indexing.h \ ) diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index c0df6710d1..3086021432 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -768,6 +768,8 @@ objectsInSchemaToOids(GrantObjectType objtype, List *nspnames) objects = list_concat(objects, objs); objs = getRelationsInNamespace(namespaceId, RELKIND_FOREIGN_TABLE); objects = list_concat(objects, objs); + objs = getRelationsInNamespace(namespaceId, RELKIND_PARTITIONED_TABLE); + objects = list_concat(objects, objs); break; case ACL_OBJECT_SEQUENCE: objs = getRelationsInNamespace(namespaceId, RELKIND_SEQUENCE); diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index b697e88ef0..0cdd1c5c6c 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1352,7 +1352,8 @@ void 
recordDependencyOnSingleRelExpr(const ObjectAddress *depender, Node *expr, Oid relId, DependencyType behavior, - DependencyType self_behavior) + DependencyType self_behavior, + bool ignore_self) { find_expr_references_context context; RangeTblEntry rte; @@ -1407,9 +1408,10 @@ recordDependencyOnSingleRelExpr(const ObjectAddress *depender, context.addrs->numrefs = outrefs; /* Record the self-dependencies */ - recordMultipleDependencies(depender, - self_addrs->refs, self_addrs->numrefs, - self_behavior); + if (!ignore_self) + recordMultipleDependencies(depender, + self_addrs->refs, self_addrs->numrefs, + self_behavior); free_object_addresses(self_addrs); } diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 0b804e7ac6..7f5bad0b5d 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -41,6 +41,7 @@ #include "catalog/heap.h" #include "catalog/index.h" #include "catalog/objectaccess.h" +#include "catalog/partition.h" #include "catalog/pg_attrdef.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" @@ -48,6 +49,8 @@ #include "catalog/pg_foreign_table.h" #include "catalog/pg_inherits.h" #include "catalog/pg_namespace.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_partitioned_table.h" #include "catalog/pg_statistic.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" @@ -808,6 +811,7 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass); values[Anum_pg_class_relispopulated - 1] = BoolGetDatum(rd_rel->relispopulated); values[Anum_pg_class_relreplident - 1] = CharGetDatum(rd_rel->relreplident); + values[Anum_pg_class_relispartition - 1] = BoolGetDatum(rd_rel->relispartition); values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid); values[Anum_pg_class_relminmxid - 1] = MultiXactIdGetDatum(rd_rel->relminmxid); if (relacl != (Datum) 0) @@ -819,6 +823,9 @@ InsertPgClassTuple(Relation 
pg_class_desc, else nulls[Anum_pg_class_reloptions - 1] = true; + /* relpartbound is set by updating this tuple, if necessary */ + nulls[Anum_pg_class_relpartbound - 1] = true; + tup = heap_form_tuple(RelationGetDescr(pg_class_desc), values, nulls); /* @@ -924,6 +931,9 @@ AddNewRelationTuple(Relation pg_class_desc, new_rel_reltup->reltype = new_type_oid; new_rel_reltup->reloftype = reloftype; + /* relispartition is always set by updating this tuple later */ + new_rel_reltup->relispartition = false; + new_rel_desc->rd_att->tdtypeid = new_type_oid; /* Now build and insert the tuple */ @@ -1104,7 +1114,8 @@ heap_create_with_catalog(const char *relname, if (IsBinaryUpgrade && (relkind == RELKIND_RELATION || relkind == RELKIND_SEQUENCE || relkind == RELKIND_VIEW || relkind == RELKIND_MATVIEW || - relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE)) + relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE || + relkind == RELKIND_PARTITIONED_TABLE)) { if (!OidIsValid(binary_upgrade_next_heap_pg_class_oid)) ereport(ERROR, @@ -1138,6 +1149,7 @@ heap_create_with_catalog(const char *relname, case RELKIND_VIEW: case RELKIND_MATVIEW: case RELKIND_FOREIGN_TABLE: + case RELKIND_PARTITIONED_TABLE: relacl = get_user_default_acl(ACL_OBJECT_RELATION, ownerid, relnamespace); break; @@ -1182,7 +1194,8 @@ heap_create_with_catalog(const char *relname, relkind == RELKIND_VIEW || relkind == RELKIND_MATVIEW || relkind == RELKIND_FOREIGN_TABLE || - relkind == RELKIND_COMPOSITE_TYPE)) + relkind == RELKIND_COMPOSITE_TYPE || + relkind == RELKIND_PARTITIONED_TABLE)) new_array_oid = AssignTypeArrayOid(); /* @@ -1349,7 +1362,9 @@ heap_create_with_catalog(const char *relname, if (relpersistence == RELPERSISTENCE_UNLOGGED) { Assert(relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW || - relkind == RELKIND_TOASTVALUE); + relkind == RELKIND_TOASTVALUE || + relkind == RELKIND_PARTITIONED_TABLE); + heap_create_init_fork(new_rel_desc); } @@ -1754,12 +1769,29 @@ 
void heap_drop_with_catalog(Oid relid) { Relation rel; + Oid parentOid; + Relation parent = NULL; /* * Open and lock the relation. */ rel = relation_open(relid, AccessExclusiveLock); + /* + * If the relation is a partition, we must grab exclusive lock on its + * parent because we need to update its partition descriptor. We must + * take a table lock strong enough to prevent all queries on the parent + * from proceeding until we commit and send out a shared-cache-inval + * notice that will make them update their partition descriptor. + * Sometimes, doing this is cycles spent uselessly, especially if the + * parent will be dropped as part of the same command anyway. + */ + if (rel->rd_rel->relispartition) + { + parentOid = get_partition_parent(relid); + parent = heap_open(parentOid, AccessExclusiveLock); + } + /* * There can no longer be anyone *else* touching the relation, but we * might still have open queries or cursors, or pending trigger events, in @@ -1795,6 +1827,12 @@ heap_drop_with_catalog(Oid relid) heap_close(rel, RowExclusiveLock); } + /* + * If a partitioned table, delete the pg_partitioned_table tuple. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + RemovePartitionKeyByRelId(relid); + /* * Schedule unlinking of the relation's physical files at commit. */ @@ -1845,6 +1883,12 @@ heap_drop_with_catalog(Oid relid) * delete relation tuple */ DeleteRelationTuple(relid); + + if (parent) + { + CacheInvalidateRelcache(parent); + heap_close(parent, NoLock); /* keep the lock */ + } } @@ -2027,6 +2071,17 @@ StoreRelCheck(Relation rel, char *ccname, Node *expr, else attNos = NULL; + /* + * Partitioned tables do not contain any rows themselves, so a NO INHERIT + * constraint makes no sense. 
+ */ + if (is_no_inherit && + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot add NO INHERIT constraint to partitioned table \"%s\"", + RelationGetRelationName(rel)))); + /* * Create the Check Constraint */ @@ -2440,8 +2495,11 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr, * definition) then interpret addition of a local constraint as a * legal merge. This allows ALTER ADD CONSTRAINT on parent and * child tables to be given in either order with same end state. + * However if the relation is a partition, all inherited + * constraints are always non-local, including those that were + * merged. */ - if (is_local && !con->conislocal) + if (is_local && !con->conislocal && !rel->rd_rel->relispartition) allow_merge = true; if (!found || !allow_merge) @@ -2486,10 +2544,24 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr, tup = heap_copytuple(tup); con = (Form_pg_constraint) GETSTRUCT(tup); - if (is_local) - con->conislocal = true; + /* + * In case of partitions, an inherited constraint must be + * inherited only once since it cannot have multiple parents and + * it is never considered local. 
+ */ + if (rel->rd_rel->relispartition) + { + con->coninhcount = 1; + con->conislocal = false; + } else - con->coninhcount++; + { + if (is_local) + con->conislocal = true; + else + con->coninhcount++; + } + if (is_no_inherit) { Assert(is_local); @@ -3013,3 +3085,187 @@ insert_ordered_unique_oid(List *list, Oid datum) lappend_cell_oid(list, prev, datum); return list; } + +/* + * StorePartitionKey + * Store information about the partition key rel into the catalog + */ +void +StorePartitionKey(Relation rel, + char strategy, + int16 partnatts, + AttrNumber *partattrs, + List *partexprs, + Oid *partopclass, + Oid *partcollation) +{ + int i; + int2vector *partattrs_vec; + oidvector *partopclass_vec; + oidvector *partcollation_vec; + Datum partexprDatum; + Relation pg_partitioned_table; + HeapTuple tuple; + Datum values[Natts_pg_partitioned_table]; + bool nulls[Natts_pg_partitioned_table]; + ObjectAddress myself; + ObjectAddress referenced; + + Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + + tuple = SearchSysCache1(PARTRELID, + ObjectIdGetDatum(RelationGetRelid(rel))); + + /* Copy the partition attribute numbers, opclass OIDs into arrays */ + partattrs_vec = buildint2vector(partattrs, partnatts); + partopclass_vec = buildoidvector(partopclass, partnatts); + partcollation_vec = buildoidvector(partcollation, partnatts); + + /* Convert the expressions (if any) to a text datum */ + if (partexprs) + { + char *exprString; + + exprString = nodeToString(partexprs); + partexprDatum = CStringGetTextDatum(exprString); + pfree(exprString); + } + else + partexprDatum = (Datum) 0; + + pg_partitioned_table = heap_open(PartitionedRelationId, RowExclusiveLock); + + MemSet(nulls, false, sizeof(nulls)); + + /* Only this can ever be NULL */ + if (!partexprDatum) + nulls[Anum_pg_partitioned_table_partexprs - 1] = true; + + values[Anum_pg_partitioned_table_partrelid - 1] = ObjectIdGetDatum(RelationGetRelid(rel)); + values[Anum_pg_partitioned_table_partstrat - 1] = 
CharGetDatum(strategy); + values[Anum_pg_partitioned_table_partnatts - 1] = Int16GetDatum(partnatts); + values[Anum_pg_partitioned_table_partattrs - 1] = PointerGetDatum(partattrs_vec); + values[Anum_pg_partitioned_table_partclass - 1] = PointerGetDatum(partopclass_vec); + values[Anum_pg_partitioned_table_partcollation - 1] = PointerGetDatum(partcollation_vec); + values[Anum_pg_partitioned_table_partexprs - 1] = partexprDatum; + + tuple = heap_form_tuple(RelationGetDescr(pg_partitioned_table), values, nulls); + + simple_heap_insert(pg_partitioned_table, tuple); + + /* Update the indexes on pg_partitioned_table */ + CatalogUpdateIndexes(pg_partitioned_table, tuple); + heap_close(pg_partitioned_table, RowExclusiveLock); + + /* Mark this relation as dependent on a few things as follows */ + myself.classId = RelationRelationId; + myself.objectId = RelationGetRelid(rel);; + myself.objectSubId = 0; + + /* Operator class and collation per key column */ + for (i = 0; i < partnatts; i++) + { + referenced.classId = OperatorClassRelationId; + referenced.objectId = partopclass[i]; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + referenced.classId = CollationRelationId; + referenced.objectId = partcollation[i]; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + + /* + * Anything mentioned in the expressions. We must ignore the column + * references, which will depend on the table itself; there is no + * separate partition key object. + */ + if (partexprs) + recordDependencyOnSingleRelExpr(&myself, + (Node *) partexprs, + RelationGetRelid(rel), + DEPENDENCY_NORMAL, + DEPENDENCY_AUTO, true); + + /* + * We must invalidate the relcache so that the next + * CommandCounterIncrement() will cause the same to be rebuilt using the + * information in just created catalog entry. 
+ */ + CacheInvalidateRelcache(rel); +} + +/* + * RemovePartitionKeyByRelId + * Remove pg_partitioned_table entry for a relation + */ +void +RemovePartitionKeyByRelId(Oid relid) +{ + Relation rel; + HeapTuple tuple; + + rel = heap_open(PartitionedRelationId, RowExclusiveLock); + + tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for partition key of relation %u", + relid); + + simple_heap_delete(rel, &tuple->t_self); + + ReleaseSysCache(tuple); + heap_close(rel, RowExclusiveLock); +} + +/* + * StorePartitionBound + * Update pg_class tuple of rel to store the partition bound and set + * relispartition to true + */ +void +StorePartitionBound(Relation rel, Node *bound) +{ + Relation classRel; + HeapTuple tuple, + newtuple; + Datum new_val[Natts_pg_class]; + bool new_null[Natts_pg_class], + new_repl[Natts_pg_class]; + + /* Update pg_class tuple */ + classRel = heap_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(rel))); +#ifdef USE_ASSERT_CHECKING + { + Form_pg_class classForm; + bool isnull; + + classForm = (Form_pg_class) GETSTRUCT(tuple); + Assert(!classForm->relispartition); + (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound, + &isnull); + Assert(isnull); + } +#endif + + /* Fill in relpartbound value */ + memset(new_val, 0, sizeof(new_val)); + memset(new_null, false, sizeof(new_null)); + memset(new_repl, false, sizeof(new_repl)); + new_val[Anum_pg_class_relpartbound - 1] = CStringGetTextDatum(nodeToString(bound)); + new_null[Anum_pg_class_relpartbound - 1] = false; + new_repl[Anum_pg_class_relpartbound - 1] = true; + newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel), + new_val, new_null, new_repl); + /* Also set the flag */ + ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = true; + simple_heap_update(classRel, &newtuple->t_self, newtuple); + CatalogUpdateIndexes(classRel, newtuple); 
+ heap_freetuple(newtuple); + heap_close(classRel, RowExclusiveLock); +} diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 08b646d8f3..08b0989112 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1043,7 +1043,7 @@ index_create(Relation heapRelation, (Node *) indexInfo->ii_Expressions, heapRelationId, DEPENDENCY_NORMAL, - DEPENDENCY_AUTO); + DEPENDENCY_AUTO, false); } /* Store dependencies on anything mentioned in predicate */ @@ -1053,7 +1053,7 @@ index_create(Relation heapRelation, (Node *) indexInfo->ii_Predicate, heapRelationId, DEPENDENCY_NORMAL, - DEPENDENCY_AUTO); + DEPENDENCY_AUTO, false); } } else diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index d531d17cdb..bb4b080b00 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -1204,7 +1204,8 @@ get_relation_by_qualified_name(ObjectType objtype, List *objname, RelationGetRelationName(relation)))); break; case OBJECT_TABLE: - if (relation->rd_rel->relkind != RELKIND_RELATION) + if (relation->rd_rel->relkind != RELKIND_RELATION && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", @@ -3244,6 +3245,7 @@ getRelationDescription(StringInfo buffer, Oid relid) switch (relForm->relkind) { case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: appendStringInfo(buffer, _("table %s"), relname); break; @@ -3701,6 +3703,7 @@ getRelationTypeDescription(StringInfo buffer, Oid relid, int32 objectSubId) switch (relForm->relkind) { case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: appendStringInfoString(buffer, "table"); break; case RELKIND_INDEX: diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c new file mode 100644 index 0000000000..6dab45f0ed --- /dev/null +++ b/src/backend/catalog/partition.c @@ -0,0 +1,1917 @@ 
+/*------------------------------------------------------------------------- + * + * partition.c + * Partitioning related data structures and functions. + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/catalog/partition.c + * + *------------------------------------------------------------------------- +*/ + +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/sysattr.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/objectaddress.h" +#include "catalog/partition.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_inherits_fn.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "nodes/parsenodes.h" +#include "optimizer/clauses.h" +#include "optimizer/planmain.h" +#include "optimizer/var.h" +#include "rewrite/rewriteManip.h" +#include "storage/lmgr.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/memutils.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/ruleutils.h" +#include "utils/syscache.h" + +/* + * Information about bounds of a partitioned relation + * + * A list partition datum that is known to be NULL is never put into the + * datums array. Instead, it is tracked using has_null and null_index fields. + * + * In the case of range partitioning, ndatums will typically be far less than + * 2 * nparts, because a partition's upper bound and the next partition's lower + * bound are the same in most common cases, and we only store one of them. 
+ * + * In the case of list partitioning, the indexes array stores one entry for + * every datum, which is the index of the partition that accepts a given datum. + * In case of range partitioning, it stores one entry per distinct range + * datum, which is the index of the partition for which a given datum + * is an upper bound. + */ + +/* Ternary value to represent what's contained in a range bound datum */ +typedef enum RangeDatumContent +{ + RANGE_DATUM_FINITE = 0, /* actual datum stored elsewhere */ + RANGE_DATUM_NEG_INF, /* negative infinity */ + RANGE_DATUM_POS_INF /* positive infinity */ +} RangeDatumContent; + +typedef struct PartitionBoundInfoData +{ + char strategy; /* list or range bounds? */ + int ndatums; /* Length of the datums following array */ + Datum **datums; /* Array of datum-tuples with key->partnatts + * datums each */ + RangeDatumContent **content;/* what's contained in each range bound datum? + * (see the above enum); NULL for list + * partitioned tables */ + int *indexes; /* Partition indexes; one entry per member of + * the datums array (plus one if range + * partitioned table) */ + bool has_null; /* Is there a null-accepting partition? false + * for range partitioned tables */ + int null_index; /* Index of the null-accepting partition; -1 + * for range partitioned tables */ +} PartitionBoundInfoData; + +/* + * When qsort'ing partition bounds after reading from the catalog, each bound + * is represented with one of the following structs. + */ + +/* One value coming from some (index'th) list partition */ +typedef struct PartitionListValue +{ + int index; + Datum value; +} PartitionListValue; + +/* One bound of a range partition */ +typedef struct PartitionRangeBound +{ + int index; + Datum *datums; /* range bound datums */ + RangeDatumContent *content; /* what's contained in each datum? 
*/ + bool lower; /* this is the lower (vs upper) bound */ +} PartitionRangeBound; + +static int32 qsort_partition_list_value_cmp(const void *a, const void *b, + void *arg); +static int32 qsort_partition_rbound_cmp(const void *a, const void *b, + void *arg); + +static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec); +static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec); +static Oid get_partition_operator(PartitionKey key, int col, + StrategyNumber strategy, bool *need_relabel); +static List *generate_partition_qual(Relation rel, bool recurse); + +static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index, + List *datums, bool lower); +static int32 partition_rbound_cmp(PartitionKey key, + Datum *datums1, RangeDatumContent *content1, bool lower1, + PartitionRangeBound *b2); +static int32 partition_rbound_datum_cmp(PartitionKey key, + Datum *rb_datums, RangeDatumContent *rb_content, + Datum *tuple_datums); + +static int32 partition_bound_cmp(PartitionKey key, + PartitionBoundInfo boundinfo, + int offset, void *probe, bool probe_is_bound); +static int partition_bound_bsearch(PartitionKey key, + PartitionBoundInfo boundinfo, + void *probe, bool probe_is_bound, bool *is_equal); + +/* Support get_partition_for_tuple() */ +static void FormPartitionKeyDatum(PartitionDispatch pd, + TupleTableSlot *slot, + EState *estate, + Datum *values, + bool *isnull); + +/* + * RelationBuildPartitionDesc + * Form rel's partition descriptor + * + * Not flushed from the cache by RelationClearRelation() unless changed because + * of addition or removal of partition. 
+ */ +void +RelationBuildPartitionDesc(Relation rel) +{ + List *inhoids, + *partoids; + Oid *oids = NULL; + List *boundspecs = NIL; + ListCell *cell; + int i, + nparts; + PartitionKey key = RelationGetPartitionKey(rel); + PartitionDesc result; + MemoryContext oldcxt; + + int ndatums = 0; + + /* List partitioning specific */ + PartitionListValue **all_values = NULL; + bool found_null = false; + int null_index = -1; + + /* Range partitioning specific */ + PartitionRangeBound **rbounds = NULL; + + /* + * The following could happen in situations where rel has a pg_class entry + * but not the pg_partitioned_table entry yet. + */ + if (key == NULL) + return; + + /* Get partition oids from pg_inherits */ + inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock); + + /* Collect bound spec nodes in a list */ + i = 0; + partoids = NIL; + foreach(cell, inhoids) + { + Oid inhrelid = lfirst_oid(cell); + HeapTuple tuple; + Datum datum; + bool isnull; + Node *boundspec; + + tuple = SearchSysCache1(RELOID, inhrelid); + + /* + * It is possible that the pg_class tuple of a partition has not been + * updated yet to set its relpartbound field. The only case where + * this happens is when we open the parent relation to check using its + * partition descriptor that a new partition's bound does not overlap + * some existing partition. 
+ */ + if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition) + { + ReleaseSysCache(tuple); + continue; + } + + datum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + Assert(!isnull); + boundspec = (Node *) stringToNode(TextDatumGetCString(datum)); + boundspecs = lappend(boundspecs, boundspec); + partoids = lappend_oid(partoids, inhrelid); + ReleaseSysCache(tuple); + } + + nparts = list_length(partoids); + + if (nparts > 0) + { + oids = (Oid *) palloc(nparts * sizeof(Oid)); + i = 0; + foreach(cell, partoids) + oids[i++] = lfirst_oid(cell); + + /* Convert from node to the internal representation */ + if (key->strategy == PARTITION_STRATEGY_LIST) + { + List *non_null_values = NIL; + + /* + * Create a unified list of non-null values across all partitions. + */ + i = 0; + found_null = false; + null_index = -1; + foreach(cell, boundspecs) + { + ListCell *c; + PartitionBoundSpec *spec = lfirst(cell); + + if (spec->strategy != PARTITION_STRATEGY_LIST) + elog(ERROR, "invalid strategy in partition bound spec"); + + foreach(c, spec->listdatums) + { + Const *val = lfirst(c); + PartitionListValue *list_value = NULL; + + if (!val->constisnull) + { + list_value = (PartitionListValue *) + palloc0(sizeof(PartitionListValue)); + list_value->index = i; + list_value->value = val->constvalue; + } + else + { + /* + * Never put a null into the values array, flag + * instead for the code further down below where we + * construct the actual relcache struct. + */ + if (found_null) + elog(ERROR, "found null more than once"); + found_null = true; + null_index = i; + } + + if (list_value) + non_null_values = lappend(non_null_values, + list_value); + } + + i++; + } + + ndatums = list_length(non_null_values); + + /* + * Collect all list values in one array. Alongside the value, we + * also save the index of partition the value comes from. 
+ */ + all_values = (PartitionListValue **) palloc(ndatums * + sizeof(PartitionListValue *)); + i = 0; + foreach(cell, non_null_values) + { + PartitionListValue *src = lfirst(cell); + + all_values[i] = (PartitionListValue *) + palloc(sizeof(PartitionListValue)); + all_values[i]->value = src->value; + all_values[i]->index = src->index; + i++; + } + + qsort_arg(all_values, ndatums, sizeof(PartitionListValue *), + qsort_partition_list_value_cmp, (void *) key); + } + else if (key->strategy == PARTITION_STRATEGY_RANGE) + { + int j, + k; + PartitionRangeBound **all_bounds, + *prev; + bool *distinct_indexes; + + all_bounds = (PartitionRangeBound **) palloc0(2 * nparts * + sizeof(PartitionRangeBound *)); + distinct_indexes = (bool *) palloc(2 * nparts * sizeof(bool)); + + /* + * Create a unified list of range bounds across all the + * partitions. + */ + i = j = 0; + foreach(cell, boundspecs) + { + PartitionBoundSpec *spec = lfirst(cell); + PartitionRangeBound *lower, + *upper; + + if (spec->strategy != PARTITION_STRATEGY_RANGE) + elog(ERROR, "invalid strategy in partition bound spec"); + + lower = make_one_range_bound(key, i, spec->lowerdatums, + true); + upper = make_one_range_bound(key, i, spec->upperdatums, + false); + all_bounds[j] = lower; + all_bounds[j + 1] = upper; + j += 2; + i++; + } + Assert(j == 2 * nparts); + + /* Sort all the bounds in ascending order */ + qsort_arg(all_bounds, 2 * nparts, + sizeof(PartitionRangeBound *), + qsort_partition_rbound_cmp, + (void *) key); + + /* + * Count the number of distinct bounds to allocate an array of + * that size. + */ + ndatums = 0; + prev = NULL; + for (i = 0; i < 2 * nparts; i++) + { + PartitionRangeBound *cur = all_bounds[i]; + bool is_distinct = false; + int j; + + /* Is current bound is distinct from the previous? */ + for (j = 0; j < key->partnatts; j++) + { + Datum cmpval; + + if (prev == NULL) + { + is_distinct = true; + break; + } + + /* + * If either of them has infinite element, we can't equate + * them. 
Even when both are infinite, they'd have + * opposite signs, because only one of cur and prev is a + * lower bound). + */ + if (cur->content[j] != RANGE_DATUM_FINITE || + prev->content[j] != RANGE_DATUM_FINITE) + { + is_distinct = true; + break; + } + cmpval = FunctionCall2Coll(&key->partsupfunc[j], + key->partcollation[j], + cur->datums[j], + prev->datums[j]); + if (DatumGetInt32(cmpval) != 0) + { + is_distinct = true; + break; + } + } + + /* + * Count the current bound if it is distinct from the previous + * one. Also, store if the index i contains a distinct bound + * that we'd like put in the relcache array. + */ + if (is_distinct) + { + distinct_indexes[i] = true; + ndatums++; + } + else + distinct_indexes[i] = false; + + prev = cur; + } + + /* + * Finally save them in an array from where they will be copied + * into the relcache. + */ + rbounds = (PartitionRangeBound **) palloc(ndatums * + sizeof(PartitionRangeBound *)); + k = 0; + for (i = 0; i < 2 * nparts; i++) + { + if (distinct_indexes[i]) + rbounds[k++] = all_bounds[i]; + } + Assert(k == ndatums); + } + else + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + /* Now build the actual relcache partition descriptor */ + rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext, + RelationGetRelationName(rel), + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt); + + result = (PartitionDescData *) palloc0(sizeof(PartitionDescData)); + result->nparts = nparts; + if (nparts > 0) + { + PartitionBoundInfo boundinfo; + int *mapping; + int next_index = 0; + + result->oids = (Oid *) palloc0(nparts * sizeof(Oid)); + + boundinfo = (PartitionBoundInfoData *) + palloc0(sizeof(PartitionBoundInfoData)); + boundinfo->strategy = key->strategy; + boundinfo->ndatums = ndatums; + boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); + + /* Initialize mapping array with invalid values */ + mapping = (int *) palloc(sizeof(int) * nparts); + for (i = 0; i < nparts; 
i++) + mapping[i] = -1; + + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + { + boundinfo->has_null = found_null; + boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); + + /* + * Copy values. Indexes of individual values are mapped + * to canonical values so that they match for any two list + * partitioned tables with same number of partitions and + * same lists per partition. One way to canonicalize is + * to assign the index in all_values[] of the smallest + * value of each partition, as the index of all of the + * partition's values. + */ + for (i = 0; i < ndatums; i++) + { + boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum)); + boundinfo->datums[i][0] = datumCopy(all_values[i]->value, + key->parttypbyval[0], + key->parttyplen[0]); + + /* If the old index has no mapping, assign one */ + if (mapping[all_values[i]->index] == -1) + mapping[all_values[i]->index] = next_index++; + + boundinfo->indexes[i] = mapping[all_values[i]->index]; + } + + /* + * If null-accepting partition has no mapped index yet, + * assign one. This could happen if such partition + * accepts only null and hence not covered in the above + * loop which only handled non-null values. 
+ */ + if (found_null) + { + Assert(null_index >= 0); + if (mapping[null_index] == -1) + mapping[null_index] = next_index++; + } + + /* All partition must now have a valid mapping */ + Assert(next_index == nparts); + + if (found_null) + boundinfo->null_index = mapping[null_index]; + else + boundinfo->null_index = -1; + break; + } + + case PARTITION_STRATEGY_RANGE: + { + boundinfo->content = (RangeDatumContent **) palloc(ndatums * + sizeof(RangeDatumContent *)); + boundinfo->indexes = (int *) palloc((ndatums + 1) * + sizeof(int)); + + for (i = 0; i < ndatums; i++) + { + int j; + + boundinfo->datums[i] = (Datum *) palloc(key->partnatts * + sizeof(Datum)); + boundinfo->content[i] = (RangeDatumContent *) + palloc(key->partnatts * + sizeof(RangeDatumContent)); + for (j = 0; j < key->partnatts; j++) + { + if (rbounds[i]->content[j] == RANGE_DATUM_FINITE) + boundinfo->datums[i][j] = + datumCopy(rbounds[i]->datums[j], + key->parttypbyval[j], + key->parttyplen[j]); + /* Remember, we are storing the tri-state value. */ + boundinfo->content[i][j] = rbounds[i]->content[j]; + } + + /* + * There is no mapping for invalid indexes. + * + * Any lower bounds in the rbounds array have invalid + * indexes assigned, because the values between the + * previous bound (if there is one) and this (lower) + * bound are not part of the range of any existing + * partition. + */ + if (rbounds[i]->lower) + boundinfo->indexes[i] = -1; + else + { + int orig_index = rbounds[i]->index; + + /* If the old index is has no mapping, assign one */ + if (mapping[orig_index] == -1) + mapping[orig_index] = next_index++; + + boundinfo->indexes[i] = mapping[orig_index]; + } + } + boundinfo->indexes[i] = -1; + break; + } + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + result->boundinfo = boundinfo; + + /* + * Now assign OIDs from the original array into mapped indexes of the + * result array. 
Order of OIDs in the former is defined by the + * catalog scan that retrieved them, whereas that in the latter is + * defined by canonicalized representation of the list values or the + * range bounds. + */ + for (i = 0; i < nparts; i++) + result->oids[mapping[i]] = oids[i]; + pfree(mapping); + } + + MemoryContextSwitchTo(oldcxt); + rel->rd_partdesc = result; +} + +/* + * Are two partition bound collections logically equal? + * + * Used in the keep logic of relcache.c (ie, in RelationClearRelation()). + * This is also useful when b1 and b2 are bound collections of two separate + * relations, respectively, because PartitionBoundInfo is a canonical + * representation of partition bounds. + * + * key supplies the comparison support function and collation for each + * partition key column. Returns true only if the strategy, the number of + * datums, the null-partition fields, every range-datum content flag, every + * finite datum and every partition index agree between b1 and b2. + */ +bool +partition_bounds_equal(PartitionKey key, + PartitionBoundInfo b1, PartitionBoundInfo b2) +{ + int i; + + if (b1->strategy != b2->strategy) + return false; + + if (b1->ndatums != b2->ndatums) + return false; + + if (b1->has_null != b2->has_null) + return false; + + if (b1->null_index != b2->null_index) + return false; + + for (i = 0; i < b1->ndatums; i++) + { + int j; + + for (j = 0; j < key->partnatts; j++) + { + int32 cmpval; + + /* + * Range partitions can have infinite datums. Compare the content + * flags first: RelationBuildPartitionDesc() stores a datum only + * when the content is RANGE_DATUM_FINITE, so datums[i][j] must not + * be passed to the comparison function for an infinite bound. + */ + if (b1->content != NULL) + { + if (b1->content[i][j] != b2->content[i][j]) + return false; + + if (b1->content[i][j] != RANGE_DATUM_FINITE) + continue; + } + + cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[j], + key->partcollation[j], + b1->datums[i][j], + b2->datums[i][j])); + if (cmpval != 0) + return false; + } + + if (b1->indexes[i] != b2->indexes[i]) + return false; + } + + /* + * There are ndatums+1 indexes in case of range partitions; i equals + * ndatums here, so this compares the extra trailing entry. + */ + if (key->strategy == PARTITION_STRATEGY_RANGE && + b1->indexes[i] != b2->indexes[i]) + return false; + + return true; +} + +/* + * check_new_partition_bound + * + * Checks if the new partition's bound overlaps any of the existing partitions + * of parent. Also performs additional checks as necessary per strategy.
+ */ +void +check_new_partition_bound(char *relname, Relation parent, Node *bound) +{ + PartitionBoundSpec *spec = (PartitionBoundSpec *) bound; + PartitionKey key = RelationGetPartitionKey(parent); + PartitionDesc partdesc = RelationGetPartitionDesc(parent); + ParseState *pstate = make_parsestate(NULL); + int with = -1; + bool overlap = false; + + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + { + Assert(spec->strategy == PARTITION_STRATEGY_LIST); + + if (partdesc->nparts > 0) + { + PartitionBoundInfo boundinfo = partdesc->boundinfo; + ListCell *cell; + + Assert(boundinfo && + boundinfo->strategy == PARTITION_STRATEGY_LIST && + (boundinfo->ndatums > 0 || boundinfo->has_null)); + + foreach(cell, spec->listdatums) + { + Const *val = lfirst(cell); + + if (!val->constisnull) + { + int offset; + bool equal; + + offset = partition_bound_bsearch(key, boundinfo, + &val->constvalue, + true, &equal); + if (offset >= 0 && equal) + { + overlap = true; + with = boundinfo->indexes[offset]; + break; + } + } + else if (boundinfo->has_null) + { + overlap = true; + with = boundinfo->null_index; + break; + } + } + } + + break; + } + + case PARTITION_STRATEGY_RANGE: + { + PartitionRangeBound *lower, + *upper; + + Assert(spec->strategy == PARTITION_STRATEGY_RANGE); + lower = make_one_range_bound(key, -1, spec->lowerdatums, true); + upper = make_one_range_bound(key, -1, spec->upperdatums, false); + + /* + * First check if the resulting range would be empty with + * specified lower and upper bounds + */ + if (partition_rbound_cmp(key, lower->datums, lower->content, true, + upper) >= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot create range partition with empty range"), + parser_errposition(pstate, spec->location))); + + if (partdesc->nparts > 0) + { + PartitionBoundInfo boundinfo = partdesc->boundinfo; + int off1, + off2; + bool equal = false; + + Assert(boundinfo && boundinfo->ndatums > 0 && + boundinfo->strategy == 
PARTITION_STRATEGY_RANGE); + + /* + * Find the greatest index of a range bound that is less + * than or equal with the new lower bound. + */ + off1 = partition_bound_bsearch(key, boundinfo, lower, true, + &equal); + + /* + * If equal has been set to true, that means the new lower + * bound is found to be equal with the bound at off1, + * which clearly means an overlap with the partition at + * index off1+1). + * + * Otherwise, check if there is a "gap" that could be + * occupied by the new partition. In case of a gap, the + * new upper bound should not cross past the upper + * boundary of the gap, that is, off2 == off1 should be + * true. + */ + if (!equal && boundinfo->indexes[off1 + 1] < 0) + { + off2 = partition_bound_bsearch(key, boundinfo, upper, + true, &equal); + + if (equal || off1 != off2) + { + overlap = true; + with = boundinfo->indexes[off2 + 1]; + } + } + else + { + overlap = true; + with = boundinfo->indexes[off1 + 1]; + } + } + + break; + } + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + if (overlap) + { + Assert(with >= 0); + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("partition \"%s\" would overlap partition \"%s\"", + relname, get_rel_name(partdesc->oids[with])), + parser_errposition(pstate, spec->location))); + } +} + +/* + * get_partition_parent + * + * Returns inheritance parent of a partition by scanning pg_inherits + * + * Note: Because this function assumes that the relation whose OID is passed + * as an argument will have precisely one parent, it should only be called + * when it is known that the relation is a partition. 
+ */ +Oid +get_partition_parent(Oid relid) +{ + Form_pg_inherits form; + Relation catalogRelation; + SysScanDesc scan; + ScanKeyData key[2]; + HeapTuple tuple; + Oid result; + + catalogRelation = heap_open(InheritsRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_inherits_inhrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + ScanKeyInit(&key[1], + Anum_pg_inherits_inhseqno, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(1)); + + scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true, + NULL, 2, key); + + tuple = systable_getnext(scan); + Assert(HeapTupleIsValid(tuple)); + + form = (Form_pg_inherits) GETSTRUCT(tuple); + result = form->inhparent; + + systable_endscan(scan); + heap_close(catalogRelation, AccessShareLock); + + return result; +} + +/* + * get_qual_from_partbound + * Given a parser node for partition bound, return the list of executable + * expressions as partition constraint + */ +List * +get_qual_from_partbound(Relation rel, Relation parent, Node *bound) +{ + PartitionBoundSpec *spec = (PartitionBoundSpec *) bound; + PartitionKey key = RelationGetPartitionKey(parent); + List *my_qual = NIL; + TupleDesc parent_tupdesc = RelationGetDescr(parent); + AttrNumber parent_attno; + AttrNumber *partition_attnos; + bool found_whole_row; + + Assert(key != NULL); + + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + Assert(spec->strategy == PARTITION_STRATEGY_LIST); + my_qual = get_qual_for_list(key, spec); + break; + + case PARTITION_STRATEGY_RANGE: + Assert(spec->strategy == PARTITION_STRATEGY_RANGE); + my_qual = get_qual_for_range(key, spec); + break; + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + /* + * Translate vars in the generated expression to have correct attnos. Note + * that the vars in my_qual bear attnos dictated by key which carries + * physical attnos of the parent. 
We must allow for a case where physical + * attnos of a partition can be different from the parent. + */ + partition_attnos = (AttrNumber *) + palloc0(parent_tupdesc->natts * sizeof(AttrNumber)); + for (parent_attno = 1; parent_attno <= parent_tupdesc->natts; + parent_attno++) + { + Form_pg_attribute attribute = parent_tupdesc->attrs[parent_attno - 1]; + char *attname = NameStr(attribute->attname); + AttrNumber partition_attno; + + if (attribute->attisdropped) + continue; + + partition_attno = get_attnum(RelationGetRelid(rel), attname); + partition_attnos[parent_attno - 1] = partition_attno; + } + + my_qual = (List *) map_variable_attnos((Node *) my_qual, + 1, 0, + partition_attnos, + parent_tupdesc->natts, + &found_whole_row); + /* there can never be a whole-row reference here */ + if (found_whole_row) + elog(ERROR, "unexpected whole-row reference found in partition key"); + + return my_qual; +} + +/* + * RelationGetPartitionQual + * + * Returns a list of partition quals + */ +List * +RelationGetPartitionQual(Relation rel, bool recurse) +{ + /* Quick exit */ + if (!rel->rd_rel->relispartition) + return NIL; + + return generate_partition_qual(rel, recurse); +} + +/* Turn an array of OIDs with N elements into a list */ +#define OID_ARRAY_TO_LIST(arr, N, list) \ + do\ + {\ + int i;\ + for (i = 0; i < (N); i++)\ + (list) = lappend_oid((list), (arr)[i]);\ + } while(0) + +/* + * RelationGetPartitionDispatchInfo + * Returns information necessary to route tuples down a partition tree + * + * All the partitions will be locked with lockmode, unless it is NoLock. + * A list of the OIDs of all the leaf partition of rel is returned in + * *leaf_part_oids. 
+ */ +PartitionDispatch * +RelationGetPartitionDispatchInfo(Relation rel, int lockmode, + int *num_parted, List **leaf_part_oids) +{ + PartitionDesc rootpartdesc = RelationGetPartitionDesc(rel); + PartitionDispatchData **pd; + List *all_parts = NIL, + *parted_rels; + ListCell *lc; + int i, + k, + next_parted; + + /* + * Lock partitions and make a list of the partitioned ones to prepare + * their PartitionDispatch objects below. + * + * Cannot use find_all_inheritors() here, because then the order of OIDs + * in parted_rels list would be unknown, which does not help, because we + * assign indexes within individual PartitionDispatch in an order that + * is predetermined (determined by the order of OIDs in individual + * partition descriptors). + */ + *num_parted = 1; + parted_rels = list_make1(rel); + OID_ARRAY_TO_LIST(rootpartdesc->oids, rootpartdesc->nparts, all_parts); + foreach(lc, all_parts) + { + Relation partrel = heap_open(lfirst_oid(lc), lockmode); + PartitionDesc partdesc = RelationGetPartitionDesc(partrel); + + /* + * If this partition is a partitioned table, add its children to the + * end of the list, so that they are processed as well. + */ + if (partdesc) + { + (*num_parted)++; + parted_rels = lappend(parted_rels, partrel); + OID_ARRAY_TO_LIST(partdesc->oids, partdesc->nparts, all_parts); + } + else + heap_close(partrel, NoLock); + + /* + * We keep the partitioned ones open until we're done using the + * information being collected here (for example, see + * ExecEndModifyTable). 
+ */ + } + + /* Generate PartitionDispatch objects for all partitioned tables */ + pd = (PartitionDispatchData **) palloc(*num_parted * + sizeof(PartitionDispatchData *)); + *leaf_part_oids = NIL; + i = k = 0; + + /* + * parted_rels[0] is rel itself; the partitioned children collected above + * follow it in the order they were appended, so the first of them lives + * in slot 1. + */ + next_parted = 1; + foreach(lc, parted_rels) + { + Relation partrel = lfirst(lc); + PartitionKey partkey = RelationGetPartitionKey(partrel); + PartitionDesc partdesc = RelationGetPartitionDesc(partrel); + int j; + + pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData)); + pd[i]->reldesc = partrel; + pd[i]->key = partkey; + pd[i]->keystate = NIL; + pd[i]->partdesc = partdesc; + pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int)); + + for (j = 0; j < partdesc->nparts; j++) + { + Oid partrelid = partdesc->oids[j]; + + if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE) + { + /* Leaf partition: non-negative sequence number */ + *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid); + pd[i]->indexes[j] = k++; + } + else + { + /* + * A negative entry -n refers to pd[n]. parted_rels was + * built breadth-first, so the partitioned children of + * successive parents occupy consecutive slots following + * all those handed out so far. A counter shared across + * all parents is therefore required; one derived from + * the parent's own position is only right for the root. + */ + pd[i]->indexes[j] = -next_parted; + next_parted++; + } + } + i++; + } + + return pd; +} + +/* Module-local functions */ + +/* + * get_qual_for_list + * + * Returns a list of expressions to use as a list partition's constraint. + */ +static List * +get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec) +{ + List *result; + ArrayExpr *arr; + ScalarArrayOpExpr *opexpr; + ListCell *cell, + *prev, + *next; + Node *keyCol; + Oid operoid; + bool need_relabel, + list_has_null = false; + NullTest *nulltest1 = NULL, + *nulltest2 = NULL; + + /* Left operand is either a simple Var or arbitrary expression */ + if (key->partattrs[0] != 0) + keyCol = (Node *) makeVar(1, + key->partattrs[0], + key->parttypid[0], + key->parttypmod[0], + key->parttypcoll[0], + 0); + else + keyCol = (Node *) copyObject(linitial(key->partexprs)); + + /* + * We must remove any NULL value in the list; we handle it separately + * below. 
+ */ + prev = NULL; /* NOTE: cells are deleted from spec->listdatums itself, so the caller's list is modified in place */ + for (cell = list_head(spec->listdatums); cell; cell = next) + { + Const *val = (Const *) lfirst(cell); + + next = lnext(cell); + + if (val->constisnull) + { + list_has_null = true; + spec->listdatums = list_delete_cell(spec->listdatums, + cell, prev); + } + else + prev = cell; + } + + if (!list_has_null) + { + /* + * Gin up a col IS NOT NULL test that will be AND'd with other + * expressions + */ + nulltest1 = makeNode(NullTest); + nulltest1->arg = (Expr *) keyCol; + nulltest1->nulltesttype = IS_NOT_NULL; + nulltest1->argisrow = false; + nulltest1->location = -1; + } + else + { + /* + * Gin up a col IS NULL test that will be OR'd with other expressions + */ + nulltest2 = makeNode(NullTest); + nulltest2->arg = (Expr *) keyCol; + nulltest2->nulltesttype = IS_NULL; + nulltest2->argisrow = false; + nulltest2->location = -1; + } + + /* Right operand is an ArrayExpr containing this partition's values */ + arr = makeNode(ArrayExpr); + arr->array_typeid = !type_is_array(key->parttypid[0]) + ? 
get_array_type(key->parttypid[0]) + : key->parttypid[0]; + arr->array_collid = key->parttypcoll[0]; + arr->element_typeid = key->parttypid[0]; + arr->elements = spec->listdatums; /* the list with any NULL entries already removed above */ + arr->multidims = false; + arr->location = -1; + + /* Get the correct btree equality operator */ + operoid = get_partition_operator(key, 0, BTEqualStrategyNumber, + &need_relabel); + if (need_relabel || key->partcollation[0] != key->parttypcoll[0]) + keyCol = (Node *) makeRelabelType((Expr *) keyCol, + key->partopcintype[0], + -1, + key->partcollation[0], + COERCE_EXPLICIT_CAST); + + /* Build leftop = ANY (rightop) */ + opexpr = makeNode(ScalarArrayOpExpr); + opexpr->opno = operoid; + opexpr->opfuncid = get_opcode(operoid); + opexpr->useOr = true; + opexpr->inputcollid = key->partcollation[0]; + opexpr->args = list_make2(keyCol, arr); + opexpr->location = -1; + + if (nulltest1) /* no NULL listed: two list items, col IS NOT NULL and col = ANY (...) */ + result = list_make2(nulltest1, opexpr); + else if (nulltest2) /* NULL listed: a single item, (col IS NULL) OR (col = ANY (...)) */ + { + Expr *or; + + or = makeBoolExpr(OR_EXPR, list_make2(nulltest2, opexpr), -1); + result = list_make1(or); + } + else /* not reached as written: exactly one of nulltest1/nulltest2 is set above */ + result = list_make1(opexpr); + + return result; +} + +/* + * get_qual_for_range + * + * Get a list of OpExpr's to use as a range partition's constraint. + */ +static List * +get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec) +{ + List *result = NIL; + ListCell *cell1, + *cell2, + *partexprs_item; + int i; + + /* + * Iterate over columns of the key, emitting an OpExpr for each using the + * corresponding lower and upper datums as constant operands. 
+ */ + i = 0; + partexprs_item = list_head(key->partexprs); + forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums) + { + PartitionRangeDatum *ldatum = lfirst(cell1), + *udatum = lfirst(cell2); + Node *keyCol; + Const *lower_val = NULL, + *upper_val = NULL; + EState *estate; + MemoryContext oldcxt; + Expr *test_expr; + ExprState *test_exprstate; + Datum test_result; + bool isNull; + bool need_relabel = false; + Oid operoid; + NullTest *nulltest; + + /* Left operand */ + if (key->partattrs[i] != 0) + { + keyCol = (Node *) makeVar(1, + key->partattrs[i], + key->parttypid[i], + key->parttypmod[i], + key->parttypcoll[i], + 0); + } + else + { + keyCol = (Node *) copyObject(lfirst(partexprs_item)); + partexprs_item = lnext(partexprs_item); + } + + /* + * Emit an IS NOT NULL expression for non-Var keys, because whereas + * simple attributes are covered by NOT NULL constraints, expression + * keys are still nullable which is not acceptable in case of range + * partitioning. + */ + if (!IsA(keyCol, Var)) + { + nulltest = makeNode(NullTest); + nulltest->arg = (Expr *) keyCol; + nulltest->nulltesttype = IS_NOT_NULL; + nulltest->argisrow = false; + nulltest->location = -1; + result = lappend(result, nulltest); + } + + /* + * Stop at this column if either of lower or upper datum is infinite, + * but do emit an OpExpr for the non-infinite datum. + */ + if (!ldatum->infinite) + lower_val = (Const *) ldatum->value; + if (!udatum->infinite) + upper_val = (Const *) udatum->value; + + /* + * If lower_val and upper_val are both finite and happen to be equal, + * emit only (keyCol = lower_val) for this column, because all rows in + * this partition could only ever contain this value (ie, lower_val) + * in the current partitioning column. We must consider further + * columns because the above condition does not fully constrain the + * rows of this partition. 
+ */ + if (lower_val && upper_val) + { + /* Get the correct btree equality operator for the test */ + operoid = get_partition_operator(key, i, BTEqualStrategyNumber, + &need_relabel); + + /* Create the test expression */ + estate = CreateExecutorState(); + oldcxt = MemoryContextSwitchTo(estate->es_query_cxt); + test_expr = make_opclause(operoid, + BOOLOID, + false, + (Expr *) lower_val, + (Expr *) upper_val, + InvalidOid, + key->partcollation[i]); + fix_opfuncids((Node *) test_expr); + test_exprstate = ExecInitExpr(test_expr, NULL); + test_result = ExecEvalExprSwitchContext(test_exprstate, + GetPerTupleExprContext(estate), + &isNull, NULL); /* isNull is not examined afterwards; only the boolean result is used */ + MemoryContextSwitchTo(oldcxt); + FreeExecutorState(estate); + + if (DatumGetBool(test_result)) + { + /* This can never be, but it's better to make sure */ + if (i == key->partnatts - 1) + elog(ERROR, "invalid range bound specification"); + + if (need_relabel || key->partcollation[i] != key->parttypcoll[i]) + keyCol = (Node *) makeRelabelType((Expr *) keyCol, + key->partopcintype[i], + -1, + key->partcollation[i], + COERCE_EXPLICIT_CAST); + result = lappend(result, + make_opclause(operoid, + BOOLOID, + false, + (Expr *) keyCol, + (Expr *) lower_val, + InvalidOid, + key->partcollation[i])); + + /* Go over to consider the next column. */ + i++; + continue; + } + } + + /* + * We can say here that lower_val != upper_val. Emit expressions + * (keyCol >= lower_val) and (keyCol < upper_val), then stop. 
+ */ + if (lower_val) + { + operoid = get_partition_operator(key, i, + BTGreaterEqualStrategyNumber, + &need_relabel); + + if (need_relabel || key->partcollation[i] != key->parttypcoll[i]) + keyCol = (Node *) makeRelabelType((Expr *) keyCol, + key->partopcintype[i], + -1, + key->partcollation[i], + COERCE_EXPLICIT_CAST); + result = lappend(result, + make_opclause(operoid, + BOOLOID, + false, + (Expr *) keyCol, + (Expr *) lower_val, + InvalidOid, + key->partcollation[i])); + } + + if (upper_val) + { + operoid = get_partition_operator(key, i, + BTLessStrategyNumber, + &need_relabel); /* NOTE(review): when lower_val is also set, keyCol may already have been wrapped in a RelabelType by the branch above, so the test below can wrap it a second time -- confirm this is intended */ + + if (need_relabel || key->partcollation[i] != key->parttypcoll[i]) + keyCol = (Node *) makeRelabelType((Expr *) keyCol, + key->partopcintype[i], + -1, + key->partcollation[i], + COERCE_EXPLICIT_CAST); + + result = lappend(result, + make_opclause(operoid, + BOOLOID, + false, + (Expr *) keyCol, + (Expr *) upper_val, + InvalidOid, + key->partcollation[i])); + } + + /* + * We can stop at this column, because we would not have checked the + * next column when routing a given row into this partition. + */ + break; + } + + return result; +} + +/* + * get_partition_operator + * + * Return oid of the operator of given strategy for a given partition key + * column. + */ +static Oid +get_partition_operator(PartitionKey key, int col, StrategyNumber strategy, + bool *need_relabel) +{ + Oid operoid; + + /* + * First check if there exists an operator of the given strategy, with + * this column's type as both its lefttype and righttype, in the + * partitioning operator family specified for the column. + */ + operoid = get_opfamily_member(key->partopfamily[col], + key->parttypid[col], + key->parttypid[col], + strategy); + + /* + * If one doesn't exist, we must resort to using an operator in the same + * operator family but with the operator class declared input type. 
It is + * OK to do so, because the column's type is known to be binary-coercible + * with the operator class input type (otherwise, the operator class in + * question would not have been accepted as the partitioning operator + * class). We must however inform the caller to wrap the non-Const + * expression with a RelabelType node to denote the implicit coercion. It + * ensures that the resulting expression structurally matches similarly + * processed expressions within the optimizer. + */ + if (!OidIsValid(operoid)) + { + operoid = get_opfamily_member(key->partopfamily[col], + key->partopcintype[col], + key->partopcintype[col], + strategy); + *need_relabel = true; + } + else + *need_relabel = false; + + if (!OidIsValid(operoid)) + elog(ERROR, "could not find operator for partitioning"); + + return operoid; +} + +/* + * generate_partition_qual + * + * Generate partition predicate from rel's partition bound expression + * + * Result expression tree is stored in CacheMemoryContext to ensure it survives + * as long as the relcache entry. But we should be running in a less long-lived + * working context. To avoid leaking cache memory if this routine fails partway + * through, we build in working memory and then copy the completed structure + * into cache memory. 
+ */ +static List * +generate_partition_qual(Relation rel, bool recurse) +{ + HeapTuple tuple; + MemoryContext oldcxt; + Datum boundDatum; + bool isnull; + Node *bound; + List *my_qual = NIL, + *result = NIL; + Relation parent; + + /* Guard against stack overflow due to overly deep partition tree */ + check_stack_depth(); + + /* Grab at least an AccessShareLock on the parent table */ + parent = heap_open(get_partition_parent(RelationGetRelid(rel)), + AccessShareLock); + + /* Quick copy */ + if (rel->rd_partcheck) + { + if (parent->rd_rel->relispartition && recurse) + result = list_concat(generate_partition_qual(parent, true), + copyObject(rel->rd_partcheck)); + else + result = copyObject(rel->rd_partcheck); + + heap_close(parent, AccessShareLock); + return result; + } + + /* Get pg_class.relpartbound */ + if (!rel->rd_rel->relispartition) /* should not happen */ + elog(ERROR, "relation \"%s\" has relispartition = false", + RelationGetRelationName(rel)); + tuple = SearchSysCache1(RELOID, RelationGetRelid(rel)); + boundDatum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + if (isnull) /* should not happen */ + elog(ERROR, "relation \"%s\" has relpartbound = null", + RelationGetRelationName(rel)); + bound = stringToNode(TextDatumGetCString(boundDatum)); + ReleaseSysCache(tuple); + + my_qual = get_qual_from_partbound(rel, parent, bound); + + /* If requested, add parent's quals to the list (if any) */ + if (parent->rd_rel->relispartition && recurse) + { + List *parent_check; + + parent_check = generate_partition_qual(parent, true); + result = list_concat(parent_check, my_qual); + } + else + result = my_qual; + + /* Save a copy of my_qual in the relcache */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + rel->rd_partcheck = copyObject(my_qual); + MemoryContextSwitchTo(oldcxt); + + /* Keep the parent locked until commit */ + heap_close(parent, NoLock); + + return result; +} + +/* ---------------- + * FormPartitionKeyDatum + * Construct 
values[] and isnull[] arrays for the partition key + * of a tuple. + * + * pd partition dispatch data supplying the partition key and + * the cached key expression state (pd->keystate) + * slot Heap tuple from which to extract partition key + * estate executor state for evaluating any partition key + * expressions (must be non-NULL) + * values Array of partition key Datums (output area) + * isnull Array of is-null indicators (output area) + * + * The ecxt_scantuple slot of estate's per-tuple expr context must point to + * the heap tuple passed in. + * ---------------- + */ +static void +FormPartitionKeyDatum(PartitionDispatch pd, + TupleTableSlot *slot, + EState *estate, + Datum *values, + bool *isnull) +{ + ListCell *partexpr_item; + int i; + + if (pd->key->partexprs != NIL && pd->keystate == NIL) + { + /* Check caller has set up context correctly */ + Assert(estate != NULL && + GetPerTupleExprContext(estate)->ecxt_scantuple == slot); + + /* First time through, set up expression evaluation state */ + pd->keystate = (List *) ExecPrepareExpr((Expr *) pd->key->partexprs, + estate); + } + + partexpr_item = list_head(pd->keystate); + for (i = 0; i < pd->key->partnatts; i++) + { + AttrNumber keycol = pd->key->partattrs[i]; + Datum datum; + bool isNull; + + if (keycol != 0) + { + /* Plain column; get the value directly from the heap tuple */ + datum = slot_getattr(slot, keycol, &isNull); + } + else + { + /* Expression; need to evaluate it */ + if (partexpr_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item), + GetPerTupleExprContext(estate), + &isNull, + NULL); + partexpr_item = lnext(partexpr_item); + } + values[i] = datum; + isnull[i] = isNull; + } + + if (partexpr_item != NULL) /* every prepared expression state must have been consumed by the loop */ + elog(ERROR, "wrong number of partition key expressions"); +} + +/* + * get_partition_for_tuple + * Finds a leaf partition for tuple contained in *slot + * + * Returned value is the sequence number of the leaf partition thus found, + * or -1 if no leaf partition is found 
for the tuple. *failed_at is set + * to the OID of the partitioned table whose partition was not found in + * the latter case. + */ +int +get_partition_for_tuple(PartitionDispatch * pd, + TupleTableSlot *slot, + EState *estate, + Oid *failed_at) +{ + PartitionDispatch parent; + Datum values[PARTITION_MAX_KEYS]; + bool isnull[PARTITION_MAX_KEYS]; + int cur_offset, + cur_index; + int i; + + /* start with the root partitioned table */ + parent = pd[0]; + while (true) + { + PartitionKey key = parent->key; + PartitionDesc partdesc = parent->partdesc; + + /* Quick exit */ + if (partdesc->nparts == 0) + { + *failed_at = RelationGetRelid(parent->reldesc); + return -1; + } + + /* Extract partition key from tuple */ + FormPartitionKeyDatum(parent, slot, estate, values, isnull); + + if (key->strategy == PARTITION_STRATEGY_RANGE) + { + /* Disallow nulls in the range partition key of the tuple */ + for (i = 0; i < key->partnatts; i++) + if (isnull[i]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("range partition key of row contains null"))); + } + + if (partdesc->boundinfo->has_null && isnull[0]) + /* Tuple maps to the null-accepting list partition */ + cur_index = partdesc->boundinfo->null_index; + else + { + /* Else bsearch in partdesc->boundinfo */ + bool equal = false; + + cur_offset = partition_bound_bsearch(key, partdesc->boundinfo, + values, false, &equal); + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + if (cur_offset >= 0 && equal) + cur_index = partdesc->boundinfo->indexes[cur_offset]; + else + cur_index = -1; + break; + + case PARTITION_STRATEGY_RANGE: + + /* + * Offset returned is such that the bound at offset is + * found to be less or equal with the tuple. So, the bound + * at offset+1 would be the upper bound. 
+ */ + cur_index = partdesc->boundinfo->indexes[cur_offset + 1]; + break; + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + } + + /* + * cur_index < 0 means we failed to find a partition of this parent. + * cur_index >= 0 means we either found the leaf partition, or the + * next parent to find a partition of. + */ + if (cur_index < 0) + { + *failed_at = RelationGetRelid(parent->reldesc); + return -1; + } + else if (parent->indexes[cur_index] < 0) + parent = pd[-parent->indexes[cur_index]]; + else + break; + } + + return parent->indexes[cur_index]; +} + +/* + * qsort_partition_list_value_cmp + * + * Compare two list partition bound datums + */ +static int32 +qsort_partition_list_value_cmp(const void *a, const void *b, void *arg) +{ + Datum val1 = (*(const PartitionListValue **) a)->value, + val2 = (*(const PartitionListValue **) b)->value; + PartitionKey key = (PartitionKey) arg; + + return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], + key->partcollation[0], + val1, val2)); +} + +/* + * make_one_range_bound + * + * Return a PartitionRangeBound given a list of PartitionRangeDatum elements + * and a flag telling whether the bound is lower or not. Made into a function + * because there are multiple sites that want to use this facility. + */ +static PartitionRangeBound * +make_one_range_bound(PartitionKey key, int index, List *datums, bool lower) +{ + PartitionRangeBound *bound; + ListCell *cell; + int i; + + bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound)); + bound->index = index; + bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum)); + bound->content = (RangeDatumContent *) palloc0(key->partnatts * + sizeof(RangeDatumContent)); + bound->lower = lower; + + i = 0; + foreach(cell, datums) + { + PartitionRangeDatum *datum = lfirst(cell); + + /* What's contained in this range datum? */ + bound->content[i] = !datum->infinite + ? RANGE_DATUM_FINITE + : (lower ? 
RANGE_DATUM_NEG_INF + : RANGE_DATUM_POS_INF); + + if (bound->content[i] == RANGE_DATUM_FINITE) + { + Const *val = (Const *) datum->value; + + if (val->constisnull) + elog(ERROR, "invalid range bound datum"); + bound->datums[i] = val->constvalue; + } + + i++; + } + + return bound; +} + +/* Used when sorting range bounds across all range partitions */ +static int32 +qsort_partition_rbound_cmp(const void *a, const void *b, void *arg) +{ + PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a); + PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b); + PartitionKey key = (PartitionKey) arg; + + return partition_rbound_cmp(key, b1->datums, b1->content, b1->lower, b2); +} + +/* + * partition_rbound_cmp + * + * Return for two range bounds whether the 1st one (specified in datum1, + * content1, and lower1) is <=, =, >= the bound specified in *b2 + */ +static int32 +partition_rbound_cmp(PartitionKey key, + Datum *datums1, RangeDatumContent *content1, bool lower1, + PartitionRangeBound *b2) +{ + int32 cmpval; + int i; + Datum *datums2 = b2->datums; + RangeDatumContent *content2 = b2->content; + bool lower2 = b2->lower; + + for (i = 0; i < key->partnatts; i++) + { + /* + * First, handle cases involving infinity, which don't require + * invoking the comparison proc. + */ + if (content1[i] != RANGE_DATUM_FINITE && + content2[i] != RANGE_DATUM_FINITE) + + /* + * Both are infinity, so they are equal unless one is negative + * infinity and other positive (or vice versa) + */ + return content1[i] == content2[i] ? 0 + : (content1[i] < content2[i] ? -1 : 1); + else if (content1[i] != RANGE_DATUM_FINITE) + return content1[i] == RANGE_DATUM_NEG_INF ? -1 : 1; + else if (content2[i] != RANGE_DATUM_FINITE) + return content2[i] == RANGE_DATUM_NEG_INF ? 
1 : -1; + + cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i], + key->partcollation[i], + datums1[i], + datums2[i])); + if (cmpval != 0) + break; + } + + /* + * If the comparison is anything other than equal, we're done. If they + * compare equal though, we still have to consider whether the boundaries + * are inclusive or exclusive. Exclusive one is considered smaller of the + * two. + */ + if (cmpval == 0 && lower1 != lower2) + cmpval = lower1 ? 1 : -1; + + return cmpval; +} + +/* + * partition_rbound_datum_cmp + * + * Return whether range bound (specified in rb_datums, rb_content, and + * rb_lower) <=, =, >= partition key of tuple (tuple_datums) + */ +static int32 +partition_rbound_datum_cmp(PartitionKey key, + Datum *rb_datums, RangeDatumContent *rb_content, + Datum *tuple_datums) +{ + int i; + int32 cmpval = -1; + + for (i = 0; i < key->partnatts; i++) + { + if (rb_content[i] != RANGE_DATUM_FINITE) + return rb_content[i] == RANGE_DATUM_NEG_INF ? -1 : 1; + + cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i], + key->partcollation[i], + rb_datums[i], + tuple_datums[i])); + if (cmpval != 0) + break; + } + + return cmpval; +} + +/* + * partition_bound_cmp + * + * Return whether the bound at offset in boundinfo is <=, =, >= the argument + * specified in *probe. + */ +static int32 +partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo, + int offset, void *probe, bool probe_is_bound) +{ + Datum *bound_datums = boundinfo->datums[offset]; + int32 cmpval = -1; + + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], + key->partcollation[0], + bound_datums[0], + *(Datum *) probe)); + break; + + case PARTITION_STRATEGY_RANGE: + { + RangeDatumContent *content = boundinfo->content[offset]; + + if (probe_is_bound) + { + /* + * We need to pass whether the existing bound is a lower + * bound, so that two equal-valued lower and upper bounds + * are not regarded equal. 
+ */ + bool lower = boundinfo->indexes[offset] < 0; + + cmpval = partition_rbound_cmp(key, + bound_datums, content, lower, + (PartitionRangeBound *) probe); + } + else + cmpval = partition_rbound_datum_cmp(key, + bound_datums, content, + (Datum *) probe); + break; + } + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + return cmpval; +} + +/* + * Binary search on a collection of partition bounds. Returns greatest index + * of bound in array boundinfo->datums which is less or equal with *probe. + * If all bounds in the array are greater than *probe, -1 is returned. + * + * *probe could either be a partition bound or a Datum array representing + * the partition key of a tuple being routed; probe_is_bound tells which. + * We pass that down to the comparison function so that it can interpret the + * contents of *probe accordingly. + * + * *is_equal is set to whether the bound at the returned index is equal with + * *probe. + */ +static int +partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo, + void *probe, bool probe_is_bound, bool *is_equal) +{ + int lo, + hi, + mid; + + lo = -1; + hi = boundinfo->ndatums - 1; + while (lo < hi) + { + int32 cmpval; + + mid = (lo + hi + 1) / 2; + cmpval = partition_bound_cmp(key, boundinfo, mid, probe, + probe_is_bound); + if (cmpval <= 0) + { + lo = mid; + *is_equal = (cmpval == 0); + } + else + hi = mid - 1; + } + + return lo; +} diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index 8fabe6899f..724b41e64c 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -368,7 +368,7 @@ CreateConstraintEntry(const char *constraintName, */ recordDependencyOnSingleRelExpr(&conobject, conExpr, relId, DEPENDENCY_NORMAL, - DEPENDENCY_NORMAL); + DEPENDENCY_NORMAL, false); } /* Post creation hook for new constraint */ diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 
c617abb223..f4afcd9aae 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -201,7 +201,8 @@ analyze_rel(Oid relid, RangeVar *relation, int options, * locked the relation. */ if (onerel->rd_rel->relkind == RELKIND_RELATION || - onerel->rd_rel->relkind == RELKIND_MATVIEW) + onerel->rd_rel->relkind == RELKIND_MATVIEW || + onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { /* Regular table, so we'll use the regular row acquisition function */ acquirefunc = acquire_sample_rows; @@ -1317,7 +1318,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, /* Check table type (MATVIEW can't happen, but might as well allow) */ if (childrel->rd_rel->relkind == RELKIND_RELATION || - childrel->rd_rel->relkind == RELKIND_MATVIEW) + childrel->rd_rel->relkind == RELKIND_MATVIEW || + childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { /* Regular table, so use the regular row acquisition function */ acquirefunc = acquire_sample_rows; diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index ec5d6f1565..270be0af18 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -161,6 +161,11 @@ typedef struct CopyStateData ExprState **defexprs; /* array of default att expressions */ bool volatile_defexprs; /* is any of defexprs volatile? */ List *range_table; + PartitionDispatch *partition_dispatch_info; + int num_dispatch; + int num_partitions; + ResultRelInfo *partitions; + TupleConversionMap **partition_tupconv_maps; /* * These variables are used to reduce overhead in textual COPY FROM. @@ -1397,6 +1402,71 @@ BeginCopy(ParseState *pstate, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("table \"%s\" does not have OIDs", RelationGetRelationName(cstate->rel)))); + + /* + * Initialize state for CopyFrom tuple routing. Watch out for + * any foreign partitions. 
+ */ + if (is_from && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDispatch *pd; + List *leaf_parts; + ListCell *cell; + int i, + num_parted, + num_leaf_parts; + ResultRelInfo *leaf_part_rri; + + /* Get the tuple-routing information and lock partitions */ + pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock, + &num_parted, &leaf_parts); + num_leaf_parts = list_length(leaf_parts); + cstate->partition_dispatch_info = pd; + cstate->num_dispatch = num_parted; + cstate->num_partitions = num_leaf_parts; + cstate->partitions = (ResultRelInfo *) palloc(num_leaf_parts * + sizeof(ResultRelInfo)); + cstate->partition_tupconv_maps = (TupleConversionMap **) + palloc0(num_leaf_parts * sizeof(TupleConversionMap *)); + + leaf_part_rri = cstate->partitions; + i = 0; + foreach(cell, leaf_parts) + { + Relation partrel; + + /* + * We locked all the partitions above including the leaf + * partitions. Note that each of the relations in + * cstate->partitions will be closed by CopyFrom() after + * it's finished with its processing. + */ + partrel = heap_open(lfirst_oid(cell), NoLock); + + /* + * Verify result relation is a valid target for the current + * operation. 
+ */ + CheckValidResultRel(partrel, CMD_INSERT); + + InitResultRelInfo(leaf_part_rri, + partrel, + 1, /* dummy */ + false, /* no partition constraint check */ + 0); + + /* Open partition indices */ + ExecOpenIndices(leaf_part_rri, false); + + if (!equalTupleDescs(tupDesc, RelationGetDescr(partrel))) + cstate->partition_tupconv_maps[i] = + convert_tuples_by_name(tupDesc, + RelationGetDescr(partrel), + gettext_noop("could not convert row type")); + leaf_part_rri++; + i++; + } + } } else { @@ -1751,6 +1821,12 @@ BeginCopyTo(ParseState *pstate, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot copy from sequence \"%s\"", RelationGetRelationName(rel)))); + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from partitioned table \"%s\"", + RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) TO variant."))); else ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -2249,6 +2325,7 @@ CopyFrom(CopyState cstate) Datum *values; bool *nulls; ResultRelInfo *resultRelInfo; + ResultRelInfo *saved_resultRelInfo = NULL; EState *estate = CreateExecutorState(); /* for ExecConstraints() */ ExprContext *econtext; TupleTableSlot *myslot; @@ -2275,6 +2352,7 @@ CopyFrom(CopyState cstate) * only hint about them in the view case.) */ if (cstate->rel->rd_rel->relkind != RELKIND_RELATION && + cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE && !(cstate->rel->trigdesc && cstate->rel->trigdesc->trig_insert_instead_row)) { @@ -2385,6 +2463,7 @@ CopyFrom(CopyState cstate) InitResultRelInfo(resultRelInfo, cstate->rel, 1, /* dummy rangetable index */ + true, /* do load partition check expression */ 0); ExecOpenIndices(resultRelInfo, false); @@ -2407,11 +2486,13 @@ CopyFrom(CopyState cstate) * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default * expressions. 
Such triggers or expressions might query the table we're * inserting to, and act differently if the tuples that have already been - * processed and prepared for insertion are not there. + * processed and prepared for insertion are not there. We also can't + * do it if the table is partitioned. */ if ((resultRelInfo->ri_TrigDesc != NULL && (resultRelInfo->ri_TrigDesc->trig_insert_before_row || resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) || + cstate->partition_dispatch_info != NULL || cstate->volatile_defexprs) { useHeapMultiInsert = false; @@ -2488,6 +2569,59 @@ CopyFrom(CopyState cstate) slot = myslot; ExecStoreTuple(tuple, slot, InvalidBuffer, false); + /* Determine the partition to heap_insert the tuple into */ + if (cstate->partition_dispatch_info) + { + int leaf_part_index; + TupleConversionMap *map; + + /* + * Away we go ... If we end up not finding a partition after all, + * ExecFindPartition() does not return and errors out instead. + * Otherwise, the returned value is to be used as an index into + * arrays cstate->partitions[] and cstate->partition_tupconv_maps[] + * that will get us the ResultRelInfo and TupleConversionMap for the + * partition, respectively. + */ + leaf_part_index = ExecFindPartition(resultRelInfo, + cstate->partition_dispatch_info, + slot, + estate); + Assert(leaf_part_index >= 0 && + leaf_part_index < cstate->num_partitions); + + /* + * Save the old ResultRelInfo and switch to the one corresponding + * to the selected partition. 
+ */ + saved_resultRelInfo = resultRelInfo; + resultRelInfo = cstate->partitions + leaf_part_index; + + /* We do not yet have a way to insert into a foreign partition */ + if (resultRelInfo->ri_FdwRoutine) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot route inserted tuples to a foreign table"))); + + /* + * For ExecInsertIndexTuples() to work on the partition's indexes + */ + estate->es_result_relation_info = resultRelInfo; + + /* + * We might need to convert from the parent rowtype to the + * partition rowtype. + */ + map = cstate->partition_tupconv_maps[leaf_part_index]; + if (map) + { + tuple = do_convert_tuple(tuple, map); + ExecStoreTuple(tuple, slot, InvalidBuffer, true); + } + + tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + } + skip_tuple = false; /* BEFORE ROW INSERT Triggers */ @@ -2513,7 +2647,8 @@ CopyFrom(CopyState cstate) else { /* Check the constraints of the tuple */ - if (cstate->rel->rd_att->constr) + /* + * resultRelInfo may now refer to the leaf partition chosen by tuple + * routing, so test the constraints of the relation we actually + * insert into rather than those of the COPY target. + */ + if (resultRelInfo->ri_RelationDesc->rd_att->constr || + resultRelInfo->ri_PartitionCheck) ExecConstraints(resultRelInfo, slot, estate); if (useHeapMultiInsert) @@ -2546,7 +2681,8 @@ CopyFrom(CopyState cstate) List *recheckIndexes = NIL; /* OK, store the tuple and create index entries for it */ - heap_insert(cstate->rel, tuple, mycid, hi_options, bistate); + heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid, + hi_options, bistate); if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, @@ -2570,6 +2706,12 @@ CopyFrom(CopyState cstate) * tuples inserted by an INSERT command. 
*/ processed++; + + if (saved_resultRelInfo) + { + resultRelInfo = saved_resultRelInfo; + estate->es_result_relation_info = resultRelInfo; + } } } @@ -2607,6 +2749,32 @@ CopyFrom(CopyState cstate) ExecCloseIndices(resultRelInfo); + /* Close all the partitioned tables, leaf partitions, and their indices */ + if (cstate->partition_dispatch_info) + { + int i; + + /* + * Remember cstate->partition_dispatch_info[0] corresponds to the root + * partitioned table, which we must not try to close, because it is + * the main target table of COPY that will be closed eventually by + * DoCopy(). + */ + for (i = 1; i < cstate->num_dispatch; i++) + { + PartitionDispatch pd = cstate->partition_dispatch_info[i]; + + heap_close(pd->reldesc, NoLock); + } + for (i = 0; i < cstate->num_partitions; i++) + { + ResultRelInfo *resultRelInfo = cstate->partitions + i; + + ExecCloseIndices(resultRelInfo); + heap_close(resultRelInfo->ri_RelationDesc, NoLock); + } + } + FreeExecutorState(estate); /* diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 5b4f6affcc..d6d52d9929 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -112,7 +112,7 @@ create_ctas_internal(List *attrList, IntoClause *into) * Create the relation. (This will error out if there's an existing view, * so we don't need more code to complain if "replace" is false.) */ - intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL); + intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL); /* * If necessary, create a TOAST table for the target table. 
Note that diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 85817c6530..eeb2b1fe80 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -69,8 +69,6 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo, char *accessMethodName, Oid accessMethodId, bool amcanorder, bool isconstraint); -static Oid GetIndexOpClass(List *opclass, Oid attrType, - char *accessMethodName, Oid accessMethodId); static char *ChooseIndexName(const char *tabname, Oid namespaceId, List *colnames, List *exclusionOpNames, bool primary, bool isconstraint); @@ -383,6 +381,11 @@ DefineIndex(Oid relationId, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot create index on foreign table \"%s\"", RelationGetRelationName(rel)))); + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create index on partitioned table \"%s\"", + RelationGetRelationName(rel)))); else ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -1145,10 +1148,10 @@ ComputeIndexAttrs(IndexInfo *indexInfo, /* * Identify the opclass to use. */ - classOidP[attn] = GetIndexOpClass(attribute->opclass, - atttype, - accessMethodName, - accessMethodId); + classOidP[attn] = ResolveOpClass(attribute->opclass, + atttype, + accessMethodName, + accessMethodId); /* * Identify the exclusion operator, if any. @@ -1255,10 +1258,13 @@ ComputeIndexAttrs(IndexInfo *indexInfo, /* * Resolve possibly-defaulted operator class specification + * + * Note: This is used to resolve operator class specification in index and + * partition key definitions. 
*/ -static Oid -GetIndexOpClass(List *opclass, Oid attrType, - char *accessMethodName, Oid accessMethodId) +Oid +ResolveOpClass(List *opclass, Oid attrType, + char *accessMethodName, Oid accessMethodId) { char *schemaname; char *opcname; diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c index a0c0d75977..9e62e00b8d 100644 --- a/src/backend/commands/lockcmds.c +++ b/src/backend/commands/lockcmds.c @@ -87,7 +87,7 @@ RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid, Oid oldrelid, * check */ /* Currently, we only allow plain tables to be locked */ - if (relkind != RELKIND_RELATION) + if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", diff --git a/src/backend/commands/policy.c b/src/backend/commands/policy.c index 70e22c1000..6da3205c9e 100644 --- a/src/backend/commands/policy.c +++ b/src/backend/commands/policy.c @@ -88,7 +88,7 @@ RangeVarCallbackForPolicy(const RangeVar *rv, Oid relid, Oid oldrelid, rv->relname))); /* Relation type MUST be a table. 
*/ - if (relkind != RELKIND_RELATION) + if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", rv->relname))); @@ -384,7 +384,8 @@ RemovePolicyById(Oid policy_id) relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid; rel = heap_open(relid, AccessExclusiveLock); - if (rel->rd_rel->relkind != RELKIND_RELATION) + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c index 5bd7e124c1..2b0ae34830 100644 --- a/src/backend/commands/seclabel.c +++ b/src/backend/commands/seclabel.c @@ -110,7 +110,8 @@ ExecSecLabelStmt(SecLabelStmt *stmt) relation->rd_rel->relkind != RELKIND_VIEW && relation->rd_rel->relkind != RELKIND_MATVIEW && relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE && - relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, materialized view, composite type, or foreign table", diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 7e37108b8d..d953b4408b 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -234,7 +234,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) stmt->tablespacename = NULL; stmt->if_not_exists = seq->if_not_exists; - address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL); + address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL); seqoid = address.objectId; Assert(seqoid != InvalidOid); @@ -1475,7 +1475,8 @@ process_owned_by(Relation seqrel, List *owned_by) /* Must be a regular or foreign table */ if (!(tablerel->rd_rel->relkind == 
RELKIND_RELATION || - tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)) + tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE || + tablerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("referenced relation \"%s\" is not a table or foreign table", diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 6322fa75a7..c77b216d4f 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -29,6 +29,7 @@ #include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/objectaccess.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" @@ -65,6 +66,9 @@ #include "nodes/parsenodes.h" #include "optimizer/clauses.h" #include "optimizer/planner.h" +#include "optimizer/predtest.h" +#include "optimizer/prep.h" +#include "optimizer/var.h" #include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parse_collate.h" @@ -162,6 +166,7 @@ typedef struct AlteredTableInfo Oid newTableSpace; /* new tablespace; 0 means no change */ bool chgPersistence; /* T if SET LOGGED/UNLOGGED is used */ char newrelpersistence; /* if above is true */ + List *partition_constraint; /* for attach partition validation */ /* Objects to rebuild after completing ALTER TYPE operations */ List *changedConstraintOids; /* OIDs of constraints to rebuild */ List *changedConstraintDefs; /* string definitions of same */ @@ -252,6 +257,12 @@ static const struct dropmsgstrings dropmsgstringarray[] = { gettext_noop("foreign table \"%s\" does not exist, skipping"), gettext_noop("\"%s\" is not a foreign table"), gettext_noop("Use DROP FOREIGN TABLE to remove a foreign table.")}, + {RELKIND_PARTITIONED_TABLE, + ERRCODE_UNDEFINED_TABLE, + gettext_noop("table \"%s\" does not exist"), + gettext_noop("table \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not a table"), + 
gettext_noop("Use DROP TABLE to remove a table.")}, {'\0', 0, NULL, NULL, NULL, NULL} }; @@ -272,7 +283,8 @@ struct DropRelationCallbackState static void truncate_check_rel(Relation rel); static List *MergeAttributes(List *schema, List *supers, char relpersistence, - List **supOids, List **supconstr, int *supOidCount); + bool is_partition, List **supOids, List **supconstr, + int *supOidCount); static bool MergeCheckConstraint(List *constraints, char *name, Node *expr); static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel); static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel); @@ -339,7 +351,9 @@ static void add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid); static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid); static void ATPrepAddOids(List **wqueue, Relation rel, bool recurse, AlterTableCmd *cmd, LOCKMODE lockmode); +static void ATPrepDropNotNull(Relation rel, bool recurse, bool recursing); static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode); +static void ATPrepSetNotNull(Relation rel, bool recurse, bool recursing); static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel, const char *colName, LOCKMODE lockmode); static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName, @@ -433,6 +447,15 @@ static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, void *arg); static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, void *arg); +static bool is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr); +static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy); +static void ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs, + List **partexprs, Oid *partopclass, Oid *partcollation); +static void CreateInheritance(Relation child_rel, Relation 
parent_rel); +static void RemoveInheritance(Relation child_rel, Relation parent_rel); +static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel, + PartitionCmd *cmd); +static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name); /* ---------------------------------------------------------------- @@ -455,7 +478,7 @@ static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, */ ObjectAddress DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, - ObjectAddress *typaddress) + ObjectAddress *typaddress, const char *queryString) { char relname[NAMEDATALEN]; Oid namespaceId; @@ -492,6 +515,14 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("ON COMMIT can only be used on temporary tables"))); + if (stmt->partspec != NULL) + { + if (relkind != RELKIND_RELATION) + elog(ERROR, "unexpected relkind: %d", (int) relkind); + + relkind = RELKIND_PARTITIONED_TABLE; + } + /* * Look up the namespace in which we are supposed to create the relation, * check we have permission to create there, lock it against concurrent @@ -578,6 +609,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, */ schema = MergeAttributes(schema, stmt->inhRelations, stmt->relation->relpersistence, + stmt->partbound != NULL, &inheritOids, &old_constraints, &parentOidCount); /* @@ -588,17 +620,33 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, descriptor = BuildDescForRelation(schema); /* - * Notice that we allow OIDs here only for plain tables, even though some - * other relkinds can support them. This is necessary because the - * default_with_oids GUC must apply only to plain tables and not any other - * relkind; doing otherwise would break existing pg_dump files. We could - * allow explicit "WITH OIDS" while not allowing default_with_oids to - * affect other relkinds, but it would complicate interpretOidsOption(). 
+ * Notice that we allow OIDs here only for plain tables and partitioned + * tables, even though some other relkinds can support them. This is + * necessary because the default_with_oids GUC must apply only to plain + * tables and not any other relkind; doing otherwise would break existing + * pg_dump files. We could allow explicit "WITH OIDS" while not allowing + * default_with_oids to affect other relkinds, but it would complicate + * interpretOidsOption(). */ localHasOids = interpretOidsOption(stmt->options, - (relkind == RELKIND_RELATION)); + (relkind == RELKIND_RELATION || + relkind == RELKIND_PARTITIONED_TABLE)); descriptor->tdhasoid = (localHasOids || parentOidCount > 0); + if (stmt->partbound) + { + /* If the parent has OIDs, partitions must have them too. */ + if (parentOidCount > 0 && !localHasOids) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create table without OIDs as partition of table with OIDs"))); + /* If the parent doesn't, partitions must not have them. */ + if (parentOidCount == 0 && localHasOids) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot create table with OIDs as partition of table without OIDs"))); + } + /* * Find columns with default values and prepare for insertion of the * defaults. Pre-cooked (that is, inherited) defaults go into a list of @@ -697,6 +745,110 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, */ rel = relation_open(relationId, AccessExclusiveLock); + /* Process and store partition bound, if any. */ + if (stmt->partbound) + { + Node *bound; + ParseState *pstate; + Oid parentId = linitial_oid(inheritOids); + Relation parent; + + /* Already have strong enough lock on the parent */ + parent = heap_open(parentId, NoLock); + + /* + * We are going to try to validate the partition bound specification + * against the partition key of parentRel, so it better have one. 
+ */ + if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("\"%s\" is not partitioned", + RelationGetRelationName(parent)))); + + /* Transform the bound values */ + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + bound = transformPartitionBound(pstate, parent, stmt->partbound); + + /* + * Check first that the new partition's bound is valid and does not + * overlap with any of existing partitions of the parent - note that + * it does not return on error. + */ + check_new_partition_bound(relname, parent, bound); + heap_close(parent, NoLock); + + /* Update the pg_class entry. */ + StorePartitionBound(rel, bound); + + /* + * The code that follows may also update the pg_class tuple to update + * relnumchecks, so bump up the command counter to avoid the "already + * updated by self" error. + */ + CommandCounterIncrement(); + } + + /* + * Process the partitioning specification (if any) and store the + * partition key information into the catalog. + */ + if (stmt->partspec) + { + char strategy; + int partnatts, + i; + AttrNumber partattrs[PARTITION_MAX_KEYS]; + Oid partopclass[PARTITION_MAX_KEYS]; + Oid partcollation[PARTITION_MAX_KEYS]; + List *partexprs = NIL; + List *cmds = NIL; + + /* + * We need to transform the raw parsetrees corresponding to partition + * expressions into executable expression trees. Like column defaults + * and CHECK constraints, we could not have done the transformation + * earlier. 
+ */ + stmt->partspec = transformPartitionSpec(rel, stmt->partspec, + &strategy); + ComputePartitionAttrs(rel, stmt->partspec->partParams, + partattrs, &partexprs, partopclass, + partcollation); + + partnatts = list_length(stmt->partspec->partParams); + StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs, + partopclass, partcollation); + + /* Force key columns to be NOT NULL when using range partitioning */ + if (strategy == PARTITION_STRATEGY_RANGE) + { + for (i = 0; i < partnatts; i++) + { + AttrNumber partattno = partattrs[i]; + Form_pg_attribute attform; + + /* + * A key entry with attribute number 0 is not a plain column, + * so there is no attribute to look up or to mark NOT NULL; + * indexing attrs[] with it would read before the array. + */ + if (partattno == 0) + continue; + + attform = descriptor->attrs[partattno - 1]; + if (!attform->attnotnull) + { + /* Add a subcommand to make this one NOT NULL */ + AlterTableCmd *cmd = makeNode(AlterTableCmd); + + cmd->subtype = AT_SetNotNull; + cmd->name = pstrdup(NameStr(attform->attname)); + cmds = lappend(cmds, cmd); + } + } + + /* + * Although, there cannot be any partitions yet, we still need to + * pass true for recurse; ATPrepSetNotNull() complains if we don't + */ + if (cmds != NIL) + AlterTableInternal(RelationGetRelid(rel), cmds, true); + } + } + /* * Now add any newly specified column default values and CHECK constraints * to the new relation. These are passed to us in the form of raw @@ -927,6 +1079,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, HeapTuple tuple; struct DropRelationCallbackState *state; char relkind; + char expected_relkind; Form_pg_class classform; LOCKMODE heap_lockmode; @@ -955,7 +1108,19 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, return; /* concurrently dropped, so nothing to do */ classform = (Form_pg_class) GETSTRUCT(tuple); - if (classform->relkind != relkind) + /* + * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE, + * but RemoveRelations() can only pass one relkind for a given relation. + * It chooses RELKIND_RELATION for both regular and partitioned tables. 
+ * That means we must be careful before giving the wrong type error when + * the relation is RELKIND_PARTITIONED_TABLE. + */ + if (classform->relkind == RELKIND_PARTITIONED_TABLE) + expected_relkind = RELKIND_RELATION; + else + expected_relkind = classform->relkind; + + if (relkind != expected_relkind) DropErrorMsgWrongType(rel->relname, classform->relkind, relkind); /* Allow DROP to either table owner or schema owner */ @@ -1054,6 +1219,10 @@ ExecuteTruncate(TruncateStmt *stmt) relids = lappend_oid(relids, childrelid); } } + else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("must truncate child tables too"))); } /* @@ -1153,6 +1322,7 @@ ExecuteTruncate(TruncateStmt *stmt) InitResultRelInfo(resultRelInfo, rel, 0, /* dummy rangetable index */ + false, 0); resultRelInfo++; } @@ -1293,7 +1463,8 @@ truncate_check_rel(Relation rel) AclResult aclresult; /* Only allow truncate on regular tables */ - if (rel->rd_rel->relkind != RELKIND_RELATION) + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", @@ -1359,6 +1530,7 @@ storage_name(char c) * of ColumnDef's.) It is destructively changed. * 'supers' is a list of names (as RangeVar nodes) of parent relations. * 'relpersistence' is a persistence type of the table. + * 'is_partition' tells if the table is a partition * * Output arguments: * 'supOids' receives a list of the OIDs of the parent relations. 
@@ -1410,7 +1582,8 @@ storage_name(char c) */ static List * MergeAttributes(List *schema, List *supers, char relpersistence, - List **supOids, List **supconstr, int *supOidCount) + bool is_partition, List **supOids, List **supconstr, + int *supOidCount) { ListCell *entry; List *inhSchema = NIL; @@ -1420,6 +1593,7 @@ MergeAttributes(List *schema, List *supers, char relpersistence, bool have_bogus_defaults = false; int child_attno; static Node bogus_marker = {0}; /* marks conflicting defaults */ + List *saved_schema = NIL; /* * Check for and reject tables with too many columns. We perform this @@ -1438,6 +1612,17 @@ MergeAttributes(List *schema, List *supers, char relpersistence, errmsg("tables can have at most %d columns", MaxHeapAttributeNumber))); + /* + * In case of a partition, there are no new column definitions, only + * dummy ColumnDefs created for column constraints. We merge these + * constraints inherited from the parent. + */ + if (is_partition) + { + saved_schema = schema; + schema = NIL; + } + /* * Check for duplicate names in the explicit list of attributes. * @@ -1518,11 +1703,35 @@ MergeAttributes(List *schema, List *supers, char relpersistence, * on the parent table, which might otherwise be attempting to clear * the parent's relhassubclass field, if its previous children were * recently dropped. + * + * If the child table is a partition, then we instead grab an exclusive + * lock on the parent because its partition descriptor will be changed + * by addition of the new partition. + */ + if (!is_partition) + relation = heap_openrv(parent, ShareUpdateExclusiveLock); + else + relation = heap_openrv(parent, AccessExclusiveLock); + + /* + * We do not allow partitioned tables and partitions to participate + * in regular inheritance. 
*/ - relation = heap_openrv(parent, ShareUpdateExclusiveLock); + if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + !is_partition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from partitioned table \"%s\"", + parent->relname))); + if (relation->rd_rel->relispartition && !is_partition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from partition \"%s\"", + parent->relname))); if (relation->rd_rel->relkind != RELKIND_RELATION && - relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("inherited relation \"%s\" is not a table or foreign table", @@ -1532,7 +1741,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence, relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("cannot inherit from temporary relation \"%s\"", + errmsg(!is_partition + ? "cannot inherit from temporary relation \"%s\"" + : "cannot create a permanent relation as partition of temporary relation \"%s\"", parent->relname))); /* If existing rel is temp, it must belong to this session */ @@ -1540,7 +1751,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence, !relation->rd_islocaltemp) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("cannot inherit from temporary relation of another session"))); + errmsg(!is_partition + ? "cannot inherit from temporary relation of another session" + : "cannot create as partition of temporary relation of another session"))); /* * We should have an UNDER permission flag for this, but for now, @@ -1777,9 +1990,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence, pfree(newattno); /* - * Close the parent rel, but keep our ShareUpdateExclusiveLock on it - * until xact commit. 
That will prevent someone else from deleting or - * ALTERing the parent before the child is committed. + * Close the parent rel, but keep our lock on it until xact commit. + * That will prevent someone else from deleting or ALTERing the parent + * before the child is committed. */ heap_close(relation, NoLock); } @@ -1787,7 +2000,8 @@ MergeAttributes(List *schema, List *supers, char relpersistence, /* * If we had no inherited attributes, the result schema is just the * explicitly declared columns. Otherwise, we need to merge the declared - * columns into the inherited schema list. + * columns into the inherited schema list. Although, we never have any + * explicitly declared columns if the table is a partition. */ if (inhSchema != NIL) { @@ -1815,6 +2029,12 @@ MergeAttributes(List *schema, List *supers, char relpersistence, Oid defcollid, newcollid; + /* + * Partitions have only one parent, so conflict should never + * occur + */ + Assert(!is_partition); + /* * Yes, try to merge the two column definitions. They must * have the same type, typmod, and collation. @@ -1896,6 +2116,56 @@ MergeAttributes(List *schema, List *supers, char relpersistence, MaxHeapAttributeNumber))); } + /* + * Now that we have the column definition list for a partition, we can + * check whether the columns referenced in column option specifications + * actually exist. Also, we merge the options into the corresponding + * column definitions. + */ + if (is_partition && list_length(saved_schema) > 0) + { + schema = list_concat(schema, saved_schema); + + foreach(entry, schema) + { + ColumnDef *coldef = lfirst(entry); + ListCell *rest = lnext(entry); + ListCell *prev = entry; + + /* + * Partition column option that does not belong to a column from + * the parent. This works because the columns from the parent + * come first in the list (see above). 
+ */ + if (coldef->typeName == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + coldef->colname))); + while (rest != NULL) + { + ColumnDef *restdef = lfirst(rest); + ListCell *next = lnext(rest); /* need to save it in case + * we delete it */ + + if (strcmp(coldef->colname, restdef->colname) == 0) + { + /* + * merge the column options into the column from the + * parent; NOT NULL is OR'ed in so that a marking + * inherited from the parent is never cleared + */ + coldef->is_not_null |= restdef->is_not_null; + coldef->raw_default = restdef->raw_default; + coldef->cooked_default = restdef->cooked_default; + coldef->constraints = restdef->constraints; + schema = list_delete_cell(schema, rest, prev); + } + else + { + /* + * Advance prev only past cells that are still in the + * list; after a deletion rest has been freed and prev + * must keep pointing at the last surviving cell. + */ + prev = rest; + } + rest = next; + } + } + } + /* * If we found any conflicting parent default values, check to make sure * they were overridden by the child. @@ -2166,7 +2436,8 @@ renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing) relkind != RELKIND_MATVIEW && relkind != RELKIND_COMPOSITE_TYPE && relkind != RELKIND_INDEX && - relkind != RELKIND_FOREIGN_TABLE) + relkind != RELKIND_FOREIGN_TABLE && + relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, materialized view, composite type, index, or foreign table", @@ -3057,6 +3328,11 @@ AlterTableGetLockLevel(List *cmds) cmd_lockmode = AlterTableGetRelOptionsLockLevel((List *) cmd->def); break; + case AT_AttachPartition: + case AT_DetachPartition: + cmd_lockmode = AccessExclusiveLock; + break; + default: /* oops */ elog(ERROR, "unrecognized alter table type: %d", (int) cmd->subtype); @@ -3168,12 +3444,14 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, break; case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */ ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATPrepDropNotNull(rel, recurse, recursing); ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode); /* No command-specific prep needed */ pass = AT_PASS_DROP; break; case AT_SetNotNull: /* ALTER 
COLUMN SET NOT NULL */ ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE); + ATPrepSetNotNull(rel, recurse, recursing); ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode); /* No command-specific prep needed */ pass = AT_PASS_ADD_CONSTR; @@ -3374,6 +3652,12 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, /* No command-specific prep needed */ pass = AT_PASS_MISC; break; + case AT_AttachPartition: + case AT_DetachPartition: + ATSimplePermissions(rel, ATT_TABLE); + /* No command-specific prep needed */ + pass = AT_PASS_MISC; + break; default: /* oops */ elog(ERROR, "unrecognized alter table type: %d", (int) cmd->subtype); @@ -3444,7 +3728,14 @@ ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode) { AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); - if (tab->relkind == RELKIND_RELATION || + /* + * If the table is source table of ATTACH PARTITION command, we did + * not modify anything about it that will change its toasting + * requirement, so no need to check. + */ + if (((tab->relkind == RELKIND_RELATION || + tab->relkind == RELKIND_PARTITIONED_TABLE) && + tab->partition_constraint == NIL) || tab->relkind == RELKIND_MATVIEW) AlterTableCreateToastTable(tab->relid, (Datum) 0, lockmode); } @@ -3693,6 +3984,12 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, case AT_GenericOptions: ATExecGenericOptions(rel, (List *) cmd->def); break; + case AT_AttachPartition: + ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def); + break; + case AT_DetachPartition: + ATExecDetachPartition(rel, ((PartitionCmd *) cmd->def)->name); + break; default: /* oops */ elog(ERROR, "unrecognized alter table type: %d", (int) cmd->subtype); @@ -3878,7 +4175,8 @@ ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode) * Test the current data within the table against new constraints * generated by ALTER TABLE commands, but don't rebuild data. 
*/ - if (tab->constraints != NIL || tab->new_notnull) + if (tab->constraints != NIL || tab->new_notnull || + tab->partition_constraint != NIL) ATRewriteTable(tab, InvalidOid, lockmode); /* @@ -3958,6 +4256,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) CommandId mycid; BulkInsertState bistate; int hi_options; + List *partqualstate = NIL; /* * Open the relation(s). We have surely already locked the existing @@ -4022,6 +4321,15 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) } } + /* Build expression execution states for partition check quals */ + if (tab->partition_constraint) + { + needscan = true; + partqualstate = (List *) + ExecPrepareExpr((Expr *) tab->partition_constraint, + estate); + } + foreach(l, tab->newvals) { NewColumnValue *ex = lfirst(l); @@ -4211,6 +4519,11 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) } } + if (partqualstate && !ExecQual(partqualstate, econtext, true)) + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("partition constraint is violated by some row"))); + /* Write the tuple out to the new relation */ if (newrel) heap_insert(newrel, tuple, mycid, hi_options, bistate); @@ -4291,6 +4604,7 @@ ATSimplePermissions(Relation rel, int allowed_targets) switch (rel->rd_rel->relkind) { case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: actual_target = ATT_TABLE; break; case RELKIND_VIEW: @@ -4407,7 +4721,8 @@ ATSimpleRecursion(List **wqueue, Relation rel, */ if (recurse && (rel->rd_rel->relkind == RELKIND_RELATION || - rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)) + rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)) { Oid relid = RelationGetRelid(rel); ListCell *child; @@ -4527,7 +4842,8 @@ find_composite_type_dependencies(Oid typeOid, Relation origRelation, att = rel->rd_att->attrs[pg_depend->objsubid - 1]; if (rel->rd_rel->relkind == RELKIND_RELATION || - rel->rd_rel->relkind == 
RELKIND_MATVIEW) + rel->rd_rel->relkind == RELKIND_MATVIEW || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { if (origTypeName) ereport(ERROR, @@ -4728,6 +5044,11 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, if (recursing) ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE); + if (rel->rd_rel->relispartition && !recursing) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot add column to a partition"))); + attrdesc = heap_open(AttributeRelationId, RowExclusiveLock); /* @@ -5174,6 +5495,20 @@ ATPrepAddOids(List **wqueue, Relation rel, bool recurse, AlterTableCmd *cmd, LOC * Return the address of the modified column. If the column was already * nullable, InvalidObjectAddress is returned. */ + +static void +ATPrepDropNotNull(Relation rel, bool recurse, bool recursing) +{ + /* + * If the parent is a partitioned table, like check constraints, NOT NULL + * constraints must be dropped from child tables. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + !recurse && !recursing) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("constraint must be dropped from child tables too"))); +} static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode) { @@ -5249,6 +5584,45 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode) list_free(indexoidlist); + /* If rel is partition, shouldn't drop NOT NULL if parent has the same */ + if (rel->rd_rel->relispartition) + { + Oid parentId = get_partition_parent(RelationGetRelid(rel)); + Relation parent = heap_open(parentId, AccessShareLock); + TupleDesc tupDesc = RelationGetDescr(parent); + AttrNumber parent_attnum; + + parent_attnum = get_attnum(parentId, colName); + if (tupDesc->attrs[parent_attnum - 1]->attnotnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("column \"%s\" is marked NOT NULL in parent table", + colName))); + heap_close(parent, AccessShareLock); + } 
+ + /* + * Columns in a range partition key must stay NOT NULL; the check is + * limited to the range strategy, which is what the error message names. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionKey key = RelationGetPartitionKey(rel); + int i; + + if (get_partition_strategy(key) == PARTITION_STRATEGY_RANGE) + for (i = 0; i < get_partition_natts(key); i++) + { + AttrNumber partattnum = get_partition_col_attnum(key, i); + + if (partattnum == attnum) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("column \"%s\" is in range partition key", + colName))); + } + } + /* * Okay, actually perform the catalog change ... if needed */ @@ -5281,6 +5655,21 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode) * Return the address of the modified column. If the column was already NOT * NULL, InvalidObjectAddress is returned. */ + +static void +ATPrepSetNotNull(Relation rel, bool recurse, bool recursing) +{ + /* + * If the parent is a partitioned table, like check constraints, NOT NULL + * constraints must be added to the child tables. 
+ */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + !recurse && !recursing) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("constraint must be added to child tables too"))); +} + static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel, const char *colName, LOCKMODE lockmode) @@ -5419,7 +5808,8 @@ ATPrepSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE if (rel->rd_rel->relkind != RELKIND_RELATION && rel->rd_rel->relkind != RELKIND_MATVIEW && rel->rd_rel->relkind != RELKIND_INDEX && - rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, materialized view, index, or foreign table", @@ -5691,6 +6081,68 @@ ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing, cmd->subtype = AT_DropColumnRecurse; } +/* + * Checks if attnum is a partition attribute for rel + * + * Sets *used_in_expr if attnum is found to be referenced in some partition + * key expression. It's possible for a column to be both used directly and + * as part of an expression; if that happens, *used_in_expr may end up as + * either true or false. That's OK for current uses of this function, because + * *used_in_expr is only used to tailor the error message text. 
+ */ +static bool +is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr) +{ + PartitionKey key; + int partnatts; + List *partexprs; + ListCell *partexprs_item; + int i; + + if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + return false; + + key = RelationGetPartitionKey(rel); + partnatts = get_partition_natts(key); + partexprs = get_partition_exprs(key); + + partexprs_item = list_head(partexprs); + for (i = 0; i < partnatts; i++) + { + AttrNumber partattno = get_partition_col_attnum(key, i); + + if (partattno != 0) + { + if (attnum == partattno) + { + if (used_in_expr) + *used_in_expr = false; + return true; + } + } + else + { + /* Arbitrary expression */ + Node *expr = (Node *) lfirst(partexprs_item); + Bitmapset *expr_attrs = NULL; + + /* Find all attributes referenced */ + pull_varattnos(expr, 1, &expr_attrs); + partexprs_item = lnext(partexprs_item); + + if (bms_is_member(attnum - FirstLowInvalidHeapAttributeNumber, + expr_attrs)) + { + if (used_in_expr) + *used_in_expr = true; + return true; + } + } + } + + return false; +} + /* * Return value is the address of the dropped column. 
*/ @@ -5705,6 +6157,7 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName, AttrNumber attnum; List *children; ObjectAddress object; + bool is_expr; /* At top level, permission check was done in ATPrepCmd, else do it */ if (recursing) @@ -5749,6 +6202,19 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName, errmsg("cannot drop inherited column \"%s\"", colName))); + /* Don't drop columns used in the partition key */ + if (is_partition_attr(rel, attnum, &is_expr)) + { + if (!is_expr) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot drop column named in partition key"))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot drop column referenced in partition key expression"))); + } + ReleaseSysCache(tuple); /* @@ -5763,6 +6229,15 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName, Relation attr_rel; ListCell *child; + /* + * In case of a partitioned table, the column must be dropped from the + * partitions as well. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && !recurse) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("column must be dropped from child tables too"))); + attr_rel = heap_open(AttributeRelationId, RowExclusiveLock); foreach(child, children) { @@ -6267,6 +6742,12 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel, * Validity checks (permission checks wait till we have the column * numbers) */ + if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot reference partitioned table \"%s\"", + RelationGetRelationName(pkrel)))); + if (pkrel->rd_rel->relkind != RELKIND_RELATION) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -7776,6 +8257,16 @@ ATExecDropConstraint(Relation rel, const char *constrName, } } + /* + * In case of a partitioned table, the constraint must be dropped from + * the partitions too. 
There is no such thing as NO INHERIT constraints + * in case of partitioned tables. + */ + if (!recurse && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("constraint must be dropped from child tables too"))); + /* * Propagate to children as appropriate. Unlike most other ALTER * routines, we have to do this one level of recursion at a time; we can't @@ -7904,6 +8395,7 @@ ATPrepAlterColumnType(List **wqueue, NewColumnValue *newval; ParseState *pstate = make_parsestate(NULL); AclResult aclresult; + bool is_expr; if (rel->rd_rel->reloftype && !recursing) ereport(ERROR, @@ -7934,6 +8426,19 @@ ATPrepAlterColumnType(List **wqueue, errmsg("cannot alter inherited column \"%s\"", colName))); + /* Don't alter columns used in the partition key */ + if (is_partition_attr(rel, attnum, &is_expr)) + { + if (!is_expr) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot alter type of column named in partition key"))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("cannot alter type of column referenced in partition key expression"))); + } + /* Look up the target type */ typenameTypeIdAndMod(NULL, typeName, &targettype, &targettypmod); @@ -7949,7 +8454,8 @@ ATPrepAlterColumnType(List **wqueue, list_make1_oid(rel->rd_rel->reltype), false); - if (tab->relkind == RELKIND_RELATION) + if (tab->relkind == RELKIND_RELATION || + tab->relkind == RELKIND_PARTITIONED_TABLE) { /* * Set up an expression to transform the old data value to the new @@ -8979,6 +9485,7 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock case RELKIND_VIEW: case RELKIND_MATVIEW: case RELKIND_FOREIGN_TABLE: + case RELKIND_PARTITIONED_TABLE: /* ok to change owner */ break; case RELKIND_INDEX: @@ -9440,6 +9947,7 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, case RELKIND_RELATION: case RELKIND_TOASTVALUE: case 
RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: (void) heap_reloptions(rel->rd_rel->relkind, newOptions, true); break; case RELKIND_VIEW: @@ -9860,7 +10368,8 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) /* Only move the object type requested */ if ((stmt->objtype == OBJECT_TABLE && - relForm->relkind != RELKIND_RELATION) || + relForm->relkind != RELKIND_RELATION && + relForm->relkind != RELKIND_PARTITIONED_TABLE) || (stmt->objtype == OBJECT_INDEX && relForm->relkind != RELKIND_INDEX) || (stmt->objtype == OBJECT_MATVIEW && @@ -10059,6 +10568,16 @@ ATPrepAddInherit(Relation child_rel) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot change inheritance of typed table"))); + + if (child_rel->rd_rel->relispartition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot change inheritance of a partition"))); + + if (child_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot change inheritance of partitioned table"))); } /* @@ -10067,12 +10586,7 @@ ATPrepAddInherit(Relation child_rel) static ObjectAddress ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) { - Relation parent_rel, - catalogRelation; - SysScanDesc scan; - ScanKeyData key; - HeapTuple inheritsTuple; - int32 inhseqno; + Relation parent_rel; List *children; ObjectAddress address; @@ -10110,18 +10624,94 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot inherit to temporary relation of another session"))); + /* Prevent partitioned tables from becoming inheritance parents */ + if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from partitioned table \"%s\"", + parent->relname))); + + /* Likewise for partitions */ + if (parent_rel->rd_rel->relispartition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), 
+ errmsg("cannot inherit from a partition"))); + /* - * Check for duplicates in the list of parents, and determine the highest - * inhseqno already present; we'll use the next one for the new parent. - * (Note: get RowExclusiveLock because we will write pg_inherits below.) + * Prevent circularity by seeing if proposed parent inherits from child. + * (In particular, this disallows making a rel inherit from itself.) * - * Note: we do not reject the case where the child already inherits from - * the parent indirectly; CREATE TABLE doesn't reject comparable cases. + * This is not completely bulletproof because of race conditions: in + * multi-level inheritance trees, someone else could concurrently be + * making another inheritance link that closes the loop but does not join + * either of the rels we have locked. Preventing that seems to require + * exclusive locks on the entire inheritance tree, which is a cure worse + * than the disease. find_all_inheritors() will cope with circularity + * anyway, so don't sweat it too much. + * + * We use weakest lock we can on child's children, namely AccessShareLock. 
*/ - catalogRelation = heap_open(InheritsRelationId, RowExclusiveLock); - ScanKeyInit(&key, - Anum_pg_inherits_inhrelid, - BTEqualStrategyNumber, F_OIDEQ, + children = find_all_inheritors(RelationGetRelid(child_rel), + AccessShareLock, NULL); + + if (list_member_oid(children, RelationGetRelid(parent_rel))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("circular inheritance not allowed"), + errdetail("\"%s\" is already a child of \"%s\".", + parent->relname, + RelationGetRelationName(child_rel)))); + + /* If parent has OIDs then child must have OIDs */ + if (parent_rel->rd_rel->relhasoids && !child_rel->rd_rel->relhasoids) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("table \"%s\" without OIDs cannot inherit from table \"%s\" with OIDs", + RelationGetRelationName(child_rel), + RelationGetRelationName(parent_rel)))); + + /* OK to create inheritance */ + CreateInheritance(child_rel, parent_rel); + + ObjectAddressSet(address, RelationRelationId, + RelationGetRelid(parent_rel)); + + /* keep our lock on the parent relation until commit */ + heap_close(parent_rel, NoLock); + + return address; +} + +/* + * CreateInheritance + * Catalog manipulation portion of creating inheritance between a child + * table and a parent table. + * + * Common to ATExecAddInherit() and ATExecAttachPartition(). + */ +static void +CreateInheritance(Relation child_rel, Relation parent_rel) +{ + Relation catalogRelation; + SysScanDesc scan; + ScanKeyData key; + HeapTuple inheritsTuple; + int32 inhseqno; + + /* Note: get RowExclusiveLock because we will write pg_inherits below. */ + catalogRelation = heap_open(InheritsRelationId, RowExclusiveLock); + + /* + * Check for duplicates in the list of parents, and determine the highest + * inhseqno already present; we'll use the next one for the new parent. + * Also, if proposed child is a partition, it cannot already be inheriting. 
+ * + * Note: we do not reject the case where the child already inherits from + * the parent indirectly; CREATE TABLE doesn't reject comparable cases. + */ + ScanKeyInit(&key, + Anum_pg_inherits_inhrelid, + BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationGetRelid(child_rel))); scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true, NULL, 1, &key); @@ -10137,44 +10727,12 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) (errcode(ERRCODE_DUPLICATE_TABLE), errmsg("relation \"%s\" would be inherited from more than once", RelationGetRelationName(parent_rel)))); + if (inh->inhseqno > inhseqno) inhseqno = inh->inhseqno; } systable_endscan(scan); - /* - * Prevent circularity by seeing if proposed parent inherits from child. - * (In particular, this disallows making a rel inherit from itself.) - * - * This is not completely bulletproof because of race conditions: in - * multi-level inheritance trees, someone else could concurrently be - * making another inheritance link that closes the loop but does not join - * either of the rels we have locked. Preventing that seems to require - * exclusive locks on the entire inheritance tree, which is a cure worse - * than the disease. find_all_inheritors() will cope with circularity - * anyway, so don't sweat it too much. - * - * We use weakest lock we can on child's children, namely AccessShareLock. 
- */ - children = find_all_inheritors(RelationGetRelid(child_rel), - AccessShareLock, NULL); - - if (list_member_oid(children, RelationGetRelid(parent_rel))) - ereport(ERROR, - (errcode(ERRCODE_DUPLICATE_TABLE), - errmsg("circular inheritance not allowed"), - errdetail("\"%s\" is already a child of \"%s\".", - parent->relname, - RelationGetRelationName(child_rel)))); - - /* If parent has OIDs then child must have OIDs */ - if (parent_rel->rd_rel->relhasoids && !child_rel->rd_rel->relhasoids) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("table \"%s\" without OIDs cannot inherit from table \"%s\" with OIDs", - RelationGetRelationName(child_rel), - RelationGetRelationName(parent_rel)))); - /* Match up the columns and bump attinhcount as needed */ MergeAttributesIntoExisting(child_rel, parent_rel); @@ -10189,16 +10747,8 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) inhseqno + 1, catalogRelation); - ObjectAddressSet(address, RelationRelationId, - RelationGetRelid(parent_rel)); - /* Now we're done with pg_inherits */ heap_close(catalogRelation, RowExclusiveLock); - - /* keep our lock on the parent relation until commit */ - heap_close(parent_rel, NoLock); - - return address; } /* @@ -10249,7 +10799,7 @@ constraints_equivalent(HeapTuple a, HeapTuple b, TupleDesc tupleDesc) * Check columns in child table match up with columns in parent, and increment * their attinhcount. * - * Called by ATExecAddInherit + * Called by CreateInheritance * * Currently all parent columns must be found in child. Missing columns are an * error. 
One day we might consider creating new columns like CREATE TABLE @@ -10267,12 +10817,17 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel) int parent_natts; TupleDesc tupleDesc; HeapTuple tuple; + bool child_is_partition = false; attrrel = heap_open(AttributeRelationId, RowExclusiveLock); tupleDesc = RelationGetDescr(parent_rel); parent_natts = tupleDesc->natts; + /* If parent_rel is a partitioned table, child_rel must be a partition */ + if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + child_is_partition = true; + for (parent_attno = 1; parent_attno <= parent_natts; parent_attno++) { Form_pg_attribute attribute = tupleDesc->attrs[parent_attno - 1]; @@ -10320,6 +10875,18 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel) * later on, this change will just roll back.) */ childatt->attinhcount++; + + /* + * In case of partitions, we must enforce that value of attislocal + * is same in all partitions. (Note: there are only inherited + * attributes in partitions) + */ + if (child_is_partition) + { + Assert(childatt->attinhcount == 1); + childatt->attislocal = false; + } + simple_heap_update(attrrel, &tuple->t_self, tuple); CatalogUpdateIndexes(attrrel, tuple); heap_freetuple(tuple); @@ -10342,7 +10909,7 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel) * * Constraints that are marked ONLY in the parent are ignored. * - * Called by ATExecAddInherit + * Called by CreateInheritance * * Currently all constraints in parent must be present in the child. One day we * may consider adding new constraints like CREATE TABLE does. 
@@ -10361,10 +10928,15 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) SysScanDesc parent_scan; ScanKeyData parent_key; HeapTuple parent_tuple; + bool child_is_partition = false; catalog_relation = heap_open(ConstraintRelationId, RowExclusiveLock); tuple_desc = RelationGetDescr(catalog_relation); + /* If parent_rel is a partitioned table, child_rel must be a partition */ + if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + child_is_partition = true; + /* Outer loop scans through the parent's constraint definitions */ ScanKeyInit(&parent_key, Anum_pg_constraint_conrelid, @@ -10441,6 +11013,18 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) child_copy = heap_copytuple(child_tuple); child_con = (Form_pg_constraint) GETSTRUCT(child_copy); child_con->coninhcount++; + + /* + * In case of partitions, an inherited constraint must be + * inherited only once since it cannot have multiple parents and + * it is never considered local. + */ + if (child_is_partition) + { + Assert(child_con->coninhcount == 1); + child_con->conislocal = false; + } + simple_heap_update(catalog_relation, &child_copy->t_self, child_copy); CatalogUpdateIndexes(catalog_relation, child_copy); heap_freetuple(child_copy); @@ -10465,6 +11049,46 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) /* * ALTER TABLE NO INHERIT * + * Return value is the address of the relation that is no longer parent. + */ +static ObjectAddress +ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) +{ + ObjectAddress address; + Relation parent_rel; + + if (rel->rd_rel->relispartition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot change inheritance of a partition"))); + + /* + * AccessShareLock on the parent is probably enough, seeing that DROP + * TABLE doesn't lock parent tables at all. We need some lock since we'll + * be inspecting the parent's schema. 
+ */ + parent_rel = heap_openrv(parent, AccessShareLock); + + /* + * We don't bother to check ownership of the parent table --- ownership of + * the child is presumed enough rights. + */ + + /* Off to RemoveInheritance() where most of the work happens */ + RemoveInheritance(rel, parent_rel); + + ObjectAddressSet(address, RelationRelationId, + RelationGetRelid(parent_rel)); + + /* done reading parent_rel; keep our lock on it until commit */ + heap_close(parent_rel, NoLock); + + return address; +} + +/* + * RemoveInheritance + * * Drop a parent from the child's parents. This just adjusts the attinhcount * and attislocal of the columns and removes the pg_inherit and pg_depend * entries. @@ -10478,13 +11102,11 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) * coninhcount and conislocal for inherited constraints are adjusted in * exactly the same way. * - * Return value is the address of the relation that is no longer parent. + * Common to ATExecDropInherit() and ATExecDetachPartition(). */ -static ObjectAddress -ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) +static void +RemoveInheritance(Relation child_rel, Relation parent_rel) { - Relation parent_rel; - Oid parent_oid; Relation catalogRelation; SysScanDesc scan; ScanKeyData key[3]; @@ -10493,19 +11115,11 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) constraintTuple; List *connames; bool found = false; - ObjectAddress address; + bool child_is_partition = false; - /* - * AccessShareLock on the parent is probably enough, seeing that DROP - * TABLE doesn't lock parent tables at all. We need some lock since we'll - * be inspecting the parent's schema. - */ - parent_rel = heap_openrv(parent, AccessShareLock); - - /* - * We don't bother to check ownership of the parent table --- ownership of - * the child is presumed enough rights. 
- */ + /* If parent_rel is a partitioned table, child_rel must be a partition */ + if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + child_is_partition = true; /* * Find and destroy the pg_inherits entry linking the two, or error out if @@ -10515,7 +11129,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) ScanKeyInit(&key[0], Anum_pg_inherits_inhrelid, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(RelationGetRelid(rel))); + ObjectIdGetDatum(RelationGetRelid(child_rel))); scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true, NULL, 1, key); @@ -10536,11 +11150,20 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) heap_close(catalogRelation, RowExclusiveLock); if (!found) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("relation \"%s\" is not a parent of relation \"%s\"", - RelationGetRelationName(parent_rel), - RelationGetRelationName(rel)))); + { + if (child_is_partition) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" is not a partition of relation \"%s\"", + RelationGetRelationName(child_rel), + RelationGetRelationName(parent_rel)))); + else + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("relation \"%s\" is not a parent of relation \"%s\"", + RelationGetRelationName(parent_rel), + RelationGetRelationName(child_rel)))); + } /* * Search through child columns looking for ones matching parent rel @@ -10549,7 +11172,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) ScanKeyInit(&key[0], Anum_pg_attribute_attrelid, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(RelationGetRelid(rel))); + ObjectIdGetDatum(RelationGetRelid(child_rel))); scan = systable_beginscan(catalogRelation, AttributeRelidNumIndexId, true, NULL, 1, key); while (HeapTupleIsValid(attributeTuple = systable_getnext(scan))) @@ -10611,7 +11234,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) 
ScanKeyInit(&key[0], Anum_pg_constraint_conrelid, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(RelationGetRelid(rel))); + ObjectIdGetDatum(RelationGetRelid(child_rel))); scan = systable_beginscan(catalogRelation, ConstraintRelidIndexId, true, NULL, 1, key); @@ -10642,7 +11265,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) if (copy_con->coninhcount <= 0) /* shouldn't happen */ elog(ERROR, "relation %u has non-inherited constraint \"%s\"", - RelationGetRelid(rel), NameStr(copy_con->conname)); + RelationGetRelid(child_rel), NameStr(copy_con->conname)); copy_con->coninhcount--; if (copy_con->coninhcount == 0) @@ -10654,30 +11277,20 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) } } - parent_oid = RelationGetRelid(parent_rel); - systable_endscan(scan); heap_close(catalogRelation, RowExclusiveLock); - drop_parent_dependency(RelationGetRelid(rel), + drop_parent_dependency(RelationGetRelid(child_rel), RelationRelationId, RelationGetRelid(parent_rel)); - /* * Post alter hook of this inherits. Since object_access_hook doesn't take * multiple object identifiers, we relay oid of parent relation using * auxiliary_id argument. 
*/ InvokeObjectPostAlterHookArg(InheritsRelationId, - RelationGetRelid(rel), 0, + RelationGetRelid(child_rel), 0, RelationGetRelid(parent_rel), false); - - /* keep our lock on the parent relation until commit */ - heap_close(parent_rel, NoLock); - - ObjectAddressSet(address, RelationRelationId, parent_oid); - - return address; } /* @@ -11499,7 +12112,8 @@ AlterTableNamespaceInternal(Relation rel, Oid oldNspOid, Oid nspOid, /* Fix other dependent stuff */ if (rel->rd_rel->relkind == RELKIND_RELATION || - rel->rd_rel->relkind == RELKIND_MATVIEW) + rel->rd_rel->relkind == RELKIND_MATVIEW || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { AlterIndexNamespaces(classRel, rel, oldNspOid, nspOid, objsMoved); AlterSeqNamespaces(classRel, rel, oldNspOid, nspOid, @@ -11948,7 +12562,7 @@ RangeVarCallbackOwnsTable(const RangeVar *relation, if (!relkind) return; if (relkind != RELKIND_RELATION && relkind != RELKIND_TOASTVALUE && - relkind != RELKIND_MATVIEW) + relkind != RELKIND_MATVIEW && relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table or materialized view", relation->relname))); @@ -12105,7 +12719,8 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, relkind != RELKIND_VIEW && relkind != RELKIND_MATVIEW && relkind != RELKIND_SEQUENCE && - relkind != RELKIND_FOREIGN_TABLE) + relkind != RELKIND_FOREIGN_TABLE && + relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, materialized view, sequence, or foreign table", @@ -12113,3 +12728,701 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, ReleaseSysCache(tuple); } + +/* + * Transform any expressions present in the partition key + */ +static PartitionSpec * +transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy) +{ + PartitionSpec *newspec; + ParseState *pstate; + RangeTblEntry *rte; + ListCell *l; + + 
newspec = (PartitionSpec *) makeNode(PartitionSpec); + + newspec->strategy = partspec->strategy; + newspec->location = partspec->location; + newspec->partParams = NIL; + + /* Parse partitioning strategy name */ + if (!pg_strcasecmp(partspec->strategy, "list")) + *strategy = PARTITION_STRATEGY_LIST; + else if (!pg_strcasecmp(partspec->strategy, "range")) + *strategy = PARTITION_STRATEGY_RANGE; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized partitioning strategy \"%s\"", + partspec->strategy))); + + /* + * Create a dummy ParseState and insert the target relation as its sole + * rangetable entry. We need a ParseState for transformExpr. + */ + pstate = make_parsestate(NULL); + rte = addRangeTableEntryForRelation(pstate, rel, NULL, false, true); + addRTEtoQuery(pstate, rte, true, true, true); + + /* take care of any partition expressions */ + foreach(l, partspec->partParams) + { + ListCell *lc; + PartitionElem *pelem = (PartitionElem *) lfirst(l); + + /* Check for PARTITION BY ... 
(foo, foo) */ + foreach(lc, newspec->partParams) + { + PartitionElem *pparam = (PartitionElem *) lfirst(lc); + + if (pelem->name && pparam->name && + !strcmp(pelem->name, pparam->name)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" appears more than once in partition key", + pelem->name), + parser_errposition(pstate, pelem->location))); + } + + if (pelem->expr) + { + /* Now do parse transformation of the expression */ + pelem->expr = transformExpr(pstate, pelem->expr, + EXPR_KIND_PARTITION_EXPRESSION); + + /* we have to fix its collations too */ + assign_expr_collations(pstate, pelem->expr); + } + + newspec->partParams = lappend(newspec->partParams, pelem); + } + + return newspec; +} + +/* + * Compute per-partition-column information from a list of PartitionElem's + */ +static void +ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs, + List **partexprs, Oid *partopclass, Oid *partcollation) +{ + int attn; + ListCell *lc; + + attn = 0; + foreach(lc, partParams) + { + PartitionElem *pelem = (PartitionElem *) lfirst(lc); + Oid atttype; + Oid attcollation; + + if (pelem->name != NULL) + { + /* Simple attribute reference */ + HeapTuple atttuple; + Form_pg_attribute attform; + + atttuple = SearchSysCacheAttName(RelationGetRelid(rel), pelem->name); + if (!HeapTupleIsValid(atttuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" named in partition key does not exist", + pelem->name))); + attform = (Form_pg_attribute) GETSTRUCT(atttuple); + + if (attform->attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("cannot use system column \"%s\" in partition key", + pelem->name))); + + partattrs[attn] = attform->attnum; + atttype = attform->atttypid; + attcollation = attform->attcollation; + ReleaseSysCache(atttuple); + + /* Note that whole-row references can't happen here; see below */ + } + else + { + /* Expression */ + Node *expr = pelem->expr; + + Assert(expr != 
NULL); + atttype = exprType(expr); + attcollation = exprCollation(expr); + + /* + * Strip any top-level COLLATE clause. This ensures that we treat + * "x COLLATE y" and "(x COLLATE y)" alike. + */ + while (IsA(expr, CollateExpr)) + expr = (Node *) ((CollateExpr *) expr)->arg; + + if (IsA(expr, Var) && + ((Var *) expr)->varattno != InvalidAttrNumber) + { + /* + * User wrote "(column)" or "(column COLLATE something)". + * Treat it like simple attribute anyway. + */ + partattrs[attn] = ((Var *) expr)->varattno; + } + else + { + Bitmapset *expr_attrs = NULL; + + partattrs[attn] = 0; /* marks the column as expression */ + *partexprs = lappend(*partexprs, expr); + + /* + * Note that expression_planner does not change the passed in + * expression destructively and we have already saved the + * expression to be stored into the catalog above. + */ + expr = (Node *) expression_planner((Expr *) expr); + + /* + * Partition expression cannot contain mutable functions, + * because a given row must always map to the same partition + * as long as there is no change in the partition boundary + * structure. + */ + if (contain_mutable_functions(expr)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("functions in partition key expression must be marked IMMUTABLE"))); + + /* + * While it is not exactly *wrong* for an expression to be + * a constant value, it seems better to prevent such input. + */ + if (IsA(expr, Const)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot use constant expression as partition key"))); + + /* + * transformPartitionSpec() should have already rejected subqueries, + * aggregates, window functions, and SRFs, based on the EXPR_KIND_ + * for partition expressions. 
+ */ + + /* Cannot have expressions containing whole-row references */ + pull_varattnos(expr, 1, &expr_attrs); + if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber, + expr_attrs)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("partition key expressions cannot contain whole-row references"))); + } + } + + /* + * Apply collation override if any + */ + if (pelem->collation) + attcollation = get_collation_oid(pelem->collation, false); + + /* + * Check we have a collation iff it's a collatable type. The only + * expected failures here are (1) COLLATE applied to a noncollatable + * type, or (2) partition expression had an unresolved collation. + * But we might as well code this to be a complete consistency check. + */ + if (type_is_collatable(atttype)) + { + if (!OidIsValid(attcollation)) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for partition expression"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + else + { + if (OidIsValid(attcollation)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("collations are not supported by type %s", + format_type_be(atttype)))); + } + + partcollation[attn] = attcollation; + + /* + * Identify a btree opclass to use. Currently, we use only btree + * operators, which seems enough for list and range partitioning. 
+ */ + if (!pelem->opclass) + { + partopclass[attn] = GetDefaultOpClass(atttype, BTREE_AM_OID); + + if (!OidIsValid(partopclass[attn])) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("data type %s has no default btree operator class", + format_type_be(atttype)), + errhint("You must specify a btree operator class or define a default btree operator class for the data type."))); + } + else + partopclass[attn] = ResolveOpClass(pelem->opclass, + atttype, + "btree", + BTREE_AM_OID); + + attn++; + } +} + +/* + * ALTER TABLE ATTACH PARTITION FOR VALUES + * + * Return the address of the newly attached partition. + */ +static ObjectAddress +ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd) +{ + PartitionKey key = RelationGetPartitionKey(rel); + Relation attachRel, + catalog; + List *childrels; + TupleConstr *attachRel_constr; + List *partConstraint, + *existConstraint; + SysScanDesc scan; + ScanKeyData skey; + HeapTuple tuple; + AttrNumber attno; + int natts; + TupleDesc tupleDesc; + bool skip_validate = false; + ObjectAddress address; + + attachRel = heap_openrv(cmd->name, AccessExclusiveLock); + + /* + * Must be owner of both parent and source table -- parent was checked by + * ATSimplePermissions call in ATPrepCmd + */ + ATSimplePermissions(attachRel, ATT_TABLE | ATT_FOREIGN_TABLE); + + /* A partition can only have one parent */ + if (attachRel->rd_rel->relispartition) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is already a partition", + RelationGetRelationName(attachRel)))); + + if (attachRel->rd_rel->reloftype) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach a typed table as partition"))); + + /* + * Table being attached should not already be part of inheritance; either + * as a child table... 
+ */ + catalog = heap_open(InheritsRelationId, AccessShareLock); + ScanKeyInit(&skey, + Anum_pg_inherits_inhrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(attachRel))); + scan = systable_beginscan(catalog, InheritsRelidSeqnoIndexId, true, + NULL, 1, &skey); + if (HeapTupleIsValid(systable_getnext(scan))) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach inheritance child as partition"))); + systable_endscan(scan); + + /* ...or as a parent table (except the case when it is partitioned) */ + ScanKeyInit(&skey, + Anum_pg_inherits_inhparent, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(attachRel))); + scan = systable_beginscan(catalog, InheritsParentIndexId, true, NULL, + 1, &skey); + if (HeapTupleIsValid(systable_getnext(scan)) && + attachRel->rd_rel->relkind == RELKIND_RELATION) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach inheritance parent as partition"))); + systable_endscan(scan); + heap_close(catalog, AccessShareLock); + + /* + * Prevent circularity by seeing if rel is a partition of attachRel. + * (In particular, this disallows making a rel a partition of itself.) 
+ */ + childrels = find_all_inheritors(RelationGetRelid(attachRel), + AccessShareLock, NULL); + if (list_member_oid(childrels, RelationGetRelid(rel))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("circular inheritance not allowed"), + errdetail("\"%s\" is already a child of \"%s\".", + RelationGetRelationName(rel), + RelationGetRelationName(attachRel)))); + + /* Temp parent cannot have a partition that is itself not a temp */ + if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + attachRel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach a permanent relation as partition of temporary relation \"%s\"", + RelationGetRelationName(rel)))); + + /* If the parent is temp, it must belong to this session */ + if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + !rel->rd_islocaltemp) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach as partition of temporary relation of another session"))); + + /* Ditto for the partition */ + if (attachRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + !attachRel->rd_islocaltemp) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach temporary relation of another session as partition"))); + + /* If parent has OIDs then child must have OIDs */ + if (rel->rd_rel->relhasoids && !attachRel->rd_rel->relhasoids) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach table \"%s\" without OIDs as partition of" + " table \"%s\" with OIDs", RelationGetRelationName(attachRel), + RelationGetRelationName(rel)))); + + /* OTOH, if parent doesn't have them, do not allow in attachRel either */ + if (attachRel->rd_rel->relhasoids && !rel->rd_rel->relhasoids) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot attach table \"%s\" with OIDs as partition of table" + " \"%s\" without OIDs", RelationGetRelationName(attachRel), + 
RelationGetRelationName(rel)))); + + /* Check if there are any columns in attachRel that aren't in the parent */ + tupleDesc = RelationGetDescr(attachRel); + natts = tupleDesc->natts; + for (attno = 1; attno <= natts; attno++) + { + Form_pg_attribute attribute = tupleDesc->attrs[attno - 1]; + char *attributeName = NameStr(attribute->attname); + + /* Ignore dropped */ + if (attribute->attisdropped) + continue; + + /* Find same column in parent (matching on column name). */ + tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), attributeName); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("table \"%s\" contains column \"%s\" not found in parent \"%s\"", + RelationGetRelationName(attachRel), attributeName, + RelationGetRelationName(rel)), + errdetail("New partition should contain only the columns present in parent."))); + } + + /* OK to create inheritance. Rest of the checks performed there */ + CreateInheritance(attachRel, rel); + + /* + * Check that the new partition's bound is valid and does not overlap any + * of existing partitions of the parent - note that it does not return + * on error. + */ + check_new_partition_bound(RelationGetRelationName(attachRel), rel, + cmd->bound); + + /* Update the pg_class entry. */ + StorePartitionBound(attachRel, cmd->bound); + + /* + * Generate partition constraint from the partition bound specification. + * If the parent itself is a partition, make sure to include its + * constraint as well. 
+ */ + partConstraint = list_concat(get_qual_from_partbound(attachRel, rel, + cmd->bound), + RelationGetPartitionQual(rel, true)); + partConstraint = (List *) eval_const_expressions(NULL, + (Node *) partConstraint); + partConstraint = (List *) canonicalize_qual((Expr *) partConstraint); + partConstraint = list_make1(make_ands_explicit(partConstraint)); + + /* + * Check if we can do away with having to scan the table being attached + * to validate the partition constraint, by *proving* that the existing + * constraints of the table *imply* the partition predicate. We include + * the table's check constraints and NOT NULL constraints in the list of + * clauses passed to predicate_implied_by(). + * + * There is a case in which we cannot rely on just the result of the + * proof. + */ + tupleDesc = RelationGetDescr(attachRel); + attachRel_constr = tupleDesc->constr; + existConstraint = NIL; + if (attachRel_constr != NULL) + { + int num_check = attachRel_constr->num_check; + int i; + Bitmapset *not_null_attrs = NULL; + List *part_constr; + ListCell *lc; + bool partition_accepts_null = true; + int partnatts; + + if (attachRel_constr->has_not_null) + { + int natts = attachRel->rd_att->natts; + + for (i = 1; i <= natts; i++) + { + Form_pg_attribute att = attachRel->rd_att->attrs[i - 1]; + + if (att->attnotnull && !att->attisdropped) + { + NullTest *ntest = makeNode(NullTest); + + ntest->arg = (Expr *) makeVar(1, + i, + att->atttypid, + att->atttypmod, + att->attcollation, + 0); + ntest->nulltesttype = IS_NOT_NULL; + + /* + * argisrow=false is correct even for a composite column, + * because attnotnull does not represent a SQL-spec IS NOT + * NULL test in such a case, just IS DISTINCT FROM NULL. 
+ */ + ntest->argisrow = false; + ntest->location = -1; + existConstraint = lappend(existConstraint, ntest); + not_null_attrs = bms_add_member(not_null_attrs, i); + } + } + } + + for (i = 0; i < num_check; i++) + { + Node *cexpr; + + /* + * If this constraint hasn't been fully validated yet, we must + * ignore it here. + */ + if (!attachRel_constr->check[i].ccvalid) + continue; + + cexpr = stringToNode(attachRel_constr->check[i].ccbin); + + /* + * Run each expression through const-simplification and + * canonicalization. It is necessary, because we will be + * comparing it to similarly-processed qual clauses, and may fail + * to detect valid matches without this. + */ + cexpr = eval_const_expressions(NULL, cexpr); + cexpr = (Node *) canonicalize_qual((Expr *) cexpr); + + existConstraint = list_concat(existConstraint, + make_ands_implicit((Expr *) cexpr)); + } + + existConstraint = list_make1(make_ands_explicit(existConstraint)); + + /* And away we go ... */ + if (predicate_implied_by(partConstraint, existConstraint)) + skip_validate = true; + + /* + * We choose to err on the safer side, ie, give up on skipping the + * validation scan, if the partition key column doesn't have + * the NOT NULL constraint and the table is to become a list partition + * that does not accept nulls. In this case, the partition predicate + * (partConstraint) does include a 'key IS NOT NULL' expression, + * however, because of the way predicate_implied_by_simple_clause() + * is designed to handle IS NOT NULL predicates in the absence of an + * IS NOT NULL clause, we cannot rely on just the above proof. + * + * That is not an issue in case of a range partition, because if there + * were no NOT NULL constraint defined on the key columns, an error + * would be thrown before we get here anyway. That is not true, + * however, if any of the partition keys is an expression, which is + * handled below. 
+ */ + part_constr = linitial(partConstraint); + part_constr = make_ands_implicit((Expr *) part_constr); + + /* + * part_constr contains an IS NOT NULL expression, if this is a list + * partition that does not accept nulls (in fact, also if this is a + * range partition and some partition key is an expression, but we + * never skip validation in that case anyway; see below) + */ + foreach(lc, part_constr) + { + Node *expr = lfirst(lc); + + if (IsA(expr, NullTest) && + ((NullTest *) expr)->nulltesttype == IS_NOT_NULL) + { + partition_accepts_null = false; + break; + } + } + + partnatts = get_partition_natts(key); + for (i = 0; i < partnatts; i++) + { + AttrNumber partattno; + + partattno = get_partition_col_attnum(key, i); + + /* If partition key is an expression, must not skip validation */ + if (!partition_accepts_null && + (partattno == 0 || + !bms_is_member(partattno, not_null_attrs))) + skip_validate = false; + } + } + + if (skip_validate) + elog(NOTICE, "skipping scan to validate partition constraint"); + + /* + * Set up to have the table to be scanned to validate the partition + * constraint (see partConstraint above). If it's a partitioned table, + * we schedule its leaf partitions to be scanned instead. + */ + if (!skip_validate) + { + List *all_parts; + ListCell *lc; + + /* Take an exclusive lock on the partitions to be checked */ + if (attachRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + all_parts = find_all_inheritors(RelationGetRelid(attachRel), + AccessExclusiveLock, NULL); + else + all_parts = list_make1_oid(RelationGetRelid(attachRel)); + + foreach(lc, all_parts) + { + AlteredTableInfo *tab; + Oid part_relid = lfirst_oid(lc); + Relation part_rel; + Expr *constr; + + /* Lock already taken */ + if (part_relid != RelationGetRelid(attachRel)) + part_rel = heap_open(part_relid, NoLock); + else + part_rel = attachRel; + + /* + * Skip if it's a partitioned table. Only RELKIND_RELATION + * relations (ie, leaf partitions) need to be scanned. 
+ */ + if (part_rel != attachRel && + part_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + heap_close(part_rel, NoLock); + continue; + } + + /* Grab a work queue entry */ + tab = ATGetQueueEntry(wqueue, part_rel); + + constr = linitial(partConstraint); + tab->partition_constraint = make_ands_implicit((Expr *) constr); + + /* keep our lock until commit */ + if (part_rel != attachRel) + heap_close(part_rel, NoLock); + } + } + + /* + * Invalidate the relcache so that the new partition is now included + * in rel's partition descriptor. + */ + CacheInvalidateRelcache(rel); + + ObjectAddressSet(address, RelationRelationId, RelationGetRelid(attachRel)); + + /* keep our lock until commit */ + heap_close(attachRel, NoLock); + + return address; +} + +/* + * ALTER TABLE DETACH PARTITION + * + * Return the address of the relation that is no longer a partition of rel. + */ +static ObjectAddress +ATExecDetachPartition(Relation rel, RangeVar *name) +{ + Relation partRel, + classRel; + HeapTuple tuple, + newtuple; + Datum new_val[Natts_pg_class]; + bool isnull, + new_null[Natts_pg_class], + new_repl[Natts_pg_class]; + ObjectAddress address; + + partRel = heap_openrv(name, AccessShareLock); + + /* All inheritance related checks are performed within the function */ + RemoveInheritance(partRel, rel); + + /* Update pg_class tuple */ + classRel = heap_open(RelationRelationId, RowExclusiveLock); + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(partRel))); + Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition); + + (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound, + &isnull); + Assert(!isnull); + + /* Clear relpartbound and reset relispartition */ + memset(new_val, 0, sizeof(new_val)); + memset(new_null, false, sizeof(new_null)); + memset(new_repl, false, sizeof(new_repl)); + new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0; + new_null[Anum_pg_class_relpartbound - 1] = true; + new_repl[Anum_pg_class_relpartbound - 1] = true; + 
newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel), + new_val, new_null, new_repl); + + ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false; + simple_heap_update(classRel, &newtuple->t_self, newtuple); + CatalogUpdateIndexes(classRel, newtuple); + heap_freetuple(newtuple); + heap_close(classRel, RowExclusiveLock); + + /* + * Invalidate the relcache so that the partition is no longer included + * in our partition descriptor. + */ + CacheInvalidateRelcache(rel); + + ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel)); + + /* keep our lock until commit */ + heap_close(partRel, NoLock); + + return address; +} diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 1c264b7736..02e9693f28 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -176,7 +176,8 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString, * Triggers must be on tables or views, and there are additional * relation-type-specific restrictions. 
*/ - if (rel->rd_rel->relkind == RELKIND_RELATION) + if (rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { /* Tables can't have INSTEAD OF triggers */ if (stmt->timing != TRIGGER_TYPE_BEFORE && @@ -186,6 +187,13 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString, errmsg("\"%s\" is a table", RelationGetRelationName(rel)), errdetail("Tables cannot have INSTEAD OF triggers."))); + /* Disallow ROW triggers on partitioned tables */ + if (stmt->row && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is a partitioned table", + RelationGetRelationName(rel)), + errdetail("Partitioned tables cannot have ROW triggers."))); } else if (rel->rd_rel->relkind == RELKIND_VIEW) { @@ -1211,7 +1219,8 @@ RemoveTriggerById(Oid trigOid) if (rel->rd_rel->relkind != RELKIND_RELATION && rel->rd_rel->relkind != RELKIND_VIEW && - rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, or foreign table", @@ -1316,7 +1325,8 @@ RangeVarCallbackForRenameTrigger(const RangeVar *rv, Oid relid, Oid oldrelid, /* only tables and views can have triggers */ if (form->relkind != RELKIND_RELATION && form->relkind != RELKIND_VIEW && - form->relkind != RELKIND_FOREIGN_TABLE) + form->relkind != RELKIND_FOREIGN_TABLE && + form->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, or foreign table", diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 056933a584..5e3989acd2 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -2107,7 +2107,8 @@ DefineCompositeType(RangeVar *typevar, List *coldeflist) /* * Finally create the relation. 
This also creates the type. */ - DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address); + DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address, + NULL); return address; } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 58bbf5548b..b1be2f7ad5 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1314,7 +1314,8 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params) */ if (onerel->rd_rel->relkind != RELKIND_RELATION && onerel->rd_rel->relkind != RELKIND_MATVIEW && - onerel->rd_rel->relkind != RELKIND_TOASTVALUE) + onerel->rd_rel->relkind != RELKIND_TOASTVALUE && + onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) { ereport(WARNING, (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables", diff --git a/src/backend/commands/view.c b/src/backend/commands/view.c index 325a81096f..c6b0e4f2b3 100644 --- a/src/backend/commands/view.c +++ b/src/backend/commands/view.c @@ -228,7 +228,8 @@ DefineVirtualRelation(RangeVar *relation, List *tlist, bool replace, * existing view, so we don't need more code to complain if "replace" * is false). 
*/ - address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL); + address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL, + NULL); Assert(address.objectId != InvalidOid); return address; } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 71c07288a1..0f47c7e010 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -42,6 +42,7 @@ #include "access/transam.h" #include "access/xact.h" #include "catalog/namespace.h" +#include "catalog/partition.h" #include "commands/matview.h" #include "commands/trigger.h" #include "executor/execdebug.h" @@ -825,6 +826,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) InitResultRelInfo(resultRelInfo, resultRelation, resultRelationIndex, + true, estate->es_instrument); resultRelInfo++; } @@ -1019,6 +1021,7 @@ CheckValidResultRel(Relation resultRel, CmdType operation) switch (resultRel->rd_rel->relkind) { case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: /* OK */ break; case RELKIND_SEQUENCE: @@ -1152,6 +1155,7 @@ CheckValidRowMarkRel(Relation rel, RowMarkType markType) switch (rel->rd_rel->relkind) { case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: /* OK */ break; case RELKIND_SEQUENCE: @@ -1212,6 +1216,7 @@ void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, Index resultRelationIndex, + bool load_partition_check, int instrument_options) { MemSet(resultRelInfo, 0, sizeof(ResultRelInfo)); @@ -1249,6 +1254,10 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_ConstraintExprs = NULL; resultRelInfo->ri_junkFilter = NULL; resultRelInfo->ri_projectReturning = NULL; + if (load_partition_check) + resultRelInfo->ri_PartitionCheck = + RelationGetPartitionQual(resultRelationDesc, + true); } /* @@ -1311,6 +1320,7 @@ ExecGetTriggerResultRel(EState *estate, Oid relid) InitResultRelInfo(rInfo, rel, 0, /* dummy rangetable index */ + true, estate->es_instrument); estate->es_trig_target_relations = 
lappend(estate->es_trig_target_relations, rInfo); @@ -1691,6 +1701,46 @@ ExecRelCheck(ResultRelInfo *resultRelInfo, return NULL; } +/* + * ExecPartitionCheck --- check that tuple meets the partition constraint. + * + * Note: This is called *iff* resultRelInfo is the main target table. + */ +static bool +ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, + EState *estate) +{ + ExprContext *econtext; + + /* + * If first time through, build expression state tree for the partition + * check expression. Keep it in the per-query memory context so they'll + * survive throughout the query. + */ + if (resultRelInfo->ri_PartitionCheckExpr == NULL) + { + List *qual = resultRelInfo->ri_PartitionCheck; + + resultRelInfo->ri_PartitionCheckExpr = (List *) + ExecPrepareExpr((Expr *) qual, estate); + } + + /* + * We will use the EState's per-tuple context for evaluating constraint + * expressions (creating it if it's not already there). + */ + econtext = GetPerTupleExprContext(estate); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* + * As in case of the catalogued constraints, we treat a NULL result as + * success here, not a failure. 
+ */ + return ExecQual(resultRelInfo->ri_PartitionCheckExpr, econtext, true); +} + void ExecConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate) @@ -1702,9 +1752,9 @@ ExecConstraints(ResultRelInfo *resultRelInfo, Bitmapset *insertedCols; Bitmapset *updatedCols; - Assert(constr); + Assert(constr || resultRelInfo->ri_PartitionCheck); - if (constr->has_not_null) + if (constr && constr->has_not_null) { int natts = tupdesc->natts; int attrChk; @@ -1735,7 +1785,7 @@ ExecConstraints(ResultRelInfo *resultRelInfo, } } - if (constr->num_check > 0) + if (constr && constr->num_check > 0) { const char *failed; @@ -1759,6 +1809,26 @@ ExecConstraints(ResultRelInfo *resultRelInfo, errtableconstraint(rel, failed))); } } + + if (resultRelInfo->ri_PartitionCheck && + !ExecPartitionCheck(resultRelInfo, slot, estate)) + { + char *val_desc; + + insertedCols = GetInsertedColumns(resultRelInfo, estate); + updatedCols = GetUpdatedColumns(resultRelInfo, estate); + modifiedCols = bms_union(insertedCols, updatedCols); + val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel), + slot, + tupdesc, + modifiedCols, + 64); + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("new row for relation \"%s\" violates partition constraint", + RelationGetRelationName(rel)), + val_desc ? errdetail("Failing row contains %s.", val_desc) : 0)); + } } /* @@ -2926,3 +2996,52 @@ EvalPlanQualEnd(EPQState *epqstate) epqstate->planstate = NULL; epqstate->origslot = NULL; } + +/* + * ExecFindPartition -- Find a leaf partition in the partition tree rooted + * at parent, for the heap tuple contained in *slot + * + * estate must be non-NULL; we'll need it to compute any expressions in the + * partition key(s) + * + * If no leaf partition is found, this routine errors out with the appropriate + * error message, else it returns the leaf partition sequence number returned + * by get_partition_for_tuple() unchanged. 
+ */ +int +ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, + TupleTableSlot *slot, EState *estate) +{ + int result; + Oid failed_at; + ExprContext *econtext = GetPerTupleExprContext(estate); + + econtext->ecxt_scantuple = slot; + result = get_partition_for_tuple(pd, slot, estate, &failed_at); + if (result < 0) + { + Relation rel = resultRelInfo->ri_RelationDesc; + char *val_desc; + Bitmapset *insertedCols, + *updatedCols, + *modifiedCols; + TupleDesc tupDesc = RelationGetDescr(rel); + + insertedCols = GetInsertedColumns(resultRelInfo, estate); + updatedCols = GetUpdatedColumns(resultRelInfo, estate); + modifiedCols = bms_union(insertedCols, updatedCols); + val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel), + slot, + tupDesc, + modifiedCols, + 64); + Assert(OidIsValid(failed_at)); + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("no partition of relation \"%s\" found for row", + get_rel_name(failed_at)), + val_desc ? errdetail("Failing row contains %s.", val_desc) : 0)); + } + + return result; +} diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index efb0c5e8e5..c0b58d1841 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -258,6 +258,7 @@ ExecInsert(ModifyTableState *mtstate, { HeapTuple tuple; ResultRelInfo *resultRelInfo; + ResultRelInfo *saved_resultRelInfo = NULL; Relation resultRelationDesc; Oid newId; List *recheckIndexes = NIL; @@ -272,6 +273,56 @@ ExecInsert(ModifyTableState *mtstate, * get information on the (current) result relation */ resultRelInfo = estate->es_result_relation_info; + + /* Determine the partition to heap_insert the tuple into */ + if (mtstate->mt_partition_dispatch_info) + { + int leaf_part_index; + TupleConversionMap *map; + + /* + * Away we go ... If we end up not finding a partition after all, + * ExecFindPartition() does not return and errors out instead. 
+ * Otherwise, the returned value is to be used as an index into + * arrays mt_partitions[] and mt_partition_tupconv_maps[] that + * will get us the ResultRelInfo and TupleConversionMap for the + * partition, respectively. + */ + leaf_part_index = ExecFindPartition(resultRelInfo, + mtstate->mt_partition_dispatch_info, + slot, + estate); + Assert(leaf_part_index >= 0 && + leaf_part_index < mtstate->mt_num_partitions); + + /* + * Save the old ResultRelInfo and switch to the one corresponding to + * the selected partition. + */ + saved_resultRelInfo = resultRelInfo; + resultRelInfo = mtstate->mt_partitions + leaf_part_index; + + /* We do not yet have a way to insert into a foreign partition */ + if (resultRelInfo->ri_FdwRoutine) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot route inserted tuples to a foreign table"))); + + /* For ExecInsertIndexTuples() to work on the partition's indexes */ + estate->es_result_relation_info = resultRelInfo; + + /* + * We might need to convert from the parent rowtype to the partition + * rowtype. + */ + map = mtstate->mt_partition_tupconv_maps[leaf_part_index]; + if (map) + { + tuple = do_convert_tuple(tuple, map); + ExecStoreTuple(tuple, slot, InvalidBuffer, true); + } + } + resultRelationDesc = resultRelInfo->ri_RelationDesc; /* @@ -369,7 +420,7 @@ ExecInsert(ModifyTableState *mtstate, /* * Check the constraints of the tuple */ - if (resultRelationDesc->rd_att->constr) + if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck) ExecConstraints(resultRelInfo, slot, estate); if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0) @@ -511,6 +562,12 @@ ExecInsert(ModifyTableState *mtstate, list_free(recheckIndexes); + if (saved_resultRelInfo) + { + resultRelInfo = saved_resultRelInfo; + estate->es_result_relation_info = resultRelInfo; + } + /* * Check any WITH CHECK OPTION constraints from parent views. 
We are * required to do this after testing all constraints and uniqueness @@ -922,7 +979,7 @@ lreplace:; /* * Check the constraints of the tuple */ - if (resultRelationDesc->rd_att->constr) + if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck) ExecConstraints(resultRelInfo, slot, estate); /* @@ -1565,6 +1622,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) Plan *subplan; ListCell *l; int i; + Relation rel; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -1655,6 +1713,75 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) estate->es_result_relation_info = saved_resultRelInfo; + /* Build state for INSERT tuple routing */ + rel = mtstate->resultRelInfo->ri_RelationDesc; + if (operation == CMD_INSERT && + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + PartitionDispatch *pd; + int i, + j, + num_parted, + num_leaf_parts; + List *leaf_parts; + ListCell *cell; + ResultRelInfo *leaf_part_rri; + + /* Form the partition node tree and lock partitions */ + pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock, + &num_parted, &leaf_parts); + mtstate->mt_partition_dispatch_info = pd; + mtstate->mt_num_dispatch = num_parted; + num_leaf_parts = list_length(leaf_parts); + mtstate->mt_num_partitions = num_leaf_parts; + mtstate->mt_partitions = (ResultRelInfo *) + palloc0(num_leaf_parts * sizeof(ResultRelInfo)); + mtstate->mt_partition_tupconv_maps = (TupleConversionMap **) + palloc0(num_leaf_parts * sizeof(TupleConversionMap *)); + + leaf_part_rri = mtstate->mt_partitions; + i = j = 0; + foreach(cell, leaf_parts) + { + Oid partrelid = lfirst_oid(cell); + Relation partrel; + + /* + * We locked all the partitions above including the leaf + * partitions. Note that each of the relations in + * mtstate->mt_partitions will be closed by ExecEndModifyTable(). 
+ */ + partrel = heap_open(partrelid, NoLock); + + /* + * Verify result relation is a valid target for the current + * operation + */ + CheckValidResultRel(partrel, CMD_INSERT); + + InitResultRelInfo(leaf_part_rri, + partrel, + 1, /* dummy */ + false, /* no partition constraint checks */ + eflags); + + /* Open partition indices (note: ON CONFLICT unsupported)*/ + if (partrel->rd_rel->relhasindex && operation != CMD_DELETE && + leaf_part_rri->ri_IndexRelationDescs == NULL) + ExecOpenIndices(leaf_part_rri, false); + + if (!equalTupleDescs(RelationGetDescr(rel), + RelationGetDescr(partrel))) + mtstate->mt_partition_tupconv_maps[i] = + convert_tuples_by_name(RelationGetDescr(rel), + RelationGetDescr(partrel), + gettext_noop("could not convert row type")); + + leaf_part_rri++; + i++; + } + } + /* * Initialize any WITH CHECK OPTION constraints if needed. */ @@ -1886,7 +2013,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind; if (relkind == RELKIND_RELATION || - relkind == RELKIND_MATVIEW) + relkind == RELKIND_MATVIEW || + relkind == RELKIND_PARTITIONED_TABLE) { j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid"); if (!AttributeNumberIsValid(j->jf_junkAttNo)) @@ -1971,6 +2099,26 @@ ExecEndModifyTable(ModifyTableState *node) resultRelInfo); } + /* Close all the partitioned tables, leaf partitions, and their indices + * + * Remember node->mt_partition_dispatch_info[0] corresponds to the root + * partitioned table, which we must not try to close, because it is the + * main target table of the query that will be closed by ExecEndPlan(). 
+ */ + for (i = 1; i < node->mt_num_dispatch; i++) + { + PartitionDispatch pd = node->mt_partition_dispatch_info[i]; + + heap_close(pd->reldesc, NoLock); + } + for (i = 0; i < node->mt_num_partitions; i++) + { + ResultRelInfo *resultRelInfo = node->mt_partitions + i; + + ExecCloseIndices(resultRelInfo); + heap_close(resultRelInfo->ri_RelationDesc, NoLock); + } + /* * Free the exprcontext */ diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index dd66adb0b2..e30c57e86b 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -3030,6 +3030,8 @@ CopyCreateStmtFields(const CreateStmt *from, CreateStmt *newnode) COPY_NODE_FIELD(relation); COPY_NODE_FIELD(tableElts); COPY_NODE_FIELD(inhRelations); + COPY_NODE_FIELD(partspec); + COPY_NODE_FIELD(partbound); COPY_NODE_FIELD(ofTypename); COPY_NODE_FIELD(constraints); COPY_NODE_FIELD(options); @@ -4188,6 +4190,70 @@ _copyAlterPolicyStmt(const AlterPolicyStmt *from) return newnode; } +static PartitionSpec * +_copyPartitionSpec(const PartitionSpec *from) +{ + + PartitionSpec *newnode = makeNode(PartitionSpec); + + COPY_STRING_FIELD(strategy); + COPY_NODE_FIELD(partParams); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static PartitionElem * +_copyPartitionElem(const PartitionElem *from) +{ + PartitionElem *newnode = makeNode(PartitionElem); + + COPY_STRING_FIELD(name); + COPY_NODE_FIELD(expr); + COPY_NODE_FIELD(collation); + COPY_NODE_FIELD(opclass); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static PartitionBoundSpec * +_copyPartitionBoundSpec(const PartitionBoundSpec *from) +{ + PartitionBoundSpec *newnode = makeNode(PartitionBoundSpec); + + COPY_SCALAR_FIELD(strategy); + COPY_NODE_FIELD(listdatums); + COPY_NODE_FIELD(lowerdatums); + COPY_NODE_FIELD(upperdatums); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static PartitionRangeDatum * +_copyPartitionRangeDatum(const PartitionRangeDatum *from) +{ + PartitionRangeDatum *newnode = 
makeNode(PartitionRangeDatum); + + COPY_SCALAR_FIELD(infinite); + COPY_NODE_FIELD(value); + COPY_LOCATION_FIELD(location); + + return newnode; +} + +static PartitionCmd * +_copyPartitionCmd(const PartitionCmd *from) +{ + PartitionCmd *newnode = makeNode(PartitionCmd); + + COPY_NODE_FIELD(name); + COPY_NODE_FIELD(bound); + + return newnode; +} + /* **************************************************************** * pg_list.h copy functions * **************************************************************** @@ -5105,6 +5171,21 @@ copyObject(const void *from) case T_TriggerTransition: retval = _copyTriggerTransition(from); break; + case T_PartitionSpec: + retval = _copyPartitionSpec(from); + break; + case T_PartitionElem: + retval = _copyPartitionElem(from); + break; + case T_PartitionBoundSpec: + retval = _copyPartitionBoundSpec(from); + break; + case T_PartitionRangeDatum: + retval = _copyPartitionRangeDatum(from); + break; + case T_PartitionCmd: + retval = _copyPartitionCmd(from); + break; /* * MISCELLANEOUS NODES diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index cad3aebecd..b7a109cfb0 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1168,6 +1168,8 @@ _equalCreateStmt(const CreateStmt *a, const CreateStmt *b) COMPARE_NODE_FIELD(relation); COMPARE_NODE_FIELD(tableElts); COMPARE_NODE_FIELD(inhRelations); + COMPARE_NODE_FIELD(partspec); + COMPARE_NODE_FIELD(partbound); COMPARE_NODE_FIELD(ofTypename); COMPARE_NODE_FIELD(constraints); COMPARE_NODE_FIELD(options); @@ -2646,6 +2648,59 @@ _equalTriggerTransition(const TriggerTransition *a, const TriggerTransition *b) return true; } +static bool +_equalPartitionSpec(const PartitionSpec *a, const PartitionSpec *b) +{ + COMPARE_STRING_FIELD(strategy); + COMPARE_NODE_FIELD(partParams); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalPartitionElem(const PartitionElem *a, const PartitionElem *b) +{ + COMPARE_STRING_FIELD(name); + 
COMPARE_NODE_FIELD(expr); + COMPARE_NODE_FIELD(collation); + COMPARE_NODE_FIELD(opclass); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalPartitionBoundSpec(const PartitionBoundSpec *a, const PartitionBoundSpec *b) +{ + COMPARE_SCALAR_FIELD(strategy); + COMPARE_NODE_FIELD(listdatums); + COMPARE_NODE_FIELD(lowerdatums); + COMPARE_NODE_FIELD(upperdatums); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalPartitionRangeDatum(const PartitionRangeDatum *a, const PartitionRangeDatum *b) +{ + COMPARE_SCALAR_FIELD(infinite); + COMPARE_NODE_FIELD(value); + COMPARE_LOCATION_FIELD(location); + + return true; +} + +static bool +_equalPartitionCmd(const PartitionCmd *a, const PartitionCmd *b) +{ + COMPARE_NODE_FIELD(name); + COMPARE_NODE_FIELD(bound); + + return true; +} + /* * Stuff from pg_list.h */ @@ -3402,6 +3457,21 @@ equal(const void *a, const void *b) case T_TriggerTransition: retval = _equalTriggerTransition(a, b); break; + case T_PartitionSpec: + retval = _equalPartitionSpec(a, b); + break; + case T_PartitionElem: + retval = _equalPartitionElem(a, b); + break; + case T_PartitionBoundSpec: + retval = _equalPartitionBoundSpec(a, b); + break; + case T_PartitionRangeDatum: + retval = _equalPartitionRangeDatum(a, b); + break; + case T_PartitionCmd: + retval = _equalPartitionCmd(a, b); + break; default: elog(ERROR, "unrecognized node type: %d", diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 399744193c..973fb152c1 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -1552,6 +1552,12 @@ exprLocation(const Node *expr) /* just use nested expr's location */ loc = exprLocation((Node *) ((const InferenceElem *) expr)->expr); break; + case T_PartitionBoundSpec: + loc = ((const PartitionBoundSpec *) expr)->location; + break; + case T_PartitionRangeDatum: + loc = ((const PartitionRangeDatum *) expr)->location; + break; default: /* for any other node type it's just 
unknown... */ loc = -1; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 748b687929..0d858f5920 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2392,6 +2392,8 @@ _outCreateStmtInfo(StringInfo str, const CreateStmt *node) WRITE_NODE_FIELD(relation); WRITE_NODE_FIELD(tableElts); WRITE_NODE_FIELD(inhRelations); + WRITE_NODE_FIELD(partspec); + WRITE_NODE_FIELD(partbound); WRITE_NODE_FIELD(ofTypename); WRITE_NODE_FIELD(constraints); WRITE_NODE_FIELD(options); @@ -3277,6 +3279,47 @@ _outForeignKeyCacheInfo(StringInfo str, const ForeignKeyCacheInfo *node) appendStringInfo(str, " %u", node->conpfeqop[i]); } +static void +_outPartitionSpec(StringInfo str, const PartitionSpec *node) +{ + WRITE_NODE_TYPE("PARTITIONBY"); + + WRITE_STRING_FIELD(strategy); + WRITE_NODE_FIELD(partParams); + WRITE_LOCATION_FIELD(location); +} + +static void +_outPartitionElem(StringInfo str, const PartitionElem *node) +{ + WRITE_NODE_TYPE("PARTITIONELEM"); + + WRITE_STRING_FIELD(name); + WRITE_NODE_FIELD(expr); + WRITE_NODE_FIELD(collation); + WRITE_NODE_FIELD(opclass); + WRITE_LOCATION_FIELD(location); +} + +static void +_outPartitionBoundSpec(StringInfo str, const PartitionBoundSpec *node) +{ + WRITE_NODE_TYPE("PARTITIONBOUND"); + + WRITE_CHAR_FIELD(strategy); + WRITE_NODE_FIELD(listdatums); + WRITE_NODE_FIELD(lowerdatums); + WRITE_NODE_FIELD(upperdatums); +} + +static void +_outPartitionRangeDatum(StringInfo str, const PartitionRangeDatum *node) +{ + WRITE_NODE_TYPE("PARTRANGEDATUM"); + + WRITE_BOOL_FIELD(infinite); + WRITE_NODE_FIELD(value); +} /* * outNode - @@ -3865,6 +3908,18 @@ outNode(StringInfo str, const void *obj) case T_TriggerTransition: _outTriggerTransition(str, obj); break; + case T_PartitionSpec: + _outPartitionSpec(str, obj); + break; + case T_PartitionElem: + _outPartitionElem(str, obj); + break; + case T_PartitionBoundSpec: + _outPartitionBoundSpec(str, obj); + break; + case T_PartitionRangeDatum: + 
_outPartitionRangeDatum(str, obj); + break; default: diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 917e6c8a65..c587d4e1d7 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2265,6 +2265,36 @@ _readExtensibleNode(void) READ_DONE(); } +/* + * _readPartitionBoundSpec + */ +static PartitionBoundSpec * +_readPartitionBoundSpec(void) +{ + READ_LOCALS(PartitionBoundSpec); + + READ_CHAR_FIELD(strategy); + READ_NODE_FIELD(listdatums); + READ_NODE_FIELD(lowerdatums); + READ_NODE_FIELD(upperdatums); + + READ_DONE(); +} + +/* + * _readPartitionRangeDatum + */ +static PartitionRangeDatum * +_readPartitionRangeDatum(void) +{ + READ_LOCALS(PartitionRangeDatum); + + READ_BOOL_FIELD(infinite); + READ_NODE_FIELD(value); + + READ_DONE(); +} + /* * parseNodeString * @@ -2497,6 +2527,10 @@ parseNodeString(void) return_value = _readAlternativeSubPlan(); else if (MATCH("EXTENSIBLENODE", 14)) return_value = _readExtensibleNode(); + else if (MATCH("PARTITIONBOUND", 14)) + return_value = _readPartitionBoundSpec(); + else if (MATCH("PARTRANGEDATUM", 14)) + return_value = _readPartitionRangeDatum(); else { elog(ERROR, "badly formatted node string \"%.32s\"...", token); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index bb16c59028..72272d9bb7 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -27,6 +27,7 @@ #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/heap.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" #include "foreign/fdwapi.h" #include "miscadmin.h" @@ -1140,6 +1141,7 @@ get_relation_constraints(PlannerInfo *root, Index varno = rel->relid; Relation relation; TupleConstr *constr; + List *pcqual; /* * We assume the relation has already been safely locked. 
@@ -1225,6 +1227,24 @@ get_relation_constraints(PlannerInfo *root, } } + /* Append partition predicates, if any */ + pcqual = RelationGetPartitionQual(relation, true); + if (pcqual) + { + /* + * Run each expression through const-simplification and + * canonicalization similar to check constraints. + */ + pcqual = (List *) eval_const_expressions(root, (Node *) pcqual); + pcqual = (List *) canonicalize_qual((Expr *) pcqual); + + /* Fix Vars to have the desired varno */ + if (varno != 1) + ChangeVarNodes((Node *) pcqual, 1, varno, 0); + + result = list_concat(result, pcqual); + } + heap_close(relation, NoLock); return result; diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 1a541788eb..7364346167 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -806,8 +806,16 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) /* Process ON CONFLICT, if any. */ if (stmt->onConflictClause) + { + /* Bail out if target relation is partitioned table */ + if (pstate->p_target_rangetblentry->relkind == RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ON CONFLICT clause is not supported with partitioned tables"))); + qry->onConflict = transformOnConflictClause(pstate, stmt->onConflictClause); + } /* * If we have a RETURNING clause, we need to add the target relation to diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 414348b95b..2ed7b5259d 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -229,6 +229,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); struct ImportQual *importqual; InsertStmt *istmt; VariableSetStmt *vsetstmt; + PartitionElem *partelem; + PartitionSpec *partspec; + PartitionRangeDatum *partrange_datum; } %type <node> stmt schema_stmt @@ -276,7 +279,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <ival> add_drop opt_asc_desc opt_nulls_order %type <node>
alter_table_cmd alter_type_cmd opt_collate_clause - replica_identity + replica_identity partition_cmd %type <list> alter_table_cmds alter_type_cmds %type <dbehavior> opt_drop_behavior @@ -545,6 +548,17 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); opt_frame_clause frame_extent frame_bound %type <str> opt_existing_window_name %type <boolean> opt_if_not_exists +%type <partspec> PartitionSpec OptPartitionSpec +%type <str> part_strategy +%type <partelem> part_elem +%type <list> part_params +%type <list> OptPartitionElementList PartitionElementList +%type <node> PartitionElement +%type <node> ForValues +%type <node> partbound_datum +%type <list> partbound_datum_list +%type <partrange_datum> PartitionRangeDatum +%type <list> range_datum_list /* * Non-keyword token types. These are hard-wired into the "flex" lexer. @@ -570,7 +584,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); /* ordinary key words in alphabetical order */ %token <keyword> ABORT_P ABSOLUTE_P ACCESS ACTION ADD_P ADMIN AFTER AGGREGATE ALL ALSO ALTER ALWAYS ANALYSE ANALYZE AND ANY ARRAY AS ASC - ASSERTION ASSIGNMENT ASYMMETRIC AT ATTRIBUTE AUTHORIZATION + ASSERTION ASSIGNMENT ASYMMETRIC AT ATTACH ATTRIBUTE AUTHORIZATION BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT BOOLEAN_P BOTH BY @@ -586,7 +600,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DESC - DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP + DETACH DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P + DOUBLE_P DROP EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EVENT EXCEPT EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN @@ -1787,6 +1802,24 @@ AlterTableStmt: n->missing_ok = true; $$ = (Node *)n; } + | ALTER TABLE relation_expr partition_cmd + { + AlterTableStmt *n = makeNode(AlterTableStmt); + n->relation = $3; + n->cmds = list_make1($4); + n->relkind = OBJECT_TABLE; + n->missing_ok =
false; + $$ = (Node *)n; + } + | ALTER TABLE IF_P EXISTS relation_expr partition_cmd + { + AlterTableStmt *n = makeNode(AlterTableStmt); + n->relation = $5; + n->cmds = list_make1($6); + n->relkind = OBJECT_TABLE; + n->missing_ok = true; + $$ = (Node *)n; + } | ALTER TABLE ALL IN_P TABLESPACE name SET TABLESPACE name opt_nowait { AlterTableMoveAllStmt *n = @@ -1932,6 +1965,34 @@ alter_table_cmds: | alter_table_cmds ',' alter_table_cmd { $$ = lappend($1, $3); } ; +partition_cmd: + /* ALTER TABLE <name> ATTACH PARTITION <partition_name> FOR VALUES */ + ATTACH PARTITION qualified_name ForValues + { + AlterTableCmd *n = makeNode(AlterTableCmd); + PartitionCmd *cmd = makeNode(PartitionCmd); + + n->subtype = AT_AttachPartition; + cmd->name = $3; + cmd->bound = (Node *) $4; + n->def = (Node *) cmd; + + $$ = (Node *) n; + } + /* ALTER TABLE <name> DETACH PARTITION <partition_name> */ + | DETACH PARTITION qualified_name + { + AlterTableCmd *n = makeNode(AlterTableCmd); + PartitionCmd *cmd = makeNode(PartitionCmd); + + n->subtype = AT_DetachPartition; + cmd->name = $3; + n->def = (Node *) cmd; + + $$ = (Node *) n; + } + ; + alter_table_cmd: /* ALTER TABLE <name> ADD <coldef> */ ADD_P columnDef @@ -2467,6 +2528,73 @@ reloption_elem: } ; +ForValues: + /* a LIST partition */ + FOR VALUES IN_P '(' partbound_datum_list ')' + { + PartitionBoundSpec *n = makeNode(PartitionBoundSpec); + + n->strategy = PARTITION_STRATEGY_LIST; + n->listdatums = $5; + n->location = @3; + + $$ = (Node *) n; + } + + /* a RANGE partition */ + | FOR VALUES FROM '(' range_datum_list ')' TO '(' range_datum_list ')' + { + PartitionBoundSpec *n = makeNode(PartitionBoundSpec); + + n->strategy = PARTITION_STRATEGY_RANGE; + n->lowerdatums = $5; + n->upperdatums = $9; + n->location = @3; + + $$ = (Node *) n; + } + ; + +partbound_datum: + Sconst { $$ = makeStringConst($1, @1); } + | NumericOnly { $$ = makeAConst($1, @1); } + | NULL_P { $$ = makeNullAConst(@1); } + ; + +partbound_datum_list: + partbound_datum { $$ = list_make1($1); } + | partbound_datum_list ','
partbound_datum + { $$ = lappend($1, $3); } + ; + +range_datum_list: + PartitionRangeDatum { $$ = list_make1($1); } + | range_datum_list ',' PartitionRangeDatum + { $$ = lappend($1, $3); } + ; + +PartitionRangeDatum: + UNBOUNDED + { + PartitionRangeDatum *n = makeNode(PartitionRangeDatum); + + n->infinite = true; + n->value = NULL; + n->location = @1; + + $$ = n; + } + | partbound_datum + { + PartitionRangeDatum *n = makeNode(PartitionRangeDatum); + + n->infinite = false; + n->value = $1; + n->location = @1; + + $$ = n; + } + ; /***************************************************************************** * @@ -2812,69 +2940,113 @@ copy_generic_opt_arg_list_item: *****************************************************************************/ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' - OptInherit OptWith OnCommitOption OptTableSpace + OptInherit OptPartitionSpec OptWith OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $4->relpersistence = $2; n->relation = $4; n->tableElts = $6; n->inhRelations = $8; + n->partspec = $9; n->ofTypename = NULL; n->constraints = NIL; - n->options = $9; - n->oncommit = $10; - n->tablespacename = $11; + n->options = $10; + n->oncommit = $11; + n->tablespacename = $12; n->if_not_exists = false; $$ = (Node *)n; } | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name '(' - OptTableElementList ')' OptInherit OptWith OnCommitOption - OptTableSpace + OptTableElementList ')' OptInherit OptPartitionSpec OptWith + OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $7->relpersistence = $2; n->relation = $7; n->tableElts = $9; n->inhRelations = $11; + n->partspec = $12; n->ofTypename = NULL; n->constraints = NIL; - n->options = $12; - n->oncommit = $13; - n->tablespacename = $14; + n->options = $13; + n->oncommit = $14; + n->tablespacename = $15; n->if_not_exists = true; $$ = (Node *)n; } | CREATE OptTemp TABLE qualified_name OF any_name - OptTypedTableElementList OptWith 
OnCommitOption OptTableSpace + OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption + OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $4->relpersistence = $2; n->relation = $4; n->tableElts = $7; n->inhRelations = NIL; + n->partspec = $8; n->ofTypename = makeTypeNameFromNameList($6); n->ofTypename->location = @6; n->constraints = NIL; - n->options = $8; - n->oncommit = $9; - n->tablespacename = $10; + n->options = $9; + n->oncommit = $10; + n->tablespacename = $11; n->if_not_exists = false; $$ = (Node *)n; } | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name OF any_name - OptTypedTableElementList OptWith OnCommitOption OptTableSpace + OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption + OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $7->relpersistence = $2; n->relation = $7; n->tableElts = $10; n->inhRelations = NIL; + n->partspec = $11; n->ofTypename = makeTypeNameFromNameList($9); n->ofTypename->location = @9; n->constraints = NIL; + n->options = $12; + n->oncommit = $13; + n->tablespacename = $14; + n->if_not_exists = true; + $$ = (Node *)n; + } + | CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name + OptPartitionElementList ForValues OptPartitionSpec OptWith + OnCommitOption OptTableSpace + { + CreateStmt *n = makeNode(CreateStmt); + $4->relpersistence = $2; + n->relation = $4; + n->tableElts = $8; + n->inhRelations = list_make1($7); + n->partbound = (Node *) $9; + n->partspec = $10; + n->ofTypename = NULL; + n->constraints = NIL; n->options = $11; n->oncommit = $12; n->tablespacename = $13; + n->if_not_exists = false; + $$ = (Node *)n; + } + | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name PARTITION OF + qualified_name OptPartitionElementList ForValues OptPartitionSpec + OptWith OnCommitOption OptTableSpace + { + CreateStmt *n = makeNode(CreateStmt); + $7->relpersistence = $2; + n->relation = $7; + n->tableElts = $11; + n->inhRelations = list_make1($10); + n->partbound = (Node *) $12; + n->partspec = 
$13; + n->ofTypename = NULL; + n->constraints = NIL; + n->options = $14; + n->oncommit = $15; + n->tablespacename = $16; n->if_not_exists = true; $$ = (Node *)n; } @@ -2923,6 +3095,11 @@ OptTypedTableElementList: | /*EMPTY*/ { $$ = NIL; } ; +OptPartitionElementList: + '(' PartitionElementList ')' { $$ = $2; } + | /*EMPTY*/ { $$ = NIL; } + ; + TableElementList: TableElement { @@ -2945,6 +3122,17 @@ TypedTableElementList: } ; +PartitionElementList: + PartitionElement + { + $$ = list_make1($1); + } + | PartitionElementList ',' PartitionElement + { + $$ = lappend($1, $3); + } + ; + TableElement: columnDef { $$ = $1; } | TableLikeClause { $$ = $1; } @@ -2956,6 +3144,28 @@ TypedTableElement: | TableConstraint { $$ = $1; } ; +PartitionElement: + TableConstraint { $$ = $1; } + | ColId ColQualList + { + ColumnDef *n = makeNode(ColumnDef); + n->colname = $1; + n->typeName = NULL; + n->inhcount = 0; + n->is_local = true; + n->is_not_null = false; + n->is_from_type = false; + n->storage = 0; + n->raw_default = NULL; + n->cooked_default = NULL; + n->collOid = InvalidOid; + SplitColQualList($2, &n->constraints, &n->collClause, + yyscanner); + n->location = @1; + $$ = (Node *) n; + } + ; + columnDef: ColId Typename create_generic_options ColQualList { ColumnDef *n = makeNode(ColumnDef); @@ -3419,6 +3629,65 @@ OptInherit: INHERITS '(' qualified_name_list ')' { $$ = $3; } | /*EMPTY*/ { $$ = NIL; } ; +/* Optional partition key specification */ +OptPartitionSpec: PartitionSpec { $$ = $1; } + | /*EMPTY*/ { $$ = NULL; } + ; + +PartitionSpec: PARTITION BY part_strategy '(' part_params ')' + { + PartitionSpec *n = makeNode(PartitionSpec); + + n->strategy = $3; + n->partParams = $5; + n->location = @1; + + $$ = n; + } + ; + +part_strategy: IDENT { $$ = $1; } + | unreserved_keyword { $$ = pstrdup($1); } + ; + +part_params: part_elem { $$ = list_make1($1); } + | part_params ',' part_elem { $$ = lappend($1, $3); } + ; + +part_elem: ColId opt_collate opt_class + { + PartitionElem *n = 
makeNode(PartitionElem); + + n->name = $1; + n->expr = NULL; + n->collation = $2; + n->opclass = $3; + n->location = @1; + $$ = n; + } + | func_expr_windowless opt_collate opt_class + { + PartitionElem *n = makeNode(PartitionElem); + + n->name = NULL; + n->expr = $1; + n->collation = $2; + n->opclass = $3; + n->location = @1; + $$ = n; + } + | '(' a_expr ')' opt_collate opt_class + { + PartitionElem *n = makeNode(PartitionElem); + + n->name = NULL; + n->expr = $2; + n->collation = $4; + n->opclass = $5; + n->location = @1; + $$ = n; + } + ; /* WITH (options) is preferred, WITH OIDS and WITHOUT OIDS are legacy forms */ OptWith: WITH reloptions { $$ = $2; } @@ -4484,6 +4753,48 @@ CreateForeignTableStmt: n->options = $14; $$ = (Node *) n; } + | CREATE FOREIGN TABLE qualified_name + PARTITION OF qualified_name OptPartitionElementList ForValues + SERVER name create_generic_options + { + CreateForeignTableStmt *n = makeNode(CreateForeignTableStmt); + $4->relpersistence = RELPERSISTENCE_PERMANENT; + n->base.relation = $4; + n->base.inhRelations = list_make1($7); + n->base.tableElts = $8; + n->base.partbound = (Node *) $9; + n->base.ofTypename = NULL; + n->base.constraints = NIL; + n->base.options = NIL; + n->base.oncommit = ONCOMMIT_NOOP; + n->base.tablespacename = NULL; + n->base.if_not_exists = false; + /* FDW-specific data */ + n->servername = $11; + n->options = $12; + $$ = (Node *) n; + } + | CREATE FOREIGN TABLE IF_P NOT EXISTS qualified_name + PARTITION OF qualified_name OptPartitionElementList ForValues + SERVER name create_generic_options + { + CreateForeignTableStmt *n = makeNode(CreateForeignTableStmt); + $7->relpersistence = RELPERSISTENCE_PERMANENT; + n->base.relation = $7; + n->base.inhRelations = list_make1($10); + n->base.tableElts = $11; + n->base.partbound = (Node *) $12; + n->base.ofTypename = NULL; + n->base.constraints = NIL; + n->base.options = NIL; + n->base.oncommit = ONCOMMIT_NOOP; + n->base.tablespacename = NULL; + n->base.if_not_exists = true; + 
/* FDW-specific data */ + n->servername = $14; + n->options = $15; + $$ = (Node *) n; + } ; /***************************************************************************** @@ -13703,6 +14014,7 @@ unreserved_keyword: | ASSERTION | ASSIGNMENT | AT + | ATTACH | ATTRIBUTE | BACKWARD | BEFORE @@ -13749,6 +14061,7 @@ unreserved_keyword: | DELIMITER | DELIMITERS | DEPENDS + | DETACH | DICTIONARY | DISABLE_P | DISCARD diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 481a4ddc48..92d1577030 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -501,6 +501,13 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr) err = _("grouping operations are not allowed in trigger WHEN conditions"); break; + case EXPR_KIND_PARTITION_EXPRESSION: + if (isAgg) + err = _("aggregate functions are not allowed in partition key expression"); + else + err = _("grouping operations are not allowed in partition key expression"); + + break; /* * There is intentionally no default: case here, so that the @@ -858,6 +865,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, case EXPR_KIND_TRIGGER_WHEN: err = _("window functions are not allowed in trigger WHEN conditions"); break; + case EXPR_KIND_PARTITION_EXPRESSION: + err = _("window functions are not allowed in partition key expression"); + break; /* * There is intentionally no default: case here, so that the diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 17d1cbf8b3..8a2bdf06e8 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -1843,6 +1843,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink) case EXPR_KIND_TRIGGER_WHEN: err = _("cannot use subquery in trigger WHEN condition"); break; + case EXPR_KIND_PARTITION_EXPRESSION: + err = _("cannot use subquery in partition key expression"); + break; /* * There is intentionally no default: case here, so that the @@ -3446,6 +3449,8 @@ 
ParseExprKindName(ParseExprKind exprKind) return "EXECUTE"; case EXPR_KIND_TRIGGER_WHEN: return "WHEN"; + case EXPR_KIND_PARTITION_EXPRESSION: + return "PARTITION BY"; /* * There is intentionally no default: case here, so that the diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index 56c9a4293d..7d9b4157d4 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -2166,6 +2166,9 @@ check_srf_call_placement(ParseState *pstate, int location) case EXPR_KIND_TRIGGER_WHEN: err = _("set-returning functions are not allowed in trigger WHEN conditions"); break; + case EXPR_KIND_PARTITION_EXPRESSION: + err = _("set-returning functions are not allowed in partition key expression"); + break; /* * There is intentionally no default: case here, so that the diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 0670bc2482..cc6a961bb4 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -47,8 +47,10 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/planner.h" #include "parser/analyze.h" #include "parser/parse_clause.h" +#include "parser/parse_coerce.h" #include "parser/parse_collate.h" #include "parser/parse_expr.h" #include "parser/parse_relation.h" @@ -62,6 +64,7 @@ #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/ruleutils.h" #include "utils/syscache.h" #include "utils/typcache.h" @@ -87,6 +90,8 @@ typedef struct List *alist; /* "after list" of things to do after creating * the table */ IndexStmt *pkey; /* PRIMARY KEY index, if any */ + bool ispartitioned; /* true if table is partitioned */ + Node *partbound; /* transformed FOR VALUES */ } CreateStmtContext; /* State shared by transformCreateSchemaStmt and its subroutines */ @@ -129,6 +134,7 @@ static void transformConstraintAttrs(CreateStmtContext *cxt, List *constraintList); static void 
transformColumnType(CreateStmtContext *cxt, ColumnDef *column); static void setSchemaName(char *context_schema, char **stmt_schema_name); +static void transformAttachPartition(CreateStmtContext *cxt, PartitionCmd *cmd); /* @@ -229,6 +235,7 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString) cxt.blist = NIL; cxt.alist = NIL; cxt.pkey = NULL; + cxt.ispartitioned = stmt->partspec != NULL; /* * Notice that we allow OIDs here only for plain tables, even though @@ -247,6 +254,28 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString) if (stmt->ofTypename) transformOfType(&cxt, stmt->ofTypename); + if (stmt->partspec) + { + int partnatts = list_length(stmt->partspec->partParams); + + if (stmt->inhRelations && !stmt->partbound) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot create partitioned table as inheritance child"))); + + if (partnatts > PARTITION_MAX_KEYS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot partition using more than %d columns", + PARTITION_MAX_KEYS))); + + if (!pg_strcasecmp(stmt->partspec->strategy, "list") && + partnatts > 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot list partition using more than one column"))); + } + /* * Run through each primary element in the table creation clause. Separate * column defs from constraints, and do preliminary analysis. 
We have to @@ -583,6 +612,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) errmsg("primary key constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("primary key constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); /* FALL THRU */ case CONSTR_UNIQUE: @@ -592,6 +627,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) errmsg("unique constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unique constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); if (constraint->keys == NIL) constraint->keys = list_make1(makeString(column->colname)); cxt->ixconstraints = lappend(cxt->ixconstraints, constraint); @@ -609,6 +650,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column) errmsg("foreign key constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("foreign key constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); /* * Fill in the current attribute's name and throw it into the @@ -674,6 +721,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) errmsg("primary key constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("primary key constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + 
constraint->location))); cxt->ixconstraints = lappend(cxt->ixconstraints, constraint); break; @@ -684,6 +737,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) errmsg("unique constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unique constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); cxt->ixconstraints = lappend(cxt->ixconstraints, constraint); break; @@ -694,6 +753,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) errmsg("exclusion constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("exclusion constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); cxt->ixconstraints = lappend(cxt->ixconstraints, constraint); break; @@ -708,6 +773,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint) errmsg("foreign key constraints are not supported on foreign tables"), parser_errposition(cxt->pstate, constraint->location))); + if (cxt->ispartitioned) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("foreign key constraints are not supported on partitioned tables"), + parser_errposition(cxt->pstate, + constraint->location))); cxt->fkconstraints = lappend(cxt->fkconstraints, constraint); break; @@ -763,7 +834,8 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla relation->rd_rel->relkind != RELKIND_VIEW && relation->rd_rel->relkind != RELKIND_MATVIEW && relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE && - relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + 
relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, view, materialized view, composite type, or foreign table", @@ -1854,7 +1926,8 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) rel = heap_openrv(inh, AccessShareLock); /* check user requested inheritance from valid relkind */ if (rel->rd_rel->relkind != RELKIND_RELATION && - rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE) + rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("inherited relation \"%s\" is not a table or foreign table", @@ -2512,6 +2585,8 @@ transformAlterTableStmt(Oid relid, AlterTableStmt *stmt, cxt.blist = NIL; cxt.alist = NIL; cxt.pkey = NULL; + cxt.ispartitioned = (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + cxt.partbound = NULL; /* * The only subtypes that currently require parse transformation handling @@ -2594,6 +2669,19 @@ transformAlterTableStmt(Oid relid, AlterTableStmt *stmt, break; } + case AT_AttachPartition: + { + PartitionCmd *partcmd = (PartitionCmd *) cmd->def; + + transformAttachPartition(&cxt, partcmd); + + /* assign transformed values */ + partcmd->bound = cxt.partbound; + } + + newcmds = lappend(newcmds, cmd); + break; + default: newcmds = lappend(newcmds, cmd); break; @@ -2958,3 +3046,237 @@ setSchemaName(char *context_schema, char **stmt_schema_name) "different from the one being created (%s)", *stmt_schema_name, context_schema))); } + +/* + * transformAttachPartition + * Analyze ATTACH PARTITION ... FOR VALUES ... + */ +static void +transformAttachPartition(CreateStmtContext *cxt, PartitionCmd *cmd) +{ + Relation parentRel = cxt->rel; + + /* + * We are going to try to validate the partition bound specification + * against the partition key of rel, so it better have one. 
+ */ + if (parentRel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("\"%s\" is not partitioned", + RelationGetRelationName(parentRel)))); + + /* transform the values */ + Assert(RelationGetPartitionKey(parentRel) != NULL); + cxt->partbound = transformPartitionBound(cxt->pstate, parentRel, + cmd->bound); +} + +/* + * transformPartitionBound + * + * Transform partition bound specification: check that it matches the partitioning strategy of parent, coerce each bound value to the type of its partition key column, and return the result as a modified copy of bound + */ +Node * +transformPartitionBound(ParseState *pstate, Relation parent, Node *bound) +{ + PartitionBoundSpec *spec = (PartitionBoundSpec *) bound, + *result_spec; + PartitionKey key = RelationGetPartitionKey(parent); + char strategy = get_partition_strategy(key); + int partnatts = get_partition_natts(key); + List *partexprs = get_partition_exprs(key); + + result_spec = copyObject(spec); /* start from a copy; its datum lists are rebuilt below */ + + if (strategy == PARTITION_STRATEGY_LIST) + { + ListCell *cell; + char *colname; + + /* Get the only column's name in case we need to output an error */ + if (key->partattrs[0] != 0) + colname = get_relid_attribute_name(RelationGetRelid(parent), + key->partattrs[0]); + else + colname = deparse_expression((Node *) linitial(partexprs), + deparse_context_for(RelationGetRelationName(parent), + RelationGetRelid(parent)), + false, false); + + if (spec->strategy != PARTITION_STRATEGY_LIST) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("invalid bound specification for a list partition"), + parser_errposition(pstate, exprLocation(bound)))); + + result_spec->listdatums = NIL; + foreach(cell, spec->listdatums) + { + A_Const *con = (A_Const *) lfirst(cell); + Node *value; + ListCell *cell2; + bool duplicate; + + value = (Node *) make_const(pstate, &con->val, con->location); + value = coerce_to_target_type(pstate, + value, exprType(value), + get_partition_col_typid(key, 0), + get_partition_col_typmod(key, 0), + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + + if (value == NULL) + ereport(ERROR, + 
(errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"", + format_type_be(get_partition_col_typid(key, 0)), + colname), + parser_errposition(pstate, + exprLocation((Node *) con)))); + + /* Simplify the expression */ + value = (Node *) expression_planner((Expr *) value); + + /* Don't add to the result if the value is a duplicate (as judged by equal() against the values kept so far) */ + duplicate = false; + foreach(cell2, result_spec->listdatums) + { + Const *value2 = (Const *) lfirst(cell2); + + if (equal(value, value2)) + { + duplicate = true; + break; + } + } + if (duplicate) + continue; + + result_spec->listdatums = lappend(result_spec->listdatums, + value); + } + } + else if (strategy == PARTITION_STRATEGY_RANGE) + { + ListCell *cell1, + *cell2; + int i, + j; + char *colname; + + if (spec->strategy != PARTITION_STRATEGY_RANGE) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("invalid bound specification for a range partition"), + parser_errposition(pstate, exprLocation(bound)))); + + Assert(spec->lowerdatums != NIL && spec->upperdatums != NIL); + + if (list_length(spec->lowerdatums) != partnatts) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("FROM must specify exactly one value per partitioning column"))); + if (list_length(spec->upperdatums) != partnatts) + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("TO must specify exactly one value per partitioning column"))); + + i = j = 0; /* i counts key columns; j counts only the expression columns */ + result_spec->lowerdatums = result_spec->upperdatums = NIL; + forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums) + { + PartitionRangeDatum *ldatum, + *rdatum; + Node *value; + A_Const *lcon = NULL, + *rcon = NULL; + + ldatum = (PartitionRangeDatum *) lfirst(cell1); + rdatum = (PartitionRangeDatum *) lfirst(cell2); + /* Get the column's name in case we need to output an error */ + if (key->partattrs[i] != 0) + colname = get_relid_attribute_name(RelationGetRelid(parent), + key->partattrs[i]); + else + { 
+ colname = deparse_expression((Node *) list_nth(partexprs, j), + deparse_context_for(RelationGetRelationName(parent), + RelationGetRelid(parent)), + false, false); + ++j; + } + + if (!ldatum->infinite) + lcon = (A_Const *) ldatum->value; + if (!rdatum->infinite) + rcon = (A_Const *) rdatum->value; + + if (lcon) + { + value = (Node *) make_const(pstate, &lcon->val, lcon->location); + if (((Const *) value)->constisnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot specify NULL in range bound"))); + value = coerce_to_target_type(pstate, + value, exprType(value), + get_partition_col_typid(key, i), + get_partition_col_typmod(key, i), + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (value == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"", + format_type_be(get_partition_col_typid(key, i)), + colname), + parser_errposition(pstate, exprLocation((Node *) ldatum)))); + + /* Simplify the expression */ + value = (Node *) expression_planner((Expr *) value); + ldatum->value = value; /* overwrites the raw constant held by the input datum */ + } + + if (rcon) + { + value = (Node *) make_const(pstate, &rcon->val, rcon->location); + if (((Const *) value)->constisnull) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot specify NULL in range bound"))); + value = coerce_to_target_type(pstate, + value, exprType(value), + get_partition_col_typid(key, i), + get_partition_col_typmod(key, i), + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + if (value == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"", + format_type_be(get_partition_col_typid(key, i)), + colname), + parser_errposition(pstate, exprLocation((Node *) rdatum)))); + + /* Simplify the expression */ + value = (Node *) expression_planner((Expr *) value); + rdatum->value = value; /* overwrites the raw constant held by the input datum */ + } + + result_spec->lowerdatums = 
lappend(result_spec->lowerdatums, + copyObject(ldatum)); + result_spec->upperdatums = lappend(result_spec->upperdatums, + copyObject(rdatum)); + + ++i; + } + } + else + elog(ERROR, "unexpected partition strategy: %d", (int) strategy); + + return (Node *) result_spec; +} diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index f82d891c34..32e1328149 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -261,7 +261,8 @@ DefineQueryRewrite(char *rulename, */ if (event_relation->rd_rel->relkind != RELKIND_RELATION && event_relation->rd_rel->relkind != RELKIND_MATVIEW && - event_relation->rd_rel->relkind != RELKIND_VIEW) + event_relation->rd_rel->relkind != RELKIND_VIEW && + event_relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table or view", diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 65c3d6e081..bf4f098c15 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -1231,7 +1231,8 @@ rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte, TargetEntry *tle; if (target_relation->rd_rel->relkind == RELKIND_RELATION || - target_relation->rd_rel->relkind == RELKIND_MATVIEW) + target_relation->rd_rel->relkind == RELKIND_MATVIEW || + target_relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { /* * Emit CTID so that executor can find the row to update or delete. 
diff --git a/src/backend/rewrite/rowsecurity.c b/src/backend/rewrite/rowsecurity.c index b7edefc7dd..e38586dd80 100644 --- a/src/backend/rewrite/rowsecurity.c +++ b/src/backend/rewrite/rowsecurity.c @@ -121,7 +121,8 @@ get_row_security_policies(Query *root, RangeTblEntry *rte, int rt_index, *hasSubLinks = false; /* If this is not a normal relation, just return immediately */ - if (rte->relkind != RELKIND_RELATION) + if (rte->relkind != RELKIND_RELATION && + rte->relkind != RELKIND_PARTITIONED_TABLE) return; /* Switch to checkAsUser if it's set */ diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index f50ce408ae..fd4eff4907 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -987,7 +987,8 @@ ProcessUtilitySlow(ParseState *pstate, /* Create the table itself */ address = DefineRelation((CreateStmt *) stmt, RELKIND_RELATION, - InvalidOid, NULL); + InvalidOid, NULL, + queryString); EventTriggerCollectSimpleCommand(address, secondaryObject, stmt); @@ -1020,7 +1021,8 @@ ProcessUtilitySlow(ParseState *pstate, /* Create the table itself */ address = DefineRelation((CreateStmt *) stmt, RELKIND_FOREIGN_TABLE, - InvalidOid, NULL); + InvalidOid, NULL, + queryString); CreateForeignTable((CreateForeignTableStmt *) stmt, address.objectId); EventTriggerCollectSimpleCommand(address, diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index fecee85e5b..4e2ba19d1b 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -33,6 +33,7 @@ #include "catalog/pg_language.h" #include "catalog/pg_opclass.h" #include "catalog/pg_operator.h" +#include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" @@ -315,6 +316,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno, const Oid *excludeOps, bool attrsOnly, bool showTblSpc, int prettyFlags, bool missing_ok); +static char *pg_get_partkeydef_worker(Oid 
relid, int prettyFlags); static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand, int prettyFlags, bool missing_ok); static text *pg_get_expr_worker(text *expr, Oid relid, const char *relname, @@ -1415,6 +1417,167 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, return buf.data; } +/* + * pg_get_partkeydef + * + * Returns the partition key specification (without PARTITION BY), ie: + * + * { RANGE | LIST } (column opt_collation opt_opclass [, ...]) + */ +Datum +pg_get_partkeydef(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + + PG_RETURN_TEXT_P(string_to_text(pg_get_partkeydef_worker(relid, + PRETTYFLAG_INDENT))); +} + +/* + * Internal workhorse to decompile a partition key definition. + * + * Looks up relid's pg_partitioned_table entry in the syscache and returns the + * deparsed key as a string; prettyFlags controls how expression columns are + * pretty-printed. + */ +static char * +pg_get_partkeydef_worker(Oid relid, int prettyFlags) +{ + Form_pg_partitioned_table form; + HeapTuple tuple; + oidvector *partclass; + oidvector *partcollation; + List *partexprs; + ListCell *partexpr_item; + List *context; + Datum datum; + bool isnull; + StringInfoData buf; + int keyno; + char *str; + char *sep; + + tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for partition key of %u", relid); + + form = (Form_pg_partitioned_table) GETSTRUCT(tuple); + + Assert(form->partrelid == relid); + + /* Must get partclass and partcollation the hard way */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partclass, &isnull); + Assert(!isnull); + partclass = (oidvector *) DatumGetPointer(datum); + + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partcollation, &isnull); + Assert(!isnull); + partcollation = (oidvector *) DatumGetPointer(datum); + + + /* + * Get the expressions, if any. (NOTE: we do not use the relcache + * versions of the expressions, because we want to display non-const-folded + * expressions.) 
+ */ + if (!heap_attisnull(tuple, Anum_pg_partitioned_table_partexprs)) + { + Datum exprsDatum; + bool isnull; + char *exprsString; + + exprsDatum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partexprs, &isnull); + Assert(!isnull); + exprsString = TextDatumGetCString(exprsDatum); + partexprs = (List *) stringToNode(exprsString); + + if (!IsA(partexprs, List)) + elog(ERROR, "unexpected node type found in partexprs: %d", + (int) nodeTag(partexprs)); + + pfree(exprsString); + } + else + partexprs = NIL; + + partexpr_item = list_head(partexprs); + context = deparse_context_for(get_relation_name(relid), relid); + + initStringInfo(&buf); + + switch (form->partstrat) + { + case PARTITION_STRATEGY_LIST: + appendStringInfo(&buf, "LIST"); + break; + case PARTITION_STRATEGY_RANGE: + appendStringInfo(&buf, "RANGE"); + break; + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) form->partstrat); + } + + appendStringInfo(&buf, " ("); + sep = ""; + for (keyno = 0; keyno < form->partnatts; keyno++) + { + AttrNumber attnum = form->partattrs.values[keyno]; + Oid keycoltype; + Oid keycolcollation; + Oid partcoll; + + appendStringInfoString(&buf, sep); + sep = ", "; + if (attnum != 0) + { + /* Simple attribute reference */ + char *attname; + int32 keycoltypmod; + + attname = get_relid_attribute_name(relid, attnum); + appendStringInfoString(&buf, quote_identifier(attname)); + get_atttypetypmodcoll(relid, attnum, + &keycoltype, &keycoltypmod, + &keycolcollation); + } + else + { + /* Expression: an attnum of zero means the next entry of partexprs */ + Node *partkey; + + if (partexpr_item == NULL) + elog(ERROR, "too few entries in partexprs list"); + partkey = (Node *) lfirst(partexpr_item); + partexpr_item = lnext(partexpr_item); + /* Deparse, honoring the caller's pretty-printing flags */ + str = deparse_expression_pretty(partkey, context, false, false, + prettyFlags, 0); + + appendStringInfoString(&buf, str); + keycoltype = exprType(partkey); + keycolcollation = exprCollation(partkey); + } + + /* Add collation, if not default for column */ + partcoll = 
partcollation->values[keyno]; + if (OidIsValid(partcoll) && partcoll != keycolcollation) + appendStringInfo(&buf, " COLLATE %s", + generate_collation_name((partcoll))); + + /* Add the operator class name, if not default */ + get_opclass_name(partclass->values[keyno], keycoltype, &buf); + } + appendStringInfoChar(&buf, ')'); + + /* Clean up */ + ReleaseSysCache(tuple); + + return buf.data; +} /* * pg_get_constraintdef @@ -8291,6 +8450,88 @@ get_rule_expr(Node *node, deparse_context *context, } break; + case T_PartitionBoundSpec: + { + PartitionBoundSpec *spec = (PartitionBoundSpec *) node; + ListCell *cell; + char *sep; + + switch (spec->strategy) + { + case PARTITION_STRATEGY_LIST: + Assert(spec->listdatums != NIL); + + appendStringInfoString(buf, "FOR VALUES"); + appendStringInfoString(buf, " IN ("); + sep = ""; + foreach (cell, spec->listdatums) + { + Const *val = lfirst(cell); + + appendStringInfoString(buf, sep); + get_const_expr(val, context, -1); + sep = ", "; + } + + appendStringInfoString(buf, ")"); + break; + + case PARTITION_STRATEGY_RANGE: + Assert(spec->lowerdatums != NIL && + spec->upperdatums != NIL && + list_length(spec->lowerdatums) == + list_length(spec->upperdatums)); + + appendStringInfoString(buf, "FOR VALUES"); + appendStringInfoString(buf, " FROM"); + appendStringInfoString(buf, " ("); + sep = ""; + foreach (cell, spec->lowerdatums) + { + PartitionRangeDatum *datum = lfirst(cell); + Const *val; + + appendStringInfoString(buf, sep); + if (datum->infinite) + appendStringInfoString(buf, "UNBOUNDED"); + else + { + val = 
(Const *) datum->value; + get_const_expr(val, context, -1); + } + sep = ", "; + } + appendStringInfoString(buf, ")"); + break; + + default: + elog(ERROR, "unrecognized partition strategy: %d", + (int) spec->strategy); + break; + } + } + break; + case T_List: { char *sep; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 79e0b1ff48..2a6835991c 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -32,6 +32,7 @@ #include "access/htup_details.h" #include "access/multixact.h" +#include "access/nbtree.h" #include "access/reloptions.h" #include "access/sysattr.h" #include "access/xact.h" @@ -40,6 +41,7 @@ #include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/namespace.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" #include "catalog/pg_amproc.h" #include "catalog/pg_attrdef.h" @@ -49,6 +51,7 @@ #include "catalog/pg_database.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" +#include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_shseclabel.h" @@ -258,6 +261,8 @@ static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_hi static Relation AllocateRelationDesc(Form_pg_class relp); static void RelationParseRelOptions(Relation relation, HeapTuple tuple); static void RelationBuildTupleDesc(Relation relation); +static void RelationBuildPartitionKey(Relation relation); +static PartitionKey copy_partition_key(PartitionKey fromkey); static Relation RelationBuildDesc(Oid targetRelId, bool insertIt); static void RelationInitPhysicalAddr(Relation relation); static void load_critical_index(Oid indexoid, Oid heapoid); @@ -278,6 +283,8 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid, StrategyNumber numSupport); static void RelationCacheInitFileRemoveInDir(const char *tblspcpath); static void unlink_initfile(const char *initfilename); +static bool 
equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1, + PartitionDesc partdesc2); /* @@ -435,6 +442,7 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple) case RELKIND_INDEX: case RELKIND_VIEW: case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: break; default: return; @@ -795,6 +803,248 @@ RelationBuildRuleLock(Relation relation) relation->rd_rules = rulelock; } +/* + * RelationBuildPartitionKey + * Build and attach to relcache partition key data of relation + * + * Partitioning key data is stored in CacheMemoryContext to ensure it survives + * as long as the relcache. To avoid leaking memory in that context in case + * of an error partway through this function, we build the structure in the + * working context (which must be short-lived) and copy the completed + * structure into the cache memory. + * + * Also, since the structure being created here is sufficiently complex, we + * make a private child context of CacheMemoryContext for each relation that + * has associated partition key information. That means no complicated logic + * to free individual elements whenever the relcache entry is flushed - just + * delete the context. + */ +static void +RelationBuildPartitionKey(Relation relation) +{ + Form_pg_partitioned_table form; + HeapTuple tuple; + bool isnull; + int i; + PartitionKey key; + AttrNumber *attrs; + oidvector *opclass; + oidvector *collation; + ListCell *partexprs_item; + Datum datum; + MemoryContext partkeycxt, + oldcxt; + + tuple = SearchSysCache1(PARTRELID, + ObjectIdGetDatum(RelationGetRelid(relation))); + + /* + * The following happens when we have created our pg_class entry but not + * the pg_partitioned_table entry yet. 
+ */ + if (!HeapTupleIsValid(tuple)) + return; + + key = (PartitionKey) palloc0(sizeof(PartitionKeyData)); + + /* Fixed-length attributes */ + form = (Form_pg_partitioned_table) GETSTRUCT(tuple); + key->strategy = form->partstrat; + key->partnatts = form->partnatts; + + /* + * We can rely on the first variable-length attribute being mapped to the + * relevant field of the catalog's C struct, because all previous + * attributes are non-nullable and fixed-length. + */ + attrs = form->partattrs.values; + + /* But use the hard way to retrieve further variable-length attributes */ + /* Operator class */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partclass, &isnull); + Assert(!isnull); + opclass = (oidvector *) DatumGetPointer(datum); + + /* Collation */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partcollation, &isnull); + Assert(!isnull); + collation = (oidvector *) DatumGetPointer(datum); + + /* Expressions */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partexprs, &isnull); + if (!isnull) + { + char *exprString; + Node *expr; + + exprString = TextDatumGetCString(datum); + expr = stringToNode(exprString); + pfree(exprString); + + /* + * Run the expressions through const-simplification since the planner + * will be comparing them to similarly-processed qual clause operands, + * and may fail to detect valid matches without this step. We don't + * need to bother with canonicalize_qual() though, because partition + * expressions are not full-fledged qualification clauses. 
+ */ + expr = eval_const_expressions(NULL, (Node *) expr); + + /* May as well fix opfuncids too */ + fix_opfuncids((Node *) expr); + key->partexprs = (List *) expr; + } + + key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber)); + key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo)); + + key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + + /* Gather type and collation info as well */ + key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32)); + key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16)); + key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool)); + key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char)); + key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + + /* Copy partattrs and fill other per-attribute info */ + memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16)); + partexprs_item = list_head(key->partexprs); + for (i = 0; i < key->partnatts; i++) + { + AttrNumber attno = key->partattrs[i]; + HeapTuple opclasstup; + Form_pg_opclass opclassform; + Oid funcid; + + /* Collect opfamily information */ + opclasstup = SearchSysCache1(CLAOID, + ObjectIdGetDatum(opclass->values[i])); + if (!HeapTupleIsValid(opclasstup)) + elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]); + + opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup); + key->partopfamily[i] = opclassform->opcfamily; + key->partopcintype[i] = opclassform->opcintype; + + /* + * A btree support function covers the cases of list and range methods + * currently supported. 
+ */ + funcid = get_opfamily_proc(opclassform->opcfamily, + opclassform->opcintype, + opclassform->opcintype, + BTORDER_PROC); + + fmgr_info(funcid, &key->partsupfunc[i]); + + /* Collation */ + key->partcollation[i] = collation->values[i]; + + /* Collect type information */ + if (attno != 0) + { + key->parttypid[i] = relation->rd_att->attrs[attno - 1]->atttypid; + key->parttypmod[i] = relation->rd_att->attrs[attno - 1]->atttypmod; + key->parttypcoll[i] = relation->rd_att->attrs[attno - 1]->attcollation; + } + else + { + if (partexprs_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + + key->parttypid[i] = exprType(lfirst(partexprs_item)); + key->parttypmod[i] = exprTypmod(lfirst(partexprs_item)); + key->parttypcoll[i] = exprCollation(lfirst(partexprs_item)); + + /* Advance so the next expression column reads its own expression */ + partexprs_item = lnext(partexprs_item); + } + get_typlenbyvalalign(key->parttypid[i], + &key->parttyplen[i], + &key->parttypbyval[i], + &key->parttypalign[i]); + + ReleaseSysCache(opclasstup); + } + + ReleaseSysCache(tuple); + + /* Success --- now copy to the cache memory */ + partkeycxt = AllocSetContextCreate(CacheMemoryContext, + RelationGetRelationName(relation), + ALLOCSET_SMALL_SIZES); + relation->rd_partkeycxt = partkeycxt; + oldcxt = MemoryContextSwitchTo(relation->rd_partkeycxt); + relation->rd_partkey = copy_partition_key(key); + MemoryContextSwitchTo(oldcxt); +} + +/* + * copy_partition_key + * + * The copy is allocated in the current memory context. + * The per-column arrays and the expression list are duplicated as well. 
+ */ +static PartitionKey +copy_partition_key(PartitionKey fromkey) +{ + PartitionKey newkey; + int n; + int i; + + newkey = (PartitionKey) palloc(sizeof(PartitionKeyData)); + + newkey->strategy = fromkey->strategy; + newkey->partnatts = n = fromkey->partnatts; + + newkey->partattrs = (AttrNumber *) palloc(n * sizeof(AttrNumber)); + memcpy(newkey->partattrs, fromkey->partattrs, n * sizeof(AttrNumber)); + + newkey->partexprs = copyObject(fromkey->partexprs); + + newkey->partopfamily = (Oid *) palloc(n * sizeof(Oid)); + memcpy(newkey->partopfamily, fromkey->partopfamily, n * sizeof(Oid)); + + newkey->partopcintype = (Oid *) palloc(n * sizeof(Oid)); + memcpy(newkey->partopcintype, fromkey->partopcintype, n * sizeof(Oid)); + + newkey->partsupfunc = (FmgrInfo *) palloc(n * sizeof(FmgrInfo)); + /* memcpy would leave fn_mcxt bound to the source's memory context */ + for (i = 0; i < n; i++) + fmgr_info_copy(&newkey->partsupfunc[i], &fromkey->partsupfunc[i], + CurrentMemoryContext); + + newkey->partcollation = (Oid *) palloc(n * sizeof(Oid)); + memcpy(newkey->partcollation, fromkey->partcollation, n * sizeof(Oid)); + + newkey->parttypid = (Oid *) palloc(n * sizeof(Oid)); + memcpy(newkey->parttypid, fromkey->parttypid, n * sizeof(Oid)); + + newkey->parttypmod = (int32 *) palloc(n * sizeof(int32)); + memcpy(newkey->parttypmod, fromkey->parttypmod, n * sizeof(int32)); + + newkey->parttyplen = (int16 *) palloc(n * sizeof(int16)); + memcpy(newkey->parttyplen, fromkey->parttyplen, n * sizeof(int16)); + + newkey->parttypbyval = (bool *) palloc(n * sizeof(bool)); + memcpy(newkey->parttypbyval, fromkey->parttypbyval, n * sizeof(bool)); + + newkey->parttypalign = (char *) palloc(n * sizeof(char)); + memcpy(newkey->parttypalign, fromkey->parttypalign, n * sizeof(char)); + + newkey->parttypcoll = (Oid *) palloc(n * sizeof(Oid)); + memcpy(newkey->parttypcoll, fromkey->parttypcoll, n * sizeof(Oid)); + + return newkey; +} + /* * equalRuleLocks * @@ -922,6 +1161,58 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2) return true; } +/* + * equalPartitionDescs + * Compare two partition descriptors 
for logical equality + */ +static bool +equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1, + PartitionDesc partdesc2) +{ + int i; + + if (partdesc1 != NULL) + { + if (partdesc2 == NULL) + return false; + if (partdesc1->nparts != partdesc2->nparts) + return false; + + Assert(key != NULL || partdesc1->nparts == 0); /* a missing key is tolerated only when there are no partitions */ + + /* + * Same oids? If the partitioning structure did not change, that is, + * no partitions were added or removed to the relation, the oids array + * should still match element-by-element. + */ + for (i = 0; i < partdesc1->nparts; i++) + { + if (partdesc1->oids[i] != partdesc2->oids[i]) + return false; + } + + /* + * Now compare partition bound collections. The logic to iterate over + * the collections is private to partition.c. + */ + if (partdesc1->boundinfo != NULL) + { + if (partdesc2->boundinfo == NULL) + return false; + + if (!partition_bounds_equal(key, partdesc1->boundinfo, + partdesc2->boundinfo)) + return false; + } + else if (partdesc2->boundinfo != NULL) + return false; /* only the second descriptor has bound info */ + } + else if (partdesc2 != NULL) + return false; /* only the second descriptor exists */ + + return true; /* both descriptors are NULL, or every check above passed */ +} + /* * RelationBuildDesc * @@ -1050,6 +1341,20 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) relation->rd_fkeylist = NIL; relation->rd_fkeyvalid = false; + /* if a partitioned table, initialize key and partition descriptor info */ + if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + RelationBuildPartitionKey(relation); + RelationBuildPartitionDesc(relation); + } + else + { + relation->rd_partkeycxt = NULL; + relation->rd_partkey = NULL; + relation->rd_partdesc = NULL; + relation->rd_pdcxt = NULL; + } + /* * if it's an index, initialize index-related information */ @@ -2042,6 +2347,12 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) MemoryContextDelete(relation->rd_rulescxt); if (relation->rd_rsdesc) MemoryContextDelete(relation->rd_rsdesc->rscxt); + if (relation->rd_partkeycxt) + MemoryContextDelete(relation->rd_partkeycxt); + if (relation->rd_pdcxt) + 
MemoryContextDelete(relation->rd_pdcxt); + if (relation->rd_partcheck) + pfree(relation->rd_partcheck); if (relation->rd_fdwroutine) pfree(relation->rd_fdwroutine); pfree(relation); @@ -2190,11 +2501,12 @@ RelationClearRelation(Relation relation, bool rebuild) * * When rebuilding an open relcache entry, we must preserve ref count, * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also - * attempt to preserve the pg_class entry (rd_rel), tupledesc, and - * rewrite-rule substructures in place, because various places assume - * that these structures won't move while they are working with an - * open relcache entry. (Note: the refcount mechanism for tupledescs - * might someday allow us to remove this hack for the tupledesc.) + * attempt to preserve the pg_class entry (rd_rel), tupledesc, + * rewrite-rule, partition key, and partition descriptor substructures + * in place, because various places assume that these structures won't + * move while they are working with an open relcache entry. (Note: + * the refcount mechanism for tupledescs might someday allow us to + * remove this hack for the tupledesc.) 
* * Note that this process does not touch CurrentResourceOwner; which * is good because whatever ref counts the entry may have do not @@ -2205,6 +2517,8 @@ RelationClearRelation(Relation relation, bool rebuild) bool keep_tupdesc; bool keep_rules; bool keep_policies; + bool keep_partkey; + bool keep_partdesc; /* Build temporary entry, but don't link it into hashtable */ newrel = RelationBuildDesc(save_relid, false); @@ -2235,6 +2549,10 @@ RelationClearRelation(Relation relation, bool rebuild) keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att); keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules); keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc); + keep_partkey = (relation->rd_partkey != NULL); + keep_partdesc = equalPartitionDescs(relation->rd_partkey, + relation->rd_partdesc, + newrel->rd_partdesc); /* * Perform swapping of the relcache entry contents. Within this @@ -2289,6 +2607,18 @@ RelationClearRelation(Relation relation, bool rebuild) SWAPFIELD(Oid, rd_toastoid); /* pgstat_info must be preserved */ SWAPFIELD(struct PgStat_TableStatus *, pgstat_info); + /* partition key must be preserved, if we have one */ + if (keep_partkey) + { + SWAPFIELD(PartitionKey, rd_partkey); + SWAPFIELD(MemoryContext, rd_partkeycxt); + } + /* preserve old partdesc if no logical change */ + if (keep_partdesc) + { + SWAPFIELD(PartitionDesc, rd_partdesc); + SWAPFIELD(MemoryContext, rd_pdcxt); + } #undef SWAPFIELD @@ -2983,7 +3313,9 @@ RelationBuildLocalRelation(const char *relname, /* system relations and non-table objects don't have one */ if (!IsSystemNamespace(relnamespace) && - (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW)) + (relkind == RELKIND_RELATION || + relkind == RELKIND_MATVIEW || + relkind == RELKIND_PARTITIONED_TABLE)) rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT; else rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING; @@ -3514,6 +3846,20 @@ RelationCacheInitializePhase3(void) restart = true; } + /* + * 
Load partition key and descriptor of a partitioned table, unless + * already loaded, so that a restarted scan does not rebuild them. + */ + if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && + relation->rd_partkey == NULL) + { + RelationBuildPartitionKey(relation); + Assert(relation->rd_partkey != NULL); + RelationBuildPartitionDesc(relation); + Assert(relation->rd_partdesc != NULL); + restart = true; + } + /* Release hold on the relation */ RelationDecrementReferenceCount(relation); @@ -4267,6 +4613,8 @@ RelationGetIndexExpressions(Relation relation) */ result = (List *) eval_const_expressions(NULL, (Node *) result); + result = (List *) canonicalize_qual((Expr *) result); + /* May as well fix opfuncids too */ fix_opfuncids((Node *) result); @@ -5035,6 +5383,11 @@ load_relcache_init_file(bool shared) rel->rd_rulescxt = NULL; rel->trigdesc = NULL; rel->rd_rsdesc = NULL; + rel->rd_partkeycxt = NULL; + rel->rd_partkey = NULL; + rel->rd_partdesc = NULL; + rel->rd_pdcxt = NULL; + rel->rd_partcheck = NIL; rel->rd_indexprs = NIL; rel->rd_indpred = NIL; rel->rd_exclops = NULL; diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 65ffe84409..a3e0517b94 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -48,6 +48,7 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" +#include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" #include "catalog/pg_range.h" #include "catalog/pg_rewrite.h" @@ -568,6 +569,17 @@ static const struct cachedesc cacheinfo[] = { }, 8 }, + {PartitionedRelationId, /* PARTRELID */ + PartitionedRelidIndexId, + 1, + { + Anum_pg_partitioned_table_partrelid, + 0, + 0, + 0 + }, + 32 + }, {ProcedureRelationId, /* PROCNAMEARGSNSP */ ProcedureNameArgsNspIndexId, 3, diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index 1cbb9874f3..22f1806eca 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -68,6 +68,8 @@ static int numextmembers; static void flagInhTables(TableInfo *tbinfo, int numTables, InhInfo 
*inhinfo, int numInherits); +static void flagPartitions(TableInfo *tblinfo, int numTables, + PartInfo *partinfo, int numPartitions); static void flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables); static DumpableObject **buildIndexArray(void *objArray, int numObjs, Size objSize); @@ -75,6 +77,8 @@ static int DOCatalogIdCompare(const void *p1, const void *p2); static int ExtensionMemberIdCompare(const void *p1, const void *p2); static void findParentsByOid(TableInfo *self, InhInfo *inhinfo, int numInherits); +static void findPartitionParentByOid(TableInfo *self, PartInfo *partinfo, + int numPartitions); static int strInArray(const char *pattern, char **arr, int arr_size); @@ -93,8 +97,10 @@ getSchemaData(Archive *fout, int *numTablesPtr) NamespaceInfo *nspinfo; ExtensionInfo *extinfo; InhInfo *inhinfo; + PartInfo *partinfo; int numAggregates; int numInherits; + int numPartitions; int numRules; int numProcLangs; int numCasts; @@ -231,6 +237,10 @@ getSchemaData(Archive *fout, int *numTablesPtr) write_msg(NULL, "reading table inheritance information\n"); inhinfo = getInherits(fout, &numInherits); + if (g_verbose) + write_msg(NULL, "reading partition information\n"); + partinfo = getPartitions(fout, &numPartitions); + if (g_verbose) write_msg(NULL, "reading event triggers\n"); getEventTriggers(fout, &numEventTriggers); @@ -245,6 +255,11 @@ getSchemaData(Archive *fout, int *numTablesPtr) write_msg(NULL, "finding inheritance relationships\n"); flagInhTables(tblinfo, numTables, inhinfo, numInherits); + /* Link tables to partition parents, mark parents as interesting */ + if (g_verbose) + write_msg(NULL, "finding partition relationships\n"); + flagPartitions(tblinfo, numTables, partinfo, numPartitions); + if (g_verbose) write_msg(NULL, "reading column info for interesting tables\n"); getTableAttrs(fout, tblinfo, numTables); @@ -273,6 +288,10 @@ getSchemaData(Archive *fout, int *numTablesPtr) write_msg(NULL, "reading policies\n"); getPolicies(fout, tblinfo, 
numTables); + if (g_verbose) + write_msg(NULL, "reading partition key information for interesting tables\n"); + getTablePartitionKeyInfo(fout, tblinfo, numTables); + *numTablesPtr = numTables; return tblinfo; } @@ -319,6 +338,43 @@ flagInhTables(TableInfo *tblinfo, int numTables, } } +/* flagPartitions - + * Fill in parent link fields of every target table that is partition, + * and mark parents of partitions as interesting + * + * modifies tblinfo + */ +static void +flagPartitions(TableInfo *tblinfo, int numTables, + PartInfo *partinfo, int numPartitions) +{ + int i; + + for (i = 0; i < numTables; i++) + { + /* Some kinds are never partitions */ + if (tblinfo[i].relkind == RELKIND_SEQUENCE || + tblinfo[i].relkind == RELKIND_VIEW || + tblinfo[i].relkind == RELKIND_MATVIEW) + continue; + + /* Don't bother computing anything for non-target tables, either */ + if (!tblinfo[i].dobj.dump) + continue; + + /* Find the parent TableInfo and save */ + findPartitionParentByOid(&tblinfo[i], partinfo, numPartitions); + + /* Mark the parent as interesting for getTableAttrs */ + if (tblinfo[i].partitionOf) + { + tblinfo[i].partitionOf->interesting = true; + addObjectDependency(&tblinfo[i].dobj, + tblinfo[i].partitionOf->dobj.dumpId); + } + } +} + /* flagInhAttrs - * for each dumpable table in tblinfo, flag its inherited attributes * @@ -919,6 +975,40 @@ findParentsByOid(TableInfo *self, self->parents = NULL; } +/* + * findPartitionParentByOid + * find a partition's parent in tblinfo[] + */ +static void +findPartitionParentByOid(TableInfo *self, PartInfo *partinfo, + int numPartitions) +{ + Oid oid = self->dobj.catId.oid; + int i; + + for (i = 0; i < numPartitions; i++) + { + if (partinfo[i].partrelid == oid) + { + TableInfo *parent; + + parent = findTableByOid(partinfo[i].partparent); + if (parent == NULL) + { + write_msg(NULL, "failed sanity check, parent OID %u of table \"%s\" (OID %u) not found\n", + partinfo[i].partparent, + self->dobj.name, + oid); + exit_nicely(1); + } + 
self->partitionOf = parent; + + /* While we're at it, also save the partdef */ + self->partitiondef = partinfo[i].partdef; + } + } +} + /* * parseOidArray * parse a string of numbers delimited by spaces into a character array diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 42873bb32a..b43d152e77 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -1239,9 +1239,10 @@ expand_table_name_patterns(Archive *fout, "SELECT c.oid" "\nFROM pg_catalog.pg_class c" "\n LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace" - "\nWHERE c.relkind in ('%c', '%c', '%c', '%c', '%c')\n", + "\nWHERE c.relkind in ('%c', '%c', '%c', '%c', '%c', '%c')\n", RELKIND_RELATION, RELKIND_SEQUENCE, RELKIND_VIEW, - RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE); + RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE, + RELKIND_PARTITIONED_TABLE); processSQLNamePattern(GetConnection(fout), query, cell->val, true, false, "n.nspname", "c.relname", NULL, "pg_catalog.pg_table_is_visible(c.oid)"); @@ -2098,6 +2099,9 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids) /* Skip FOREIGN TABLEs (no data to dump) */ if (tbinfo->relkind == RELKIND_FOREIGN_TABLE) return; + /* Skip partitioned tables (data in partitions) */ + if (tbinfo->relkind == RELKIND_PARTITIONED_TABLE) + return; /* Don't dump data in unlogged tables, if so requested */ if (tbinfo->relpersistence == RELPERSISTENCE_UNLOGGED && @@ -4993,7 +4997,7 @@ getTables(Archive *fout, int *numTables) "(c.oid = pip.objoid " "AND pip.classoid = 'pg_class'::regclass " "AND pip.objsubid = 0) " - "WHERE c.relkind in ('%c', '%c', '%c', '%c', '%c', '%c') " + "WHERE c.relkind in ('%c', '%c', '%c', '%c', '%c', '%c', '%c') " "ORDER BY c.oid", acl_subquery->data, racl_subquery->data, @@ -5007,7 +5011,8 @@ getTables(Archive *fout, int *numTables) RELKIND_SEQUENCE, RELKIND_RELATION, RELKIND_SEQUENCE, RELKIND_VIEW, RELKIND_COMPOSITE_TYPE, - RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE); + RELKIND_MATVIEW, 
RELKIND_FOREIGN_TABLE, + RELKIND_PARTITIONED_TABLE); destroyPQExpBuffer(acl_subquery); destroyPQExpBuffer(racl_subquery); @@ -5535,7 +5540,9 @@ getTables(Archive *fout, int *numTables) * We only need to lock the table for certain components; see * pg_dump.h */ - if (tblinfo[i].dobj.dump && tblinfo[i].relkind == RELKIND_RELATION && + if (tblinfo[i].dobj.dump && + (tblinfo[i].relkind == RELKIND_RELATION || + tblinfo[i].relkind == RELKIND_PARTITIONED_TABLE) && (tblinfo[i].dobj.dump & DUMP_COMPONENTS_REQUIRING_LOCK)) { resetPQExpBuffer(query); @@ -5635,9 +5642,16 @@ getInherits(Archive *fout, int *numInherits) /* Make sure we are in proper schema */ selectSourceSchema(fout, "pg_catalog"); - /* find all the inheritance information */ - - appendPQExpBufferStr(query, "SELECT inhrelid, inhparent FROM pg_inherits"); + /* + * Find all the inheritance information, excluding implicit inheritance + * via partitioning. We handle that case using getPartitions(), because + * we want more information about partitions than just the parent-child + * relationship. 
+ */ + appendPQExpBufferStr(query, + "SELECT inhrelid, inhparent " + "FROM pg_inherits " + "WHERE inhparent NOT IN (SELECT oid FROM pg_class WHERE relkind = 'P')"); res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); @@ -5663,6 +5677,70 @@ getInherits(Archive *fout, int *numInherits) return inhinfo; } +/* + * getPartitions + * read all the partition inheritance and partition bound information + * from the system catalogs return them in the PartInfo* structure + * + * numPartitions is set to the number of pairs read in + */ +PartInfo * +getPartitions(Archive *fout, int *numPartitions) +{ + PGresult *res; + int ntups; + int i; + PQExpBuffer query = createPQExpBuffer(); + PartInfo *partinfo; + + int i_partrelid; + int i_partparent; + int i_partbound; + + /* Before version 10, there are no partitions */ + if (fout->remoteVersion < 100000) + { + *numPartitions = 0; + return NULL; + } + + /* Make sure we are in proper schema */ + selectSourceSchema(fout, "pg_catalog"); + + /* find the inheritance and boundary information about partitions */ + + appendPQExpBufferStr(query, + "SELECT inhrelid as partrelid, inhparent AS partparent," + " pg_get_expr(relpartbound, inhrelid) AS partbound" + " FROM pg_class c, pg_inherits" + " WHERE c.oid = inhrelid AND c.relispartition"); + + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + ntups = PQntuples(res); + + *numPartitions = ntups; + + partinfo = (PartInfo *) pg_malloc(ntups * sizeof(PartInfo)); + + i_partrelid = PQfnumber(res, "partrelid"); + i_partparent = PQfnumber(res, "partparent"); + i_partbound = PQfnumber(res, "partbound"); + + for (i = 0; i < ntups; i++) + { + partinfo[i].partrelid = atooid(PQgetvalue(res, i, i_partrelid)); + partinfo[i].partparent = atooid(PQgetvalue(res, i, i_partparent)); + partinfo[i].partdef = pg_strdup(PQgetvalue(res, i, i_partbound)); + } + + PQclear(res); + + destroyPQExpBuffer(query); + + return partinfo; +} + /* * getIndexes * get information about every index on a dumpable 
table @@ -6933,6 +7011,53 @@ getTransforms(Archive *fout, int *numTransforms) return transforminfo; } +/* + * getTablePartitionKeyInfo - + * for each interesting partitioned table, read information about its + * partition key + * + * modifies tblinfo + */ +void +getTablePartitionKeyInfo(Archive *fout, TableInfo *tblinfo, int numTables) +{ + PQExpBuffer q; + int i, + ntups; + PGresult *res; + + /* No partitioned tables before 10 */ + if (fout->remoteVersion < 100000) + return; + + q = createPQExpBuffer(); + + for (i = 0; i < numTables; i++) + { + TableInfo *tbinfo = &(tblinfo[i]); + + /* Only partitioned tables have partition key */ + if (tbinfo->relkind != RELKIND_PARTITIONED_TABLE) + continue; + + /* Don't bother computing anything for non-target tables, either */ + if (!tbinfo->dobj.dump) + continue; + + resetPQExpBuffer(q); + appendPQExpBuffer(q, "SELECT pg_catalog.pg_get_partkeydef('%u'::pg_catalog.oid)", + tbinfo->dobj.catId.oid); + res = ExecuteSqlQuery(fout, q->data, PGRES_TUPLES_OK); + ntups = PQntuples(res); + Assert(ntups == 1); + /* pg_strdup copies the value, so the result can be released */ + tbinfo->partkeydef = pg_strdup(PQgetvalue(res, 0, 0)); + PQclear(res); + } + + destroyPQExpBuffer(q); +} + /* * getTableAttrs - * for each interesting table, read info about its attributes @@ -14201,6 +14320,17 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) if (tbinfo->reloftype && !dopt->binary_upgrade) appendPQExpBuffer(q, " OF %s", tbinfo->reloftype); + if (tbinfo->partitionOf && !dopt->binary_upgrade) + { + TableInfo *parentRel = tbinfo->partitionOf; + + appendPQExpBuffer(q, " PARTITION OF "); + if (parentRel->dobj.namespace != tbinfo->dobj.namespace) + appendPQExpBuffer(q, "%s.", + fmtId(parentRel->dobj.namespace->dobj.name)); + appendPQExpBufferStr(q, fmtId(parentRel->dobj.name)); + } + if (tbinfo->relkind != RELKIND_MATVIEW) { /* Dump the attributes */ @@ -14229,8 +14359,11 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) (!tbinfo->inhNotNull[j] || dopt->binary_upgrade)); - /* Skip column if fully defined by reloftype */ - if (tbinfo->reloftype && + /* + * Skip 
column if fully defined by reloftype or the + * partition parent. + */ + if ((tbinfo->reloftype || tbinfo->partitionOf) && !has_default && !has_notnull && !dopt->binary_upgrade) continue; @@ -14259,7 +14392,8 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) } /* Attribute type */ - if (tbinfo->reloftype && !dopt->binary_upgrade) + if ((tbinfo->reloftype || tbinfo->partitionOf) && + !dopt->binary_upgrade) { appendPQExpBufferStr(q, " WITH OPTIONS"); } @@ -14317,15 +14451,22 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) if (actual_atts) appendPQExpBufferStr(q, "\n)"); - else if (!(tbinfo->reloftype && !dopt->binary_upgrade)) + else if (!((tbinfo->reloftype || tbinfo->partitionOf) && + !dopt->binary_upgrade)) { /* * We must have a parenthesized attribute list, even though - * empty, when not using the OF TYPE syntax. + * empty, when not using the OF TYPE or PARTITION OF syntax. */ appendPQExpBufferStr(q, " (\n)"); } + if (tbinfo->partitiondef && !dopt->binary_upgrade) + { + appendPQExpBufferStr(q, "\n"); + appendPQExpBufferStr(q, tbinfo->partitiondef); + } + if (numParents > 0 && !dopt->binary_upgrade) { appendPQExpBufferStr(q, "\nINHERITS ("); @@ -14343,6 +14484,9 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) appendPQExpBufferChar(q, ')'); } + if (tbinfo->relkind == RELKIND_PARTITIONED_TABLE) + appendPQExpBuffer(q, "\nPARTITION BY %s", tbinfo->partkeydef); + if (tbinfo->relkind == RELKIND_FOREIGN_TABLE) appendPQExpBuffer(q, "\nSERVER %s", fmtId(srvname)); } @@ -14403,7 +14547,8 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) */ if (dopt->binary_upgrade && (tbinfo->relkind == RELKIND_RELATION || - tbinfo->relkind == RELKIND_FOREIGN_TABLE)) + tbinfo->relkind == RELKIND_FOREIGN_TABLE || + tbinfo->relkind == RELKIND_PARTITIONED_TABLE)) { for (j = 0; j < tbinfo->numatts; j++) { @@ -14421,7 +14566,8 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) appendStringLiteralAH(q, fmtId(tbinfo->dobj.name), fout); appendPQExpBufferStr(q, 
"::pg_catalog.regclass;\n"); - if (tbinfo->relkind == RELKIND_RELATION) + if (tbinfo->relkind == RELKIND_RELATION || + tbinfo->relkind == RELKIND_PARTITIONED_TABLE) appendPQExpBuffer(q, "ALTER TABLE ONLY %s ", fmtId(tbinfo->dobj.name)); else @@ -14490,6 +14636,17 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) tbinfo->reloftype); } + if (tbinfo->partitionOf) + { + appendPQExpBufferStr(q, "\n-- For binary upgrade, set up partitions this way.\n"); + /* Quote both identifiers, one fmtId() per call as elsewhere in this function */ + appendPQExpBuffer(q, "ALTER TABLE ONLY %s ", + fmtId(tbinfo->partitionOf->dobj.name)); + appendPQExpBuffer(q, "ATTACH PARTITION %s %s;\n", + fmtId(tbinfo->dobj.name), + tbinfo->partitiondef); + } + appendPQExpBufferStr(q, "\n-- For binary upgrade, set heap's relfrozenxid and relminmxid\n"); appendPQExpBuffer(q, "UPDATE pg_catalog.pg_class\n" "SET relfrozenxid = '%u', relminmxid = '%u'\n" @@ -14638,6 +14793,7 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo) * dump properties we only have ALTER TABLE syntax for */ if ((tbinfo->relkind == RELKIND_RELATION || + tbinfo->relkind == RELKIND_PARTITIONED_TABLE || tbinfo->relkind == RELKIND_MATVIEW) && tbinfo->relreplident != REPLICA_IDENTITY_DEFAULT) { diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 7df9066cd7..395a9f3288 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -312,6 +312,7 @@ typedef struct _tableInfo bool *inhNotNull; /* true if NOT NULL is inherited */ struct _attrDefInfo **attrdefs; /* DEFAULT expressions */ struct _constraintInfo *checkexprs; /* CHECK constraints */ + char *partkeydef; /* partition key definition */ /* * Stuff computed only for dumpable tables. 
@@ -321,6 +322,8 @@ typedef struct _tableInfo struct _tableDataInfo *dataObj; /* TableDataInfo, if dumping its data */ int numTriggers; /* number of triggers for table */ struct _triggerInfo *triggers; /* array of TriggerInfo structs */ + struct _tableInfo *partitionOf; /* TableInfo for the partition parent */ + char *partitiondef; /* partition bound definition */ } TableInfo; typedef struct _attrDefInfo @@ -459,6 +462,15 @@ typedef struct _inhInfo Oid inhparent; /* OID of its parent */ } InhInfo; +/* PartInfo isn't a DumpableObject, just temporary state */ +typedef struct _partInfo +{ + Oid partrelid; /* OID of a partition */ + Oid partparent; /* OID of its parent */ + char *partdef; /* partition bound definition */ +} PartInfo; + + typedef struct _prsInfo { DumpableObject dobj; @@ -625,6 +637,7 @@ extern ConvInfo *getConversions(Archive *fout, int *numConversions); extern TableInfo *getTables(Archive *fout, int *numTables); extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables); extern InhInfo *getInherits(Archive *fout, int *numInherits); +extern PartInfo *getPartitions(Archive *fout, int *numPartitions); extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables); extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables); extern RuleInfo *getRules(Archive *fout, int *numRules); @@ -649,5 +662,6 @@ extern void processExtensionTables(Archive *fout, ExtensionInfo extinfo[], int numExtensions); extern EventTriggerInfo *getEventTriggers(Archive *fout, int *numEventTriggers); extern void getPolicies(Archive *fout, TableInfo tblinfo[], int numTables); +extern void getTablePartitionKeyInfo(Archive *fout, TableInfo *tblinfo, int numTables); #endif /* PG_DUMP_H */ diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 931c6887f9..f0d955be4f 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -865,6 +865,7 @@ permissionsList(const char *pattern) " WHEN 'm' THEN '%s'" " WHEN 'S' THEN '%s'" 
" WHEN 'f' THEN '%s'" + " WHEN 'P' THEN '%s'" " END as \"%s\",\n" " ", gettext_noop("Schema"), @@ -874,6 +875,7 @@ permissionsList(const char *pattern) gettext_noop("materialized view"), gettext_noop("sequence"), gettext_noop("foreign table"), + gettext_noop("table"), /* partitioned table */ gettext_noop("Type")); printACLColumn(&buf, "c.relacl"); @@ -954,7 +956,7 @@ permissionsList(const char *pattern) appendPQExpBufferStr(&buf, "\nFROM pg_catalog.pg_class c\n" " LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n" - "WHERE c.relkind IN ('r', 'v', 'm', 'S', 'f')\n"); + "WHERE c.relkind IN ('r', 'v', 'm', 'S', 'f', 'P')\n"); /* * Unless a schema pattern is specified, we suppress system and temp @@ -1600,8 +1602,8 @@ describeOneTableDetails(const char *schemaname, * types, and foreign tables (c.f. CommentObject() in comment.c). */ if (tableinfo.relkind == 'r' || tableinfo.relkind == 'v' || - tableinfo.relkind == 'm' || - tableinfo.relkind == 'f' || tableinfo.relkind == 'c') + tableinfo.relkind == 'm' || tableinfo.relkind == 'f' || + tableinfo.relkind == 'c' || tableinfo.relkind == 'P') appendPQExpBufferStr(&buf, ", pg_catalog.col_description(a.attrelid, a.attnum)"); } @@ -1666,6 +1668,14 @@ describeOneTableDetails(const char *schemaname, printfPQExpBuffer(&title, _("Foreign table \"%s.%s\""), schemaname, relationname); break; + case 'P': + if (tableinfo.relpersistence == 'u') + printfPQExpBuffer(&title, _("Unlogged table \"%s.%s\""), + schemaname, relationname); + else + printfPQExpBuffer(&title, _("Table \"%s.%s\""), + schemaname, relationname); + break; default: /* untranslated unknown relkind */ printfPQExpBuffer(&title, "?%c? 
\"%s.%s\"", @@ -1679,8 +1689,8 @@ describeOneTableDetails(const char *schemaname, cols = 2; if (tableinfo.relkind == 'r' || tableinfo.relkind == 'v' || - tableinfo.relkind == 'm' || - tableinfo.relkind == 'f' || tableinfo.relkind == 'c') + tableinfo.relkind == 'm' || tableinfo.relkind == 'f' || + tableinfo.relkind == 'c' || tableinfo.relkind == 'P') { headers[cols++] = gettext_noop("Collation"); headers[cols++] = gettext_noop("Nullable"); @@ -1701,12 +1711,12 @@ describeOneTableDetails(const char *schemaname, { headers[cols++] = gettext_noop("Storage"); if (tableinfo.relkind == 'r' || tableinfo.relkind == 'm' || - tableinfo.relkind == 'f') + tableinfo.relkind == 'f' || tableinfo.relkind == 'P') headers[cols++] = gettext_noop("Stats target"); /* Column comments, if the relkind supports this feature. */ if (tableinfo.relkind == 'r' || tableinfo.relkind == 'v' || - tableinfo.relkind == 'm' || - tableinfo.relkind == 'c' || tableinfo.relkind == 'f') + tableinfo.relkind == 'm' || tableinfo.relkind == 'c' || + tableinfo.relkind == 'f' || tableinfo.relkind == 'P') headers[cols++] = gettext_noop("Description"); } @@ -1782,7 +1792,7 @@ describeOneTableDetails(const char *schemaname, /* Statistics target, if the relkind supports this feature */ if (tableinfo.relkind == 'r' || tableinfo.relkind == 'm' || - tableinfo.relkind == 'f') + tableinfo.relkind == 'f' || tableinfo.relkind == 'P') { printTableAddCell(&cont, PQgetvalue(res, i, firstvcol + 1), false, false); @@ -1790,14 +1800,61 @@ describeOneTableDetails(const char *schemaname, /* Column comments, if the relkind supports this feature. 
*/ if (tableinfo.relkind == 'r' || tableinfo.relkind == 'v' || - tableinfo.relkind == 'm' || - tableinfo.relkind == 'c' || tableinfo.relkind == 'f') + tableinfo.relkind == 'm' || tableinfo.relkind == 'c' || + tableinfo.relkind == 'f' || tableinfo.relkind == 'P') printTableAddCell(&cont, PQgetvalue(res, i, firstvcol + 2), false, false); } } /* Make footers */ + if (pset.sversion >= 100000) + { + /* Get partition information; relispartition exists only in v10+ */ + PGresult *result; + char *parent_name; + char *partdef; + + printfPQExpBuffer(&buf, + "SELECT inhparent::pg_catalog.regclass, pg_catalog.pg_get_expr(c.relpartbound, inhrelid)" + " FROM pg_catalog.pg_class c" + " JOIN pg_catalog.pg_inherits" + " ON c.oid = inhrelid" + " WHERE c.oid = '%s' AND c.relispartition;", oid); + result = PSQLexec(buf.data); + if (!result) + goto error_return; + + if (PQntuples(result) > 0) + { + parent_name = PQgetvalue(result, 0, 0); + partdef = PQgetvalue(result, 0, 1); + printfPQExpBuffer(&tmpbuf, _("Partition of: %s %s"), parent_name, + partdef); + printTableAddFooter(&cont, tmpbuf.data); + } + PQclear(result); + } + + if (tableinfo.relkind == 'P') + { + /* Get the partition key information */ + PGresult *result; + char *partkeydef; + + printfPQExpBuffer(&buf, + "SELECT pg_catalog.pg_get_partkeydef('%s'::pg_catalog.oid);", + oid); + result = PSQLexec(buf.data); + if (!result || PQntuples(result) != 1) + goto error_return; + + partkeydef = PQgetvalue(result, 0, 0); + printfPQExpBuffer(&tmpbuf, _("Partition key: %s"), partkeydef); + printTableAddFooter(&cont, tmpbuf.data); + PQclear(result); + } + if (tableinfo.relkind == 'i') { /* Footer information about an index */ @@ -1936,7 +1993,7 @@ describeOneTableDetails(const char *schemaname, PQclear(result); } else if (tableinfo.relkind == 'r' || tableinfo.relkind == 'm' || - tableinfo.relkind == 'f') + tableinfo.relkind == 'f' || tableinfo.relkind == 'P') { /* Footer information about a table */ PGresult *result = NULL; @@ -2513,7 +2570,7 @@ describeOneTableDetails(const char 
*schemaname, * Finish printing the footer information about a table. */ if (tableinfo.relkind == 'r' || tableinfo.relkind == 'm' || - tableinfo.relkind == 'f') + tableinfo.relkind == 'f' || tableinfo.relkind == 'P') { PGresult *result; int tuples; @@ -2558,8 +2615,12 @@ describeOneTableDetails(const char *schemaname, PQclear(result); } - /* print inherited tables */ - printfPQExpBuffer(&buf, "SELECT c.oid::pg_catalog.regclass FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i WHERE c.oid=i.inhparent AND i.inhrelid = '%s' ORDER BY inhseqno;", oid); + /* print inherited tables (exclude, if parent is a partitioned table) */ + printfPQExpBuffer(&buf, + "SELECT c.oid::pg_catalog.regclass" + " FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i" + " WHERE c.oid=i.inhparent AND i.inhrelid = '%s'" + " AND c.relkind != 'P' ORDER BY inhseqno;", oid); result = PSQLexec(buf.data); if (!result) @@ -2588,9 +2649,23 @@ describeOneTableDetails(const char *schemaname, PQclear(result); } - /* print child tables */ - if (pset.sversion >= 80300) - printfPQExpBuffer(&buf, "SELECT c.oid::pg_catalog.regclass FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i WHERE c.oid=i.inhrelid AND i.inhparent = '%s' ORDER BY c.oid::pg_catalog.regclass::pg_catalog.text;", oid); + /* print child tables (with additional info if partitions) */ + if (pset.sversion >= 100000) + printfPQExpBuffer(&buf, + "SELECT c.oid::pg_catalog.regclass, pg_get_expr(c.relpartbound, c.oid)" + " FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i" + " WHERE c.oid=i.inhrelid AND" + " i.inhparent = '%s' AND" + " EXISTS (SELECT 1 FROM pg_class c WHERE c.oid = '%s')" + " ORDER BY c.oid::pg_catalog.regclass::pg_catalog.text;", oid, oid); + else if (pset.sversion >= 80300) + printfPQExpBuffer(&buf, + "SELECT c.oid::pg_catalog.regclass" + " FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i" + " WHERE c.oid=i.inhrelid AND" + " i.inhparent = '%s' AND" + " EXISTS (SELECT 1 FROM pg_class c WHERE c.oid = '%s')" + " ORDER BY 
c.oid::pg_catalog.regclass::pg_catalog.text;", oid, oid); else printfPQExpBuffer(&buf, "SELECT c.oid::pg_catalog.regclass FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i WHERE c.oid=i.inhrelid AND i.inhparent = '%s' ORDER BY c.relname;", oid); @@ -2605,24 +2680,39 @@ describeOneTableDetails(const char *schemaname, /* print the number of child tables, if any */ if (tuples > 0) { - printfPQExpBuffer(&buf, _("Number of child tables: %d (Use \\d+ to list them.)"), tuples); + if (tableinfo.relkind != 'P') + printfPQExpBuffer(&buf, _("Number of child tables: %d (Use \\d+ to list them.)"), tuples); + else + printfPQExpBuffer(&buf, _("Number of partitions: %d (Use \\d+ to list them.)"), tuples); printTableAddFooter(&cont, buf.data); } } else { /* display the list of child tables */ - const char *ct = _("Child tables"); + const char *ct = tableinfo.relkind != 'P' ? _("Child tables") : _("Partitions"); int ctw = pg_wcswidth(ct, strlen(ct), pset.encoding); for (i = 0; i < tuples; i++) { - if (i == 0) - printfPQExpBuffer(&buf, "%s: %s", - ct, PQgetvalue(result, i, 0)); + if (tableinfo.relkind != 'P') + { + if (i == 0) + printfPQExpBuffer(&buf, "%s: %s", + ct, PQgetvalue(result, i, 0)); + else + printfPQExpBuffer(&buf, "%*s %s", + ctw, "", PQgetvalue(result, i, 0)); + } else - printfPQExpBuffer(&buf, "%*s %s", - ctw, "", PQgetvalue(result, i, 0)); + { + if (i == 0) + printfPQExpBuffer(&buf, "%s: %s %s", + ct, PQgetvalue(result, i, 0), PQgetvalue(result, i, 1)); + else + printfPQExpBuffer(&buf, "%*s %s %s", + ctw, "", PQgetvalue(result, i, 0), PQgetvalue(result, i, 1)); + } if (i < tuples - 1) appendPQExpBufferChar(&buf, ','); @@ -2717,7 +2807,7 @@ add_tablespace_footer(printTableContent *const cont, char relkind, Oid tablespace, const bool newline) { /* relkinds for which we support tablespaces */ - if (relkind == 'r' || relkind == 'm' || relkind == 'i') + if (relkind == 'r' || relkind == 'm' || relkind == 'i' || relkind == 'P') { /* * We ignore the database default 
tablespace so that users not using @@ -3051,6 +3141,7 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys " WHEN 'S' THEN '%s'" " WHEN 's' THEN '%s'" " WHEN 'f' THEN '%s'" + " WHEN 'P' THEN '%s'" " END as \"%s\",\n" " pg_catalog.pg_get_userbyid(c.relowner) as \"%s\"", gettext_noop("Schema"), @@ -3062,6 +3153,7 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys gettext_noop("sequence"), gettext_noop("special"), gettext_noop("foreign table"), + gettext_noop("table"), /* partitioned table */ gettext_noop("Type"), gettext_noop("Owner")); @@ -3100,7 +3192,7 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys appendPQExpBufferStr(&buf, "\nWHERE c.relkind IN ("); if (showTables) - appendPQExpBufferStr(&buf, "'r',"); + appendPQExpBufferStr(&buf, "'r', 'P',"); if (showViews) appendPQExpBufferStr(&buf, "'v',"); if (showMatViews) diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 6b95052a67..cd64c39b7f 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -452,7 +452,7 @@ static const SchemaQuery Query_for_list_of_tables = { /* catname */ "pg_catalog.pg_class c", /* selcondition */ - "c.relkind IN ('r')", + "c.relkind IN ('r', 'P')", /* viscondition */ "pg_catalog.pg_table_is_visible(c.oid)", /* namespace */ @@ -483,7 +483,7 @@ static const SchemaQuery Query_for_list_of_updatables = { /* catname */ "pg_catalog.pg_class c", /* selcondition */ - "c.relkind IN ('r', 'f', 'v')", + "c.relkind IN ('r', 'f', 'v', 'P')", /* viscondition */ "pg_catalog.pg_table_is_visible(c.oid)", /* namespace */ @@ -513,7 +513,7 @@ static const SchemaQuery Query_for_list_of_tsvmf = { /* catname */ "pg_catalog.pg_class c", /* selcondition */ - "c.relkind IN ('r', 'S', 'v', 'm', 'f')", + "c.relkind IN ('r', 'S', 'v', 'm', 'f', 'P')", /* viscondition */ "pg_catalog.pg_table_is_visible(c.oid)", /* namespace */ diff --git a/src/include/catalog/catversion.h 
b/src/include/catalog/catversion.h index 6adfab751d..a602ba3861 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201612061 +#define CATALOG_VERSION_NO 201612071 #endif diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 4d84a6ba08..e8a302f2fd 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -189,7 +189,8 @@ extern void recordDependencyOnExpr(const ObjectAddress *depender, extern void recordDependencyOnSingleRelExpr(const ObjectAddress *depender, Node *expr, Oid relId, DependencyType behavior, - DependencyType self_behavior); + DependencyType self_behavior, + bool ignore_self); extern ObjectClass getObjectClass(const ObjectAddress *object); diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index b80d8d8b21..77dc1983e8 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -134,4 +134,15 @@ extern void CheckAttributeType(const char *attname, List *containing_rowtypes, bool allow_system_table_mods); +/* pg_partitioned_table catalog manipulation functions */ +extern void StorePartitionKey(Relation rel, + char strategy, + int16 partnatts, + AttrNumber *partattrs, + List *partexprs, + Oid *partopclass, + Oid *partcollation); +extern void RemovePartitionKeyByRelId(Oid relid); +extern void StorePartitionBound(Relation rel, Node *bound); + #endif /* HEAP_H */ diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index ca5eb3d417..40f7576b7b 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -319,6 +319,9 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replicati DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname text_pattern_ops)); #define ReplicationOriginNameIndex 6002 
+DECLARE_UNIQUE_INDEX(pg_partitioned_table_partrelid_index, 3351, on pg_partitioned_table using btree(partrelid oid_ops)); +#define PartitionedRelidIndexId 3351 + /* last step of initialization script: build the indexes declared above */ BUILD_INDICES diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h new file mode 100644 index 0000000000..21effbf87b --- /dev/null +++ b/src/include/catalog/partition.h @@ -0,0 +1,83 @@ +/*------------------------------------------------------------------------- + * + * partition.h + * Header file for structures and utility functions related to + * partitioning + * + * Copyright (c) 2007-2016, PostgreSQL Global Development Group + * + * src/include/catalog/partition.h + * + *------------------------------------------------------------------------- + */ +#ifndef PARTITION_H +#define PARTITION_H + +#include "fmgr.h" +#include "executor/tuptable.h" +#include "nodes/execnodes.h" +#include "parser/parse_node.h" +#include "utils/rel.h" + +/* + * PartitionBoundInfo encapsulates a set of partition bounds. It is usually + * associated with partitioned tables as part of its partition descriptor. + * + * The internal structure is opaque outside partition.c. + */ +typedef struct PartitionBoundInfoData *PartitionBoundInfo; + +/* + * Information about partitions of a partitioned table. 
+ */ +typedef struct PartitionDescData +{ + int nparts; /* Number of partitions */ + Oid *oids; /* OIDs of partitions */ + PartitionBoundInfo boundinfo; /* collection of partition bounds */ +} PartitionDescData; + +typedef struct PartitionDescData *PartitionDesc; + +/*----------------------- + * PartitionDispatch - information about one partitioned table in a partition + * hierarchy required to route a tuple to one of its partitions + * + * reldesc Relation descriptor of the table + * key Partition key information of the table + * keystate Execution state required for expressions in the partition key + * partdesc Partition descriptor of the table + * indexes Array with partdesc->nparts members (for details on what + * individual members represent, see how they are set in + * RelationGetPartitionDispatchInfo()) + *----------------------- + */ +typedef struct PartitionDispatchData +{ + Relation reldesc; + PartitionKey key; + List *keystate; /* list of ExprState */ + PartitionDesc partdesc; + int *indexes; +} PartitionDispatchData; + +typedef struct PartitionDispatchData *PartitionDispatch; + +extern void RelationBuildPartitionDesc(Relation relation); +extern bool partition_bounds_equal(PartitionKey key, + PartitionBoundInfo p1, PartitionBoundInfo p2); + +extern void check_new_partition_bound(char *relname, Relation parent, Node *bound); +extern Oid get_partition_parent(Oid relid); +extern List *get_qual_from_partbound(Relation rel, Relation parent, Node *bound); +extern List *RelationGetPartitionQual(Relation rel, bool recurse); + +/* For tuple routing */ +extern PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel, + int lockmode, int *num_parted, + List **leaf_part_oids); +extern int get_partition_for_tuple(PartitionDispatch *pd, + TupleTableSlot *slot, + EState *estate, + Oid *failed_at); +#endif /* PARTITION_H */ diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index e57b81c417..a61b7a2917 100644 --- 
a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -70,6 +70,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO * not */ bool relispopulated; /* matview currently holds query results */ char relreplident; /* see REPLICA_IDENTITY_xxx constants */ + bool relispartition; /* is relation a partition? */ TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */ TransactionId relminmxid; /* all multixacts in this rel are >= this. * this is really a MultiXactId */ @@ -78,6 +79,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO /* NOTE: These fields are not present in a relcache entry's rd_rel field. */ aclitem relacl[1]; /* access permissions */ text reloptions[1]; /* access-method-specific options */ + pg_node_tree relpartbound; /* partition bound node tree */ #endif } FormData_pg_class; @@ -97,7 +99,7 @@ typedef FormData_pg_class *Form_pg_class; * ---------------- */ -#define Natts_pg_class 31 +#define Natts_pg_class 33 #define Anum_pg_class_relname 1 #define Anum_pg_class_relnamespace 2 #define Anum_pg_class_reltype 3 @@ -125,10 +127,12 @@ typedef FormData_pg_class *Form_pg_class; #define Anum_pg_class_relforcerowsecurity 25 #define Anum_pg_class_relispopulated 26 #define Anum_pg_class_relreplident 27 -#define Anum_pg_class_relfrozenxid 28 -#define Anum_pg_class_relminmxid 29 -#define Anum_pg_class_relacl 30 -#define Anum_pg_class_reloptions 31 +#define Anum_pg_class_relispartition 28 +#define Anum_pg_class_relfrozenxid 29 +#define Anum_pg_class_relminmxid 30 +#define Anum_pg_class_relacl 31 +#define Anum_pg_class_reloptions 32 +#define Anum_pg_class_relpartbound 33 /* ---------------- * initial contents of pg_class @@ -143,13 +147,13 @@ typedef FormData_pg_class *Form_pg_class; * Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId; * similarly, "1" in relminmxid stands for FirstMultiXactId */ -DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 
f f p r 30 0 t f f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f f t n f 3 1 _null_ _null_ _null_)); DESCR(""); -DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f f t n f 3 1 _null_ _null_ _null_)); DESCR(""); -DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f f f t n f 3 1 _null_ _null_ _null_)); DESCR(""); -DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 31 0 t f f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 33 0 t f f f f f f t n f 3 1 _null_ _null_ _null_)); DESCR(""); @@ -161,6 +165,7 @@ DESCR(""); #define RELKIND_COMPOSITE_TYPE 'c' /* composite type */ #define RELKIND_FOREIGN_TABLE 'f' /* foreign table */ #define RELKIND_MATVIEW 'm' /* materialized view */ +#define RELKIND_PARTITIONED_TABLE 'P' /* partitioned table */ #define RELPERSISTENCE_PERMANENT 'p' /* regular table */ #define RELPERSISTENCE_UNLOGGED 'u' /* unlogged permanent table */ diff --git a/src/include/catalog/pg_partitioned_table.h b/src/include/catalog/pg_partitioned_table.h new file mode 100644 index 0000000000..cec54ae62e --- /dev/null +++ b/src/include/catalog/pg_partitioned_table.h @@ -0,0 +1,76 @@ +/*------------------------------------------------------------------------- + * + * pg_partitioned_table.h + * definition of the system "partitioned table" relation + * along with the relation's initial contents. 
+ * + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * + * $PostgreSQL: pgsql/src/include/catalog/pg_partitioned_table.h $ + * + * NOTES + * the genbki.sh script reads this file and generates .bki + * information from the DATA() statements. + * + *------------------------------------------------------------------------- + */ +#ifndef PG_PARTITIONED_TABLE_H +#define PG_PARTITIONED_TABLE_H + +#include "catalog/genbki.h" + +/* ---------------- + * pg_partitioned_table definition. cpp turns this into + * typedef struct FormData_pg_partitioned_table + * ---------------- + */ +#define PartitionedRelationId 3350 + +CATALOG(pg_partitioned_table,3350) BKI_WITHOUT_OIDS +{ + Oid partrelid; /* partitioned table oid */ + char partstrat; /* partitioning strategy */ + int16 partnatts; /* number of partition key columns */ + + /* + * variable-length fields start here, but we allow direct access to + * partattrs via the C struct. That's because the first variable-length + * field of a heap tuple can be reliably accessed using its C struct + * offset, as previous fields are all non-nullable fixed-length fields. + */ + int2vector partattrs; /* each member of the array is the + * attribute number of a partition key + * column, or 0 if the column is actually + * an expression */ + +#ifdef CATALOG_VARLEN + oidvector partclass; /* operator class to compare keys */ + oidvector partcollation; /* user-specified collation for keys */ + pg_node_tree partexprs; /* list of expressions in the partitioning + * key; one item for each zero entry in + * partattrs[] */ +#endif +} FormData_pg_partitioned_table; + +/* ---------------- + * Form_pg_partitioned_table corresponds to a pointer to a tuple with + * the format of pg_partitioned_table relation. 
+ * ---------------- + */ +typedef FormData_pg_partitioned_table *Form_pg_partitioned_table; + +/* ---------------- + * compiler constants for pg_partitioned_table + * ---------------- + */ +#define Natts_pg_partitioned_table 7 +#define Anum_pg_partitioned_table_partrelid 1 +#define Anum_pg_partitioned_table_partstrat 2 +#define Anum_pg_partitioned_table_partnatts 3 +#define Anum_pg_partitioned_table_partattrs 4 +#define Anum_pg_partitioned_table_partclass 5 +#define Anum_pg_partitioned_table_partcollation 6 +#define Anum_pg_partitioned_table_partexprs 7 + +#endif /* PG_PARTITIONED_TABLE_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 047a1ce71c..96e77ec437 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -1979,6 +1979,8 @@ DATA(insert OID = 1642 ( pg_get_userbyid PGNSP PGUID 12 1 0 0 0 f f f f t f DESCR("role name by OID (with fallback)"); DATA(insert OID = 1643 ( pg_get_indexdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_indexdef _null_ _null_ _null_ )); DESCR("index description"); +DATA(insert OID = 3352 ( pg_get_partkeydef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_partkeydef _null_ _null_ _null_ )); +DESCR("partition key description"); DATA(insert OID = 1662 ( pg_get_triggerdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_triggerdef _null_ _null_ _null_ )); DESCR("trigger description"); DATA(insert OID = 1387 ( pg_get_constraintdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_constraintdef _null_ _null_ _null_ )); diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 2b894ff262..d790fbfee2 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -42,6 +42,8 @@ extern bool CheckIndexCompatible(Oid oldId, List *attributeList, List 
*exclusionOpNames); extern Oid GetDefaultOpClass(Oid type_id, Oid am_id); +extern Oid ResolveOpClass(List *opclass, Oid attrType, + char *accessMethodName, Oid accessMethodId); /* commands/functioncmds.c */ extern ObjectAddress CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt); diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index 7a770f4df5..fa48f2e960 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -23,7 +23,7 @@ extern ObjectAddress DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, - ObjectAddress *typaddress); + ObjectAddress *typaddress, const char *queryString); extern void RemoveRelations(DropStmt *drop); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 136276be53..b4d09f9564 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -14,6 +14,7 @@ #ifndef EXECUTOR_H #define EXECUTOR_H +#include "catalog/partition.h" #include "executor/execdesc.h" #include "nodes/parsenodes.h" @@ -188,6 +189,7 @@ extern void CheckValidResultRel(Relation resultRel, CmdType operation); extern void InitResultRelInfo(ResultRelInfo *resultRelInfo, Relation resultRelationDesc, Index resultRelationIndex, + bool load_partition_check, int instrument_options); extern ResultRelInfo *ExecGetTriggerResultRel(EState *estate, Oid relid); extern bool ExecContextForcesOids(PlanState *planstate, bool *hasoids); @@ -211,6 +213,10 @@ extern void EvalPlanQualSetPlan(EPQState *epqstate, extern void EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple); extern HeapTuple EvalPlanQualGetTuple(EPQState *epqstate, Index rti); +extern int ExecFindPartition(ResultRelInfo *resultRelInfo, + PartitionDispatch *pd, + TupleTableSlot *slot, + EState *estate); #define EvalPlanQualSetSlot(epqstate, slot) ((epqstate)->origslot = (slot)) extern void EvalPlanQualFetchRowMarks(EPQState *epqstate); diff --git a/src/include/nodes/execnodes.h 
b/src/include/nodes/execnodes.h index 8004d856cc..1de5c8196d 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -16,6 +16,7 @@ #include "access/genam.h" #include "access/heapam.h" +#include "access/tupconvert.h" #include "executor/instrument.h" #include "lib/pairingheap.h" #include "nodes/params.h" @@ -320,6 +321,8 @@ typedef struct JunkFilter * projectReturning for computing a RETURNING list * onConflictSetProj for computing ON CONFLICT DO UPDATE SET * onConflictSetWhere list of ON CONFLICT DO UPDATE exprs (qual) + * PartitionCheck partition check expression + * PartitionCheckExpr partition check expression state * ---------------- */ typedef struct ResultRelInfo @@ -344,6 +347,8 @@ typedef struct ResultRelInfo ProjectionInfo *ri_projectReturning; ProjectionInfo *ri_onConflictSetProj; List *ri_onConflictSetWhere; + List *ri_PartitionCheck; + List *ri_PartitionCheckExpr; } ResultRelInfo; /* ---------------- @@ -1143,6 +1148,15 @@ typedef struct ModifyTableState * tlist */ TupleTableSlot *mt_conflproj; /* CONFLICT ... SET ... 
projection * target */ + struct PartitionDispatchData **mt_partition_dispatch_info; + /* Tuple-routing support info */ + int mt_num_dispatch; /* Number of entries in the above + * array */ + int mt_num_partitions; /* Number of members in the + * following arrays */ + ResultRelInfo *mt_partitions; /* Per partition result relation */ + TupleConversionMap **mt_partition_tupconv_maps; + /* Per partition tuple conversion map */ } ModifyTableState; /* ---------------- diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index cb9307cd00..c514d3fc93 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -406,6 +406,7 @@ typedef enum NodeTag T_AlterPolicyStmt, T_CreateTransformStmt, T_CreateAmStmt, + T_PartitionCmd, /* * TAGS FOR PARSE TREE NODES (parsenodes.h) @@ -454,6 +455,10 @@ typedef enum NodeTag T_CommonTableExpr, T_RoleSpec, T_TriggerTransition, + T_PartitionElem, + T_PartitionSpec, + T_PartitionBoundSpec, + T_PartitionRangeDatum, /* * TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index f8003e46f3..6b95c48447 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -699,6 +699,79 @@ typedef struct XmlSerialize int location; /* token location, or -1 if unknown */ } XmlSerialize; +/* Partitioning related definitions */ + +/* + * PartitionElem - a column in the partition key + */ +typedef struct PartitionElem +{ + NodeTag type; + char *name; /* name of column to partition on, or NULL */ + Node *expr; /* expression to partition on, or NULL */ + List *collation; /* name of collation; NIL = default */ + List *opclass; /* name of desired opclass; NIL = default */ + int location; /* token location, or -1 if unknown */ +} PartitionElem; + +/* + * PartitionSpec - partition key specification + */ +typedef struct PartitionSpec +{ + NodeTag type; + char *strategy; /* partitioning strategy ('list' or 'range') */ + List *partParams; /* 
List of PartitionElems */ + int location; /* token location, or -1 if unknown */ +} PartitionSpec; + +#define PARTITION_STRATEGY_LIST 'l' +#define PARTITION_STRATEGY_RANGE 'r' + +/* + * PartitionBoundSpec - a partition bound specification + */ +typedef struct PartitionBoundSpec +{ + NodeTag type; + + char strategy; + + /* List partition values */ + List *listdatums; + + /* + * Range partition lower and upper bounds; each member of the lists + * is a PartitionRangeDatum (see below). + */ + List *lowerdatums; + List *upperdatums; + + int location; +} PartitionBoundSpec; + +/* + * PartitionRangeDatum + */ +typedef struct PartitionRangeDatum +{ + NodeTag type; + + bool infinite; + Node *value; + + int location; +} PartitionRangeDatum; + +/* + * PartitionCmd - ALTER TABLE partition commands + */ +typedef struct PartitionCmd +{ + NodeTag type; + RangeVar *name; + Node *bound; +} PartitionCmd; /**************************************************************************** * Nodes for a Query tree @@ -1549,7 +1622,9 @@ typedef enum AlterTableType AT_DisableRowSecurity, /* DISABLE ROW SECURITY */ AT_ForceRowSecurity, /* FORCE ROW SECURITY */ AT_NoForceRowSecurity, /* NO FORCE ROW SECURITY */ - AT_GenericOptions /* OPTIONS (...) */ + AT_GenericOptions, /* OPTIONS (...) 
*/ + AT_AttachPartition, /* ATTACH PARTITION */ + AT_DetachPartition /* DETACH PARTITION */ } AlterTableType; typedef struct ReplicaIdentityStmt @@ -1775,6 +1850,8 @@ typedef struct CreateStmt List *tableElts; /* column definitions (list of ColumnDef) */ List *inhRelations; /* relations to inherit from (list of * inhRelation) */ + Node *partbound; /* FOR VALUES clause */ + PartitionSpec *partspec; /* PARTITION BY clause */ TypeName *ofTypename; /* OF typename */ List *constraints; /* constraints (list of Constraint nodes) */ List *options; /* options from WITH clause */ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 77d873beca..581ff6eedb 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -49,6 +49,7 @@ PG_KEYWORD("assertion", ASSERTION, UNRESERVED_KEYWORD) PG_KEYWORD("assignment", ASSIGNMENT, UNRESERVED_KEYWORD) PG_KEYWORD("asymmetric", ASYMMETRIC, RESERVED_KEYWORD) PG_KEYWORD("at", AT, UNRESERVED_KEYWORD) +PG_KEYWORD("attach", ATTACH, UNRESERVED_KEYWORD) PG_KEYWORD("attribute", ATTRIBUTE, UNRESERVED_KEYWORD) PG_KEYWORD("authorization", AUTHORIZATION, TYPE_FUNC_NAME_KEYWORD) PG_KEYWORD("backward", BACKWARD, UNRESERVED_KEYWORD) @@ -127,6 +128,7 @@ PG_KEYWORD("delimiter", DELIMITER, UNRESERVED_KEYWORD) PG_KEYWORD("delimiters", DELIMITERS, UNRESERVED_KEYWORD) PG_KEYWORD("depends", DEPENDS, UNRESERVED_KEYWORD) PG_KEYWORD("desc", DESC, RESERVED_KEYWORD) +PG_KEYWORD("detach", DETACH, UNRESERVED_KEYWORD) PG_KEYWORD("dictionary", DICTIONARY, UNRESERVED_KEYWORD) PG_KEYWORD("disable", DISABLE_P, UNRESERVED_KEYWORD) PG_KEYWORD("discard", DISCARD, UNRESERVED_KEYWORD) diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index 66335863db..bd6dc020b2 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -64,7 +64,8 @@ typedef enum ParseExprKind EXPR_KIND_ALTER_COL_TRANSFORM, /* transform expr in ALTER COLUMN TYPE */ EXPR_KIND_EXECUTE_PARAMETER, /* parameter value in 
EXECUTE */ EXPR_KIND_TRIGGER_WHEN, /* WHEN condition in CREATE TRIGGER */ - EXPR_KIND_POLICY /* USING or WITH CHECK expr in policy */ + EXPR_KIND_POLICY, /* USING or WITH CHECK expr in policy */ + EXPR_KIND_PARTITION_EXPRESSION /* PARTITION BY expression */ } ParseExprKind; diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h index be3b6f70c1..783bb0009f 100644 --- a/src/include/parser/parse_utilcmd.h +++ b/src/include/parser/parse_utilcmd.h @@ -25,5 +25,7 @@ extern IndexStmt *transformIndexStmt(Oid relid, IndexStmt *stmt, extern void transformRuleStmt(RuleStmt *stmt, const char *queryString, List **actions, Node **whereClause); extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt); +extern Node *transformPartitionBound(ParseState *pstate, Relation parent, + Node *bound); #endif /* PARSE_UTILCMD_H */ diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 96885bb990..58b1db9f68 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -45,6 +45,11 @@ */ #define INDEX_MAX_KEYS 32 +/* + * Maximum number of columns in a partition key + */ +#define PARTITION_MAX_KEYS 32 + /* * Set the upper and lower bounds of sequence values. 
*/ diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 90f5132b03..7ed162322c 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -728,6 +728,7 @@ extern Datum pg_get_viewdef_wrap(PG_FUNCTION_ARGS); extern Datum pg_get_viewdef_name(PG_FUNCTION_ARGS); extern Datum pg_get_viewdef_name_ext(PG_FUNCTION_ARGS); extern Datum pg_get_indexdef(PG_FUNCTION_ARGS); +extern Datum pg_get_partkeydef(PG_FUNCTION_ARGS); extern Datum pg_get_indexdef_ext(PG_FUNCTION_ARGS); extern Datum pg_get_triggerdef(PG_FUNCTION_ARGS); extern Datum pg_get_triggerdef_ext(PG_FUNCTION_ARGS); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index fa15f28468..cd7ea1d2dd 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -45,6 +45,35 @@ typedef struct LockInfoData typedef LockInfoData *LockInfo; +/* + * Information about the partition key of a relation + */ +typedef struct PartitionKeyData +{ + char strategy; /* partitioning strategy */ + int16 partnatts; /* number of columns in the partition key */ + AttrNumber *partattrs; /* attribute numbers of columns in the + * partition key */ + List *partexprs; /* list of expressions in the partitioning + * key, or NIL */ + + Oid *partopfamily; /* OIDs of operator families */ + Oid *partopcintype; /* OIDs of opclass declared input data types */ + FmgrInfo *partsupfunc; /* lookup info for support funcs */ + + /* Partitioning collation per attribute */ + Oid *partcollation; + + /* Type information per attribute */ + Oid *parttypid; + int32 *parttypmod; + int16 *parttyplen; + bool *parttypbyval; + char *parttypalign; + Oid *parttypcoll; +} PartitionKeyData; + +typedef struct PartitionKeyData *PartitionKey; /* * Here are the contents of a relation cache entry. 
@@ -94,6 +123,12 @@ typedef struct RelationData List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */ bool rd_fkeyvalid; /* true if list has been computed */ + MemoryContext rd_partkeycxt; /* private memory cxt for the below */ + struct PartitionKeyData *rd_partkey; /* partition key, or NULL */ + MemoryContext rd_pdcxt; /* private context for partdesc */ + struct PartitionDescData *rd_partdesc; /* partitions, or NULL */ + List *rd_partcheck; /* partition CHECK quals */ + /* data managed by RelationGetIndexList: */ List *rd_indexlist; /* list of OIDs of indexes on relation */ Oid rd_oidindex; /* OID of unique index on OID, if any */ @@ -534,6 +569,60 @@ typedef struct ViewOptions RelationNeedsWAL(relation) && \ !IsCatalogRelation(relation)) +/* + * RelationGetPartitionKey + * Returns the PartitionKey of a relation + */ +#define RelationGetPartitionKey(relation) ((relation)->rd_partkey) + +/* + * PartitionKey inquiry functions + */ +static inline int +get_partition_strategy(PartitionKey key) +{ + return key->strategy; +} + +static inline int +get_partition_natts(PartitionKey key) +{ + return key->partnatts; +} + +static inline List * +get_partition_exprs(PartitionKey key) +{ + return key->partexprs; +} + +/* + * PartitionKey inquiry functions - one column + */ +static inline int16 +get_partition_col_attnum(PartitionKey key, int col) +{ + return key->partattrs[col]; +} + +static inline Oid +get_partition_col_typid(PartitionKey key, int col) +{ + return key->parttypid[col]; +} + +static inline int32 +get_partition_col_typmod(PartitionKey key, int col) +{ + return key->parttypmod[col]; +} + +/* + * RelationGetPartitionDesc + * Returns partition descriptor for a relation. 
+ */ +#define RelationGetPartitionDesc(relation) ((relation)->rd_partdesc) + /* routines in utils/cache/relcache.c */ extern void RelationIncrementReferenceCount(Relation rel); extern void RelationDecrementReferenceCount(Relation rel); diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index 256615b671..39fe947d6e 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -72,6 +72,7 @@ enum SysCacheIdentifier OPEROID, OPFAMILYAMNAMENSP, OPFAMILYOID, + PARTRELID, PROCNAMEARGSNSP, PROCOID, RANGETYPE, diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index cf9f6d3dfa..09cc193f2f 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -2974,3 +2974,346 @@ NOTICE: column "c3" of relation "test_add_column" already exists, skipping c4 | integer | | | DROP TABLE test_add_column; +-- unsupported constraint types for partitioned tables +CREATE TABLE partitioned ( + a int, + b int +) PARTITION BY RANGE (a, (a+b+1)); +ALTER TABLE partitioned ADD UNIQUE (a); +ERROR: unique constraints are not supported on partitioned tables +LINE 1: ALTER TABLE partitioned ADD UNIQUE (a); + ^ +ALTER TABLE partitioned ADD PRIMARY KEY (a); +ERROR: primary key constraints are not supported on partitioned tables +LINE 1: ALTER TABLE partitioned ADD PRIMARY KEY (a); + ^ +ALTER TABLE partitioned ADD FOREIGN KEY (a) REFERENCES blah; +ERROR: foreign key constraints are not supported on partitioned tables +LINE 1: ALTER TABLE partitioned ADD FOREIGN KEY (a) REFERENCES blah; + ^ +ALTER TABLE partitioned ADD EXCLUDE USING gist (a WITH &&); +ERROR: exclusion constraints are not supported on partitioned tables +LINE 1: ALTER TABLE partitioned ADD EXCLUDE USING gist (a WITH &&); + ^ +-- cannot drop column that is part of the partition key +ALTER TABLE partitioned DROP COLUMN a; +ERROR: cannot drop column named in partition key +ALTER TABLE partitioned ALTER COLUMN a 
TYPE char(5); +ERROR: cannot alter type of column named in partition key +ALTER TABLE partitioned DROP COLUMN b; +ERROR: cannot drop column referenced in partition key expression +ALTER TABLE partitioned ALTER COLUMN b TYPE char(5); +ERROR: cannot alter type of column referenced in partition key expression +-- cannot drop NOT NULL on columns in the range partition key +ALTER TABLE partitioned ALTER COLUMN a DROP NOT NULL; +ERROR: column "a" is in range partition key +-- partitioned table cannot partiticipate in regular inheritance +CREATE TABLE foo ( + a int, + b int +); +ALTER TABLE partitioned INHERIT foo; +ERROR: cannot change inheritance of partitioned table +ALTER TABLE foo INHERIT partitioned; +ERROR: cannot inherit from partitioned table "partitioned" +-- cannot add NO INHERIT constraint to partitioned tables +ALTER TABLE partitioned ADD CONSTRAINT chk_a CHECK (a > 0) NO INHERIT; +ERROR: cannot add NO INHERIT constraint to partitioned table "partitioned" +DROP TABLE partitioned, foo; +-- +-- ATTACH PARTITION +-- +-- check that target table is partitioned +CREATE TABLE unparted ( + a int +); +CREATE TABLE fail_part (like unparted); +ALTER TABLE unparted ATTACH PARTITION fail_part FOR VALUES IN ('a'); +ERROR: "unparted" is not partitioned +DROP TABLE unparted, fail_part; +-- check that partition bound is compatible +CREATE TABLE list_parted ( + a int NOT NULL, + b char(2) COLLATE "en_US", + CONSTRAINT check_a CHECK (a > 0) +) PARTITION BY LIST (a); +CREATE TABLE fail_part (LIKE list_parted); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) TO (10); +ERROR: invalid bound specification for a list partition +LINE 1: ...list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) T... 
+ ^ +DROP TABLE fail_part; +-- check that the table being attached exists +ALTER TABLE list_parted ATTACH PARTITION nonexistant FOR VALUES IN (1); +ERROR: relation "nonexistant" does not exist +-- check ownership of the source table +CREATE ROLE regress_test_me; +CREATE ROLE regress_test_not_me; +CREATE TABLE not_owned_by_me (LIKE list_parted); +ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me; +SET SESSION AUTHORIZATION regress_test_me; +CREATE TABLE owned_by_me ( + a int +) PARTITION BY LIST (a); +ALTER TABLE owned_by_me ATTACH PARTITION not_owned_by_me FOR VALUES IN (1); +ERROR: must be owner of relation not_owned_by_me +RESET SESSION AUTHORIZATION; +DROP TABLE owned_by_me, not_owned_by_me; +DROP ROLE regress_test_not_me; +DROP ROLE regress_test_me; +-- check that the table being attached is not part of regular inheritance +CREATE TABLE parent (LIKE list_parted); +CREATE TABLE child () INHERITS (parent); +ALTER TABLE list_parted ATTACH PARTITION child FOR VALUES IN (1); +ERROR: cannot attach inheritance child as partition +ALTER TABLE list_parted ATTACH PARTITION parent FOR VALUES IN (1); +ERROR: cannot attach inheritance parent as partition +DROP TABLE parent CASCADE; +NOTICE: drop cascades to table child +-- check any TEMP-ness +CREATE TEMP TABLE temp_parted (a int) PARTITION BY LIST (a); +CREATE TABLE perm_part (a int); +ALTER TABLE temp_parted ATTACH PARTITION perm_part FOR VALUES IN (1); +ERROR: cannot attach a permanent relation as partition of temporary relation "temp_parted" +DROP TABLE temp_parted, perm_part; +-- check that the table being attached is not a typed table +CREATE TYPE mytype AS (a int); +CREATE TABLE fail_part OF mytype; +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: cannot attach a typed table as partition +DROP TYPE mytype CASCADE; +NOTICE: drop cascades to table fail_part +-- check existence (or non-existence) of oid column +ALTER TABLE list_parted SET WITH OIDS; +CREATE TABLE fail_part (a int); 
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: cannot attach table "fail_part" without OIDs as partition of table "list_parted" with OIDs +ALTER TABLE list_parted SET WITHOUT OIDS; +ALTER TABLE fail_part SET WITH OIDS; +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: cannot attach table "fail_part" with OIDs as partition of table "list_parted" without OIDs +DROP TABLE fail_part; +-- check that the table being attached has only columns present in the parent +CREATE TABLE fail_part (like list_parted, c int); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: table "fail_part" contains column "c" not found in parent "list_parted" +DETAIL: New partition should contain only the columns present in parent. +DROP TABLE fail_part; +-- check that the table being attached has every column of the parent +CREATE TABLE fail_part (a int NOT NULL); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table is missing column "b" +DROP TABLE fail_part; +-- check that columns match in type, collation and NOT NULL status +CREATE TABLE fail_part ( + b char(3), + a int NOT NULL +); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table "fail_part" has different type for column "b" +ALTER TABLE fail_part ALTER b TYPE char (2) COLLATE "en_CA"; +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table "fail_part" has different collation for column "b" +DROP TABLE fail_part; +-- check that the table being attached has all constraints of the parent +CREATE TABLE fail_part ( + b char(2) COLLATE "en_US", + a int NOT NULL +); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table is missing constraint "check_a" +-- check that the constraint matches in definition with parent's constraint +ALTER TABLE fail_part ADD CONSTRAINT check_a CHECK (a >= 0); +ALTER TABLE list_parted 
ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: child table "fail_part" has different definition for check constraint "check_a" +DROP TABLE fail_part; +-- check the attributes and constraints after partition is attached +CREATE TABLE part_1 ( + a int NOT NULL, + b char(2) COLLATE "en_US", + CONSTRAINT check_a CHECK (a > 0) +); +ALTER TABLE list_parted ATTACH PARTITION part_1 FOR VALUES IN (1); +-- attislocal and conislocal are always false for merged attributes and constraints respectively. +SELECT attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_1'::regclass AND attnum > 0; + attislocal | attinhcount +------------+------------- + f | 1 + f | 1 +(2 rows) + +SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_1'::regclass AND conname = 'check_a'; + conislocal | coninhcount +------------+------------- + f | 1 +(1 row) + +-- check that the new partition won't overlap with an existing partition +CREATE TABLE fail_part (LIKE part_1 INCLUDING CONSTRAINTS); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ERROR: partition "fail_part" would overlap partition "part_1" +-- check validation when attaching list partitions +CREATE TABLE list_parted2 ( + a int, + b char +) PARTITION BY LIST (a); +-- check that violating rows are correctly reported +CREATE TABLE part_2 (LIKE list_parted2); +INSERT INTO part_2 VALUES (3, 'a'); +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); +ERROR: partition constraint is violated by some row +-- should be ok after deleting the bad row +DELETE FROM part_2; +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); +-- adding constraints that describe the desired partition constraint +-- (or more restrictive) will help skip the validation scan +CREATE TABLE part_3_4 ( + LIKE list_parted2, + CONSTRAINT check_a CHECK (a IN (3)) +); +-- however, if a list partition does not accept nulls, there should be +-- an explicit NOT NULL constraint on the partition 
key column for the +-- validation scan to be skipped; +ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4); +-- adding a NOT NULL constraint will cause the scan to be skipped +ALTER TABLE list_parted2 DETACH PARTITION part_3_4; +ALTER TABLE part_3_4 ALTER a SET NOT NULL; +ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4); +NOTICE: skipping scan to validate partition constraint +-- check validation when attaching range partitions +CREATE TABLE range_parted ( + a int, + b int +) PARTITION BY RANGE (a, b); +-- check that violating rows are correctly reported +CREATE TABLE part1 ( + a int NOT NULL CHECK (a = 1), + b int NOT NULL CHECK (b >= 1 AND b <= 10) +); +INSERT INTO part1 VALUES (1, 10); +-- Remember the TO bound is exclusive +ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10); +ERROR: partition constraint is violated by some row +-- should be ok after deleting the bad row +DELETE FROM part1; +ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10); +-- adding constraints that describe the desired partition constraint +-- (or more restrictive) will help skip the validation scan +CREATE TABLE part2 ( + a int NOT NULL CHECK (a = 1), + b int NOT NULL CHECK (b >= 10 AND b < 18) +); +ALTER TABLE range_parted ATTACH PARTITION part2 FOR VALUES FROM (1, 10) TO (1, 20); +NOTICE: skipping scan to validate partition constraint +-- check that leaf partitions are scanned when attaching a partitioned +-- table +CREATE TABLE part_5 ( + LIKE list_parted2 +) PARTITION BY LIST (b); +-- check that violating rows are correctly reported +CREATE TABLE part_5_a PARTITION OF part_5 FOR VALUES IN ('a'); +INSERT INTO part_5_a (a, b) VALUES (6, 'a'); +ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5); +ERROR: partition constraint is violated by some row +-- delete the faulting row and also add a constraint to skip the scan +DELETE FROM part_5_a WHERE a NOT IN (3); +ALTER 
TABLE part_5 ADD CONSTRAINT check_a CHECK (a IN (5)), ALTER a SET NOT NULL; +ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5); +NOTICE: skipping scan to validate partition constraint +-- check that the table being attached is not already a partition +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); +ERROR: "part_2" is already a partition +-- check that circular inheritance is not allowed +ALTER TABLE part_5 ATTACH PARTITION list_parted2 FOR VALUES IN ('b'); +ERROR: circular inheritance not allowed +DETAIL: "part_5" is already a child of "list_parted2". +ALTER TABLE list_parted2 ATTACH PARTITION list_parted2 FOR VALUES IN (0); +ERROR: circular inheritance not allowed +DETAIL: "list_parted2" is already a child of "list_parted2". +-- +-- DETACH PARTITION +-- +-- check that the partition being detached exists at all +ALTER TABLE list_parted2 DETACH PARTITION part_4; +ERROR: relation "part_4" does not exist +-- check that the partition being detached is actually a partition of the parent +CREATE TABLE not_a_part (a int); +ALTER TABLE list_parted2 DETACH PARTITION not_a_part; +ERROR: relation "not_a_part" is not a partition of relation "list_parted2" +ALTER TABLE list_parted2 DETACH PARTITION part_1; +ERROR: relation "part_1" is not a partition of relation "list_parted2" +-- check that, after being detached, attinhcount/coninhcount is dropped to 0 and +-- attislocal/conislocal is set to true +ALTER TABLE list_parted2 DETACH PARTITION part_3_4; +SELECT attinhcount, attislocal FROM pg_attribute WHERE attrelid = 'part_3_4'::regclass AND attnum > 0; + attinhcount | attislocal +-------------+------------ + 0 | t + 0 | t +(2 rows) + +SELECT coninhcount, conislocal FROM pg_constraint WHERE conrelid = 'part_3_4'::regclass AND conname = 'check_a'; + coninhcount | conislocal +-------------+------------ + 0 | t +(1 row) + +DROP TABLE part_3_4; +-- Check ALTER TABLE commands for partitioned tables and partitions +-- cannot add/drop column to/from 
*only* the parent +ALTER TABLE ONLY list_parted2 ADD COLUMN c int; +ERROR: column must be added to child tables too +ALTER TABLE ONLY list_parted2 DROP COLUMN b; +ERROR: column must be dropped from child tables too +-- cannot add a column to partition or drop an inherited one +ALTER TABLE part_2 ADD COLUMN c text; +ERROR: cannot add column to a partition +ALTER TABLE part_2 DROP COLUMN b; +ERROR: cannot drop inherited column "b" +-- Nor rename, alter type +ALTER TABLE part_2 RENAME COLUMN b to c; +ERROR: cannot rename inherited column "b" +ALTER TABLE part_2 ALTER COLUMN b TYPE text; +ERROR: cannot alter inherited column "b" +-- cannot add NOT NULL or check constraints to *only* the parent (ie, non-inherited) +ALTER TABLE ONLY list_parted2 ALTER b SET NOT NULL; +ERROR: constraint must be added to child tables too +ALTER TABLE ONLY list_parted2 add constraint check_b check (b <> 'zz'); +ERROR: constraint must be added to child tables too +ALTER TABLE list_parted2 add constraint check_b check (b <> 'zz') NO INHERIT; +ERROR: cannot add NO INHERIT constraint to partitioned table "list_parted2" +-- cannot drop inherited NOT NULL or check constraints from partition +ALTER TABLE list_parted2 ALTER b SET NOT NULL, ADD CONSTRAINT check_a2 CHECK (a > 0); +ALTER TABLE part_2 ALTER b DROP NOT NULL; +ERROR: column "b" is marked NOT NULL in parent table +ALTER TABLE part_2 DROP CONSTRAINT check_a2; +ERROR: cannot drop inherited constraint "check_a2" of relation "part_2" +-- cannot drop NOT NULL or check constraints from *only* the parent +ALTER TABLE ONLY list_parted2 ALTER a DROP NOT NULL; +ERROR: constraint must be dropped from child tables too +ALTER TABLE ONLY list_parted2 DROP CONSTRAINT check_a2; +ERROR: constraint must be dropped from child tables too +-- check that a partition cannot participate in regular inheritance +CREATE TABLE inh_test () INHERITS (part_2); +ERROR: cannot inherit from partition "part_2" +CREATE TABLE inh_test (LIKE part_2); +ALTER TABLE inh_test 
INHERIT part_2; +ERROR: cannot inherit from a partition +ALTER TABLE part_2 INHERIT inh_test; +ERROR: cannot change inheritance of a partition +-- cannot drop or alter type of partition key columns of lower level +-- partitioned tables; for example, part_5, which is list_parted2's +-- partition, is partitioned on b; +ALTER TABLE list_parted2 DROP COLUMN b; +ERROR: cannot drop column named in partition key +ALTER TABLE list_parted2 ALTER COLUMN b TYPE text; +ERROR: cannot alter type of column named in partition key +-- cleanup +DROP TABLE list_parted, list_parted2, range_parted CASCADE; +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table part1 +drop cascades to table part2 +drop cascades to table part_2 +drop cascades to table part_5 +drop cascades to table part_5_a +drop cascades to table part_1 diff --git a/src/test/regress/expected/create_table.out b/src/test/regress/expected/create_table.out index 41ceb874e8..b40a18aec2 100644 --- a/src/test/regress/expected/create_table.out +++ b/src/test/regress/expected/create_table.out @@ -253,3 +253,416 @@ DROP TABLE as_select1; -- check that the oid column is added before the primary key is checked CREATE TABLE oid_pk (f1 INT, PRIMARY KEY(oid)) WITH OIDS; DROP TABLE oid_pk; +-- +-- Partitioned tables +-- +-- cannot combine INHERITS and PARTITION BY (although grammar allows) +CREATE TABLE partitioned ( + a int +) INHERITS (some_table) PARTITION BY LIST (a); +ERROR: cannot create partitioned table as inheritance child +-- cannot use more than 1 column as partition key for list partitioned table +CREATE TABLE partitioned ( + a1 int, + a2 int +) PARTITION BY LIST (a1, a2); -- fail +ERROR: cannot list partition using more than one column +-- unsupported constraint type for partitioned tables +CREATE TABLE partitioned ( + a int PRIMARY KEY +) PARTITION BY RANGE (a); +ERROR: primary key constraints are not supported on partitioned tables +LINE 2: a int PRIMARY KEY + ^ +CREATE TABLE pkrel ( + a int PRIMARY 
KEY +); +CREATE TABLE partitioned ( + a int REFERENCES pkrel(a) +) PARTITION BY RANGE (a); +ERROR: foreign key constraints are not supported on partitioned tables +LINE 2: a int REFERENCES pkrel(a) + ^ +DROP TABLE pkrel; +CREATE TABLE partitioned ( + a int UNIQUE +) PARTITION BY RANGE (a); +ERROR: unique constraints are not supported on partitioned tables +LINE 2: a int UNIQUE + ^ +CREATE TABLE partitioned ( + a int, + EXCLUDE USING gist (a WITH &&) +) PARTITION BY RANGE (a); +ERROR: exclusion constraints are not supported on partitioned tables +LINE 3: EXCLUDE USING gist (a WITH &&) + ^ +-- prevent column from being used twice in the partition key +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (a, a); +ERROR: column "a" appears more than once in partition key +-- prevent using prohibited expressions in the key +CREATE FUNCTION retset (a int) RETURNS SETOF int AS $$ SELECT 1; $$ LANGUAGE SQL IMMUTABLE; +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (retset(a)); +ERROR: set-returning functions are not allowed in partition key expression +DROP FUNCTION retset(int); +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE ((avg(a))); +ERROR: aggregate functions are not allowed in partition key expression +CREATE TABLE partitioned ( + a int, + b int +) PARTITION BY RANGE ((avg(a) OVER (PARTITION BY b))); +ERROR: window functions are not allowed in partition key expression +CREATE TABLE partitioned ( + a int +) PARTITION BY LIST ((a LIKE (SELECT 1))); +ERROR: cannot use subquery in partition key expression +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (('a')); +ERROR: cannot use constant expression as partition key +CREATE FUNCTION const_func () RETURNS int AS $$ SELECT 1; $$ LANGUAGE SQL IMMUTABLE; +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (const_func()); +ERROR: cannot use constant expression as partition key +DROP FUNCTION const_func(); +-- only accept "list" and "range" as partitioning strategy +CREATE TABLE 
partitioned ( + a int +) PARTITION BY HASH (a); +ERROR: unrecognized partitioning strategy "hash" +-- specified column must be present in the table +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (b); +ERROR: column "b" named in partition key does not exist +-- cannot use system columns in partition key +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (xmin); +ERROR: cannot use system column "xmin" in partition key +-- functions in key must be immutable +CREATE FUNCTION immut_func (a int) RETURNS int AS $$ SELECT a + random()::int; $$ LANGUAGE SQL; +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (immut_func(a)); +ERROR: functions in partition key expression must be marked IMMUTABLE +DROP FUNCTION immut_func(int); +-- cannot contain whole-row references +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE ((partitioned)); +ERROR: partition key expressions cannot contain whole-row references +-- prevent using columns of unsupported types in key (type must have a btree operator class) +CREATE TABLE partitioned ( + a point +) PARTITION BY LIST (a); +ERROR: data type point has no default btree operator class +HINT: You must specify a btree operator class or define a default btree operator class for the data type. +CREATE TABLE partitioned ( + a point +) PARTITION BY LIST (a point_ops); +ERROR: operator class "point_ops" does not exist for access method "btree" +CREATE TABLE partitioned ( + a point +) PARTITION BY RANGE (a); +ERROR: data type point has no default btree operator class +HINT: You must specify a btree operator class or define a default btree operator class for the data type. 
+CREATE TABLE partitioned ( + a point +) PARTITION BY RANGE (a point_ops); +ERROR: operator class "point_ops" does not exist for access method "btree" +-- cannot add NO INHERIT constraints to partitioned tables +CREATE TABLE partitioned ( + a int, + CONSTRAINT check_a CHECK (a > 0) NO INHERIT +) PARTITION BY RANGE (a); +ERROR: cannot add NO INHERIT constraint to partitioned table "partitioned" +-- some checks after successful creation of a partitioned table +CREATE FUNCTION plusone(a int) RETURNS INT AS $$ SELECT a+1; $$ LANGUAGE SQL; +CREATE TABLE partitioned ( + a int, + b int, + c text, + d text +) PARTITION BY RANGE (a oid_ops, plusone(b), c collate "default", d collate "en_US"); +-- check relkind +SELECT relkind FROM pg_class WHERE relname = 'partitioned'; + relkind +--------- + P +(1 row) + +-- check that range partition key columns are marked NOT NULL +SELECT attname, attnotnull FROM pg_attribute WHERE attrelid = 'partitioned'::regclass AND attnum > 0; + attname | attnotnull +---------+------------ + a | t + b | f + c | t + d | t +(4 rows) + +-- prevent a function referenced in partition key from being dropped +DROP FUNCTION plusone(int); +ERROR: cannot drop function plusone(integer) because other objects depend on it +DETAIL: table partitioned depends on function plusone(integer) +HINT: Use DROP ... CASCADE to drop the dependent objects too. 
+-- partitioned table cannot participate in regular inheritance +CREATE TABLE partitioned2 ( + a int +) PARTITION BY LIST ((a+1)); +CREATE TABLE fail () INHERITS (partitioned2); +ERROR: cannot inherit from partitioned table "partitioned2" +-- Partition key in describe output +\d partitioned + Table "public.partitioned" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | not null | + b | integer | | | + c | text | | not null | + d | text | | not null | +Partition key: RANGE (a oid_ops, plusone(b), c, d COLLATE "en_US") + +\d partitioned2 + Table "public.partitioned2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | +Partition key: LIST ((a + 1)) + +DROP TABLE partitioned, partitioned2; +-- +-- Partitions +-- +-- check partition bound syntax +CREATE TABLE list_parted ( + a int +) PARTITION BY LIST (a); +-- syntax allows only string literal, numeric literal and null to be +-- specified for a partition bound value +CREATE TABLE part_1 PARTITION OF list_parted FOR VALUES IN ('1'); +CREATE TABLE part_2 PARTITION OF list_parted FOR VALUES IN (2); +CREATE TABLE part_null PARTITION OF list_parted FOR VALUES IN (null); +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN (int '1'); +ERROR: syntax error at or near "int" +LINE 1: ... 
fail_part PARTITION OF list_parted FOR VALUES IN (int '1'); + ^ +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ('1'::int); +ERROR: syntax error at or near "::" +LINE 1: ...fail_part PARTITION OF list_parted FOR VALUES IN ('1'::int); + ^ +-- syntax does not allow empty list of values for list partitions +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN (); +ERROR: syntax error at or near ")" +LINE 1: ...E TABLE fail_part PARTITION OF list_parted FOR VALUES IN (); + ^ +-- trying to specify range for list partitioned table +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) TO (2); +ERROR: invalid bound specification for a list partition +LINE 1: ...BLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) T... + ^ +-- specified literal can't be cast to the partition column data type +CREATE TABLE bools ( + a bool +) PARTITION BY LIST (a); +CREATE TABLE bools_true PARTITION OF bools FOR VALUES IN (1); +ERROR: specified value cannot be cast to type "boolean" of column "a" +LINE 1: ...REATE TABLE bools_true PARTITION OF bools FOR VALUES IN (1); + ^ +DROP TABLE bools; +CREATE TABLE range_parted ( + a date +) PARTITION BY RANGE (a); +-- trying to specify list for range partitioned table +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES IN ('a'); +ERROR: invalid bound specification for a range partition +LINE 1: ...BLE fail_part PARTITION OF range_parted FOR VALUES IN ('a'); + ^ +-- each of start and end bounds must have same number of values as the +-- length of the partition key +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('z'); +ERROR: FROM must specify exactly one value per partitioning column +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a') TO ('z', 1); +ERROR: TO must specify exactly one value per partitioning column +-- cannot specify null values in range bounds +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM (null) TO (unbounded); 
+ERROR: cannot specify NULL in range bound +-- check if compatible with the specified parent +-- cannot create as partition of a non-partitioned table +CREATE TABLE unparted ( + a int +); +CREATE TABLE fail_part PARTITION OF unparted FOR VALUES IN ('a'); +ERROR: "unparted" is not partitioned +DROP TABLE unparted; +-- cannot create a permanent rel as partition of a temp rel +CREATE TEMP TABLE temp_parted ( + a int +) PARTITION BY LIST (a); +CREATE TABLE fail_part PARTITION OF temp_parted FOR VALUES IN ('a'); +ERROR: cannot create a permanent relation as partition of temporary relation "temp_parted" +DROP TABLE temp_parted; +-- cannot create a table with oids as partition of table without oids +CREATE TABLE no_oids_parted ( + a int +) PARTITION BY RANGE (a) WITHOUT OIDS; +CREATE TABLE fail_part PARTITION OF no_oids_parted FOR VALUES FROM (1) TO (10 )WITH OIDS; +ERROR: cannot create table with OIDs as partition of table without OIDs +DROP TABLE no_oids_parted; +-- likewise, the reverse is also true +CREATE TABLE oids_parted ( + a int +) PARTITION BY RANGE (a) WITH OIDS; +CREATE TABLE fail_part PARTITION OF oids_parted FOR VALUES FROM (1) TO (10 ) WITHOUT OIDS; +ERROR: cannot create table without OIDs as partition of table with OIDs +DROP TABLE oids_parted; +-- check for partition bound overlap and other invalid specifications +CREATE TABLE list_parted2 ( + a varchar +) PARTITION BY LIST (a); +CREATE TABLE part_null_z PARTITION OF list_parted2 FOR VALUES IN (null, 'z'); +CREATE TABLE part_ab PARTITION OF list_parted2 FOR VALUES IN ('a', 'b'); +CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN (null); +ERROR: partition "fail_part" would overlap partition "part_null_z" +CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN ('b', 'c'); +ERROR: partition "fail_part" would overlap partition "part_ab" +CREATE TABLE range_parted2 ( + a int +) PARTITION BY RANGE (a); +-- trying to create range partition with empty range +CREATE TABLE fail_part PARTITION 
OF range_parted2 FOR VALUES FROM (1) TO (0); +ERROR: cannot create range partition with empty range +-- note that the range '[1, 1)' has no elements +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (1); +ERROR: cannot create range partition with empty range +CREATE TABLE part0 PARTITION OF range_parted2 FOR VALUES FROM (unbounded) TO (1); +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (unbounded) TO (2); +ERROR: partition "fail_part" would overlap partition "part0" +CREATE TABLE part1 PARTITION OF range_parted2 FOR VALUES FROM (1) TO (10); +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (9) TO (unbounded); +ERROR: partition "fail_part" would overlap partition "part1" +-- now check for multi-column range partition key +CREATE TABLE range_parted3 ( + a int, + b int +) PARTITION BY RANGE (a, (b+1)); +CREATE TABLE part00 PARTITION OF range_parted3 FOR VALUES FROM (0, unbounded) TO (0, unbounded); +CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (0, unbounded) TO (0, 1); +ERROR: partition "fail_part" would overlap partition "part00" +CREATE TABLE part10 PARTITION OF range_parted3 FOR VALUES FROM (1, unbounded) TO (1, 1); +CREATE TABLE part11 PARTITION OF range_parted3 FOR VALUES FROM (1, 1) TO (1, 10); +CREATE TABLE part12 PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1, unbounded); +CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1, 20); +ERROR: partition "fail_part" would overlap partition "part12" +-- cannot create a partition that says column b is allowed to range +-- from -infinity to +infinity, while there exist partitions that have +-- more specific ranges +CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, unbounded) TO (1, unbounded); +ERROR: partition "fail_part" would overlap partition "part10" +-- check schema propagation from parent +CREATE TABLE parted ( + a text, + b int NOT NULL DEFAULT 0, + CONSTRAINT check_a CHECK 
(length(a) > 0) +) PARTITION BY LIST (a); +CREATE TABLE part_a PARTITION OF parted FOR VALUES IN ('a'); +-- only inherited attributes (never local ones) +SELECT attname, attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_a'::regclass and attnum > 0; + attname | attislocal | attinhcount +---------+------------+------------- + a | f | 1 + b | f | 1 +(2 rows) + +-- able to specify column default, column constraint, and table constraint +CREATE TABLE part_b PARTITION OF parted ( + b NOT NULL DEFAULT 1 CHECK (b >= 0), + CONSTRAINT check_a CHECK (length(a) > 0) +) FOR VALUES IN ('b'); +NOTICE: merging constraint "check_a" with inherited definition +-- conislocal should be false for any merged constraints +SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_b'::regclass AND conname = 'check_a'; + conislocal | coninhcount +------------+------------- + f | 1 +(1 row) + +-- specify PARTITION BY for a partition +CREATE TABLE fail_part_col_not_found PARTITION OF parted FOR VALUES IN ('c') PARTITION BY RANGE (c); +ERROR: column "c" named in partition key does not exist +CREATE TABLE part_c PARTITION OF parted FOR VALUES IN ('c') PARTITION BY RANGE ((b)); +-- create a level-2 partition +CREATE TABLE part_c_1_10 PARTITION OF part_c FOR VALUES FROM (1) TO (10); +-- Partition bound in describe output +\d part_b + Table "public.part_b" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | text | | | + b | integer | | not null | 1 +Partition of: parted FOR VALUES IN ('b') +Check constraints: + "check_a" CHECK (length(a) > 0) + "part_b_b_check" CHECK (b >= 0) + +-- Both partition bound and partition key in describe output +\d part_c + Table "public.part_c" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | text | | | + b | integer | | not null | 0 +Partition of: parted FOR VALUES IN ('c') +Partition key: RANGE (b) +Check constraints: + 
"check_a" CHECK (length(a) > 0) +Number of partitions: 1 (Use \d+ to list them.) + +-- Show partition count in the parent's describe output +-- Tempted to include \d+ output listing partitions with bound info but +-- output could vary depending on the order in which partition oids are +-- returned. +\d parted + Table "public.parted" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | text | | | + b | integer | | not null | 0 +Partition key: LIST (a) +Check constraints: + "check_a" CHECK (length(a) > 0) +Number of partitions: 3 (Use \d+ to list them.) + +-- partitions can be dropped directly +DROP TABLE part_a; +-- need to specify CASCADE to drop partitions along with the parent +DROP TABLE parted; +ERROR: cannot drop table parted because other objects depend on it +DETAIL: table part_b depends on table parted +table part_c depends on table parted +table part_c_1_10 depends on table part_c +HINT: Use DROP ... CASCADE to drop the dependent objects too. 
+DROP TABLE parted, list_parted, range_parted, list_parted2, range_parted2, range_parted3 CASCADE; +NOTICE: drop cascades to 14 other objects +DETAIL: drop cascades to table part00 +drop cascades to table part10 +drop cascades to table part11 +drop cascades to table part12 +drop cascades to table part0 +drop cascades to table part1 +drop cascades to table part_null_z +drop cascades to table part_ab +drop cascades to table part_1 +drop cascades to table part_2 +drop cascades to table part_null +drop cascades to table part_b +drop cascades to table part_c +drop cascades to table part_c_1_10 diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index b331828e5d..38ea8e86f3 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1542,3 +1542,275 @@ FROM generate_series(1, 3) g(i); reset enable_seqscan; reset enable_indexscan; reset enable_bitmapscan; +-- +-- Check that constraint exclusion works correctly with partitions using +-- implicit constraints generated from the partition bound information. 
+-- +create table list_parted ( + a varchar +) partition by list (a); +create table part_ab_cd partition of list_parted for values in ('ab', 'cd'); +create table part_ef_gh partition of list_parted for values in ('ef', 'gh'); +create table part_null_xy partition of list_parted for values in (null, 'xy'); +explain (costs off) select * from list_parted; + QUERY PLAN +-------------------------------- + Append + -> Seq Scan on list_parted + -> Seq Scan on part_ab_cd + -> Seq Scan on part_ef_gh + -> Seq Scan on part_null_xy +(5 rows) + +explain (costs off) select * from list_parted where a is null; + QUERY PLAN +-------------------------------- + Append + -> Seq Scan on list_parted + Filter: (a IS NULL) + -> Seq Scan on part_null_xy + Filter: (a IS NULL) +(5 rows) + +explain (costs off) select * from list_parted where a is not null; + QUERY PLAN +--------------------------------- + Append + -> Seq Scan on list_parted + Filter: (a IS NOT NULL) + -> Seq Scan on part_ab_cd + Filter: (a IS NOT NULL) + -> Seq Scan on part_ef_gh + Filter: (a IS NOT NULL) + -> Seq Scan on part_null_xy + Filter: (a IS NOT NULL) +(9 rows) + +explain (costs off) select * from list_parted where a in ('ab', 'cd', 'ef'); + QUERY PLAN +---------------------------------------------------------- + Append + -> Seq Scan on list_parted + Filter: ((a)::text = ANY ('{ab,cd,ef}'::text[])) + -> Seq Scan on part_ab_cd + Filter: ((a)::text = ANY ('{ab,cd,ef}'::text[])) + -> Seq Scan on part_ef_gh + Filter: ((a)::text = ANY ('{ab,cd,ef}'::text[])) +(7 rows) + +explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd'); + QUERY PLAN +--------------------------------------------------------------------------------------- + Append + -> Seq Scan on list_parted + Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) + -> Seq Scan on part_ab_cd + Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) + -> Seq Scan on part_ef_gh + Filter: (((a)::text 
= 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) + -> Seq Scan on part_null_xy + Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) +(9 rows) + +explain (costs off) select * from list_parted where a = 'ab'; + QUERY PLAN +------------------------------------------ + Append + -> Seq Scan on list_parted + Filter: ((a)::text = 'ab'::text) + -> Seq Scan on part_ab_cd + Filter: ((a)::text = 'ab'::text) +(5 rows) + +create table range_list_parted ( + a int, + b char(2) +) partition by range (a); +create table part_1_10 partition of range_list_parted for values from (1) to (10) partition by list (b); +create table part_1_10_ab partition of part_1_10 for values in ('ab'); +create table part_1_10_cd partition of part_1_10 for values in ('cd'); +create table part_10_20 partition of range_list_parted for values from (10) to (20) partition by list (b); +create table part_10_20_ab partition of part_10_20 for values in ('ab'); +create table part_10_20_cd partition of part_10_20 for values in ('cd'); +create table part_21_30 partition of range_list_parted for values from (21) to (30) partition by list (b); +create table part_21_30_ab partition of part_21_30 for values in ('ab'); +create table part_21_30_cd partition of part_21_30 for values in ('cd'); +create table part_40_inf partition of range_list_parted for values from (40) to (unbounded) partition by list (b); +create table part_40_inf_ab partition of part_40_inf for values in ('ab'); +create table part_40_inf_cd partition of part_40_inf for values in ('cd'); +create table part_40_inf_null partition of part_40_inf for values in (null); +explain (costs off) select * from range_list_parted; + QUERY PLAN +------------------------------------- + Append + -> Seq Scan on range_list_parted + -> Seq Scan on part_1_10 + -> Seq Scan on part_10_20 + -> Seq Scan on part_21_30 + -> Seq Scan on part_40_inf + -> Seq Scan on part_1_10_ab + -> Seq Scan on part_1_10_cd + -> Seq Scan on part_10_20_ab + -> Seq 
Scan on part_10_20_cd + -> Seq Scan on part_21_30_ab + -> Seq Scan on part_21_30_cd + -> Seq Scan on part_40_inf_ab + -> Seq Scan on part_40_inf_cd + -> Seq Scan on part_40_inf_null +(15 rows) + +explain (costs off) select * from range_list_parted where a = 5; + QUERY PLAN +------------------------------------- + Append + -> Seq Scan on range_list_parted + Filter: (a = 5) + -> Seq Scan on part_1_10 + Filter: (a = 5) + -> Seq Scan on part_1_10_ab + Filter: (a = 5) + -> Seq Scan on part_1_10_cd + Filter: (a = 5) +(9 rows) + +explain (costs off) select * from range_list_parted where b = 'ab'; + QUERY PLAN +------------------------------------- + Append + -> Seq Scan on range_list_parted + Filter: (b = 'ab'::bpchar) + -> Seq Scan on part_1_10 + Filter: (b = 'ab'::bpchar) + -> Seq Scan on part_10_20 + Filter: (b = 'ab'::bpchar) + -> Seq Scan on part_21_30 + Filter: (b = 'ab'::bpchar) + -> Seq Scan on part_40_inf + Filter: (b = 'ab'::bpchar) + -> Seq Scan on part_1_10_ab + Filter: (b = 'ab'::bpchar) + -> Seq Scan on part_10_20_ab + Filter: (b = 'ab'::bpchar) + -> Seq Scan on part_21_30_ab + Filter: (b = 'ab'::bpchar) + -> Seq Scan on part_40_inf_ab + Filter: (b = 'ab'::bpchar) +(19 rows) + +explain (costs off) select * from range_list_parted where a between 3 and 23 and b in ('ab'); + QUERY PLAN +----------------------------------------------------------------- + Append + -> Seq Scan on range_list_parted + Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar)) + -> Seq Scan on part_1_10 + Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar)) + -> Seq Scan on part_10_20 + Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar)) + -> Seq Scan on part_21_30 + Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar)) + -> Seq Scan on part_1_10_ab + Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar)) + -> Seq Scan on part_10_20_ab + Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar)) + -> Seq Scan on part_21_30_ab + Filter: ((a >= 3) AND (a <= 23) AND (b = 
'ab'::bpchar)) +(15 rows) + +/* Should select no rows because range partition key cannot be null */ +explain (costs off) select * from range_list_parted where a is null; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +/* Should only select rows from the null-accepting partition */ +explain (costs off) select * from range_list_parted where b is null; + QUERY PLAN +------------------------------------- + Append + -> Seq Scan on range_list_parted + Filter: (b IS NULL) + -> Seq Scan on part_1_10 + Filter: (b IS NULL) + -> Seq Scan on part_10_20 + Filter: (b IS NULL) + -> Seq Scan on part_21_30 + Filter: (b IS NULL) + -> Seq Scan on part_40_inf + Filter: (b IS NULL) + -> Seq Scan on part_40_inf_null + Filter: (b IS NULL) +(13 rows) + +explain (costs off) select * from range_list_parted where a is not null and a < 67; + QUERY PLAN +------------------------------------------------ + Append + -> Seq Scan on range_list_parted + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_1_10 + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_10_20 + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_21_30 + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_40_inf + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_1_10_ab + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_1_10_cd + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_10_20_ab + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_10_20_cd + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_21_30_ab + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_21_30_cd + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_40_inf_ab + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_40_inf_cd + Filter: ((a IS NOT NULL) AND (a < 67)) + -> Seq Scan on part_40_inf_null + Filter: ((a IS NOT NULL) AND (a < 67)) +(29 rows) + +explain (costs off) select * from 
range_list_parted where a >= 30; + QUERY PLAN +------------------------------------- + Append + -> Seq Scan on range_list_parted + Filter: (a >= 30) + -> Seq Scan on part_40_inf + Filter: (a >= 30) + -> Seq Scan on part_40_inf_ab + Filter: (a >= 30) + -> Seq Scan on part_40_inf_cd + Filter: (a >= 30) + -> Seq Scan on part_40_inf_null + Filter: (a >= 30) +(11 rows) + +drop table list_parted cascade; +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table part_ab_cd +drop cascades to table part_ef_gh +drop cascades to table part_null_xy +drop table range_list_parted cascade; +NOTICE: drop cascades to 13 other objects +DETAIL: drop cascades to table part_1_10 +drop cascades to table part_1_10_ab +drop cascades to table part_1_10_cd +drop cascades to table part_10_20 +drop cascades to table part_10_20_ab +drop cascades to table part_10_20_cd +drop cascades to table part_21_30 +drop cascades to table part_21_30_ab +drop cascades to table part_21_30_cd +drop cascades to table part_40_inf +drop cascades to table part_40_inf_ab +drop cascades to table part_40_inf_cd +drop cascades to table part_40_inf_null diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out index 03619d71c3..561cefa3c4 100644 --- a/src/test/regress/expected/insert.out +++ b/src/test/regress/expected/insert.out @@ -160,3 +160,143 @@ Rules: drop table inserttest2; drop table inserttest; drop type insert_test_type; +-- direct partition inserts should check partition bound constraint +create table range_parted ( + a text, + b int +) partition by range (a, (b+0)); +create table part1 partition of range_parted for values from ('a', 1) to ('a', 10); +create table part2 partition of range_parted for values from ('a', 10) to ('a', 20); +create table part3 partition of range_parted for values from ('b', 1) to ('b', 10); +create table part4 partition of range_parted for values from ('b', 10) to ('b', 20); +-- fail +insert into part1 values ('a', 11); +ERROR: new 
row for relation "part1" violates partition constraint +DETAIL: Failing row contains (a, 11). +insert into part1 values ('b', 1); +ERROR: new row for relation "part1" violates partition constraint +DETAIL: Failing row contains (b, 1). +-- ok +insert into part1 values ('a', 1); +-- fail +insert into part4 values ('b', 21); +ERROR: new row for relation "part4" violates partition constraint +DETAIL: Failing row contains (b, 21). +insert into part4 values ('a', 10); +ERROR: new row for relation "part4" violates partition constraint +DETAIL: Failing row contains (a, 10). +-- ok +insert into part4 values ('b', 10); +-- fail (partition key a has a NOT NULL constraint) +insert into part1 values (null); +ERROR: null value in column "a" violates not-null constraint +DETAIL: Failing row contains (null, null). +-- fail (expression key (b+0) cannot be null either) +insert into part1 values (1); +ERROR: new row for relation "part1" violates partition constraint +DETAIL: Failing row contains (1, null). +create table list_parted ( + a text, + b int +) partition by list (lower(a)); +create table part_aa_bb partition of list_parted FOR VALUES IN ('aa', 'bb'); +create table part_cc_dd partition of list_parted FOR VALUES IN ('cc', 'dd'); +create table part_null partition of list_parted FOR VALUES IN (null); +-- fail +insert into part_aa_bb values ('cc', 1); +ERROR: new row for relation "part_aa_bb" violates partition constraint +DETAIL: Failing row contains (cc, 1). +insert into part_aa_bb values ('AAa', 1); +ERROR: new row for relation "part_aa_bb" violates partition constraint +DETAIL: Failing row contains (AAa, 1). +insert into part_aa_bb values (null); +ERROR: new row for relation "part_aa_bb" violates partition constraint +DETAIL: Failing row contains (null, null). 
+-- ok +insert into part_cc_dd values ('cC', 1); +insert into part_null values (null, 0); +-- check in case of multi-level partitioned table +create table part_ee_ff partition of list_parted for values in ('ee', 'ff') partition by range (b); +create table part_ee_ff1 partition of part_ee_ff for values from (1) to (10); +create table part_ee_ff2 partition of part_ee_ff for values from (10) to (20); +-- fail +insert into part_ee_ff1 values ('EE', 11); +ERROR: new row for relation "part_ee_ff1" violates partition constraint +DETAIL: Failing row contains (EE, 11). +-- fail (even the parent's, ie, part_ee_ff's partition constraint applies) +insert into part_ee_ff1 values ('cc', 1); +ERROR: new row for relation "part_ee_ff1" violates partition constraint +DETAIL: Failing row contains (cc, 1). +-- ok +insert into part_ee_ff1 values ('ff', 1); +insert into part_ee_ff2 values ('ff', 11); +-- Check tuple routing for partitioned tables +-- fail +insert into range_parted values ('a', 0); +ERROR: no partition of relation "range_parted" found for row +DETAIL: Failing row contains (a, 0). +-- ok +insert into range_parted values ('a', 1); +insert into range_parted values ('a', 10); +-- fail +insert into range_parted values ('a', 20); +ERROR: no partition of relation "range_parted" found for row +DETAIL: Failing row contains (a, 20). 
+-- ok +insert into range_parted values ('b', 1); +insert into range_parted values ('b', 10); +-- fail (partition key (b+0) is null) +insert into range_parted values ('a'); +ERROR: range partition key of row contains null +select tableoid::regclass, * from range_parted; + tableoid | a | b +----------+---+---- + part1 | a | 1 + part1 | a | 1 + part2 | a | 10 + part3 | b | 1 + part4 | b | 10 + part4 | b | 10 +(6 rows) + +-- ok +insert into list_parted values (null, 1); +insert into list_parted (a) values ('aA'); +-- fail (partition of part_ee_ff not found in both cases) +insert into list_parted values ('EE', 0); +ERROR: no partition of relation "part_ee_ff" found for row +DETAIL: Failing row contains (EE, 0). +insert into part_ee_ff values ('EE', 0); +ERROR: no partition of relation "part_ee_ff" found for row +DETAIL: Failing row contains (EE, 0). +-- ok +insert into list_parted values ('EE', 1); +insert into part_ee_ff values ('EE', 10); +select tableoid::regclass, * from list_parted; + tableoid | a | b +-------------+----+---- + part_aa_bb | aA | + part_cc_dd | cC | 1 + part_null | | 0 + part_null | | 1 + part_ee_ff1 | ff | 1 + part_ee_ff1 | EE | 1 + part_ee_ff2 | ff | 11 + part_ee_ff2 | EE | 10 +(8 rows) + +-- cleanup +drop table range_parted cascade; +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table part1 +drop cascades to table part2 +drop cascades to table part3 +drop cascades to table part4 +drop table list_parted cascade; +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table part_aa_bb +drop cascades to table part_cc_dd +drop cascades to table part_null +drop cascades to table part_ee_ff +drop cascades to table part_ee_ff1 +drop cascades to table part_ee_ff2 diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index b1ebcf60d2..8fa929a6aa 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -120,6 +120,7 @@ 
pg_namespace|t pg_opclass|t pg_operator|t pg_opfamily|t +pg_partitioned_table|t pg_pltemplate|t pg_policy|t pg_proc|t diff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out index 609899e1f7..a1e9255450 100644 --- a/src/test/regress/expected/update.out +++ b/src/test/regress/expected/update.out @@ -198,3 +198,30 @@ INSERT INTO upsert_test VALUES (1, 'Bat') ON CONFLICT(a) DROP TABLE update_test; DROP TABLE upsert_test; +-- update to a partition should check partition bound constraint for the new tuple +create table range_parted ( + a text, + b int +) partition by range (a, b); +create table part_a_1_a_10 partition of range_parted for values from ('a', 1) to ('a', 10); +create table part_a_10_a_20 partition of range_parted for values from ('a', 10) to ('a', 20); +create table part_b_1_b_10 partition of range_parted for values from ('b', 1) to ('b', 10); +create table part_b_10_b_20 partition of range_parted for values from ('b', 10) to ('b', 20); +insert into part_a_1_a_10 values ('a', 1); +insert into part_b_10_b_20 values ('b', 10); +-- fail +update part_a_1_a_10 set a = 'b' where a = 'a'; +ERROR: new row for relation "part_a_1_a_10" violates partition constraint +DETAIL: Failing row contains (b, 1). +update range_parted set b = b - 1 where b = 10; +ERROR: new row for relation "part_b_10_b_20" violates partition constraint +DETAIL: Failing row contains (b, 9). 
+-- ok +update range_parted set b = b + 1 where b = 10; +-- cleanup +drop table range_parted cascade; +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table part_a_1_a_10 +drop cascades to table part_a_10_a_20 +drop cascades to table part_b_1_b_10 +drop cascades to table part_b_10_b_20 diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index c8eed3ec64..c4ed69304f 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -1875,3 +1875,297 @@ ALTER TABLE test_add_column ADD COLUMN c4 integer; \d test_add_column DROP TABLE test_add_column; + +-- unsupported constraint types for partitioned tables +CREATE TABLE partitioned ( + a int, + b int +) PARTITION BY RANGE (a, (a+b+1)); +ALTER TABLE partitioned ADD UNIQUE (a); +ALTER TABLE partitioned ADD PRIMARY KEY (a); +ALTER TABLE partitioned ADD FOREIGN KEY (a) REFERENCES blah; +ALTER TABLE partitioned ADD EXCLUDE USING gist (a WITH &&); + +-- cannot drop column that is part of the partition key +ALTER TABLE partitioned DROP COLUMN a; +ALTER TABLE partitioned ALTER COLUMN a TYPE char(5); +ALTER TABLE partitioned DROP COLUMN b; +ALTER TABLE partitioned ALTER COLUMN b TYPE char(5); + +-- cannot drop NOT NULL on columns in the range partition key +ALTER TABLE partitioned ALTER COLUMN a DROP NOT NULL; + +-- partitioned table cannot partiticipate in regular inheritance +CREATE TABLE foo ( + a int, + b int +); +ALTER TABLE partitioned INHERIT foo; +ALTER TABLE foo INHERIT partitioned; + +-- cannot add NO INHERIT constraint to partitioned tables +ALTER TABLE partitioned ADD CONSTRAINT chk_a CHECK (a > 0) NO INHERIT; + +DROP TABLE partitioned, foo; + +-- +-- ATTACH PARTITION +-- + +-- check that target table is partitioned +CREATE TABLE unparted ( + a int +); +CREATE TABLE fail_part (like unparted); +ALTER TABLE unparted ATTACH PARTITION fail_part FOR VALUES IN ('a'); +DROP TABLE unparted, fail_part; + +-- check that partition bound is 
compatible +CREATE TABLE list_parted ( + a int NOT NULL, + b char(2) COLLATE "en_US", + CONSTRAINT check_a CHECK (a > 0) +) PARTITION BY LIST (a); +CREATE TABLE fail_part (LIKE list_parted); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) TO (10); +DROP TABLE fail_part; + +-- check that the table being attached exists +ALTER TABLE list_parted ATTACH PARTITION nonexistant FOR VALUES IN (1); + +-- check ownership of the source table +CREATE ROLE regress_test_me; +CREATE ROLE regress_test_not_me; +CREATE TABLE not_owned_by_me (LIKE list_parted); +ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me; +SET SESSION AUTHORIZATION regress_test_me; +CREATE TABLE owned_by_me ( + a int +) PARTITION BY LIST (a); +ALTER TABLE owned_by_me ATTACH PARTITION not_owned_by_me FOR VALUES IN (1); +RESET SESSION AUTHORIZATION; +DROP TABLE owned_by_me, not_owned_by_me; +DROP ROLE regress_test_not_me; +DROP ROLE regress_test_me; + +-- check that the table being attached is not part of regular inheritance +CREATE TABLE parent (LIKE list_parted); +CREATE TABLE child () INHERITS (parent); +ALTER TABLE list_parted ATTACH PARTITION child FOR VALUES IN (1); +ALTER TABLE list_parted ATTACH PARTITION parent FOR VALUES IN (1); +DROP TABLE parent CASCADE; + +-- check any TEMP-ness +CREATE TEMP TABLE temp_parted (a int) PARTITION BY LIST (a); +CREATE TABLE perm_part (a int); +ALTER TABLE temp_parted ATTACH PARTITION perm_part FOR VALUES IN (1); +DROP TABLE temp_parted, perm_part; + +-- check that the table being attached is not a typed table +CREATE TYPE mytype AS (a int); +CREATE TABLE fail_part OF mytype; +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +DROP TYPE mytype CASCADE; + +-- check existence (or non-existence) of oid column +ALTER TABLE list_parted SET WITH OIDS; +CREATE TABLE fail_part (a int); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); + +ALTER TABLE list_parted SET WITHOUT OIDS; +ALTER TABLE fail_part SET 
WITH OIDS; +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +DROP TABLE fail_part; + +-- check that the table being attached has only columns present in the parent +CREATE TABLE fail_part (like list_parted, c int); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +DROP TABLE fail_part; + +-- check that the table being attached has every column of the parent +CREATE TABLE fail_part (a int NOT NULL); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +DROP TABLE fail_part; + +-- check that columns match in type, collation and NOT NULL status +CREATE TABLE fail_part ( + b char(3), + a int NOT NULL +); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +ALTER TABLE fail_part ALTER b TYPE char (2) COLLATE "en_CA"; +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +DROP TABLE fail_part; + +-- check that the table being attached has all constraints of the parent +CREATE TABLE fail_part ( + b char(2) COLLATE "en_US", + a int NOT NULL +); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); + +-- check that the constraint matches in definition with parent's constraint +ALTER TABLE fail_part ADD CONSTRAINT check_a CHECK (a >= 0); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); +DROP TABLE fail_part; + +-- check the attributes and constraints after partition is attached +CREATE TABLE part_1 ( + a int NOT NULL, + b char(2) COLLATE "en_US", + CONSTRAINT check_a CHECK (a > 0) +); +ALTER TABLE list_parted ATTACH PARTITION part_1 FOR VALUES IN (1); +-- attislocal and conislocal are always false for merged attributes and constraints respectively. 
+SELECT attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_1'::regclass AND attnum > 0; +SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_1'::regclass AND conname = 'check_a'; + +-- check that the new partition won't overlap with an existing partition +CREATE TABLE fail_part (LIKE part_1 INCLUDING CONSTRAINTS); +ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1); + +-- check validation when attaching list partitions +CREATE TABLE list_parted2 ( + a int, + b char +) PARTITION BY LIST (a); + +-- check that violating rows are correctly reported +CREATE TABLE part_2 (LIKE list_parted2); +INSERT INTO part_2 VALUES (3, 'a'); +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); + +-- should be ok after deleting the bad row +DELETE FROM part_2; +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); + +-- adding constraints that describe the desired partition constraint +-- (or more restrictive) will help skip the validation scan +CREATE TABLE part_3_4 ( + LIKE list_parted2, + CONSTRAINT check_a CHECK (a IN (3)) +); + +-- however, if a list partition does not accept nulls, there should be +-- an explicit NOT NULL constraint on the partition key column for the +-- validation scan to be skipped; +ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4); + +-- adding a NOT NULL constraint will cause the scan to be skipped +ALTER TABLE list_parted2 DETACH PARTITION part_3_4; +ALTER TABLE part_3_4 ALTER a SET NOT NULL; +ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4); + + +-- check validation when attaching range partitions +CREATE TABLE range_parted ( + a int, + b int +) PARTITION BY RANGE (a, b); + +-- check that violating rows are correctly reported +CREATE TABLE part1 ( + a int NOT NULL CHECK (a = 1), + b int NOT NULL CHECK (b >= 1 AND b <= 10) +); +INSERT INTO part1 VALUES (1, 10); +-- Remember the TO bound is exclusive +ALTER TABLE range_parted 
ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10); + +-- should be ok after deleting the bad row +DELETE FROM part1; +ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10); + +-- adding constraints that describe the desired partition constraint +-- (or more restrictive) will help skip the validation scan +CREATE TABLE part2 ( + a int NOT NULL CHECK (a = 1), + b int NOT NULL CHECK (b >= 10 AND b < 18) +); +ALTER TABLE range_parted ATTACH PARTITION part2 FOR VALUES FROM (1, 10) TO (1, 20); + +-- check that leaf partitions are scanned when attaching a partitioned +-- table +CREATE TABLE part_5 ( + LIKE list_parted2 +) PARTITION BY LIST (b); + +-- check that violating rows are correctly reported +CREATE TABLE part_5_a PARTITION OF part_5 FOR VALUES IN ('a'); +INSERT INTO part_5_a (a, b) VALUES (6, 'a'); +ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5); + +-- delete the faulting row and also add a constraint to skip the scan +DELETE FROM part_5_a WHERE a NOT IN (3); +ALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IN (5)), ALTER a SET NOT NULL; +ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5); + + +-- check that the table being attached is not already a partition +ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2); + +-- check that circular inheritance is not allowed +ALTER TABLE part_5 ATTACH PARTITION list_parted2 FOR VALUES IN ('b'); +ALTER TABLE list_parted2 ATTACH PARTITION list_parted2 FOR VALUES IN (0); + +-- +-- DETACH PARTITION +-- + +-- check that the partition being detached exists at all +ALTER TABLE list_parted2 DETACH PARTITION part_4; + +-- check that the partition being detached is actually a partition of the parent +CREATE TABLE not_a_part (a int); +ALTER TABLE list_parted2 DETACH PARTITION not_a_part; +ALTER TABLE list_parted2 DETACH PARTITION part_1; + +-- check that, after being detached, attinhcount/coninhcount is dropped to 0 and +-- attislocal/conislocal is 
set to true +ALTER TABLE list_parted2 DETACH PARTITION part_3_4; +SELECT attinhcount, attislocal FROM pg_attribute WHERE attrelid = 'part_3_4'::regclass AND attnum > 0; +SELECT coninhcount, conislocal FROM pg_constraint WHERE conrelid = 'part_3_4'::regclass AND conname = 'check_a'; +DROP TABLE part_3_4; + +-- Check ALTER TABLE commands for partitioned tables and partitions + +-- cannot add/drop column to/from *only* the parent +ALTER TABLE ONLY list_parted2 ADD COLUMN c int; +ALTER TABLE ONLY list_parted2 DROP COLUMN b; + +-- cannot add a column to partition or drop an inherited one +ALTER TABLE part_2 ADD COLUMN c text; +ALTER TABLE part_2 DROP COLUMN b; + +-- Nor rename, alter type +ALTER TABLE part_2 RENAME COLUMN b to c; +ALTER TABLE part_2 ALTER COLUMN b TYPE text; + +-- cannot add NOT NULL or check constraints to *only* the parent (ie, non-inherited) +ALTER TABLE ONLY list_parted2 ALTER b SET NOT NULL; +ALTER TABLE ONLY list_parted2 add constraint check_b check (b <> 'zz'); +ALTER TABLE list_parted2 add constraint check_b check (b <> 'zz') NO INHERIT; + +-- cannot drop inherited NOT NULL or check constraints from partition +ALTER TABLE list_parted2 ALTER b SET NOT NULL, ADD CONSTRAINT check_a2 CHECK (a > 0); +ALTER TABLE part_2 ALTER b DROP NOT NULL; +ALTER TABLE part_2 DROP CONSTRAINT check_a2; + +-- cannot drop NOT NULL or check constraints from *only* the parent +ALTER TABLE ONLY list_parted2 ALTER a DROP NOT NULL; +ALTER TABLE ONLY list_parted2 DROP CONSTRAINT check_a2; + +-- check that a partition cannot participate in regular inheritance +CREATE TABLE inh_test () INHERITS (part_2); +CREATE TABLE inh_test (LIKE part_2); +ALTER TABLE inh_test INHERIT part_2; +ALTER TABLE part_2 INHERIT inh_test; + +-- cannot drop or alter type of partition key columns of lower level +-- partitioned tables; for example, part_5, which is list_parted2's +-- partition, is partitioned on b; +ALTER TABLE list_parted2 DROP COLUMN b; +ALTER TABLE list_parted2 ALTER COLUMN b TYPE 
text; + +-- cleanup +DROP TABLE list_parted, list_parted2, range_parted CASCADE; diff --git a/src/test/regress/sql/create_table.sql b/src/test/regress/sql/create_table.sql index 78bdc8bf5e..69848e3094 100644 --- a/src/test/regress/sql/create_table.sql +++ b/src/test/regress/sql/create_table.sql @@ -269,3 +269,318 @@ DROP TABLE as_select1; -- check that the oid column is added before the primary key is checked CREATE TABLE oid_pk (f1 INT, PRIMARY KEY(oid)) WITH OIDS; DROP TABLE oid_pk; + +-- +-- Partitioned tables +-- + +-- cannot combine INHERITS and PARTITION BY (although grammar allows) +CREATE TABLE partitioned ( + a int +) INHERITS (some_table) PARTITION BY LIST (a); + +-- cannot use more than 1 column as partition key for list partitioned table +CREATE TABLE partitioned ( + a1 int, + a2 int +) PARTITION BY LIST (a1, a2); -- fail + +-- unsupported constraint type for partitioned tables +CREATE TABLE partitioned ( + a int PRIMARY KEY +) PARTITION BY RANGE (a); + +CREATE TABLE pkrel ( + a int PRIMARY KEY +); +CREATE TABLE partitioned ( + a int REFERENCES pkrel(a) +) PARTITION BY RANGE (a); +DROP TABLE pkrel; + +CREATE TABLE partitioned ( + a int UNIQUE +) PARTITION BY RANGE (a); + +CREATE TABLE partitioned ( + a int, + EXCLUDE USING gist (a WITH &&) +) PARTITION BY RANGE (a); + +-- prevent column from being used twice in the partition key +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (a, a); + +-- prevent using prohibited expressions in the key +CREATE FUNCTION retset (a int) RETURNS SETOF int AS $$ SELECT 1; $$ LANGUAGE SQL IMMUTABLE; +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (retset(a)); +DROP FUNCTION retset(int); + +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE ((avg(a))); + +CREATE TABLE partitioned ( + a int, + b int +) PARTITION BY RANGE ((avg(a) OVER (PARTITION BY b))); + +CREATE TABLE partitioned ( + a int +) PARTITION BY LIST ((a LIKE (SELECT 1))); + +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE 
(('a')); + +CREATE FUNCTION const_func () RETURNS int AS $$ SELECT 1; $$ LANGUAGE SQL IMMUTABLE; +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (const_func()); +DROP FUNCTION const_func(); + +-- only accept "list" and "range" as partitioning strategy +CREATE TABLE partitioned ( + a int +) PARTITION BY HASH (a); + +-- specified column must be present in the table +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (b); + +-- cannot use system columns in partition key +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (xmin); + +-- functions in key must be immutable +CREATE FUNCTION immut_func (a int) RETURNS int AS $$ SELECT a + random()::int; $$ LANGUAGE SQL; +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE (immut_func(a)); +DROP FUNCTION immut_func(int); + +-- cannot contain whole-row references +CREATE TABLE partitioned ( + a int +) PARTITION BY RANGE ((partitioned)); + +-- prevent using columns of unsupported types in key (type must have a btree operator class) +CREATE TABLE partitioned ( + a point +) PARTITION BY LIST (a); +CREATE TABLE partitioned ( + a point +) PARTITION BY LIST (a point_ops); +CREATE TABLE partitioned ( + a point +) PARTITION BY RANGE (a); +CREATE TABLE partitioned ( + a point +) PARTITION BY RANGE (a point_ops); + +-- cannot add NO INHERIT constraints to partitioned tables +CREATE TABLE partitioned ( + a int, + CONSTRAINT check_a CHECK (a > 0) NO INHERIT +) PARTITION BY RANGE (a); + +-- some checks after successful creation of a partitioned table +CREATE FUNCTION plusone(a int) RETURNS INT AS $$ SELECT a+1; $$ LANGUAGE SQL; + +CREATE TABLE partitioned ( + a int, + b int, + c text, + d text +) PARTITION BY RANGE (a oid_ops, plusone(b), c collate "default", d collate "en_US"); + +-- check relkind +SELECT relkind FROM pg_class WHERE relname = 'partitioned'; + +-- check that range partition key columns are marked NOT NULL +SELECT attname, attnotnull FROM pg_attribute WHERE attrelid = 'partitioned'::regclass AND 
attnum > 0; + +-- prevent a function referenced in partition key from being dropped +DROP FUNCTION plusone(int); + +-- partitioned table cannot partiticipate in regular inheritance +CREATE TABLE partitioned2 ( + a int +) PARTITION BY LIST ((a+1)); +CREATE TABLE fail () INHERITS (partitioned2); + +-- Partition key in describe output +\d partitioned +\d partitioned2 + +DROP TABLE partitioned, partitioned2; + +-- +-- Partitions +-- + +-- check partition bound syntax + +CREATE TABLE list_parted ( + a int +) PARTITION BY LIST (a); +-- syntax allows only string literal, numeric literal and null to be +-- specified for a partition bound value +CREATE TABLE part_1 PARTITION OF list_parted FOR VALUES IN ('1'); +CREATE TABLE part_2 PARTITION OF list_parted FOR VALUES IN (2); +CREATE TABLE part_null PARTITION OF list_parted FOR VALUES IN (null); +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN (int '1'); +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ('1'::int); + +-- syntax does not allow empty list of values for list partitions +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN (); +-- trying to specify range for list partitioned table +CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) TO (2); + +-- specified literal can't be cast to the partition column data type +CREATE TABLE bools ( + a bool +) PARTITION BY LIST (a); +CREATE TABLE bools_true PARTITION OF bools FOR VALUES IN (1); +DROP TABLE bools; + +CREATE TABLE range_parted ( + a date +) PARTITION BY RANGE (a); + +-- trying to specify list for range partitioned table +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES IN ('a'); +-- each of start and end bounds must have same number of values as the +-- length of the partition key +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('z'); +CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a') TO ('z', 1); + +-- cannot specify null values in range bounds 
+CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM (null) TO (unbounded); + +-- check if compatible with the specified parent + +-- cannot create as partition of a non-partitioned table +CREATE TABLE unparted ( + a int +); +CREATE TABLE fail_part PARTITION OF unparted FOR VALUES IN ('a'); +DROP TABLE unparted; + +-- cannot create a permanent rel as partition of a temp rel +CREATE TEMP TABLE temp_parted ( + a int +) PARTITION BY LIST (a); +CREATE TABLE fail_part PARTITION OF temp_parted FOR VALUES IN ('a'); +DROP TABLE temp_parted; + +-- cannot create a table with oids as partition of table without oids +CREATE TABLE no_oids_parted ( + a int +) PARTITION BY RANGE (a) WITHOUT OIDS; +CREATE TABLE fail_part PARTITION OF no_oids_parted FOR VALUES FROM (1) TO (10 )WITH OIDS; +DROP TABLE no_oids_parted; + +-- likewise, the reverse if also true +CREATE TABLE oids_parted ( + a int +) PARTITION BY RANGE (a) WITH OIDS; +CREATE TABLE fail_part PARTITION OF oids_parted FOR VALUES FROM (1) TO (10 ) WITHOUT OIDS; +DROP TABLE oids_parted; + +-- check for partition bound overlap and other invalid specifications + +CREATE TABLE list_parted2 ( + a varchar +) PARTITION BY LIST (a); +CREATE TABLE part_null_z PARTITION OF list_parted2 FOR VALUES IN (null, 'z'); +CREATE TABLE part_ab PARTITION OF list_parted2 FOR VALUES IN ('a', 'b'); + +CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN (null); +CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN ('b', 'c'); + +CREATE TABLE range_parted2 ( + a int +) PARTITION BY RANGE (a); + +-- trying to create range partition with empty range +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (0); +-- note that the range '[1, 1)' has no elements +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (1); + +CREATE TABLE part0 PARTITION OF range_parted2 FOR VALUES FROM (unbounded) TO (1); +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (unbounded) TO 
(2); +CREATE TABLE part1 PARTITION OF range_parted2 FOR VALUES FROM (1) TO (10); +CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (9) TO (unbounded); + +-- now check for multi-column range partition key +CREATE TABLE range_parted3 ( + a int, + b int +) PARTITION BY RANGE (a, (b+1)); + +CREATE TABLE part00 PARTITION OF range_parted3 FOR VALUES FROM (0, unbounded) TO (0, unbounded); +CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (0, unbounded) TO (0, 1); + +CREATE TABLE part10 PARTITION OF range_parted3 FOR VALUES FROM (1, unbounded) TO (1, 1); +CREATE TABLE part11 PARTITION OF range_parted3 FOR VALUES FROM (1, 1) TO (1, 10); +CREATE TABLE part12 PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1, unbounded); +CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1, 20); + +-- cannot create a partition that says column b is allowed to range +-- from -infinity to +infinity, while there exist partitions that have +-- more specific ranges +CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, unbounded) TO (1, unbounded); + +-- check schema propagation from parent + +CREATE TABLE parted ( + a text, + b int NOT NULL DEFAULT 0, + CONSTRAINT check_a CHECK (length(a) > 0) +) PARTITION BY LIST (a); + +CREATE TABLE part_a PARTITION OF parted FOR VALUES IN ('a'); + +-- only inherited attributes (never local ones) +SELECT attname, attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_a'::regclass and attnum > 0; + +-- able to specify column default, column constraint, and table constraint +CREATE TABLE part_b PARTITION OF parted ( + b NOT NULL DEFAULT 1 CHECK (b >= 0), + CONSTRAINT check_a CHECK (length(a) > 0) +) FOR VALUES IN ('b'); +-- conislocal should be false for any merged constraints +SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_b'::regclass AND conname = 'check_a'; + +-- specify PARTITION BY for a partition +CREATE TABLE fail_part_col_not_found 
PARTITION OF parted FOR VALUES IN ('c') PARTITION BY RANGE (c); +CREATE TABLE part_c PARTITION OF parted FOR VALUES IN ('c') PARTITION BY RANGE ((b)); + +-- create a level-2 partition +CREATE TABLE part_c_1_10 PARTITION OF part_c FOR VALUES FROM (1) TO (10); + +-- Partition bound in describe output +\d part_b + +-- Both partition bound and partition key in describe output +\d part_c + +-- Show partition count in the parent's describe output +-- Tempted to include \d+ output listing partitions with bound info but +-- output could vary depending on the order in which partition oids are +-- returned. +\d parted + +-- partitions cannot be dropped directly +DROP TABLE part_a; + +-- need to specify CASCADE to drop partitions along with the parent +DROP TABLE parted; + +DROP TABLE parted, list_parted, range_parted, list_parted2, range_parted2, range_parted3 CASCADE; diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql index f45aab1ac6..e22a14ebda 100644 --- a/src/test/regress/sql/inherit.sql +++ b/src/test/regress/sql/inherit.sql @@ -536,3 +536,55 @@ FROM generate_series(1, 3) g(i); reset enable_seqscan; reset enable_indexscan; reset enable_bitmapscan; + +-- +-- Check that constraint exclusion works correctly with partitions using +-- implicit constraints generated from the partition bound information. 
+-- +create table list_parted ( + a varchar +) partition by list (a); +create table part_ab_cd partition of list_parted for values in ('ab', 'cd'); +create table part_ef_gh partition of list_parted for values in ('ef', 'gh'); +create table part_null_xy partition of list_parted for values in (null, 'xy'); + +explain (costs off) select * from list_parted; +explain (costs off) select * from list_parted where a is null; +explain (costs off) select * from list_parted where a is not null; +explain (costs off) select * from list_parted where a in ('ab', 'cd', 'ef'); +explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd'); +explain (costs off) select * from list_parted where a = 'ab'; + +create table range_list_parted ( + a int, + b char(2) +) partition by range (a); +create table part_1_10 partition of range_list_parted for values from (1) to (10) partition by list (b); +create table part_1_10_ab partition of part_1_10 for values in ('ab'); +create table part_1_10_cd partition of part_1_10 for values in ('cd'); +create table part_10_20 partition of range_list_parted for values from (10) to (20) partition by list (b); +create table part_10_20_ab partition of part_10_20 for values in ('ab'); +create table part_10_20_cd partition of part_10_20 for values in ('cd'); +create table part_21_30 partition of range_list_parted for values from (21) to (30) partition by list (b); +create table part_21_30_ab partition of part_21_30 for values in ('ab'); +create table part_21_30_cd partition of part_21_30 for values in ('cd'); +create table part_40_inf partition of range_list_parted for values from (40) to (unbounded) partition by list (b); +create table part_40_inf_ab partition of part_40_inf for values in ('ab'); +create table part_40_inf_cd partition of part_40_inf for values in ('cd'); +create table part_40_inf_null partition of part_40_inf for values in (null); + +explain (costs off) select * from range_list_parted; +explain (costs off) select * from 
range_list_parted where a = 5; +explain (costs off) select * from range_list_parted where b = 'ab'; +explain (costs off) select * from range_list_parted where a between 3 and 23 and b in ('ab'); + +/* Should select no rows because range partition key cannot be null */ +explain (costs off) select * from range_list_parted where a is null; + +/* Should only select rows from the null-accepting partition */ +explain (costs off) select * from range_list_parted where b is null; +explain (costs off) select * from range_list_parted where a is not null and a < 67; +explain (costs off) select * from range_list_parted where a >= 30; + +drop table list_parted cascade; +drop table range_list_parted cascade; diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql index 7924d5d46d..846bb5897a 100644 --- a/src/test/regress/sql/insert.sql +++ b/src/test/regress/sql/insert.sql @@ -84,3 +84,89 @@ create rule irule3 as on insert to inserttest2 do also drop table inserttest2; drop table inserttest; drop type insert_test_type; + +-- direct partition inserts should check partition bound constraint +create table range_parted ( + a text, + b int +) partition by range (a, (b+0)); +create table part1 partition of range_parted for values from ('a', 1) to ('a', 10); +create table part2 partition of range_parted for values from ('a', 10) to ('a', 20); +create table part3 partition of range_parted for values from ('b', 1) to ('b', 10); +create table part4 partition of range_parted for values from ('b', 10) to ('b', 20); + +-- fail +insert into part1 values ('a', 11); +insert into part1 values ('b', 1); +-- ok +insert into part1 values ('a', 1); +-- fail +insert into part4 values ('b', 21); +insert into part4 values ('a', 10); +-- ok +insert into part4 values ('b', 10); + +-- fail (partition key a has a NOT NULL constraint) +insert into part1 values (null); +-- fail (expression key (b+0) cannot be null either) +insert into part1 values (1); + +create table list_parted ( + a 
text, + b int +) partition by list (lower(a)); +create table part_aa_bb partition of list_parted FOR VALUES IN ('aa', 'bb'); +create table part_cc_dd partition of list_parted FOR VALUES IN ('cc', 'dd'); +create table part_null partition of list_parted FOR VALUES IN (null); + +-- fail +insert into part_aa_bb values ('cc', 1); +insert into part_aa_bb values ('AAa', 1); +insert into part_aa_bb values (null); +-- ok +insert into part_cc_dd values ('cC', 1); +insert into part_null values (null, 0); + +-- check in case of multi-level partitioned table +create table part_ee_ff partition of list_parted for values in ('ee', 'ff') partition by range (b); +create table part_ee_ff1 partition of part_ee_ff for values from (1) to (10); +create table part_ee_ff2 partition of part_ee_ff for values from (10) to (20); + +-- fail +insert into part_ee_ff1 values ('EE', 11); +-- fail (even the parent's, ie, part_ee_ff's partition constraint applies) +insert into part_ee_ff1 values ('cc', 1); +-- ok +insert into part_ee_ff1 values ('ff', 1); +insert into part_ee_ff2 values ('ff', 11); + +-- Check tuple routing for partitioned tables + +-- fail +insert into range_parted values ('a', 0); +-- ok +insert into range_parted values ('a', 1); +insert into range_parted values ('a', 10); +-- fail +insert into range_parted values ('a', 20); +-- ok +insert into range_parted values ('b', 1); +insert into range_parted values ('b', 10); +-- fail (partition key (b+0) is null) +insert into range_parted values ('a'); +select tableoid::regclass, * from range_parted; + +-- ok +insert into list_parted values (null, 1); +insert into list_parted (a) values ('aA'); +-- fail (partition of part_ee_ff not found in both cases) +insert into list_parted values ('EE', 0); +insert into part_ee_ff values ('EE', 0); +-- ok +insert into list_parted values ('EE', 1); +insert into part_ee_ff values ('EE', 10); +select tableoid::regclass, * from list_parted; + +-- cleanup +drop table range_parted cascade; +drop table 
list_parted cascade; diff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql index ad58273b38..d7721ed376 100644 --- a/src/test/regress/sql/update.sql +++ b/src/test/regress/sql/update.sql @@ -106,3 +106,24 @@ INSERT INTO upsert_test VALUES (1, 'Bat') ON CONFLICT(a) DROP TABLE update_test; DROP TABLE upsert_test; + +-- update to a partition should check partition bound constraint for the new tuple +create table range_parted ( + a text, + b int +) partition by range (a, b); +create table part_a_1_a_10 partition of range_parted for values from ('a', 1) to ('a', 10); +create table part_a_10_a_20 partition of range_parted for values from ('a', 10) to ('a', 20); +create table part_b_1_b_10 partition of range_parted for values from ('b', 1) to ('b', 10); +create table part_b_10_b_20 partition of range_parted for values from ('b', 10) to ('b', 20); +insert into part_a_1_a_10 values ('a', 1); +insert into part_b_10_b_20 values ('b', 10); + +-- fail +update part_a_1_a_10 set a = 'b' where a = 'a'; +update range_parted set b = b - 1 where b = 10; +-- ok +update range_parted set b = b + 1 where b = 10; + +-- cleanup +drop table range_parted cascade; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index c680216e8e..c8cc8f7568 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1469,6 +1469,11 @@ ParsedText ParsedWord ParserSetupHook ParserState +PartitionBoundInfoData +PartitionBoundSpec +PartitionCmd +PartitionListValue +PartitionRangeBound Path PathClauseUsage PathCostComparison @@ -1660,6 +1665,7 @@ RWConflictPoolHeader Range RangeBound RangeBox +RangeDatumContent RangeFunction RangeIOData RangeQueryClause -- 2.40.0