From fd68553860c5e7ece511cdcfa8abbb3fa66ba3ad Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 4 Nov 2005 23:53:18 +0000 Subject: [PATCH] Editorial review for partitioning/constraint exclusion documentation. --- doc/src/sgml/config.sgml | 8 +- doc/src/sgml/ddl.sgml | 2758 +++++++++++++++++++------------------- 2 files changed, 1396 insertions(+), 1370 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index a1c1496cd2..aabcebd7cd 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,5 +1,5 @@ Server Configuration @@ -1970,13 +1970,13 @@ archive_command = 'copy "%p" /mnt/server/archivedir/"%f"' # Windows Enables or disables the query planner's use of table constraints to - limit table access. The default is off. + optimize queries. The default is off. When this parameter is on, the planner compares query conditions with table CHECK constraints, and - omits scanning tables where the conditions contradict the + omits scanning tables for which the conditions contradict the constraints. (Presently this is done only for child tables of inheritance scans.) For example: @@ -2010,7 +2010,7 @@ SELECT * FROM parent WHERE key = 2400; - Refer to for more information + Refer to for more information on using constraint exclusion and partitioning. diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index a3c9552117..75aabc3f70 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -1,4 +1,4 @@ - + Data Definition @@ -12,7 +12,7 @@ Subsequently, we discuss how tables can be organized into schemas, and how privileges can be assigned to tables. Finally, we will briefly look at other features that affect the data storage, - such as views, functions, and triggers. + such as inheritance, views, functions, and triggers. @@ -399,11 +399,9 @@ CREATE TABLE products ( constraint described in the next section can be used. - - Check constraints can also be used to enhance performance with - very large tables, when used in conjunction with the parameter. This is discussed - in more detail in . + + Check constraints can be useful for enhancing the performance of + partitioned tables. For details see . @@ -895,8 +893,8 @@ CREATE TABLE order_items ( The OID of the table containing this row. This column is particularly handy for queries that select from inheritance - hierarchies, since without it, it's difficult to tell which - individual table a row came from. The + hierarchies (see ), since without it, + it's difficult to tell which individual table a row came from. The tableoid can be joined against the oid column of pg_class to obtain the table name. @@ -1044,1670 +1042,1698 @@ CREATE TABLE order_items ( - - Inheritance - - - not-null constraint - + + Modifying Tables - - constraint - NOT NULL - + + table + modifying + - PostgreSQL implements table inheritance - which can be a useful tool for database designers. SQL:1999 and - later define a type inheritance feature, which differs in many - respects from the features described here. + When you create a table and you realize that you made a mistake, or + the requirements of the application change, then you can drop the + table and create it again. But this is not a convenient option if + the table is already filled with data, or if the table is + referenced by other database objects (for instance a foreign key + constraint). Therefore PostgreSQL + provides a family of commands to make modifications to existing + tables. Note that this is conceptually distinct from altering + the data contained in the table: here we are interested in altering + the definition, or structure, of the table. - Let's start with an example: suppose we are trying to build a data - model for cities. Each state has many cities, but only one - capital. We want to be able to quickly retrieve the capital city - for any particular state. This can be done by creating two tables, - one for state capitals and one for cities that are not - capitals. However, what happens when we want to ask for data about - a city, regardless of whether it is a capital or not? The - inheritance feature can help to resolve this problem. We define the - capitals table so that it inherits from - cities: - - -CREATE TABLE cities ( - name text, - population float, - altitude int -- in feet -); - -CREATE TABLE capitals ( - state char(2) -) INHERITS (cities); - + You can + + + Add columns, + + + Remove columns, + + + Add constraints, + + + Remove constraints, + + + Change default values, + + + Change column data types, + + + Rename columns, + + + Rename tables. + + - In this case, a row of capitals inherits - all the columns of its parent table, cities. State - capitals have an extra attribute, state, that shows - their state. + All these actions are performed using the + + command. - - In PostgreSQL, a table can inherit from - zero or more other tables, and a query can reference either all - rows of a table or all rows of a table plus all of its descendants. - For example, the following query finds the names of all cities, - including state capitals, that are located at an altitude over - 500ft: + + Adding a Column + + + column + adding + + + To add a column, use a command like this: -SELECT name, altitude - FROM cities - WHERE altitude > 500; +ALTER TABLE products ADD COLUMN description text; + The new column is initially filled with whatever default + value is given (null if you don't specify a DEFAULT clause). + - which returns: - + + You can also define constraints on the column at the same time, + using the usual syntax: - name | altitude ------------+---------- - Las Vegas | 2174 - Mariposa | 1953 - Madison | 845 +ALTER TABLE products ADD COLUMN description text CHECK (description <> ''); - + In fact all the options that can be applied to a column description + in CREATE TABLE can be used here. Keep in mind however + that the default value must satisfy the given constraints, or the + ADD will fail. Alternatively, you can add + constraints later (see below) after you've filled in the new column + correctly. + + - - On the other hand, the following query finds all the cities that - are not state capitals and are situated at an altitude over 500ft: + + Removing a Column - -SELECT name, altitude - FROM ONLY cities - WHERE altitude > 500; + + column + removing + - name | altitude ------------+---------- - Las Vegas | 2174 - Mariposa | 1953 + + To remove a column, use a command like this: + +ALTER TABLE products DROP COLUMN description; - + Whatever data was in the column disappears. Table constraints involving + the column are dropped, too. However, if the column is referenced by a + foreign key constraint of another table, + PostgreSQL will not silently drop that + constraint. You can authorize dropping everything that depends on + the column by adding CASCADE: + +ALTER TABLE products DROP COLUMN description CASCADE; + + See for a description of the general + mechanism behind this. + + - - Here the ONLY keyword indicates that the query - should apply only to cities, and not any tables - below cities in the inheritance hierarchy. Many - of the commands that we have already discussed — - SELECT, UPDATE and - DELETE — support the - ONLY keyword. - + + Adding a Constraint - - Inheritance and Permissions - - Because permissions are not inherited automatically, a user - attempting to access a parent table must either have at least the - same permission for the child table or must use the - ONLY notation. If creating a new inheritance - relationship in an existing system be careful that this does not - create problems. - - + + constraint + adding + - - Inheritance does not automatically propagate data from - INSERT or COPY commands to - other tables in the inheritance hierarchy. In our example, the - following INSERT statement will fail: + + To add a constraint, the table constraint syntax is used. For example: -INSERT INTO cities -(name, population, altitude, state) -VALUES ('New York', NULL, NULL, 'NY'); +ALTER TABLE products ADD CHECK (name <> ''); +ALTER TABLE products ADD CONSTRAINT some_name UNIQUE (product_no); +ALTER TABLE products ADD FOREIGN KEY (product_group_id) REFERENCES product_groups; - We might hope that the data would be somehow routed to the - capitals table, though this does not happen. If - the child has no locally defined columns, then it is possible to - route data from the parent to the child using a rule, see . This is not possible with the above - INSERT statement because the state column - does not exist on both parent and child tables. - - - - In some cases you may wish to know which table a particular row - originated from. There is a system column called - tableoid in each table which can tell you the - originating table: - + To add a not-null constraint, which cannot be written as a table + constraint, use this syntax: -SELECT c.tableoid, c.name, c.altitude -FROM cities c -WHERE c.altitude > 500; +ALTER TABLE products ALTER COLUMN product_no SET NOT NULL; + - which returns: + + The constraint will be checked immediately, so the table data must + satisfy the constraint before it can be added. + + - - tableoid | name | altitude -----------+-----------+---------- - 139793 | Las Vegas | 2174 - 139793 | Mariposa | 1953 - 139798 | Madison | 845 - + + Removing a Constraint - (If you try to reproduce this example, you will probably get - different numeric OIDs.) By doing a join with - pg_class you can see the actual table names: + + constraint + removing + + + To remove a constraint you need to know its name. If you gave it + a name then that's easy. Otherwise the system assigned a + generated name, which you need to find out. The + psql command \d + tablename can be helpful + here; other interfaces might also provide a way to inspect table + details. Then the command is: -SELECT p.relname, c.name, c.altitude -FROM cities c, pg_class p -WHERE c.altitude > 500 and c.tableoid = p.oid; +ALTER TABLE products DROP CONSTRAINT some_name; + (If you are dealing with a generated constraint name like $2, + don't forget that you'll need to double-quote it to make it a valid + identifier.) + - which returns: + + As with dropping a column, you need to add CASCADE if you + want to drop a constraint that something else depends on. An example + is that a foreign key constraint depends on a unique or primary key + constraint on the referenced column(s). + + + This works the same for all constraint types except not-null + constraints. To drop a not null constraint use - relname | name | altitude -----------+-----------+---------- - cities | Las Vegas | 2174 - cities | Mariposa | 1953 - capitals | Madison | 845 +ALTER TABLE products ALTER COLUMN product_no DROP NOT NULL; - + (Recall that not-null constraints do not have names.) + + + + + Changing a Column's Default Value + + + default value + changing + + + + To set a new default for a column, use a command like this: + +ALTER TABLE products ALTER COLUMN price SET DEFAULT 7.77; + + Note that this doesn't affect any existing rows in the table, it + just changes the default for future INSERT commands. + + + + To remove any default value, use + +ALTER TABLE products ALTER COLUMN price DROP DEFAULT; + + This is effectively the same as setting the default to null. + As a consequence, it is not an error + to drop a default where one hadn't been defined, because the + default is implicitly the null value. + + + + + Changing a Column's Data Type + + + column data type + changing + + + + To convert a column to a different data type, use a command like this: + +ALTER TABLE products ALTER COLUMN price TYPE numeric(10,2); + + This will succeed only if each existing entry in the column can be + converted to the new type by an implicit cast. If a more complex + conversion is needed, you can add a USING clause that + specifies how to compute the new values from the old. + + + + PostgreSQL will attempt to convert the column's + default value (if any) to the new type, as well as any constraints + that involve the column. But these conversions may fail, or may + produce surprising results. It's often best to drop any constraints + on the column before altering its type, and then add back suitably + modified constraints afterwards. + + + + + Renaming a Column + + + column + renaming + + + + To rename a column: + +ALTER TABLE products RENAME COLUMN product_no TO product_number; + + + + + + Renaming a Table + + + table + renaming + + + + To rename a table: + +ALTER TABLE products RENAME TO items; + + + + + + + Privileges + + + privilege + + + + permission + privilege + - As shown above, a child table may locally define columns as well as - inheriting them from their parents. However, a locally defined - column cannot override the data type of an inherited column of the - same name. A table can inherit from a table that has itself - inherited from other tables. A table can also inherit from more - than one parent table, in which case it inherits the union of the - columns defined by the parent tables. Inherited columns with - duplicate names and data types will be merged so that only a single - column is stored. + When you create a database object, you become its owner. By + default, only the owner of an object can do anything with the + object. In order to allow other users to use it, + privileges must be granted. (However, + users that have the superuser attribute can always + access any object.) - Table inheritance can currently only be defined using the - statement. The related statement CREATE TABLE ... AS - SELECT does not allow inheritance to be specified. There - is no way to add an inheritance link to make an existing table into - a child table. Similarly, there is no way to remove an inheritance - link from a child table once it has been defined, other than using - DROP TABLE. A parent table cannot be dropped - while any of its children remain. If you wish to remove a table and - all of its descendants, then you can do so using the - CASCADE option of the statement. + There are several different privileges: SELECT, + INSERT, UPDATE, DELETE, + RULE, REFERENCES, TRIGGER, + CREATE, TEMPORARY, EXECUTE, and + USAGE. The privileges applicable to a particular + object vary depending on the object's type (table, function, etc). + For complete information on the different types of privileges + supported by PostgreSQL, refer to the + reference + page. The following sections and chapters will also show you how + those privileges are used. - Check constraints can be defined on tables within an inheritance - hierarchy. All check constraints on a parent table are - automatically inherited by all of their children. It is currently - possible to inherit mutually exclusive check constraints, but that - definition quickly shows itself since all attempted row inserts - will be rejected. + The right to modify or destroy an object is always the privilege of + the owner only. + + + To change the owner of a table, index, sequence, or view, use the + + command. There are corresponding ALTER commands for + other object types. + + + - will - propogate any changes in data definition on columns or check - constraints down the inheritance hierarchy. Again, dropping - columns or constraints on parent tables is only possible when using - the CASCADE option. ALTER - TABLE follows the same rules for duplicate column merging - and rejection that apply during CREATE TABLE. + To assign privileges, the GRANT command is + used. For example, if joe is an existing user, and + accounts is an existing table, the privilege to + update the table can be granted with + +GRANT UPDATE ON accounts TO joe; + + To grant a privilege to a group, use this syntax: + +GRANT SELECT ON accounts TO GROUP staff; + + The special user name PUBLIC can + be used to grant a privilege to every user on the system. Writing + ALL in place of a specific privilege grants all + privileges that are relevant for the object type. + + + + To revoke a privilege, use the fittingly named + REVOKE command: + +REVOKE ALL ON accounts FROM PUBLIC; + + The special privileges of the object owner (i.e., the right to do + DROP, GRANT, REVOKE, etc.) + are always implicit in being the owner, + and cannot be granted or revoked. But the object owner can choose + to revoke his own ordinary privileges, for example to make a + table read-only for himself as well as others. + + + + Ordinarily, only the object's owner (or a superuser) can grant or + revoke privileges on an object. However, it is possible to grant a + privilege with grant option, which gives the recipient + the right to grant it in turn to others. If the grant option is + subsequently revoked then all who received the privilege from that + recipient (directly or through a chain of grants) will lose the + privilege. For details see the and reference pages. + + + + + Schemas + + + schema + + + + A PostgreSQL database cluster + contains one or more named databases. Users and groups of users are + shared across the entire cluster, but no other data is shared across + databases. Any given client connection to the server can access + only the data in a single database, the one specified in the connection + request. + + + + + Users of a cluster do not necessarily have the privilege to access every + database in the cluster. Sharing of user names means that there + cannot be different users named, say, joe in two databases + in the same cluster; but the system can be configured to allow + joe access to only some of the databases. + + + + + A database contains one or more named schemas, which + in turn contain tables. Schemas also contain other kinds of named + objects, including data types, functions, and operators. The same + object name can be used in different schemas without conflict; for + example, both schema1 and myschema may + contain tables named mytable. Unlike databases, + schemas are not rigidly separated: a user may access objects in any + of the schemas in the database he is connected to, if he has + privileges to do so. - Both parent and child tables can have primary and foreign keys, so - that they can take part normally on both the referencing and - referenced sides of a foreign key constraint. Indexes may be - defined on any of these columns whether or not they are inherited. - However, a serious current limitation of the inheritance feature is - that indexes (including unique constraints) and foreign key - constraints only apply to single tables and do not also index their - inheritance children. This is true on both sides of a foreign key - constraint. Thus, in the terms of the above example: + There are several reasons why one might want to use schemas: - If we declared cities.name to be - UNIQUE or a PRIMARY KEY, this would not stop the - capitals table from having rows with names duplicating - rows in cities. And those duplicate rows would by - default show up in queries from cities. In fact, by - default capitals would have no unique constraint at all, - and so could contain multiple rows with the same name. - You could add a unique constraint to capitals, but this - would not prevent duplication compared to cities. + To allow many users to use one database without interfering with + each other. - Similarly, if we were to specify that - cities.name REFERENCES some - other table, this constraint would not automatically propagate to - capitals. However, it is possible to set up a - foreign key such as capitals.name - REFERENCES states.name. - So it is possible to workaround this restriction by manually adding - foreign keys to each child table. + To organize database objects into logical groups to make them + more manageable. - Specifying that another table's column REFERENCES - cities(name) would allow the other table to contain city names, but - not capital names. There is no good workaround for this case. + Third-party applications can be put into separate schemas so + they cannot collide with the names of other objects. - These deficiencies will probably be fixed in some future release, - but in the meantime considerable care is needed in deciding whether - inheritance is useful for your problem. - + Schemas are analogous to directories at the operating system level, + except that schemas cannot be nested. - - Deprecated + + Creating a Schema + + + schema + creating + + - In previous versions of PostgreSQL, the - default behavior was not to include child tables in queries. This was - found to be error prone and is also in violation of the SQL - standard. Under the old syntax, to get the sub-tables you append - * to the table name. For example: + To create a schema, use the command CREATE + SCHEMA. Give the schema a name of your choice. For + example: -SELECT * from cities*; +CREATE SCHEMA myschema; - You can still explicitly specify scanning child tables by - appending *, as well as explicitly specify not - scanning child tables by writing ONLY. But - beginning in version 7.1, the default behavior for an undecorated - table name is to scan its child tables too, whereas before the - default was not to do so. To get the old default behavior, - disable the configuration - option. - - - - - - Constraint Exclusion and Partitioning - partitioning + qualified name - constraint exclusion + name + qualified - PostgreSQL supports basic table - partitioning. This section describes why and how you can implement - this as part of your database design. + To create or access objects in a schema, write a + qualified name consisting of the schema name and + table name separated by a dot: + +schema.table + + This works anywhere a table name is expected, including the table + modification commands and the data access commands discussed in + the following chapters. + (For brevity we will speak of tables only, but the same ideas apply + to other kinds of named objects, such as types and functions.) - - Overview - - Currently, partitioning is implemented in conjunction with table - inheritance only, though using fully SQL compliant syntax. - Table inheritance allows tables to be split into partitions, and - constraint exclusion allows partitions to be selectively combined - as needed to satisfy a particular SELECT - statement. You should be familiar with inheritance (see ) before attempting to implement - partitioning. + Actually, the even more general syntax + +database.schema.table + + can be used too, but at present this is just for pro + forma compliance with the SQL standard. If you write a database name, + it must be the same as the database you are connected to. - Partitioning can provide several benefits: - - - - Query performance can be improved dramatically for certain kinds - of queries without the need to maintain costly indexes. - - - - - - Insert performance can be improved by breaking down a large - index into multiple pieces. When an index no longer fits easily - in memory, both read and write operations on the index take - progressively more disk accesses. - - + So to create a table in the new schema, use + +CREATE TABLE myschema.mytable ( + ... +); + + - - - Bulk deletes may be avoided altogether by simply removing one of the - partitions, if that requirement is planned into the partitioning design. - - + + schema + removing + - - - Seldom-used data can be migrated to cheaper and slower storage media. - - - + + To drop a schema if it's empty (all objects in it have been + dropped), use + +DROP SCHEMA myschema; + + To drop a schema including all contained objects, use + +DROP SCHEMA myschema CASCADE; + + See for a description of the general + mechanism behind this. + - The benefits will normally be worthwhile only when a table would - otherwise be very large. The exact point at which a table will - benefit from partitioning depends on the application, although the - size of the table should usually exceed the physical memory of the - database server. + + Often you will want to create a schema owned by someone else + (since this is one of the ways to restrict the activities of your + users to well-defined namespaces). The syntax for that is: + +CREATE SCHEMA schemaname AUTHORIZATION username; + + You can even omit the schema name, in which case the schema name + will be the same as the user name. See for how this can be useful. - The following partitioning types are supported by - PostgreSQL &version;: + Schema names beginning with pg_ are reserved for + system purposes and may not be created by users. + + - - - Range Partitioning + + The Public Schema - - - The table is partitioned along a range defined - by a single column or set of columns, with no overlap between - partitions. Examples might be a date range or a range of - identifiers for particular business objects. - - - + + schema + public + - - List Partitioning + + In the previous sections we created tables without specifying any + schema names. By default, such tables (and other objects) are + automatically put into a schema named public. Every new + database contains such a schema. Thus, the following are equivalent: + +CREATE TABLE products ( ... ); + + and + +CREATE TABLE public.products ( ... ); + + + - - - The table is partitioned by explicitly listing which values - relate to each partition. - - - - + + The Schema Search Path - Hash partitioning is not currently supported. - - + + search path + - - Implementing Partitioning + + unqualified name + - - Partitioning a table is a straightforward process. There - are a wide range of options for you to consider, so judging exactly - when and how to implement partitioning is a more complex topic. We - will address that complexity primarily through the examples in this - section. - + + name + unqualified + - - To use partitioning, do the following: - - - - Create the master table, from which all of the - partitions will inherit. - - - This table will contain no data. Do not define any - constraints or keys on this table, unless you intend them to - be applied equally to all partitions. - - + + Qualified names are tedious to write, and it's often best not to + wire a particular schema name into applications anyway. Therefore + tables are often referred to by unqualified names, + which consist of just the table name. The system determines which table + is meant by following a search path, which is a list + of schemas to look in. The first matching table in the search path + is taken to be the one wanted. If there is no match in the search + path, an error is reported, even if matching table names exist + in other schemas in the database. + - - - Create several child tables that inherit from - the master table. - + + schema + current + - - We will refer to the child tables as partitions, though they - are in every way normal PostgreSQL tables. - - + + The first schema named in the search path is called the current schema. + Aside from being the first schema searched, it is also the schema in + which new tables will be created if the CREATE TABLE + command does not specify a schema name. + - - - Add table constraints to define the allowed values in each partition. - - - Only clauses of the form [COLUMN] [OPERATOR] [CONSTANT(s)] will be used - for constraint exclusion. Simple examples would be: + + search_path + + + + To show the current search path, use the following command: -CHECK ( x = 1 ) -CHECK ( county IN ( 'Oxfordshire','Buckinghamshire','Warwickshire' )) -CHECK ( outletID BETWEEN 1 AND 99 ) +SHOW search_path; + In the default setup this returns: + + search_path +-------------- + $user,public + + The first element specifies that a schema with the same name as + the current user is to be searched. If no such schema exists, + the entry is ignored. The second element refers to the + public schema that we have seen already. + - These can be linked together with the Boolean operators - AND and OR to form - complex constraints. Note that there is no difference in - syntax between range and list partitioning; those terms are - descriptive only. Ensure that the set of values in each child - table do not overlap. - - - - - - Add any other indexes you want to the partitions, bearing in - mind that it is always more efficient to add indexes after - data has been bulk loaded. - - - - - - Optionally, define a rule or trigger to redirect modifications - of the master table to the appropriate partition. - - - - - - - - For example, suppose we are constructing a database for a large - ice cream company. The company measures peak temperatures every - day as well as ice cream sales in each region. They have two - tables: - - -CREATE TABLE cities ( - id int not null, - name text not null, - altitude int -- in feet -); - -CREATE TABLE measurement ( - city_id int not null, - logdate date not null, - peaktemp int, - unitsales int -); - - - To reduce the amount of old data that needs to be stored, we - decide to only keep the most recent 3 years worth of data. At the - beginning of each month we remove the oldest month's data. - - - - Most queries just access the last week, month or quarter's data, - since we need to keep track of sales. As a result we have a large table, - yet only the most frequent 10% is accessed. Most of these queries - are online reports for various levels of management. These queries access - much of the table, so it is difficult to build enough indexes and at - the same time allow us to keep loading all of the data fast enough. - Yet, the reports are online so we need to respond quickly. - - - - In this situation we can use partitioning to help us meet all of our - different requirements for the measurements table. Following the - steps outlined above, partitioning can be enabled as follows: - - - - - - - The measurement table is our master table. - - - - - - Next we create one partition for each month using inheritance: - - -CREATE TABLE measurement_yy04mm02 ( ) INHERITS (measurement); -CREATE TABLE measurement_yy04mm03 ( ) INHERITS (measurement); -... -CREATE TABLE measurement_yy05mm11 ( ) INHERITS (measurement); -CREATE TABLE measurement_yy05mm12 ( ) INHERITS (measurement); -CREATE TABLE measurement_yy06mm01 ( ) INHERITS (measurement); - - - Each of the partitions are complete tables in their own right, - but they inherit their definition from the measurement table. - - - - This solves one of our problems: deleting old data. Each - month, all we need to do is perform a DROP - TABLE on the oldest table and create a new table to - insert into. - - - - - - We now add non-overlapping table constraints, so that our - table creation script becomes: - - -CREATE TABLE measurement_yy04mm02 ( - CHECK ( logdate >= DATE '2004-02-01' AND logdate < DATE '2004-03-01' ) - ) INHERITS (measurement); -CREATE TABLE measurement_yy04mm03 ( - CHECK ( logdate >= DATE '2004-03-01' AND logdate < DATE '2004-04-01' ) - ) INHERITS (measurement); -... -CREATE TABLE measurement_yy05mm11 ( - CHECK ( logdate >= DATE '2005-11-01' AND logdate < DATE '2005-12-01' ) - ) INHERITS (measurement); -CREATE TABLE measurement_yy05mm12 ( - CHECK ( logdate >= DATE '2005-12-01' AND logdate < DATE '2006-01-01' ) - ) INHERITS (measurement); -CREATE TABLE measurement_yy06mm01 ( - CHECK ( logdate >= DATE '2006-01-01' AND logdate < DATE '2006-02-01' ) - ) INHERITS (measurement); - - - - - - - We choose not to add further indexes at this time. - - - - - - Data will be added each day to the latest partition. This - allows us to set up a very simple rule to insert data. We must - redefine this each month so that it always points to the - current partition. - - -CREATE OR REPLACE RULE measurement_current_partition AS -ON INSERT -TO measurement -DO INSTEAD - INSERT INTO measurement_yy06mm01 VALUES ( NEW.city_id, - NEW.logdate, - NEW.peaktemp, - NEW.unitsales ); - - - We might want to insert data and have the server automatically - locate the partition into which the row should be added. We - could do this with a more complex set of rules as shown below. - - -CREATE RULE measurement_insert_yy04mm02 AS -ON INSERT -TO measurement WHERE - ( logdate >= DATE '2004-02-01' AND logdate < DATE '2004-03-01' ) -DO INSTEAD - INSERT INTO measurement_yy04mm02 VALUES ( NEW.city_id, - NEW.logdate, - NEW.peaktemp, - NEW.unitsales ); -... -CREATE RULE measurement_insert_yy05mm12 AS -ON INSERT -TO measurement WHERE - ( logdate >= DATE '2005-12-01' AND logdate < DATE '2006-01-01' ) -DO INSTEAD - INSERT INTO measurement_yy05mm12 VALUES ( NEW.city_id, - NEW.logdate, - NEW.peaktemp, - NEW.unitsales ); -CREATE RULE measurement_insert_yy06mm01 AS -ON INSERT -TO measurement WHERE - ( logdate >= DATE '2006-01-01' AND logdate < DATE '2006-02-01' ) -DO INSTEAD - INSERT INTO measurement_yy06mm01 VALUES ( NEW.city_id, - NEW.logdate, - NEW.peaktemp, - NEW.unitsales ); - - - Note that the WHERE clause in each rule - exactly matches those used for the CHECK - constraints on each partition. - - - - - - - As we can see, a complex partitioning scheme could require a - substantial amount of DDL. In the above example we would be - creating a new partition each month, so it may be wise to write a - script that generates the required DDL automatically. - - - - The following caveats apply: - - - - There is currently no way to specify that all of the - CHECK constraints are mutually - exclusive. Care is required by the database designer. - - - - - - There is currently no way to specify that rows may not be - inserted into the master table. A CHECK - constraint on the master table will be inherited by all child - tables, so that cannot not be used for this purpose. - - - - - - For some data types you must explicitly coerce the constant - values into the data type of the column. The following constraint - will work if x is an integer - data type, but not if x is a - bigint: - -CHECK ( x = 1 ) - - For bigint we must use a constraint like: - -CHECK ( x = 1::bigint ) - - The problem is not limited to the bigint data type - — it can occur whenever the default data type of the - constant does not match the data type of the column to which it - is being compared. - - - - - - Partitioning can also be arranged using a UNION - ALL view: - - -CREATE VIEW measurement AS - SELECT * FROM measurement_yy04mm02 -UNION ALL SELECT * FROM measurement_yy04mm03 -... -UNION ALL SELECT * FROM measurement_yy05mm11 -UNION ALL SELECT * FROM measurement_yy05mm12 -UNION ALL SELECT * FROM measurement_yy06mm01; - - - However, constraint exclusion is currently not supported for - partitioned tables defined in this manner. - - - - - - - - Constraint Exclusion in Queries - - - Partitioning can be used to improve query performance when used in - conjunction with constraint exclusion. As an example: - - -SET constraint_exclusion=true; -SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; - - - Without constraint exclusion, the above query would scan each of - the partitions of the measurement table. With constraint - exclusion, the planner will examine each of the constraints and - try to prove that each of the partitions needs to be involved in - the query. If the planner is able to refute that for any - partition, it excludes the partition from the query plan. - + + The first schema in the search path that exists is the default + location for creating new objects. That is the reason that by + default objects are created in the public schema. When objects + are referenced in any other context without schema qualification + (table modification, data modification, or query commands) the + search path is traversed until a matching object is found. + Therefore, in the default configuration, any unqualified access + again can only refer to the public schema. + - You can use the EXPLAIN command to show the difference - between a plan with constraint_exclusion on and a plan - with it off. - - -SET constraint_exclusion=false; -EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; - - QUERY PLAN ------------------------------------------------------------------------------------------------ - Aggregate (cost=158.66..158.68 rows=1 width=0) - -> Append (cost=0.00..151.88 rows=2715 width=0) - -> Seq Scan on measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) - -> Seq Scan on measurement_yy04mm02 measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) - -> Seq Scan on measurement_yy04mm03 measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) -... - -> Seq Scan on measurement_yy05mm12 measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) - -> Seq Scan on measurement_yy06mm01 measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) - - - Now when we enable constraint exclusion, we get a significantly - reduced plan but the same result set: - - -SET constraint_exclusion=true; -EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; - QUERY PLAN ------------------------------------------------------------------------------------------------ - Aggregate (cost=63.47..63.48 rows=1 width=0) - -> Append (cost=0.00..60.75 rows=1086 width=0) - -> Seq Scan on measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) - -> Seq Scan on measurement_yy06mm01 measurement (cost=0.00..30.38 rows=543 width=0) - Filter: (logdate >= '2006-01-01'::date) - - - Don't forget that you still need to run ANALYZE - on each partition individually. A command like this + To put our new schema in the path, we use -ANALYZE measurement; +SET search_path TO myschema,public; - - only affects the master table. - - - - No indexes are required to use constraint exclusion. The - partitions should be defined with appropriate CHECK - constraints. These are then compared with the predicates of the - SELECT query to determine which partitions must be - scanned. - - - - The following caveats apply to this release: - - - - Constraint exclusion only works when the query directly matches - a constant. A constant bound to a parameterized query will not - work in the same way since the plan is fixed and would need to - vary with each execution. Also, stable constants such as - CURRENT_DATE may not be used, since these are - constant only for during the execution of a single query. Join - conditions will not allow constraint exclusion to work either. - - - - - - UPDATE and DELETE commands - against the master table do not perform constraint exclusion. - - - - - - All constraints on all partitions of the master table are considered for - constraint exclusion, so large numbers of partitions are likely to - increase query planning time considerably. - - - - + (We omit the $user here because we have no + immediate need for it.) And then we can access the table without + schema qualification: + +DROP TABLE mytable; + + Also, since myschema is the first element in + the path, new objects would by default be created in it. - - - - - - Modifying Tables - - - table - modifying - - - - When you create a table and you realize that you made a mistake, or - the requirements of the application change, then you can drop the - table and create it again. But this is not a convenient option if - the table is already filled with data, or if the table is - referenced by other database objects (for instance a foreign key - constraint). Therefore PostgreSQL - provides a family of commands to make modifications to existing - tables. Note that this is conceptually distinct from altering - the data contained in the table: here we are interested in altering - the definition, or structure, of the table. - - - - You can - - - Add columns, - - - Remove columns, - - - Add constraints, - - - Remove constraints, - - - Change default values, - - - Change column data types, - - - Rename columns, - - - Rename tables. - - - - All these actions are performed using the - - command. - - - - Adding a Column - - - column - adding - - - To add a column, use a command like this: + We could also have written -ALTER TABLE products ADD COLUMN description text; +SET search_path TO myschema; - The new column is initially filled with whatever default - value is given (null if you don't specify a DEFAULT clause). + Then we no longer have access to the public schema without + explicit qualification. There is nothing special about the public + schema except that it exists by default. It can be dropped, too. - You can also define constraints on the column at the same time, - using the usual syntax: + See also for other ways to manipulate + the schema search path. + + + + The search path works in the same way for data type names, function names, + and operator names as it does for table names. Data type and function + names can be qualified in exactly the same way as table names. If you + need to write a qualified operator name in an expression, there is a + special provision: you must write + +OPERATOR(schema.operator) + + This is needed to avoid syntactic ambiguity. An example is -ALTER TABLE products ADD COLUMN description text CHECK (description <> ''); +SELECT 3 OPERATOR(pg_catalog.+) 4; - In fact all the options that can be applied to a column description - in CREATE TABLE can be used here. Keep in mind however - that the default value must satisfy the given constraints, or the - ADD will fail. Alternatively, you can add - constraints later (see below) after you've filled in the new column - correctly. + In practice one usually relies on the search path for operators, + so as not to have to write anything so ugly as that. - - Removing a Column + + Schemas and Privileges - - column - removing + + privilege + for schemas - To remove a column, use a command like this: - -ALTER TABLE products DROP COLUMN description; - - Whatever data was in the column disappears. Table constraints involving - the column are dropped, too. However, if the column is referenced by a - foreign key constraint of another table, - PostgreSQL will not silently drop that - constraint. You can authorize dropping everything that depends on - the column by adding CASCADE: + By default, users cannot access any objects in schemas they do not + own. To allow that, the owner of the schema needs to grant the + USAGE privilege on the schema. To allow users + to make use of the objects in the schema, additional privileges + may need to be granted, as appropriate for the object. + + + + A user can also be allowed to create objects in someone else's + schema. To allow that, the CREATE privilege on + the schema needs to be granted. Note that by default, everyone + has CREATE and USAGE privileges on + the schema + public. This allows all users that are able to + connect to a given database to create objects in its + public schema. If you do + not want to allow that, you can revoke that privilege: -ALTER TABLE products DROP COLUMN description CASCADE; +REVOKE CREATE ON SCHEMA public FROM PUBLIC; - See for a description of the general - mechanism behind this. + (The first public is the schema, the second + public means every user. In the + first sense it is an identifier, in the second sense it is a + key word, hence the different capitalization; recall the + guidelines from .) - - Adding a Constraint + + The System Catalog Schema - - constraint - adding + + system catalog + schema - To add a constraint, the table constraint syntax is used. For example: - -ALTER TABLE products ADD CHECK (name <> ''); -ALTER TABLE products ADD CONSTRAINT some_name UNIQUE (product_no); -ALTER TABLE products ADD FOREIGN KEY (product_group_id) REFERENCES product_groups; - - To add a not-null constraint, which cannot be written as a table - constraint, use this syntax: - -ALTER TABLE products ALTER COLUMN product_no SET NOT NULL; - + In addition to public and user-created schemas, each + database contains a pg_catalog schema, which contains + the system tables and all the built-in data types, functions, and + operators. pg_catalog is always effectively part of + the search path. If it is not named explicitly in the path then + it is implicitly searched before searching the path's + schemas. This ensures that built-in names will always be + findable. However, you may explicitly place + pg_catalog at the end of your search path if you + prefer to have user-defined names override built-in names. - The constraint will be checked immediately, so the table data must - satisfy the constraint before it can be added. + In PostgreSQL versions before 7.3, + table names beginning with pg_ were reserved. This is + no longer true: you may create such a table name if you wish, in + any non-system schema. However, it's best to continue to avoid + such names, to ensure that you won't suffer a conflict if some + future version defines a system table named the same as your + table. (With the default search path, an unqualified reference to + your table name would be resolved as the system table instead.) + System tables will continue to follow the convention of having + names beginning with pg_, so that they will not + conflict with unqualified user-table names so long as users avoid + the pg_ prefix. - - Removing a Constraint + + Usage Patterns - - constraint - removing - + + Schemas can be used to organize your data in many ways. There are + a few usage patterns that are recommended and are easily supported by + the default configuration: + + + + If you do not create any schemas then all users access the + public schema implicitly. This simulates the situation where + schemas are not available at all. This setup is mainly + recommended when there is only a single user or a few cooperating + users in a database. This setup also allows smooth transition + from the non-schema-aware world. + + + + + + You can create a schema for each user with the same name as + that user. Recall that the default search path starts with + $user, which resolves to the user name. + Therefore, if each user has a separate schema, they access their + own schemas by default. + + + + If you use this setup then you might also want to revoke access + to the public schema (or drop it altogether), so users are + truly constrained to their own schemas. + + + + + + To install shared applications (tables to be used by everyone, + additional functions provided by third parties, etc.), put them + into separate schemas. Remember to grant appropriate + privileges to allow the other users to access them. Users can + then refer to these additional objects by qualifying the names + with a schema name, or they can put the additional schemas into + their search path, as they choose. + + + + + + + + Portability - To remove a constraint you need to know its name. If you gave it - a name then that's easy. Otherwise the system assigned a - generated name, which you need to find out. The - psql command \d - tablename can be helpful - here; other interfaces might also provide a way to inspect table - details. Then the command is: - -ALTER TABLE products DROP CONSTRAINT some_name; - - (If you are dealing with a generated constraint name like $2, - don't forget that you'll need to double-quote it to make it a valid - identifier.) + In the SQL standard, the notion of objects in the same schema + being owned by different users does not exist. Moreover, some + implementations do not allow you to create schemas that have a + different name than their owner. In fact, the concepts of schema + and user are nearly equivalent in a database system that + implements only the basic schema support specified in the + standard. Therefore, many users consider qualified names to + really consist of + username.tablename. + This is how PostgreSQL will effectively + behave if you create a per-user schema for every user. - As with dropping a column, you need to add CASCADE if you - want to drop a constraint that something else depends on. An example - is that a foreign key constraint depends on a unique or primary key - constraint on the referenced column(s). + Also, there is no concept of a public schema in the + SQL standard. For maximum conformance to the standard, you should + not use (perhaps even remove) the public schema. - This works the same for all constraint types except not-null - constraints. To drop a not null constraint use - -ALTER TABLE products ALTER COLUMN product_no DROP NOT NULL; - - (Recall that not-null constraints do not have names.) + Of course, some SQL database systems might not implement schemas + at all, or provide namespace support by allowing (possibly + limited) cross-database access. If you need to work with those + systems, then maximum portability would be achieved by not using + schemas at all. + - - Changing a Column's Default Value - - - default value - changing - + + Inheritance - - To set a new default for a column, use a command like this: - -ALTER TABLE products ALTER COLUMN price SET DEFAULT 7.77; - - Note that this doesn't affect any existing rows in the table, it - just changes the default for future INSERT commands. - + + inheritance + - - To remove any default value, use - -ALTER TABLE products ALTER COLUMN price DROP DEFAULT; - - This is effectively the same as setting the default to null. - As a consequence, it is not an error - to drop a default where one hadn't been defined, because the - default is implicitly the null value. - - + + table + inheritance + - - Changing a Column's Data Type + + PostgreSQL implements table inheritance + which can be a useful tool for database designers. (SQL:1999 and + later define a type inheritance feature, which differs in many + respects from the features described here.) + - - column data type - changing - + + Let's start with an example: suppose we are trying to build a data + model for cities. Each state has many cities, but only one + capital. We want to be able to quickly retrieve the capital city + for any particular state. This can be done by creating two tables, + one for state capitals and one for cities that are not + capitals. However, what happens when we want to ask for data about + a city, regardless of whether it is a capital or not? The + inheritance feature can help to resolve this problem. We define the + capitals table so that it inherits from + cities: - - To convert a column to a different data type, use a command like this: -ALTER TABLE products ALTER COLUMN price TYPE numeric(10,2); - - This will succeed only if each existing entry in the column can be - converted to the new type by an implicit cast. If a more complex - conversion is needed, you can add a USING clause that - specifies how to compute the new values from the old. - +CREATE TABLE cities ( + name text, + population float, + altitude int -- in feet +); - - PostgreSQL will attempt to convert the column's - default value (if any) to the new type, as well as any constraints - that involve the column. But these conversions may fail, or may - produce surprising results. It's often best to drop any constraints - on the column before altering its type, and then add back suitably - modified constraints afterwards. - - +CREATE TABLE capitals ( + state char(2) +) INHERITS (cities); + - - Renaming a Column + In this case, the capitals table inherits + all the columns of its parent table, cities. State + capitals also have an extra column, state, that shows + their state. + - - column - renaming - + + In PostgreSQL, a table can inherit from + zero or more other tables, and a query can reference either all + rows of a table or all rows of a table plus all of its descendant tables. + The latter behavior is the default. + For example, the following query finds the names of all cities, + including state capitals, that are located at an altitude over + 500ft: - - To rename a column: -ALTER TABLE products RENAME COLUMN product_no TO product_number; +SELECT name, altitude + FROM cities + WHERE altitude > 500; - - - - Renaming a Table - - - table - renaming - + Given the sample data from the PostgreSQL + tutorial (see ), this returns: - - To rename a table: -ALTER TABLE products RENAME TO items; + name | altitude +-----------+---------- + Las Vegas | 2174 + Mariposa | 1953 + Madison | 845 - - - - - - Privileges + - - privilege - + + On the other hand, the following query finds all the cities that + are not state capitals and are situated at an altitude over 500ft: - - permission - privilege - + +SELECT name, altitude + FROM ONLY cities + WHERE altitude > 500; - - When you create a database object, you become its owner. By - default, only the owner of an object can do anything with the - object. In order to allow other users to use it, - privileges must be granted. (However, - users that have the superuser attribute can always - access any object.) + name | altitude +-----------+---------- + Las Vegas | 2174 + Mariposa | 1953 + - There are several different privileges: SELECT, - INSERT, UPDATE, DELETE, - RULE, REFERENCES, TRIGGER, - CREATE, TEMPORARY, EXECUTE, and - USAGE. The privileges applicable to a particular - object vary depending on the object's type (table, function, etc). - For complete information on the different types of privileges - supported by PostgreSQL, refer to the - reference - page. The following sections and chapters will also show you how - those privileges are used. + Here the ONLY keyword indicates that the query + should apply only to cities, and not any tables + below cities in the inheritance hierarchy. Many + of the commands that we have already discussed — + SELECT, UPDATE and + DELETE — support the + ONLY keyword. - The right to modify or destroy an object is always the privilege of - the owner only. - + In some cases you may wish to know which table a particular row + originated from. There is a system column called + tableoid in each table which can tell you the + originating table: - - - To change the owner of a table, index, sequence, or view, use the - - command. There are corresponding ALTER commands for - other object types. - - + +SELECT c.tableoid, c.name, c.altitude +FROM cities c +WHERE c.altitude > 500; + + + which returns: - - To assign privileges, the GRANT command is - used. For example, if joe is an existing user, and - accounts is an existing table, the privilege to - update the table can be granted with -GRANT UPDATE ON accounts TO joe; + tableoid | name | altitude +----------+-----------+---------- + 139793 | Las Vegas | 2174 + 139793 | Mariposa | 1953 + 139798 | Madison | 845 - To grant a privilege to a group, use this syntax: + + (If you try to reproduce this example, you will probably get + different numeric OIDs.) By doing a join with + pg_class you can see the actual table names: + -GRANT SELECT ON accounts TO GROUP staff; +SELECT p.relname, c.name, c.altitude +FROM cities c, pg_class p +WHERE c.altitude > 500 and c.tableoid = p.oid; + + + which returns: + + + relname | name | altitude +----------+-----------+---------- + cities | Las Vegas | 2174 + cities | Mariposa | 1953 + capitals | Madison | 845 - The special user name PUBLIC can - be used to grant a privilege to every user on the system. Writing - ALL in place of a specific privilege grants all - privileges that are relevant for the object type. - To revoke a privilege, use the fittingly named - REVOKE command: + Inheritance does not automatically propagate data from + INSERT or COPY commands to + other tables in the inheritance hierarchy. In our example, the + following INSERT statement will fail: -REVOKE ALL ON accounts FROM PUBLIC; +INSERT INTO cities (name, population, altitude, state) +VALUES ('New York', NULL, NULL, 'NY'); - The special privileges of the object owner (i.e., the right to do - DROP, GRANT, REVOKE, etc.) - are always implicit in being the owner, - and cannot be granted or revoked. But the object owner can choose - to revoke his own ordinary privileges, for example to make a - table read-only for himself as well as others. + We might hope that the data would somehow be routed to the + capitals table, but this does not happen: + INSERT always inserts into exactly the table + specified. In some cases it is possible to redirect the insertion + using a rule (see ). However that does not + help for the above case because the cities table + does not contain the column state, and so the + command will be rejected before the rule can be applied. - Ordinarily, only the object's owner (or a superuser) can grant or - revoke privileges on an object. However, it is possible to grant a - privilege with grant option, which gives the recipient - the right to grant it in turn to others. If the grant option is - subsequently revoked then all who received the privilege from that - recipient (directly or through a chain of grants) will lose the - privilege. For details see the and reference pages. + Check constraints can be defined on tables within an inheritance + hierarchy. All check constraints on a parent table are + automatically inherited by all of its children. Other types of + constraints are not inherited, however. - - - Schemas + + A table can inherit from more than one parent table, in which case it has + the union of the columns defined by the parent tables. Any columns + declared in the child table's definition are added to these. If the + same column name appears in multiple parent tables, or in both a parent + table and the child's definition, then these columns are merged + so that there is only one such column in the child table. To be merged, + columns must have the same data types, else an error is raised. The + merged column will have copies of all the check constraints coming from + any one of the column definitions it came from. + - - schema - + + Table inheritance can currently only be defined using the + statement. The related statement CREATE TABLE AS does + not allow inheritance to be specified. There + is no way to add an inheritance link to make an existing table into + a child table. Similarly, there is no way to remove an inheritance + link from a child table once it has been defined, other than by dropping + the table completely. A parent table cannot be dropped + while any of its children remain. If you wish to remove a table and + all of its descendants, one easy way is to drop the parent table with + the CASCADE option. + - A PostgreSQL database cluster - contains one or more named databases. Users and groups of users are - shared across the entire cluster, but no other data is shared across - databases. Any given client connection to the server can access - only the data in a single database, the one specified in the connection - request. + will + propagate any changes in column data definitions and check + constraints down the inheritance hierarchy. Again, dropping + columns or constraints on parent tables is only possible when using + the CASCADE option. ALTER + TABLE follows the same rules for duplicate column merging + and rejection that apply during CREATE TABLE. - - - Users of a cluster do not necessarily have the privilege to access every - database in the cluster. Sharing of user names means that there - cannot be different users named, say, joe in two databases - in the same cluster; but the system can be configured to allow - joe access to only some of the databases. - - + + Caveats - A database contains one or more named schemas, which - in turn contain tables. Schemas also contain other kinds of named - objects, including data types, functions, and operators. The same - object name can be used in different schemas without conflict; for - example, both schema1 and myschema may - contain tables named mytable. Unlike databases, - schemas are not rigidly separated: a user may access objects in any - of the schemas in the database he is connected to, if he has - privileges to do so. + Table access permissions are not automatically inherited. Therefore, + a user attempting to access a parent table must either have permissions + to do the operation on all its child tables as well, or must use the + ONLY notation. When adding a new child table to + an existing inheritance hierarchy, be careful to grant all the needed + permissions on it. - + - There are several reasons why one might want to use schemas: + A serious limitation of the inheritance feature is that indexes (including + unique constraints) and foreign key constraints only apply to single + tables, not to their inheritance children. This is true on both the + referencing and referenced sides of a foreign key constraint. Thus, + in the terms of the above example: - To allow many users to use one database without interfering with - each other. + If we declared cities.name to be + UNIQUE or a PRIMARY KEY, this would not stop the + capitals table from having rows with names duplicating + rows in cities. And those duplicate rows would by + default show up in queries from cities. In fact, by + default capitals would have no unique constraint at all, + and so could contain multiple rows with the same name. + You could add a unique constraint to capitals, but this + would not prevent duplication compared to cities. - To organize database objects into logical groups to make them - more manageable. + Similarly, if we were to specify that + cities.name REFERENCES some + other table, this constraint would not automatically propagate to + capitals. In this case you could work around it by + manually adding the same REFERENCES constraint to + capitals. - Third-party applications can be put into separate schemas so - they cannot collide with the names of other objects. + Specifying that another table's column REFERENCES + cities(name) would allow the other table to contain city names, but + not capital names. There is no good workaround for this case. - Schemas are analogous to directories at the operating system level, - except that schemas cannot be nested. + These deficiencies will probably be fixed in some future release, + but in the meantime considerable care is needed in deciding whether + inheritance is useful for your problem. - - Creating a Schema - - - schema - creating - - + + Deprecated - To create a schema, use the command CREATE - SCHEMA. Give the schema a name of your choice. For - example: + In previous versions of PostgreSQL, the + default behavior was not to include child tables in queries. This was + found to be error prone and is also in violation of the SQL + standard. Under the old syntax, to include the child tables you append + * to the table name. For example: -CREATE SCHEMA myschema; +SELECT * from cities*; + You can still explicitly specify scanning child tables by + appending *, as well as explicitly specify not + scanning child tables by writing ONLY. But + beginning in version 7.1, the default behavior for an undecorated + table name is to scan its child tables too, whereas before the + default was not to do so. To get the old default behavior, + disable the configuration + option. + + + + + + + Partitioning - qualified name + partitioning - name - qualified + table + partitioning - To create or access objects in a schema, write a - qualified name consisting of the schema name and - table name separated by a dot: - -schema.table - - This works anywhere a table name is expected, including the table - modification commands and the data access commands discussed in - the following chapters. - (For brevity we will speak of tables only, but the same ideas apply - to other kinds of named objects, such as types and functions.) + PostgreSQL supports basic table + partitioning. This section describes why and how you can implement + partitioning as part of your database design. + + Overview + - Actually, the even more general syntax - -database.schema.table - - can be used too, but at present this is just for pro - forma compliance with the SQL standard. If you write a database name, - it must be the same as the database you are connected to. + Partitioning refers to splitting what is logically one large table + into smaller physical pieces. + Partitioning can provide several benefits: + + + + Query performance can be improved dramatically for certain kinds + of queries. + + + + + + Update performance can be improved too, since each piece of the table + has indexes smaller than an index on the entire data set would be. + When an index no longer fits easily + in memory, both read and write operations on the index take + progressively more disk accesses. + + + + + + Bulk deletes may be accomplished by simply removing one of the + partitions, if that requirement is planned into the partitioning design. + DROP TABLE is far faster than a bulk DELETE, + to say nothing of the ensuing VACUUM overhead. + + + + + + Seldom-used data can be migrated to cheaper and slower storage media. + + + + + The benefits will normally be worthwhile only when a table would + otherwise be very large. The exact point at which a table will + benefit from partitioning depends on the application, although a + rule of thumb is that the size of the table should exceed the physical + memory of the database server. - So to create a table in the new schema, use - -CREATE TABLE myschema.mytable ( - ... -); - + Currently, PostgreSQL supports partitioning + via table inheritance. Each partition must be created as a child + table of a single parent table. The parent table itself is normally + empty; it exists just to represent the entire data set. You should be + familiar with inheritance (see ) before + attempting to implement partitioning. - - schema - removing - + + The following forms of partitioning can be implemented in + PostgreSQL: + + + + Range Partitioning + + + + The table is partitioned into ranges defined + by a key column or set of columns, with no overlap between + the ranges of values assigned to different partitions. For + example one might partition by date ranges, or by ranges of + identifiers for particular business objects. + + + + + + List Partitioning + + + + The table is partitioned by explicitly listing which key values + appear in each partition. + + + + + + Hash partitioning is not currently supported. + + + + + Implementing Partitioning + + + To set up a partitioned table, do the following: + + + + Create the master table, from which all of the + partitions will inherit. + + + This table will contain no data. Do not define any check + constraints on this table, unless you intend them to + be applied equally to all partitions. There is no point + in defining any indexes or unique constraints on it, either. + + + + + + Create several child tables that each inherit from + the master table. Normally, these tables will not add any columns + to the set inherited from the master. + + + + We will refer to the child tables as partitions, though they + are in every way normal PostgreSQL tables. + + + + + + Add table constraints to the partition tables to define the + allowed key values in each partition. + - - To drop a schema if it's empty (all objects in it have been - dropped), use + + Typical examples would be: -DROP SCHEMA myschema; +CHECK ( x = 1 ) +CHECK ( county IN ( 'Oxfordshire', 'Buckinghamshire', 'Warwickshire' )) +CHECK ( outletID >= 100 AND outletID < 200 ) - To drop a schema including all contained objects, use + Ensure that the constraints guarantee that there is no overlap + between the key values permitted in different partitions. A common + mistake is to set up range constraints like this: -DROP SCHEMA myschema CASCADE; +CHECK ( outletID BETWEEN 100 AND 200 ) +CHECK ( outletID BETWEEN 200 AND 300 ) - See for a description of the general - mechanism behind this. - + This is wrong since it is not clear which partition the key value + 200 belongs in. + - - Often you will want to create a schema owned by someone else - (since this is one of the ways to restrict the activities of your - users to well-defined namespaces). The syntax for that is: - -CREATE SCHEMA schemaname AUTHORIZATION username; - - You can even omit the schema name, in which case the schema name - will be the same as the user name. See for how this can be useful. - + + Note that there is no difference in + syntax between range and list partitioning; those terms are + descriptive only. + + - - Schema names beginning with pg_ are reserved for - system purposes and may not be created by users. - - + + + For each partition, create an index on the key column(s), + as well as any other indexes you might want. (The key index is + not strictly necessary, but in most scenarios it is helpful. + If you intend the key values to be unique then you should + always create a unique or primary-key constraint for each + partition.) + + - - The Public Schema + + + Optionally, define a rule or trigger to redirect modifications + of the master table to the appropriate partition. + + - - schema - public - + + + Ensure that the + configuration + parameter is enabled in postgresql.conf. Without + this, queries will not be optimized as desired. + + + + + + + + For example, suppose we are constructing a database for a large + ice cream company. The company measures peak temperatures every + day as well as ice cream sales in each region. Conceptually, + we want a table like this: - - In the previous sections we created tables without specifying any - schema names. By default, such tables (and other objects) are - automatically put into a schema named public. Every new - database contains such a schema. Thus, the following are equivalent: - -CREATE TABLE products ( ... ); - - and -CREATE TABLE public.products ( ... ); +CREATE TABLE measurement ( + city_id int not null, + logdate date not null, + peaktemp int, + unitsales int +); - - - - The Schema Search Path + We know that most queries will access just the last week's, month's or + quarter's data, since the main use of this table will be to prepare + online reports for management. + To reduce the amount of old data that needs to be stored, we + decide to only keep the most recent 3 years worth of data. At the + beginning of each month we will remove the oldest month's data. + - - search path - + + In this situation we can use partitioning to help us meet all of our + different requirements for the measurements table. Following the + steps outlined above, partitioning can be set up as follows: + - - unqualified name - + + + + + The master table is the measurement table, declared + exactly as above. + + - - name - unqualified - + + + Next we create one partition for each active month: - - Qualified names are tedious to write, and it's often best not to - wire a particular schema name into applications anyway. Therefore - tables are often referred to by unqualified names, - which consist of just the table name. The system determines which table - is meant by following a search path, which is a list - of schemas to look in. The first matching table in the search path - is taken to be the one wanted. If there is no match in the search - path, an error is reported, even if matching table names exist - in other schemas in the database. - + +CREATE TABLE measurement_yy04mm02 ( ) INHERITS (measurement); +CREATE TABLE measurement_yy04mm03 ( ) INHERITS (measurement); +... +CREATE TABLE measurement_yy05mm11 ( ) INHERITS (measurement); +CREATE TABLE measurement_yy05mm12 ( ) INHERITS (measurement); +CREATE TABLE measurement_yy06mm01 ( ) INHERITS (measurement); + - - schema - current - + Each of the partitions are complete tables in their own right, + but they inherit their definition from the + measurement table. + - - The first schema named in the search path is called the current schema. - Aside from being the first schema searched, it is also the schema in - which new tables will be created if the CREATE TABLE - command does not specify a schema name. - + + This solves one of our problems: deleting old data. Each + month, all we will need to do is perform a DROP + TABLE on the oldest child table and create a new + child table for the new month's data. + + - - search_path - + + + We must add non-overlapping table constraints, so that our + table creation script becomes: - - To show the current search path, use the following command: - -SHOW search_path; + +CREATE TABLE measurement_yy04mm02 ( + CHECK ( logdate >= DATE '2004-02-01' AND logdate < DATE '2004-03-01' ) +) INHERITS (measurement); +CREATE TABLE measurement_yy04mm03 ( + CHECK ( logdate >= DATE '2004-03-01' AND logdate < DATE '2004-04-01' ) +) INHERITS (measurement); +... +CREATE TABLE measurement_yy05mm11 ( + CHECK ( logdate >= DATE '2005-11-01' AND logdate < DATE '2005-12-01' ) +) INHERITS (measurement); +CREATE TABLE measurement_yy05mm12 ( + CHECK ( logdate >= DATE '2005-12-01' AND logdate < DATE '2006-01-01' ) +) INHERITS (measurement); +CREATE TABLE measurement_yy06mm01 ( + CHECK ( logdate >= DATE '2006-01-01' AND logdate < DATE '2006-02-01' ) +) INHERITS (measurement); - In the default setup this returns: - - search_path --------------- - $user,public - - The first element specifies that a schema with the same name as - the current user is to be searched. If no such schema exists, - the entry is ignored. The second element refers to the - public schema that we have seen already. - + + - - The first schema in the search path that exists is the default - location for creating new objects. That is the reason that by - default objects are created in the public schema. When objects - are referenced in any other context without schema qualification - (table modification, data modification, or query commands) the - search path is traversed until a matching object is found. - Therefore, in the default configuration, any unqualified access - again can only refer to the public schema. - + + + We probably need indexes on the key columns too: - - To put our new schema in the path, we use - -SET search_path TO myschema,public; + +CREATE INDEX measurement_yy04mm02_logdate ON measurement_yy04mm02 (logdate); +CREATE INDEX measurement_yy04mm03_logdate ON measurement_yy04mm03 (logdate); +... +CREATE INDEX measurement_yy05mm11_logdate ON measurement_yy05mm11 (logdate); +CREATE INDEX measurement_yy05mm12_logdate ON measurement_yy05mm12 (logdate); +CREATE INDEX measurement_yy06mm01_logdate ON measurement_yy06mm01 (logdate); - (We omit the $user here because we have no - immediate need for it.) And then we can access the table without - schema qualification: + + We choose not to add further indexes at this time. + + + + + + If data will be added only to the latest partition, we can + set up a very simple rule to insert data. We must + redefine this each month so that it always points to the + current partition. + -DROP TABLE mytable; +CREATE OR REPLACE RULE measurement_current_partition AS +ON INSERT TO measurement +DO INSTEAD + INSERT INTO measurement_yy06mm01 VALUES ( NEW.city_id, + NEW.logdate, + NEW.peaktemp, + NEW.unitsales ); - Also, since myschema is the first element in - the path, new objects would by default be created in it. - - - We could also have written + We might want to insert data and have the server automatically + locate the partition into which the row should be added. We + could do this with a more complex set of rules as shown below. + -SET search_path TO myschema; +CREATE RULE measurement_insert_yy04mm02 AS +ON INSERT TO measurement WHERE + ( logdate >= DATE '2004-02-01' AND logdate < DATE '2004-03-01' ) +DO INSTEAD + INSERT INTO measurement_yy04mm02 VALUES ( NEW.city_id, + NEW.logdate, + NEW.peaktemp, + NEW.unitsales ); +... +CREATE RULE measurement_insert_yy05mm12 AS +ON INSERT TO measurement WHERE + ( logdate >= DATE '2005-12-01' AND logdate < DATE '2006-01-01' ) +DO INSTEAD + INSERT INTO measurement_yy05mm12 VALUES ( NEW.city_id, + NEW.logdate, + NEW.peaktemp, + NEW.unitsales ); +CREATE RULE measurement_insert_yy06mm01 AS +ON INSERT TO measurement WHERE + ( logdate >= DATE '2006-01-01' AND logdate < DATE '2006-02-01' ) +DO INSTEAD + INSERT INTO measurement_yy06mm01 VALUES ( NEW.city_id, + NEW.logdate, + NEW.peaktemp, + NEW.unitsales ); - Then we no longer have access to the public schema without - explicit qualification. There is nothing special about the public - schema except that it exists by default. It can be dropped, too. - + + Note that the WHERE clause in each rule + exactly matches the the CHECK + constraint for its partition. + + + + + + + As we can see, a complex partitioning scheme could require a + substantial amount of DDL. In the above example we would be + creating a new partition each month, so it may be wise to write a + script that generates the required DDL automatically. + - See also for other ways to manipulate - the schema search path. + The following caveats apply: + + + + There is currently no way to verify that all of the + CHECK constraints are mutually + exclusive. Care is required by the database designer. + + + + + + There is currently no simple way to specify that rows must not be + inserted into the master table. A CHECK (false) + constraint on the master table would be inherited by all child + tables, so that cannot be used for this purpose. One possibility is + to set up an ON INSERT trigger on the master table that + always raises an error. (Alternatively, such a trigger could be + used to redirect the data into the proper child table, instead of + using a set of rules as suggested above.) + + + - The search path works in the same way for data type names, function names, - and operator names as it does for table names. Data type and function - names can be qualified in exactly the same way as table names. If you - need to write a qualified operator name in an expression, there is a - special provision: you must write - -OPERATOR(schema.operator) - - This is needed to avoid syntactic ambiguity. An example is + Partitioning can also be arranged using a UNION ALL + view: + -SELECT 3 OPERATOR(pg_catalog.+) 4; +CREATE VIEW measurement AS + SELECT * FROM measurement_yy04mm02 +UNION ALL SELECT * FROM measurement_yy04mm03 +... +UNION ALL SELECT * FROM measurement_yy05mm11 +UNION ALL SELECT * FROM measurement_yy05mm12 +UNION ALL SELECT * FROM measurement_yy06mm01; - In practice one usually relies on the search path for operators, - so as not to have to write anything so ugly as that. + + However, constraint exclusion is currently not supported for + partitioned tables defined in this manner. Also, the need to + recreate the view adds an extra step to adding and dropping + individual partitions of the dataset. - + - - Schemas and Privileges + + Partitioning and Constraint Exclusion - - privilege - for schemas + + constraint exclusion - By default, users cannot access any objects in schemas they do not - own. To allow that, the owner of the schema needs to grant the - USAGE privilege on the schema. To allow users - to make use of the objects in the schema, additional privileges - may need to be granted, as appropriate for the object. - + Constraint exclusion is a query optimization technique + that improves performance for partitioned tables defined in the + fashion described above. As an example: - - A user can also be allowed to create objects in someone else's - schema. To allow that, the CREATE privilege on - the schema needs to be granted. Note that by default, everyone - has CREATE and USAGE privileges on - the schema - public. This allows all users that are able to - connect to a given database to create objects in its - public schema. If you do - not want to allow that, you can revoke that privilege: -REVOKE CREATE ON SCHEMA public FROM PUBLIC; +SET constraint_exclusion = on; +SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; - (The first public is the schema, the second - public means every user. In the - first sense it is an identifier, in the second sense it is a - key word, hence the different capitalization; recall the - guidelines from .) + + Without constraint exclusion, the above query would scan each of + the partitions of the measurement table. With constraint + exclusion enabled, the planner will examine the constraints of each + partition and try to prove that the partition need not + be scanned because it could not contain any rows meeting the query's + WHERE clause. When the planner can prove this, it + excludes the partition from the query plan. - - - The System Catalog Schema + + You can use the EXPLAIN command to show the difference + between a plan with constraint_exclusion on and a plan + with it off. A typical default plan for this type of table setup is: - - system catalog - schema - + +SET constraint_exclusion = off; +EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; - - In addition to public and user-created schemas, each - database contains a pg_catalog schema, which contains - the system tables and all the built-in data types, functions, and - operators. pg_catalog is always effectively part of - the search path. If it is not named explicitly in the path then - it is implicitly searched before searching the path's - schemas. This ensures that built-in names will always be - findable. However, you may explicitly place - pg_catalog at the end of your search path if you - prefer to have user-defined names override built-in names. + QUERY PLAN +----------------------------------------------------------------------------------------------- + Aggregate (cost=158.66..158.68 rows=1 width=0) + -> Append (cost=0.00..151.88 rows=2715 width=0) + -> Seq Scan on measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2006-01-01'::date) + -> Seq Scan on measurement_yy04mm02 measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2006-01-01'::date) + -> Seq Scan on measurement_yy04mm03 measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2006-01-01'::date) +... + -> Seq Scan on measurement_yy05mm12 measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2006-01-01'::date) + -> Seq Scan on measurement_yy06mm01 measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2006-01-01'::date) + + + Some or all of the partitions might use index scans instead of + full-table sequential scans, but the point here is that there + is no need to scan the older partitions at all to answer this query. + When we enable constraint exclusion, we get a significantly + reduced plan that will deliver the same answer: + + +SET constraint_exclusion = on; +EXPLAIN SELECT count(*) FROM measurement WHERE logdate >= DATE '2006-01-01'; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Aggregate (cost=63.47..63.48 rows=1 width=0) + -> Append (cost=0.00..60.75 rows=1086 width=0) + -> Seq Scan on measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2006-01-01'::date) + -> Seq Scan on measurement_yy06mm01 measurement (cost=0.00..30.38 rows=543 width=0) + Filter: (logdate >= '2006-01-01'::date) + - In PostgreSQL versions before 7.3, - table names beginning with pg_ were reserved. This is - no longer true: you may create such a table name if you wish, in - any non-system schema. However, it's best to continue to avoid - such names, to ensure that you won't suffer a conflict if some - future version defines a system table named the same as your - table. (With the default search path, an unqualified reference to - your table name would be resolved as the system table instead.) - System tables will continue to follow the convention of having - names beginning with pg_, so that they will not - conflict with unqualified user-table names so long as users avoid - the pg_ prefix. + Note that constraint exclusion is driven only by CHECK + constraints, not by the presence of indexes. Therefore it isn't + necessary to define indexes on the key columns. Whether an index + needs to be created for a given partition depends on whether you + expect that queries that scan the partition will generally scan + a large part of the partition or just a small part. An index will + be helpful in the latter case but not the former. - - - - Usage Patterns - Schemas can be used to organize your data in many ways. There are - a few usage patterns that are recommended and are easily supported by - the default configuration: - - - - If you do not create any schemas then all users access the - public schema implicitly. This simulates the situation where - schemas are not available at all. This setup is mainly - recommended when there is only a single user or a few cooperating - users in a database. This setup also allows smooth transition - from the non-schema-aware world. - - - - - - You can create a schema for each user with the same name as - that user. Recall that the default search path starts with - $user, which resolves to the user name. - Therefore, if each user has a separate schema, they access their - own schemas by default. - + The following caveats apply: - - If you use this setup then you might also want to revoke access - to the public schema (or drop it altogether), so users are - truly constrained to their own schemas. - - + + + + Constraint exclusion only works when the query's WHERE + clause contains constants. A parameterized query will not be + optimized, since the planner cannot know what partitions the + parameter value might select at runtime. For the same reason, + stable functions such as CURRENT_DATE + must be avoided. Joining the partition key to a column of another + table will not be optimized, either. + + - - - To install shared applications (tables to be used by everyone, - additional functions provided by third parties, etc.), put them - into separate schemas. Remember to grant appropriate - privileges to allow the other users to access them. Users can - then refer to these additional objects by qualifying the names - with a schema name, or they can put the additional schemas into - their search path, as they choose. - - - - - + + + Avoid cross-datatype comparisons in the CHECK + constraints, as the planner will currently fail to prove such + conditions false. For example, the following constraint + will work if x is an integer + column, but not if x is a + bigint: + +CHECK ( x = 1 ) + + For a bigint column we must use a constraint like: + +CHECK ( x = 1::bigint ) + + The problem is not limited to the bigint data type + — it can occur whenever the default data type of the + constant does not match the data type of the column to which it + is being compared. Cross-datatype comparisons in the supplied + queries are usually OK, just not in the CHECK conditions. + + - - Portability + + + UPDATE and DELETE commands + against the master table do not currently perform constraint exclusion. + + - - In the SQL standard, the notion of objects in the same schema - being owned by different users does not exist. Moreover, some - implementations do not allow you to create schemas that have a - different name than their owner. In fact, the concepts of schema - and user are nearly equivalent in a database system that - implements only the basic schema support specified in the - standard. Therefore, many users consider qualified names to - really consist of - username.tablename. - This is how PostgreSQL will effectively - behave if you create a per-user schema for every user. - + + + All constraints on all partitions of the master table are considered for + constraint exclusion, so large numbers of partitions are likely to + increase query planning time considerably. + + - - Also, there is no concept of a public schema in the - SQL standard. For maximum conformance to the standard, you should - not use (perhaps even remove) the public schema. - + + + Don't forget that you still need to run ANALYZE + on each partition individually. A command like + +ANALYZE measurement; + + will only process the master table. + + - - Of course, some SQL database systems might not implement schemas - at all, or provide namespace support by allowing (possibly - limited) cross-database access. If you need to work with those - systems, then maximum portability would be achieved by not using - schemas at all. + @@ -2798,7 +2824,7 @@ DROP TABLE products CASCADE; and all the dependent objects will be removed. In this case, it doesn't remove the orders table, it only removes the foreign key - constraint. (If you want to check what DROP ... CASCADE will do, + constraint. (If you want to check what DROP ... CASCADE will do, run DROP without CASCADE and read the NOTICE messages.) -- 2.50.0