From 1b342df00af318055a1cf432c3eaa3b74347df39 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 11 Nov 2002 20:14:04 +0000 Subject: [PATCH] Merge documentation updates from 7.3 branch. --- doc/src/sgml/advanced.sgml | 46 +- doc/src/sgml/array.sgml | 24 +- doc/src/sgml/backup.sgml | 51 +- doc/src/sgml/client-auth.sgml | 29 +- doc/src/sgml/datatype.sgml | 795 ++++++++----------- doc/src/sgml/datetime.sgml | 552 ++++++------- doc/src/sgml/ddl.sgml | 16 +- doc/src/sgml/diskusage.sgml | 11 +- doc/src/sgml/dml.sgml | 19 +- doc/src/sgml/func.sgml | 1324 ++++++++++++++++--------------- doc/src/sgml/indices.sgml | 176 +--- doc/src/sgml/install-win32.sgml | 8 - doc/src/sgml/installation.sgml | 88 +- doc/src/sgml/keywords.sgml | 130 ++- doc/src/sgml/libpq.sgml | 17 +- doc/src/sgml/maintenance.sgml | 19 +- doc/src/sgml/manage-ag.sgml | 4 +- doc/src/sgml/monitoring.sgml | 374 ++++----- doc/src/sgml/mvcc.sgml | 117 ++- doc/src/sgml/perform.sgml | 238 +++--- doc/src/sgml/queries.sgml | 20 +- doc/src/sgml/query.sgml | 34 +- doc/src/sgml/runtime.sgml | 430 +++++----- doc/src/sgml/start.sgml | 10 +- doc/src/sgml/syntax.sgml | 117 +-- doc/src/sgml/typeconv.sgml | 24 +- doc/src/sgml/user-manag.sgml | 8 +- doc/src/sgml/user.sgml | 4 +- 28 files changed, 2268 insertions(+), 2417 deletions(-) diff --git a/doc/src/sgml/advanced.sgml b/doc/src/sgml/advanced.sgml index 1a39a9b8ff..cc5d8cd52d 100644 --- a/doc/src/sgml/advanced.sgml +++ b/doc/src/sgml/advanced.sgml @@ -1,5 +1,5 @@ @@ -46,14 +46,14 @@ $Header: /cvsroot/pgsql/doc/src/sgml/advanced.sgml,v 1.30 2002/10/24 17:48:54 pe view over the query, which gives a name to the query that you can refer to like an ordinary table. - + CREATE VIEW myview AS SELECT city, temp_lo, temp_hi, prcp, date, location FROM weather, cities WHERE city = name; SELECT * FROM myview; - + @@ -101,7 +101,7 @@ SELECT * FROM myview; The new declaration of the tables would look like this: - + CREATE TABLE cities ( city varchar(80) primary key, location point @@ -114,23 +114,23 @@ CREATE TABLE weather ( prcp real, date date ); - + Now try inserting an invalid record: - + INSERT INTO weather VALUES ('Berkeley', 45, 53, 0.0, '1994-11-28'); - + - + ERROR: <unnamed> referential integrity violation - key referenced from weather not found in cities - + The behavior of foreign keys can be finely tuned to your application. We will not go beyond this simple example in this - tutorial, but just refer you to the &cite-reference; + tutorial, but just refer you to the &cite-user; for more information. Making correct use of foreign keys will definitely improve the quality of your database applications, so you are strongly encouraged to learn about them. @@ -161,7 +161,7 @@ ERROR: <unnamed> referential integrity violation - key referenced from we to Bob's account. Simplifying outrageously, the SQL commands for this might look like - + UPDATE accounts SET balance = balance - 100.00 WHERE name = 'Alice'; UPDATE branches SET balance = balance - 100.00 @@ -170,7 +170,7 @@ UPDATE accounts SET balance = balance + 100.00 WHERE name = 'Bob'; UPDATE branches SET balance = balance + 100.00 WHERE name = (SELECT branch_name FROM accounts WHERE name = 'Bob'); - + @@ -222,13 +222,13 @@ UPDATE branches SET balance = balance + 100.00 BEGIN and COMMIT commands. So our banking transaction would actually look like - + BEGIN; UPDATE accounts SET balance = balance - 100.00 WHERE name = 'Alice'; -- etc etc COMMIT; - + @@ -278,7 +278,7 @@ COMMIT; implicitly when you list all cities. 
If you're really clever you might invent some scheme like this: - + CREATE TABLE capitals ( name text, population real, @@ -296,7 +296,7 @@ CREATE VIEW cities AS SELECT name, population, altitude FROM capitals UNION SELECT name, population, altitude FROM non_capitals; - + This works OK as far as querying goes, but it gets ugly when you need to update several rows, to name one thing. @@ -305,7 +305,7 @@ CREATE VIEW cities AS A better solution is this: - + CREATE TABLE cities ( name text, population real, @@ -315,7 +315,7 @@ CREATE TABLE cities ( CREATE TABLE capitals ( state char(2) ) INHERITS (cities); - + @@ -336,11 +336,11 @@ CREATE TABLE capitals ( including state capitals, that are located at an altitude over 500 ft.: - + SELECT name, altitude FROM cities WHERE altitude > 500; - + which returns: @@ -359,11 +359,11 @@ SELECT name, altitude all the cities that are not state capitals and are situated at an altitude of 500 ft. or higher: - + SELECT name, altitude FROM ONLY cities WHERE altitude > 500; - + name | altitude @@ -380,7 +380,7 @@ SELECT name, altitude cities table, and not tables below cities in the inheritance hierarchy. Many of the commands that we have already discussed -- - SELECT, UPDATE and + SELECT, UPDATE, and DELETE -- support this ONLY notation. diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml index dc2b08161e..b9900b4c7d 100644 --- a/doc/src/sgml/array.sgml +++ b/doc/src/sgml/array.sgml @@ -1,4 +1,4 @@ - + Arrays @@ -21,7 +21,7 @@ CREATE TABLE sal_emp ( As shown, an array data type is named by appending square brackets ([]) to the data type name of the array elements. - The above query will create a table named + The above command will create a table named sal_emp with columns including a text string (name), a one-dimensional array of type @@ -68,7 +68,7 @@ SELECT name FROM sal_emp WHERE pay_by_quarter[1] <> pay_by_quarter[2]; The array subscript numbers are written within square brackets. By default PostgreSQL uses the - one-based numbering convention for arrays, that is, + one-based numbering convention for arrays, that is, an array of n elements starts with array[1] and ends with array[n]. @@ -90,10 +90,9 @@ SELECT pay_by_quarter[3] FROM sal_emp; We can also access arbitrary rectangular slices of an array, or subarrays. An array slice is denoted by writing - lower subscript : - upper subscript for one or more - array dimensions. This query retrieves the first item on Bill's - schedule for the first two days of the week: + lower-bound:upper-bound + for one or more array dimensions. This query retrieves the first + item on Bill's schedule for the first two days of the week: SELECT schedule[1:2][1:1] FROM sal_emp WHERE name = 'Bill'; @@ -112,9 +111,10 @@ SELECT schedule[1:2][1] FROM sal_emp WHERE name = 'Bill'; with the same result. An array subscripting operation is taken to represent an array slice if any of the subscripts are written in the - form lower : - upper. A lower bound of 1 is assumed for - any subscript where only one value is specified. + form + lower:upper. + A lower bound of 1 is assumed for any subscript where only one value + is specified. @@ -310,7 +310,7 @@ SELECT * FROM sal_emp WHERE pay_by_quarter **= 10000; - Remember that what you write in an SQL query will first be interpreted + Remember that what you write in an SQL command will first be interpreted as a string literal, and then as an array. This doubles the number of backslashes you need. 
For example, to insert a text array value containing a backslash and a double quote, you'd need to write @@ -323,7 +323,7 @@ INSERT ... VALUES ('{"\\\\","\\""}'); become \ and " respectively. (If we were working with a data type whose input routine also treated backslashes specially, bytea for example, we might need as many as eight backslashes - in the query to get one backslash into the stored array element.) + in the command to get one backslash into the stored array element.) diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml index 8c3e8246aa..327990e558 100644 --- a/doc/src/sgml/backup.sgml +++ b/doc/src/sgml/backup.sgml @@ -1,5 +1,5 @@ Backup and Restore @@ -64,7 +64,7 @@ pg_dump dbname > As any other PostgreSQL client application, pg_dump will by default connect with the database - user name that is equal to the current Unix user name. To override + user name that is equal to the current operating system user name. To override this, either specify the option or set the environment variable PGUSER. Remember that pg_dump connections are subject to the normal @@ -104,9 +104,9 @@ psql dbname < where infile is what you used as outfile - for the pg_dump command. The database pg_dump command. The database dbname will not be created by this - command, you must create it yourself from template0 before executing + command, you must create it yourself from template0 before executing psql (e.g., with createdb -T template0 dbname). psql supports similar options to pg_dump @@ -129,23 +129,22 @@ psql dbname < pg_dump and psql to write to or read from pipes makes it possible to dump a database directly from one server to another, for example - pg_dump -h host1 dbname | psql -h host2 dbname - - - - - The dumps produced by pg_dump are relative to template0. This means - that any languages, procedures, etc. added to template1 will also be - dumped by pg_dump. As a result, when restoring, if - you are using a customized template1, you must create the empty - database from template0, as in the example above. - - + + + The dumps produced by pg_dump are relative to + template0. This means that any languages, procedures, + etc. added to template1 will also be dumped by + pg_dump. As a result, when restoring, if you are + using a customized template1, you must create the + empty database from template0, as in the example + above. + + @@ -222,20 +221,16 @@ cat filename.gz | gunzip | psql pg_dump dbname | split -b 1m - filename - Reload with - createdb dbname cat filename* | psql dbname - @@ -249,14 +244,11 @@ cat filename* | psql pg_dump -Fc dbname > filename - See the pg_dump and pg_restore reference pages for details. - @@ -284,7 +276,7 @@ pg_dump -Fc dbname > For reasons of backward compatibility, pg_dump does not dump large objects by default. To dump large objects you must use - either the custom or the TAR output format, and use the -b option in + either the custom or the TAR output format, and use the @@ -9,24 +9,23 @@ $Header: /cvsroot/pgsql/doc/src/sgml/mvcc.sgml,v 2.28 2002/09/21 18:32:53 petere concurrency - - - Multiversion Concurrency Control - (MVCC) - is an advanced technique for improving database performance in a - multiuser environment. - Vadim Mikheev (vadim@krs.ru) provided - the implementation for PostgreSQL. - - + + This chapter describes the behavior of the PostgreSQL database + system when two or more sessions try to access the same data at the + same time. 
The goals in that situation are to allow efficient + access for all sessions while maintaining strict data integrity. + Every developer of database applications should be familiar with + the topics covered in this chapter. + Introduction - Unlike most other database systems which use locks for concurrency control, + Unlike traditional database systems which use locks for concurrency control, PostgreSQL - maintains data consistency by using a multiversion model. + maintains data consistency by using a multiversion model + (Multiversion Concurrency Control, MVCC). This means that while querying a database each transaction sees a snapshot of data (a database version) as it was some @@ -56,7 +55,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/mvcc.sgml,v 2.28 2002/09/21 18:32:53 petere Transaction Isolation - The ANSI/ISO SQL + The SQL standard defines four levels of transaction isolation in terms of three phenomena that must be prevented between concurrent transactions. @@ -65,8 +64,8 @@ $Header: /cvsroot/pgsql/doc/src/sgml/mvcc.sgml,v 2.28 2002/09/21 18:32:53 petere - dirty reads - dirty reads + dirty read + dirty read @@ -77,8 +76,8 @@ $Header: /cvsroot/pgsql/doc/src/sgml/mvcc.sgml,v 2.28 2002/09/21 18:32:53 petere - non-repeatable reads - non-repeatable reads + nonrepeatable read + nonrepeatable read @@ -92,7 +91,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/mvcc.sgml,v 2.28 2002/09/21 18:32:53 petere phantom read - phantom reads + phantom read @@ -111,6 +110,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/mvcc.sgml,v 2.28 2002/09/21 18:32:53 petere The four transaction isolation levels and the corresponding behaviors are described in . + <acronym>SQL</acronym> Transaction Isolation Levels @@ -125,7 +125,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/mvcc.sgml,v 2.28 2002/09/21 18:32:53 petere Dirty Read - Non-Repeatable Read + Nonrepeatable Read Phantom Read @@ -195,15 +195,13 @@ $Header: /cvsroot/pgsql/doc/src/sgml/mvcc.sgml,v 2.28 2002/09/21 18:32:53 petere
PostgreSQL offers the read committed and serializable isolation levels.
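Either level can be selected for an individual transaction with the SET TRANSACTION command. As a minimal sketch (borrowing the accounts table from the tutorial's banking example), a serializable transaction might look like this:

BEGIN;
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-- all queries in this transaction now see one fixed snapshot
SELECT sum(balance) FROM accounts;
COMMIT;

Note that SET TRANSACTION ISOLATION LEVEL must be issued before the first query of the transaction block, since the level cannot be changed once the transaction has started executing queries.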
- + Read Committed Isolation Level @@ -229,7 +227,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/mvcc.sgml,v 2.28 2002/09/21 18:32:53 petere - UPDATE, DELETE and SELECT + UPDATE, DELETE, and SELECT FOR UPDATE commands behave the same as SELECT in terms of searching for target rows: they will only find target rows that were committed as of the query start time. However, such a target @@ -287,9 +285,9 @@ COMMIT; be necessary to guarantee a more rigorously consistent view of the database than the Read Committed mode provides. - + - + Serializable Isolation Level @@ -316,13 +314,13 @@ COMMIT; committed.) This is different from Read Committed in that the SELECT sees a snapshot as of the start of the transaction, not as of the start - of the current query within the transaction. Successive + of the current query within the transaction. Thus, successive SELECTs within a single transaction always see the same data. - UPDATE, DELETE and SELECT + UPDATE, DELETE, and SELECT FOR UPDATE commands behave the same as SELECT in terms of searching for target rows: they will only find target rows that were committed as of the transaction start time. However, such a @@ -370,7 +368,8 @@ ERROR: Can't serialize access due to concurrent update a transaction performs several successive queries that must see identical views of the database. - + + Explicit Locking @@ -421,8 +420,7 @@ ERROR: Can't serialize access due to concurrent update To examine a list of the currently outstanding locks in a database server, use the pg_locks system view. For more information on monitoring the status of the lock - manager subsystem, refer to the Administrator's - Guide. + manager subsystem, refer to the &cite-admin;. @@ -647,14 +645,14 @@ ERROR: Can't serialize access due to concurrent update Use of explicit locking can cause deadlocks, wherein two (or more) transactions each hold locks that the other wants. - For example, if transaction 1 acquires exclusive lock on table A - and then tries to acquire exclusive lock on table B, while transaction - 2 has already exclusive-locked table B and now wants exclusive lock + For example, if transaction 1 acquires an exclusive lock on table A + and then tries to acquire an exclusive lock on table B, while transaction + 2 has already exclusive-locked table B and now wants an exclusive lock on table A, then neither one can proceed. PostgreSQL automatically detects deadlock situations and resolves them by aborting one of the transactions involved, allowing the other(s) to complete. (Exactly which transaction - will be aborted is difficult to predict, and should not be relied on.) + will be aborted is difficult to predict and should not be relied on.) @@ -678,7 +676,7 @@ ERROR: Can't serialize access due to concurrent update - Data consistency checks at the application level + Data Consistency Checks at the Application Level Because readers in PostgreSQL @@ -718,11 +716,10 @@ ERROR: Can't serialize access due to concurrent update - Before version 6.5 PostgreSQL - used read-locks and so the - above consideration is also the case - when upgrading to 6.5 (or higher) from previous - PostgreSQL versions. + Before version 6.5 PostgreSQL used + read locks, and so the above consideration is also the case when + upgrading from PostgreSQL versions + prior to 6.5. 
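As a concrete sketch of the row-locking technique just described (again borrowing the accounts table from the tutorial's banking example):

BEGIN;
-- lock the row so no concurrent transaction can change it under us
SELECT balance FROM accounts WHERE name = 'Alice' FOR UPDATE;
-- the application may now validate the retrieved balance
UPDATE accounts SET balance = balance - 100.00 WHERE name = 'Alice';
COMMIT;

Under the default Read Committed level, the FOR UPDATE clause makes the transaction wait for any concurrent updater of the selected row and then re-fetch the current row version, so the subsequent UPDATE is guaranteed to act on up-to-date data.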
@@ -732,7 +729,7 @@ ERROR: Can't serialize access due to concurrent update example, a banking application might wish to check that the sum of all credits in one table equals the sum of debits in another table, when both tables are being actively updated. Comparing the results of two - successive SELECT SUM(...) commands will not work reliably under + successive SELECT SUM(...) commands will not work reliably under Read Committed mode, since the second query will likely include the results of transactions not counted by the first. Doing the two sums in a single serializable transaction will give an accurate picture of the @@ -758,7 +755,8 @@ ERROR: Can't serialize access due to concurrent update the table are still running --- but if the snapshot seen by the transaction predates obtaining the lock, it may predate some now-committed changes in the table. A serializable transaction's snapshot is actually - frozen at the start of its first query (SELECT/INSERT/UPDATE/DELETE), so + frozen at the start of its first query (SELECT, INSERT, + UPDATE, or DELETE), so it's possible to obtain explicit locks before the snapshot is frozen. @@ -781,47 +779,40 @@ ERROR: Can't serialize access due to concurrent update - GiST and R-Tree indexes + B-tree indexes - Share/exclusive index-level locks are used for read/write access. - Locks are released after statement is done. + Short-term share/exclusive page-level locks are used for + read/write access. Locks are released immediately after each + index tuple is fetched or inserted. B-tree indexes provide + the highest concurrency without deadlock conditions. - Hash indexes + GiST and R-tree indexes - Share/exclusive page-level locks are used for read/write access. - Locks are released after page is processed. - - - - Page-level locks provide better concurrency than index-level ones - but are subject to deadlocks. + Share/exclusive index-level locks are used for read/write access. + Locks are released after the statement (command) is done. - B-tree indexes + Hash indexes - Short-term share/exclusive page-level locks are used for - read/write access. Locks are released immediately after each index - tuple is fetched/inserted. - - - - B-tree indexes provide the highest concurrency without deadlock - conditions. + Share/exclusive page-level locks are used for read/write + access. Locks are released after the page is processed. + Page-level locks provide better concurrency than index-level + ones but are liable to deadlocks. diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index 05727523f0..fb2dd96b03 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -1,5 +1,5 @@ @@ -32,30 +32,30 @@ $Header: /cvsroot/pgsql/doc/src/sgml/perform.sgml,v 1.21 2002/09/21 18:32:53 pet - Estimated start-up cost (time expended before output scan can start, - e.g., time to do the sorting in a SORT node). + Estimated start-up cost (Time expended before output scan can start, + e.g., time to do the sorting in a sort node.) - Estimated total cost (if all tuples are retrieved, which they may not - be --- a query with a LIMIT will stop short of paying the total cost, - for example). + Estimated total cost (If all rows are retrieved, which they may not + be --- a query with a LIMIT clause will stop short of paying the total cost, + for example.) - Estimated number of rows output by this plan node (again, only if - executed to completion). + Estimated number of rows output by this plan node (Again, only if + executed to completion.) 
Estimated average width (in bytes) of rows output by this plan - node. + node @@ -64,9 +64,9 @@ $Header: /cvsroot/pgsql/doc/src/sgml/perform.sgml,v 1.21 2002/09/21 18:32:53 pet The costs are measured in units of disk page fetches. (CPU effort estimates are converted into disk-page units using some - fairly arbitrary fudge-factors. If you want to experiment with these + fairly arbitrary fudge factors. If you want to experiment with these factors, see the list of run-time configuration parameters in the - Administrator's Guide.) + &cite-admin;.) @@ -74,17 +74,17 @@ $Header: /cvsroot/pgsql/doc/src/sgml/perform.sgml,v 1.21 2002/09/21 18:32:53 pet the cost of all its child nodes. It's also important to realize that the cost only reflects things that the planner/optimizer cares about. In particular, the cost does not consider the time spent transmitting - result tuples to the frontend --- which could be a pretty dominant + result rows to the frontend --- which could be a pretty dominant factor in the true elapsed time, but the planner ignores it because it cannot change it by altering the plan. (Every correct plan will - output the same tuple set, we trust.) + output the same row set, we trust.) Rows output is a little tricky because it is not the number of rows processed/scanned by the query --- it is usually less, reflecting the - estimated selectivity of any WHERE-clause constraints that are being + estimated selectivity of any WHERE-clause constraints that are being applied at this node. Ideally the top-level rows estimate will approximate the number of rows actually returned, updated, or deleted by the query. @@ -92,44 +92,44 @@ $Header: /cvsroot/pgsql/doc/src/sgml/perform.sgml,v 1.21 2002/09/21 18:32:53 pet Here are some examples (using the regress test database after a - vacuum analyze, and 7.3 development sources): + VACUUM ANALYZE, and 7.3 development sources): - + regression=# EXPLAIN SELECT * FROM tenk1; QUERY PLAN ------------------------------------------------------------- Seq Scan on tenk1 (cost=0.00..333.00 rows=10000 width=148) - + This is about as straightforward as it gets. If you do - + SELECT * FROM pg_class WHERE relname = 'tenk1'; - + you will find out that tenk1 has 233 disk - pages and 10000 tuples. So the cost is estimated at 233 page - reads, defined as 1.0 apiece, plus 10000 * cpu_tuple_cost which is - currently 0.01 (try show cpu_tuple_cost). + pages and 10000 rows. So the cost is estimated at 233 page + reads, defined as costing 1.0 apiece, plus 10000 * cpu_tuple_cost which is + currently 0.01 (try SHOW cpu_tuple_cost). - Now let's modify the query to add a WHERE condition: + Now let's modify the query to add a WHERE condition: - + regression=# EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 1000; QUERY PLAN ------------------------------------------------------------ Seq Scan on tenk1 (cost=0.00..358.00 rows=1033 width=148) Filter: (unique1 < 1000) - + - The estimate of output rows has gone down because of the WHERE clause. + The estimate of output rows has gone down because of the WHERE clause. However, the scan will still have to visit all 10000 rows, so the cost hasn't decreased; in fact it has gone up a bit to reflect the extra CPU - time spent checking the WHERE condition. + time spent checking the WHERE condition. 
@@ -144,26 +144,26 @@ regression=# EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 1000; Modify the query to restrict the condition even more: - + regression=# EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 50; QUERY PLAN ------------------------------------------------------------------------------- Index Scan using tenk1_unique1 on tenk1 (cost=0.00..179.33 rows=49 width=148) Index Cond: (unique1 < 50) - + - and you will see that if we make the WHERE condition selective + and you will see that if we make the WHERE condition selective enough, the planner will eventually decide that an index scan is cheaper than a sequential scan. - This plan will only have to visit 50 tuples because of the index, + This plan will only have to visit 50 rows because of the index, so it wins despite the fact that each individual fetch is more expensive than reading a whole disk page sequentially. - Add another clause to the WHERE condition: + Add another clause to the WHERE condition: - + regression=# EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 50 AND regression-# stringu1 = 'xxx'; QUERY PLAN @@ -171,11 +171,11 @@ regression-# stringu1 = 'xxx'; Index Scan using tenk1_unique1 on tenk1 (cost=0.00..179.45 rows=1 width=148) Index Cond: (unique1 < 50) Filter: (stringu1 = 'xxx'::name) - + The added clause stringu1 = 'xxx' reduces the output-rows estimate, but not the cost because we still have to visit the - same set of tuples. Notice that the stringu1 clause + same set of rows. Notice that the stringu1 clause cannot be applied as an index condition (since this index is only on the unique1 column). Instead it is applied as a filter on the rows retrieved by the index. Thus the cost has actually gone up @@ -185,7 +185,7 @@ regression-# stringu1 = 'xxx'; Let's try joining two tables, using the fields we have been discussing: - + regression=# EXPLAIN SELECT * FROM tenk1 t1, tenk2 t2 WHERE t1.unique1 < 50 regression-# AND t1.unique2 = t2.unique2; QUERY PLAN @@ -197,30 +197,30 @@ regression-# AND t1.unique2 = t2.unique2; -> Index Scan using tenk2_unique2 on tenk2 t2 (cost=0.00..3.01 rows=1 width=148) Index Cond: ("outer".unique2 = t2.unique2) - + In this nested-loop join, the outer scan is the same index scan we had in the example before last, and so its cost and row count are the same - because we are applying the unique1 < 50 WHERE clause at that node. + because we are applying the unique1 < 50 WHERE clause at that node. The t1.unique2 = t2.unique2 clause is not relevant yet, so it doesn't - affect row count of the outer scan. For the inner scan, the unique2 value of the + affect row count of the outer scan. For the inner scan, the unique2 value of the current - outer-scan tuple is plugged into the inner index scan + outer-scan row is plugged into the inner index scan to produce an index condition like t2.unique2 = constant. So we get the - same inner-scan plan and costs that we'd get from, say, explain select - * from tenk2 where unique2 = 42. The costs of the loop node are then set + same inner-scan plan and costs that we'd get from, say, EXPLAIN SELECT + * FROM tenk2 WHERE unique2 = 42. The costs of the loop node are then set on the basis of the cost of the outer scan, plus one repetition of the - inner scan for each outer tuple (49 * 3.01, here), plus a little CPU + inner scan for each outer row (49 * 3.01, here), plus a little CPU time for join processing. 
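Spelling out that arithmetic with the numbers shown in the plan (a worked sketch):

  179.33      total cost of the outer index scan
+ 147.49      one inner index scan per outer row: 49 * 3.01
 -------
  326.82      plus the little CPU time for join processing mentioned above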
In this example the loop's output row count is the same as the product of the two scans' row counts, but that's not true in general, because - in general you can have WHERE clauses that mention both relations and + in general you can have WHERE clauses that mention both relations and so can only be applied at the join point, not to either input scan. For example, if we added WHERE ... AND t1.hundred < t2.hundred, that would decrease the output row count of the join node, but not change @@ -233,9 +233,9 @@ regression-# AND t1.unique2 = t2.unique2; flags for each plan type. (This is a crude tool, but useful. See also .) - -regression=# set enable_nestloop = off; -SET VARIABLE + +regression=# SET enable_nestloop = off; +SET regression=# EXPLAIN SELECT * FROM tenk1 t1, tenk2 t2 WHERE t1.unique1 < 50 regression-# AND t1.unique2 = t2.unique2; QUERY PLAN @@ -247,25 +247,25 @@ regression-# AND t1.unique2 = t2.unique2; -> Index Scan using tenk1_unique1 on tenk1 t1 (cost=0.00..179.33 rows=49 width=148) Index Cond: (unique1 < 50) - + This plan proposes to extract the 50 interesting rows of tenk1 using ye same olde index scan, stash them into an in-memory hash table, and then do a sequential scan of tenk2, probing into the hash table - for possible matches of t1.unique2 = t2.unique2 at each tenk2 tuple. + for possible matches of t1.unique2 = t2.unique2 at each tenk2 row. The cost to read tenk1 and set up the hash table is entirely start-up - cost for the hash join, since we won't get any tuples out until we can + cost for the hash join, since we won't get any rows out until we can start reading tenk2. The total time estimate for the join also - includes a hefty charge for CPU time to probe the hash table - 10000 times. Note, however, that we are NOT charging 10000 times 179.33; + includes a hefty charge for the CPU time to probe the hash table + 10000 times. Note, however, that we are not charging 10000 times 179.33; the hash table setup is only done once in this plan type. It is possible to check on the accuracy of the planner's estimated costs - by using EXPLAIN ANALYZE. This command actually executes the query, + by using EXPLAIN ANALYZE. This command actually executes the query, and then displays the true run time accumulated within each plan node - along with the same estimated costs that a plain EXPLAIN shows. + along with the same estimated costs that a plain EXPLAIN shows. For example, we might get a result like this: @@ -296,7 +296,7 @@ regression-# WHERE t1.unique1 < 50 AND t1.unique2 = t2.unique2; In some query plans, it is possible for a subplan node to be executed more than once. For example, the inner index scan is executed once per outer - tuple in the above nested-loop plan. In such cases, the + row in the above nested-loop plan. In such cases, the loops value reports the total number of executions of the node, and the actual time and rows values shown are averages per-execution. This is done to make the numbers @@ -307,19 +307,19 @@ regression-# WHERE t1.unique1 < 50 AND t1.unique2 = t2.unique2; The Total runtime shown by EXPLAIN ANALYZE includes - executor start-up and shutdown time, as well as time spent processing - the result tuples. It does not include parsing, rewriting, or planning - time. For a SELECT query, the total run time will normally be just a + executor start-up and shut-down time, as well as time spent processing + the result rows. It does not include parsing, rewriting, or planning + time. 
For a SELECT query, the total run time will normally be just a little larger than the total time reported for the top-level plan node. - For INSERT, UPDATE, and DELETE queries, the total run time may be + For INSERT, UPDATE, and DELETE commands, the total run time may be considerably larger, because it includes the time spent processing the - result tuples. In these queries, the time for the top plan node - essentially is the time spent computing the new tuples and/or locating + result rows. In these commands, the time for the top plan node + essentially is the time spent computing the new rows and/or locating the old ones, but it doesn't include the time spent making the changes. - It is worth noting that EXPLAIN results should not be extrapolated + It is worth noting that EXPLAIN results should not be extrapolated to situations other than the one you are actually testing; for example, results on a toy-sized table can't be assumed to apply to large tables. The planner's cost estimates are not linear and so it may well choose @@ -333,7 +333,7 @@ regression-# WHERE t1.unique1 < 50 AND t1.unique2 = t2.unique2; - Statistics used by the Planner + Statistics Used by the Planner As we saw in the previous section, the query planner needs to estimate @@ -351,8 +351,8 @@ regression-# WHERE t1.unique1 < 50 AND t1.unique2 = t2.unique2; with queries similar to this one: -regression=# select relname, relkind, reltuples, relpages from pg_class -regression-# where relname like 'tenk1%'; +regression=# SELECT relname, relkind, reltuples, relpages FROM pg_class +regression-# WHERE relname LIKE 'tenk1%'; relname | relkind | reltuples | relpages ---------------+---------+-----------+---------- tenk1 | r | 10000 | 233 @@ -382,10 +382,10 @@ regression-# where relname like 'tenk1%'; Most queries retrieve only a fraction of the rows in a table, due - to having WHERE clauses that restrict the rows to be examined. + to having WHERE clauses that restrict the rows to be examined. The planner thus needs to make an estimate of the - selectivity of WHERE clauses, that is, the fraction of - rows that match each clause of the WHERE condition. The information + selectivity of WHERE clauses, that is, the fraction of + rows that match each clause of the WHERE condition. The information used for this task is stored in the pg_statistic system catalog. Entries in pg_statistic are updated by ANALYZE and VACUUM ANALYZE commands, @@ -406,7 +406,7 @@ regression-# where relname like 'tenk1%'; For example, we might do: -regression=# select attname, n_distinct, most_common_vals from pg_stats where tablename = 'road'; +regression=# SELECT attname, n_distinct, most_common_vals FROM pg_stats WHERE tablename = 'road'; attname | n_distinct | most_common_vals ---------+------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- name | -0.467008 | {"I- 580 Ramp","I- 880 Ramp","Sp Railroad ","I- 580 ","I- 680 Ramp","I- 80 Ramp","14th St ","5th St ","Mission Blvd","I- 880 "} @@ -414,12 +414,14 @@ regression=# select attname, n_distinct, most_common_vals from pg_stats where ta (2 rows) regression=# + - As of PostgreSQL 7.2 the following columns exist - in pg_stats: + + shows the columns that + exist in pg_stats. - +
<structname>pg_stats</structname> Columns @@ -435,7 +437,7 @@ regression=# tablename name - Name of table containing column + Name of the table containing the column @@ -447,13 +449,13 @@ regression=# null_frac real - Fraction of column's entries that are NULL + Fraction of column's entries that are null avg_width integer - Average width in bytes of column's entries + Average width in bytes of the column's entries @@ -462,7 +464,7 @@ regression=# If greater than zero, the estimated number of distinct values in the column. If less than zero, the negative of the number of distinct values divided by the number of rows. (The negated form - is used when ANALYZE believes that the number of distinct values + is used when ANALYZE believes that the number of distinct values is likely to increase as the table grows; the positive form is used when the column seems to have a fixed number of possible values.) For example, -1 indicates a unique column in which the number of @@ -481,7 +483,7 @@ regression=# most_common_freqs real[] A list of the frequencies of the most common values, - ie, number of occurrences of each divided by total number of rows. + i.e., number of occurrences of each divided by total number of rows. @@ -530,30 +532,32 @@ regression=# Controlling the Planner with Explicit <literal>JOIN</> Clauses - Beginning with PostgreSQL 7.1 it is possible - to control the query planner to some extent by using explicit JOIN + Beginning with PostgreSQL 7.1 it has been possible + to control the query planner to some extent by using the explicit JOIN syntax. To see why this matters, we first need some background. In a simple join query, such as - -SELECT * FROM a,b,c WHERE a.id = b.id AND b.ref = c.id; - - the planner is free to join the given tables in any order. For example, - it could generate a query plan that joins A to B, using the WHERE clause - a.id = b.id, and then joins C to this joined table, using the other - WHERE clause. Or it could join B to C and then join A to that result. - Or it could join A to C and then join them with B --- but that would - be inefficient, since the full Cartesian product of A and C would have - to be formed, there being no applicable WHERE clause to allow optimization - of the join. - (All joins in the PostgreSQL executor happen - between two input tables, so it's necessary to build up the result in one - or another of these fashions.) The important point is that these different - join possibilities give semantically equivalent results but may have hugely - different execution costs. Therefore, the planner will explore all of them - to try to find the most efficient query plan. + +SELECT * FROM a, b, c WHERE a.id = b.id AND b.ref = c.id; + + the planner is free to join the given tables in any order. For + example, it could generate a query plan that joins A to B, using + the WHERE condition a.id = b.id, and then + joins C to this joined table, using the other WHERE + condition. Or it could join B to C and then join A to that result. + Or it could join A to C and then join them with B --- but that + would be inefficient, since the full Cartesian product of A and C + would have to be formed, there being no applicable condition in the + WHERE clause to allow optimization of the join. (All + joins in the PostgreSQL executor happen + between two input tables, so it's necessary to build up the result + in one or another of these fashions.) 
The important point is that + these different join possibilities give semantically equivalent + results but may have hugely different execution costs. Therefore, + the planner will explore all of them to try to find the most + efficient query plan. @@ -567,7 +571,7 @@ SELECT * FROM a,b,c WHERE a.id = b.id AND b.ref = c.id; search to a genetic probabilistic search through a limited number of possibilities. (The switch-over threshold is set by the GEQO_THRESHOLD run-time - parameter described in the Administrator's Guide.) + parameter described in the &cite-admin;.) The genetic search takes less time, but it won't necessarily find the best possible plan. @@ -575,9 +579,9 @@ SELECT * FROM a,b,c WHERE a.id = b.id AND b.ref = c.id; When the query involves outer joins, the planner has much less freedom than it does for plain (inner) joins. For example, consider - + SELECT * FROM a LEFT JOIN (b JOIN c ON (b.ref = c.id)) ON (a.id = b.id); - + Although this query's restrictions are superficially similar to the previous example, the semantics are different because a row must be emitted for each row of A that has no matching row in the join of B and C. @@ -587,27 +591,27 @@ SELECT * FROM a LEFT JOIN (b JOIN c ON (b.ref = c.id)) ON (a.id = b.id); - In PostgreSQL 7.1, the planner treats all - explicit JOIN syntaxes as constraining the join order, even though + The PostgreSQL query planner treats all + explicit JOIN syntaxes as constraining the join order, even though it is not logically necessary to make such a constraint for inner joins. Therefore, although all of these queries give the same result: - -SELECT * FROM a,b,c WHERE a.id = b.id AND b.ref = c.id; + +SELECT * FROM a, b, c WHERE a.id = b.id AND b.ref = c.id; SELECT * FROM a CROSS JOIN b CROSS JOIN c WHERE a.id = b.id AND b.ref = c.id; SELECT * FROM a JOIN (b JOIN c ON (b.ref = c.id)) ON (a.id = b.id); - - the second and third take less time to plan than the first. This effect + + but the second and third take less time to plan than the first. This effect is not worth worrying about for only three tables, but it can be a lifesaver with many tables. You do not need to constrain the join order completely in order to - cut search time, because it's OK to use JOIN operators in a plain - FROM list. For example, - + cut search time, because it's OK to use JOIN operators in a plain + FROM list. For example, + SELECT * FROM a CROSS JOIN b, c, d, e WHERE ...; - + forces the planner to join A to B before joining them to other tables, but doesn't constrain its choices otherwise. In this example, the number of possible join orders is reduced by a factor of 5. @@ -617,22 +621,22 @@ SELECT * FROM a CROSS JOIN b, c, d, e WHERE ...; If you have a mix of outer and inner joins in a complex query, you might not want to constrain the planner's search for a good ordering of inner joins inside an outer join. You can't do that directly in the - JOIN syntax, but you can get around the syntactic limitation by using + JOIN syntax, but you can get around the syntactic limitation by using subselects. For example, - + SELECT * FROM d LEFT JOIN (SELECT * FROM a, b, c WHERE ...) AS ss ON (...); - + Here, joining D must be the last step in the query plan, but the - planner is free to consider various join orders for A,B,C. + planner is free to consider various join orders for A, B, C. Constraining the planner's search in this way is a useful technique both for reducing planning time and for directing the planner to a good query plan. 
If the planner chooses a bad join order by default, - you can force it to choose a better order via JOIN syntax --- assuming + you can force it to choose a better order via JOIN syntax --- assuming that you know of a better order, that is. Experimentation is recommended. @@ -658,6 +662,10 @@ SELECT * FROM d LEFT JOIN If you allow each insertion to be committed separately, PostgreSQL is doing a lot of work for each record added. + An additional benefit of doing all insertions in one transaction + is that if the insertion of one record were to fail then the + insertion of all records inserted up to that point would be rolled + back, so you won't be stuck with partially loaded data. @@ -696,7 +704,7 @@ SELECT * FROM d LEFT JOIN - ANALYZE Afterwards + Run ANALYZE Afterwards It's a good idea to run ANALYZE or VACUUM diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml index 12f043b95f..b4ec30773d 100644 --- a/doc/src/sgml/queries.sgml +++ b/doc/src/sgml/queries.sgml @@ -1,4 +1,4 @@ - + Queries @@ -668,7 +668,7 @@ SELECT select_list order in which the columns are listed does not matter. The purpose is to reduce each group of rows sharing common values into one group row that is representative of all rows in the group. - This is done to eliminate redundancy in the output and/or obtain + This is done to eliminate redundancy in the output and/or compute aggregates that apply to these groups. For instance: => SELECT * FROM test1; @@ -694,7 +694,12 @@ SELECT select_list In the second query, we could not have written SELECT * FROM test1 GROUP BY x, because there is no single value for the column y that could be associated with each - group. In general, if a table is grouped, columns that are not + group. The grouped-by columns can be referenced in the select list since + they have a known constant value per group. + + + + In general, if a table is grouped, columns that are not used in the grouping cannot be referenced except in aggregate expressions. An example with aggregate expressions is: @@ -712,11 +717,6 @@ SELECT select_list linkend="functions-aggregate">. - - The grouped-by columns can be referenced in the select list since - they have a known constant value per group. - - Grouping without aggregate expressions effectively calculates the @@ -740,7 +740,7 @@ SELECT product_id, p.name, (sum(s.units) * p.price) AS sales in the GROUP BY clause since they are referenced in the query select list. (Depending on how exactly the products table is set up, name and price may be fully dependent on the - product ID, so the additional groups could theoretically be + product ID, so the additional groupings could theoretically be unnecessary, but this is not implemented yet.) The column s.units does not have to be in the GROUP BY list since it is only used in an aggregate expression @@ -828,7 +828,7 @@ SELECT product_id, p.name, (sum(s.units) * (p.price - p.cost)) AS profit - Select List Items + Select-List Items The simplest kind of select list is * which diff --git a/doc/src/sgml/query.sgml b/doc/src/sgml/query.sgml index 1ec9fba722..4eed42be30 100644 --- a/doc/src/sgml/query.sgml +++ b/doc/src/sgml/query.sgml @@ -1,5 +1,5 @@ @@ -13,7 +13,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/query.sgml,v 1.27 2002/10/24 17:48:54 peter SQL to perform simple operations. This tutorial is only intended to give you an introduction and is in no way a complete tutorial on SQL. Numerous books - have been written on SQL92, including SQL, including and . 
You should be aware that some PostgreSQL language features are extensions to the standard. @@ -44,7 +44,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/query.sgml,v 1.27 2002/10/24 17:48:54 peter The \i command reads in commands from the specified file. The -s option puts you in - single step mode which pauses before sending each query to the + single step mode which pauses before sending each statement to the server. The commands used in this section are in the file basics.sql. @@ -502,7 +502,7 @@ SELECT * join operator will have each of its rows in the output at least once, whereas the table on the right will only have those rows output that match some row of the left table. When outputting a - left-table row for which there is no right-table match, empty (NULL) + left-table row for which there is no right-table match, empty (null) values are substituted for the right-table columns. @@ -601,7 +601,7 @@ SELECT max(temp_lo) FROM weather; subquery - If we want to know what city (or cities) that reading occurred in, + If we wanted to know what city (or cities) that reading occurred in, we might try @@ -615,7 +615,7 @@ SELECT city FROM weather WHERE temp_lo = max(temp_lo); WRONG go into the aggregation stage; so it has to be evaluated before aggregate functions are computed.) However, as is often the case - the query can be restated to accomplish the intended result; here + the query can be restated to accomplish the intended result, here by using a subquery: @@ -630,9 +630,9 @@ SELECT city FROM weather (1 row) - This is OK because the sub-select is an independent computation + This is OK because the subquery is an independent computation that computes its own aggregate separately from what is happening - in the outer select. + in the outer query. @@ -684,10 +684,18 @@ SELECT city, max(temp_lo) SELECT city, max(temp_lo) FROM weather - WHERE city LIKE 'S%' + WHERE city LIKE 'S%' GROUP BY city HAVING max(temp_lo) < 40; + + + + The LIKE operator does pattern matching and + is explained in the &cite-user;. + + + @@ -729,7 +737,7 @@ SELECT city, max(temp_lo) You can update existing rows using the UPDATE command. Suppose you discover the temperature readings are - all off by 2 degrees as of November 28, you may update the + all off by 2 degrees as of November 28. You may update the data as follows: @@ -762,8 +770,8 @@ SELECT * FROM weather; - Suppose you are no longer interested in the weather of Hayward, - then you can do the following to delete those rows from the table. + Suppose you are no longer interested in the weather of Hayward. + Then you can do the following to delete those rows from the table. Deletions are performed using the DELETE command: @@ -786,7 +794,7 @@ SELECT * FROM weather; - One should be wary of queries of the form + One should be wary of statements of the form DELETE FROM tablename; diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index c5718040fc..0f8ca84094 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1,5 +1,5 @@ @@ -11,7 +11,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.149 2002/11/08 17:37:52 tg - The <productname>PostgreSQL</productname> user account + The <productname>PostgreSQL</productname> User Account postgres user @@ -37,7 +37,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.149 2002/11/08 17:37:52 tg - Creating a database cluster + Creating a Database Cluster database cluster @@ -152,7 +152,7 @@ set to "C". For more information see the Administrator's Guide. 
This is intended to warn you that the currently selected locale will cause indexes to be sorted in an order that prevents them from - being used for LIKE and regular-expression searches. If you need + being used for LIKE and regular-expression searches. If you need good performance in such searches, you should set your current locale to C and re-run initdb, e.g., by running initdb --lc-collate=C. The sort @@ -165,7 +165,7 @@ set to "C". For more information see the Administrator's Guide. - Starting the database server + Starting the Database Server @@ -229,7 +229,7 @@ pg_ctl start -l logfile Normally, you will want to start the database server when the - computer boots. Auto-start scripts are operating-system specific. + computer boots. Autostart scripts are operating system-specific. There are a few distributed with PostgreSQL in the /contrib/start-scripts directory. This may require root @@ -384,13 +384,13 @@ IpcSemaphoreCreate: semget(key=5440026, num=16, 01600) failed: No space left on means your kernel's limit on the number of System V semaphores is smaller than the number PostgreSQL wants to create. As above, you may be able to work around the problem by - starting the postmaster with a reduced number of backend processes + starting the postmaster with a reduced number of allowed connections ( switch), but you'll eventually want to increase the kernel limit. - If you get an illegal system call error, it is likely + If you get an illegal system call error, it is likely that shared memory or semaphores are not supported in your kernel at all. In that case your only option is to reconfigure the kernel to enable these features. @@ -456,7 +456,7 @@ psql: could not connect to server: Connection refused - Run-time configuration + Run-time Configuration configuration @@ -558,7 +558,7 @@ env PGOPTIONS='-c geqo=off' psql pg_settings - pg_settings virtual table allows display and update + The pg_settings virtual table allows display and update of current session run-time parameters. There is one entry for each of the available parameters provided by SHOW ALL. But it is in a form that allows it to be joined with other relations and have a @@ -579,28 +579,25 @@ env PGOPTIONS='-c geqo=off' psql
pg_settings Columns - + Name Type - References Description - name + name text - The name of a current session run-time parameter - setting + setting text - The value of a current session run-time parameter @@ -630,7 +627,7 @@ env PGOPTIONS='-c geqo=off' psql Sets the optimizer's estimate of the cost of processing each - operator in a WHERE clause. This is measured as a fraction of + operator in a WHERE clause. This is measured as a fraction of the cost of a sequential page fetch. @@ -860,85 +857,93 @@ env PGOPTIONS='-c geqo=off' psql SERVER_MIN_MESSAGES (string) - This controls how much detail is written to the server logs. The - default is NOTICE. Valid values are DEBUG5, + This controls how much message detail is written to the server + logs. Valid values are DEBUG5, DEBUG4, DEBUG3, DEBUG2, DEBUG1, INFO, NOTICE, - WARNING, ERROR, LOG, - FATAL, and PANIC. Later values send less - detail to the logs. LOG has a different precedence - here than in CLIENT_MIN_MESSAGES. + WARNING, ERROR, LOG, + FATAL, and PANIC. Later values send + less detail to the logs. The default is NOTICE. + Note that LOG has a different precedence here than + in CLIENT_MIN_MESSAGES. + Here is a summary of the various message types: - DEBUG[1-5] + DEBUG[1-5] - This provides information for use by developers. + Provides information for use by developers. + - INFO + INFO - This provides information requested by the user, e.g. - SET. + Provides information implicitly requested by the user, + e.g., during VACUUM VERBOSE. + - NOTICE + NOTICE - This provides information that may be helpful to users, e.g. - truncation of long identifiers, sequence creation as part of - SERIAL. + Provides information that may be helpful to users, e.g., + truncation of long identifiers and index creation as part + of primary keys. + - WARNING + WARNING - This provides warnings to the user, e.g. COMMIT + Provides warnings to the user, e.g., COMMIT outside a transaction. + - ERROR + ERROR - Reports the error that caused the transaction to abort. + Reports the error that caused a transaction to abort. - LOG + LOG - This reports information of interest to administrators, e.g. + Reports information of interest to administrators, e.g., checkpoint activity. + - FATAL + FATAL - This reports why the backend session terminated. + Reports why a backend session terminated. + - PANIC + PANIC - This reports why all backends restarted. + Reports why all backend sessions restarted. @@ -951,15 +956,15 @@ env PGOPTIONS='-c geqo=off' psql CLIENT_MIN_MESSAGES (string) - This controls how much detail is written to the client. The - default is NOTICE. Valid values are - DEBUG5, DEBUG4, DEBUG3, - DEBUG2, DEBUG1, LOG, - NOTICE, WARNING, and ERROR. - Later values send less information to the user. LOG - has a different precedence here than in - SERVER_MIN_MESSAGES. Also see that section for an - explanation of the various values. + This controls how much message detail is written to the + client. Valid values are DEBUG5, + DEBUG4, DEBUG3, DEBUG2, + DEBUG1, LOG, NOTICE, + WARNING, and ERROR. Later values send + less information to the client. The default is + NOTICE. Note that LOG has a different + precedence here than in SERVER_MIN_MESSAGES. Also + see that section for an explanation of the various values. @@ -973,7 +978,7 @@ env PGOPTIONS='-c geqo=off' psql to turn this on, as it might expose programming mistakes. 
To use this option, the macro USE_ASSERT_CHECKING must be defined when PostgreSQL is - built (accomplished by the configure option + built (accomplished by the configure option ). Note that DEBUG_ASSERTIONS defaults to on if PostgreSQL has been built with @@ -990,7 +995,7 @@ env PGOPTIONS='-c geqo=off' psql These flags enable various debugging output to be sent to the - server log. For each executed query, prints either the query text, + server log. For each executed query, print either the query text, the resulting parse tree, the query rewriter output, or the execution plan. indents these displays to produce a more readable but much longer output format. @@ -1032,22 +1037,39 @@ env PGOPTIONS='-c geqo=off' psql + + LOG_DURATION (boolean) + + + Causes the duration of every completed statement to be logged. + To use this option, enable LOG_STATEMENT and + LOG_PID so you can link the statement to the + duration using the process ID. + + + + LOG_MIN_ERROR_STATEMENT (string) - This controls which message types output the original query to - the server logs. All queries matching the setting or higher are - logged. The default is PANIC (effectively - "off"). Valid values are DEBUG5, - DEBUG4, DEBUG3, - DEBUG2, DEBUG1, - INFO, NOTICE, - WARNING, ERROR, - FATAL, and PANIC. + This controls for which message levels the SQL statement + causing that message is to be recorded in the server log. All + statements causing a message of the level of the setting or + higher are logged. The default is PANIC + (effectively turning this feature off). Valid values are + DEBUG5, DEBUG4, + DEBUG3, DEBUG2, + DEBUG1, INFO, + NOTICE, WARNING, + ERROR, FATAL, and + PANIC. For example, if you set this to + ERROR then all SQL statements causing + errors, fatal errors, or panics will be logged. + - It is recommended you enable LOG_PID as well + It is recommended you enable LOG_PID as well so you can more easily match the error statement with the error message. @@ -1071,18 +1093,7 @@ env PGOPTIONS='-c geqo=off' psql LOG_STATEMENT (boolean) - Prints each query received. - - - - - - LOG_DURATION (boolean) - - - Prints the duration of every completed query. To use this option, - enable LOG_STATEMENT and LOG_PID so you - can link the original query to the duration using the process id. + Causes each SQL statement to be logged. @@ -1186,9 +1197,12 @@ env PGOPTIONS='-c geqo=off' psql This option determines the syslog facility to be used when syslog is enabled. You may choose - from LOCAL0, LOCAL1, LOCAL2, LOCAL3, LOCAL4, LOCAL5, LOCAL6, - LOCAL7; the default is LOCAL0. See also the documentation of - your system's syslog. + from LOCAL0, LOCAL1, + LOCAL2, LOCAL3, LOCAL4, + LOCAL5, LOCAL6, LOCAL7; + the default is LOCAL0. See also the + documentation of your system's + syslog. @@ -1221,12 +1235,12 @@ env PGOPTIONS='-c geqo=off' psql - General operation + General Operation - AUTOCOMMIT (bool) + AUTOCOMMIT (boolean) autocommit @@ -1254,7 +1268,7 @@ env PGOPTIONS='-c geqo=off' psql Once another command is issued, a transaction block begins and any SET, SHOW, or RESET commands are considered to be part of the - transaction, i.e. they are committed or rolled back depending + transaction, i.e., they are committed or rolled back depending on the completion status of the transaction. 
To execute a SET, SHOW, or RESET command at the start of a transaction block, use BEGIN @@ -1276,7 +1290,7 @@ env PGOPTIONS='-c geqo=off' psql - AUSTRALIAN_TIMEZONES (bool) + AUSTRALIAN_TIMEZONES (boolean) Australian time zones @@ -1330,19 +1344,33 @@ env PGOPTIONS='-c geqo=off' psql DB_USER_NAMESPACE (boolean) - This allows per-database user names. You can create users as - username@dbname. When username is passed by the client, - @ and the database name is appended to the user name and - that database-specific user name is looked up by the server. - When creating user names containing @, you will need - to quote the user name. + This allows per-database user names. It is off by default. + + + + If this is on, create users as username@dbname. + When username is passed by a connecting client, + @ and the database name is appended to the user + name and that database-specific user name is looked up by the + server. Note that when you create users with names containing + @ within the SQL environment, you will need to + quote the user name. + - With this option enabled, you can still create ordinary global - users. Simply append @ when specifying the user name - in the client. The @ will be stripped off and looked up - by the server. + With this option enabled, you can still create ordinary global + users. Simply append @ when specifying the user + name in the client. The @ will be stripped off + before the user name is looked up by the server. + + + + This feature is intended as a temporary measure until a + complete solution is found. At that time, this option will + be removed. + + @@ -1393,7 +1421,7 @@ env PGOPTIONS='-c geqo=off' psql - Consult the PostgreSQL User's Guide and + Consult the &cite-user; and the command SET TRANSACTION for more information. @@ -1424,11 +1452,9 @@ env PGOPTIONS='-c geqo=off' psql distribution are installed. (Use pg_config --pkglibdir to print the name of this directory.) For example: - dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' - @@ -1690,8 +1716,8 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' When a password is specified in CREATE USER or - ALTER USER without writing either ENCRYPTED or - UNENCRYPTED, this flag determines whether the password is to be + ALTER USER without writing either ENCRYPTED or + UNENCRYPTED, this flag determines whether the password is to be encrypted. The default is on (encrypt the password). @@ -1714,37 +1740,37 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' namespaces - This variable specifies the order in which namespaces are searched - when an object (table, data type, function, etc) is referenced by a + This variable specifies the order in which schemas are searched + when an object (table, data type, function, etc.) is referenced by a simple name with no schema component. When there are objects of - identical names in different namespaces, the one found first + identical names in different schemas, the one found first in the search path is used. An object that is not in any of the - namespaces in the search path can only be referenced by specifying - its containing namespace with a qualified (dotted) name. + schemas in the search path can only be referenced by specifying + its containing schema with a qualified (dotted) name. - The value for search_path has to be a comma-separated - list of namespace (schema) names. 
If one of the list items is - the special value $user, then the namespace - having the same name as the SESSION_USER is substituted, if there - is such a namespace. (If not, $user is ignored.) + The value for search_path has to be a comma-separated + list of schema names. If one of the list items is + the special value $user, then the schema + having the same name as the SESSION_USER is substituted, if there + is such a schema. (If not, $user is ignored.) - The system catalog namespace, pg_catalog, is always + The system catalog schema, pg_catalog, is always searched, whether it is mentioned in the path or not. If it is mentioned in the path then it will be searched in the specified order. If pg_catalog is not in the path then it will be searched before searching any of the path items. - It should also be noted that the temporary-table namespace, - pg_temp_nnn, is implicitly searched before any of + It should also be noted that the temporary-table schema, + pg_temp_nnn, is implicitly searched before any of these. When objects are created without specifying a particular target - namespace, they will be placed in the first namespace listed + schema, they will be placed in the first schema listed in the search path. An error is reported if the search path is empty. @@ -1752,21 +1778,14 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' The default value for this parameter is '$user, public' (where the second part will be - ignored if there is no namespace named public). + ignored if there is no schema named public). This supports shared use of a database (where no users - have private namespaces, and all share use of public), - private per-user namespaces, and combinations of these. Other + have private schemas, and all share use of public), + private per-user schemas, and combinations of these. Other effects can be obtained by altering the default search path setting, either globally or per-user. - - By default, a newly created database will contain a world-writable - namespace named public, but no private namespaces. - The administrator may choose to restrict permissions on - public or even remove it, if that suits his purposes. - - schemas @@ -1779,6 +1798,10 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' shows how the requests appearing in search_path were resolved. + + + For more information on schema handling, see the &cite-user;. + @@ -1807,10 +1830,10 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' SILENT_MODE (bool) - Runs postmaster silently. If this option is set, the postmaster + Runs the server silently. If this option is set, the server will automatically run in background and any controlling ttys are disassociated, thus no messages are written to standard - output or standard error (same effect as postmaster's -S + output or standard error (same effect as postmaster's option). Unless some logging system such as syslog is enabled, using this option is discouraged since it makes it impossible to see error messages. @@ -1824,14 +1847,14 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' Specifies the amount of memory to be used by internal sorts and hashes before switching to temporary disk files. The value is - specified in kilobytes, and defaults to 1024 kilobytes (1MB). + specified in kilobytes, and defaults to 1024 kilobytes (1 MB). 
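 For example, a session that is about to sort a great deal of data can raise the
 limit for itself; a sketch, with an arbitrary value:

SET sort_mem TO 8192;   -- permit up to 8192 kB per sort operation before spilling to disk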
Note that for a complex query, several sorts might be running in parallel, and each one will be allowed to use as much memory as this value specifies before it starts to put data into temporary files. Also, each running backend could be doing one or more sorts simultaneously, so the total memory used could be many times the value of SORT_MEM. Sorts are used - by ORDER BY, merge joins, and CREATE INDEX. + by ORDER BY, merge joins, and CREATE INDEX. @@ -1847,8 +1870,7 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' behavior you can set this variable to off, but in the long run you are encouraged to change your applications to use the ONLY keyword to exclude subtables. See the - SQL language reference and the User's - Guide for more information about inheritance. + SQL language reference and the &cite-user; for more information about inheritance. @@ -1887,7 +1909,7 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' Sets the time zone for displaying and interpreting timestamps. The default is to use whatever the system environment - specifies as the timezone. + specifies as the time zone. @@ -1901,10 +1923,10 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' expr = NULL (or NULL = expr) are treated as expr IS NULL, that is, they - return true if expr evaluates to the NULL value, + return true if expr evaluates to the null value, and false otherwise. The correct behavior of expr = NULL is to always - return NULL (unknown). Therefore this option defaults to off. + return null (unknown). Therefore this option defaults to off. @@ -1914,11 +1936,11 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' null values, so if you use that interface to access the database you might want to turn this option on. Since expressions of the form expr = NULL always - return NULL (using the correct interpretation) they are not + return the null value (using the correct interpretation) they are not very useful and do not appear often in normal applications, so this option does little harm in practice. But new users are frequently confused about the semantics of expressions - involving NULL, so this option is not on by default. + involving null values, so this option is not on by default. @@ -1930,8 +1952,7 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' - Refer to the User's Guide for related - information. + Refer to the &cite-user; for related information. @@ -1941,7 +1962,7 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' Specifies the directory of the Unix-domain socket on which the - postmaster is to listen for + server is to listen for connections from client applications. The default is normally /tmp, but can be changed at build time. @@ -1954,7 +1975,7 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' Sets the group owner of the Unix domain socket. (The owning user of the socket is always the user that starts the - postmaster.) In combination with the option + server.) In combination with the option this can be used as an additional access control mechanism for this socket type. By default this is the empty string, which uses the default @@ -1982,7 +2003,7 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' anyone can connect. Reasonable alternatives are 0770 (only user and group, see also under ) and 0700 - (only user). 
(Note that actually for a Unix socket, only write + (only user). (Note that actually for a Unix domain socket, only write permission matters and there is no point in setting or revoking read or execute permissions.) @@ -2070,8 +2091,8 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' enough additional transactions may become ready to commit within the given interval. But the delay is just wasted if no other transactions become ready to commit. Therefore, the delay is - only performed if at least COMMIT_SIBLINGS other transactions - are active at the instant that a backend has written its commit + only performed if at least COMMIT_SIBLINGS other transactions + are active at the instant that a backend process has written its commit record. @@ -2103,7 +2124,7 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' WAL_DEBUG (integer) - If non-zero, turn on WAL-related debugging output on standard + If nonzero, turn on WAL-related debugging output on standard error. @@ -2130,107 +2151,111 @@ dynamic_library_path = '/usr/local/lib/postgresql:/home/my_project/lib:$libdir' - - Short options + + Short Options + For convenience there are also single letter option switches - available for many parameters. They are described in the following - table. + available for many parameters. They are described in . + -
+
 
 Short option key
 
-      Short option         Equivalent                    Remark
+      Short option         Equivalent
 
       -B x                 shared_buffers = x
       -d x                 server_min_messages = DEBUGx
       -F                   fsync = off
       -h x                 virtual_host = x
       -i                   tcpip_socket = on
       -k x                 unix_socket_directory = x
       -l                   ssl = on
       -N x                 max_connections = x
       -p x                 port = x
       -fi, -fh, -fm,       enable_indexscan=off, enable_hashjoin=off,
       -fn, -fs, -ft        enable_mergejoin=off, enable_nestloop=off,
                            enable_seqscan=off, enable_tidscan=off
       -s                   show_statement_stats = on
       -S x                 sort_mem = x
       -tpa, -tpl, -te      show_parser_stats=on, show_planner_stats=on,
                            show_executor_stats=on
 
+      For historical reasons, the options -fi through -ft,
+      -s, -S, and -tpa, -tpl, -te must be passed to the
+      individual backend process via the postmaster -o
+      option, for example,
+
+$ postmaster -o '-S 1024 -s'
+
+      or via PGOPTIONS from the client side, as
+      explained above.
-      For historical reasons, options marked * must be
-      passed to the individual backend process via the
-      postmaster -o option, for example,
-
-$ postmaster -o '-S 1024 -s'
-
-      or via PGOPTIONS from the client side, as explained
-      above.
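 The mapping can be checked from SQL. For instance, for a server started with
 postmaster -o '-S 4096', the equivalent variable reflects the switch (a sketch;
 the value is arbitrary):

SHOW sort_mem;   -- reports 4096 for the startup shown above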
@@ -2305,7 +2330,7 @@ $ postmaster -o '-S 1024 -s' SHMMAX Maximum size of shared memory segment (bytes) - 250kB + 8.2kB * shared_buffers + 14.2kB * max_connections or infinity + 250kB + 8.2 kB * shared_buffers + 14.2 kB * max_connections or infinity @@ -2453,7 +2478,7 @@ $ postmaster -o '-S 1024 -s' mind that shared memory is not pageable; it is locked in RAM. To increase the number of shared buffers supported by the postmaster, add the following to your kernel configuration - file. A SHMALL value of 1024 represents 4MB of + file. A SHMALL value of 1024 represents 4 MB of shared memory. The following increases the maximum shared memory area to 32 MB: @@ -2466,7 +2491,7 @@ options "SHMMAX=\(SHMALL*PAGE_SIZE\)" For those running 4.1 or later, just make the above changes, recompile the kernel, and reboot. For those running earlier - releases, use bpatch to find the + releases, use bpatch to find the sysptsize value in the current kernel. This is computed dynamically at boot time. @@ -2812,7 +2837,7 @@ default:\ - Shutting down the server + Shutting Down the Server There are several ways to shut down the database server. You control @@ -2903,14 +2928,16 @@ $ kill -INT `head -1 /usr/local/pgsql/data/postmaster.pid` With SSL support compiled in, the PostgreSQL server - can be started with the argument @@ -2924,19 +2951,18 @@ $ kill -INT `head -1 /usr/local/pgsql/data/postmaster.pid`OpenSSL documentation. A simple self-signed certificate can be used to get started for testing, but a - certificate signed by a CA (either one of the global + certificate signed by a certificate authority (CA) (either one of the global CAs or a local one) should be used in production so the client can verify the server's identity. To create a quick self-signed certificate, use the following OpenSSL command: -cd $PGDATA openssl req -new -text -out server.req Fill out the information that openssl asks for. Make sure that you enter the local host name as Common Name; the challenge password can be left blank. The script will generate a key that is - passphrase protected; it will not accept a pass phrase that is less + passphrase protected; it will not accept a passphrase that is less than four characters long. To remove the passphrase (as you must if you want automatic start-up of the server), run the commands @@ -2954,7 +2980,7 @@ chmod og-rwx server.key - Secure TCP/IP Connections with <application>SSH</application> tunnels + Secure TCP/IP Connections with <application>SSH</application> Tunnels ssh @@ -2970,20 +2996,20 @@ chmod og-rwx server.key - One can use ssh to encrypt the network + One can use SSH to encrypt the network connection between clients and a PostgreSQL server. Done properly, this - should lead to an adequately secure network connection. + provides an adequately secure network connection. - First make sure that an ssh server is + First make sure that an SSH server is running properly on the same machine as PostgreSQL and that you can log in using ssh as some user. Then you can establish a secure tunnel with a command like this from the client machine: -$ ssh -L 3333:foo.com:5432 joe@foo.com +ssh -L 3333:foo.com:5432 joe@foo.com The first number in the argument, 3333, is the port number of your end of the tunnel; it can be chosen freely. The @@ -3006,7 +3032,7 @@ psql -h localhost -p 3333 template1 - Several other products exist that can provide secure tunnels using + Several other applications exist that can provide secure tunnels using a procedure similar in concept to the one just described. 
diff --git a/doc/src/sgml/start.sgml b/doc/src/sgml/start.sgml index ca1550bd6e..5fff93e7d0 100644 --- a/doc/src/sgml/start.sgml +++ b/doc/src/sgml/start.sgml @@ -1,5 +1,5 @@ @@ -281,10 +281,10 @@ createdb: database creation failed Using an existing graphical frontend tool like - PgAccess or - ApplixWare (via - ODBC) to create and manipulate a database. - These possibilities are not covered in this tutorial. + PgAccess or an office suite with + ODBC support to create and manipulate a + database. These possibilities are not covered in this + tutorial. diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml index be1a6e764f..55e9c7b641 100644 --- a/doc/src/sgml/syntax.sgml +++ b/doc/src/sgml/syntax.sgml @@ -1,5 +1,5 @@ @@ -121,7 +121,7 @@ INSERT INTO MY_TABLE VALUES (3, 'hi there'); characters of an identifier; longer names can be written in commands, but they will be truncated. By default, NAMEDATALEN is 64 so the maximum identifier length - is 63 (but at the time the system is built, + is 63 (but at the time PostgreSQL is built, NAMEDATALEN can be changed in src/include/postgres_ext.h). @@ -170,8 +170,9 @@ UPDATE "my_table" SET "a" = 5; Quoted identifiers can contain any character other than a double - quote itself. This allows constructing table or column names that - would otherwise not be possible, such as ones containing spaces or + quote itself. To include a double quote, write two double quotes. + This allows constructing table or column names that would + otherwise not be possible, such as ones containing spaces or ampersands. The length limitation still applies. @@ -272,7 +273,7 @@ SELECT 'foobar'; SELECT 'foo' 'bar'; is not valid syntax. (This slightly bizarre behavior is specified - by SQL9x; PostgreSQL is + by SQL; PostgreSQL is following the standard.) @@ -298,7 +299,7 @@ SELECT 'foo' 'bar'; Alternatively, bit-string constants can be specified in hexadecimal notation, using a leading X (upper or lower case), e.g., X'1FF'. This notation is equivalent to - a bit-string constant with four binary digits for each hex digit. + a bit-string constant with four binary digits for each hexadecimal digit. @@ -328,7 +329,7 @@ SELECT 'foo' 'bar'; decimal point, if one is used. At least one digit must follow the exponent marker (e), if one is present. There may not be any spaces or other characters embedded in the - constant. Notice that any leading plus or minus sign is not actually + constant. Note that any leading plus or minus sign is not actually considered part of the constant; it is an operator applied to the constant. @@ -650,13 +651,16 @@ CAST ( 'string' AS type ) - The precedence and associativity of the operators is hard-wired - into the parser. Most operators have the same precedence and are - left-associative. This may lead to non-intuitive behavior; for - example the Boolean operators < and > have a different - precedence than the Boolean operators <= and >=. Also, - you will sometimes need to add parentheses when using combinations - of binary and unary operators. For instance + shows the precedence and + associativity of the operators in PostgreSQL. Most operators have + the same precedence and are left-associative. The precedence and + associativity of the operators is hard-wired into the parser. + This may lead to non-intuitive behavior; for example the Boolean + operators < and > have a different + precedence than the Boolean operators <= and + >=. Also, you will sometimes need to add + parentheses when using combinations of binary and unary operators. 
+ For instance SELECT 5 ! - 6; @@ -673,7 +677,7 @@ SELECT (5 !) - 6; This is the price one pays for extensibility. - +
Operator Precedence (decreasing) @@ -825,7 +829,7 @@ SELECT (5 !) - 6; SELECT 3 OPERATOR(pg_catalog.+) 4; the OPERATOR construct is taken to have the default precedence - shown above for any other operator. This is true no matter + shown in for any other operator. This is true no matter which specific operator name appears inside OPERATOR(). @@ -901,9 +905,8 @@ SELECT 3 OPERATOR(pg_catalog.+) 4; -( expression ) - Parentheses are used to group subexpressions and override precedence. + Another value expression in parentheses, useful to group subexpressions and override precedence. @@ -928,21 +931,30 @@ SELECT 3 OPERATOR(pg_catalog.+) 4; Column References - A column can be referenced in the form: + A column can be referenced in the form -correlation.columnname `['subscript`]' +correlation.columnname + or + +correlation.columnname[subscript] + + (Here, the brackets [ ] are meant to appear literally.) + + correlation is the name of a table (possibly qualified), or an alias for a table defined by means of a - FROM clause, or + FROM clause, or the key words NEW or OLD. - (NEW and OLD can only appear in rules, + (NEW and OLD can only appear in rewrite rules, while other correlation names can be used in any SQL statement.) The correlation name and separating dot may be omitted if the column name - is unique - across all the tables being used in the current query. If - column is of an array type, then the + is unique across all the tables being used in the current query. (See also .) + + + + If column is of an array type, then the optional subscript selects a specific element or elements in the array. If no subscript is provided, then the whole array is selected. (See for more about @@ -968,9 +980,9 @@ $number dept, as -CREATE FUNCTION dept (text) RETURNS dept - AS 'SELECT * FROM dept WHERE name = $1' - LANGUAGE SQL; +CREATE FUNCTION dept(text) RETURNS dept + AS 'SELECT * FROM dept WHERE name = $1' + LANGUAGE SQL; Here the $1 will be replaced by the first @@ -993,7 +1005,7 @@ CREATE FUNCTION dept (text) RETURNS dept keywords AND, OR, and NOT, or is a qualified operator name - OPERATOR(schema.operatorname) +OPERATOR(schema.operatorname) Which particular operators exist and whether they are unary or binary depends on what operators have been @@ -1042,12 +1054,12 @@ sqrt(2) output value, such as the sum or average of the inputs. The syntax of an aggregate expression is one of the following: - - aggregate_name (expression) - aggregate_name (ALL expression) - aggregate_name (DISTINCT expression) - aggregate_name ( * ) - + +aggregate_name (expression) +aggregate_name (ALL expression) +aggregate_name (DISTINCT expression) +aggregate_name ( * ) + where aggregate_name is a previously defined aggregate (possibly a qualified name), and @@ -1101,7 +1113,7 @@ sqrt(2) CAST ( expression AS type ) expression::type - The CAST syntax conforms to SQL92; the syntax with + The CAST syntax conforms to SQL; the syntax with :: is historical PostgreSQL usage. @@ -1123,8 +1135,8 @@ CAST ( expression AS type to the type that a value expression must produce (for example, when it is assigned to a table column); the system will automatically apply a type cast in such cases. However, automatic casting is only done for - cast functions that are marked OK to apply implicitly - in the system catalogs. Other cast functions must be invoked with + casts that are marked OK to apply implicitly + in the system catalogs. Other casts must be invoked with explicit casting syntax. 
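 Either explicit form may be used; for example (a sketch with arbitrary values):

SELECT CAST(4.3 AS integer);   -- SQL-standard cast syntax
SELECT 4.3::integer;           -- historical PostgreSQL syntax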
This restriction is intended to prevent surprising conversions from being applied silently. @@ -1143,6 +1155,13 @@ CAST ( expression AS type double-quoted, because of syntactic conflicts. Therefore, the use of the function-like cast syntax leads to inconsistencies and should probably be avoided in new applications. + + (The function-like syntax is in fact just a function call. When + one of the two standard cast syntaxes is used to do a run-time + conversion, it will internally invoke a registered function to + perform the conversion. By convention, these conversion functions + have the same name as their output type, but this is not something + that a portable application should rely on.) @@ -1151,8 +1170,9 @@ CAST ( expression AS type A scalar subquery is an ordinary - SELECT in parentheses that returns exactly one - row with one column. The SELECT query is executed + SELECT query in parentheses that returns exactly one + row with one column. (See for information about writing queries.) + The SELECT query is executed and the single returned value is used in the surrounding value expression. It is an error to use a query that returns more than one row or more than one column as a scalar subquery. @@ -1168,7 +1188,7 @@ CAST ( expression AS type state: SELECT name, (SELECT max(pop) FROM cities WHERE cities.state = states.name) -FROM states; + FROM states; @@ -1202,25 +1222,26 @@ SELECT somefunc() OR true; As a consequence, it is unwise to use functions with side effects as part of complex expressions. It is particularly dangerous to - rely on side effects or evaluation order in WHERE and HAVING clauses, + rely on side effects or evaluation order in WHERE and HAVING clauses, since those clauses are extensively reprocessed as part of developing an execution plan. Boolean - expressions (AND/OR/NOT combinations) in those clauses may be reorganized + expressions (AND/OR/NOT combinations) in those clauses may be reorganized in any manner allowed by the laws of Boolean algebra. - When it is essential to force evaluation order, a CASE construct may - be used. For example, this is an untrustworthy way of trying to - avoid division by zero in a WHERE clause: + When it is essential to force evaluation order, a CASE + construct (see ) may be + used. For example, this is an untrustworthy way of trying to + avoid division by zero in a WHERE clause: SELECT ... WHERE x <> 0 AND y/x > 1.5; - but this is safe: + But this is safe: SELECT ... WHERE CASE WHEN x <> 0 THEN y/x > 1.5 ELSE false END; - A CASE construct used in this fashion will defeat optimization attempts, + A CASE construct used in this fashion will defeat optimization attempts, so it should only be done when necessary. diff --git a/doc/src/sgml/typeconv.sgml b/doc/src/sgml/typeconv.sgml index e6ff564be9..e0a330daef 100644 --- a/doc/src/sgml/typeconv.sgml +++ b/doc/src/sgml/typeconv.sgml @@ -1,9 +1,6 @@ Type Conversion - - Introduction - SQL queries can, intentionally or not, require mixing of different data types in the same expression. @@ -29,10 +26,9 @@ operators. -The Programmer's Guide has more details on the exact algorithms used for +The &cite-programmer; has more details on the exact algorithms used for implicit type conversion and coercion. - Overview @@ -41,7 +37,7 @@ implicit type conversion and coercion. SQL is a strongly typed language. That is, every data item has an associated data type which determines its behavior and allowed usage. 
PostgreSQL has an extensible type system that is -much more general and flexible than other RDBMS implementations. +much more general and flexible than other SQL implementations. Hence, most type conversion behavior in PostgreSQL should be governed by general rules rather than by ad hoc heuristics, to allow mixed-type expressions to be meaningful even with user-defined types. @@ -142,13 +138,13 @@ conventions for the SQL standard native types such as -The PostgreSQL parser uses the convention that all -type conversion functions take a single argument of the source type and are -named with the same name as the target type. Any function meeting these -criteria is considered to be a valid conversion function, and may be used -by the parser as such. This simple assumption gives the parser the power -to explore type conversion possibilities without hardcoding, allowing -extended user-defined types to use these same features transparently. +The system catalogs store information about which conversions, called +casts, between data types are valid, and how to +perform those conversions. Additional casts can be added by the user +with the CREATE CAST command. (This is usually +done in conjunction with defining new data types. The set of casts +between the built-in types has been carefully crafted and should not +be altered.) @@ -169,7 +165,7 @@ types. All type conversion rules are designed with several principles in mind: - + Implicit conversions should never have surprising or unpredictable outcomes. diff --git a/doc/src/sgml/user-manag.sgml b/doc/src/sgml/user-manag.sgml index 508277e840..ee63b03a74 100644 --- a/doc/src/sgml/user-manag.sgml +++ b/doc/src/sgml/user-manag.sgml @@ -1,5 +1,5 @@ @@ -129,7 +129,7 @@ dropuser name A password is only significant if the client authentication method requires the user to supply a password when connecting - to the database. At present, the -- 2.40.0