Description of table expressions, including join syntax, from Robert B.

author Peter Eisentraut <peter_e@gmx.net>

Sun, 21 Jan 2001 22:02:01 +0000 (22:02 +0000)

committer Peter Eisentraut <peter_e@gmx.net>

Sun, 21 Jan 2001 22:02:01 +0000 (22:02 +0000)
author Peter Eisentraut <peter_e@gmx.net>
Sun, 21 Jan 2001 22:02:01 +0000 (22:02 +0000)
committer Peter Eisentraut <peter_e@gmx.net>
Sun, 21 Jan 2001 22:02:01 +0000 (22:02 +0000)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml

index 4ea4881f81b5c73ad6a5403510b452af8cc1846c..cf4ac7f7ac7055965c9c0abde3b614956c2edc6b 100644 (file)
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1,4 +1,4 @@
-<!-- $Header: /cvsroot/pgsql/doc/src/sgml/func.sgml,v 1.48 2001/01/20 20:59:28 petere Exp $ -->
+<!-- $Header: /cvsroot/pgsql/doc/src/sgml/func.sgml,v 1.49 2001/01/21 22:02:01 petere Exp $ -->
  
  <chapter id="functions">
   <title>Functions and Operators</title>
@@ -24,6 +24,102 @@
    other implementations.
   </para>
  
+
+ <sect1 id="functions-logical">
+  <title>Logical Operators</title>
+
+  <para>
+   The usual logical operators are available:
+
+   <simplelist>
+    <member>AND</member>
+    <member>OR</member>
+    <member>NOT</member>
+   </simplelist>
+
+   SQL uses a three-valued boolean logic where NULL represents
+   <quote>unknown</quote>.  Observe the following truth tables:
+
+   <informaltable>
+    <tgroup cols="4">
+     <thead>
+      <row>
+       <entry><replaceable>a</replaceable></entry>
+       <entry><replaceable>b</replaceable></entry>
+       <entry><replaceable>a</replaceable> AND <replaceable>b</replaceable></entry>
+       <entry><replaceable>a</replaceable> OR <replaceable>b</replaceable></entry>
+      </row>
+     </thead>
+
+     <tbody>
+      <row>
+       <entry>TRUE</entry>
+       <entry>TRUE</entry>
+       <entry>TRUE</entry>
+       <entry>TRUE</entry>
+      </row>
+
+      <row>
+       <entry>TRUE</entry>
+       <entry>FALSE</entry>
+       <entry>FALSE</entry>
+       <entry>TRUE</entry>
+      </row>
+
+      <row>
+       <entry>TRUE</entry>
+       <entry>NULL</entry>
+       <entry>NULL</entry>
+       <entry>TRUE</entry>
+      </row>
+
+      <row>
+       <entry>FALSE</entry>
+       <entry>FALSE</entry>
+       <entry>FALSE</entry>
+       <entry>FALSE</entry>
+      </row>
+
+      <row>
+       <entry>FALSE</entry>
+       <entry>NULL</entry>
+       <entry>FALSE</entry>
+       <entry>NULL</entry>
+      </row>
+     </tbody>
+    </tgroup>
+   </informaltable>
+
+   <informaltable>
+    <tgroup cols="2">
+     <thead>
+      <row>
+       <entry><replaceable>a</replaceable></entry>
+       <entry>NOT <replaceable>a</replaceable></entry>
+      </row>
+     </thead>
+
+     <tbody>
+      <row>
+       <entry>TRUE</entry>
+       <entry>FALSE</entry>
+      </row>
+
+      <row>
+       <entry>FALSE</entry>
+       <entry>TRUE</entry>
+      </row>
+
+      <row>
+       <entry>NULL</entry>
+       <entry>NULL</entry>
+      </row>
+     </tbody>
+    </tgroup>
+   </informaltable>
+  </para>
+ </sect1>
+
   <sect1 id="functions-comparison">
    <title>Comparison Operators</title>
  
@@ -88,6 +184,23 @@
     <literal>&lt;</literal> operator to compare a boolean with
     <literal>3</literal>).
    </para>
+
+  <para>
+   To check whether a value is or is not NULL, use the constructs
+<synopsis>
+<replaceable>expression</replaceable> IS NULL
+<replaceable>expression</replaceable> IS NOT NULL
+</synopsis>
+   Do <emphasis>not</emphasis> use
+   <literal><replaceable>expression</replaceable> = NULL</literal>
+   because NULL is not <quote>equal to</quote> NULL.  (NULL represents
+   an unknown value, so it is not known whether two unknown values are
+   equal.)  <productname>Postgres</productname> implicitly converts
+   <literal>= NULL</literal> clauses to <literal>IS NULL</literal> to
+   allow some broken client applications (such as
+   <productname>Microsoft Access</productname>) to work, but this may
+   be discontinued in a future release.
+  </para>
   </sect1>
  
  
diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml

index 918ff103dd99cb1a7977f0319cfdcb2ed025ab24..f994cbe0a07ac8bbbf6d438417c4dd439a9f882d 100644 (file)
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -1,5 +1,5 @@
  <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/syntax.sgml,v 1.35 2001/01/13 23:58:55 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/syntax.sgml,v 1.36 2001/01/21 22:02:01 petere Exp $
  -->
  
  <chapter id="sql-syntax">
@@ -17,10 +17,10 @@ $Header: /cvsroot/pgsql/doc/src/sgml/syntax.sgml,v 1.35 2001/01/13 23:58:55 pete
    <para>
     SQL input consists of a sequence of
     <firstterm>commands</firstterm>.  A command is composed of a
-   sequence of <firstterm>tokens</firstterm>, which depend on the
-   syntax of the particular command, terminated by a semicolon
-   (<quote>;</quote>).  The end of the input stream also terminates a
-   command.
+   sequence of <firstterm>tokens</firstterm>, terminated by a
+   semicolon (<quote>;</quote>).  The end of the input stream also
+   terminates a command.  Which tokens are valid depends on the syntax
+   of the particular command.
    </para>
  
    <para>
@@ -41,7 +41,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/syntax.sgml,v 1.35 2001/01/13 23:58:55 pete
  
    <informalexample id="sql-syntax-ex-commands">
     <para>
-    For example, the following is (lexically) valid SQL input:
+    For example, the following is (syntactically) valid SQL input:
  <programlisting>
  SELECT * FROM MY_TABLE;
  UPDATE MY_TABLE SET A = 5;
@@ -93,7 +93,7 @@ INSERT INTO MY_TABLE VALUES (3, 'hi there');
      key word can be letters, digits
      (<literal>0</literal>-<literal>9</literal>), or underscores,
      although the SQL standard will not define a key word that contains
-    digits or start or ends with an underscore.
+    digits or starts or ends with an underscore.
     </para>
  
     <para>
@@ -181,7 +181,7 @@ UPDATE "my_table" SET "a" = 5;
      constants are discussed afterwards.
     </para>
  
-   <sect3>
+   <sect3 id="sql-syntax-strings">
      <title>String Constants</title>
  
      <para>
@@ -651,251 +651,244 @@ CAST ( '<replaceable>string</replaceable>' AS <replaceable>type</replaceable> )
       Transaction and command identifiers are 32 bit quantities.
      </para>
  
-    <para>
-     A column can be referenced in the form:
-
-<synopsis>
-<replaceable>corelation</replaceable>.<replaceable>columnname</replaceable> `['<replaceable>subscript</replaceable>`]'
-</synopsis>
+  </sect1>
  
-     <replaceable>corelation</replaceable> is either the name of a
-     table, an alias for a table defined by means of a FROM clause, or
-     the keyword <literal>NEW</literal> or <literal>OLD</literal>.
-     (NEW and OLD can only appear in the action portion of a rule,
-     while other corelation names can be used in any SQL statement.)
-     The corelation name can be omitted if the column name is unique
-     across all the tables being used in the current query.  If
-     <replaceable>column</replaceable> is of an array type, then the
-     optional <replaceable>subscript</replaceable> selects a specific
-     element in the array.  If no subscript is provided, then the
-     whole array is selected.  Refer to the description of the
-     particular commands in the <citetitle>PostgreSQL Reference
-     Manual</citetitle> for the allowed syntax in each case.
-    </para>
  
-  </sect1>
+ <sect1 id="sql-expressions">
+  <title>Value Expressions</title>
  
-  <sect1 id="sql-expressions">
-   <title>Expressions</title>
+  <para>
+   Value expressions are used in a variety of syntactic contexts, such
+   as in the target list of the <command>SELECT</command> command, as
+   new column values in <command>INSERT</command> or
+   <command>UPDATE</command>, or in search conditions in a number of
+   commands.  The result of a value expression is sometimes called a
+   <firstterm>scalar</firstterm>, to distinguish it from the result of
+   a table expression (which is a table).  Value expressions are
+   therefore also called <firstterm>scalar expressions</firstterm> (or
+   even simply <firstterm>expressions</firstterm>).  The expression
+   syntax allows the calculation of values from primitive parts using
+   arithmetic, logical, set, and other operations.
+  </para>
  
-   <para>
-    <acronym>SQL92</acronym> allows <firstterm>expressions</firstterm>
-    to transform data in tables. Expressions may contain operators
-    and functions.
-   </para>
+  <para>
+   A value expression is one of the following:
  
-   <para>
-    An expression is one of the following:
+   <itemizedlist>
+    <listitem>
+     <para>
+      A constant or literal value; see <xref linkend="sql-syntax-constants">.
+     </para>
+    </listitem>
  
-    <simplelist>
-     <member>constant</member>
-     <member>column</member>
-     <member><replaceable>expression</replaceable> <replaceable>binary_operator</replaceable> <replaceable>expression</replaceable></member>
-     <member><replaceable>expression</replaceable> <replaceable>right_unary_operator</replaceable></member>
-     <member><replaceable>left_unary_operator</replaceable> <replaceable>expression</replaceable></member>
-     <member>( <replaceable>expression</replaceable> )</member>
-     <member>parameter</member>
-     <member>functional expression</member>
-     <member>aggregate expression</member>
-    </simplelist>
-   </para>
+    <listitem>
+     <para>
+      A column reference
+     </para>
+    </listitem>
  
-   <para>
-    We have already discussed constants and columns.  The three kinds of
-    operator expressions indicate respectively binary (infix), right-unary
-    (suffix) and left-unary (prefix) operators.  The following sections
-    discuss the remaining options.
-   </para>
+    <listitem>
+     <para>
+      An operator invocation:
+      <simplelist>
+       <member><replaceable>expression</replaceable> <replaceable>operator</replaceable> <replaceable>expression</replaceable> (binary infix operator)</member>
+       <member><replaceable>expression</replaceable> <replaceable>operator</replaceable> (unary postfix operator)</member>
+       <member><replaceable>operator</replaceable> <replaceable>expression</replaceable> (unary prefix operator)</member>
+      </simplelist>
+      where <replaceable>operator</replaceable> follows the syntax
+      rules of <xref linkend="sql-syntax-operators"> or is one of the
+      tokens <token>AND</token>, <token>OR</token>, and
+      <token>NOT</token>.  What particular operators exist and whether
+      they are unary or binary depends on what operators have been
+      defined by the system or the user.  <xref linkend="functions">
+      describes the built-in operators.
+     </para>
+    </listitem>
  
-   <sect2>
-    <title>Parameters</title>
+    <listitem>
+     <para>
+<synopsis>( <replaceable>expression</replaceable> )</synopsis>
+      Parentheses are used to group subexpressions and override precedence.
+     </para>
+    </listitem>
  
-    <para>
-     A <firstterm>parameter</firstterm>
-     is used to indicate a parameter in a SQL function.  Typically this
-     is used in SQL function definition statements.  The form of a
-     parameter is:
-
-     <synopsis>
-$<replaceable class="parameter">number</replaceable>
-     </synopsis>
-    </para>
+    <listitem>
+     <para>
+      A positional parameter reference, in the body of a function declaration.
+     </para>
+    </listitem>
  
-    <para>
-     For example, consider the definition of a function,
-     <function>dept</function>, as
-
-     <programlisting>
-CREATE FUNCTION dept (name)
-       RETURNS dept
-       AS 'select * from dept where name = $1'
-       LANGUAGE 'sql';
-     </programlisting>
-    </para>
-   </sect2>
+    <listitem>
+     <para>
+      A function call
+     </para>
+    </listitem>
  
-   <sect2>
-    <title>Functional Expressions</title>
+    <listitem>
+     <para>
+      An aggregate expression
+     </para>
+    </listitem>
  
-    <para>
-     A <firstterm>functional expression</firstterm>
-     is the name of a legal SQL function, followed by its argument list
-     enclosed in parentheses:
+    <listitem>
+     <para>
+      A scalar subquery.  This is an ordinary
+      <command>SELECT</command> in parenthesis that returns exactly one
+      row with one column.  It is an error to use a subquery that
+      returns more than one row or more than one column in the context
+      of a value expression.
+     </para>
+    </listitem>
+   </itemizedlist>
+  </para>
  
-     <synopsis>
-<replaceable>function</replaceable> (<replaceable>expression</replaceable> [, <replaceable>expression</replaceable> ... ] )
-     </synopsis>
-    </para>
+  <para>
+   In addition to this list, there are a number of contructs that can
+   be classified as an expression but do not follow any general syntax
+   rules.  These generally have the semantics of a function or
+   operator and are explained in the appropriate location in <xref
+   linkend="functions">.  An example is the <literal>IS NULL</literal>
+   clause.
+  </para>
  
-    <para>
-     For example, the following computes the square root of an employee
-     salary:
+  <para>
+   We have already discussed constants in <xref
+   linkend="sql-syntax-constants">.  The following sections discuss
+   the remaining options.
+  </para>
  
-     <programlisting>
-sqrt(emp.salary)
-     </programlisting>
-    </para>
-   </sect2>
+  <sect2>
+   <title>Column References</title>
  
-   <sect2 id="syntax-aggregates">
-    <title>Aggregate Expressions</title>
+   <para>
+    A column can be referenced in the form:
+<synopsis>
+<replaceable>corelation</replaceable>.<replaceable>columnname</replaceable> `['<replaceable>subscript</replaceable>`]'
+</synopsis>
  
-    <para>
-     An <firstterm>aggregate expression</firstterm> represents the application
-     of an aggregate function across the rows selected by a query.
-     An aggregate function reduces multiple inputs to a single output value,
-     such as the sum or average of the inputs.
-     The syntax of an aggregate expression is one of the following:
+    <replaceable>corelation</replaceable> is either the name of a
+    table, an alias for a table defined by means of a FROM clause, or
+    the keyword <literal>NEW</literal> or <literal>OLD</literal>.
+    (NEW and OLD can only appear in the action portion of a rule,
+    while other corelation names can be used in any SQL statement.)
+    The corelation name can be omitted if the column name is unique
+    across all the tables being used in the current query.  If
+    <replaceable>column</replaceable> is of an array type, then the
+    optional <replaceable>subscript</replaceable> selects a specific
+    element in the array.  If no subscript is provided, then the whole
+    array is selected.  Refer to the description of the particular
+    commands in the <citetitle>PostgreSQL Reference Manual</citetitle>
+    for the allowed syntax in each case.
+   </para>
+  </sect2>
  
-    <simplelist>
-     <member><replaceable>aggregate_name</replaceable> (<replaceable>expression</replaceable>)</member>
-     <member><replaceable>aggregate_name</replaceable> (ALL <replaceable>expression</replaceable>)</member>
-     <member><replaceable>aggregate_name</replaceable> (DISTINCT <replaceable>expression</replaceable>)</member>
-     <member><replaceable>aggregate_name</replaceable> ( * )</member>
-    </simplelist>
+  <sect2>
+   <title>Positional Parameters</title>
  
-     where <replaceable>aggregate_name</replaceable> is a previously defined
-     aggregate, and <replaceable>expression</replaceable> is any expression
-     that doesn't itself contain an aggregate expression.
-    </para>
+   <para>
+    A positional parameter reference is used to indicate a parameter
+    in an SQL function.  Typically this is used in SQL function
+    definition statements.  The form of a parameter is:
+<synopsis>
+$<replaceable>number</replaceable>
+</synopsis>
+   </para>
  
-    <para>
-     The first form of aggregate expression invokes the aggregate across all
-     input rows for which the given expression yields a non-null value.
-     The second form is the same as the first, since ALL is the default.
-     The third form invokes the aggregate for all distinct non-null values
-     of the expression found in the input rows.  The last form invokes the
-     aggregate once for each input row regardless of null or non-null values;
-     since no particular input value is specified, it is generally only useful
-     for the count() aggregate.
-    </para>
+   <para>
+    For example, consider the definition of a function,
+    <function>dept</function>, as
  
-    <para>
-      For example, count(*) yields the total number of input rows;
-      count(f1) yields the number of input rows in which f1 is non-null;
-      count(distinct f1) yields the number of distinct non-null values of f1.
-    </para>
-   </sect2>
+<programlisting>
+CREATE FUNCTION dept (text) RETURNS dept
+  AS 'select * from dept where name = $1'
+  LANGUAGE 'sql';
+</programlisting>
  
-   <sect2>
-    <title>Target List</title>
+    Here the <literal>$1</literal> will be replaced by the first
+    function argument when the function is invoked.
+   </para>
+  </sect2>
  
-    <para>
-     A <firstterm>target list</firstterm>
-     is a comma-separated list of one or more elements, each
-     of which must be of the form:
-
-     <synopsis>
-<replaceable>expression</replaceable> [ AS <replaceable>result_attname</replaceable> ]
-     </synopsis>
-
-     where <replaceable>result_attname</replaceable>
-     is the name to be assigned to the created column.  If 
-     <replaceable>result_attname</replaceable>
-     is not present, then <productname>Postgres</productname> selects a
-     default name based on the contents of <replaceable>expression</replaceable>.
-     If <replaceable>expression</replaceable> is a simple attribute reference
-     then the default name will be the same as that attribute's name, but
-     otherwise the implementation is free to assign any default name.
-    </para>
-   </sect2>
+  <sect2>
+   <title>Function Calls</title>
  
-   <sect2>
-    <title>Qualification</title>
+   <para>
+    The syntax for a function call is the name of a legal function
+    (subject to the syntax rules for identifiers of <xref
+    linkend="sql-syntax-identifiers"> , followed by its argument list
+    enclosed in parentheses:
  
-    <para>
-     A <firstterm>qualification</firstterm>
-     consists of any number of clauses connected by the logical operators:
+<synopsis>
+<replaceable>function</replaceable> (<optional><replaceable>expression</replaceable> <optional>, <replaceable>expression</replaceable> ... </optional></optional> )
+</synopsis>
+   </para>
  
-     <simplelist>
-      <member>NOT</member>
-      <member>AND</member>
-      <member>OR</member>
-     </simplelist>
+   <para>
+    For example, the following computes the square root of 2:
+<programlisting>
+sqrt(2)
+</programlisting>
+   </para>
  
-     A clause is an <replaceable>expression</replaceable>
-     that evaluates to a <literal>boolean</literal> over a set of instances.
-    </para>
-   </sect2>
+   <para>
+    The list of built-in functions is in <xref linkend="functions">.
+    Other functions may be added by the user.
+   </para>
+  </sect2>
  
-   <sect2>
-    <title>From List</title>
+  <sect2 id="syntax-aggregates">
+   <title>Aggregate Expressions</title>
  
-    <para>
-     The <firstterm>from list</firstterm>
-     is a comma-separated list of <firstterm>from-expressions</firstterm>.
-     The simplest possibility for a from-expression is:
-
-     <synopsis>
-<replaceable>table_reference</replaceable> [ [ AS ] <replaceable class="PARAMETER">alias</replaceable> ]
-     </synopsis>
-
-     where <replaceable>table_reference</replaceable> is of the form
-
-     <synopsis>
-[ ONLY ] <replaceable class="PARAMETER">table_name</replaceable> [ * ]
-     </synopsis>
-
-     The from-expression defines an instance variable that ranges over the
-     rows of the specified table.  The instance variable's name is either
-     the table name, or the <replaceable>alias</replaceable> if one is given.
-     Ordinarily, if the table has child tables then the instance variable
-     will range over all rows in the inheritance hierarchy starting with
-     the specified table.  If <literal>ONLY</literal> is specified then
-     child tables are not included.  A trailing asterisk <literal>*</literal>
-     can be written to specifically indicate that child tables are included
-     (<literal>ONLY</literal> and <literal>*</literal> are mutually
-     exclusive).
-    </para>
+   <para>
+    An <firstterm>aggregate expression</firstterm> represents the
+    application of an aggregate function across the rows selected by a
+    query.  An aggregate function reduces multiple inputs to a single
+    output value, such as the sum or average of the inputs.  The
+    syntax of an aggregate expression is one of the following:
  
-    <para>
-     A from-expression can also be a sub-query:
+    <simplelist>
+     <member><replaceable>aggregate_name</replaceable> (<replaceable>expression</replaceable>)</member>
+     <member><replaceable>aggregate_name</replaceable> (ALL <replaceable>expression</replaceable>)</member>
+     <member><replaceable>aggregate_name</replaceable> (DISTINCT <replaceable>expression</replaceable>)</member>
+     <member><replaceable>aggregate_name</replaceable> ( * )</member>
+    </simplelist>
  
-     <synopsis>
-( <replaceable class="PARAMETER">select-statement</replaceable> ) [ AS ] <replaceable class="PARAMETER">alias</replaceable>
-     </synopsis>
+    where <replaceable>aggregate_name</replaceable> is a previously
+    defined aggregate, and <replaceable>expression</replaceable> is
+    any expression that does not itself contain an aggregate
+    expression.
+   </para>
  
-     Here, the effect is as though the SELECT were executed and its results
-     stored in a temporary table, which then becomes available as an instance
-     variable under the given <replaceable>alias</replaceable>.
-    </para>
+   <para>
+    The first form of aggregate expression invokes the aggregate
+    across all input rows for which the given expression yields a
+    non-NULL value.  The second form is the same as the first, since
+    <literal>ALL</literal> is the default.  The third form invokes the
+    aggregate for all distinct non-NULL values of the expression found
+    in the input rows.  The last form invokes the aggregate once for
+    each input row regardless of NULL or non-NULL values; since no
+    particular input value is specified, it is generally only useful
+    for the <function>count()</function> aggregate function.
+   </para>
  
-    <para>
-     Finally, a from-expression can be built up from simpler from-expressions
-     using JOIN clauses:
+   <para>
+    For example, <literal>count(*)</literal> yields the total number
+    of input rows; <literal>count(f1)</literal> yields the number of
+    input rows in which <literal>f1</literal> is non-NULL;
+    <literal>count(distinct f1)</literal> yields the number of
+    distinct non-NULL values of <literal>f1</literal>.
+   </para>
  
-     <synopsis>
-<replaceable class="PARAMETER">from_expression</replaceable> [ NATURAL ] <replaceable class="PARAMETER">join_type</replaceable> <replaceable class="PARAMETER">from_expression</replaceable>
-    [ ON <replaceable class="PARAMETER">join_condition</replaceable> | USING ( <replaceable class="PARAMETER">join_column_list</replaceable> ) ]
-     </synopsis>
+   <para>
+    The predefined aggregate functions are described in <xref
+    linkend="functions-aggregate">.
+   </para>
+  </sect2>
  
-     This syntax allows specification of <firstterm>outer joins</firstterm>.
-     For details see the reference page for SELECT.
-    </para>
-   </sect2>
+ </sect1>
  
  
-  <sect2 id="sql-precedence">
+  <sect1 id="sql-precedence">
     <title>Lexical Precedence</title>
  
     <para>
@@ -919,7 +912,7 @@ SELECT (5 &amp;) ~ 6;
     </para>
  
     <table tocentry="1">
-    <title>Operator Ordering (decreasing precedence)</title>
+    <title>Operator Precedence (decreasing)</title>
  
      <tgroup cols="2">
       <thead>
@@ -1062,10 +1055,529 @@ SELECT (5 &amp;) ~ 6;
      the same precedence as the built-in <quote>+</quote> operator, no
      matter what yours does.
     </para>
+  </sect1>
+
+
+ <sect1 id="sql-table-expressions">
+  <title>Table Expressions</title>
+
+  <para>
+   A <firstterm>table expression</firstterm> specifies a table.  The
+   table expression contains a FROM clause that is optionally followed
+   by WHERE, GROUP BY, and HAVING clauses.  Trivial table expressions
+   simply refer to a table on disk, a so-called base table, but more
+   complex expressions can be used to modify or combine base tables in
+   various ways.
+  </para>
+
+  <para>
+   The general syntax of the <command>SELECT</command> command is
+<synopsis>
+SELECT <replaceable>select_list</replaceable> <replaceable>table_expression</replaceable>
+</synopsis>
+
+   The <replaceable>select_list</replaceable> is a comma separated
+   list of <replaceable>value expressions</replaceable> as defined in
+   <xref linkend="sql-expressions"> that specify the derived columns
+   of the query output table.  Column names in the derived table that
+   is the result of the <replaceable>table_expression</replaceable>
+   can be used in the <replaceable>value expression</replaceable>s of
+   the <replaceable>select_list</replaceable>.
+  </para>
+
+  <para>
+   The WHERE, GROUP BY, and HAVING clauses in the table expression
+   specify a pipeline of successive transformations performed on the
+   table derived in the FROM clause.  The final transformed table that
+   is derived provides the input rows used to derive output rows as
+   specified by the select list of derived column value expressions.
+  </para>
+       
+  <sect2>
+   <title>FROM clause</title>
+ 
+   <para>
+    The FROM clause derives a table from one or more other tables
+    given in a comma-separated table reference list.
+<synopsis>
+FROM <replaceable>table_reference</replaceable> <optional>, <replaceable>table_reference</replaceable> <optional>, ...</optional></optional>
+</synopsis>
+
+    A table reference may be a table name or a derived table such as a
+    subquery, a table join, or complex combinations of these.  If more
+    than one table reference is listed in the FROM clause they are
+    CROSS JOINed (see below) to form the derived table that may then
+    be subject to transformations by the WHERE, GROUP BY, and HAVING
+    clauses and is finally the result of the overall table expression.
+   </para>
+
+   <para>
+    If a table reference is a simple table name and it is the
+    supertable in a table inheritance hierarchy, rows of the table
+    include rows from all of its subtable successors unless the
+    keyword ONLY precedes the table name.
+   </para>
+
+   <sect3>
+    <title>Joined Tables</title>
+
+    <para>
+     A joined table is a table derived from two other (real or
+     derived) tables according to the rules of the particular join
+     type.  INNER, OUTER, NATURAL, and CROSS JOIN are supported.
+    </para>
+
+    <variablelist>
+     <title>Join Types</title>
+
+     <varlistentry>
+      <term>CROSS JOIN</term>
+
+      <listitem>
+<synopsis>
+<replaceable>T1</replaceable> CROSS JOIN <replaceable>T2</replaceable>
+</synopsis>
+
+       <para>
+        For each combination of rows from
+        <replaceable>T1</replaceable> and
+        <replaceable>T2</replaceable> the derived table will contain a
+        row consisting of all columns in <replaceable>T1</replaceable>
+        followed by all columns in <replaceable>T2</replaceable>.  If
+        the tables have have N and M rows respectively, the joined
+        table will have N * M rows.  A cross join is essentially an
+        <literal>INNER JOIN ON TRUE</literal>.
+       </para>
+
+       <tip>
+        <para>
+         <literal>FROM <replaceable>T1</replaceable> CROSS JOIN
+         <replaceable>T2</replaceable></literal> is equivalent to
+         <literal>FROM <replaceable>T1</replaceable>,
+         <replaceable>T2</replaceable></literal>.
+        </para>
+       </tip>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term>Qualified JOINs</term>
+      <listitem>
+
+<synopsis>
+<replaceable>T1</replaceable> { <optional>INNER</optional> | { LEFT | RIGHT | FULL } <optional>OUTER</optional> } JOIN <replaceable>T2</replaceable> ON <replaceable>boolean expression</replaceable>
+<replaceable>T1</replaceable> { <optional>INNER</optional> | { LEFT | RIGHT | FULL } <optional>OUTER</optional> } JOIN <replaceable>T2</replaceable> USING ( <replaceable>join column list</replaceable> )
+</synopsis>
+        
+       <para>
+        The words <token>INNER</token> and <token>OUTER</token> are
+        optional for all JOINs.  <token>INNER</token> is the default;
+        <token>LEFT</token>, <token>RIGHT</token>, and
+        <token>FULL</token> are for OUTER JOINs only.
+       </para>
+
+       <para>
+        The <firstterm>join condition</firstterm> is specified in the
+        ON or USING clause.  (The meaning of the join condition
+        depends on the particular join type; see below.)  The ON
+        clause takes a boolean value expression of the same kind as is
+        used in a WHERE clause.  The USING clause takes a
+        comma-separated list of column names, which the joined tables
+        must have in common, and joins the tables on the equality of
+        those columns as a set, resulting in a joined table having one
+        column for each common column listed and all of the other
+        columns from both tables.  Thus, <literal>USING (a, b,
+        c)</literal> is equivalent to <literal>ON (t1.a = t2.a AND
+        t1.b = t2.b AND t1.c = t2.c)</literal> with the exception that
+        if ON is used there will be two columns a, b, and c in the
+        result, whereas with USING there will be only one of each.
+       </para>
+
+       <variablelist>
+        <varlistentry>
+         <term>INNER JOIN</term>
+
+         <listitem>
+          <para>
+           For each row R1 of T1, the joined table has a row for each
+           row in T2 that satisfies the join condition with R1.
+          </para>
+         </listitem>
+        </varlistentry>
+
+        <varlistentry>
+         <term>LEFT OUTER JOIN</term>
+
+         <listitem>
+          <para>
+           First, an INNER JOIN is performed.  Then, for a row in T1
+           that does not satisfy the join condition with any row in
+           T2, a joined row is returned with NULL values in columns of
+           T2.  Thus, the joined table unconditionally has a row for each
+           row in T1.
+          </para>
+         </listitem>
+        </varlistentry>
+         
+        <varlistentry>
+         <term>RIGHT OUTER JOIN</term>
+
+         <listitem>
+          <para>
+           This is like a left join, only that the result table will
+           unconditionally have a row for each row in T2.
+          </para>
+         </listitem>
+        </varlistentry>
+         
+        <varlistentry>
+         <term>FULL OUTER JOIN</term>
+
+         <listitem>
+          <para>
+           First, an INNER JOIN is performed.  Then, for each row in
+           T1 that does not satisfy the join condition with any row in
+           T2, a joined row is returned with null values in columns of
+           T2.  Also, for each row of T2 that does not satisfy the
+           join condition with any row in T1, a joined row with null
+           values in the columns of T1 is returned.
+          </para>
+         </listitem>
+        </varlistentry>
+       </variablelist>
+      </listitem>
+     </varlistentry>
+      
+     <varlistentry>
+      <term>NATURAL JOIN</term>
+
+      <listitem>
+<synopsis>
+<replaceable>T1</replaceable> NATURAL { <optional>INNER</optional> | { LEFT | RIGHT | FULL } <optional>OUTER</optional> JOIN <replaceable>T2</replaceable>
+</synopsis>
+       <para>
+        A natural join creates a joined table where every pair of matching
+        column names between the two tables are merged into one column. The
+        join specification is effectively a USING clause containing all the
+        common column names and is otherwise like a Qualified JOIN.
+       </para>
+      </listitem>
+     </varlistentry>
+    </variablelist>
+
+    <para>
+     Joins of all types can be chained together or nested where either
+     or both of <replaceable>T1</replaceable> and
+     <replaceable>T2</replaceable> may be JOINed tables.  Parenthesis
+     can be used around JOIN clauses to control the join order which
+     are otherwise left to right.
+    </para>
+   </sect3>
+
+   <sect3 id="sql-subqueries">
+    <title>Subqueries</title>
+
+    <para>
+     Subqueries specifying a derived table must be enclosed in
+     parenthesis and <emphasis>must</emphasis> be named using an AS
+     clause.  (See <xref linkend="sql-table-aliases">.)
+    </para>
+
+<programlisting>
+FROM (SELECT * FROM table1) AS alias_name
+</programlisting>
+
+    <para>
+     This example is equivalent to <literal>FROM table1 AS
+     alias_name</literal>.  Many subquieries can be written as table
+     joins instead.
+    </para>
+   </sect3>
+
+   <sect3 id="sql-table-aliases">
+    <title>Table and Column Aliases</title>
+
+    <para>
+     A temporary name can be given to tables and complex table
+     references to be used for references to the derived table in
+     further processing.  This is called a <firstterm>table
+     alias</firstterm>.
+<synopsis>
+FROM <replaceable>table_reference</replaceable> AS <replaceable>alias</replaceable>
+</synopsis>
+     Here, <replaceable>alias</replaceable> can be any regular
+     identifier.  The alias becomes the new name of the table
+     reference for the current query -- it is no longer possible to
+     refer to the table by the original name (if the table reference
+     was an ordinary base table).  Thus
+<programlisting>
+SELECT * FROM my_table AS m WHERE my_table.a > 5;
+</programlisting>
+     is not valid SQL syntax.  What will happen instead, as a
+     <productname>Postgres</productname> extension, is that an implict
+     table reference is added to the FROM clause, so the query is
+     processed as if it was written as
+<programlisting>
+SELECT * FROM my_table AS m, my_table WHERE my_table.a > 5;
+</programlisting>
+     Table aliases are mainly for notational convenience, but it is
+     necessary to use them when joining a table to itself, e.g.,
+<programlisting>
+SELECT * FROM my_table AS a CROSS JOIN my_table AS b ...
+</programlisting>
+     Additionally, an alias is required if the table reference is a
+     subquery.
+    </para>
+
+    <para>
+     Parenthesis are used to resolve ambiguities.  The following
+     statement will assign the alias <literal>b</literal> to the
+     result of the join, unlike the previous example:
+<programlisting>
+SELECT * FROM (my_table AS a CROSS JOIN my_table) AS b ...
+</programlisting>
+    </para>
+
+    <para>
+<synopsis>
+FROM <replaceable>table_reference</replaceable> <replaceable>alias</replaceable>
+</synopsis>
+     This form is equivalent the previously treated one; the
+     <token>AS</token> key word is noise.
+    </para>
+
+    <para>
+<synopsis>
+FROM <replaceable>table_reference</replaceable> <optional>AS</optional> <replaceable>alias</replaceable> ( <replaceable>column1</replaceable> <optional>, <replaceable>column2</replaceable> <optional>, ...</optional></optional> )
+</synopsis>
+     In addition to renaming the table as described above, the columns
+     of the table are also given temporary names.  If less column
+     aliases are specified than the actual table has columns, the last
+     columns are not renamed.  This syntax is especially useful for
+     self-joins or subqueries.
+    </para>
+   </sect3>
+
+   <sect3>
+    <title>Examples</title>
+
+    <para>
+<programlisting>
+FROM T1 INNER JOIN T2 USING (C)
+FROM T1 LEFT OUTER JOIN T2 USING (C)
+FROM (T1 RIGHT OUTER JOIN T2 ON (T1C1=T2C1)) AS DT1
+FROM (T1 FULL OUTER JOIN T2 USING (C)) AS DT1 (DT1C1, DT1C2)
+
+FROM T1 NATURAL INNER JOIN T2
+FROM T1 NATURAL LEFT OUTER JOIN T2
+FROM T1 NATURAL RIGHT OUTER JOIN T2
+FROM T1 NATURAL FULL OUTER JOIN T2
+
+FROM (SELECT * FROM T1) DT1 CROSS JOIN T2, T3
+FROM (SELECT * FROM T1) DT1, T2, T3
+</programlisting>
+
+     Above are some examples of joined tables and complex derived
+     tables.  Notice how the AS clause renames or names a derived
+     table and how the optional comma-separated list of column names
+     that follows gives names or renames the columns.  The last two
+     FROM clauses produce the same derived table from T1, T2, and T3.
+     The AS keyword was omitted in naming the subquery as DT1.  The
+     keywords OUTER and INNER are noise that can be omitted also.
+    </para>
+   </sect3>
+
+  </sect2>
+
+  <sect2>
+   <title>WHERE clause</title>
+
+   <para>
+    The syntax of the WHERE clause is
+<synopsis>
+WHERE <replaceable>search condition</replaceable>
+</synopsis>
+    where <replaceable>search condition</replaceable> is any value
+    expression as defined in <xref linkend="sql-expressions"> that
+    returns a value of type <type>boolean</type>.
+   </para>
+
+   <para>
+    After the processing of the FROM clause is done, each row of the
+    derived table is checked against the search condition.  If the
+    result of the condition is true, the row is kept in the output
+    table, otherwise (that is, if the result is false or NULL) it is
+    discared.  The search condition typically references at least some
+    column in the table generated in the FROM clause; this is not
+    required, but otherwise the WHERE clause will be fairly useless.
+   </para>
+
+   <note>
+    <para>
+     Before the implementation of the JOIN syntax, it was necessary to
+     put the join condition of an inner join in the WHERE clause.  For
+     example, these table expressions are equivalent:
+<programlisting>
+FROM a, b WHERE a.id = b.id AND b.val &gt; 5
+</programlisting>
+     and
+<programlisting>
+FROM a INNER JOIN b ON (a.id = b.id) WHERE b.val &gt; 5
+</programlisting>
+     or perhaps even
+<programlisting>
+FROM a NATURAL JOIN b WHERE b.val &gt; 5
+</programlisting>
+     Which one of these you use is mainly a matter of style.  The JOIN
+     syntax in the FROM clause is probably not as portable to other
+     products.  For outer joins there is no choice in any case:  they
+     must be done in the FROM clause.
+    </para>
+   </note>
+
+<programlisting>
+FROM FDT WHERE
+    C1 > 5
+
+FROM FDT WHERE
+    C1 IN (1, 2, 3)
+FROM FDT WHERE
+    C1 IN (SELECT C1 FROM T2)
+FROM FDT WHERE
+    C1 IN (SELECT C3 FROM T2 WHERE C2 = FDT.C1 + 10)
+
+FROM FDT WHERE
+    C1 BETWEEN (SELECT C3 FROM T2 WHERE C2 = FDT.C1 + 10) AND 100
+
+FROM FDT WHERE
+    EXISTS (SELECT C1 FROM T2 WHERE C2 > FDT.C1)
+</programlisting>
+
+   <para>
+    In the examples above, FDT is the table derived in the FROM
+    clause. Rows that do not meet the search condition of the where
+    clause are eliminated from FDT. Notice the use of scalar
+    subqueries as value expressions (C2 assumed UNIQUE).  Just like
+    any other query, the subqueries can employ complex table
+    expressions.  Notice how FDT is referenced in the subqueries.
+    Qualifying C1 as FDT.C1 is only necessary if C1 is the name of a
+    column in the derived input table of the subquery.  Qualifying the
+    column name adds clarity even when it is not needed.  The column
+    naming scope of an outer query extends into its inner queries.
+   </para>
    </sect2>
  
+<!-- This is confusing as heck.  Make it simpler. -->
+
+<![IGNORE[
+
+  <sect2>
+   <title>GROUP BY and HAVING clauses</title>     
+
+   <para>
+    After passing the WHERE filter, the derived input table may be
+    subject to grouping, using the GROUP BY clause, and elimination of
+    group rows using the HAVING clause.  (The HAVING clause can also
+    be used without GROUP BY, but then it is equivalent to the WHERE
+    clause.)
+   </para>
+
+   <para>
+    In standard SQL, the GROUP BY clause takes a list of column names,
+    that specify a subrow, from the derived input table produced by
+    the previous WHERE or FROM clause and partitions the table into
+    groups with duplicate subrows such that within a column of the
+    subrow, no column value is distinct from other column values. The
+    resulting derived input table is a special type of table, called a
+    grouped table, which still contains all columns but only
+    references to columns of the grouped subrow, and group aggregates,
+    derived from any of the columns, may appear in derived column
+    value expressions in the query select list.  When deriving an
+    output table from a query using a grouped input table, each output
+    row is derived from a corresponding group/partition of the grouped
+    table. Aggregates computed in a derived output column are
+    aggregates on the current partition/group of the grouped input
+    table being processed.  Only one output table row results per
+    group/partition of the grouped input table.
+   </para>
+
+   <para>
+    Postgres has extended the GROUP BY clause to allow some
+    non-standard, but useful behavior. Derived output columns, given
+    names using an AS clause in the query select list, may appear in
+    the GROUP BY clause in combination with, or instead of, the input
+    table column names. Tables may also be grouped by arbitrary
+    expressions. If output table column names appear in the GROUP BY
+    list, then the input table is augmented with additional columns of
+    the output table columns listed in the GROUP BY clause. The value
+    for each row in the additional columns is computed from the value
+    expression that defines the output column in the query select
+    list. The augmented input table is grouped by the column names
+    listed in the GROUP BY clause. The resulting grouped augmented
+    input table is then treated according standard SQL GROUP BY
+    semantics. Only the columns of the unaugmented input table in the
+    grouped subrow (if any), and group aggregates, derived from any of
+    the columns of the unaugmented input table, may be referenced in
+    the value expressions of the derived output columns of the
+    query. Output columns derived with an aggregate expression cannot
+    be named in the GROUP BY clause.
+   </para>
+
+   <para>
+    A HAVING clause may optionally follow a GROUP BY clause.  The
+    HAVING clause selects or eliminates, depending on which
+    perspective is taken, groups from the grouped table derived in the
+    GROUP BY clause that precedes it.  The search condition is the
+    same type of expression allowed in a WHERE clause and may
+    reference any of the input table column names in the grouped
+    subrow, but may not reference any others or any named output
+    columns.  When the search condition results in TRUE the group is
+    retained, otherwise the group is eliminated.
+   </para>
+  </sect2>
+
+   <sect2>
+    <title>ORDER BY and LIMIT clauses</title>
+          
+    <para>
+     ORDER BY and LIMIT clauses are not clauses of a table expression.
+     They are optional clauses that may follow a query expression and
+     are discussed here because they are commonly used with the
+     clauses above.
+    </para>
+
+    <para>
+     ORDER BY takes a comma-separated list of columns and performs a
+     cascaded ordering of the table by the columns listed, in the
+     order listed.  The keyword DESC or ASC may follow any column name
+     or expression in the list to specify descending or ascending
+     ordering, respectively. Ascending order is the default.  The
+     ORDER BY clause conforms to the SQL standard but is extended in
+     Postgres.  Postgres allows ORDER BY to reference both output
+     table columns, as named in the select list using the AS clause,
+     and input table columns, as given by the table derived in the
+     FROM clause and other previous clauses. Postgres also extends
+     ORDER BY to allow ordering by arbitrary expressions. If used in a
+     query with a GROUP BY clause, the ORDER BY clause can only
+     reference output table column names and grouped input table
+     columns.
+    </para>
+
+    <para>
+     LIMIT is not a standard SQL clause.  LIMIT is a Postgres
+     extension that limits the number of rows that will be returned
+     from a query.  The rows returned by a query using the LIMIT
+     clause are random if no ORDER BY clause is specified.  A LIMIT
+     clause may optionally be followed by an OFFSET clause which
+     specifies a number of rows to be skipped in the output table
+     before returning the number of rows specified in the LIMIT
+     clause.
+    </para>
+   </sect2>
+]]>
    </sect1>
- </chapter>
+
+</chapter>
  
  <!-- Keep this comment at the end of the file
  Local variables:
author	Peter Eisentraut <peter_e@gmx.net>
	Sun, 21 Jan 2001 22:02:01 +0000 (22:02 +0000)
committer	Peter Eisentraut <peter_e@gmx.net>
	Sun, 21 Jan 2001 22:02:01 +0000 (22:02 +0000)
doc/src/sgml/func.sgml		patch \| blob \| history
doc/src/sgml/syntax.sgml		patch \| blob \| history