New cost model for planning, incorporating a penalty for random page

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 15 Feb 2000 20:49:31 +0000 (20:49 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 15 Feb 2000 20:49:31 +0000 (20:49 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 15 Feb 2000 20:49:31 +0000 (20:49 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 15 Feb 2000 20:49:31 +0000 (20:49 +0000)
diff --git a/doc/src/sgml/libpq++.sgml b/doc/src/sgml/libpq++.sgml

index d259206c88227318029f6fd5405cc4936aa06b3b..65cc873e2803b1a7e1daa8989d5882acca7e8ab5 100644 (file)
--- a/doc/src/sgml/libpq++.sgml
+++ b/doc/src/sgml/libpq++.sgml
@@ -164,24 +164,6 @@
         sets the default mode for the genetic optimizer.
         </para>
        </listitem>
-      <listitem>
-       <para>
-       <envar>PGRPLANS</envar>
-       sets the default mode to allow or disable right-sided plans in the optimizer.
-       </para>
-      </listitem>
-      <listitem>
-       <para>
-       <envar>PGCOSTHEAP</envar>
-       sets the default cost for heap searches for the optimizer.
-       </para>
-      </listitem>
-      <listitem>
-       <para>
-       <envar>PGCOSTINDEX</envar>
-       sets the default cost for indexed searches for the optimizer.
-       </para>
-      </listitem>
       </itemizedlist>
      </para>
         
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml

index 2e02618c62e955461add106d0e4f1e7f942f63a8..506d98002c586a4559300cfc4517d245c8b8c5f4 100644 (file)
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1900,24 +1900,6 @@ behavior for every Postgres session:
  sets the default mode for the genetic optimizer.
  </para>
  </listitem>
-<listitem>
-<para>
-<envar>PGRPLANS</envar>
-sets the default mode to allow or disable right-sided plans in the optimizer.
-</para>
-</listitem>
-<listitem>
-<para>
-<envar>PGCOSTHEAP</envar>
-sets the default cost for heap searches for the optimizer.
-</para>
-</listitem>
-<listitem>
-<para>
-<envar>PGCOSTINDEX</envar>
-sets the default cost for indexed searches for the optimizer.
-</para>
-</listitem>
  </itemizedlist>
  </para>
  
diff --git a/doc/src/sgml/ref/set.sgml b/doc/src/sgml/ref/set.sgml

index 51177570649c6f300dc5860e43aec18bfa8a137b..2c32c76ff035b3bfc9328fa07a257f537411928a 100644 (file)
--- a/doc/src/sgml/ref/set.sgml
+++ b/doc/src/sgml/ref/set.sgml
@@ -1,5 +1,5 @@
  <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/set.sgml,v 1.28 1999/07/22 15:09:15 thomas Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/set.sgml,v 1.29 2000/02/15 20:49:07 tgl Exp $
  Postgres documentation
  -->
  
@@ -50,7 +50,8 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE }
        <term><replaceable class="PARAMETER">value</replaceable></term>
        <listitem>
         <para>
-       New value of parameter.
+       New value of parameter.  The word <literal>DEFAULT</literal> can be
+       written to specify resetting the parameter to its default value.
         </para>
        </listitem>
       </varlistentry>
@@ -78,20 +79,12 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE }
            </para>
           </listitem>
          </varlistentry>
-        
-        <varlistentry>
-         <term>DEFAULT</term>
-         <listitem>
-          <para>
-           Sets the multi-byte client encoding to the default value.
-          </para>
-         </listitem>
-        </varlistentry>
         </variablelist>
         </para>
  
         <para>
-       This is only enabled if multi-byte was specified to configure.
+       This option is only available if MULTIBYTE support was enabled
+       during the configure step of building Postgres.
         </para>
        </listitem>
       </varlistentry>
@@ -176,6 +169,9 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE }
         <simplelist>
          <member>
           Setting the <envar>PGDATESTYLE</envar> environment variable.
+         If PGDATESTYLE is set in the frontend environment of a client
+         based on libpq, libpq will automatically set DATESTYLE to the
+         value of PGDATESTYLE during connection startup.
          </member>
          <member>
           Running postmaster using the option <option>-o -e</option> to set
@@ -218,19 +214,12 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE }
            </para>
           </listitem>
          </varlistentry>
-        
-        <varlistentry>
-         <term>DEFAULT</term>
-         <listitem>
-          <para>
-           Sets the multi-byte server encoding.
-          </para>
-         </listitem>
-        </varlistentry>
         </variablelist>
         </para>
+
         <para>
-       This is only enabled if multi-byte was specified to configure.
+       This option is only available if MULTIBYTE support was enabled
+       during the configure step of building Postgres.
         </para>
        </listitem>
       </varlistentry>
@@ -286,16 +275,17 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE }
         If an invalid time zone is specified, the time zone
         becomes GMT (on most systems anyway).
         </para>
-       <para>
-       A frontend which uses libpq may be initialized by setting the PGTZ
-       environment variable.
-       </para>
         <para>
         The second syntax shown above, allows one to set the timezone
         with a syntax similar to SQL92 <command>SET TIME ZONE</command>.
         The LOCAL keyword is just an alternate form
         of DEFAULT for SQL92 compatibility.
         </para>
+       <para>
+        If the PGTZ environment variable is set in the frontend
+       environment of a client based on libpq, libpq will automatically
+       set TIMEZONE to the value of PGTZ during connection startup.
+       </para>
        </listitem>
       </varlistentry>
  
@@ -349,133 +339,381 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE }
      
      <variablelist>
       <varlistentry>
-      <term>COST_HEAP</term>
+      <term>RANDOM_PAGE_COST</term>
        <listitem>
         <para>
-       Sets the default cost of a heap scan for use by the optimizer.
+        Sets the optimizer's estimate of the cost of a nonsequentially
+       fetched disk page.  This is measured as a multiple of the cost
+       of a sequential page fetch.
         
         <variablelist>
          <varlistentry>
-         <term><replaceable class="parameter">float4</replaceable></term>
+         <term><replaceable class="parameter">float8</replaceable></term>
           <listitem>
            <para>
-           Set the cost of a heap scan to the specified floating point value.
+           Set the cost of a random page access
+           to the specified floating-point value.
            </para>
           </listitem>
          </varlistentry>
-        
+       </variablelist>
+       </para>
+      </listitem>
+     </varlistentry>
+    
+     <varlistentry>
+      <term>CPU_TUPLE_COST</term>
+      <listitem>
+       <para>
+        Sets the optimizer's estimate of the cost of processing each
+       tuple during a query.  This is measured as a fraction of the cost
+       of a sequential page fetch.
+       
+       <variablelist>
          <varlistentry>
-         <term>DEFAULT</term>
+         <term><replaceable class="parameter">float8</replaceable></term>
           <listitem>
            <para>
-           Sets the cost of a heap scan to the default value.
+           Set the cost of per-tuple CPU processing
+           to the specified floating-point value.
            </para>
           </listitem>
          </varlistentry>
         </variablelist>
         </para>
+      </listitem>
+     </varlistentry>
+    
+     <varlistentry>
+      <term>CPU_INDEX_TUPLE_COST</term>
+      <listitem>
         <para>
-       The frontend may be initialized by setting the PGCOSTHEAP
-       environment variable.
+        Sets the optimizer's estimate of the cost of processing each
+       index tuple during an index scan.  This is measured as a fraction
+       of the cost of a sequential page fetch.
+       
+       <variablelist>
+        <varlistentry>
+         <term><replaceable class="parameter">float8</replaceable></term>
+         <listitem>
+          <para>
+           Set the cost of per-index-tuple CPU processing
+           to the specified floating-point value.
+          </para>
+         </listitem>
+        </varlistentry>
+       </variablelist>
+       </para>
+      </listitem>
+     </varlistentry>
+    
+     <varlistentry>
+      <term>CPU_OPERATOR_COST</term>
+      <listitem>
+       <para>
+        Sets the optimizer's estimate of the cost of processing each
+       operator in a WHERE clause.  This is measured as a fraction
+       of the cost of a sequential page fetch.
+       
+       <variablelist>
+        <varlistentry>
+         <term><replaceable class="parameter">float8</replaceable></term>
+         <listitem>
+          <para>
+           Set the cost of per-operator CPU processing
+           to the specified floating-point value.
+          </para>
+         </listitem>
+        </varlistentry>
+       </variablelist>
+       </para>
+      </listitem>
+     </varlistentry>
+    
+     <varlistentry>
+      <term>EFFECTIVE_CACHE_SIZE</term>
+      <listitem>
+       <para>
+        Sets the optimizer's assumption about the effective size of the
+       disk cache (that is, the portion of the kernel's disk cache that
+       will be used for Postgres data files).  This is measured in disk
+       pages, which are normally 8 kB apiece.
+       
+       <variablelist>
+        <varlistentry>
+         <term><replaceable class="parameter">float8</replaceable></term>
+         <listitem>
+          <para>
+           Set the assumed cache size
+           to the specified floating-point value.
+          </para>
+         </listitem>
+        </varlistentry>
+       </variablelist>
         </para>
        </listitem>
       </varlistentry>
  
       <varlistentry>
-      <term>COST_INDEX</term>
+      <term>ENABLE_SEQSCAN</term>
        <listitem>
         <para>
-       Sets the default cost of an index scan for use by the optimizer.
+        Enables or disables the planner's use of sequential scan plan types.
+       (It's not possible to suppress sequential scans entirely, but turning
+       this variable OFF discourages the planner from using one if there is
+       any other method available.)
  
-       <variablelist>     
+       <variablelist>
          <varlistentry>
-         <term><replaceable class="parameter">float4</replaceable></term>
+         <term>ON</term>
           <listitem>
            <para>
-           Set the cost of an index scan to the specified floating point value.
+           enables use of sequential scans (default setting).
            </para>
           </listitem>
          </varlistentry>
-        
+       
          <varlistentry>
-         <term>DEFAULT</term>
+         <term>OFF</term>
           <listitem>
            <para>
-           Sets the cost of an index scan to the default value.
+           disables use of sequential scans.
            </para>
           </listitem>
          </varlistentry>
         </variablelist>
         </para>
+      </listitem>
+     </varlistentry>
  
+     <varlistentry>
+      <term>ENABLE_INDEXSCAN</term>
+      <listitem>
         <para>
-       The frontend may be initialized by setting the PGCOSTINDEX
-       environment variable.
+        Enables or disables the planner's use of index scan plan types.
+
+       <variablelist>
+        <varlistentry>
+         <term>ON</term>
+         <listitem>
+          <para>
+           enables use of index scans (default setting).
+          </para>
+         </listitem>
+        </varlistentry>
+       
+        <varlistentry>
+         <term>OFF</term>
+         <listitem>
+          <para>
+           disables use of index scans.
+          </para>
+         </listitem>
+        </varlistentry>
+       </variablelist>
         </para>
        </listitem>
       </varlistentry>
  
       <varlistentry>
-      <term>GEQO</term>
+      <term>ENABLE_TIDSCAN</term>
        <listitem>
         <para>
-       Sets the threshold for using the genetic optimizer algorithm.
+        Enables or disables the planner's use of TID scan plan types.
  
         <variablelist>
          <varlistentry>
           <term>ON</term>
           <listitem>
            <para>
-           enables the genetic optimizer algorithm
-           for statements with 6 or more tables.
+           enables use of TID scans (default setting).
            </para>
           </listitem>
          </varlistentry>
+       
+        <varlistentry>
+         <term>OFF</term>
+         <listitem>
+          <para>
+           disables use of TID scans.
+          </para>
+         </listitem>
+        </varlistentry>
+       </variablelist>
+       </para>
+      </listitem>
+     </varlistentry>
  
+     <varlistentry>
+      <term>ENABLE_SORT</term>
+      <listitem>
+       <para>
+        Enables or disables the planner's use of explicit sort steps.
+       (It's not possible to suppress explicit sorts entirely, but turning
+       this variable OFF discourages the planner from using one if there is
+       any other method available.)
+
+       <variablelist>
          <varlistentry>
-         <term>ON=<replaceable class="parameter">#</replaceable></term>
+         <term>ON</term>
           <listitem>
            <para>
-           Takes an integer argument to enable the genetic optimizer algorithm
-           for statements with <replaceable class="parameter">#</replaceable>
-           or more tables in the query.
+           enables use of sorts (default setting).
            </para>
           </listitem>
          </varlistentry>
+       
+        <varlistentry>
+         <term>OFF</term>
+         <listitem>
+          <para>
+           disables use of sorts.
+          </para>
+         </listitem>
+        </varlistentry>
+       </variablelist>
+       </para>
+      </listitem>
+     </varlistentry>
  
+     <varlistentry>
+      <term>ENABLE_NESTLOOP</term>
+      <listitem>
+       <para>
+        Enables or disables the planner's use of nested-loop join plans.
+       (It's not possible to suppress nested-loop joins entirely, but turning
+       this variable OFF discourages the planner from using one if there is
+       any other method available.)
+
+       <variablelist>
+        <varlistentry>
+         <term>ON</term>
+         <listitem>
+          <para>
+           enables use of nested-loop joins (default setting).
+          </para>
+         </listitem>
+        </varlistentry>
+       
          <varlistentry>
           <term>OFF</term>
           <listitem>
            <para>
-           disables the genetic optimizer algorithm.
+           disables use of nested-loop joins.
+          </para>
+         </listitem>
+        </varlistentry>
+       </variablelist>
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term>ENABLE_MERGEJOIN</term>
+      <listitem>
+       <para>
+        Enables or disables the planner's use of merge join plans.
+
+       <variablelist>
+        <varlistentry>
+         <term>ON</term>
+         <listitem>
+          <para>
+           enables use of merge joins (default setting).
            </para>
           </listitem>
          </varlistentry>
+       
          <varlistentry>
-         <term>DEFAULT</term>
+         <term>OFF</term>
+         <listitem>
+          <para>
+           disables use of merge joins.
+          </para>
+         </listitem>
+        </varlistentry>
+       </variablelist>
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term>ENABLE_HASHJOIN</term>
+      <listitem>
+       <para>
+        Enables or disables the planner's use of hash join plans.
+
+       <variablelist>
+        <varlistentry>
+         <term>ON</term>
           <listitem>
            <para>
-           Equivalent to specifying <command>SET GEQO='ON'</command>
+           enables use of hash joins (default setting).
+          </para>
+         </listitem>
+        </varlistentry>
+       
+        <varlistentry>
+         <term>OFF</term>
+         <listitem>
+          <para>
+           disables use of hash joins.
            </para>
           </listitem>
          </varlistentry>
         </variablelist>
         </para>
+      </listitem>
+     </varlistentry>
  
+     <varlistentry>
+      <term>GEQO</term>
+      <listitem>
         <para>
-       This algorithm is on by default, which used GEQO for
-       statements of eleven or more tables.
-       (See the chapter on GEQO in the Programmer's Guide
-       for more information).
+       Sets the threshold for using the genetic optimizer algorithm.
+
+       <variablelist>
+        <varlistentry>
+         <term>ON</term>
+         <listitem>
+          <para>
+           enables the genetic optimizer algorithm
+           for statements with 11 or more tables.
+           (This is also the DEFAULT setting.)
+          </para>
+         </listitem>
+        </varlistentry>
+
+        <varlistentry>
+         <term>ON=<replaceable class="parameter">#</replaceable></term>
+         <listitem>
+          <para>
+           Takes an integer argument to enable the genetic optimizer algorithm
+           for statements with <replaceable class="parameter">#</replaceable>
+           or more tables in the query.
+          </para>
+         </listitem>
+        </varlistentry>
+
+        <varlistentry>
+         <term>OFF</term>
+         <listitem>
+          <para>
+           disables the genetic optimizer algorithm.
+          </para>
+         </listitem>
+        </varlistentry>
+       </variablelist>
         </para>
+
         <para>
-       The frontend may be initialized by setting PGGEQO
-       environment variable.
+       See the chapter on GEQO in the Programmer's Guide
+       for more information about query optimization.
         </para>
         <para>
-       It may be useful when joining big relations with
-       small ones. This algorithm is off by default.
-       It's not used by GEQO anyway.
+        If the PGGEQO environment variable is set in the frontend
+       environment of a client based on libpq, libpq will automatically
+       set GEQO to the value of PGGEQO during connection startup.
         </para>
        </listitem>
       </varlistentry>
@@ -484,10 +722,16 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE }
        <term>KSQO</term>
        <listitem>
         <para>
-       <firstterm>Key Set Query Optimizer</firstterm> forces the query optimizer
-       to optimize repetative OR clauses such as generated by
-       <productname>MicroSoft Access</productname>:
-       
+       <firstterm>Key Set Query Optimizer</firstterm> causes the query
+       planner to convert queries whose WHERE clause contains many
+       OR'ed AND clauses (such as "WHERE (a=1 AND b=2) OR (a=2 AND b=3) ...")
+       into a UNION query.  This method can be faster than the default
+       implementation, but it doesn't necessarily give exactly the same
+       results, since UNION implicitly adds a SELECT DISTINCT clause to
+       eliminate identical output rows.  KSQO is commonly used when
+       working with products like <productname>Microsoft
+       Access</productname>, which tend to generate queries of this form.
+
         <variablelist>
          <varlistentry>
           <term>ON</term>
@@ -502,7 +746,7 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE }
           <term>OFF</term>
           <listitem>
            <para>
-           disables this optimization.
+           disables this optimization (default setting).
            </para>
           </listitem>
          </varlistentry>
@@ -519,13 +763,9 @@ SET TRANSACTION ISOLATION LEVEL { READ COMMITTED | SERIALIZABLE }
         </para>
  
         <para>
-       It may be useful when joining big relations with
-       small ones. This algorithm is off by default.
-       It's not used by GEQO anyway.
-       </para>
-       <para>
-       The frontend may be initialized by setting the PGKSQO
-       environment variable.
+        The KSQO algorithm used to be absolutely essential for queries
+       with many OR'ed AND clauses, but in Postgres 7.0 and later
+       the standard planner handles these queries fairly successfully.
         </para>
        </listitem>
       </varlistentry>
diff --git a/doc/src/sgml/ref/show.sgml b/doc/src/sgml/ref/show.sgml

index 14b43d823447eeba6376b0d60e8601313e77982e..39fbde16ae0c3434ca5b46327e15493cdff8a38d 100644 (file)
--- a/doc/src/sgml/ref/show.sgml
+++ b/doc/src/sgml/ref/show.sgml
@@ -1,5 +1,5 @@
  <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/show.sgml,v 1.7 1999/07/22 15:09:15 thomas Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/show.sgml,v 1.8 2000/02/15 20:49:07 tgl Exp $
  Postgres documentation
  -->
  
@@ -106,14 +106,14 @@ SHOW VARIABLE
     Description
    </title>
    <para>
-   <command>SHOW</command> will display the current
-   configuration parameters for
-   variable during a session.
+   <command>SHOW</command> will display the current setting of a
+   run-time parameter during a session.
    </para>
    <para>
-   The session can be configured using <command>SET</command> statement,
-   and values
-   can be restored to the defaults using <command>RESET</command> statement.
+   These variables can be set using the <command>SET</command> statement,
+   and
+   can be restored to the default values using the <command>RESET</command>
+   statement.
     Parameters and values are case-insensitive.
    </para>
  
@@ -125,13 +125,12 @@ SHOW VARIABLE
      Notes
     </title>
     <para>
-    The <command>SHOW</command> is a <productname>Postgres</productname>
+    <command>SHOW</command> is a <productname>Postgres</productname>
      language extension.
     </para>
     <para>
      Refer to  <command>SET</command>/<command>RESET</command> 
      to set/reset variable values.
-    See also  <command>SET TIME ZONE</command>.
     </para>
    </refsect2>
   </refsect1>
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c

index 2b152b2fe5b0e7df8c90563035a1486ef343023e..2a38a349d6073999b40bfe939939bdb23ec619a9 100644 (file)
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -5,7 +5,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994-5, Regents of the University of California
   *
- *       $Id: explain.c,v 1.53 2000/02/15 03:36:39 thomas Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/explain.c,v 1.54 2000/02/15 20:49:08 tgl Exp $
   *
   */
  
@@ -217,39 +217,24 @@ explain_outNode(StringInfo str, Plan *plan, int indent, ExplainState *es)
                         {
                                 relation = RelationIdGetRelation(lfirsti(l));
                                 Assert(relation);
-                               if (++i > 1)
-                                       appendStringInfo(str, ", ");
-                               appendStringInfo(str,
+                               appendStringInfo(str, "%s%s",
+                                                                (++i > 1) ? ", " : "",
                                                                  stringStringInfo(RelationGetRelationName(relation)));
                                 /* drop relcache refcount from RelationIdGetRelation */
                                 RelationDecrementReferenceCount(relation);
                         }
+                       /* FALL THRU */
                 case T_SeqScan:
+               case T_TidScan:
                         if (((Scan *) plan)->scanrelid > 0)
                         {
                                 RangeTblEntry *rte = nth(((Scan *) plan)->scanrelid - 1, es->rtable);
  
-                               appendStringInfo(str, " on ");
-                               if (strcmp(rte->ref->relname, rte->relname) != 0)
-                               {
-                                       appendStringInfo(str, "%s ",
-                                                                        stringStringInfo(rte->relname));
-                               }
-                               appendStringInfo(str, stringStringInfo(rte->ref->relname));
-                       }
-                       break;
-               case T_TidScan:
-                       if (((TidScan *) plan)->scan.scanrelid > 0)
-                       {
-                               RangeTblEntry *rte = nth(((TidScan *) plan)->scan.scanrelid - 1, es->rtable);
-
-                               appendStringInfo(str, " on ");
-                               if (strcmp(rte->ref->relname, rte->relname) != 0)
-                               {
-                                       appendStringInfo(str, "%s ",
-                                                                        stringStringInfo(rte->relname));
-                               }
-                               appendStringInfo(str, stringStringInfo(rte->ref->relname));
+                               appendStringInfo(str, " on %s",
+                                                                stringStringInfo(rte->relname));
+                               if (rte->ref && strcmp(rte->ref->relname, rte->relname) != 0)
+                                       appendStringInfo(str, " %s",
+                                                                        stringStringInfo(rte->ref->relname));
                         }
                         break;
                 default:
@@ -257,8 +242,9 @@ explain_outNode(StringInfo str, Plan *plan, int indent, ExplainState *es)
         }
         if (es->printCost)
         {
-               appendStringInfo(str, "  (cost=%.2f rows=%.0f width=%d)",
-                                                plan->cost, plan->plan_rows, plan->plan_width);
+               appendStringInfo(str, "  (cost=%.2f..%.2f rows=%.0f width=%d)",
+                                                plan->startup_cost, plan->total_cost,
+                                                plan->plan_rows, plan->plan_width);
         }
         appendStringInfo(str, "\n");
  
diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c

index 52c4ed3552a1149254673abd6dea97f05c1d26aa..718a62a118d98d79413ceb1fc5acfb4e889f1f82 100644 (file)
--- a/src/backend/commands/variable.c
+++ b/src/backend/commands/variable.c
@@ -1,15 +1,24 @@
-/*
- * Routines for handling of 'SET var TO',
- *     'SHOW var' and 'RESET var' statements.
+/*-------------------------------------------------------------------------
+ *
+ * variable.c
+ *             Routines for handling of 'SET var TO',
+ *             'SHOW var' and 'RESET var' statements.
+ *
+ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
   *
- * $Id: variable.c,v 1.28 2000/01/22 23:50:10 tgl Exp $
+ * IDENTIFICATION
+ *       $Header: /cvsroot/pgsql/src/backend/commands/variable.c,v 1.29 2000/02/15 20:49:08 tgl Exp $
   *
+ *-------------------------------------------------------------------------
   */
  
  #include <ctype.h>
  #include <time.h>
  
  #include "postgres.h"
+
  #include "access/xact.h"
  #include "catalog/pg_shadow.h"
  #include "commands/variable.h"
@@ -24,18 +33,53 @@
  #include "mb/pg_wchar.h"
  #endif
  
+
+/* XXX should be in a header file */
+extern bool _use_keyset_query_optimizer;
+
+
  static bool show_date(void);
  static bool reset_date(void);
  static bool parse_date(const char *);
  static bool show_timezone(void);
  static bool reset_timezone(void);
  static bool parse_timezone(const char *);
-static bool show_cost_heap(void);
-static bool reset_cost_heap(void);
-static bool parse_cost_heap(const char *);
-static bool show_cost_index(void);
-static bool reset_cost_index(void);
-static bool parse_cost_index(const char *);
+static bool show_effective_cache_size(void);
+static bool reset_effective_cache_size(void);
+static bool parse_effective_cache_size(const char *);
+static bool show_random_page_cost(void);
+static bool reset_random_page_cost(void);
+static bool parse_random_page_cost(const char *);
+static bool show_cpu_tuple_cost(void);
+static bool reset_cpu_tuple_cost(void);
+static bool parse_cpu_tuple_cost(const char *);
+static bool show_cpu_index_tuple_cost(void);
+static bool reset_cpu_index_tuple_cost(void);
+static bool parse_cpu_index_tuple_cost(const char *);
+static bool show_cpu_operator_cost(void);
+static bool reset_cpu_operator_cost(void);
+static bool parse_cpu_operator_cost(const char *);
+static bool reset_enable_seqscan(void);
+static bool show_enable_seqscan(void);
+static bool parse_enable_seqscan(const char *);
+static bool reset_enable_indexscan(void);
+static bool show_enable_indexscan(void);
+static bool parse_enable_indexscan(const char *);
+static bool reset_enable_tidscan(void);
+static bool show_enable_tidscan(void);
+static bool parse_enable_tidscan(const char *);
+static bool reset_enable_sort(void);
+static bool show_enable_sort(void);
+static bool parse_enable_sort(const char *);
+static bool reset_enable_nestloop(void);
+static bool show_enable_nestloop(void);
+static bool parse_enable_nestloop(const char *);
+static bool reset_enable_mergejoin(void);
+static bool show_enable_mergejoin(void);
+static bool parse_enable_mergejoin(const char *);
+static bool reset_enable_hashjoin(void);
+static bool show_enable_hashjoin(void);
+static bool parse_enable_hashjoin(const char *);
  static bool reset_geqo(void);
  static bool show_geqo(void);
  static bool parse_geqo(const char *);
@@ -46,8 +90,6 @@ static bool show_XactIsoLevel(void);
  static bool reset_XactIsoLevel(void);
  static bool parse_XactIsoLevel(const char *);
  
-extern bool _use_keyset_query_optimizer;
-
  /*
   *
   * Get_Token
@@ -153,6 +195,204 @@ get_token(char **tok, char **val, const char *str)
         return str;
  }
  
+/*
+ * parse_boolean_var
+ *       Common parse routine for variables whose value is ON or OFF.
+ *
+ * A NULL value stores defaultval into *variable.  Otherwise value is matched
+ * case-insensitively against "on" and "off"; any other text is reported
+ * through elog(ERROR), with varname naming the variable in the message.
+ * Every path that returns yields TRUE.
+ */
+static bool
+parse_boolean_var(const char *value,
+                                 bool *variable, const char *varname, bool defaultval)
+{
+       if (value == NULL)
+               *variable = defaultval;
+       else if (strcasecmp(value, "on") == 0)
+               *variable = true;
+       else if (strcasecmp(value, "off") == 0)
+               *variable = false;
+       else
+               elog(ERROR, "Bad value for %s (%s)", varname, value);
+
+       return TRUE;
+}
+
+/*
+ * ENABLE_SEQSCAN
+ */
+/* Parse a new ENABLE_SEQSCAN setting; a NULL value selects the default, true. */
+static bool
+parse_enable_seqscan(const char *value)
+{
+       bool       *target = &enable_seqscan;
+
+       return parse_boolean_var(value, target, "ENABLE_SEQSCAN", true);
+}
+
+/* Report whether enable_seqscan is currently ON or OFF as a NOTICE. */
+static bool
+show_enable_seqscan(void)
+{
+       const char *state = enable_seqscan ? "ON" : "OFF";
+
+       elog(NOTICE, "ENABLE_SEQSCAN is %s", state);
+       return TRUE;
+}
+
+/* Put enable_seqscan back to true, the same default the parse routine uses. */
+static bool
+reset_enable_seqscan(void)
+{
+       enable_seqscan = true;
+
+       return TRUE;
+}
+
+/*
+ * ENABLE_INDEXSCAN
+ */
+/* Parse a new ENABLE_INDEXSCAN setting; a NULL value selects the default, true. */
+static bool
+parse_enable_indexscan(const char *value)
+{
+       bool       *target = &enable_indexscan;
+
+       return parse_boolean_var(value, target, "ENABLE_INDEXSCAN", true);
+}
+
+/* Report whether enable_indexscan is currently ON or OFF as a NOTICE. */
+static bool
+show_enable_indexscan(void)
+{
+       const char *state = enable_indexscan ? "ON" : "OFF";
+
+       elog(NOTICE, "ENABLE_INDEXSCAN is %s", state);
+       return TRUE;
+}
+
+/* Put enable_indexscan back to true, the same default the parse routine uses. */
+static bool
+reset_enable_indexscan(void)
+{
+       enable_indexscan = true;
+
+       return TRUE;
+}
+
+/*
+ * ENABLE_TIDSCAN
+ */
+/* Parse a new ENABLE_TIDSCAN setting; a NULL value selects the default, true. */
+static bool
+parse_enable_tidscan(const char *value)
+{
+       bool       *target = &enable_tidscan;
+
+       return parse_boolean_var(value, target, "ENABLE_TIDSCAN", true);
+}
+
+/* Report whether enable_tidscan is currently ON or OFF as a NOTICE. */
+static bool
+show_enable_tidscan(void)
+{
+       const char *state = enable_tidscan ? "ON" : "OFF";
+
+       elog(NOTICE, "ENABLE_TIDSCAN is %s", state);
+       return TRUE;
+}
+
+/* Put enable_tidscan back to true, the same default the parse routine uses. */
+static bool
+reset_enable_tidscan(void)
+{
+       enable_tidscan = true;
+
+       return TRUE;
+}
+
+/*
+ * ENABLE_SORT
+ */
+/* Parse a new ENABLE_SORT setting; a NULL value selects the default, true. */
+static bool
+parse_enable_sort(const char *value)
+{
+       bool       *target = &enable_sort;
+
+       return parse_boolean_var(value, target, "ENABLE_SORT", true);
+}
+
+/* Report whether enable_sort is currently ON or OFF as a NOTICE. */
+static bool
+show_enable_sort(void)
+{
+       const char *state = enable_sort ? "ON" : "OFF";
+
+       elog(NOTICE, "ENABLE_SORT is %s", state);
+       return TRUE;
+}
+
+/* Put enable_sort back to true, the same default the parse routine uses. */
+static bool
+reset_enable_sort(void)
+{
+       enable_sort = true;
+
+       return TRUE;
+}
+
+/*
+ * ENABLE_NESTLOOP
+ */
+/* Parse a new ENABLE_NESTLOOP setting; a NULL value selects the default, true. */
+static bool
+parse_enable_nestloop(const char *value)
+{
+       bool       *target = &enable_nestloop;
+
+       return parse_boolean_var(value, target, "ENABLE_NESTLOOP", true);
+}
+
+/* Report whether enable_nestloop is currently ON or OFF as a NOTICE. */
+static bool
+show_enable_nestloop(void)
+{
+       const char *state = enable_nestloop ? "ON" : "OFF";
+
+       elog(NOTICE, "ENABLE_NESTLOOP is %s", state);
+       return TRUE;
+}
+
+/* Put enable_nestloop back to true, the same default the parse routine uses. */
+static bool
+reset_enable_nestloop(void)
+{
+       enable_nestloop = true;
+
+       return TRUE;
+}
+
+/*
+ * ENABLE_MERGEJOIN
+ */
+/* Parse a new ENABLE_MERGEJOIN setting; a NULL value selects the default, true. */
+static bool
+parse_enable_mergejoin(const char *value)
+{
+       bool       *target = &enable_mergejoin;
+
+       return parse_boolean_var(value, target, "ENABLE_MERGEJOIN", true);
+}
+
+/* Report whether enable_mergejoin is currently ON or OFF as a NOTICE. */
+static bool
+show_enable_mergejoin(void)
+{
+       const char *state = enable_mergejoin ? "ON" : "OFF";
+
+       elog(NOTICE, "ENABLE_MERGEJOIN is %s", state);
+       return TRUE;
+}
+
+/* Put enable_mergejoin back to true, the same default the parse routine uses. */
+static bool
+reset_enable_mergejoin(void)
+{
+       enable_mergejoin = true;
+
+       return TRUE;
+}
+
+/*
+ * ENABLE_HASHJOIN
+ */
+/* Parse a new ENABLE_HASHJOIN setting; a NULL value selects the default, true. */
+static bool
+parse_enable_hashjoin(const char *value)
+{
+       bool       *target = &enable_hashjoin;
+
+       return parse_boolean_var(value, target, "ENABLE_HASHJOIN", true);
+}
+
+/* Report whether enable_hashjoin is currently ON or OFF as a NOTICE. */
+static bool
+show_enable_hashjoin(void)
+{
+       const char *state = enable_hashjoin ? "ON" : "OFF";
+
+       elog(NOTICE, "ENABLE_HASHJOIN is %s", state);
+       return TRUE;
+}
+
+/* Put enable_hashjoin back to true, the same default the parse routine uses. */
+static bool
+reset_enable_hashjoin(void)
+{
+       enable_hashjoin = true;
+
+       return TRUE;
+}
+
  /*
   *
   * GEQO
@@ -208,7 +448,6 @@ parse_geqo(const char *value)
  static bool
  show_geqo()
  {
-
         if (enable_geqo)
                 elog(NOTICE, "GEQO is ON beginning with %d relations", geqo_rels);
         else
@@ -219,7 +458,6 @@ show_geqo()
  static bool
  reset_geqo(void)
  {
-
  #ifdef GEQO
         enable_geqo = true;
  #else
@@ -230,76 +468,173 @@ reset_geqo(void)
  }
  
  /*
- *
- * COST_HEAP
- *
+ * EFFECTIVE_CACHE_SIZE
   */
  static bool
-parse_cost_heap(const char *value)
+parse_effective_cache_size(const char *value)
  {
         float64         res;
  
         if (value == NULL)
         {
-               reset_cost_heap();
+               reset_effective_cache_size();
                 return TRUE;
         }
  
         res = float8in((char *) value);
-       cpu_page_weight = *res;
+       effective_cache_size = *res;
  
         return TRUE;
  }
  
  static bool
-show_cost_heap()
+show_effective_cache_size()
  {
+       elog(NOTICE, "EFFECTIVE_CACHE_SIZE is %g (%dK pages)",
+                effective_cache_size, BLCKSZ/1024);
+       return TRUE;
+}
  
-       elog(NOTICE, "COST_HEAP is %f", cpu_page_weight);
+static bool
+reset_effective_cache_size()
+{
+       effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
         return TRUE;
  }
  
+/*
+ * RANDOM_PAGE_COST
+ */
  static bool
-reset_cost_heap()
+parse_random_page_cost(const char *value)
  {
-       cpu_page_weight = CPU_PAGE_WEIGHT;
+       float64         res;
+
+       if (value == NULL)
+       {
+               reset_random_page_cost();
+               return TRUE;
+       }
+
+       res = float8in((char *) value);
+       random_page_cost = *res;
+
+       return TRUE;
+}
+
+/* Report the current random_page_cost value as a NOTICE. */
+static bool
+show_random_page_cost(void)
+{
+       elog(NOTICE, "RANDOM_PAGE_COST is %g",
+                random_page_cost);
+       return TRUE;
+}
+
+static bool
+reset_random_page_cost()
+{
+       random_page_cost = DEFAULT_RANDOM_PAGE_COST;
         return TRUE;
  }
  
  /*
- *
- * COST_INDEX
- *
+ * CPU_TUPLE_COST
   */
  static bool
-parse_cost_index(const char *value)
+parse_cpu_tuple_cost(const char *value)
  {
         float64         res;
  
         if (value == NULL)
         {
-               reset_cost_index();
+               reset_cpu_tuple_cost();
                 return TRUE;
         }
  
         res = float8in((char *) value);
-       cpu_index_page_weight = *res;
+       cpu_tuple_cost = *res;
  
         return TRUE;
  }
  
  static bool
-show_cost_index()
+show_cpu_tuple_cost()
  {
+       elog(NOTICE, "CPU_TUPLE_COST is %g", cpu_tuple_cost);
+       return TRUE;
+}
  
-       elog(NOTICE, "COST_INDEX is %f", cpu_index_page_weight);
+static bool
+reset_cpu_tuple_cost()
+{
+       cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
         return TRUE;
  }
  
+/*
+ * CPU_INDEX_TUPLE_COST
+ */
  static bool
-reset_cost_index()
+parse_cpu_index_tuple_cost(const char *value)
  {
-       cpu_index_page_weight = CPU_INDEX_PAGE_WEIGHT;
+       float64         res;
+
+       if (value == NULL)
+       {
+               reset_cpu_index_tuple_cost();
+               return TRUE;
+       }
+
+       res = float8in((char *) value);
+       cpu_index_tuple_cost = *res;
+
+       return TRUE;
+}
+
+/* Report the current cpu_index_tuple_cost value as a NOTICE. */
+static bool
+show_cpu_index_tuple_cost(void)
+{
+       elog(NOTICE, "CPU_INDEX_TUPLE_COST is %g",
+                cpu_index_tuple_cost);
+       return TRUE;
+}
+
+/* Restore cpu_index_tuple_cost to DEFAULT_CPU_INDEX_TUPLE_COST. */
+static bool
+reset_cpu_index_tuple_cost(void)
+{
+       cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
+
+       return TRUE;
+}
+
+/*
+ * CPU_OPERATOR_COST
+ */
+/*
+ * Parse a new CPU_OPERATOR_COST setting.
+ *
+ * A NULL value defers to reset_cpu_operator_cost().  Any other text is
+ * handed to float8in(), and the value its result points at is stored in
+ * cpu_operator_cost.  Always returns TRUE.
+ */
+static bool
+parse_cpu_operator_cost(const char *value)
+{
+       if (value != NULL)
+       {
+               float64         parsed = float8in((char *) value);
+
+               cpu_operator_cost = *parsed;
+       }
+       else
+               reset_cpu_operator_cost();
+
+       return TRUE;
+}
+
+/* Report the current cpu_operator_cost value as a NOTICE. */
+static bool
+show_cpu_operator_cost(void)
+{
+       elog(NOTICE, "CPU_OPERATOR_COST is %g",
+                cpu_operator_cost);
+       return TRUE;
+}
+
+static bool
+reset_cpu_operator_cost()
+{
+       cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
         return TRUE;
  }
  
@@ -527,6 +862,89 @@ reset_timezone()
         return TRUE;
  }      /* reset_timezone() */
  
+/*-----------------------------------------------------------------------
+KSQO code will one day be unnecessary when the optimizer makes use of
+indexes when multiple ORs are specified in the where clause.
+See optimizer/prep/prepkeyset.c for more on this.
+       daveh@insightdist.com    6/16/98
+-----------------------------------------------------------------------*/
+/* Parse a new KSQO setting; a NULL value selects the default, false. */
+static bool
+parse_ksqo(const char *value)
+{
+       bool       *target = &_use_keyset_query_optimizer;
+
+       return parse_boolean_var(value, target, "KSQO", false);
+}
+
+/* Report whether _use_keyset_query_optimizer is currently ON or OFF as a NOTICE. */
+static bool
+show_ksqo(void)
+{
+       const char *state = _use_keyset_query_optimizer ? "ON" : "OFF";
+
+       elog(NOTICE, "KSQO is %s", state);
+       return TRUE;
+}
+
+/* Put _use_keyset_query_optimizer back to false, the default the parse routine uses. */
+static bool
+reset_ksqo(void)
+{
+       _use_keyset_query_optimizer = false;
+
+       return TRUE;
+}
+
+/* SET TRANSACTION */
+
+/*
+ * parse_XactIsoLevel
+ *       Set XactIsoLevel from "SERIALIZABLE" or "COMMITTED" (case-insensitive).
+ *
+ * A NULL value defers to reset_XactIsoLevel().  When SerializableSnapshot
+ * is already non-NULL the request is rejected; that case, like an
+ * unrecognized value, is reported through elog(ERROR).
+ */
+static bool
+parse_XactIsoLevel(const char *value)
+{
+       if (value == NULL)
+               reset_XactIsoLevel();
+       else if (SerializableSnapshot != NULL)
+               elog(ERROR, "SET TRANSACTION ISOLATION LEVEL must be called before any query");
+       else if (strcasecmp(value, "SERIALIZABLE") == 0)
+               XactIsoLevel = XACT_SERIALIZABLE;
+       else if (strcasecmp(value, "COMMITTED") == 0)
+               XactIsoLevel = XACT_READ_COMMITTED;
+       else
+               elog(ERROR, "Bad TRANSACTION ISOLATION LEVEL (%s)", value);
+
+       return TRUE;
+}
+
+/*
+ * Report the current isolation level as a NOTICE: SERIALIZABLE when
+ * XactIsoLevel is XACT_SERIALIZABLE, READ COMMITTED for any other value.
+ */
+static bool
+show_XactIsoLevel(void)
+{
+       if (XactIsoLevel != XACT_SERIALIZABLE)
+               elog(NOTICE, "TRANSACTION ISOLATION LEVEL is READ COMMITTED");
+       else
+               elog(NOTICE, "TRANSACTION ISOLATION LEVEL is SERIALIZABLE");
+
+       return TRUE;
+}
+
+/*
+ * Restore XactIsoLevel to DefaultXactIsoLevel.  If SerializableSnapshot is
+ * already non-NULL the level is left alone and elog(ERROR) is raised.
+ */
+static bool
+reset_XactIsoLevel(void)
+{
+       if (SerializableSnapshot == NULL)
+       {
+               XactIsoLevel = DefaultXactIsoLevel;
+               return TRUE;
+       }
+
+       elog(ERROR, "SET TRANSACTION ISOLATION LEVEL must be called before any query");
+       return TRUE;
+}
+
  /*
   * Pg_options
   */
@@ -557,6 +975,7 @@ reset_pg_options(void)
         return (TRUE);
  }
  
+
  /*-----------------------------------------------------------------------*/
  
  struct VariableParsers
@@ -575,10 +994,52 @@ struct VariableParsers
                 "timezone", parse_timezone, show_timezone, reset_timezone
         },
         {
-               "cost_heap", parse_cost_heap, show_cost_heap, reset_cost_heap
+               "effective_cache_size", parse_effective_cache_size,
+               show_effective_cache_size, reset_effective_cache_size
+       },
+       {
+               "random_page_cost", parse_random_page_cost,
+               show_random_page_cost, reset_random_page_cost
+       },
+       {
+               "cpu_tuple_cost", parse_cpu_tuple_cost,
+               show_cpu_tuple_cost, reset_cpu_tuple_cost
+       },
+       {
+               "cpu_index_tuple_cost", parse_cpu_index_tuple_cost,
+               show_cpu_index_tuple_cost, reset_cpu_index_tuple_cost
+       },
+       {
+               "cpu_operator_cost", parse_cpu_operator_cost,
+               show_cpu_operator_cost, reset_cpu_operator_cost
+       },
+       {
+               "enable_seqscan", parse_enable_seqscan,
+               show_enable_seqscan, reset_enable_seqscan
+       },
+       {
+               "enable_indexscan", parse_enable_indexscan,
+               show_enable_indexscan, reset_enable_indexscan
+       },
+       {
+               "enable_tidscan", parse_enable_tidscan,
+               show_enable_tidscan, reset_enable_tidscan
+       },
+       {
+               "enable_sort", parse_enable_sort,
+               show_enable_sort, reset_enable_sort
         },
         {
-               "cost_index", parse_cost_index, show_cost_index, reset_cost_index
+               "enable_nestloop", parse_enable_nestloop,
+               show_enable_nestloop, reset_enable_nestloop
+       },
+       {
+               "enable_mergejoin", parse_enable_mergejoin,
+               show_enable_mergejoin, reset_enable_mergejoin
+       },
+       {
+               "enable_hashjoin", parse_enable_hashjoin,
+               show_enable_hashjoin, reset_enable_hashjoin
         },
         {
                 "geqo", parse_geqo, show_geqo, reset_geqo
@@ -655,102 +1116,3 @@ ResetPGVariable(const char *name)
  
         return TRUE;
  }
-
-
-/*-----------------------------------------------------------------------
-KSQO code will one day be unnecessary when the optimizer makes use of
-indexes when multiple ORs are specified in the where clause.
-See optimizer/prep/prepkeyset.c for more on this.
-       daveh@insightdist.com    6/16/98
------------------------------------------------------------------------*/
-static bool
-parse_ksqo(const char *value)
-{
-       if (value == NULL)
-       {
-               reset_ksqo();
-               return TRUE;
-       }
-
-       if (strcasecmp(value, "on") == 0)
-               _use_keyset_query_optimizer = true;
-       else if (strcasecmp(value, "off") == 0)
-               _use_keyset_query_optimizer = false;
-       else
-               elog(ERROR, "Bad value for Key Set Query Optimizer (%s)", value);
-
-       return TRUE;
-}
-
-static bool
-show_ksqo()
-{
-
-       if (_use_keyset_query_optimizer)
-               elog(NOTICE, "Key Set Query Optimizer is ON");
-       else
-               elog(NOTICE, "Key Set Query Optimizer is OFF");
-       return TRUE;
-}
-
-static bool
-reset_ksqo()
-{
-       _use_keyset_query_optimizer = false;
-       return TRUE;
-}
-
-/* SET TRANSACTION */
-
-static bool
-parse_XactIsoLevel(const char *value)
-{
-
-       if (value == NULL)
-       {
-               reset_XactIsoLevel();
-               return TRUE;
-       }
-
-       if (SerializableSnapshot != NULL)
-       {
-               elog(ERROR, "SET TRANSACTION ISOLATION LEVEL must be called before any query");
-               return TRUE;
-       }
-
-
-       if (strcasecmp(value, "SERIALIZABLE") == 0)
-               XactIsoLevel = XACT_SERIALIZABLE;
-       else if (strcasecmp(value, "COMMITTED") == 0)
-               XactIsoLevel = XACT_READ_COMMITTED;
-       else
-               elog(ERROR, "Bad TRANSACTION ISOLATION LEVEL (%s)", value);
-
-       return TRUE;
-}
-
-static bool
-show_XactIsoLevel()
-{
-
-       if (XactIsoLevel == XACT_SERIALIZABLE)
-               elog(NOTICE, "TRANSACTION ISOLATION LEVEL is SERIALIZABLE");
-       else
-               elog(NOTICE, "TRANSACTION ISOLATION LEVEL is READ COMMITTED");
-       return TRUE;
-}
-
-static bool
-reset_XactIsoLevel()
-{
-
-       if (SerializableSnapshot != NULL)
-       {
-               elog(ERROR, "SET TRANSACTION ISOLATION LEVEL must be called before any query");
-               return TRUE;
-       }
-
-       XactIsoLevel = DefaultXactIsoLevel;
-
-       return TRUE;
-}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index adf0c7f1987c2ffe8dd28542ead1f778336c60f0..5bf01e227228c4d55e9f93a0de964054d06b4c58 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.105 2000/02/15 03:37:08 thomas Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.106 2000/02/15 20:49:09 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -75,7 +75,8 @@ listCopy(List *list)
  static void
  CopyPlanFields(Plan *from, Plan *newnode)
  {
-       newnode->cost = from->cost;
+       newnode->startup_cost = from->startup_cost;
+       newnode->total_cost = from->total_cost;
         newnode->plan_rows = from->plan_rows;
         newnode->plan_width = from->plan_width;
         /* state is NOT copied */
@@ -981,8 +982,9 @@ _copyRelOptInfo(RelOptInfo *from)
  
         Node_Copy(from, newnode, targetlist);
         Node_Copy(from, newnode, pathlist);
-       /* XXX cheapestpath should point to a member of pathlist? */
-       Node_Copy(from, newnode, cheapestpath);
+       /* XXX cheapest-path fields should point to members of pathlist? */
+       Node_Copy(from, newnode, cheapest_startup_path);
+       Node_Copy(from, newnode, cheapest_total_path);
         newnode->pruneable = from->pruneable;
  
         newnode->indexed = from->indexed;
@@ -990,6 +992,7 @@ _copyRelOptInfo(RelOptInfo *from)
         newnode->tuples = from->tuples;
  
         Node_Copy(from, newnode, baserestrictinfo);
+       newnode->baserestrictcost = from->baserestrictcost;
         Node_Copy(from, newnode, joininfo);
         Node_Copy(from, newnode, innerjoin);
  
@@ -1045,6 +1048,7 @@ _copyIndexOptInfo(IndexOptInfo *from)
         newnode->amcostestimate = from->amcostestimate;
         newnode->indproc = from->indproc;
         Node_Copy(from, newnode, indpred);
+       newnode->lossy = from->lossy;
  
         return newnode;
  }
@@ -1066,7 +1070,8 @@ CopyPathFields(Path *from, Path *newnode)
          */
         newnode->parent = from->parent;
  
-       newnode->path_cost = from->path_cost;
+       newnode->startup_cost = from->startup_cost;
+       newnode->total_cost = from->total_cost;
  
         newnode->pathtype = from->pathtype;
  
@@ -1108,6 +1113,7 @@ _copyIndexPath(IndexPath *from)
          */
         newnode->indexid = listCopy(from->indexid);
         Node_Copy(from, newnode, indexqual);
+       newnode->indexscandir = from->indexscandir;
         newnode->joinrelids = listCopy(from->joinrelids);
  
         return newnode;
@@ -1339,8 +1345,7 @@ _copyRangeTblEntry(RangeTblEntry *from)
  
         if (from->relname)
                 newnode->relname = pstrdup(from->relname);
-       if (from->ref)
-               Node_Copy(from, newnode, ref);
+       Node_Copy(from, newnode, ref);
         newnode->relid = from->relid;
         newnode->inh = from->inh;
         newnode->inFromCl = from->inFromCl;
@@ -1449,8 +1454,10 @@ _copyQuery(Query *from)
         Node_Copy(from, newnode, limitOffset);
         Node_Copy(from, newnode, limitCount);
  
-       /* we do not copy the planner internal fields: base_rel_list,
-        * join_rel_list, query_pathkeys.  Not entirely clear if this is right?
+       /*
+        * We do not copy the planner internal fields: base_rel_list,
+        * join_rel_list, equi_key_list, query_pathkeys.
+        * Not entirely clear if this is right?
          */
  
         return newnode;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 3ddc8d6c98a518cc43c91cdd4fb1c321e98e2217..fadc282d1add86fcb5aa8255e80f65cf443af725 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.60 2000/02/15 03:37:08 thomas Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.61 2000/02/15 20:49:09 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -100,10 +100,10 @@ _equalAttr(Attr *a, Attr *b)
  {
         if (!strcmp(a->relname, b->relname))
                 return false;
-       if (length(a->attrs) != length(b->attrs))
+       if (!equal(a->attrs, b->attrs))
                 return false;
  
-       return equal(a->attrs, b->attrs);
+       return true;
  }
  
  static bool
@@ -342,8 +342,8 @@ _equalPath(Path *a, Path *b)
                 return false;
         if (!equal(a->parent, b->parent))
                 return false;
-       /* do not check path_cost, since it may not be set yet, and being
-        * a float there are roundoff error issues anyway...
+       /* do not check path costs, since they may not be set yet, and being
+        * float values there are roundoff error issues anyway...
          */
         if (!equal(a->pathkeys, b->pathkeys))
                 return false;
@@ -359,6 +359,8 @@ _equalIndexPath(IndexPath *a, IndexPath *b)
                 return false;
         if (!equal(a->indexqual, b->indexqual))
                 return false;
+       if (a->indexscandir != b->indexscandir)
+               return false;
         if (!equali(a->joinrelids, b->joinrelids))
                 return false;
         return true;
@@ -625,8 +627,9 @@ _equalQuery(Query *a, Query *b)
  
         /*
          * We do not check the internal-to-the-planner fields: base_rel_list,
-        * join_rel_list, query_pathkeys.  They might not be set yet, and
-        * in any case they should be derivable from the other fields.
+        * join_rel_list, equi_key_list, query_pathkeys.
+        * They might not be set yet, and in any case they should be derivable
+        * from the other fields.
          */
         return true;
  }
@@ -644,16 +647,8 @@ _equalRangeTblEntry(RangeTblEntry *a, RangeTblEntry *b)
                 if (a->relname != b->relname)
                         return false;
         }
-       if (a->ref && b->ref)
-       {
-               if (! equal(a->ref, b->ref))
-                       return false;
-       }
-       else
-       {
-               if (a->ref != b->ref)
-                       return false;
-       }
+       if (!equal(a->ref, b->ref))
+               return false;
         if (a->relid != b->relid)
                 return false;
         if (a->inh != b->inh)
@@ -784,6 +779,9 @@ equal(void *a, void *b)
                 case T_Stream:
                         retval = _equalStream(a, b);
                         break;
+               case T_Attr:
+                       retval = _equalAttr(a, b);
+                       break;
                 case T_Var:
                         retval = _equalVar(a, b);
                         break;
@@ -856,9 +854,6 @@ equal(void *a, void *b)
                 case T_EState:
                         retval = _equalEState(a, b);
                         break;
-               case T_Attr:
-                       retval = _equalAttr(a, b);
-                       break;
                 case T_Integer:
                 case T_String:
                 case T_Float:
diff --git a/src/backend/nodes/freefuncs.c b/src/backend/nodes/freefuncs.c

index 690da02de85f5ec10190b924a83102f2793d40a2..8eed80e61ab42d4835136ac4259514d0424b0936 100644 (file)
--- a/src/backend/nodes/freefuncs.c
+++ b/src/backend/nodes/freefuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/Attic/freefuncs.c,v 1.35 2000/02/15 03:37:08 thomas Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/Attic/freefuncs.c,v 1.36 2000/02/15 20:49:09 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -730,10 +730,11 @@ _freeRelOptInfo(RelOptInfo *node)
  
         freeObject(node->targetlist);
         freeObject(node->pathlist);
-       /* XXX is this right? cheapestpath will typically be a pointer into
-        * pathlist, won't it?
+       /* XXX is this right? cheapest-path fields will typically be pointers
+        * into pathlist, not separate structs...
          */
-       freeObject(node->cheapestpath);
+       freeObject(node->cheapest_startup_path);
+       freeObject(node->cheapest_total_path);
  
         freeObject(node->baserestrictinfo);
         freeObject(node->joininfo);
@@ -1013,8 +1014,7 @@ _freeRangeTblEntry(RangeTblEntry *node)
  {
         if (node->relname)
                 pfree(node->relname);
-       if (node->ref)
-               freeObject(node->ref);
+       freeObject(node->ref);
  
         pfree(node);
  }
@@ -1024,8 +1024,7 @@ _freeAttr(Attr *node)
  {
         if (node->relname)
                 pfree(node->relname);
-       if (node->attrs)
-               freeObject(node->attrs);
+       freeObject(node->attrs);
  
         pfree(node);
  }
@@ -1346,10 +1345,3 @@ freeObject(void *node)
                         break;
         }
  }
-
-
-
-
-
-
-
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index e4c35cc277fb9a4850e8b0ad2fbe69db9b4b3f2e..c40ca9ff9cbdf1e1a8afc969dbbde086477824cc 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- *     $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.107 2000/02/15 03:37:09 thomas Exp $
+ *     $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.108 2000/02/15 20:49:09 tgl Exp $
   *
   * NOTES
   *       Every (plan) node in POSTGRES has an associated "out" routine which
@@ -321,8 +321,9 @@ static void
  _outPlanInfo(StringInfo str, Plan *node)
  {
         appendStringInfo(str,
-                                 ":cost %g :rows %.0f :width %d :state %s :qptargetlist ",
-                                        node->cost,
+                                        ":startup_cost %.2f :total_cost %.2f :rows %.0f :width %d :state %s :qptargetlist ",
+                                        node->startup_cost,
+                                        node->total_cost,
                                          node->plan_rows,
                                          node->plan_width,
                                          node->state ? "not-NULL" : "<>");
@@ -908,15 +909,13 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node)
         appendStringInfo(str, " :pathlist ");
         _outNode(str, node->pathlist);
  
-       /*
-        * Not sure if these are nodes or not.  They're declared as struct
-        * Path *.      Since i don't know, i'll just print the addresses for now.
-        * This can be changed later, if necessary.
-        */
+       appendStringInfo(str, " :cheapest_startup_path ");
+       _outNode(str, node->cheapest_startup_path);
+       appendStringInfo(str, " :cheapest_total_path ");
+       _outNode(str, node->cheapest_total_path);
  
         appendStringInfo(str,
-                                        " :cheapestpath @ 0x%x :pruneable %s :baserestrictinfo ",
-                                        (int) node->cheapestpath,
+                                        " :pruneable %s :baserestrictinfo ",
                                          node->pruneable ? "true" : "false");
         _outNode(str, node->baserestrictinfo);
  
@@ -977,9 +976,11 @@ _outRowMark(StringInfo str, RowMark *node)
  static void
  _outPath(StringInfo str, Path *node)
  {
-       appendStringInfo(str, " PATH :pathtype %d :cost %.2f :pathkeys ",
+       appendStringInfo(str,
+                                        " PATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ",
                                          node->pathtype,
-                                        node->path_cost);
+                                        node->startup_cost,
+                                        node->total_cost);
         _outNode(str, node->pathkeys);
  }
  
@@ -990,9 +991,10 @@ static void
  _outIndexPath(StringInfo str, IndexPath *node)
  {
         appendStringInfo(str,
-                                        " INDEXPATH :pathtype %d :cost %.2f :pathkeys ",
+                                        " INDEXPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ",
                                          node->path.pathtype,
-                                        node->path.path_cost);
+                                        node->path.startup_cost,
+                                        node->path.total_cost);
         _outNode(str, node->path.pathkeys);
  
         appendStringInfo(str, " :indexid ");
@@ -1001,7 +1003,8 @@ _outIndexPath(StringInfo str, IndexPath *node)
         appendStringInfo(str, " :indexqual ");
         _outNode(str, node->indexqual);
  
-       appendStringInfo(str, " :joinrelids ");
+       appendStringInfo(str, " :indexscandir %d :joinrelids ",
+                                        (int) node->indexscandir);
         _outIntList(str, node->joinrelids);
  }
  
@@ -1012,9 +1015,10 @@ static void
  _outTidPath(StringInfo str, TidPath *node)
  {
         appendStringInfo(str,
-                                        " TIDPATH :pathtype %d :cost %.2f :pathkeys ",
+                                        " TIDPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ",
                                          node->path.pathtype,
-                                        node->path.path_cost);
+                                        node->path.startup_cost,
+                                        node->path.total_cost);
         _outNode(str, node->path.pathkeys);
  
         appendStringInfo(str, " :tideval ");
@@ -1031,9 +1035,10 @@ static void
  _outNestPath(StringInfo str, NestPath *node)
  {
         appendStringInfo(str,
-                                        " NESTPATH :pathtype %d :cost %.2f :pathkeys ",
+                                        " NESTPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ",
                                          node->path.pathtype,
-                                        node->path.path_cost);
+                                        node->path.startup_cost,
+                                        node->path.total_cost);
         _outNode(str, node->path.pathkeys);
         appendStringInfo(str, " :outerjoinpath ");
         _outNode(str, node->outerjoinpath);
@@ -1050,9 +1055,10 @@ static void
  _outMergePath(StringInfo str, MergePath *node)
  {
         appendStringInfo(str,
-                                        " MERGEPATH :pathtype %d :cost %.2f :pathkeys ",
+                                        " MERGEPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ",
                                          node->jpath.path.pathtype,
-                                        node->jpath.path.path_cost);
+                                        node->jpath.path.startup_cost,
+                                        node->jpath.path.total_cost);
         _outNode(str, node->jpath.path.pathkeys);
         appendStringInfo(str, " :outerjoinpath ");
         _outNode(str, node->jpath.outerjoinpath);
@@ -1078,9 +1084,10 @@ static void
  _outHashPath(StringInfo str, HashPath *node)
  {
         appendStringInfo(str,
-                                        " HASHPATH :pathtype %d :cost %.2f :pathkeys ",
+                                        " HASHPATH :pathtype %d :startup_cost %.2f :total_cost %.2f :pathkeys ",
                                          node->jpath.path.pathtype,
-                                        node->jpath.path.path_cost);
+                                        node->jpath.path.startup_cost,
+                                        node->jpath.path.total_cost);
         _outNode(str, node->jpath.path.pathkeys);
         appendStringInfo(str, " :outerjoinpath ");
         _outNode(str, node->jpath.outerjoinpath);
@@ -1364,7 +1371,7 @@ _outNode(StringInfo str, void *obj)
                 return;
         }
  
-       if (nodeTag(obj) == T_List)
+       if (IsA(obj, List))
         {
                 List       *l;
  
@@ -1377,6 +1384,11 @@ _outNode(StringInfo str, void *obj)
                 }
                 appendStringInfoChar(str, ')');
         }
+       else if (IsA_Value(obj))
+       {
+               /* nodeRead does not want to see { } around these! */
+               _outValue(str, obj);
+       }
         else
         {
                 appendStringInfoChar(str, '{');
@@ -1550,11 +1562,6 @@ _outNode(StringInfo str, void *obj)
                         case T_Stream:
                                 _outStream(str, obj);
                                 break;
-                       case T_Integer:
-                       case T_String:
-                       case T_Float:
-                               _outValue(str, obj);
-                               break;
                         case T_A_Expr:
                                 _outAExpr(str, obj);
                                 break;
diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c

index a84b829950f5140d7991a3ab54deaae34434c576..248991c0928623d1e224c11159cb1d12dfa960bf 100644 (file)
--- a/src/backend/nodes/print.c
+++ b/src/backend/nodes/print.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/print.c,v 1.36 2000/02/15 03:37:09 thomas Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/print.c,v 1.37 2000/02/15 20:49:12 tgl Exp $
   *
   * HISTORY
   *       AUTHOR                        DATE                    MAJOR EVENT
@@ -175,9 +175,8 @@ print_expr(Node *expr, List *rtable)
                                 {
                                         rt = rt_fetch(var->varno, rtable);
                                         relname = rt->relname;
-                                       if (rt->ref)
-                                               if (rt->ref->relname)
-                                               relname = rt->relname;  /* table renamed */
+                                       if (rt->ref && rt->ref->relname)
+                                               relname = rt->ref->relname;     /* table renamed */
                                         attname = get_attname(rt->relid, var->varattno);
                                 }
                                 break;
@@ -366,8 +365,9 @@ print_plan_recursive(Plan *p, Query *parsetree, int indentLevel, char *label)
                 return;
         for (i = 0; i < indentLevel; i++)
                 printf(" ");
-       printf("%s%s :c=%.4f :r=%.0f :w=%d ", label, plannode_type(p),
-                  p->cost, p->plan_rows, p->plan_width);
+       printf("%s%s :c=%.2f..%.2f :r=%.0f :w=%d ", label, plannode_type(p),
+                  p->startup_cost, p->total_cost,
+                  p->plan_rows, p->plan_width);
         if (IsA(p, Scan) ||IsA(p, SeqScan))
         {
                 RangeTblEntry *rte;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index 7d56b603b85c1766676a8520a3ad8a2d7ff963e7..7d1e0b4cccf22ca551325c917bd9dce570baf099 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.83 2000/02/15 03:37:09 thomas Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.84 2000/02/15 20:49:12 tgl Exp $
   *
   * NOTES
   *       Most of the read functions for plan nodes are tested. (In fact, they
@@ -217,9 +217,13 @@ _getPlan(Plan *node)
         char       *token;
         int                     length;
  
-       token = lsptok(NULL, &length);          /* first token is :cost */
+       token = lsptok(NULL, &length);          /* first token is :startup_cost */
         token = lsptok(NULL, &length);          /* next is the actual cost */
-       node->cost = (Cost) atof(token);
+       node->startup_cost = (Cost) atof(token);
+
+       token = lsptok(NULL, &length);          /* skip the :total_cost */
+       token = lsptok(NULL, &length);          /* next is the actual cost */
+       node->total_cost = (Cost) atof(token);
  
         token = lsptok(NULL, &length);          /* skip the :rows */
         token = lsptok(NULL, &length);          /* get the plan_rows */
@@ -520,7 +524,6 @@ _readIndexScan()
  
         token = lsptok(NULL, &length);          /* eat :indxorderdir */
         token = lsptok(NULL, &length);          /* get indxorderdir */
-
         local_node->indxorderdir = atoi(token);
  
         return local_node;
@@ -1275,18 +1278,15 @@ _readRelOptInfo()
         token = lsptok(NULL, &length);          /* get :pathlist */
         local_node->pathlist = nodeRead(true);          /* now read it */
  
-       /*
-        * Not sure if these are nodes or not.  They're declared as struct
-        * Path *.      Since i don't know, i'll just print the addresses for now.
-        * This can be changed later, if necessary.
-        */
-
-       token = lsptok(NULL, &length);          /* get :cheapestpath */
-       token = lsptok(NULL, &length);          /* get @ */
-       token = lsptok(NULL, &length);          /* now read it */
+       token = lsptok(NULL, &length);          /* get :cheapest_startup_path */
+       local_node->cheapest_startup_path = nodeRead(true);             /* now read it */
  
-       sscanf(token, "%x", (unsigned int *) &local_node->cheapestpath);
+       token = lsptok(NULL, &length);          /* get :cheapest_total_path */
+       local_node->cheapest_total_path = nodeRead(true);               /* now read it */
  
+       token = lsptok(NULL, &length);          /* eat :pruneable */
+       token = lsptok(NULL, &length);          /* get :pruneable */
+       local_node->pruneable = (token[0] == 't') ? true : false;
  
         token = lsptok(NULL, &length);          /* get :baserestrictinfo */
         local_node->baserestrictinfo = nodeRead(true);  /* now read it */
@@ -1322,29 +1322,6 @@ _readTargetEntry()
         return local_node;
  }
  
-static List *
-_readList()
-{
-       List       *local_node = NULL;
-       char       *token;
-       int                     length;
-
-       token = lsptok(NULL, &length);          /* eat "(" */
-       token = lsptok(NULL, &length);          /* get "{" */
-       while (strncmp(token, "{", length) == 0)
-       {
-               nconc(local_node, nodeRead(true));
-
-               token = lsptok(NULL, &length);          /* eat ")" */
-               if (strncmp(token, "}", length) != 0)
-                       elog(ERROR, "badly formatted attribute list"
-                                " in planstring \"%.10s\"...\n", token);
-               token = lsptok(NULL, &length);          /* "{" or ")" */
-       }
-
-       return local_node;
-}
-
  static Attr *
  _readAttr()
  {
@@ -1356,13 +1333,10 @@ _readAttr()
  
         token = lsptok(NULL, &length);          /* eat :relname */
         token = lsptok(NULL, &length);          /* get relname */
-       if (length == 0)
-               local_node->relname = pstrdup("");
-       else
-               local_node->relname = debackslash(token, length);
+       local_node->relname = debackslash(token, length);
  
         token = lsptok(NULL, &length);          /* eat :attrs */
-       local_node->attrs = _readList();
+       local_node->attrs = nodeRead(true);     /* now read it */
  
         return local_node;
  }
@@ -1388,7 +1362,7 @@ _readRangeTblEntry()
                 local_node->relname = debackslash(token, length);
  
         token = lsptok(NULL, &length);          /* eat :ref */
-       local_node->ref = nodeRead(true);
+       local_node->ref = nodeRead(true);       /* now read it */
  
         token = lsptok(NULL, &length);          /* eat :relid */
         token = lsptok(NULL, &length);          /* get :relid */
@@ -1450,9 +1424,13 @@ _readPath()
         token = lsptok(NULL, &length);          /* now read it */
         local_node->pathtype = atol(token);
  
-       token = lsptok(NULL, &length);          /* get :cost */
+       token = lsptok(NULL, &length);          /* get :startup_cost */
+       token = lsptok(NULL, &length);          /* now read it */
+       local_node->startup_cost = (Cost) atof(token);
+
+       token = lsptok(NULL, &length);          /* get :total_cost */
         token = lsptok(NULL, &length);          /* now read it */
-       local_node->path_cost = (Cost) atof(token);
+       local_node->total_cost = (Cost) atof(token);
  
         token = lsptok(NULL, &length);          /* get :pathkeys */
         local_node->pathkeys = nodeRead(true);          /* now read it */
@@ -1479,9 +1457,13 @@ _readIndexPath()
         token = lsptok(NULL, &length);          /* now read it */
         local_node->path.pathtype = atol(token);
  
-       token = lsptok(NULL, &length);          /* get :cost */
+       token = lsptok(NULL, &length);          /* get :startup_cost */
         token = lsptok(NULL, &length);          /* now read it */
-       local_node->path.path_cost = (Cost) atof(token);
+       local_node->path.startup_cost = (Cost) atof(token);
+
+       token = lsptok(NULL, &length);          /* get :total_cost */
+       token = lsptok(NULL, &length);          /* now read it */
+       local_node->path.total_cost = (Cost) atof(token);
  
         token = lsptok(NULL, &length);          /* get :pathkeys */
         local_node->path.pathkeys = nodeRead(true); /* now read it */
@@ -1492,6 +1474,10 @@ _readIndexPath()
         token = lsptok(NULL, &length);          /* get :indexqual */
         local_node->indexqual = nodeRead(true);         /* now read it */
  
+       token = lsptok(NULL, &length);          /* get :indexscandir */
+       token = lsptok(NULL, &length);          /* now read it */
+       local_node->indexscandir = (ScanDirection) atoi(token);
+
         token = lsptok(NULL, &length);          /* get :joinrelids */
         local_node->joinrelids = toIntList(nodeRead(true));
  
@@ -1517,9 +1503,13 @@ _readTidPath()
         token = lsptok(NULL, &length);          /* now read it */
         local_node->path.pathtype = atol(token);
  
-       token = lsptok(NULL, &length);          /* get :cost */
+       token = lsptok(NULL, &length);          /* get :startup_cost */
         token = lsptok(NULL, &length);          /* now read it */
-       local_node->path.path_cost = (Cost) atof(token);
+       local_node->path.startup_cost = (Cost) atof(token);
+
+       token = lsptok(NULL, &length);          /* get :total_cost */
+       token = lsptok(NULL, &length);          /* now read it */
+       local_node->path.total_cost = (Cost) atof(token);
  
         token = lsptok(NULL, &length);          /* get :pathkeys */
         local_node->path.pathkeys = nodeRead(true); /* now read it */
@@ -1552,9 +1542,13 @@ _readNestPath()
         token = lsptok(NULL, &length);          /* now read it */
         local_node->path.pathtype = atol(token);
  
-       token = lsptok(NULL, &length);          /* get :cost */
+       token = lsptok(NULL, &length);          /* get :startup_cost */
+       token = lsptok(NULL, &length);          /* now read it */
+       local_node->path.startup_cost = (Cost) atof(token);
+
+       token = lsptok(NULL, &length);          /* get :total_cost */
         token = lsptok(NULL, &length);          /* now read it */
-       local_node->path.path_cost = (Cost) atof(token);
+       local_node->path.total_cost = (Cost) atof(token);
  
         token = lsptok(NULL, &length);          /* get :pathkeys */
         local_node->path.pathkeys = nodeRead(true); /* now read it */
@@ -1588,13 +1582,15 @@ _readMergePath()
  
         token = lsptok(NULL, &length);          /* get :pathtype */
         token = lsptok(NULL, &length);          /* now read it */
-
         local_node->jpath.path.pathtype = atol(token);
  
-       token = lsptok(NULL, &length);          /* get :cost */
+       token = lsptok(NULL, &length);          /* get :startup_cost */
         token = lsptok(NULL, &length);          /* now read it */
+       local_node->jpath.path.startup_cost = (Cost) atof(token);
  
-       local_node->jpath.path.path_cost = (Cost) atof(token);
+       token = lsptok(NULL, &length);          /* get :total_cost */
+       token = lsptok(NULL, &length);          /* now read it */
+       local_node->jpath.path.total_cost = (Cost) atof(token);
  
         token = lsptok(NULL, &length);          /* get :pathkeys */
         local_node->jpath.path.pathkeys = nodeRead(true);       /* now read it */
@@ -1637,13 +1633,15 @@ _readHashPath()
  
         token = lsptok(NULL, &length);          /* get :pathtype */
         token = lsptok(NULL, &length);          /* now read it */
-
         local_node->jpath.path.pathtype = atol(token);
  
-       token = lsptok(NULL, &length);          /* get :cost */
+       token = lsptok(NULL, &length);          /* get :startup_cost */
         token = lsptok(NULL, &length);          /* now read it */
+       local_node->jpath.path.startup_cost = (Cost) atof(token);
  
-       local_node->jpath.path.path_cost = (Cost) atof(token);
+       token = lsptok(NULL, &length);          /* get :total_cost */
+       token = lsptok(NULL, &length);          /* now read it */
+       local_node->jpath.path.total_cost = (Cost) atof(token);
  
         token = lsptok(NULL, &length);          /* get :pathkeys */
         local_node->jpath.path.pathkeys = nodeRead(true);       /* now read it */
@@ -1886,14 +1884,6 @@ parsePlanString(void)
                 return_value = _readCaseWhen();
         else if (length == 7 && strncmp(token, "ROWMARK", length) == 0)
                 return_value = _readRowMark();
-#if 0
-       else if (length == 1 && strncmp(token, "{", length) == 0)
-       {
-               /* raw list (of strings?) found in Attr structure - thomas 2000-02-09 */
-               return_value = nodeRead(true);
-               token = lsptok(NULL, &length);  /* eat trailing brace */
-       }
-#endif
         else
                 elog(ERROR, "badly formatted planstring \"%.10s\"...\n", token);
  
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README

index bbc1204395a8555fbe0f30b10b4fcf56f1386b2b..6ca70a91f1d3d931780a4b7028da85e0cdb64758 100644 (file)
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -122,7 +122,7 @@ among other choices.  Although the jointree scanning code produces these
  potential join combinations one at a time, all the ways to produce the
  same set of joined base rels will share the same RelOptInfo, so the paths
  produced from different join combinations that produce equivalent joinrels
-will compete in add_pathlist.
+will compete in add_path.
  
  Once we have built the final join rel, we use either the cheapest path
  for it or the cheapest path with the desired ordering (if that's cheaper
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c

index 614ca47c84d5c8f37b99e65c301646469e47d5c9..1c70e4bcd8d274d591cafe67f96774912052a6e7 100644 (file)
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: geqo_eval.c,v 1.47 2000/02/07 04:40:58 tgl Exp $
+ * $Id: geqo_eval.c,v 1.48 2000/02/15 20:49:14 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -96,8 +96,13 @@ geqo_eval(Query *root, Gene *tour, int num_gene)
         /* construct the best path for the given combination of relations */
         joinrel = gimme_tree(root, tour, 0, num_gene, NULL);
  
-       /* compute fitness */
-       fitness = joinrel->cheapestpath->path_cost;
+       /*
+        * compute fitness
+        *
+        * XXX geqo does not currently support optimization for partial
+        * result retrieval --- how to fix?
+        */
+       fitness = joinrel->cheapest_total_path->total_cost;
  
         /* restore join_rel_list */
         root->join_rel_list = savelist;
@@ -155,8 +160,8 @@ gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, RelOptInfo *old
                         rel_count++;
                         Assert(length(new_rel->relids) == rel_count);
  
-                       /* Find and save the cheapest path for this rel */
-                       set_cheapest(new_rel, new_rel->pathlist);
+                       /* Find and save the cheapest paths for this rel */
+                       set_cheapest(new_rel);
  
                         return gimme_tree(root, tour, rel_count, num_gene, new_rel);
                 }
diff --git a/src/backend/optimizer/geqo/geqo_misc.c b/src/backend/optimizer/geqo/geqo_misc.c

index 849c739f2ddd64c0b811289a460bf7bc929a44ff..01ced310e1b48d4dc9e2c8865670224cb5590b59 100644 (file)
--- a/src/backend/optimizer/geqo/geqo_misc.c
+++ b/src/backend/optimizer/geqo/geqo_misc.c
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: geqo_misc.c,v 1.27 2000/02/07 04:40:58 tgl Exp $
+ * $Id: geqo_misc.c,v 1.28 2000/02/15 20:49:14 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -179,8 +179,9 @@ geqo_print_path(Query *root, Path *path, int indent)
         if (join)
         {
                 jp = (JoinPath *) path;
-               printf("%s rows=%.0f cost=%f\n",
-                          ptype, path->parent->rows, path->path_cost);
+               printf("%s rows=%.0f cost=%.2f..%.2f\n",
+                          ptype, path->parent->rows,
+                          path->startup_cost, path->total_cost);
                 switch (nodeTag(path))
                 {
                         case T_MergePath:
@@ -215,8 +216,9 @@ geqo_print_path(Query *root, Path *path, int indent)
         {
                 int                     relid = lfirsti(path->parent->relids);
  
-               printf("%s(%d) rows=%.0f cost=%f\n",
-                          ptype, relid, path->parent->rows, path->path_cost);
+               printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n",
+                          ptype, relid, path->parent->rows,
+                          path->startup_cost, path->total_cost);
  
                 if (IsA(path, IndexPath))
                 {
@@ -241,6 +243,9 @@ geqo_print_rel(Query *root, RelOptInfo *rel)
         foreach(l, rel->pathlist)
                 geqo_print_path(root, lfirst(l), 1);
  
-       printf("\tcheapest path:\n");
-       geqo_print_path(root, rel->cheapestpath, 1);
+       printf("\tcheapest startup path:\n");
+       geqo_print_path(root, rel->cheapest_startup_path, 1);
+
+       printf("\tcheapest total path:\n");
+       geqo_print_path(root, rel->cheapest_total_path, 1);
  }
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c

index 52c30f7d01dd3210fed2127b033a89dd2a04f3c3..572ef00d2e8f2f6b61c215edb25c675cd4d41c76 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.58 2000/02/07 04:40:59 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.59 2000/02/15 20:49:16 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -100,7 +100,7 @@ set_base_rel_pathlist(Query *root)
                 /*
                  * Generate paths and add them to the rel's pathlist.
                  *
-                * add_path/add_pathlist will discard any paths that are dominated
+                * Note: add_path() will discard any paths that are dominated
                  * by another available path, keeping only those paths that are
                  * superior along at least one dimension of cost or sortedness.
                  */
@@ -109,24 +109,21 @@ set_base_rel_pathlist(Query *root)
                 add_path(rel, create_seqscan_path(rel));
  
                 /* Consider TID scans */
-               add_pathlist(rel, create_tidscan_paths(root, rel));
+               create_tidscan_paths(root, rel);
  
                 /* Consider index paths for both simple and OR index clauses */
-               add_pathlist(rel, create_index_paths(root,
-                                                                                        rel,
-                                                                                        indices,
-                                                                                        rel->baserestrictinfo,
-                                                                                        rel->joininfo));
+               create_index_paths(root, rel, indices,
+                                                  rel->baserestrictinfo,
+                                                  rel->joininfo);
  
                 /* Note: create_or_index_paths depends on create_index_paths
                  * to have marked OR restriction clauses with relevant indices;
-                * this is why it doesn't need to be given the full list of indices.
+                * this is why it doesn't need to be given the list of indices.
                  */
-               add_pathlist(rel, create_or_index_paths(root, rel,
-                                                                                               rel->baserestrictinfo));
+               create_or_index_paths(root, rel, rel->baserestrictinfo);
  
                 /* Now find the cheapest of the paths for this rel */
-               set_cheapest(rel, rel->pathlist);
+               set_cheapest(rel);
         }
  }
  
@@ -196,8 +193,8 @@ make_one_rel_by_joins(Query *root, int levels_needed)
                                 xfunc_trypullup(rel);
  #endif
  
-                       /* Find and save the cheapest path for this rel */
-                       set_cheapest(rel, rel->pathlist);
+                       /* Find and save the cheapest paths for this rel */
+                       set_cheapest(rel);
  
  #ifdef OPTIMIZER_DEBUG
                         debug_print_rel(root, rel);
@@ -279,15 +276,26 @@ print_path(Query *root, Path *path, int indent)
         if (join)
         {
                 jp = (JoinPath *) path;
-               printf("%s rows=%.0f cost=%f\n",
-                          ptype, path->parent->rows, path->path_cost);
+
+               printf("%s rows=%.0f cost=%.2f..%.2f\n",
+                          ptype, path->parent->rows,
+                          path->startup_cost, path->total_cost);
+
+               if (path->pathkeys)
+               {
+                       for (i = 0; i < indent; i++)
+                               printf("\t");
+                       printf("  pathkeys=");
+                       print_pathkeys(path->pathkeys, root->rtable);
+               }
+
                 switch (nodeTag(path))
                 {
                         case T_MergePath:
                         case T_HashPath:
-                               for (i = 0; i < indent + 1; i++)
+                               for (i = 0; i < indent; i++)
                                         printf("\t");
-                               printf("   clauses=(");
+                               printf("  clauses=(");
                                 print_joinclauses(root, jp->joinrestrictinfo);
                                 printf(")\n");
  
@@ -297,9 +305,9 @@ print_path(Query *root, Path *path, int indent)
  
                                         if (mp->outersortkeys || mp->innersortkeys)
                                         {
-                                               for (i = 0; i < indent + 1; i++)
+                                               for (i = 0; i < indent; i++)
                                                         printf("\t");
-                                               printf("   sortouter=%d sortinner=%d\n",
+                                               printf("  sortouter=%d sortinner=%d\n",
                                                            ((mp->outersortkeys) ? 1 : 0),
                                                            ((mp->innersortkeys) ? 1 : 0));
                                         }
@@ -315,11 +323,14 @@ print_path(Query *root, Path *path, int indent)
         {
                 int                     relid = lfirsti(path->parent->relids);
  
-               printf("%s(%d) rows=%.0f cost=%f\n",
-                          ptype, relid, path->parent->rows, path->path_cost);
+               printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n",
+                          ptype, relid, path->parent->rows,
+                          path->startup_cost, path->total_cost);
  
-               if (IsA(path, IndexPath))
+               if (path->pathkeys)
                 {
+                       for (i = 0; i < indent; i++)
+                               printf("\t");
                         printf("  pathkeys=");
                         print_pathkeys(path->pathkeys, root->rtable);
                 }
@@ -339,8 +350,10 @@ debug_print_rel(Query *root, RelOptInfo *rel)
         printf("\tpath list:\n");
         foreach(l, rel->pathlist)
                 print_path(root, lfirst(l), 1);
-       printf("\tcheapest path:\n");
-       print_path(root, rel->cheapestpath, 1);
+       printf("\tcheapest startup path:\n");
+       print_path(root, rel->cheapest_startup_path, 1);
+       printf("\tcheapest total path:\n");
+       print_path(root, rel->cheapest_total_path, 1);
  }
  
  #endif  /* OPTIMIZER_DEBUG */
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 7c8d4b63c07f5e5bfd470a41abb8078dda962f9a..c14692d5b97edfe9bbe1ff69ba83a14a92b3f7ea 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3,23 +3,46 @@
   * costsize.c
   *       Routines to compute (and set) relation sizes and path costs
   *
- * Path costs are measured in units of disk accesses: one page fetch
- * has cost 1.  The other primitive unit is the CPU time required to
- * process one tuple, which we set at "cpu_page_weight" of a page
- * fetch.  Obviously, the CPU time per tuple depends on the query
- * involved, but the relative CPU and disk speeds of a given platform
- * are so variable that we are lucky if we can get useful numbers
- * at all.  cpu_page_weight is user-settable, in case a particular
- * user is clueful enough to have a better-than-default estimate
- * of the ratio for his platform.  There is also cpu_index_page_weight,
- * the cost to process a tuple of an index during an index scan.
+ * Path costs are measured in units of disk accesses: one sequential page
+ * fetch has cost 1.  All else is scaled relative to a page fetch, using
+ * the scaling parameters
+ *
+ *     random_page_cost        Cost of a non-sequential page fetch
+ *     cpu_tuple_cost          Cost of typical CPU time to process a tuple
+ *     cpu_index_tuple_cost  Cost of typical CPU time to process an index tuple
+ *     cpu_operator_cost       Cost of CPU time to process a typical WHERE operator
+ *
+ * We also use a rough estimate "effective_cache_size" of the number of
+ * disk pages in Postgres + OS-level disk cache.  (We can't simply use
+ * NBuffers for this purpose because that would ignore the effects of
+ * the kernel's disk cache.)
+ *
+ * Obviously, taking constants for these values is an oversimplification,
+ * but it's tough enough to get any useful estimates even at this level of
+ * detail.  Note that all of these parameters are user-settable, in case
+ * the default values are drastically off for a particular platform.
+ *
+ * We compute two separate costs for each path:
+ *             total_cost: total estimated cost to fetch all tuples
+ *             startup_cost: cost that is expended before first tuple is fetched
+ * In some scenarios, such as when there is a LIMIT or we are implementing
+ * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the
+ * path's result.  A caller can estimate the cost of fetching a partial
+ * result by interpolating between startup_cost and total_cost.  In detail:
+ *             actual_cost = startup_cost +
+ *                     (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
+ * Note that a relation's rows count (and, by extension, a Plan's plan_rows)
+ * is set without regard to any LIMIT, so that this equation works properly.
+ * (Also, these routines guarantee not to set the rows count to zero, so there
+ * will be no zero divide.)  RelOptInfos, Paths, and Plans themselves never
+ * account for LIMIT.
   *
   * 
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.51 2000/02/07 04:40:59 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.52 2000/02/15 20:49:16 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -27,26 +50,25 @@
  #include "postgres.h"
  
  #include <math.h>
-#ifdef HAVE_LIMITS_H
-#include <limits.h>
-#ifndef MAXINT
-#define MAXINT           INT_MAX
-#endif
-#else
-#ifdef HAVE_VALUES_H
-#include <values.h>
-#endif
-#endif
  
  #include "miscadmin.h"
+#include "nodes/plannodes.h"
+#include "optimizer/clauses.h"
  #include "optimizer/cost.h"
  #include "optimizer/internal.h"
  #include "optimizer/tlist.h"
  #include "utils/lsyscache.h"
  
  
-Cost           cpu_page_weight = CPU_PAGE_WEIGHT;
-Cost           cpu_index_page_weight = CPU_INDEX_PAGE_WEIGHT;
+#define LOG2(x)  (log(x) / 0.693147180559945)
+#define LOG6(x)  (log(x) / 1.79175946922805)
+
+
+double         effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
+Cost           random_page_cost = DEFAULT_RANDOM_PAGE_COST;
+Cost           cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
+Cost           cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
+Cost           cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
  
  Cost           disable_cost = 100000000.0;
  
@@ -59,53 +81,114 @@ bool               enable_mergejoin = true;
  bool           enable_hashjoin = true;
  
  
+static bool cost_qual_eval_walker(Node *node, Cost *total);
  static void set_rel_width(Query *root, RelOptInfo *rel);
  static int     compute_attribute_width(TargetEntry *tlistentry);
  static double relation_byte_size(double tuples, int width);
  static double page_size(double tuples, int width);
-static double base_log(double x, double b);
  
  
  /*
   * cost_seqscan
   *       Determines and returns the cost of scanning a relation sequentially.
- *       If the relation is a temporary to be materialized from a query
- *       embedded within a data field (determined by 'relid' containing an
- *       attribute reference), then a predetermined constant is returned (we
- *       have NO IDEA how big the result of a POSTQUEL procedure is going to
- *       be).
- *
- *             disk = p
- *             cpu = CPU-PAGE-WEIGHT * t
+ *
+ * If the relation is a temporary to be materialized from a query
+ * embedded within a data field (determined by 'relid' containing an
+ * attribute reference), then a predetermined constant is returned (we
+ * have NO IDEA how big the result of a POSTQUEL procedure is going to be).
+ *
+ * Note: for historical reasons, this routine and the others in this module
+ * use the passed result Path only to store their startup_cost and total_cost
+ * results into.  All the input data they need is passed as separate
+ * parameters, even though much of it could be extracted from the result Path.
   */
-Cost
-cost_seqscan(RelOptInfo *baserel)
+void
+cost_seqscan(Path *path, RelOptInfo *baserel)
  {
-       Cost            temp = 0;
+       Cost            startup_cost = 0;
+       Cost            run_cost = 0;
+       Cost            cpu_per_tuple;
  
         /* Should only be applied to base relations */
         Assert(length(baserel->relids) == 1);
  
         if (!enable_seqscan)
-               temp += disable_cost;
+               startup_cost += disable_cost;
  
+       /* disk costs */
         if (lfirsti(baserel->relids) < 0)
         {
                 /*
                  * cost of sequentially scanning a materialized temporary relation
                  */
-               temp += _NONAME_SCAN_COST_;
+               run_cost += _NONAME_SCAN_COST_;
         }
         else
         {
-               temp += baserel->pages;
-               temp += cpu_page_weight * baserel->tuples;
+               /*
+                * The cost of reading a page sequentially is 1.0, by definition.
+                * Note that the Unix kernel will typically do some amount of
+                * read-ahead optimization, so that this cost is less than the true
+                * cost of reading a page from disk.  We ignore that issue here,
+                * but must take it into account when estimating the cost of
+                * non-sequential accesses!
+                */
+               run_cost += baserel->pages;     /* sequential fetches with cost 1.0 */
         }
  
-       Assert(temp >= 0);
-       return temp;
+       /* CPU costs */
+       cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
+       run_cost += cpu_per_tuple * baserel->tuples;
+
+       path->startup_cost = startup_cost;
+       path->total_cost = startup_cost + run_cost;
  }
  
+/*
+ * cost_nonsequential_access
+ *       Estimate the cost of accessing one page at random from a relation
+ *       (or sort temp file) of the given size in pages.
+ *
+ * The simplistic model that the cost is random_page_cost is what we want
+ * to use for large relations; but for small ones that is a serious
+ * overestimate because of the effects of caching.  This routine tries to
+ * account for that.
+ *
+ * Unfortunately we don't have any good way of estimating the effective cache
+ * size we are working with --- we know that Postgres itself has NBuffers
+ * internal buffers, but the size of the kernel's disk cache is uncertain,
+ * and how much of it we get to use is even less certain.  We punt the problem
+ * for now by assuming we are given an effective_cache_size parameter.
+ *
+ * Given a guesstimated cache size, we estimate the actual I/O cost per page
+ * with the entirely ad-hoc equations:
+ *     for rel_size <= effective_cache_size:
+ *             1 + (random_page_cost/2-1) * (rel_size/effective_cache_size) ** 2
+ *     for rel_size >= effective_cache_size:
+ *             random_page_cost * (1 - (effective_cache_size/rel_size)/2)
+ * These give the right asymptotic behavior (=> 1.0 as rel_size becomes
+ * small, => random_page_cost as it becomes large) and meet in the middle
+ * with the estimate that the cache is about 50% effective for a relation
+ * of the same size as effective_cache_size.  (XXX this is probably all
+ * wrong, but I haven't been able to find any theory about how effective
+ * a disk cache should be presumed to be.)
+ */
+static Cost
+cost_nonsequential_access(double relpages)
+{
+       double          relsize;
+
+       /* don't crash on bad input data */
+       if (relpages <= 0.0 || effective_cache_size <= 0.0)
+               return random_page_cost;
+
+       relsize = relpages / effective_cache_size;
+
+       if (relsize >= 1.0)
+               return random_page_cost * (1.0 - 0.5 / relsize);
+       else
+               return 1.0 + (random_page_cost * 0.5 - 1.0) * relsize * relsize;
+}
  
  /*
   * cost_index
@@ -126,25 +209,28 @@ cost_seqscan(RelOptInfo *baserel)
   * tuples, but they won't reduce the number of tuples we have to fetch from
   * the table, so they don't reduce the scan cost.
   */
-Cost
-cost_index(Query *root,
+void
+cost_index(Path *path, Query *root,
                    RelOptInfo *baserel,
                    IndexOptInfo *index,
                    List *indexQuals,
                    bool is_injoin)
  {
-       Cost            temp = 0;
-       Cost            indexAccessCost;
+       Cost            startup_cost = 0;
+       Cost            run_cost = 0;
+       Cost            cpu_per_tuple;
+       Cost            indexStartupCost;
+       Cost            indexTotalCost;
         Selectivity     indexSelectivity;
-       double          reltuples;
-       double          relpages;
+       double          tuples_fetched;
+       double          pages_fetched;
  
         /* Should only be applied to base relations */
         Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo));
         Assert(length(baserel->relids) == 1);
  
         if (!enable_indexscan && !is_injoin)
-               temp += disable_cost;
+               startup_cost += disable_cost;
  
         /*
          * Call index-access-method-specific code to estimate the processing
@@ -152,31 +238,21 @@ cost_index(Query *root,
          * (ie, the fraction of main-table tuples we will have to retrieve).
          */
         fmgr(index->amcostestimate, root, baserel, index, indexQuals,
-                &indexAccessCost, &indexSelectivity);
+                &indexStartupCost, &indexTotalCost, &indexSelectivity);
  
         /* all costs for touching index itself included here */
-       temp += indexAccessCost;
+       startup_cost += indexStartupCost;
+       run_cost += indexTotalCost - indexStartupCost;
  
-       /*--------------------
-        * Estimate number of main-table tuples and pages touched.
-        *
-        * Worst case is that each tuple the index tells us to fetch comes
-        * from a different base-rel page, in which case the I/O cost would be
-        * 'reltuples' pages.  In practice we can expect the number of page
-        * fetches to be reduced by the buffer cache, because more than one
-        * tuple can be retrieved per page fetched.  Currently, we estimate
-        * the number of pages to be retrieved as
-        *                      MIN(reltuples, relpages)
-        * This amounts to assuming that the buffer cache is perfectly efficient
-        * and never ends up reading the same page twice within one scan, which
-        * of course is too optimistic.  On the other hand, we are assuming that
-        * the target tuples are perfectly uniformly distributed across the
-        * relation's pages, which is too pessimistic --- any nonuniformity of
-        * distribution will reduce the number of pages we have to fetch.
-        * So, we guess-and-hope that these sources of error will more or less
-        * balance out.
+       /*
+        * Estimate number of main-table tuples and pages fetched.
          *
-        * XXX need to add a penalty for nonsequential page fetches.
+        * If the number of tuples is much smaller than the number of pages in
+        * the relation, each tuple will cost a separate nonsequential fetch.
+        * If it is comparable or larger, then probably we will be able to avoid
+        * some fetches.  We use a growth rate of log(#tuples/#pages + 1) ---
+        * probably totally bogus, but intuitively it gives the right shape of
+        * curve at least.
          *
          * XXX if the relation has recently been "clustered" using this index,
          * then in fact the target tuples will be highly nonuniformly distributed,
@@ -184,54 +260,77 @@ cost_index(Query *root,
          * have no way to know whether the relation has been clustered, nor how
          * much it's been modified since the last clustering, so we ignore this
          * effect.  Would be nice to do better someday.
-        *--------------------
          */
  
-       reltuples = indexSelectivity * baserel->tuples;
+       tuples_fetched = indexSelectivity * baserel->tuples;
  
-       relpages = reltuples;
-       if (baserel->pages > 0 && baserel->pages < relpages)
-               relpages = baserel->pages;
+       if (tuples_fetched > 0 && baserel->pages > 0)
+               pages_fetched = baserel->pages *
+                       log(tuples_fetched / baserel->pages + 1.0);
+       else
+               pages_fetched = tuples_fetched;
+
+       /*
+        * Now estimate one nonsequential access per page fetched,
+        * plus appropriate CPU costs per tuple.
+        */
  
         /* disk costs for main table */
-       temp += relpages;
+       run_cost += pages_fetched * cost_nonsequential_access(baserel->pages);
  
-       /* CPU costs for heap tuples */
-       temp += cpu_page_weight * reltuples;
+       /* CPU costs */
+       cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
+       /*
+        * Assume that the indexquals will be removed from the list of
+        * restriction clauses that we actually have to evaluate as qpquals.
+        * This is not completely right, but it's close.
+        * For a lossy index, however, we will have to recheck all the quals.
+        */
+       if (! index->lossy)
+               cpu_per_tuple -= cost_qual_eval(indexQuals);
  
-       Assert(temp >= 0);
-       return temp;
+       run_cost += cpu_per_tuple * tuples_fetched;
+
+       path->startup_cost = startup_cost;
+       path->total_cost = startup_cost + run_cost;
  }
  
  /*
   * cost_tidscan
   *       Determines and returns the cost of scanning a relation using tid-s.
- *
- *             disk = number of tids
- *             cpu = CPU-PAGE-WEIGHT * number_of_tids
   */
-Cost
-cost_tidscan(RelOptInfo *baserel, List *tideval)
+void
+cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval)
  {
-       Cost    temp = 0;
+       Cost            startup_cost = 0;
+       Cost            run_cost = 0;
+       Cost            cpu_per_tuple;
+       int                     ntuples = length(tideval);
  
         if (!enable_tidscan)
-               temp += disable_cost;
+               startup_cost += disable_cost;
  
-       temp += (1.0 + cpu_page_weight) * length(tideval);
+       /* disk costs --- assume each tuple on a different page */
+       run_cost += random_page_cost * ntuples;
  
-       return temp;
+       /* CPU costs */
+       cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
+       run_cost += cpu_per_tuple * ntuples;
+
+       path->startup_cost = startup_cost;
+       path->total_cost = startup_cost + run_cost;
  }
   
  /*
   * cost_sort
   *       Determines and returns the cost of sorting a relation.
   *
+ * The cost of supplying the input data is NOT included; the caller should
+ * add that cost to both startup and total costs returned from this routine!
+ *
   * If the total volume of data to sort is less than SortMem, we will do
   * an in-memory sort, which requires no I/O and about t*log2(t) tuple
- * comparisons for t tuples.  We use cpu_index_page_weight as the cost
- * of a tuple comparison (is this reasonable, or do we need another
- * basic parameter?).
+ * comparisons for t tuples.
   *
   * If the total volume exceeds SortMem, we switch to a tape-style merge
   * algorithm.  There will still be about t*log2(t) tuple comparisons in
@@ -240,8 +339,14 @@ cost_tidscan(RelOptInfo *baserel, List *tideval)
   * number of initial runs formed (log6 because tuplesort.c uses six-tape
   * merging).  Since the average initial run should be about twice SortMem,
   * we have
- *             disk = 2 * p * ceil(log6(p / (2*SortMem)))
- *             cpu = CPU-INDEX-PAGE-WEIGHT * t * log2(t)
+ *             disk traffic = 2 * relsize * ceil(log6(relsize / (2*SortMem)))
+ *             cpu = comparison_cost * t * log2(t)
+ *
+ * The disk traffic is assumed to be half sequential and half random
+ * accesses (XXX can't we refine that guess?)
+ *
+ * We charge two operator evals per tuple comparison, which should be in
+ * the right ballpark in most cases.
   *
   * 'pathkeys' is a list of sort keys
   * 'tuples' is the number of tuples in the relation
@@ -252,15 +357,16 @@ cost_tidscan(RelOptInfo *baserel, List *tideval)
   * currently do anything with pathkeys anyway, that doesn't matter...
   * but if it ever does, it should react gracefully to lack of key data.
   */
-Cost
-cost_sort(List *pathkeys, double tuples, int width)
+void
+cost_sort(Path *path, List *pathkeys, double tuples, int width)
  {
-       Cost            temp = 0;
+       Cost            startup_cost = 0;
+       Cost            run_cost = 0;
         double          nbytes = relation_byte_size(tuples, width);
         long            sortmembytes = SortMem * 1024L;
  
         if (!enable_sort)
-               temp += disable_cost;
+               startup_cost += disable_cost;
  
         /*
          * We want to be sure the cost of a sort is never estimated as zero,
@@ -270,42 +376,39 @@ cost_sort(List *pathkeys, double tuples, int width)
         if (tuples < 2.0)
                 tuples = 2.0;
  
-       temp += cpu_index_page_weight * tuples * base_log(tuples, 2.0);
+       /*
+        * CPU costs
+        *
+        * Assume about two operator evals per tuple comparison
+        * and N log2 N comparisons
+        */
+       startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
  
+       /* disk costs */
         if (nbytes > sortmembytes)
         {
                 double          npages = ceil(nbytes / BLCKSZ);
                 double          nruns = nbytes / (sortmembytes * 2);
-               double          log_runs = ceil(base_log(nruns, 6.0));
+               double          log_runs = ceil(LOG6(nruns));
+               double          npageaccesses;
  
                 if (log_runs < 1.0)
                         log_runs = 1.0;
-               temp += 2 * npages * log_runs;
+               npageaccesses = 2.0 * npages * log_runs;
+               /* Assume half are sequential (cost 1), half are not */
+               startup_cost += npageaccesses *
+                       (1.0 + cost_nonsequential_access(npages)) * 0.5;
         }
  
-       Assert(temp > 0);
-       return temp;
-}
-
-
-/*
- * cost_result
- *       Determines and returns the cost of writing a relation of 'tuples'
- *       tuples of 'width' bytes out to a result relation.
- */
-#ifdef NOT_USED
-Cost
-cost_result(double tuples, int width)
-{
-       Cost            temp = 0;
-
-       temp += page_size(tuples, width);
-       temp += cpu_page_weight * tuples;
-       Assert(temp >= 0);
-       return temp;
+       /*
+        * Note: should we bother to assign a nonzero run_cost to reflect the
+        * overhead of extracting tuples from the sort result?  Probably not
+        * worth worrying about.
+        */
+       path->startup_cost = startup_cost;
+       path->total_cost = startup_cost + run_cost;
  }
  
-#endif
  
  /*
   * cost_nestloop
@@ -314,23 +417,45 @@ cost_result(double tuples, int width)
   *
   * 'outer_path' is the path for the outer relation
   * 'inner_path' is the path for the inner relation
+ * 'restrictlist' are the RestrictInfo nodes to be applied at the join
   * 'is_indexjoin' is true if we are using an indexscan for the inner relation
+ *             (not currently needed here; the indexscan adjusts its cost...)
   */
-Cost
-cost_nestloop(Path *outer_path,
+void
+cost_nestloop(Path *path,
+                         Path *outer_path,
                           Path *inner_path,
+                         List *restrictlist,
                           bool is_indexjoin)
  {
-       Cost            temp = 0;
+       Cost            startup_cost = 0;
+       Cost            run_cost = 0;
+       Cost            cpu_per_tuple;
+       double          ntuples;
  
         if (!enable_nestloop)
-               temp += disable_cost;
+               startup_cost += disable_cost;
+
+       /* cost of source data */
+       /*
+        * NOTE: we assume that the inner path's startup_cost is paid once, not
+        * over again on each restart.  This is certainly correct if the inner
+        * path is materialized.  Are there any cases where it is wrong?
+        */
+       startup_cost += outer_path->startup_cost + inner_path->startup_cost;
+       run_cost += outer_path->total_cost - outer_path->startup_cost;
+       run_cost += outer_path->parent->rows *
+               (inner_path->total_cost - inner_path->startup_cost);
  
-       temp += outer_path->path_cost;
-       temp += outer_path->parent->rows * inner_path->path_cost;
+       /* number of tuples processed (not number emitted!) */
+       ntuples = outer_path->parent->rows * inner_path->parent->rows;
  
-       Assert(temp >= 0);
-       return temp;
+       /* CPU costs */
+       cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
+       run_cost += cpu_per_tuple * ntuples;
+
+       path->startup_cost = startup_cost;
+       path->total_cost = startup_cost + run_cost;
  }
  
  /*
@@ -340,33 +465,66 @@ cost_nestloop(Path *outer_path,
   *
   * 'outer_path' is the path for the outer relation
   * 'inner_path' is the path for the inner relation
+ * 'restrictlist' are the RestrictInfo nodes to be applied at the join
   * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
   *                             to sort the outer and inner relations, or NIL if no explicit
   *                             sort is needed because the source path is already ordered
   */
-Cost
-cost_mergejoin(Path *outer_path,
+void
+cost_mergejoin(Path *path,
+                          Path *outer_path,
                            Path *inner_path,
+                          List *restrictlist,
                            List *outersortkeys,
                            List *innersortkeys)
  {
-       Cost            temp = 0;
+       Cost            startup_cost = 0;
+       Cost            run_cost = 0;
+       Cost            cpu_per_tuple;
+       double          ntuples;
+       Path            sort_path;              /* dummy for result of cost_sort */
  
         if (!enable_mergejoin)
-               temp += disable_cost;
+               startup_cost += disable_cost;
  
         /* cost of source data */
-       temp += outer_path->path_cost + inner_path->path_cost;
-
-       if (outersortkeys)                      /* do we need to sort? */
-               temp += cost_sort(outersortkeys,
-                                                 outer_path->parent->rows,
-                                                 outer_path->parent->width);
+       /*
+        * Note we are assuming that each source tuple is fetched just once,
+        * which is not right in the presence of equal keys.  If we had a way of
+        * estimating the proportion of equal keys, we could apply a correction
+        * factor...
+        */
+       if (outersortkeys)                      /* do we need to sort outer? */
+       {
+               startup_cost += outer_path->total_cost;
+               cost_sort(&sort_path,
+                                 outersortkeys,
+                                 outer_path->parent->rows,
+                                 outer_path->parent->width);
+               startup_cost += sort_path.startup_cost;
+               run_cost += sort_path.total_cost - sort_path.startup_cost;
+       }
+       else
+       {
+               startup_cost += outer_path->startup_cost;
+               run_cost += outer_path->total_cost - outer_path->startup_cost;
+       }
  
-       if (innersortkeys)                      /* do we need to sort? */
-               temp += cost_sort(innersortkeys,
-                                                 inner_path->parent->rows,
-                                                 inner_path->parent->width);
+       if (innersortkeys)                      /* do we need to sort inner? */
+       {
+               startup_cost += inner_path->total_cost;
+               cost_sort(&sort_path,
+                                 innersortkeys,
+                                 inner_path->parent->rows,
+                                 inner_path->parent->width);
+               startup_cost += sort_path.startup_cost;
+               run_cost += sort_path.total_cost - sort_path.startup_cost;
+       }
+       else
+       {
+               startup_cost += inner_path->startup_cost;
+               run_cost += inner_path->total_cost - inner_path->startup_cost;
+       }
  
         /*
          * Estimate the number of tuples to be processed in the mergejoin itself
@@ -374,11 +532,14 @@ cost_mergejoin(Path *outer_path,
          * underestimate if there are many equal-keyed tuples in either relation,
          * but we have no good way of estimating that...
          */
-       temp += cpu_page_weight * (outer_path->parent->rows +
-                                                          inner_path->parent->rows);
+       ntuples = outer_path->parent->rows + inner_path->parent->rows;
  
-       Assert(temp >= 0);
-       return temp;
+       /* CPU costs */
+       cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
+       run_cost += cpu_per_tuple * ntuples;
+
+       path->startup_cost = startup_cost;
+       path->total_cost = startup_cost + run_cost;
  }
  
  /*
@@ -388,15 +549,21 @@ cost_mergejoin(Path *outer_path,
   *
   * 'outer_path' is the path for the outer relation
   * 'inner_path' is the path for the inner relation
+ * 'restrictlist' are the RestrictInfo nodes to be applied at the join
   * 'innerdisbursion' is an estimate of the disbursion statistic
   *                             for the inner hash key.
   */
-Cost
-cost_hashjoin(Path *outer_path,
+void
+cost_hashjoin(Path *path,
+                         Path *outer_path,
                           Path *inner_path,
+                         List *restrictlist,
                           Selectivity innerdisbursion)
  {
-       Cost            temp = 0;
+       Cost            startup_cost = 0;
+       Cost            run_cost = 0;
+       Cost            cpu_per_tuple;
+       double          ntuples;
         double          outerbytes = relation_byte_size(outer_path->parent->rows,
                                                                                                 outer_path->parent->width);
         double          innerbytes = relation_byte_size(inner_path->parent->rows,
@@ -404,48 +571,169 @@ cost_hashjoin(Path *outer_path,
         long            hashtablebytes = SortMem * 1024L;
  
         if (!enable_hashjoin)
-               temp += disable_cost;
+               startup_cost += disable_cost;
  
         /* cost of source data */
-       temp += outer_path->path_cost + inner_path->path_cost;
+       startup_cost += outer_path->startup_cost;
+       run_cost += outer_path->total_cost - outer_path->startup_cost;
+       startup_cost += inner_path->total_cost;
  
-       /* cost of computing hash function: must do it once per tuple */
-       temp += cpu_page_weight * (outer_path->parent->rows +
-                                                          inner_path->parent->rows);
+       /* cost of computing hash function: must do it once per input tuple */
+       startup_cost += cpu_operator_cost * inner_path->parent->rows;
+       run_cost += cpu_operator_cost * outer_path->parent->rows;
  
         /* the number of tuple comparisons needed is the number of outer
          * tuples times the typical hash bucket size, which we estimate
-        * conservatively as the inner disbursion times the inner tuple
-        * count.  The cost per comparison is set at cpu_index_page_weight;
-        * is that reasonable, or do we need another basic parameter?
+        * conservatively as the inner disbursion times the inner tuple count.
          */
-       temp += cpu_index_page_weight * outer_path->parent->rows *
+       run_cost += cpu_operator_cost * outer_path->parent->rows *
                 (inner_path->parent->rows * innerdisbursion);
  
+       /*
+        * Estimate the number of tuples that get through the hashing filter
+        * as one per tuple in the two source relations.  This could be a drastic
+        * underestimate if there are many equal-keyed tuples in either relation,
+        * but we have no good way of estimating that...
+        */
+       ntuples = outer_path->parent->rows + inner_path->parent->rows;
+
+       /* CPU costs */
+       cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
+       run_cost += cpu_per_tuple * ntuples;
+
         /*
          * if inner relation is too big then we will need to "batch" the join,
          * which implies writing and reading most of the tuples to disk an
-        * extra time.  Charge one cost unit per page of I/O.
+        * extra time.  Charge one cost unit per page of I/O (correct since
+        * it should be nice and sequential...).  Writing the inner rel counts
+        * as startup cost, all the rest as run cost.
          */
         if (innerbytes > hashtablebytes)
-               temp += 2 * (page_size(outer_path->parent->rows,
-                                                          outer_path->parent->width) +
-                                        page_size(inner_path->parent->rows,
-                                                          inner_path->parent->width));
+       {
+               double  outerpages = page_size(outer_path->parent->rows,
+                                                                          outer_path->parent->width);
+               double  innerpages = page_size(inner_path->parent->rows,
+                                                                          inner_path->parent->width);
+
+               startup_cost += innerpages;
+               run_cost += innerpages + 2 * outerpages;
+       }
  
         /*
          * Bias against putting larger relation on inside.  We don't want
          * an absolute prohibition, though, since larger relation might have
          * better disbursion --- and we can't trust the size estimates
-        * unreservedly, anyway.
+        * unreservedly, anyway.  Instead, inflate the startup cost by
+        * the square root of the size ratio.  (Why square root?  No real good
+        * reason, but it seems reasonable...)
+        */
+       if (innerbytes > outerbytes && outerbytes > 0)
+       {
+               startup_cost *= sqrt(innerbytes / outerbytes);
+       }
+
+       path->startup_cost = startup_cost;
+       path->total_cost = startup_cost + run_cost;
+}
+
+
+/*
+ * cost_qual_eval
+ *             Estimate the CPU cost of evaluating a WHERE clause (once).
+ *             The input can be either an implicitly-ANDed list of boolean
+ *             expressions, or a list of RestrictInfo nodes.
+ */
+Cost
+cost_qual_eval(List *quals)
+{
+       Cost    total = 0;
+
+       cost_qual_eval_walker((Node *) quals, &total);
+       return total;
+}
+
+static bool
+cost_qual_eval_walker(Node *node, Cost *total)
+{
+       if (node == NULL)
+               return false;
+       /*
+        * Our basic strategy is to charge one cpu_operator_cost for each
+        * operator or function node in the given tree.  Vars and Consts
+        * are charged zero, and so are boolean operators (AND, OR, NOT).
+        * Simplistic, but a lot better than no model at all.
+        *
+        * Should we try to account for the possibility of short-circuit
+        * evaluation of AND/OR?
          */
-       if (innerbytes > outerbytes)
-               temp *= 1.1;                    /* is this an OK fudge factor? */
+       if (IsA(node, Expr))
+       {
+               Expr   *expr = (Expr *) node;
+
+               switch (expr->opType)
+               {
+                       case OP_EXPR:
+                       case FUNC_EXPR:
+                               *total += cpu_operator_cost;
+                               break;
+                       case OR_EXPR:
+                       case AND_EXPR:
+                       case NOT_EXPR:
+                               break;
+                       case SUBPLAN_EXPR:
+                               /*
+                                * A subplan node in an expression indicates that the subplan
+                                * will be executed on each evaluation, so charge accordingly.
+                                * (We assume that sub-selects that can be executed as
+                                * InitPlans have already been removed from the expression.)
+                                *
+                                * NOTE: this logic should agree with make_subplan in
+                                * subselect.c. 
+                                */
+                               {
+                                       SubPlan    *subplan = (SubPlan *) expr->oper;
+                                       Plan       *plan = subplan->plan;
+                                       Cost            subcost;
+
+                                       if (subplan->sublink->subLinkType == EXISTS_SUBLINK)
+                                       {
+                                               /* we only need to fetch 1 tuple */
+                                               subcost = plan->startup_cost +
+                                                       (plan->total_cost - plan->startup_cost) / plan->plan_rows;
+                                       }
+                                       else if (subplan->sublink->subLinkType == EXPR_SUBLINK)
+                                       {
+                                               /* assume we need all tuples */
+                                               subcost = plan->total_cost;
+                                       }
+                                       else
+                                       {
+                                               /* assume we need 50% of the tuples */
+                                               subcost = plan->startup_cost +
+                                                       0.50 * (plan->total_cost - plan->startup_cost);
+                                       }
+                                       *total += subcost;
+                               }
+                               break;
+               }
+               /* fall through to examine args of Expr node */
+       }
+       /*
+        * expression_tree_walker doesn't know what to do with RestrictInfo nodes,
+        * but we just want to recurse through them.
+        */
+       if (IsA(node, RestrictInfo))
+       {
+               RestrictInfo   *restrictinfo = (RestrictInfo *) node;
  
-       Assert(temp >= 0);
-       return temp;
+               return cost_qual_eval_walker((Node *) restrictinfo->clause, total);
+       }
+       /* Otherwise, recurse. */
+       return expression_tree_walker(node, cost_qual_eval_walker,
+                                                                 (void *) total);
  }
  
+
  /*
   * set_baserel_size_estimates
   *             Set the size estimates for the given base relation.
@@ -457,6 +745,7 @@ cost_hashjoin(Path *outer_path,
   *     rows: the estimated number of output tuples (after applying
   *           restriction clauses).
   *     width: the estimated average output tuple width in bytes.
+ *     baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
   */
  void
  set_baserel_size_estimates(Query *root, RelOptInfo *rel)
@@ -468,7 +757,14 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
                 restrictlist_selectivity(root,
                                                                  rel->baserestrictinfo,
                                                                  lfirsti(rel->relids));
-       Assert(rel->rows >= 0);
+       /*
+        * Force estimate to be at least one row, to make explain output look
+        * better and to avoid possible divide-by-zero when interpolating cost.
+        */
+       if (rel->rows < 1.0)
+               rel->rows = 1.0;
+
+       rel->baserestrictcost = cost_qual_eval(rel->baserestrictinfo);
  
         set_rel_width(root, rel);
  }
@@ -513,7 +809,12 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
                                                                          restrictlist,
                                                                          0);
  
-       Assert(temp >= 0);
+       /*
+        * Force estimate to be at least one row, to make explain output look
+        * better and to avoid possible divide-by-zero when interpolating cost.
+        */
+       if (temp < 1.0)
+               temp = 1.0;
         rel->rows = temp;
  
         /*
@@ -582,9 +883,3 @@ page_size(double tuples, int width)
  {
         return ceil(relation_byte_size(tuples, width) / BLCKSZ);
  }
-
-static double
-base_log(double x, double b)
-{
-       return log(x) / log(b);
-}
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c

index 4c2b0109bc02421b1b64b59f3d2b3ba2f2315818..edb16ce0d6d26620d9cfc9cba5393ad31f51b341 100644 (file)
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.79 2000/02/05 18:26:09 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.80 2000/02/15 20:49:16 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -83,7 +83,8 @@ static List *index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
  static bool useful_for_mergejoin(RelOptInfo *rel, IndexOptInfo *index,
                                                                  List *joininfo_list);
  static bool useful_for_ordering(Query *root, RelOptInfo *rel,
-                                                               IndexOptInfo *index);
+                                                               IndexOptInfo *index,
+                                                               ScanDirection scandir);
  static bool match_index_to_operand(int indexkey, Var *operand,
                                                                    RelOptInfo *rel, IndexOptInfo *index);
  static bool function_index_operand(Expr *funcOpnd, RelOptInfo *rel,
@@ -106,6 +107,8 @@ static bool string_lessthan(const char * str1, const char * str2,
  /*
   * create_index_paths()
   *       Generate all interesting index paths for the given relation.
+ *       Candidate paths are added to the rel's pathlist (using add_path).
+ *       Additional IndexPath nodes may also be added to rel's innerjoin list.
   *
   * To be considered for an index scan, an index must match one or more
   * restriction clauses or join clauses from the query's qual condition,
@@ -120,29 +123,26 @@ static bool string_lessthan(const char * str1, const char * str2,
   * in its join clauses.  In that context, values for the other rels'
   * attributes are available and fixed during any one scan of the indexpath.
   *
- * This routine's return value is a list of plain IndexPaths for each
- * index the routine deems potentially interesting for the current query
+ * An IndexPath is generated and submitted to add_path() for each index
+ * this routine deems potentially interesting for the current query
   * (at most one IndexPath per index on the given relation).  An innerjoin
   * path is also generated for each interesting combination of outer join
- * relations.  The innerjoin paths are *not* in the return list, but are
- * appended to the "innerjoin" list of the relation itself.
+ * relations.  The innerjoin paths are *not* passed to add_path(), but are
+ * appended to the "innerjoin" list of the relation for later consideration
+ * in nested-loop joins.
   *
   * 'rel' is the relation for which we want to generate index paths
   * 'indices' is a list of available indexes for 'rel'
   * 'restrictinfo_list' is a list of restrictinfo nodes for 'rel'
   * 'joininfo_list' is a list of joininfo nodes for 'rel'
- *
- * Returns a list of IndexPath access path descriptors.  Additional
- * IndexPath nodes may also be added to the rel->innerjoin list.
   */
-List *
+void
  create_index_paths(Query *root,
                                    RelOptInfo *rel,
                                    List *indices,
                                    List *restrictinfo_list,
                                    List *joininfo_list)
  {
-       List       *retval = NIL;
         List       *ilist;
  
         foreach(ilist, indices)
@@ -189,9 +189,9 @@ create_index_paths(Query *root,
                                                                                                         restrictinfo_list);
  
                 if (restrictclauses != NIL)
-                       retval = lappend(retval,
-                                                        create_index_path(root, rel, index,
-                                                                                          restrictclauses));
+                       add_path(rel, (Path *) create_index_path(root, rel, index,
+                                                                                                        restrictclauses,
+                                                                                                        NoMovementScanDirection));
  
                 /*
                  * 3. If this index can be used for a mergejoin, then create an
@@ -205,10 +205,22 @@ create_index_paths(Query *root,
                 if (restrictclauses == NIL)
                 {
                         if (useful_for_mergejoin(rel, index, joininfo_list) ||
-                               useful_for_ordering(root, rel, index))
-                               retval = lappend(retval,
-                                                                create_index_path(root, rel, index, NIL));
+                               useful_for_ordering(root, rel, index, ForwardScanDirection))
+                               add_path(rel, (Path *)
+                                                create_index_path(root, rel, index,
+                                                                                  NIL,
+                                                                                  ForwardScanDirection));
                 }
+               /*
+                * Currently, a backward scan is considered only for the case of
+                * matching a query result ordering.  Possibly it should be
+                * considered in other places as well?
+                */
+               if (useful_for_ordering(root, rel, index, BackwardScanDirection))
+                       add_path(rel, (Path *)
+                                        create_index_path(root, rel, index,
+                                                                          NIL,
+                                                                          BackwardScanDirection));
  
                 /*
                  * 4. Create an innerjoin index path for each combination of
@@ -231,8 +243,6 @@ create_index_paths(Query *root,
                                                                                                    joinouterrelids));
                 }
         }
-
-       return retval;
  }
  
  
@@ -892,39 +902,26 @@ useful_for_mergejoin(RelOptInfo *rel,
   *       Determine whether the given index can produce an ordering matching
   *       the order that is wanted for the query result.
   *
- * We check to see whether either forward or backward scan direction can
- * match the specified pathkeys.
- *
   * 'rel' is the relation for which 'index' is defined
+ * 'scandir' is the contemplated scan direction
   */
  static bool
  useful_for_ordering(Query *root,
                                         RelOptInfo *rel,
-                                       IndexOptInfo *index)
+                                       IndexOptInfo *index,
+                                       ScanDirection scandir)
  {
         List       *index_pathkeys;
  
         if (root->query_pathkeys == NIL)
                 return false;                   /* no special ordering requested */
  
-       index_pathkeys = build_index_pathkeys(root, rel, index);
+       index_pathkeys = build_index_pathkeys(root, rel, index, scandir);
  
         if (index_pathkeys == NIL)
                 return false;                   /* unordered index */
  
-       if (pathkeys_contained_in(root->query_pathkeys, index_pathkeys))
-               return true;
-
-       /* caution: commute_pathkeys destructively modifies its argument;
-        * safe because we just built the index_pathkeys for local use here.
-        */
-       if (commute_pathkeys(index_pathkeys))
-       {
-               if (pathkeys_contained_in(root->query_pathkeys, index_pathkeys))
-                       return true;            /* useful as a reverse-order path */
-       }
-
-       return false;
+       return pathkeys_contained_in(root->query_pathkeys, index_pathkeys);
  }
  
  /****************************************************************************
@@ -1433,7 +1430,12 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
  
                 pathnode->path.pathtype = T_IndexScan;
                 pathnode->path.parent = rel;
-               pathnode->path.pathkeys = build_index_pathkeys(root, rel, index);
+               /*
+                * There's no point in marking the path with any pathkeys, since
+                * it will only ever be used as the inner path of a nestloop,
+                * and so its ordering does not matter.
+                */
+               pathnode->path.pathkeys = NIL;
  
                 indexquals = get_actual_clauses(clausegroup);
                 /* expand special operators to indexquals the executor can handle */
@@ -1446,11 +1448,13 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
                 pathnode->indexid = lconsi(index->indexoid, NIL);
                 pathnode->indexqual = lcons(indexquals, NIL);
  
+               /* We don't actually care what order the index scans in ... */
+               pathnode->indexscandir = NoMovementScanDirection;
+
                 /* joinrelids saves the rels needed on the outer side of the join */
                 pathnode->joinrelids = lfirst(outerrelids_list);
  
-               pathnode->path.path_cost = cost_index(root, rel, index, indexquals,
-                                                                                         true);
+               cost_index(&pathnode->path, root, rel, index, indexquals, true);
  
                 path_list = lappend(path_list, pathnode);
                 outerrelids_list = lnext(outerrelids_list);
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index f8912a1a5477b3e48131da8e74f871b3f8eb6cbc..091e2e40c7922a9e249f234c7de2e9077b5d6aac 100644 (file)
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.51 2000/02/07 04:40:59 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.52 2000/02/15 20:49:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -27,24 +27,21 @@
  #include "parser/parsetree.h"
  #include "utils/lsyscache.h"
  
+static void sort_inner_and_outer(Query *root, RelOptInfo *joinrel,
+                                                                RelOptInfo *outerrel, RelOptInfo *innerrel,
+                                                                List *restrictlist, List *mergeclause_list);
+static void match_unsorted_outer(Query *root, RelOptInfo *joinrel,
+                                                                RelOptInfo *outerrel, RelOptInfo *innerrel,
+                                                                List *restrictlist, List *mergeclause_list);
+#ifdef NOT_USED
+static void match_unsorted_inner(Query *root, RelOptInfo *joinrel,
+                                                                RelOptInfo *outerrel, RelOptInfo *innerrel,
+                                                                List *restrictlist, List *mergeclause_list);
+#endif
+static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
+                                                                RelOptInfo *outerrel, RelOptInfo *innerrel,
+                                                                List *restrictlist);
  static Path *best_innerjoin(List *join_paths, List *outer_relid);
-static List *sort_inner_and_outer(RelOptInfo *joinrel,
-                                                                 RelOptInfo *outerrel,
-                                                                 RelOptInfo *innerrel,
-                                                                 List *restrictlist,
-                                                                 List *mergeclause_list);
-static List *match_unsorted_outer(RelOptInfo *joinrel, RelOptInfo *outerrel,
-                                                                 RelOptInfo *innerrel, List *restrictlist,
-                                                                 List *outerpath_list, Path *cheapest_inner,
-                                                                 Path *best_innerjoin,
-                                                                 List *mergeclause_list);
-static List *match_unsorted_inner(RelOptInfo *joinrel, RelOptInfo *outerrel,
-                                                                 RelOptInfo *innerrel, List *restrictlist,
-                                                                 List *innerpath_list,
-                                                                 List *mergeclause_list);
-static List *hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
-                                                                 RelOptInfo *outerrel, RelOptInfo *innerrel,
-                                                                 List *restrictlist);
  static Selectivity estimate_disbursion(Query *root, Var *var);
  static List *select_mergejoin_clauses(RelOptInfo *joinrel,
                                                                           RelOptInfo *outerrel,
@@ -70,14 +67,8 @@ add_paths_to_joinrel(Query *root,
                                          RelOptInfo *innerrel,
                                          List *restrictlist)
  {
-       Path       *bestinnerjoin;
         List       *mergeclause_list = NIL;
  
-       /*
-        * Get the best inner join for match_unsorted_outer().
-        */
-       bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids);
-
         /*
          * Find potential mergejoin clauses.
          */
@@ -91,84 +82,41 @@ add_paths_to_joinrel(Query *root,
          * 1. Consider mergejoin paths where both relations must be
          * explicitly sorted.
          */
-       add_pathlist(joinrel, sort_inner_and_outer(joinrel,
-                                                                                          outerrel,
-                                                                                          innerrel,
-                                                                                          restrictlist,
-                                                                                          mergeclause_list));
+       sort_inner_and_outer(root, joinrel, outerrel, innerrel,
+                                                restrictlist, mergeclause_list);
  
         /*
          * 2. Consider paths where the outer relation need not be
          * explicitly sorted. This includes both nestloops and
          * mergejoins where the outer path is already ordered.
          */
-       add_pathlist(joinrel, match_unsorted_outer(joinrel,
-                                                                                          outerrel,
-                                                                                          innerrel,
-                                                                                          restrictlist,
-                                                                                          outerrel->pathlist,
-                                                                                          innerrel->cheapestpath,
-                                                                                          bestinnerjoin,
-                                                                                          mergeclause_list));
+       match_unsorted_outer(root, joinrel, outerrel, innerrel,
+                                                restrictlist, mergeclause_list);
  
+#ifdef NOT_USED
         /*
          * 3. Consider paths where the inner relation need not be
          * explicitly sorted.  This includes mergejoins only
          * (nestloops were already built in match_unsorted_outer).
+        *
+        * Diked out as redundant 2/13/2000 -- tgl.  There isn't any
+        * really significant difference between the inner and outer
+        * side of a mergejoin, so match_unsorted_inner creates no paths
+        * that aren't equivalent to those made by match_unsorted_outer
+        * when add_paths_to_joinrel() is invoked with the two rels given
+        * in the other order.
          */
-       add_pathlist(joinrel, match_unsorted_inner(joinrel,
-                                                                                          outerrel,
-                                                                                          innerrel,
-                                                                                          restrictlist,
-                                                                                          innerrel->pathlist,
-                                                                                          mergeclause_list));
+       match_unsorted_inner(root, joinrel, outerrel, innerrel,
+                                                restrictlist, mergeclause_list);
+#endif
  
         /*
          * 4. Consider paths where both outer and inner relations must be
          * hashed before being joined.
          */
         if (enable_hashjoin)
-               add_pathlist(joinrel, hash_inner_and_outer(root,
-                                                                                                  joinrel,
-                                                                                                  outerrel,
-                                                                                                  innerrel,
-                                                                                                  restrictlist));
-}
-
-/*
- * best_innerjoin
- *       Find the cheapest index path that has already been identified by
- *       indexable_joinclauses() as being a possible inner path for the given
- *       outer relation(s) in a nestloop join.
- *
- * 'join_paths' is a list of potential inner indexscan join paths
- * 'outer_relids' is the relid list of the outer join relation
- *
- * Returns the pathnode of the best path, or NULL if there's no
- * usable path.
- */
-static Path *
-best_innerjoin(List *join_paths, Relids outer_relids)
-{
-       Path       *cheapest = (Path *) NULL;
-       List       *join_path;
-
-       foreach(join_path, join_paths)
-       {
-               Path       *path = (Path *) lfirst(join_path);
-
-               Assert(IsA(path, IndexPath));
-
-               /* path->joinrelids is the set of base rels that must be part of
-                * outer_relids in order to use this inner path, because those
-                * rels are used in the index join quals of this inner path.
-                */
-               if (is_subseti(((IndexPath *) path)->joinrelids, outer_relids) &&
-                       (cheapest == NULL ||
-                        path_is_cheaper(path, cheapest)))
-                       cheapest = path;
-       }
-       return cheapest;
+               hash_inner_and_outer(root, joinrel, outerrel, innerrel,
+                                                        restrictlist);
  }
  
  /*
@@ -183,17 +131,15 @@ best_innerjoin(List *join_paths, Relids outer_relids)
   *             clauses that apply to this join
   * 'mergeclause_list' is a list of RestrictInfo nodes for available
   *             mergejoin clauses in this join
- *
- * Returns a list of mergejoin paths.
   */
-static List *
-sort_inner_and_outer(RelOptInfo *joinrel,
+static void
+sort_inner_and_outer(Query *root,
+                                        RelOptInfo *joinrel,
                                          RelOptInfo *outerrel,
                                          RelOptInfo *innerrel,
                                          List *restrictlist,
                                          List *mergeclause_list)
  {
-       List       *path_list = NIL;
         List       *i;
  
         /*
@@ -223,7 +169,6 @@ sort_inner_and_outer(RelOptInfo *joinrel,
                 List               *outerkeys;
                 List               *innerkeys;
                 List               *merge_pathkeys;
-               MergePath          *path_node;
  
                 /* Make a mergeclause list with this guy first. */
                 curclause_list = lcons(restrictinfo,
@@ -231,31 +176,37 @@ sort_inner_and_outer(RelOptInfo *joinrel,
                                                                            listCopy(mergeclause_list)));
                 /* Build sort pathkeys for both sides.
                  *
-                * Note: it's possible that the cheapest path will already be
-                * sorted properly --- create_mergejoin_path will detect that case
-                * and suppress an explicit sort step.
+                * Note: it's possible that the cheapest paths will already be
+                * sorted properly.  create_mergejoin_path will detect that case
+                * and suppress an explicit sort step, so we needn't do so here.
                  */
-               outerkeys = make_pathkeys_for_mergeclauses(curclause_list,
+               outerkeys = make_pathkeys_for_mergeclauses(root,
+                                                                                                  curclause_list,
                                                                                                    outerrel->targetlist);
-               innerkeys = make_pathkeys_for_mergeclauses(curclause_list,
+               innerkeys = make_pathkeys_for_mergeclauses(root,
+                                                                                                  curclause_list,
                                                                                                    innerrel->targetlist);
                 /* Build pathkeys representing output sort order. */
                 merge_pathkeys = build_join_pathkeys(outerkeys,
                                                                                          joinrel->targetlist,
-                                                                                        curclause_list);
-               /* And now we can make the path. */
-               path_node = create_mergejoin_path(joinrel,
-                                                                                 outerrel->cheapestpath,
-                                                                                 innerrel->cheapestpath,
-                                                                                 restrictlist,
-                                                                                 merge_pathkeys,
-                                                                                 get_actual_clauses(curclause_list),
-                                                                                 outerkeys,
-                                                                                 innerkeys);
+                                                                                        root->equi_key_list);
  
-               path_list = lappend(path_list, path_node);
+               /*
+                * And now we can make the path.  We only consider the cheapest-
+                * total-cost input paths, since we are assuming here that a sort
+                * is required.  We will consider cheapest-startup-cost input paths
+                * later, and only if they don't need a sort.
+                */
+               add_path(joinrel, (Path *)
+                                create_mergejoin_path(joinrel,
+                                                                          outerrel->cheapest_total_path,
+                                                                          innerrel->cheapest_total_path,
+                                                                          restrictlist,
+                                                                          merge_pathkeys,
+                                                                          get_actual_clauses(curclause_list),
+                                                                          outerkeys,
+                                                                          innerkeys));
         }
-       return path_list;
  }
  
  /*
@@ -266,74 +217,56 @@ sort_inner_and_outer(RelOptInfo *joinrel,
   *       only outer paths that are already ordered well enough for merging).
   *
   * We always generate a nestloop path for each available outer path.
- * If an indexscan inner path exists that is compatible with this outer rel
- * and cheaper than the cheapest general-purpose inner path, then we use
- * the indexscan inner path; else we use the cheapest general-purpose inner.
+ * In fact we may generate as many as three: one on the cheapest-total-cost
+ * inner path, one on the cheapest-startup-cost inner path (if different),
+ * and one on the best inner-indexscan path (if any).
   *
   * We also consider mergejoins if mergejoin clauses are available.  We have
- * two ways to generate the inner path for a mergejoin: use the cheapest
- * inner path (sorting it if it's not suitably ordered already), or using an
- * inner path that is already suitably ordered for the merge.  If the
- * cheapest inner path is suitably ordered, then by definition it's the one
- * to use.  Otherwise, we look for ordered paths that are cheaper than the
- * cheapest inner + sort costs.  If we have several mergeclauses, it could be
- * that there is no inner path (or only a very expensive one) for the full
- * list of mergeclauses, but better paths exist if we truncate the
- * mergeclause list (thereby discarding some sort key requirements).  So, we
- * consider truncations of the mergeclause list as well as the full list.
- * In any case, we find the cheapest suitable path and generate a single
- * output mergejoin path.  (Since all the possible mergejoins will have
- * identical output pathkeys, there is no need to keep any but the cheapest.)
+ * two ways to generate the inner path for a mergejoin: sort the cheapest
+ * inner path, or use an inner path that is already suitably ordered for the
+ * merge.  If we have several mergeclauses, it could be that there is no inner
+ * path (or only a very expensive one) for the full list of mergeclauses, but
+ * better paths exist if we truncate the mergeclause list (thereby discarding
+ * some sort key requirements).  So, we consider truncations of the
+ * mergeclause list as well as the full list.  (Ideally we'd consider all
+ * subsets of the mergeclause list, but that seems way too expensive.)
   *
   * 'joinrel' is the join relation
   * 'outerrel' is the outer join relation
   * 'innerrel' is the inner join relation
   * 'restrictlist' contains all of the RestrictInfo nodes for restriction
   *             clauses that apply to this join
- * 'outerpath_list' is the list of possible outer paths
- * 'cheapest_inner' is the cheapest inner path
- * 'best_innerjoin' is the best inner index path (if any)
   * 'mergeclause_list' is a list of RestrictInfo nodes for available
   *             mergejoin clauses in this join
- *
- * Returns a list of possible join path nodes.
   */
-static List *
-match_unsorted_outer(RelOptInfo *joinrel,
+static void
+match_unsorted_outer(Query *root,
+                                        RelOptInfo *joinrel,
                                          RelOptInfo *outerrel,
                                          RelOptInfo *innerrel,
                                          List *restrictlist,
-                                        List *outerpath_list,
-                                        Path *cheapest_inner,
-                                        Path *best_innerjoin,
                                          List *mergeclause_list)
  {
-       List       *path_list = NIL;
-       Path       *nestinnerpath;
+       Path       *bestinnerjoin;
         List       *i;
  
         /*
-        * We only use the best innerjoin indexpath if it is cheaper
-        * than the cheapest general-purpose inner path.
+        * Get the best innerjoin indexpath (if any) for this outer rel.
+        * It's the same for all outer paths.
          */
-       if (best_innerjoin &&
-               path_is_cheaper(best_innerjoin, cheapest_inner))
-               nestinnerpath = best_innerjoin;
-       else
-               nestinnerpath = cheapest_inner;
+       bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids);
  
-       foreach(i, outerpath_list)
+       foreach(i, outerrel->pathlist)
         {
                 Path       *outerpath = (Path *) lfirst(i);
-               List       *mergeclauses;
                 List       *merge_pathkeys;
+               List       *mergeclauses;
                 List       *innersortkeys;
-               Path       *mergeinnerpath;
-               int                     mergeclausecount;
+               List       *trialsortkeys;
+               Path       *cheapest_startup_inner;
+               Path       *cheapest_total_inner;
+               int                     clausecnt;
  
-               /* Look for useful mergeclauses (if any) */
-               mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys,
-                                                                                                         mergeclause_list);
                 /*
                  * The result will have this sort order (even if it is implemented
                  * as a nestloop, and even if some of the mergeclauses are implemented
@@ -341,91 +274,137 @@ match_unsorted_outer(RelOptInfo *joinrel,
                  */
                 merge_pathkeys = build_join_pathkeys(outerpath->pathkeys,
                                                                                          joinrel->targetlist,
-                                                                                        mergeclauses);
+                                                                                        root->equi_key_list);
+
+               /*
+                * Always consider a nestloop join with this outer and cheapest-
+                * total-cost inner.  Consider nestloops using the cheapest-
+                * startup-cost inner as well, and the best innerjoin indexpath.
+                */
+               add_path(joinrel, (Path *)
+                                create_nestloop_path(joinrel,
+                                                                         outerpath,
+                                                                         innerrel->cheapest_total_path,
+                                                                         restrictlist,
+                                                                         merge_pathkeys));
+               if (innerrel->cheapest_startup_path != innerrel->cheapest_total_path)
+                       add_path(joinrel, (Path *)
+                                        create_nestloop_path(joinrel,
+                                                                                 outerpath,
+                                                                                 innerrel->cheapest_startup_path,
+                                                                                 restrictlist,
+                                                                                 merge_pathkeys));
+               if (bestinnerjoin != NULL)
+                       add_path(joinrel, (Path *)
+                                        create_nestloop_path(joinrel,
+                                                                                 outerpath,
+                                                                                 bestinnerjoin,
+                                                                                 restrictlist,
+                                                                                 merge_pathkeys));
  
-               /* Always consider a nestloop join with this outer and best inner. */
-               path_list = lappend(path_list,
-                                                       create_nestloop_path(joinrel,
-                                                                                                outerpath,
-                                                                                                nestinnerpath,
-                                                                                                restrictlist,
-                                                                                                merge_pathkeys));
+               /* Look for useful mergeclauses (if any) */
+               mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys,
+                                                                                                         mergeclause_list);
  
                 /* Done with this outer path if no chance for a mergejoin */
                 if (mergeclauses == NIL)
                         continue;
  
                 /* Compute the required ordering of the inner path */
-               innersortkeys = make_pathkeys_for_mergeclauses(mergeclauses,
+               innersortkeys = make_pathkeys_for_mergeclauses(root,
+                                                                                                          mergeclauses,
                                                                                                            innerrel->targetlist);
  
-               /* Set up on the assumption that we will use the cheapest_inner */
-               mergeinnerpath = cheapest_inner;
-               mergeclausecount = length(mergeclauses);
-
-               /* If the cheapest_inner doesn't need to be sorted, it is the winner
-                * by definition.
+               /*
+                * Generate a mergejoin on the basis of sorting the cheapest inner.
+                * Since a sort will be needed, only cheapest total cost matters.
                  */
-               if (pathkeys_contained_in(innersortkeys,
-                                                                 cheapest_inner->pathkeys))
-               {
-                       /* cheapest_inner is the winner */
-                       innersortkeys = NIL; /* we do not need to sort it... */
-               }
-               else
-               {
-                       /* look for a presorted path that's cheaper */
-                       List       *trialsortkeys = listCopy(innersortkeys);
-                       Cost            cheapest_cost;
-                       int                     clausecount;
+               add_path(joinrel, (Path *)
+                                create_mergejoin_path(joinrel,
+                                                                          outerpath,
+                                                                          innerrel->cheapest_total_path,
+                                                                          restrictlist,
+                                                                          merge_pathkeys,
+                                                                          get_actual_clauses(mergeclauses),
+                                                                          NIL,
+                                                                          innersortkeys));
  
-                       cheapest_cost = cheapest_inner->path_cost +
-                               cost_sort(innersortkeys, innerrel->rows, innerrel->width);
+               /*
+                * Look for presorted inner paths that satisfy the mergeclause list
+                * or any truncation thereof.  Here, we consider both cheap startup
+                * cost and cheap total cost.
+                */
+               trialsortkeys = listCopy(innersortkeys); /* modifiable copy */
+               cheapest_startup_inner = NULL;
+               cheapest_total_inner = NULL;
  
-                       for (clausecount = mergeclausecount;
-                                clausecount > 0;
-                                clausecount--)
+               for (clausecnt = length(mergeclauses); clausecnt > 0; clausecnt--)
+               {
+                       Path       *innerpath;
+
+                       /* Look for an inner path ordered well enough to merge with
+                        * the first 'clausecnt' mergeclauses.  NB: trialsortkeys list
+                        * is modified destructively, which is why we made a copy...
+                        */
+                       trialsortkeys = ltruncate(clausecnt, trialsortkeys);
+                       innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+                                                                                                          trialsortkeys,
+                                                                                                          TOTAL_COST);
+                       if (innerpath != NULL &&
+                               (cheapest_total_inner == NULL ||
+                                compare_path_costs(innerpath, cheapest_total_inner,
+                                                                       TOTAL_COST) < 0))
                         {
-                               Path       *trialinnerpath;
-
-                               /* Look for an inner path ordered well enough to merge with
-                                * the first 'clausecount' mergeclauses.  NB: trialsortkeys
-                                * is modified destructively, which is why we made a copy...
-                                */
-                               trialinnerpath =
-                                       get_cheapest_path_for_pathkeys(innerrel->pathlist,
-                                                                                                  ltruncate(clausecount,
-                                                                                                                        trialsortkeys),
-                                                                                                  false);
-                               if (trialinnerpath != NULL &&
-                                       trialinnerpath->path_cost < cheapest_cost)
+                               /* Found a cheap (or even-cheaper) sorted path */
+                               List   *newclauses;
+
+                               newclauses = ltruncate(clausecnt,
+                                                                          get_actual_clauses(mergeclauses));
+                               add_path(joinrel, (Path *)
+                                                create_mergejoin_path(joinrel,
+                                                                                          outerpath,
+                                                                                          innerpath,
+                                                                                          restrictlist,
+                                                                                          merge_pathkeys,
+                                                                                          newclauses,
+                                                                                          NIL,
+                                                                                          NIL));
+                               cheapest_total_inner = innerpath;
+                       }
+                       /* Same on the basis of cheapest startup cost ... */
+                       innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+                                                                                                          trialsortkeys,
+                                                                                                          STARTUP_COST);
+                       if (innerpath != NULL &&
+                               (cheapest_startup_inner == NULL ||
+                                compare_path_costs(innerpath, cheapest_startup_inner,
+                                                                       STARTUP_COST) < 0))
+                       {
+                               /* Found a cheap (or even-cheaper) sorted path */
+                               if (innerpath != cheapest_total_inner)
                                 {
-                                       /* Found a cheaper (or even-cheaper) sorted path */
-                                       cheapest_cost = trialinnerpath->path_cost;
-                                       mergeinnerpath = trialinnerpath;
-                                       mergeclausecount = clausecount;
-                                       innersortkeys = NIL; /* we will not need to sort it... */
+                                       List   *newclauses;
+
+                                       newclauses = ltruncate(clausecnt,
+                                                                                  get_actual_clauses(mergeclauses));
+                                       add_path(joinrel, (Path *)
+                                                        create_mergejoin_path(joinrel,
+                                                                                                  outerpath,
+                                                                                                  innerpath,
+                                                                                                  restrictlist,
+                                                                                                  merge_pathkeys,
+                                                                                                  newclauses,
+                                                                                                  NIL,
+                                                                                                  NIL));
                                 }
+                               cheapest_startup_inner = innerpath;
                         }
                 }
-
-               /* Finally, we can build the mergejoin path */
-               mergeclauses = ltruncate(mergeclausecount,
-                                                                get_actual_clauses(mergeclauses));
-               path_list = lappend(path_list,
-                                                       create_mergejoin_path(joinrel,
-                                                                                                 outerpath,
-                                                                                                 mergeinnerpath,
-                                                                                                 restrictlist,
-                                                                                                 merge_pathkeys,
-                                                                                                 mergeclauses,
-                                                                                                 NIL,
-                                                                                                 innersortkeys));
         }
-
-       return path_list;
  }
  
+#ifdef NOT_USED
+
  /*
   * match_unsorted_inner
   *       Generate mergejoin paths that use an explicit sort of the outer path
@@ -436,86 +415,105 @@ match_unsorted_outer(RelOptInfo *joinrel,
   * 'innerrel' is the inner join relation
   * 'restrictlist' contains all of the RestrictInfo nodes for restriction
   *             clauses that apply to this join
- * 'innerpath_list' is the list of possible inner join paths
   * 'mergeclause_list' is a list of RestrictInfo nodes for available
   *             mergejoin clauses in this join
- *
- * Returns a list of possible merge paths.
   */
-static List *
-match_unsorted_inner(RelOptInfo *joinrel,
+static void
+match_unsorted_inner(Query *root,
+                                        RelOptInfo *joinrel,
                                          RelOptInfo *outerrel,
                                          RelOptInfo *innerrel,
                                          List *restrictlist,
-                                        List *innerpath_list,
                                          List *mergeclause_list)
  {
-       List       *path_list = NIL;
         List       *i;
  
-       foreach(i, innerpath_list)
+       foreach(i, innerrel->pathlist)
         {
                 Path       *innerpath = (Path *) lfirst(i);
                 List       *mergeclauses;
+               List       *outersortkeys;
+               List       *merge_pathkeys;
+               Path       *totalouterpath;
+               Path       *startupouterpath;
  
                 /* Look for useful mergeclauses (if any) */
                 mergeclauses = find_mergeclauses_for_pathkeys(innerpath->pathkeys,
                                                                                                           mergeclause_list);
+               if (mergeclauses == NIL)
+                       continue;
  
-               if (mergeclauses)
-               {
-                       List       *outersortkeys;
-                       Path       *mergeouterpath;
-                       List       *merge_pathkeys;
-
-                       /* Compute the required ordering of the outer path */
-                       outersortkeys =
-                               make_pathkeys_for_mergeclauses(mergeclauses,
-                                                                                          outerrel->targetlist);
-
-                       /* Look for an outer path already ordered well enough to merge */
-                       mergeouterpath =
-                               get_cheapest_path_for_pathkeys(outerrel->pathlist,
-                                                                                          outersortkeys,
-                                                                                          false);
-
-                       /* Should we use the mergeouter, or sort the cheapest outer? */
-                       if (mergeouterpath != NULL &&
-                               mergeouterpath->path_cost <=
-                               (outerrel->cheapestpath->path_cost +
-                                cost_sort(outersortkeys, outerrel->rows, outerrel->width)))
-                       {
-                               /* Use mergeouterpath */
-                               outersortkeys = NIL;    /* no explicit sort step */
-                       }
-                       else
-                       {
-                               /* Use outerrel->cheapestpath, with the outersortkeys */
-                               mergeouterpath = outerrel->cheapestpath;
-                       }
+               /* Compute the required ordering of the outer path */
+               outersortkeys = make_pathkeys_for_mergeclauses(root,
+                                                                                                          mergeclauses,
+                                                                                                          outerrel->targetlist);
+
+               /*
+                * Generate a mergejoin on the basis of sorting the cheapest outer.
+                * Since a sort will be needed, only cheapest total cost matters.
+                */
+               merge_pathkeys = build_join_pathkeys(outersortkeys,
+                                                                                        joinrel->targetlist,
+                                                                                        root->equi_key_list);
+               add_path(joinrel, (Path *)
+                                create_mergejoin_path(joinrel,
+                                                                          outerrel->cheapest_total_path,
+                                                                          innerpath,
+                                                                          restrictlist,
+                                                                          merge_pathkeys,
+                                                                          get_actual_clauses(mergeclauses),
+                                                                          outersortkeys,
+                                                                          NIL));
+               /*
+                * Now generate mergejoins based on already-sufficiently-ordered
+                * outer paths.  There's likely to be some redundancy here with paths
+                * already generated by match_unsorted_outer ... but since
+                * match_unsorted_outer doesn't consider all permutations of the
+                * mergeclause list, it may fail to notice that this particular
+                * innerpath could have been used with this outerpath.
+                */
+               totalouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
+                                                                                                               outersortkeys,
+                                                                                                               TOTAL_COST);
+               if (totalouterpath == NULL)
+                       continue;                       /* there won't be a startup-cost path either */
  
-                       /* Compute pathkeys the result will have */
-                       merge_pathkeys = build_join_pathkeys(
-                               outersortkeys ? outersortkeys : mergeouterpath->pathkeys,
-                               joinrel->targetlist,
-                               mergeclauses);
-
-                       mergeclauses = get_actual_clauses(mergeclauses);
-                       path_list = lappend(path_list,
-                                                               create_mergejoin_path(joinrel,
-                                                                                                         mergeouterpath,
-                                                                                                         innerpath,
-                                                                                                         restrictlist,
-                                                                                                         merge_pathkeys,
-                                                                                                         mergeclauses,
-                                                                                                         outersortkeys,
-                                                                                                         NIL));
+               merge_pathkeys = build_join_pathkeys(totalouterpath->pathkeys,
+                                                                                        joinrel->targetlist,
+                                                                                        root->equi_key_list);
+               add_path(joinrel, (Path *)
+                                create_mergejoin_path(joinrel,
+                                                                          totalouterpath,
+                                                                          innerpath,
+                                                                          restrictlist,
+                                                                          merge_pathkeys,
+                                                                          get_actual_clauses(mergeclauses),
+                                                                          NIL,
+                                                                          NIL));
+
+               startupouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
+                                                                                                                 outersortkeys,
+                                                                                                                 STARTUP_COST);
+               if (startupouterpath != NULL && startupouterpath != totalouterpath)
+               {
+                       merge_pathkeys = build_join_pathkeys(startupouterpath->pathkeys,
+                                                                                                joinrel->targetlist,
+                                                                                                root->equi_key_list);
+                       add_path(joinrel, (Path *)
+                                        create_mergejoin_path(joinrel,
+                                                                                  startupouterpath,
+                                                                                  innerpath,
+                                                                                  restrictlist,
+                                                                                  merge_pathkeys,
+                                                                                  get_actual_clauses(mergeclauses),
+                                                                                  NIL,
+                                                                                  NIL));
                 }
         }
-
-       return path_list;
  }
  
+#endif
+
  /*
   * hash_inner_and_outer
   *       Create hashjoin join paths by explicitly hashing both the outer and
@@ -526,17 +524,14 @@ match_unsorted_inner(RelOptInfo *joinrel,
   * 'innerrel' is the inner join relation
   * 'restrictlist' contains all of the RestrictInfo nodes for restriction
   *             clauses that apply to this join
- *
- * Returns a list of hashjoin paths.
   */
-static List *
+static void
  hash_inner_and_outer(Query *root,
                                          RelOptInfo *joinrel,
                                          RelOptInfo *outerrel,
                                          RelOptInfo *innerrel,
                                          List *restrictlist)
  {
-       List       *hpath_list = NIL;
         Relids          outerrelids = outerrel->relids;
         Relids          innerrelids = innerrel->relids;
         List       *i;
@@ -558,7 +553,6 @@ hash_inner_and_outer(Query *root,
                                    *right,
                                    *inner;
                 Selectivity     innerdisbursion;
-               HashPath   *hash_path;
  
                 if (restrictinfo->hashjoinoperator == InvalidOid)
                         continue;                       /* not hashjoinable */
@@ -581,17 +575,66 @@ hash_inner_and_outer(Query *root,
                 /* estimate disbursion of inner var for costing purposes */
                 innerdisbursion = estimate_disbursion(root, inner);
  
-               hash_path = create_hashjoin_path(joinrel,
-                                                                                outerrel->cheapestpath,
-                                                                                innerrel->cheapestpath,
-                                                                                restrictlist,
-                                                                                lcons(clause, NIL),
-                                                                                innerdisbursion);
-
-               hpath_list = lappend(hpath_list, hash_path);
+               /*
+                * We consider both the cheapest-total-cost and cheapest-startup-cost
+                * outer paths.  There's no need to consider any but the cheapest-
+                * total-cost inner path, however.
+                */
+               add_path(joinrel, (Path *)
+                                create_hashjoin_path(joinrel,
+                                                                         outerrel->cheapest_total_path,
+                                                                         innerrel->cheapest_total_path,
+                                                                         restrictlist,
+                                                                         lcons(clause, NIL),
+                                                                         innerdisbursion));
+               if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
+                       add_path(joinrel, (Path *)
+                                        create_hashjoin_path(joinrel,
+                                                                                 outerrel->cheapest_startup_path,
+                                                                                 innerrel->cheapest_total_path,
+                                                                                 restrictlist,
+                                                                                 lcons(clause, NIL),
+                                                                                 innerdisbursion));
         }
+}
+
+/*
+ * best_innerjoin
+ *       Find the cheapest index path that has already been identified by
+ *       indexable_joinclauses() as being a possible inner path for the given
+ *       outer relation(s) in a nestloop join.
+ *
+ * We compare indexpaths on total_cost only, assuming that they will all have
+ * zero or negligible startup_cost.  We might have to think harder someday...
+ *
+ * 'join_paths' is a list of potential inner indexscan join paths
+ * 'outer_relids' is the relid list of the outer join relation
+ *
+ * Returns the pathnode of the best path, or NULL if there's no
+ * usable path.
+ */
+static Path *
+best_innerjoin(List *join_paths, Relids outer_relids)
+{
+       Path       *cheapest = (Path *) NULL;
+       List       *join_path;
+
+       foreach(join_path, join_paths)
+       {
+               Path       *path = (Path *) lfirst(join_path);
+
+               Assert(IsA(path, IndexPath));
  
-       return hpath_list;
+               /* path->joinrelids is the set of base rels that must be part of
+                * outer_relids in order to use this inner path, because those
+                * rels are used in the index join quals of this inner path.
+                */
+               if (is_subseti(((IndexPath *) path)->joinrelids, outer_relids) &&
+                       (cheapest == NULL ||
+                        compare_path_costs(path, cheapest, TOTAL_COST) < 0))
+                       cheapest = path;
+       }
+       return cheapest;
  }
  
  /*
diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c

index 9eb0484fc2fa1bc8cd88f8faba8224562d433445..6226100cfc791304a022691b5dc36c7737a82846 100644 (file)
--- a/src/backend/optimizer/path/orindxpath.c
+++ b/src/backend/optimizer/path/orindxpath.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.36 2000/02/05 18:26:09 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.37 2000/02/15 20:49:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -19,6 +19,7 @@
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
  #include "optimizer/internal.h"
+#include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
  #include "optimizer/plancat.h"
  #include "optimizer/restrictinfo.h"
@@ -27,14 +28,13 @@
  
  static void best_or_subclause_indices(Query *root, RelOptInfo *rel,
                                                                           List *subclauses, List *indices,
-                                                                         List **indexquals,
-                                                                         List **indexids,
-                                                                         Cost *cost);
+                                                                         IndexPath *pathnode);
  static void best_or_subclause_index(Query *root, RelOptInfo *rel,
                                                                         Expr *subclause, List *indices,
                                                                         List **retIndexQual,
                                                                         Oid *retIndexid,
-                                                                       Cost *retCost);
+                                                                       Cost *retStartupCost,
+                                                                       Cost *retTotalCost);
  
  
  /*
@@ -45,14 +45,13 @@ static void best_or_subclause_index(Query *root, RelOptInfo *rel,
   * 'rel' is the relation entry for which the paths are to be created
   * 'clauses' is the list of available restriction clause nodes
   *
- * Returns a list of index path nodes.
- *
+ * Returns nothing, but adds paths to rel->pathlist via add_path().
   */
-List *
+void
  create_or_index_paths(Query *root,
-                                         RelOptInfo *rel, List *clauses)
+                                         RelOptInfo *rel,
+                                         List *clauses)
  {
-       List       *path_list = NIL;
         List       *clist;
  
         foreach(clist, clauses)
@@ -86,17 +85,6 @@ create_or_index_paths(Query *root,
                                  * best available index for each subclause.
                                  */
                                 IndexPath  *pathnode = makeNode(IndexPath);
-                               List       *indexquals;
-                               List       *indexids;
-                               Cost            cost;
-
-                               best_or_subclause_indices(root,
-                                                                                 rel,
-                                                                                 clausenode->clause->args,
-                                                                                 clausenode->subclauseindices,
-                                                                                 &indexquals,
-                                                                                 &indexids,
-                                                                                 &cost);
  
                                 pathnode->path.pathtype = T_IndexScan;
                                 pathnode->path.parent = rel;
@@ -108,17 +96,21 @@ create_or_index_paths(Query *root,
                                  */
                                 pathnode->path.pathkeys = NIL;
  
-                               pathnode->indexid = indexids;
-                               pathnode->indexqual = indexquals;
+                               /* We don't actually care in what order the index is scanned ... */
+                               pathnode->indexscandir = NoMovementScanDirection;
+
                                 pathnode->joinrelids = NIL;     /* no join clauses here */
-                               pathnode->path.path_cost = cost;
  
-                               path_list = lappend(path_list, pathnode);
+                               best_or_subclause_indices(root,
+                                                                                 rel,
+                                                                                 clausenode->clause->args,
+                                                                                 clausenode->subclauseindices,
+                                                                                 pathnode);
+
+                               add_path(rel, (Path *) pathnode);
                         }
                 }
         }
-
-       return path_list;
  }
  
  /*
@@ -128,53 +120,68 @@ create_or_index_paths(Query *root,
   *       indices.      The cost is the sum of the individual index costs, since
   *       the executor will perform a scan for each subclause of the 'or'.
   *
- * This routine also creates the indexquals and indexids lists that will
- * be needed by the executor.  The indexquals list has one entry for each
+ * This routine also creates the indexqual and indexid lists that will
+ * be needed by the executor.  The indexqual list has one entry for each
   * scan of the base rel, which is a sublist of indexqual conditions to
   * apply in that scan.  The implicit semantics are AND across each sublist
   * of quals, and OR across the toplevel list (note that the executor
- * takes care not to return any single tuple more than once).  The indexids
- * list gives the index to be used in each scan.
+ * takes care not to return any single tuple more than once).  The indexid
+ * list gives the OID of the index to be used in each scan.
   *
   * 'rel' is the node of the relation on which the indexes are defined
   * 'subclauses' are the subclauses of the 'or' clause
   * 'indices' is a list of sublists of the IndexOptInfo nodes that matched
   *             each subclause of the 'or' clause
- * '*indexquals' gets the constructed indexquals for the path (a list
+ * 'pathnode' is the IndexPath node being built.
+ *
+ * Results are returned by setting these fields of the passed pathnode:
+ * 'indexqual' gets the constructed indexquals for the path (a list
   *             of sublists of clauses, one sublist per scan of the base rel)
- * '*indexids' gets a list of the index OIDs for each scan of the rel
- * '*cost' gets the total cost of the path
+ * 'indexid' gets a list of the index OIDs for each scan of the rel
+ * 'startup_cost' and 'total_cost' get the complete path costs.
+ *
+ * 'startup_cost' is the startup cost for the first index scan only;
+ * startup costs for later scans will be paid later on, so they just
+ * get reflected in total_cost.
+ *
+ * NOTE: we choose each scan on the basis of its total cost, ignoring startup
+ * cost.  This is reasonable as long as all index types have zero or small
+ * startup cost, but we might have to work harder if any index types with
+ * nontrivial startup cost are ever invented.
   */
  static void
  best_or_subclause_indices(Query *root,
                                                   RelOptInfo *rel,
                                                   List *subclauses,
                                                   List *indices,
-                                                 List **indexquals,    /* return value */
-                                                 List **indexids,              /* return value */
-                                                 Cost *cost)                   /* return value */
+                                                 IndexPath *pathnode)
  {
         List       *slist;
  
-       *indexquals = NIL;
-       *indexids = NIL;
-       *cost = (Cost) 0.0;
+       pathnode->indexqual = NIL;
+       pathnode->indexid = NIL;
+       pathnode->path.startup_cost = 0;
+       pathnode->path.total_cost = 0;
  
         foreach(slist, subclauses)
         {
                 Expr       *subclause = lfirst(slist);
                 List       *best_indexqual;
                 Oid                     best_indexid;
-               Cost            best_cost;
+               Cost            best_startup_cost;
+               Cost            best_total_cost;
  
                 best_or_subclause_index(root, rel, subclause, lfirst(indices),
-                                                               &best_indexqual, &best_indexid, &best_cost);
+                                                               &best_indexqual, &best_indexid,
+                                                               &best_startup_cost, &best_total_cost);
  
                 Assert(best_indexid != InvalidOid);
  
-               *indexquals = lappend(*indexquals, best_indexqual);
-               *indexids = lappendi(*indexids, best_indexid);
-               *cost += best_cost;
+               pathnode->indexqual = lappend(pathnode->indexqual, best_indexqual);
+               pathnode->indexid = lappendi(pathnode->indexid, best_indexid);
+               if (slist == subclauses)                /* first scan? */
+                       pathnode->path.startup_cost = best_startup_cost;
+               pathnode->path.total_cost += best_total_cost;
  
                 indices = lnext(indices);
         }
@@ -182,16 +189,17 @@ best_or_subclause_indices(Query *root,
  
  /*
   * best_or_subclause_index
- *       Determines which is the best index to be used with a subclause of
- *       an 'or' clause by estimating the cost of using each index and selecting
- *       the least expensive.
+ *       Determines which is the best index to be used with a subclause of an
+ *       'or' clause by estimating the cost of using each index and selecting
+ *       the least expensive (considering total cost only, for now).
   *
   * 'rel' is the node of the relation on which the index is defined
   * 'subclause' is the OR subclause being considered
   * 'indices' is a list of IndexOptInfo nodes that match the subclause
   * '*retIndexQual' gets a list of the indexqual conditions for the best index
   * '*retIndexid' gets the OID of the best index
- * '*retCost' gets the cost of a scan with that index
+ * '*retStartupCost' gets the startup cost of a scan with that index
+ * '*retTotalCost' gets the total cost of a scan with that index
   */
  static void
  best_or_subclause_index(Query *root,
@@ -200,7 +208,8 @@ best_or_subclause_index(Query *root,
                                                 List *indices,
                                                 List **retIndexQual,    /* return value */
                                                 Oid *retIndexid,                /* return value */
-                                               Cost *retCost)                  /* return value */
+                                               Cost *retStartupCost,   /* return value */
+                                               Cost *retTotalCost)             /* return value */
  {
         bool            first_time = true;
         List       *ilist;
@@ -208,27 +217,28 @@ best_or_subclause_index(Query *root,
         /* if we don't match anything, return zeros */
         *retIndexQual = NIL;
         *retIndexid = InvalidOid;
-       *retCost = 0.0;
+       *retStartupCost = 0;
+       *retTotalCost = 0;
  
         foreach(ilist, indices)
         {
                 IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
                 List       *indexqual;
-               Cost            subcost;
+               Path            subclause_path;
  
                 Assert(IsA(index, IndexOptInfo));
  
                 /* Convert this 'or' subclause to an indexqual list */
                 indexqual = extract_or_indexqual_conditions(rel, index, subclause);
  
-               subcost = cost_index(root, rel, index, indexqual,
-                                                        false);
+               cost_index(&subclause_path, root, rel, index, indexqual, false);
  
-               if (first_time || subcost < *retCost)
+               if (first_time || subclause_path.total_cost < *retTotalCost)
                 {
                         *retIndexQual = indexqual;
                         *retIndexid = index->indexoid;
-                       *retCost = subcost;
+                       *retStartupCost = subclause_path.startup_cost;
+                       *retTotalCost = subclause_path.total_cost;
                         first_time = false;
                 }
         }
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c

index 5aeda1e154e157df2e5d291b0a29b8865d850420..b578e33f5c850a6cde157b8b98aace14c75dfcc7 100644 (file)
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.18 2000/01/26 05:56:34 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.19 2000/02/15 20:49:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -17,6 +17,7 @@
  #include "nodes/makefuncs.h"
  #include "optimizer/clauses.h"
  #include "optimizer/joininfo.h"
+#include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
  #include "optimizer/tlist.h"
  #include "optimizer/var.h"
@@ -25,9 +26,9 @@
  #include "utils/lsyscache.h"
  
  static PathKeyItem *makePathKeyItem(Node *key, Oid sortop);
-static Var *find_indexkey_var(int indexkey, List *tlist);
-static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist,
-                                                               List *joinclauses);
+static List *make_canonical_pathkey(Query *root, PathKeyItem *item);
+static Var *find_indexkey_var(Query *root, RelOptInfo *rel,
+                                                         AttrNumber varattno);
  
  
  /*--------------------
@@ -50,50 +51,122 @@ static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist,
   *     Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since
   *     we can say nothing about the overall order of its result.  Also, an
   *     indexscan on an unordered type of index generates NIL pathkeys.  However,
- *     we can always create a pathkey by doing an explicit sort.
- *
- *     Multi-relation RelOptInfo Path's are more complicated.  Mergejoins are
- *     only performed with equijoins ("=").  Because of this, the resulting
- *     multi-relation path actually has more than one primary key.  For example,
- *     a mergejoin using a clause "tab1.col1 = tab2.col1" would generate pathkeys
- *     of ( (tab1.col1/sortop1 tab2.col1/sortop2) ), indicating that the major
- *     sort order of the Path can be taken to be *either* tab1.col1 or tab2.col1.
- *     They are equal, so they are both primary sort keys.  This allows future
- *     joins to use either var as a pre-sorted key to prevent upper Mergejoins
- *     from having to re-sort the Path.  This is why pathkeys is a List of Lists.
- *
- *     Note that while the order of the top list is meaningful (primary vs.
- *     secondary sort key), the order of each sublist is arbitrary.  No code
- *     working with pathkeys should generate a result that depends on the order
- *     of a pathkey sublist.
+ *     we can always create a pathkey by doing an explicit sort.  The pathkeys
+ *     for a sort plan's output just represent the sort key fields and the
+ *     ordering operators used.
+ *
+ *     Things get more interesting when we consider joins.  Suppose we do a
+ *     mergejoin between A and B using the mergeclause A.X = B.Y.  The output
+ *     of the mergejoin is sorted by X --- but it is also sorted by Y.  We
+ *     represent this fact by listing both keys in a single pathkey sublist:
+ *     ( (A.X/xsortop B.Y/ysortop) ).  This pathkey asserts that the major
+ *     sort order of the Path can be taken to be *either* A.X or B.Y.
+ *     They are equal, so they are both primary sort keys.  By doing this,
+ *     we allow future joins to use either var as a pre-sorted key, so upper
+ *     Mergejoins may be able to avoid having to re-sort the Path.  This is
+ *     why pathkeys is a List of Lists.
   *
   *     We keep a sortop associated with each PathKeyItem because cross-data-type
- *     mergejoins are possible; for example int4=int8 is mergejoinable.  In this
- *     case we need to remember that the left var is ordered by int4lt while
- *     the right var is ordered by int8lt.  So the different members of each
- *     sublist could have different sortops.
- *
- *     When producing the pathkeys for a merge or nestloop join, we can keep
- *     all of the keys of the outer path, since the ordering of the outer path
- *     will be preserved in the result.  We add to each pathkey sublist any inner
- *     vars that are equijoined to any of the outer vars in the sublist.  In the
- *     nestloop case we have to be careful to consider only equijoin operators;
- *     the nestloop's join clauses might include non-equijoin operators.
- *     (Currently, we do this by considering only mergejoinable operators while
- *     making the pathkeys, since we have no separate marking for operators that
- *     are equijoins but aren't mergejoinable.)
+ *     mergejoins are possible; for example int4 = int8 is mergejoinable.
+ *     In this case we need to remember that the left var is ordered by int4lt
+ *     while the right var is ordered by int8lt.  So the different members of
+ *     each sublist could have different sortops.
+ *
+ *     Note that while the order of the top list is meaningful (primary vs.
+ *     secondary sort key), the order of each sublist is arbitrary.  Each sublist
+ *     should be regarded as a set of equivalent keys, with no significance
+ *     to the list order.
+ *
+ *     With a little further thought, it becomes apparent that pathkeys for
+ *     joins need not only come from mergejoins.  For example, if we do a
+ *     nestloop join between outer relation A and inner relation B, then any
+ *     pathkeys relevant to A are still valid for the join result: we have
+ *     not altered the order of the tuples from A.  Even more interesting,
+ *     if there was a mergeclause (more formally, an "equijoin clause") A.X=B.Y,
+ *     and A.X was a pathkey for the outer relation A, then we can assert that
+ *     B.Y is a pathkey for the join result; X was ordered before and still is,
+ *     and the joined values of Y are equal to the joined values of X, so Y
+ *     must now be ordered too.  This is true even though we used no mergejoin.
+ *
+ *     More generally, whenever we have an equijoin clause A.X = B.Y and a
+ *     pathkey A.X, we can add B.Y to that pathkey if B is part of the joined
+ *     relation the pathkey is for, *no matter how we formed the join*.
+ *
+ *     In short, then: when producing the pathkeys for a merge or nestloop join,
+ *     we can keep all of the keys of the outer path, since the ordering of the
+ *     outer path will be preserved in the result.  Furthermore, we can add to
+ *     each pathkey sublist any inner vars that are equijoined to any of the
+ *     outer vars in the sublist; this works regardless of whether we are
+ *     implementing the join using that equijoin clause as a mergeclause,
+ *     or merely enforcing the clause after-the-fact as a qpqual filter.
   *
   *     Although Hashjoins also work only with equijoin operators, it is *not*
   *     safe to consider the output of a Hashjoin to be sorted in any particular
   *     order --- not even the outer path's order.  This is true because the
   *     executor might have to split the join into multiple batches.  Therefore
- *     a Hashjoin is always given NIL pathkeys.
+ *     a Hashjoin is always given NIL pathkeys.  (Also, we need to use only
+ *     mergejoinable operators when deducing which inner vars are now sorted,
+ *     because a mergejoin operator tells us which left- and right-datatype
+ *     sortops can be considered equivalent, whereas a hashjoin operator
+ *     doesn't imply anything about sort order.)
   *
   *     Pathkeys are also useful to represent an ordering that we wish to achieve,
   *     since they are easily compared to the pathkeys of a potential candidate
   *     path.  So, SortClause lists are turned into pathkeys lists for use inside
   *     the optimizer.
   *
+ *     OK, now for how it *really* works:
+ *
+ *     We did implement pathkeys just as described above, and found that the
+ *     planner spent a huge amount of time comparing pathkeys, because the
+ *     representation of pathkeys as unordered lists made it expensive to decide
+ *     whether two were equal or not.  So, we've modified the representation
+ *     as described next.
+ *
+ *     If we scan the WHERE clause for equijoin clauses (mergejoinable clauses)
+ *     during planner startup, we can construct lists of equivalent pathkey items
+ *     for the query.  There could be more than two items per equivalence set;
+ *     for example, WHERE A.X = B.Y AND B.Y = C.Z AND D.R = E.S creates the
+ *     equivalence sets { A.X B.Y C.Z } and { D.R E.S } (plus associated sortops).
+ *     Any pathkey item that belongs to an equivalence set implies that all the
+ *     other items in its set apply to the relation too, or at least all the ones
+ *     that are for fields present in the relation.  (Some of the items in the
+ *     set might be for as-yet-unjoined relations.)  Furthermore, any multi-item
+ *     pathkey sublist that appears at any stage of planning the query *must* be
+ *     a subset of one or another of these equivalence sets; there's no way we'd
+ *     have put two items in the same pathkey sublist unless they were equijoined
+ *     in WHERE.
+ *
+ *     Now suppose that we allow a pathkey sublist to contain pathkey items for
+ *     vars that are not yet part of the pathkey's relation.  This introduces
+ *     no logical difficulty, because such items can easily be seen to be
+ *     irrelevant; we just mandate that they be ignored.  But having allowed
+ *     this, we can declare (by fiat) that any multiple-item pathkey sublist
+ *     must be equal() to the appropriate equivalence set.  In effect, whenever
+ *     we make a pathkey sublist that mentions any var appearing in an
+ *     equivalence set, we instantly add all the other vars equivalenced to it,
+ *     whether they appear yet in the pathkey's relation or not.  And we also
+ *     mandate that the pathkey sublist appear in the same order as the
+ *     equivalence set it comes from.  (In practice, we simply return a pointer
+ *     to the relevant equivalence set without building any new sublist at all.)
+ *     This makes comparing pathkeys very simple and fast, and saves a lot of
+ *     work and memory space for pathkey construction as well.
+ *
+ *     Note that pathkey sublists having just one item still exist, and are
+ *     not expected to be equal() to any equivalence set.  This occurs when
+ *     we describe a sort order that involves a var that's not mentioned in
+ *     any equijoin clause of the WHERE.  We could add singleton sets containing
+ *     such vars to the query's list of equivalence sets, but there's little
+ *     point in doing so.
+ *
+ *     By the way, it's OK and even useful for us to build equivalence sets
+ *     that mention multiple vars from the same relation.  For example, if
+ *     we have WHERE A.X = A.Y and we are scanning A using an index on X,
+ *     we can legitimately conclude that the path is sorted by Y as well;
+ *     and this could be handy if Y is the variable used in other join clauses
+ *     or ORDER BY.  So, any WHERE clause with a mergejoinable operator can
+ *     contribute to an equivalence set, even if it's not a join clause.
+ *
   *     -- bjm & tgl
   *--------------------
   */
@@ -113,6 +186,129 @@ makePathKeyItem(Node *key, Oid sortop)
         return item;
  }
  
+/*
+ * add_equijoined_keys
+ *       The given clause has a mergejoinable operator, so its two sides
+ *       can be considered equal after restriction clause application; in
+ *       particular, any pathkey mentioning one side (with the correct sortop)
+ *       can be expanded to include the other as well.  Record the vars and
+ *       associated sortops in the query's equi_key_list for future use.
+ *
+ * The query's equi_key_list field points to a list of sublists of PathKeyItem
+ * nodes, where each sublist is a set of two or more vars+sortops that have
+ * been identified as logically equivalent (and, therefore, we may consider
+ * any two in a set to be equal).  As described above, we will subsequently
+ * use direct pointers to one of these sublists to represent any pathkey
+ * that involves an equijoined variable.
+ *
+ * This code would actually work fine with expressions more complex than
+ * a single Var, but currently it won't see any because check_mergejoinable
+ * won't accept such clauses as mergejoinable.
+ */
+void
+add_equijoined_keys(Query *root, RestrictInfo *restrictinfo)
+{
+       Expr       *clause = restrictinfo->clause;
+       PathKeyItem *item1 = makePathKeyItem((Node *) get_leftop(clause),
+                                                                                restrictinfo->left_sortop);
+       PathKeyItem *item2 = makePathKeyItem((Node *) get_rightop(clause),
+                                                                                restrictinfo->right_sortop);
+       List       *newset,
+                          *cursetlink;
+
+       /* We might see a clause X=X; don't make a single-element list from it */
+       if (equal(item1, item2))
+               return;
+       /*
+        * Our plan is to make a two-element set, then sweep through the existing
+        * equijoin sets looking for matches to item1 or item2.  When we find one,
+        * we remove that set from equi_key_list and union it into our new set.
+        * When done, we add the new set to the front of equi_key_list.
+        *
+        * This is a standard UNION-FIND problem, for which there exist better
+        * data structures than simple lists.  If this code ever proves to be
+        * a bottleneck then it could be sped up --- but for now, simple is
+        * beautiful.
+        */
+       newset = lcons(item1, lcons(item2, NIL));
+
+       foreach(cursetlink, root->equi_key_list)
+       {
+               List       *curset = lfirst(cursetlink);
+
+               if (member(item1, curset) || member(item2, curset))
+               {
+                       /* Found a set to merge into our new set */
+                       newset = LispUnion(newset, curset);
+                       /* Remove old set from equi_key_list.  NOTE this does not change
+                        * lnext(cursetlink), so the outer foreach doesn't break.
+                        */
+                       root->equi_key_list = lremove(curset, root->equi_key_list);
+                       freeList(curset);       /* might as well recycle old cons cells */
+               }
+       }
+
+       root->equi_key_list = lcons(newset, root->equi_key_list);
+}
+
+/*
+ * make_canonical_pathkey
+ *       Given a PathKeyItem, find the equi_key_list subset it is a member of,
+ *       if any.  If so, return a pointer to that sublist, which is the
+ *       canonical representation (for this query) of that PathKeyItem's
+ *       equivalence set.  If it is not found, return a single-element list
+ *       containing the PathKeyItem (when the item has no equivalence peers,
+ *       we just allow it to be a standalone list).
+ *
+ * Note that this function must not be used until after we have completed
+ * scanning the WHERE clause for equijoin operators.
+ */
+static List *
+make_canonical_pathkey(Query *root, PathKeyItem *item)
+{
+       List       *cursetlink;
+
+       foreach(cursetlink, root->equi_key_list)
+       {
+               List       *curset = lfirst(cursetlink);
+
+               if (member(item, curset))
+                       return curset;
+       }
+       return lcons(item, NIL);
+}
+
+/*
+ * canonicalize_pathkeys
+ *        Convert a not-necessarily-canonical pathkeys list to canonical form.
+ *
+ * Note that this function must not be used until after we have completed
+ * scanning the WHERE clause for equijoin operators.
+ */
+List *
+canonicalize_pathkeys(Query *root, List *pathkeys)
+{
+       List       *new_pathkeys = NIL;
+       List       *i;
+
+       foreach(i, pathkeys)
+       {
+               List               *pathkey = (List *) lfirst(i);
+               PathKeyItem        *item;
+
+               /*
+                * It's sufficient to look at the first entry in the sublist;
+                * if there are more entries, they're already part of an
+                * equivalence set by definition.
+                */
+               Assert(pathkey != NIL);
+               item = (PathKeyItem *) lfirst(pathkey);
+               new_pathkeys = lappend(new_pathkeys,
+                                                          make_canonical_pathkey(root, item));
+       }
+       return new_pathkeys;
+}
+
  /****************************************************************************
   *             PATHKEY COMPARISONS
   ****************************************************************************/
@@ -126,15 +322,21 @@ makePathKeyItem(Node *key, Oid sortop)
   *       it contains all the keys of the other plus more.  For example, either
   *       ((A) (B)) or ((A B)) is better than ((A)).
   *
- *     This gets called a lot, so it is optimized.
+ *       Because we actually only expect to see canonicalized pathkey sublists,
+ *       we don't have to do the full two-way-subset-inclusion test on each
+ *       pair of sublists that is implied by the above statement.  Instead we
+ *       just do an equal().  In the normal case where multi-element sublists
+ *       are pointers into the root's equi_key_list, equal() will be very fast:
+ *       it will recognize pointer equality when the sublists are the same,
+ *       and will fail at the first sublist element when they are not.
+ *
+ * Yes, this gets called often enough to be worth coding it this tightly.
   */
  PathKeysComparison
  compare_pathkeys(List *keys1, List *keys2)
  {
         List       *key1,
                            *key2;
-       bool            key1_subsetof_key2 = true,
-                               key2_subsetof_key1 = true;
  
         for (key1 = keys1, key2 = keys2;
                  key1 != NIL && key2 != NIL;
@@ -142,36 +344,12 @@ compare_pathkeys(List *keys1, List *keys2)
         {
                 List       *subkey1 = lfirst(key1);
                 List       *subkey2 = lfirst(key2);
-               List       *i;
  
-               /* We have to do this the hard way since the ordering of the subkey
-                * lists is arbitrary.
+               /* We will never have two subkeys where one is a subset of the other,
+                * because of the canonicalization explained above.  Either they are
+                * equal or they ain't.
                  */
-               if (key1_subsetof_key2)
-               {
-                       foreach(i, subkey1)
-                       {
-                               if (! member(lfirst(i), subkey2))
-                               {
-                                       key1_subsetof_key2 = false;
-                                       break;
-                               }
-                       }
-               }
-
-               if (key2_subsetof_key1)
-               {
-                       foreach(i, subkey2)
-                       {
-                               if (! member(lfirst(i), subkey1))
-                               {
-                                       key2_subsetof_key1 = false;
-                                       break;
-                               }
-                       }
-               }
-
-               if (!key1_subsetof_key2 && !key2_subsetof_key1)
+               if (! equal(subkey1, subkey2))
                         return PATHKEYS_DIFFERENT; /* no need to keep looking */
         }
  
@@ -180,18 +358,11 @@ compare_pathkeys(List *keys1, List *keys2)
          * of the other list are not NIL --- no pathkey list should ever have
          * a NIL sublist.)
          */
-       if (key1 != NIL)
-               key1_subsetof_key2 = false;
-       if (key2 != NIL)
-               key2_subsetof_key1 = false;
-
-       if (key1_subsetof_key2 && key2_subsetof_key1)
+       if (key1 == NIL && key2 == NIL)
                 return PATHKEYS_EQUAL;
-       if (key1_subsetof_key2)
-               return PATHKEYS_BETTER2;
-       if (key2_subsetof_key1)
-               return PATHKEYS_BETTER1;
-       return PATHKEYS_DIFFERENT;
+       if (key1 != NIL)
+               return PATHKEYS_BETTER1; /* key1 is longer */
+       return PATHKEYS_BETTER2;        /* key2 is longer */
  }
  
  /*
@@ -215,16 +386,16 @@ pathkeys_contained_in(List *keys1, List *keys2)
  
  /*
   * get_cheapest_path_for_pathkeys
- *       Find the cheapest path in 'paths' that satisfies the given pathkeys.
- *       Return NULL if no such path.
+ *       Find the cheapest path (according to the specified criterion) that
+ *       satisfies the given pathkeys.  Return NULL if no such path.
   *
- * 'paths' is a list of possible paths (either inner or outer)
- * 'pathkeys' represents a required ordering
- * if 'indexpaths_only' is true, only IndexPaths will be considered.
+ * 'paths' is a list of possible paths that all generate the same relation
+ * 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'cost_criterion' is STARTUP_COST or TOTAL_COST
   */
  Path *
  get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
-                                                          bool indexpaths_only)
+                                                          CostSelector cost_criterion)
  {
         Path       *matched_path = NULL;
         List       *i;
@@ -233,15 +404,55 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
         {
                 Path       *path = (Path *) lfirst(i);
  
-               if (indexpaths_only && ! IsA(path, IndexPath))
+               /*
+                * Since cost comparison is a lot cheaper than pathkey comparison,
+                * do that first.  (XXX is that still true?)
+                */
+               if (matched_path != NULL &&
+                       compare_path_costs(matched_path, path, cost_criterion) <= 0)
                         continue;
  
                 if (pathkeys_contained_in(pathkeys, path->pathkeys))
-               {
-                       if (matched_path == NULL ||
-                               path->path_cost < matched_path->path_cost)
-                               matched_path = path;
-               }
+                       matched_path = path;
+       }
+       return matched_path;
+}
+
+/*
+ * get_cheapest_fractional_path_for_pathkeys
+ *       Find the cheapest path (for retrieving a specified fraction of all
+ *       the tuples) that satisfies the given pathkeys.
+ *       Return NULL if no such path.
+ *
+ * See compare_fractional_path_costs() for the interpretation of the fraction
+ * parameter.
+ *
+ * 'paths' is a list of possible paths that all generate the same relation
+ * 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'fraction' is the fraction of the total tuples expected to be retrieved
+ */
+Path *
+get_cheapest_fractional_path_for_pathkeys(List *paths,
+                                                                                 List *pathkeys,
+                                                                                 double fraction)
+{
+       Path       *matched_path = NULL;
+       List       *i;
+
+       foreach(i, paths)
+       {
+               Path       *path = (Path *) lfirst(i);
+
+               /*
+                * Since cost comparison is a lot cheaper than pathkey comparison,
+                * do that first.
+                */
+               if (matched_path != NULL &&
+                       compare_fractional_path_costs(matched_path, path, fraction) <= 0)
+                       continue;
+
+               if (pathkeys_contained_in(pathkeys, path->pathkeys))
+                       matched_path = path;
         }
         return matched_path;
  }
@@ -255,18 +466,22 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
   *       Build a pathkeys list that describes the ordering induced by an index
   *       scan using the given index.  (Note that an unordered index doesn't
   *       induce any ordering; such an index will have no sortop OIDS in
- *       its "ordering" field.)
+ *       its "ordering" field, and we will return NIL.)
   *
- * Vars in the resulting pathkeys list are taken from the rel's targetlist.
- * If we can't find the indexkey in the targetlist, we assume that the
- * ordering of that key is not interesting.
+ * If 'scandir' is BackwardScanDirection, attempt to build pathkeys
+ * representing a backwards scan of the index.  Return NIL if that can't be done.
   */
  List *
-build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index)
+build_index_pathkeys(Query *root,
+                                        RelOptInfo *rel,
+                                        IndexOptInfo *index,
+                                        ScanDirection scandir)
  {
         List       *retval = NIL;
         int                *indexkeys = index->indexkeys;
         Oid                *ordering = index->ordering;
+       PathKeyItem *item;
+       Oid                     sortop;
  
         if (!indexkeys || indexkeys[0] == 0 ||
                 !ordering || ordering[0] == InvalidOid)
@@ -275,8 +490,6 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index)
         if (index->indproc)
         {
                 /* Functional index: build a representation of the function call */
-               int                     relid = lfirsti(rel->relids);
-               Oid                     reloid = getrelid(relid, root->rtable);
                 Func       *funcnode = makeNode(Func);
                 List       *funcargs = NIL;
  
@@ -291,43 +504,42 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index)
  
                 while (*indexkeys != 0)
                 {
-                       int                     varattno = *indexkeys;
-                       Oid                     vartypeid = get_atttype(reloid, varattno);
-                       int32           type_mod = get_atttypmod(reloid, varattno);
-
                         funcargs = lappend(funcargs,
-                                                          makeVar(relid, varattno, vartypeid,
-                                                                          type_mod, 0));
+                                                          find_indexkey_var(root, rel, *indexkeys));
                         indexkeys++;
                 }
  
+               sortop = *ordering;
+               if (ScanDirectionIsBackward(scandir))
+               {
+                       sortop = get_commutator(sortop);
+                       if (sortop == InvalidOid)
+                               return NIL;             /* oops, no reverse sort operator? */
+               }
+
                 /* Make a one-sublist pathkeys list for the function expression */
-               retval = lcons(lcons(
-                       makePathKeyItem((Node *) make_funcclause(funcnode, funcargs),
-                                                       *ordering),
-                       NIL), NIL);
+               item = makePathKeyItem((Node *) make_funcclause(funcnode, funcargs),
+                                                          sortop);
+               retval = lcons(make_canonical_pathkey(root, item), NIL);
         }
         else
         {
                 /* Normal non-functional index */
-               List       *rel_tlist = rel->targetlist;
-
                 while (*indexkeys != 0 && *ordering != InvalidOid)
                 {
-                       Var             *relvar = find_indexkey_var(*indexkeys, rel_tlist);
+                       Var             *relvar = find_indexkey_var(root, rel, *indexkeys);
  
-                       /* If we can find no tlist entry for the n'th sort key,
-                        * then we're done generating pathkeys; any subsequent sort keys
-                        * no longer apply, since we can't represent the ordering properly
-                        * even if there are tlist entries for them.
-                        */
-                       if (!relvar)
-                               break;
-                       /* OK, make a one-element sublist for this sort key */
-                       retval = lappend(retval,
-                                                        lcons(makePathKeyItem((Node *) relvar,
-                                                                                                  *ordering),
-                                                                  NIL));
+                       sortop = *ordering;
+                       if (ScanDirectionIsBackward(scandir))
+                       {
+                               sortop = get_commutator(sortop);
+                               if (sortop == InvalidOid)
+                                       break;          /* oops, no reverse sort operator? */
+                       }
+
+                       /* OK, make a sublist for this sort key */
+                       item = makePathKeyItem((Node *) relvar, sortop);
+                       retval = lappend(retval, make_canonical_pathkey(root, item));
  
                         indexkeys++;
                         ordering++;
@@ -338,21 +550,37 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index)
  }
  
  /*
- * Find a var in a relation's targetlist that matches an indexkey attrnum.
+ * Find or make a Var node for the specified attribute of the rel.
+ *
+ * We first look for the var in the rel's target list, because that's
+ * easy and fast.  But the var might not be there (this should normally
+ * only happen for vars that are used in WHERE restriction clauses,
+ * but not in join clauses or in the SELECT target list).  In that case,
+ * gin up a Var node the hard way.
   */
  static Var *
-find_indexkey_var(int indexkey, List *tlist)
+find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno)
  {
         List       *temp;
+       int                     relid;
+       Oid                     reloid,
+                               vartypeid;
+       int32           type_mod;
  
-       foreach(temp, tlist)
+       foreach(temp, rel->targetlist)
         {
                 Var        *tle_var = get_expr(lfirst(temp));
  
-               if (IsA(tle_var, Var) && tle_var->varattno == indexkey)
+               if (IsA(tle_var, Var) && tle_var->varattno == varattno)
                         return tle_var;
         }
-       return NULL;
+
+       relid = lfirsti(rel->relids);
+       reloid = getrelid(relid, root->rtable);
+       vartypeid = get_atttype(reloid, varattno);
+       type_mod = get_atttypmod(reloid, varattno);
+
+       return makeVar(relid, varattno, vartypeid, type_mod, 0);
  }
  
  /*
@@ -360,164 +588,33 @@ find_indexkey_var(int indexkey, List *tlist)
   *       Build the path keys for a join relation constructed by mergejoin or
   *       nestloop join.  These keys should include all the path key vars of the
   *       outer path (since the join will retain the ordering of the outer path)
- *       plus any vars of the inner path that are mergejoined to the outer vars.
+ *       plus any vars of the inner path that are equijoined to the outer vars.
   *
- *       Per the discussion at the top of this file, mergejoined inner vars
+ *       Per the discussion at the top of this file, equijoined inner vars
   *       can be considered path keys of the result, just the same as the outer
- *       vars they were joined with.
- *
- *       We can also use inner path vars as pathkeys of a nestloop join, but we
- *       must be careful that we only consider equijoin clauses and not general
- *       join clauses.  For example, "t1.a < t2.b" might be a join clause of a
- *       nestloop, but it doesn't result in b acquiring the ordering of a!
- *       joinpath.c handles that problem by only passing this routine clauses
- *       that are marked mergejoinable, even if a nestloop join is being built.
- *       Therefore we only have 't1.a = t2.b' style clauses, and can expect that
- *       the inner var will acquire the outer's ordering no matter which join
- *       method is actually used.
- *
- *       We drop pathkeys that are not vars of the join relation's tlist,
- *       on the assumption that they are not interesting to higher levels.
- *       (Is this correct??  To support expression pathkeys we might want to
- *       check that all vars mentioned in the key are in the tlist, instead.)
- *
- * All vars in the result are taken from the join relation's tlist,
- * not from the given pathkeys or joinclauses.
+ *       vars they were joined with; furthermore, it doesn't matter what kind
+ *       of join algorithm is actually used.
   *
   * 'outer_pathkeys' is the list of the outer path's path keys
   * 'join_rel_tlist' is the target list of the join relation
- * 'joinclauses' is the list of mergejoinable clauses to consider (note this
- *             is a list of RestrictInfos, not just bare qual clauses); can be NIL
+ * 'equi_key_list' is the query's list of pathkeyitem equivalence sets
   *
   * Returns the list of new path keys.
- *
   */
  List *
  build_join_pathkeys(List *outer_pathkeys,
                                         List *join_rel_tlist,
-                                       List *joinclauses)
+                                       List *equi_key_list)
  {
-       List       *final_pathkeys = NIL;
-       List       *i;
-
-       foreach(i, outer_pathkeys)
-       {
-               List       *outer_pathkey = lfirst(i);
-               List       *new_pathkey;
-
-               new_pathkey = build_join_pathkey(outer_pathkey, join_rel_tlist,
-                                                                                joinclauses);
-               /* if we can find no sortable vars for the n'th sort key,
-                * then we're done generating pathkeys; any subsequent sort keys
-                * no longer apply, since we can't represent the ordering properly.
-                */
-               if (new_pathkey == NIL)
-                       break;
-               final_pathkeys = lappend(final_pathkeys, new_pathkey);
-       }
-       return final_pathkeys;
-}
-
-/*
- * build_join_pathkey
- *       Generate an individual pathkey sublist, consisting of the outer vars
- *       already mentioned in 'pathkey' plus any inner vars that are joined to
- *       them (and thus can now also be considered path keys, per discussion
- *       at the top of this file).
- *
- *       Note that each returned pathkey uses the var node found in
- *       'join_rel_tlist' rather than the input pathkey or joinclause var node.
- *       (Is this important?)
- *
- * Returns a new pathkey (list of PathKeyItems).
- */
-static List *
-build_join_pathkey(List *pathkey,
-                                  List *join_rel_tlist,
-                                  List *joinclauses)
-{
-       List       *new_pathkey = NIL;
-       List       *i,
-                          *j;
-
-       foreach(i, pathkey)
-       {
-               PathKeyItem *key = (PathKeyItem *) lfirst(i);
-               Node       *tlist_key;
-
-               Assert(key && IsA(key, PathKeyItem));
-
-               tlist_key = matching_tlist_expr(key->key, join_rel_tlist);
-               if (tlist_key)
-                       new_pathkey = lcons(makePathKeyItem(tlist_key,
-                                                                                               key->sortop),
-                                                               new_pathkey);
-
-               foreach(j, joinclauses)
-               {
-                       RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
-                       Expr       *joinclause = restrictinfo->clause;
-                       /* We assume the clause is a binary opclause... */
-                       Node       *l = (Node *) get_leftop(joinclause);
-                       Node       *r = (Node *) get_rightop(joinclause);
-                       Node       *other_var = NULL;
-                       Oid                     other_sortop = InvalidOid;
-
-                       if (equal(key->key, l))
-                       {
-                               other_var = r;
-                               other_sortop = restrictinfo->right_sortop;
-                       }
-                       else if (equal(key->key, r))
-                       {
-                               other_var = l;
-                               other_sortop = restrictinfo->left_sortop;
-                       }
-
-                       if (other_var && other_sortop)
-                       {
-                               tlist_key = matching_tlist_expr(other_var, join_rel_tlist);
-                               if (tlist_key)
-                                       new_pathkey = lcons(makePathKeyItem(tlist_key,
-                                                                                                               other_sortop),
-                                                                               new_pathkey);
-                       }
-               }
-       }
-
-       return new_pathkey;
-}
-
-/*
- * commute_pathkeys
- *             Attempt to commute the operators in a set of pathkeys, producing
- *             pathkeys that describe the reverse sort order (DESC instead of ASC).
- *             Returns TRUE if successful (all the operators have commutators).
- *
- * CAUTION: given pathkeys are modified in place, even if not successful!!
- * Usually, caller should have just built or copied the pathkeys list to
- * ensure there are no unwanted side-effects.
- */
-bool
-commute_pathkeys(List *pathkeys)
-{
-       List       *i;
-
-       foreach(i, pathkeys)
-       {
-               List       *pathkey = lfirst(i);
-               List       *j;
-
-               foreach(j, pathkey)
-               {
-                       PathKeyItem        *key = lfirst(j);
-
-                       key->sortop = get_commutator(key->sortop);
-                       if (key->sortop == InvalidOid)
-                               return false;
-               }
-       }
-       return true;                            /* successful */
+       /*
+        * This used to be quite a complex bit of code, but now that all
+        * pathkey sublists start out life canonicalized, we don't have to
+        * do a darn thing here!  The inner-rel vars we used to need to add
+        * are *already* part of the outer pathkey!
+        *
+        * I'd remove the routine entirely, but maybe someday we'll need it...
+        */
+       return outer_pathkeys;
  }
  
  /****************************************************************************
@@ -529,11 +626,18 @@ commute_pathkeys(List *pathkeys)
   *             Generate a pathkeys list that represents the sort order specified
   *             by a list of SortClauses (GroupClauses will work too!)
   *
+ * NB: the result is NOT in canonical form, but must be passed through
+ * canonicalize_pathkeys() before it can be used for comparisons or
+ * labeling relation sort orders.  (We do things this way because
+ * union_planner needs to be able to construct requested pathkeys before
+ * the pathkey equivalence sets have been created for the query.)
+ *
   * 'sortclauses' is a list of SortClause or GroupClause nodes
   * 'tlist' is the targetlist to find the referenced tlist entries in
   */
  List *
-make_pathkeys_for_sortclauses(List *sortclauses, List *tlist)
+make_pathkeys_for_sortclauses(List *sortclauses,
+                                                         List *tlist)
  {
         List       *pathkeys = NIL;
         List       *i;
@@ -546,7 +650,11 @@ make_pathkeys_for_sortclauses(List *sortclauses, List *tlist)
  
                 sortkey = get_sortgroupclause_expr(sortcl, tlist);
                 pathkey = makePathKeyItem(sortkey, sortcl->sortop);
-               /* pathkey becomes a one-element sublist */
+               /*
+                * The pathkey becomes a one-element sublist, for now;
+                * canonicalize_pathkeys() might replace it with a longer
+                * sublist later.
+                */
                 pathkeys = lappend(pathkeys, lcons(pathkey, NIL));
         }
         return pathkeys;
@@ -599,6 +707,7 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
                 {
                         PathKeyItem        *keyitem = lfirst(j);
                         Node               *key = keyitem->key;
+                       Oid                             keyop = keyitem->sortop;
                         List               *k;
  
                         foreach(k, restrictinfos)
@@ -607,8 +716,10 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
  
                                 Assert(restrictinfo->mergejoinoperator != InvalidOid);
  
-                               if ((equal(key, get_leftop(restrictinfo->clause)) ||
-                                        equal(key, get_rightop(restrictinfo->clause))) &&
+                               if (((keyop == restrictinfo->left_sortop &&
+                                         equal(key, get_leftop(restrictinfo->clause))) ||
+                                        (keyop == restrictinfo->right_sortop &&
+                                         equal(key, get_rightop(restrictinfo->clause)))) &&
                                         ! member(restrictinfo, mergeclauses))
                                 {
                                         matched_restrictinfo = restrictinfo;
@@ -645,7 +756,7 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
   * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
   *                     that will be used in a merge join.
   * 'tlist' is a relation target list for either the inner or outer
- *                     side of the proposed join rel.
+ *                     side of the proposed join rel.  (Not actually needed anymore)
   *
   * Returns a pathkeys list that can be applied to the indicated relation.
   *
@@ -654,7 +765,9 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
   * just make the keys, eh?
   */
  List *
-make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist)
+make_pathkeys_for_mergeclauses(Query *root,
+                                                          List *mergeclauses,
+                                                          List *tlist)
  {
         List       *pathkeys = NIL;
         List       *i;
@@ -664,32 +777,24 @@ make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist)
                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
                 Node       *key;
                 Oid                     sortop;
+               PathKeyItem *item;
  
                 Assert(restrictinfo->mergejoinoperator != InvalidOid);
  
                 /*
                  * Find the key and sortop needed for this mergeclause.
                  *
-                * We can use either side of the mergeclause, since we haven't yet
-                * committed to which side will be inner.
+                * Both sides of the mergeclause should appear in one of the
+                * query's pathkey equivalence classes, so it doesn't matter
+                * which one we use here.
                  */
-               key = matching_tlist_expr((Node *) get_leftop(restrictinfo->clause),
-                                                                 tlist);
+               key = (Node *) get_leftop(restrictinfo->clause);
                 sortop = restrictinfo->left_sortop;
-               if (! key)
-               {
-                       key = matching_tlist_expr((Node *) get_rightop(restrictinfo->clause),
-                                                                         tlist);
-                       sortop = restrictinfo->right_sortop;
-               }
-               if (! key)
-                       elog(ERROR, "make_pathkeys_for_mergeclauses: can't find key");
                 /*
                  * Add a pathkey sublist for this sort item
                  */
-               pathkeys = lappend(pathkeys,
-                                                  lcons(makePathKeyItem(key, sortop),
-                                                                NIL));
+               item = makePathKeyItem(key, sortop);
+               pathkeys = lappend(pathkeys, make_canonical_pathkey(root, item));
         }
  
         return pathkeys;
diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c

index ab0427ef32263d36eb468dc75af4f2dbf9dc6562..1e7dc43473b24f638a02263ae750c575ffb144b8 100644 (file)
--- a/src/backend/optimizer/path/tidpath.c
+++ b/src/backend/optimizer/path/tidpath.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.4 2000/02/07 04:40:59 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.5 2000/02/15 20:49:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -36,7 +36,7 @@
  #include "parser/parsetree.h"
  #include "utils/lsyscache.h"
  
-static List    *create_tidscan_joinpaths(RelOptInfo *);
+static void create_tidscan_joinpaths(RelOptInfo *rel);
  static List    *TidqualFromRestrictinfo(List *relids, List *restrictinfo);
  static bool    isEvaluable(int varno, Node *node);
  static Node    *TidequalClause(int varno, Expr *node);
@@ -234,61 +234,54 @@ TidqualFromRestrictinfo(List *relids, List *restrictinfo)
  
  /*
   * create_tidscan_joinpaths
- *       Creates a path corresponding to a tid_direct scan, returning the
- *       pathnode.
+ *       Create innerjoin paths if there are suitable joinclauses.
   *
+ * XXX does this actually work?
   */
-List *
+static void
  create_tidscan_joinpaths(RelOptInfo *rel)
  {
         List            *rlst = NIL,
                                 *lst;
-       TidPath         *pathnode = (TidPath *) NULL;
-       List            *restinfo,
-                               *tideval;
  
         foreach (lst, rel->joininfo)
         {
-               JoinInfo   *joininfo = (JoinInfo *)lfirst(lst);
+               JoinInfo   *joininfo = (JoinInfo *) lfirst(lst);
+               List            *restinfo,
+                                       *tideval;
  
                 restinfo = joininfo->jinfo_restrictinfo;
                 tideval = TidqualFromRestrictinfo(rel->relids, restinfo);
                 if (length(tideval) == 1)
                 {
-                       pathnode = makeNode(TidPath);
+                       TidPath         *pathnode = makeNode(TidPath);
  
                         pathnode->path.pathtype = T_TidScan;
                         pathnode->path.parent = rel;
                         pathnode->path.pathkeys = NIL;
-                       pathnode->path.path_cost = cost_tidscan(rel, tideval);
                         pathnode->tideval = tideval;
                         pathnode->unjoined_relids = joininfo->unjoined_relids;
+
+                       cost_tidscan(&pathnode->path, rel, tideval);
+
                         rlst = lappend(rlst, pathnode);
                 }
         }
         rel->innerjoin = nconc(rel->innerjoin, rlst);
-       return rlst;
  }
  
  /*
   * create_tidscan_paths
- *       Creates a path corresponding to a tid direct scan, returning the
- *       pathnode List.
- *
+ *       Creates paths corresponding to tid direct scans of the given rel.
+ *       Candidate paths are added to the rel's pathlist (using add_path).
   */
-List *
+void
  create_tidscan_paths(Query *root, RelOptInfo *rel)
  {
-       List    *rlst = NIL;
-       TidPath *pathnode = (TidPath *) NULL;
         List    *tideval = TidqualFromRestrictinfo(rel->relids,
                                                                                            rel->baserestrictinfo);
         
         if (tideval)
-               pathnode = create_tidscan_path(rel, tideval);
-       if (pathnode)
-               rlst = lcons(pathnode, rlst);
+               add_path(rel, (Path *) create_tidscan_path(rel, tideval));
         create_tidscan_joinpaths(rel);
-
-       return rlst;
  }
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 97a021a2dd221fbf9deeaacc765099108dc077a9..55af1426fdca3e75bb7ec953da212a2eb31a65da 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.84 2000/02/07 04:41:00 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.85 2000/02/15 20:49:18 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -57,7 +57,9 @@ static Node *fix_indxqual_operand(Node *node, int baserelid,
                                                                   Form_pg_index index,
                                                                   Oid *opclass);
  static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
-                          List *indxid, List *indxqual, List *indxqualorig);
+                                                                List *indxid, List *indxqual,
+                                                                List *indxqualorig,
+                                                                ScanDirection indexscandir);
  static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid,
                          List *tideval);
  static NestLoop *make_nestloop(List *qptlist, List *qpqual, Plan *lefttree,
@@ -427,9 +429,13 @@ create_indexscan_node(Query *root,
                                                            baserelid,
                                                            best_path->indexid,
                                                            fixed_indxqual,
-                                                          indxqual);
+                                                          indxqual,
+                                                          best_path->indexscandir);
  
         copy_path_costsize(&scan_node->scan.plan, &best_path->path);
+       /* set up rows estimate (just to make EXPLAIN output reasonable) */
+       if (plan_rows < 1.0)
+               plan_rows = 1.0;
         scan_node->scan.plan.plan_rows = plan_rows;
  
         return scan_node;
@@ -437,16 +443,14 @@ create_indexscan_node(Query *root,
  
  static TidScan *
  make_tidscan(List *qptlist,
-                       List *qpqual,
-                       Index scanrelid,        
-                       List *tideval)
+                        List *qpqual,
+                        Index scanrelid,       
+                        List *tideval)
  {
         TidScan *node = makeNode(TidScan);
         Plan    *plan = &node->scan.plan;
  
-       plan->cost = 0;
-       plan->plan_rows = 0;
-       plan->plan_width = 0;
+       /* cost should be inserted by caller */
         plan->state = (EState *) NULL;
         plan->targetlist = qptlist;
         plan->qual = qpqual;
@@ -1038,13 +1042,15 @@ copy_path_costsize(Plan *dest, Path *src)
  {
         if (src)
         {
-               dest->cost = src->path_cost;
+               dest->startup_cost = src->startup_cost;
+               dest->total_cost = src->total_cost;
                 dest->plan_rows = src->parent->rows;
                 dest->plan_width = src->parent->width;
         }
         else
         {
-               dest->cost = 0;
+               dest->startup_cost = 0;
+               dest->total_cost = 0;
                 dest->plan_rows = 0;
                 dest->plan_width = 0;
         }
@@ -1061,13 +1067,15 @@ copy_plan_costsize(Plan *dest, Plan *src)
  {
         if (src)
         {
-               dest->cost = src->cost;
+               dest->startup_cost = src->startup_cost;
+               dest->total_cost = src->total_cost;
                 dest->plan_rows = src->plan_rows;
                 dest->plan_width = src->plan_width;
         }
         else
         {
-               dest->cost = 0;
+               dest->startup_cost = 0;
+               dest->total_cost = 0;
                 dest->plan_rows = 0;
                 dest->plan_width = 0;
         }
@@ -1130,7 +1138,7 @@ make_seqscan(List *qptlist,
         SeqScan    *node = makeNode(SeqScan);
         Plan       *plan = &node->plan;
  
-       copy_plan_costsize(plan, NULL);
+       /* cost should be inserted by caller */
         plan->state = (EState *) NULL;
         plan->targetlist = qptlist;
         plan->qual = qpqual;
@@ -1148,12 +1156,13 @@ make_indexscan(List *qptlist,
                            Index scanrelid,
                            List *indxid,
                            List *indxqual,
-                          List *indxqualorig)
+                          List *indxqualorig,
+                          ScanDirection indexscandir)
  {
         IndexScan  *node = makeNode(IndexScan);
         Plan       *plan = &node->scan.plan;
  
-       copy_plan_costsize(plan, NULL);
+       /* cost should be inserted by caller */
         plan->state = (EState *) NULL;
         plan->targetlist = qptlist;
         plan->qual = qpqual;
@@ -1163,7 +1172,7 @@ make_indexscan(List *qptlist,
         node->indxid = indxid;
         node->indxqual = indxqual;
         node->indxqualorig = indxqualorig;
-       node->indxorderdir = NoMovementScanDirection;
+       node->indxorderdir = indexscandir;
         node->scan.scanstate = (CommonScanState *) NULL;
  
         return node;
@@ -1219,6 +1228,10 @@ make_hash(List *tlist, Var *hashkey, Plan *lefttree)
         Plan       *plan = &node->plan;
  
         copy_plan_costsize(plan, lefttree);
+       /* For plausibility, make startup & total costs equal total cost of
+        * input plan; this only affects EXPLAIN display, not decisions.
+        */
+       plan->startup_cost = plan->total_cost;
         plan->state = (EState *) NULL;
         plan->targetlist = tlist;
         plan->qual = NULL;
@@ -1255,9 +1268,12 @@ make_sort(List *tlist, Oid nonameid, Plan *lefttree, int keycount)
  {
         Sort       *node = makeNode(Sort);
         Plan       *plan = &node->plan;
+       Path            sort_path;              /* dummy for result of cost_sort */
  
-       copy_plan_costsize(plan, lefttree);
-       plan->cost += cost_sort(NIL, plan->plan_rows, plan->plan_width);
+       copy_plan_costsize(plan, lefttree);     /* only care about copying size */
+       cost_sort(&sort_path, NIL, lefttree->plan_rows, lefttree->plan_width);
+       plan->startup_cost = sort_path.startup_cost + lefttree->total_cost;
+       plan->total_cost = sort_path.total_cost + lefttree->total_cost;
         plan->state = (EState *) NULL;
         plan->targetlist = tlist;
         plan->qual = NIL;
@@ -1279,7 +1295,11 @@ make_material(List *tlist,
         Plan       *plan = &node->plan;
  
         copy_plan_costsize(plan, lefttree);
-       /* XXX shouldn't we charge some additional cost for materialization? */
+       /* For plausibility, make startup & total costs equal total cost of
+        * input plan; this only affects EXPLAIN display, not decisions.
+        * XXX shouldn't we charge some additional cost for materialization?
+        */
+       plan->startup_cost = plan->total_cost;
         plan->state = (EState *) NULL;
         plan->targetlist = tlist;
         plan->qual = NIL;
@@ -1292,30 +1312,38 @@ make_material(List *tlist,
  }
  
  Agg *
-make_agg(List *tlist, Plan *lefttree)
+make_agg(List *tlist, List *qual, Plan *lefttree)
  {
         Agg                *node = makeNode(Agg);
+       Plan       *plan = &node->plan;
  
-       copy_plan_costsize(&node->plan, lefttree);
+       copy_plan_costsize(plan, lefttree);
+       /*
+        * Charge one cpu_operator_cost per aggregate function per input tuple.
+        */
+       plan->total_cost += cpu_operator_cost * plan->plan_rows *
+               (length(pull_agg_clause((Node *) tlist)) +
+                length(pull_agg_clause((Node *) qual)));
         /*
-        * The tuple width from the input node is OK, as is the cost (we are
-        * ignoring the cost of computing the aggregate; is there any value
-        * in accounting for it?).  But the tuple count is bogus.  We will
-        * produce a single tuple if the input is not a Group, and a tuple
-        * per group otherwise.  For now, estimate the number of groups as
-        * 10% of the number of tuples --- bogus, but how to do better?
+        * We will produce a single output tuple if the input is not a Group,
+        * and a tuple per group otherwise.  For now, estimate the number of
+        * groups as 10% of the number of tuples --- bogus, but how to do better?
          * (Note we assume the input Group node is in "tuplePerGroup" mode,
          * so it didn't reduce its row count already.)
          */
         if (IsA(lefttree, Group))
-               node->plan.plan_rows *= 0.1;
+               plan->plan_rows *= 0.1;
         else
-               node->plan.plan_rows = 1;
-       node->plan.state = (EState *) NULL;
-       node->plan.qual = NULL;
-       node->plan.targetlist = tlist;
-       node->plan.lefttree = lefttree;
-       node->plan.righttree = (Plan *) NULL;
+       {
+               plan->plan_rows = 1;
+               plan->startup_cost = plan->total_cost;
+       }
+
+       plan->state = (EState *) NULL;
+       plan->qual = qual;
+       plan->targetlist = tlist;
+       plan->lefttree = lefttree;
+       plan->righttree = (Plan *) NULL;
  
         return node;
  }
@@ -1328,8 +1356,14 @@ make_group(List *tlist,
                    Plan *lefttree)
  {
         Group      *node = makeNode(Group);
+       Plan       *plan = &node->plan;
  
-       copy_plan_costsize(&node->plan, lefttree);
+       copy_plan_costsize(plan, lefttree);
+       /*
+        * Charge one cpu_operator_cost per comparison per input tuple.
+        * We assume all columns get compared for most of the tuples.
+        */
+       plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp;
         /*
          * If tuplePerGroup (which is named exactly backwards) is true,
          * we will return all the input tuples, so the input node's row count
@@ -1338,12 +1372,13 @@ make_group(List *tlist,
          * tuples --- bogus, but how to do better?
          */
         if (! tuplePerGroup)
-               node->plan.plan_rows *= 0.1;
-       node->plan.state = (EState *) NULL;
-       node->plan.qual = NULL;
-       node->plan.targetlist = tlist;
-       node->plan.lefttree = lefttree;
-       node->plan.righttree = (Plan *) NULL;
+               plan->plan_rows *= 0.1;
+
+       plan->state = (EState *) NULL;
+       plan->qual = NULL;
+       plan->targetlist = tlist;
+       plan->lefttree = lefttree;
+       plan->righttree = (Plan *) NULL;
         node->tuplePerGroup = tuplePerGroup;
         node->numCols = ngrp;
         node->grpColIdx = grpColIdx;
@@ -1367,11 +1402,17 @@ make_unique(List *tlist, Plan *lefttree, List *distinctList)
         List       *slitem;
  
         copy_plan_costsize(plan, lefttree);
+       /*
+        * Charge one cpu_operator_cost per comparison per input tuple.
+        * We assume all columns get compared for most of the tuples.
+        */
+       plan->total_cost += cpu_operator_cost * plan->plan_rows * numCols;
         /*
          * As for Group, we make the unsupported assumption that there will be
          * 10% as many tuples out as in.
          */
         plan->plan_rows *= 0.1;
+
         plan->state = (EState *) NULL;
         plan->targetlist = tlist;
         plan->qual = NIL;
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index b94cc3e4b425e532d1f8683c62df909143fdbaa8..6b6f3971719d05b0eefec89cb022a6daa29c7998 100644 (file)
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.44 2000/02/07 04:41:00 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.45 2000/02/15 20:49:18 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -21,6 +21,7 @@
  #include "optimizer/cost.h"
  #include "optimizer/joininfo.h"
  #include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
  #include "optimizer/planmain.h"
  #include "optimizer/tlist.h"
  #include "optimizer/var.h"
@@ -31,7 +32,6 @@ static void add_restrict_and_join_to_rel(Query *root, Node *clause);
  static void add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo,
                                                                   Relids join_relids);
  static void add_vars_to_targetlist(Query *root, List *vars);
-static void set_restrictinfo_joininfo(RestrictInfo *restrictinfo);
  static void check_mergejoinable(RestrictInfo *restrictinfo);
  static void check_hashjoinable(RestrictInfo *restrictinfo);
  
@@ -150,7 +150,9 @@ add_restrict_and_join_to_rels(Query *root, List *clauses)
   *       Add clause information to either the 'RestrictInfo' or 'JoinInfo' field
   *       (depending on whether the clause is a join) of each base relation
   *       mentioned in the clause.  A RestrictInfo node is created and added to
- *       the appropriate list for each rel.
+ *       the appropriate list for each rel.  Also, if the clause uses a
+ *       mergejoinable operator, enter the left- and right-side expressions
+ *       into the query's lists of equijoined vars.
   */
  static void
  add_restrict_and_join_to_rel(Query *root, Node *clause)
@@ -181,14 +183,29 @@ add_restrict_and_join_to_rel(Query *root, Node *clause)
  
                 rel->baserestrictinfo = lcons(restrictinfo,
                                                                           rel->baserestrictinfo);
+               /*
+                * Check for a "mergejoinable" clause even though it's not a join
+                * clause.  This is so that we can recognize that "a.x = a.y" makes
+                * x and y eligible to be considered equal, even when they belong
+                * to the same rel.  Without this, we would not recognize that
+                * "a.x = a.y AND a.x = b.z AND a.y = c.q" allows us to consider
+                * z and q equal after their rels are joined.
+                */
+               check_mergejoinable(restrictinfo);
         }
         else
         {
                 /*
                  * 'clause' is a join clause, since there is more than one atom in
                  * the relid list.  Set additional RestrictInfo fields for joining.
+                *
+                * We need the merge info whether or not mergejoin is enabled (for
+                * constructing equijoined-var lists), but we don't bother setting
+                * hash info if hashjoin is disabled.
                  */
-               set_restrictinfo_joininfo(restrictinfo);
+               check_mergejoinable(restrictinfo);
+               if (enable_hashjoin)
+                       check_hashjoinable(restrictinfo);
                 /*
                  * Add clause to the join lists of all the relevant
                  * relations.  (If, perchance, 'clause' contains NO vars, then
@@ -202,6 +219,15 @@ add_restrict_and_join_to_rel(Query *root, Node *clause)
                  */
                 add_vars_to_targetlist(root, vars);
         }
+
+       /*
+        * If the clause has a mergejoinable operator, then the two sides
+        * represent equivalent PathKeyItems for path keys: any path that is
+        * sorted by one side will also be sorted by the other (after joining,
+        * that is).  Record the key equivalence for future use.
+        */
+       if (restrictinfo->mergejoinoperator != InvalidOid)
+               add_equijoined_keys(root, restrictinfo);
  }
  
  /*
@@ -247,24 +273,10 @@ add_join_info_to_rels(Query *root, RestrictInfo *restrictinfo,
  
  /*****************************************************************************
   *
- *      JOININFO
+ *      CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES
   *
   *****************************************************************************/
  
-/*
- * set_restrictinfo_joininfo
- *       Examine a RestrictInfo that has been determined to be a join clause,
- *       and set the merge and hash info fields if it can be merge/hash joined.
- */
-static void
-set_restrictinfo_joininfo(RestrictInfo *restrictinfo)
-{
-       if (enable_mergejoin)
-               check_mergejoinable(restrictinfo);
-       if (enable_hashjoin)
-               check_hashjoinable(restrictinfo);
-}
-
  /*
   * check_mergejoinable
   *       If the restrictinfo's clause is mergejoinable, set the mergejoin
@@ -272,10 +284,7 @@ set_restrictinfo_joininfo(RestrictInfo *restrictinfo)
   *
   *       Currently, we support mergejoin for binary opclauses where
   *       both operands are simple Vars and the operator is a mergejoinable
- *       operator.  (Note: since we are only examining clauses that were
- *       classified as joins, it is certain that the two Vars belong to
- *       different relations... if we accepted more general clause structures
- *       we might need to check that the two sides refer to different rels...)
+ *       operator.
   */
  static void
  check_mergejoinable(RestrictInfo *restrictinfo)
@@ -320,10 +329,7 @@ check_mergejoinable(RestrictInfo *restrictinfo)
   *
   *       Currently, we support hashjoin for binary opclauses where
   *       both operands are simple Vars and the operator is a hashjoinable
- *       operator.  (Note: since we are only examining clauses that were
- *       classified as joins, it is certain that the two Vars belong to
- *       different relations... if we accepted more general clause structures
- *       we might need to check that the two sides refer to different rels...)
+ *       operator.
   */
  static void
  check_hashjoinable(RestrictInfo *restrictinfo)
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c

index a414a910fefe8e55fe5f3bee9e4cd5424f367307..cfa134a3889fea78419bfa7c0eab2baaee99ba48 100644 (file)
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.51 2000/02/07 04:41:00 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.52 2000/02/15 20:49:18 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -19,6 +19,7 @@
  
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
  #include "optimizer/planmain.h"
  #include "optimizer/prep.h"
@@ -27,10 +28,11 @@
  #include "utils/lsyscache.h"
  
  
-static Plan *subplanner(Query *root, List *flat_tlist, List *qual);
+static Plan *subplanner(Query *root, List *flat_tlist, List *qual,
+                                               double tuple_fraction);
  
  
-/*
+/*--------------------
   * query_planner
   *       Routine to create a query plan.  It does so by first creating a
   *       subplan for the topmost level of attributes in the query.  Then,
@@ -41,25 +43,41 @@ static Plan *subplanner(Query *root, List *flat_tlist, List *qual);
   *       be placed where and any relation level qualifications to be
   *       satisfied.
   *
- *       tlist is the target list of the query (do NOT use root->targetList!)
- *       qual is the qualification of the query (likewise!)
+ * tlist is the target list of the query (do NOT use root->targetList!)
+ * qual is the qualification of the query (likewise!)
+ * tuple_fraction is the fraction of tuples we expect will be retrieved
+ *
+ * Note: the Query node now also includes a query_pathkeys field, which
+ * is both an input and an output of query_planner().  The input value
+ * signals query_planner that the indicated sort order is wanted in the
+ * final output plan.  The output value is the actual pathkeys of the
+ * selected path.  This might not be the same as what the caller requested;
+ * the caller must do pathkeys_contained_in() to decide whether an
+ * explicit sort is still needed.  (The main reason query_pathkeys is a
+ * Query field and not a passed parameter is that the low-level routines
+ * in indxpath.c need to see it.)  The pathkeys value passed to query_planner
+ * has not yet been "canonicalized", since the necessary info does not get
+ * computed until subplanner() scans the qual clauses.  We canonicalize it
+ * inside subplanner() as soon as that task is done.  The output value
+ * will be in canonical form as well.
   *
- *       Note: the Query node now also includes a query_pathkeys field, which
- *       is both an input and an output of query_planner().  The input value
- *       signals query_planner that the indicated sort order is wanted in the
- *       final output plan.  The output value is the actual pathkeys of the
- *       selected path.  This might not be the same as what the caller requested;
- *       the caller must do pathkeys_contained_in() to decide whether an
- *       explicit sort is still needed.  (The main reason query_pathkeys is a
- *       Query field and not a passed parameter is that the low-level routines
- *       in indxpath.c need to see it.)
+ * tuple_fraction is interpreted as follows:
+ *    0 (or less): expect all tuples to be retrieved (normal case)
+ *       0 < tuple_fraction < 1: expect the given fraction of tuples available
+ *             from the plan to be retrieved
+ *       tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
+ *             expected to be retrieved (ie, a LIMIT specification)
+ * Note that while this routine and its subroutines treat a negative
+ * tuple_fraction the same as 0, union_planner has a different interpretation.
   *
- *       Returns a query plan.
+ * Returns a query plan.
+ *--------------------
   */
  Plan *
  query_planner(Query *root,
                           List *tlist,
-                         List *qual)
+                         List *qual,
+                         double tuple_fraction)
  {
         List       *constant_qual = NIL;
         List       *var_only_tlist;
@@ -149,7 +167,7 @@ query_planner(Query *root,
         /*
          * Choose the best access path and build a plan for it.
          */
-       subplan = subplanner(root, var_only_tlist, qual);
+       subplan = subplanner(root, var_only_tlist, qual, tuple_fraction);
  
         /*
          * Build a result node to control the plan if we have constant quals.
@@ -192,33 +210,50 @@ query_planner(Query *root,
   *      Subplanner creates an entire plan consisting of joins and scans
   *      for processing a single level of attributes.
   *
- *      flat_tlist is the flattened target list
- *      qual is the qualification to be satisfied
+ * flat_tlist is the flattened target list
+ * qual is the qualification to be satisfied
+ * tuple_fraction is the fraction of tuples we expect will be retrieved
   *
- *      Returns a subplan.
+ * See query_planner() comments about the interpretation of tuple_fraction.
   *
+ * Returns a subplan.
   */
  static Plan *
  subplanner(Query *root,
                    List *flat_tlist,
-                  List *qual)
+                  List *qual,
+                  double tuple_fraction)
  {
         RelOptInfo *final_rel;
-       Cost            cheapest_cost;
-       Path       *sortedpath;
+       Path       *cheapestpath;
+       Path            sort_path;              /* dummy for result of cost_sort */
+       Path       *presortedpath;
  
         /*
          * Initialize the targetlist and qualification, adding entries to
          * base_rel_list as relation references are found (e.g., in the
-        * qualification, the targetlist, etc.)
+        * qualification, the targetlist, etc.).  Restrict and join clauses
+        * are added to appropriate lists belonging to the mentioned relations,
+        * and we also build lists of equijoined keys for pathkey construction.
          */
         root->base_rel_list = NIL;
         root->join_rel_list = NIL;
+       root->equi_key_list = NIL;
  
         make_var_only_tlist(root, flat_tlist);
         add_restrict_and_join_to_rels(root, qual);
         add_missing_rels_to_query(root);
  
+       /*
+        * We should now have all the pathkey equivalence sets built,
+        * so it's now possible to convert the requested query_pathkeys
+        * to canonical form.
+        */
+       root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
+
+       /*
+        * Ready to do the primary planning.
+        */
         final_rel = make_one_rel(root);
  
         if (! final_rel)
@@ -258,96 +293,81 @@ subplanner(Query *root,
                 foreach(pathnode, final_rel->pathlist)
                 {
                         if (xfunc_do_predmig((Path *) lfirst(pathnode)))
-                               set_cheapest(final_rel, final_rel->pathlist);
+                               set_cheapest(final_rel);
                 }
         }
  #endif
  
         /*
-        * Determine the cheapest path and create a subplan to execute it.
+        * Now that we have an estimate of the final rel's size, we can convert
+        * a tuple_fraction specified as an absolute count (ie, a LIMIT option)
+        * into a fraction of the total tuples.
+        */
+       if (tuple_fraction >= 1.0)
+               tuple_fraction /= final_rel->rows;
+
+       /*
+        * Determine the cheapest path, independently of any ordering
+        * considerations.  We do, however, take into account whether the
+        * whole plan is expected to be evaluated or not.
+        */
+       if (tuple_fraction <= 0.0 || tuple_fraction >= 1.0)
+               cheapestpath = final_rel->cheapest_total_path;
+       else
+               cheapestpath =
+                       get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
+                                                                                                         NIL,
+                                                                                                         tuple_fraction);
+
+       Assert(cheapestpath != NULL);
+
+       /*
+        * Select the best path and create a subplan to execute it.
          *
          * If no special sort order is wanted, or if the cheapest path is
-        * already appropriately ordered, just use the cheapest path.
+        * already appropriately ordered, we use the cheapest path found above.
          */
         if (root->query_pathkeys == NIL ||
                 pathkeys_contained_in(root->query_pathkeys,
-                                                         final_rel->cheapestpath->pathkeys))
+                                                         cheapestpath->pathkeys))
         {
-               root->query_pathkeys = final_rel->cheapestpath->pathkeys;
-               return create_plan(root, final_rel->cheapestpath);
+               root->query_pathkeys = cheapestpath->pathkeys;
+               return create_plan(root, cheapestpath);
         }
  
         /*
          * Otherwise, look to see if we have an already-ordered path that is
-        * cheaper than doing an explicit sort on cheapestpath.
+        * cheaper than doing an explicit sort on the cheapest-total-cost path.
          */
-       cheapest_cost = final_rel->cheapestpath->path_cost +
-               cost_sort(root->query_pathkeys, final_rel->rows, final_rel->width);
-
-       sortedpath = get_cheapest_path_for_pathkeys(final_rel->pathlist,
-                                                                                               root->query_pathkeys,
-                                                                                               false);
-       if (sortedpath)
+       cheapestpath = final_rel->cheapest_total_path;
+       cost_sort(&sort_path, root->query_pathkeys,
+                         final_rel->rows, final_rel->width);
+       sort_path.startup_cost += cheapestpath->total_cost;
+       sort_path.total_cost += cheapestpath->total_cost;
+
+       presortedpath =
+               get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
+                                                                                                 root->query_pathkeys,
+                                                                                                 tuple_fraction);
+       if (presortedpath)
         {
-               if (sortedpath->path_cost <= cheapest_cost)
+               if (compare_fractional_path_costs(presortedpath, &sort_path,
+                                                                                 tuple_fraction) <= 0)
                 {
                         /* Found a better presorted path, use it */
-                       root->query_pathkeys = sortedpath->pathkeys;
-                       return create_plan(root, sortedpath);
+                       root->query_pathkeys = presortedpath->pathkeys;
+                       return create_plan(root, presortedpath);
                 }
                 /* otherwise, doing it the hard way is still cheaper */
         }
-       else
-       {
-               /*
-                * If we found no usable presorted path at all, it is possible
-                * that the user asked for descending sort order.  Check to see
-                * if we can satisfy the pathkeys by using a backwards indexscan.
-                * To do this, we commute all the operators in the pathkeys and
-                * then look for a matching path that is an IndexPath.
-                */
-               List       *commuted_pathkeys = copyObject(root->query_pathkeys);
-
-               if (commute_pathkeys(commuted_pathkeys))
-               {
-                       /* pass 'true' to force only IndexPaths to be considered */
-                       sortedpath = get_cheapest_path_for_pathkeys(final_rel->pathlist,
-                                                                                                               commuted_pathkeys,
-                                                                                                               true);
-                       if (sortedpath && sortedpath->path_cost <= cheapest_cost)
-                       {
-                               /*
-                                * Kluge here: since IndexPath has no representation for
-                                * backwards scan, we have to convert to Plan format and
-                                * then poke the result.
-                                */
-                               Plan       *sortedplan = create_plan(root, sortedpath);
-                               List       *sortedpathkeys;
-
-                               Assert(IsA(sortedplan, IndexScan));
-                               ((IndexScan *) sortedplan)->indxorderdir = BackwardScanDirection;
-                               /*
-                                * Need to generate commuted keys representing the actual
-                                * sort order.  This should succeed, probably, but just in
-                                * case it does not, use the original root->query_pathkeys
-                                * as a conservative approximation.
-                                */
-                               sortedpathkeys = copyObject(sortedpath->pathkeys);
-                               if (commute_pathkeys(sortedpathkeys))
-                                       root->query_pathkeys = sortedpathkeys;
-
-                               return sortedplan;
-                       }
-               }
-       }
  
         /*
-        * Nothing for it but to sort the cheapestpath --- but we let the
-        * caller do that.  union_planner has to be able to add a sort node
+        * Nothing for it but to sort the cheapest-total-cost path --- but we let
+        * the caller do that.  union_planner has to be able to add a sort node
          * anyway, so no need for extra code here.  (Furthermore, the given
-        * pathkeys might involve something we can't compute here, such as
-        * an aggregate function...)
+        * pathkeys might involve something we can't compute here, such as an
+        * aggregate function...)
          */
-       root->query_pathkeys = final_rel->cheapestpath->pathkeys;
-       return create_plan(root, final_rel->cheapestpath);
+       root->query_pathkeys = cheapestpath->pathkeys;
+       return create_plan(root, cheapestpath);
  }
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 28483fd4734966caab4ec8ee3bbeadbe6a6d099c..cf400f8df1bb84b77bc176657f3c86f96e96f005 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.74 2000/01/27 18:11:31 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.75 2000/02/15 20:49:18 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -61,7 +61,7 @@ planner(Query *parse)
  
         transformKeySetQuery(parse);
  
-       result_plan = union_planner(parse);
+       result_plan = union_planner(parse, -1.0 /* default case */);
  
         Assert(PlannerQueryLevel == 1);
         if (PlannerPlanId > 0)
@@ -76,23 +76,39 @@ planner(Query *parse)
         return result_plan;
  }
  
-/*
+/*--------------------
   * union_planner
+ *       Invokes the planner on union-type queries (both regular UNIONs and
+ *       appends produced by inheritance), recursing if necessary to get them
+ *       all, then processes normal plans.
   *
- *       Invokes the planner on union queries if there are any left,
- *       recursing if necessary to get them all, then processes normal plans.
+ * parse is the querytree produced by the parser & rewriter.
+ * tuple_fraction is the fraction of tuples we expect will be retrieved
   *
- * Returns a query plan.
+ * tuple_fraction is interpreted as follows:
+ *    < 0: determine fraction by inspection of query (normal case)
+ *    0: expect all tuples to be retrieved
+ *       0 < tuple_fraction < 1: expect the given fraction of tuples available
+ *             from the plan to be retrieved
+ *       tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
+ *             expected to be retrieved (ie, a LIMIT specification)
+ * The normal case is to pass -1, but some callers pass values >= 0 to
+ * override this routine's determination of the appropriate fraction.
   *
+ * Returns a query plan.
+ *--------------------
   */
  Plan *
-union_planner(Query *parse)
+union_planner(Query *parse,
+                         double tuple_fraction)
  {
         List       *tlist = parse->targetList;
         List       *rangetable = parse->rtable;
         Plan       *result_plan = (Plan *) NULL;
         AttrNumber *groupColIdx = NULL;
         List       *current_pathkeys = NIL;
+       List       *group_pathkeys;
+       List       *sort_pathkeys;
         Index           rt_index;
  
         /*
@@ -139,6 +155,12 @@ union_planner(Query *parse)
                  * Actually, for a normal UNION we have done an explicit sort; ought
                  * to change interface to plan_union_queries to pass that info back!
                  */
+
+               /* Calculate pathkeys that represent grouping/ordering requirements */
+               group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause,
+                                                                                                          tlist);
+               sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
+                                                                                                         tlist);
         }
         else if ((rt_index = first_inherit_rt_entry(rangetable)) != -1)
         {
@@ -176,6 +198,12 @@ union_planner(Query *parse)
                  * We leave current_pathkeys NIL indicating we do not know sort order
                  * of the Append-ed results.
                  */
+
+               /* Calculate pathkeys that represent grouping/ordering requirements */
+               group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause,
+                                                                                                          tlist);
+               sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
+                                                                                                         tlist);
         }
         else
         {
@@ -229,32 +257,131 @@ union_planner(Query *parse)
                  */
                 sub_tlist = make_subplanTargetList(parse, tlist, &groupColIdx);
  
+               /* Calculate pathkeys that represent grouping/ordering requirements */
+               group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause,
+                                                                                                          tlist);
+               sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
+                                                                                                         tlist);
+
                 /*
                  * Figure out whether we need a sorted result from query_planner.
                  *
                  * If we have a GROUP BY clause, then we want a result sorted
                  * properly for grouping.  Otherwise, if there is an ORDER BY clause,
-                * we want to sort by the ORDER BY clause.
+                * we want to sort by the ORDER BY clause.  (Note: if we have both,
+                * and ORDER BY is a superset of GROUP BY, it would be tempting to
+                * request sort by ORDER BY --- but that might just leave us failing
+                * to exploit an available sort order at all.  Needs more thought...)
                  */
                 if (parse->groupClause)
+                       parse->query_pathkeys = group_pathkeys;
+               else if (parse->sortClause)
+                       parse->query_pathkeys = sort_pathkeys;
+               else
+                       parse->query_pathkeys = NIL;
+
+               /*
+                * Figure out whether we expect to retrieve all the tuples that the
+                * plan can generate, or to stop early due to a LIMIT or other
+                * factors.  If the caller passed a value >= 0, believe that value,
+                * else do our own examination of the query context.
+                */
+               if (tuple_fraction < 0.0)
                 {
-                       parse->query_pathkeys =
-                               make_pathkeys_for_sortclauses(parse->groupClause, tlist);
+                       /* Initial assumption is we need all the tuples */
+                       tuple_fraction = 0.0;
+                       /*
+                        * Check for a LIMIT.
+                        *
+                        * For now, we deliberately ignore the OFFSET clause, so that
+                        * queries with the same LIMIT and different OFFSETs will get
+                        * the same queryplan and therefore generate consistent results
+                        * (to the extent the planner can guarantee that, anyway).
+                        * XXX Perhaps it would be better to use the OFFSET too, and tell
+                        * users to specify ORDER BY if they want consistent results
+                        * across different LIMIT queries.
+                        */
+                       if (parse->limitCount != NULL)
+                       {
+                               if (IsA(parse->limitCount, Const))
+                               {
+                                       Const      *ccount = (Const *) parse->limitCount;
+                                       tuple_fraction = (double) ((int) (ccount->constvalue));
+                                       /* the constant can legally be either 0 ("ALL") or a
+                                        * positive integer; either is consistent with our
+                                        * conventions for tuple_fraction.
+                                        */
+                               }
+                               else
+                               {
+                                       /* It's a PARAM ... don't know exactly what the limit
+                                        * will be, but for lack of a better idea assume 10%
+                                        * of the plan's result is wanted.
+                                        */
+                                       tuple_fraction = 0.10;
+                               }
+                       }
+                       /*
+                        * Check for a retrieve-into-portal, ie DECLARE CURSOR.
+                        *
+                        * We have no real idea how many tuples the user will ultimately
+                        * FETCH from a cursor, but it seems a good bet that he doesn't
+                        * want 'em all.  Optimize for 10% retrieval (you gotta better
+                        * number?)
+                        */
+                       if (parse->isPortal)
+                               tuple_fraction = 0.10;
                 }
-               else if (parse->sortClause)
+               /*
+                * Adjust tuple_fraction if we see that we are going to apply
+                * grouping/aggregation/etc.  This is not overridable by the
+                * caller, since it reflects plan actions that this routine
+                * will certainly take, not assumptions about context.
+                */
+               if (parse->groupClause)
                 {
-                       parse->query_pathkeys =
-                               make_pathkeys_for_sortclauses(parse->sortClause, tlist);
+                       /*
+                        * In GROUP BY mode, we have the little problem that we don't
+                        * really know how many input tuples will be needed to make a
+                        * group, so we can't translate an output LIMIT count into an
+                        * input count.  For lack of a better idea, assume 10% of the
+                        * input data will be processed if there is any output limit.
+                        */
+                       if (tuple_fraction > 0.0)
+                               tuple_fraction = 0.10;
+                       /*
+                        * If both GROUP BY and ORDER BY are specified, we will need
+                        * two levels of sort --- and, therefore, certainly need to
+                        * read all the input tuples --- unless ORDER BY is a subset
+                        * of GROUP BY.  (Although we are comparing non-canonicalized
+                        * pathkeys here, it should be OK since they will both contain
+                        * only single-element sublists at this point.  See pathkeys.c.)
+                        */
+                       if (parse->groupClause && parse->sortClause &&
+                               ! pathkeys_contained_in(sort_pathkeys, group_pathkeys))
+                               tuple_fraction = 0.0;
                 }
-               else
+               else if (parse->hasAggs)
                 {
-                       parse->query_pathkeys = NIL;
+                       /* Ungrouped aggregate will certainly want all the input tuples. */
+                       tuple_fraction = 0.0;
+               }
+               else if (parse->distinctClause)
+               {
+                       /*
+                        * SELECT DISTINCT, like GROUP, will absorb an unpredictable
+                        * number of input tuples per output tuple.  So, fall back to
+                        * our same old 10% default...
+                        */
+                       if (tuple_fraction > 0.0)
+                               tuple_fraction = 0.10;
                 }
  
                 /* Generate the (sub) plan */
                 result_plan = query_planner(parse,
                                                                         sub_tlist,
-                                                                       (List *) parse->qual);
+                                                                       (List *) parse->qual,
+                                                                       tuple_fraction);
  
                 /* query_planner returns actual sort order (which is not
                  * necessarily what we requested) in query_pathkeys.
@@ -266,6 +393,13 @@ union_planner(Query *parse)
         if (! result_plan)
                 elog(ERROR, "union_planner: failed to create plan");
  
+       /*
+        * We couldn't canonicalize group_pathkeys and sort_pathkeys before
+        * running query_planner(), so do it now.
+        */
+       group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys);
+       sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);
+
         /*
          * If we have a GROUP BY clause, insert a group node (plus the
          * appropriate sort node, if necessary).
@@ -274,7 +408,6 @@ union_planner(Query *parse)
         {
                 bool            tuplePerGroup;
                 List       *group_tlist;
-               List       *group_pathkeys;
                 bool            is_sorted;
  
                 /*
@@ -300,8 +433,6 @@ union_planner(Query *parse)
                  * Figure out whether the path result is already ordered the way we
                  * need it --- if so, no need for an explicit sort step.
                  */
-               group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause,
-                                                                                                          tlist);
                 if (pathkeys_contained_in(group_pathkeys, current_pathkeys))
                 {
                         is_sorted = true;       /* no sort needed now */
@@ -352,15 +483,15 @@ union_planner(Query *parse)
         }
  
         /*
-        * If aggregate is present, insert the agg node
+        * If aggregate is present, insert the Agg node
+        *
+        * HAVING clause, if any, becomes qual of the Agg node
          */
         if (parse->hasAggs)
         {
-               result_plan = (Plan *) make_agg(tlist, result_plan);
-
-               /* HAVING clause, if any, becomes qual of the Agg node */
-               result_plan->qual = (List *) parse->havingQual;
-
+               result_plan = (Plan *) make_agg(tlist,
+                                                                               (List *) parse->havingQual,
+                                                                               result_plan);
                 /* Note: Agg does not affect any existing sort order of the tuples */
         }
  
@@ -370,10 +501,6 @@ union_planner(Query *parse)
          */
         if (parse->sortClause)
         {
-               List       *sort_pathkeys;
-
-               sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
-                                                                                                         tlist);
                 if (! pathkeys_contained_in(sort_pathkeys, current_pathkeys))
                 {
                         result_plan = make_sortplan(tlist, parse->sortClause, result_plan);
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c

index da95a2df41f6796f739848cbccf60a5e858b3d05..2790b2740b69ba1f5e0280cba6e4bb375f25db61 100644 (file)
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.27 2000/01/26 05:56:38 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.28 2000/02/15 20:49:18 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -123,6 +123,7 @@ static Node *
  make_subplan(SubLink *slink)
  {
         SubPlan    *node = makeNode(SubPlan);
+       double          tuple_fraction;
         Plan       *plan;
         List       *lst;
         Node       *result;
@@ -132,7 +133,26 @@ make_subplan(SubLink *slink)
  
         PlannerQueryLevel++;            /* we becomes child */
  
-       node->plan = plan = union_planner((Query *) slink->subselect);
+       /*
+        * For an EXISTS subplan, tell lower-level planner to expect that
+        * only the first tuple will be retrieved.  For ALL, ANY, and MULTIEXPR
+        * subplans, we will be able to stop evaluating if the test condition
+        * fails, so very often not all the tuples will be retrieved; for lack
+        * of a better idea, specify 50% retrieval.  For EXPR_SUBLINK use default
+        * behavior.
+        *
+        * NOTE: if you change these numbers, also change cost_qual_eval_walker
+        * in costsize.c.
+        */
+       if (slink->subLinkType == EXISTS_SUBLINK)
+               tuple_fraction = 1.0;   /* just like a LIMIT 1 */
+       else if (slink->subLinkType == EXPR_SUBLINK)
+               tuple_fraction = -1.0;  /* default behavior */
+       else
+               tuple_fraction = 0.5;   /* 50% */
+
+       node->plan = plan = union_planner((Query *) slink->subselect,
+                                                                         tuple_fraction);
  
         /*
          * Assign subPlan, extParam and locParam to plan nodes. At the moment,
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 9a86cb23488dd051012213c8e522bdee6a70324b..10a48c666e66ca7645bab4e5404237901dacec59 100644 (file)
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.44 2000/02/15 03:37:26 thomas Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.45 2000/02/15 20:49:19 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -122,28 +122,35 @@ plan_union_queries(Query *parse)
         /* Is this a simple one */
         if (!union_all_found ||
                 !union_found ||
-       /* A trailing UNION negates the affect of earlier UNION ALLs */
+               /* A trailing UNION negates the effect of earlier UNION ALLs */
                 !last_union_all_flag)
         {
                 List       *hold_unionClause = parse->unionClause;
+               double          tuple_fraction = -1.0; /* default processing */
  
-               /* we will do this later, so don't do it now */
+               /* we will do sorting later, so don't do it now */
                 if (!union_all_found ||
                         !last_union_all_flag)
                 {
                         parse->sortClause = NIL;
                         parse->distinctClause = NIL;
+                       /*
+                        * force lower-level planning to assume that all tuples will
+                        * be retrieved, even if it sees a LIMIT in the query node.
+                        */
+                       tuple_fraction = 0.0;
                 }
  
                 parse->unionClause = NIL;               /* prevent recursion */
-               union_plans = lcons(union_planner(parse), NIL);
+               union_plans = lcons(union_planner(parse, tuple_fraction), NIL);
                 union_rts = lcons(parse->rtable, NIL);
  
                 foreach(ulist, hold_unionClause)
                 {
                         Query      *union_query = lfirst(ulist);
  
-                       union_plans = lappend(union_plans, union_planner(union_query));
+                       union_plans = lappend(union_plans,
+                                                                 union_planner(union_query, tuple_fraction));
                         union_rts = lappend(union_rts, union_query->rtable);
                 }
         }
@@ -165,9 +172,12 @@ plan_union_queries(Query *parse)
  
                 /*
                  * Recursion, but UNION only. The last one is a UNION, so it will
-                * not come here in recursion,
+                * not come here in recursion.
+                *
+                * XXX is it OK to pass default -1 to union_planner in this path,
+                * or should we force a tuple_fraction value?
                  */
-               union_plans = lcons(union_planner(parse), NIL);
+               union_plans = lcons(union_planner(parse, -1.0), NIL);
                 union_rts = lcons(parse->rtable, NIL);
  
                 /* Append the remaining UNION ALLs */
@@ -175,7 +185,8 @@ plan_union_queries(Query *parse)
                 {
                         Query      *union_all_query = lfirst(ulist);
  
-                       union_plans = lappend(union_plans, union_planner(union_all_query));
+                       union_plans = lappend(union_plans,
+                                                                 union_planner(union_all_query, -1.0));
                         union_rts = lappend(union_rts, union_all_query->rtable);
                 }
         }
@@ -295,6 +306,7 @@ plan_inherit_query(Relids relids,
         List       *union_plans = NIL;
         List       *union_rtentries = NIL;
         List       *save_tlist = root->targetList;
+       double tuple_fraction;
         List       *i;
  
         /*
@@ -303,6 +315,17 @@ plan_inherit_query(Relids relids,
          */
         root->targetList = NIL;
  
+       /*
+        * If we are going to need sorting or grouping at the top level,
+        * force lower-level planners to assume that all tuples will be
+        * retrieved.
+        */
+       if (root->distinctClause || root->sortClause ||
+               root->groupClause || root->hasAggs)
+               tuple_fraction = 0.0; /* will need all tuples from each subplan */
+       else
+               tuple_fraction = -1.0; /* default behavior is OK (I think) */
+
         foreach(i, relids)
         {
                 int                     relid = lfirsti(i);
@@ -344,7 +367,8 @@ plan_inherit_query(Relids relids,
                                                           relid,
                                                           new_root);
  
-               union_plans = lappend(union_plans, union_planner(new_root));
+               union_plans = lappend(union_plans,
+                                                         union_planner(new_root, tuple_fraction));
                 union_rtentries = lappend(union_rtentries, new_rt_entry);
         }
  
@@ -551,14 +575,17 @@ make_append(List *appendplans,
         node->unionrtables = unionrtables;
         node->inheritrelid = rt_index;
         node->inheritrtable = inheritrtable;
-       node->plan.cost = 0;
+       node->plan.startup_cost = 0;
+       node->plan.total_cost = 0;
         node->plan.plan_rows = 0;
         node->plan.plan_width = 0;
         foreach(subnode, appendplans)
         {
                 Plan   *subplan = (Plan *) lfirst(subnode);
  
-               node->plan.cost += subplan->cost;
+               if (subnode == appendplans)     /* first node? */
+                       node->plan.startup_cost = subplan->startup_cost;
+               node->plan.total_cost += subplan->total_cost;
                 node->plan.plan_rows += subplan->plan_rows;
                 if (node->plan.plan_width < subplan->plan_width)
                         node->plan.plan_width = subplan->plan_width;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index 7c3c20b855f0327ae097eedb25f878f64bd0d72a..ba991388de0962d7a234c2d3876e90fde93a6e62 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.59 2000/02/07 04:41:01 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.60 2000/02/15 20:49:20 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -29,67 +29,122 @@
   *****************************************************************************/
  
  /*
- * path_is_cheaper
- *       Returns t iff 'path1' is cheaper than 'path2'.
+ * compare_path_costs
+ *       Return -1, 0, or +1 according as path1 is cheaper, the same cost,
+ *       or more expensive than path2 for the specified criterion.
+ */
+int
+compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
+{
+       if (criterion == STARTUP_COST)
+       {
+               if (path1->startup_cost < path2->startup_cost)
+                       return -1;
+               if (path1->startup_cost > path2->startup_cost)
+                       return +1;
+               /*
+                * If paths have the same startup cost (not at all unlikely),
+                * order them by total cost.
+                */
+               if (path1->total_cost < path2->total_cost)
+                       return -1;
+               if (path1->total_cost > path2->total_cost)
+                       return +1;
+       }
+       else
+       {
+               if (path1->total_cost < path2->total_cost)
+                       return -1;
+               if (path1->total_cost > path2->total_cost)
+                       return +1;
+               /*
+                * If paths have the same total cost, order them by startup cost.
+                */
+               if (path1->startup_cost < path2->startup_cost)
+                       return -1;
+               if (path1->startup_cost > path2->startup_cost)
+                       return +1;
+       }
+       return 0;
+}
+
+/*
+ * compare_fractional_path_costs
+ *       Return -1, 0, or +1 according as path1 is cheaper, the same cost,
+ *       or more expensive than path2 for fetching the specified fraction
+ *       of the total tuples.
   *
+ * If fraction is <= 0 or >= 1, we interpret it as 1, ie, we select the
+ * path with the cheaper total_cost.
   */
-bool
-path_is_cheaper(Path *path1, Path *path2)
+int
+compare_fractional_path_costs(Path *path1, Path *path2,
+                                                         double fraction)
  {
-       return (bool) (path1->path_cost < path2->path_cost);
+       Cost            cost1,
+                               cost2;
+
+       if (fraction <= 0.0 || fraction >= 1.0)
+               return compare_path_costs(path1, path2, TOTAL_COST);
+       cost1 = path1->startup_cost +
+               fraction * (path1->total_cost - path1->startup_cost);
+       cost2 = path2->startup_cost +
+               fraction * (path2->total_cost - path2->startup_cost);
+       if (cost1 < cost2)
+               return -1;
+       if (cost1 > cost2)
+               return +1;
+       return 0;
  }
  
  /*
   * set_cheapest
- *       Finds the minimum cost path from among a relation's paths.
+ *       Find the minimum-cost paths from among a relation's paths,
+ *       and save them in the rel's cheapest-path fields.
   *
- * 'parent_rel' is the parent relation
- * 'pathlist' is a list of path nodes corresponding to 'parent_rel'
- *
- * Returns and sets the relation entry field with the pathnode that
- * is minimum.
+ * This is normally called only after we've finished constructing the path
+ * list for the rel node.
   *
+ * If we find two paths of identical costs, try to keep the better-sorted one.
+ * The paths might have unrelated sort orderings, in which case we can only
+ * guess which might be better to keep, but if one is superior then we
+ * definitely should keep it.
   */
-Path *
-set_cheapest(RelOptInfo *parent_rel, List *pathlist)
+void
+set_cheapest(RelOptInfo *parent_rel)
  {
+       List       *pathlist = parent_rel->pathlist;
         List       *p;
-       Path       *cheapest_so_far;
+       Path       *cheapest_startup_path;
+       Path       *cheapest_total_path;
  
         Assert(IsA(parent_rel, RelOptInfo));
         Assert(pathlist != NIL);
  
-       cheapest_so_far = (Path *) lfirst(pathlist);
+       cheapest_startup_path = cheapest_total_path = (Path *) lfirst(pathlist);
  
         foreach(p, lnext(pathlist))
         {
                 Path       *path = (Path *) lfirst(p);
-
-               if (path_is_cheaper(path, cheapest_so_far))
-                       cheapest_so_far = path;
+               int                     cmp;
+
+               cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
+               if (cmp > 0 ||
+                       (cmp == 0 &&
+                        compare_pathkeys(cheapest_startup_path->pathkeys,
+                                                         path->pathkeys) == PATHKEYS_BETTER2))
+                       cheapest_startup_path = path;
+
+               cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
+               if (cmp > 0 ||
+                       (cmp == 0 &&
+                        compare_pathkeys(cheapest_total_path->pathkeys,
+                                                         path->pathkeys) == PATHKEYS_BETTER2))
+                       cheapest_total_path = path;
         }
  
-       parent_rel->cheapestpath = cheapest_so_far;
-
-       return cheapest_so_far;
-}
-
-/*
- * add_pathlist
- *       Consider each path given in new_paths, and add it to the parent rel's
- *       pathlist if it seems worthy.
- */
-void
-add_pathlist(RelOptInfo *parent_rel, List *new_paths)
-{
-       List       *p1;
-
-       foreach(p1, new_paths)
-       {
-               Path       *new_path = (Path *) lfirst(p1);
-
-               add_path(parent_rel, new_path);
-       }
+       parent_rel->cheapest_startup_path = cheapest_startup_path;
+       parent_rel->cheapest_total_path = cheapest_total_path;
  }
  
  /*
@@ -97,12 +152,18 @@ add_pathlist(RelOptInfo *parent_rel, List *new_paths)
   *       Consider a potential implementation path for the specified parent rel,
   *       and add it to the rel's pathlist if it is worthy of consideration.
   *       A path is worthy if it has either a better sort order (better pathkeys)
- *       or cheaper cost than any of the existing old paths.
+ *       or cheaper cost (on either dimension) than any of the existing old paths.
   *
   *       Unless parent_rel->pruneable is false, we also remove from the rel's
   *       pathlist any old paths that are dominated by new_path --- that is,
   *       new_path is both cheaper and at least as well ordered.
   *
+ *       NOTE: discarded Path objects are immediately pfree'd to reduce planner
+ *       memory consumption.  We dare not try to free the substructure of a Path,
+ *       since much of it may be shared with other Paths or the query tree itself;
+ *       but just recycling discarded Path nodes is a very useful savings in
+ *       a large join tree.
+ *
   * 'parent_rel' is the relation entry to which the path corresponds.
   * 'new_path' is a potential path for parent_rel.
   *
@@ -124,26 +185,40 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
         {
                 Path       *old_path = (Path *) lfirst(p1);
                 bool            remove_old = false;     /* unless new proves superior */
+               int                     costcmp;
  
-               switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
+               costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
+               /*
+                * If the two paths compare differently for startup and total cost,
+                * then we want to keep both, and we can skip the (much slower)
+                * comparison of pathkeys.  If they compare the same, proceed with
+                * the pathkeys comparison.  Note this test relies on the fact that
+                * compare_path_costs will only return 0 if both costs are equal
+                * (and, therefore, there's no need to call it twice in that case).
+                */
+               if (costcmp == 0 ||
+                       costcmp == compare_path_costs(new_path, old_path, STARTUP_COST))
                 {
-                       case PATHKEYS_EQUAL:
-                               if (new_path->path_cost < old_path->path_cost)
-                                       remove_old = true; /* new dominates old */
-                               else
-                                       accept_new = false;     /* old equals or dominates new */
-                               break;
-                       case PATHKEYS_BETTER1:
-                               if (new_path->path_cost <= old_path->path_cost)
-                                       remove_old = true; /* new dominates old */
-                               break;
-                       case PATHKEYS_BETTER2:
-                               if (new_path->path_cost >= old_path->path_cost)
-                                       accept_new = false;     /* old dominates new */
-                               break;
-                       case PATHKEYS_DIFFERENT:
-                               /* keep both paths, since they have different ordering */
-                               break;
+                       switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
+                       {
+                               case PATHKEYS_EQUAL:
+                                       if (costcmp < 0)
+                                               remove_old = true; /* new dominates old */
+                                       else
+                                               accept_new = false;     /* old equals or dominates new */
+                                       break;
+                               case PATHKEYS_BETTER1:
+                                       if (costcmp <= 0)
+                                               remove_old = true; /* new dominates old */
+                                       break;
+                               case PATHKEYS_BETTER2:
+                                       if (costcmp >= 0)
+                                               accept_new = false;     /* old dominates new */
+                                       break;
+                               case PATHKEYS_DIFFERENT:
+                                       /* keep both paths, since they have different ordering */
+                                       break;
+                       }
                 }
  
                 /*
@@ -156,6 +231,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
                                 lnext(p1_prev) = lnext(p1);
                         else
                                 parent_rel->pathlist = lnext(p1);
+                       pfree(old_path);
                 }
                 else
                         p1_prev = p1;
@@ -174,6 +250,11 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
                 /* Accept the path */
                 parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
         }
+       else
+       {
+               /* Reject and recycle the path */
+               pfree(new_path);
+       }
  }
  
  
@@ -195,7 +276,8 @@ create_seqscan_path(RelOptInfo *rel)
         pathnode->pathtype = T_SeqScan;
         pathnode->parent = rel;
         pathnode->pathkeys = NIL;       /* seqscan has unordered result */
-       pathnode->path_cost = cost_seqscan(rel);
+
+       cost_seqscan(pathnode, rel);
  
         return pathnode;
  }
@@ -208,6 +290,10 @@ create_seqscan_path(RelOptInfo *rel)
   * 'index' is an index on 'rel'
   * 'restriction_clauses' is a list of RestrictInfo nodes
   *                     to be used as index qual conditions in the scan.
+ * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
+ *                     if the caller expects a specific scan direction,
+ *                     or NoMovementScanDirection if the caller is willing to accept
+ *                     an unordered index.
   *
   * Returns the new path node.
   */
@@ -215,14 +301,31 @@ IndexPath  *
  create_index_path(Query *root,
                                   RelOptInfo *rel,
                                   IndexOptInfo *index,
-                                 List *restriction_clauses)
+                                 List *restriction_clauses,
+                                 ScanDirection indexscandir)
  {
         IndexPath  *pathnode = makeNode(IndexPath);
         List       *indexquals;
  
         pathnode->path.pathtype = T_IndexScan;
         pathnode->path.parent = rel;
-       pathnode->path.pathkeys = build_index_pathkeys(root, rel, index);
+
+       pathnode->path.pathkeys = build_index_pathkeys(root, rel, index,
+                                                                                                  indexscandir);
+       if (pathnode->path.pathkeys == NIL)
+       {
+               /* No ordering available from index, is that OK? */
+               if (! ScanDirectionIsNoMovement(indexscandir))
+                       elog(ERROR, "create_index_path: failed to create ordered index scan");
+       }
+       else
+       {
+               /* The index is ordered, and build_index_pathkeys defaulted to
+                * forward scan, so make sure we mark the pathnode properly.
+                */
+               if (ScanDirectionIsNoMovement(indexscandir))
+                       indexscandir = ForwardScanDirection;
+       }
  
         indexquals = get_actual_clauses(restriction_clauses);
         /* expand special operators to indexquals the executor can handle */
@@ -234,10 +337,10 @@ create_index_path(Query *root,
          */
         pathnode->indexid = lconsi(index->indexoid, NIL);
         pathnode->indexqual = lcons(indexquals, NIL);
+       pathnode->indexscandir = indexscandir;
         pathnode->joinrelids = NIL;     /* no join clauses here */
  
-       pathnode->path.path_cost = cost_index(root, rel, index, indexquals,
-                                                                                 false);
+       cost_index(&pathnode->path, root, rel, index, indexquals, false);
  
         return pathnode;
  }
@@ -256,13 +359,14 @@ create_tidscan_path(RelOptInfo *rel, List *tideval)
         pathnode->path.pathtype = T_TidScan;
         pathnode->path.parent = rel;
         pathnode->path.pathkeys = NIL;
-       pathnode->path.path_cost = cost_tidscan(rel, tideval);
-       /* divide selectivity for each clause to get an equal selectivity
-        * as IndexScan does OK ? 
-       */
         pathnode->tideval = copyObject(tideval); /* is copy really necessary? */
         pathnode->unjoined_relids = NIL;
  
+       cost_tidscan(&pathnode->path, rel, tideval);
+       /* XXX is it OK to divide selectivity for each clause to get an equal
+        * selectivity, as IndexScan does?
+        */
+
         return pathnode;
  }
  
@@ -296,9 +400,8 @@ create_nestloop_path(RelOptInfo *joinrel,
         pathnode->joinrestrictinfo = restrict_clauses;
         pathnode->path.pathkeys = pathkeys;
  
-       pathnode->path.path_cost = cost_nestloop(outer_path,
-                                                                                        inner_path,
-                                                                                        IsA(inner_path, IndexPath));
+       cost_nestloop(&pathnode->path, outer_path, inner_path,
+                                 restrict_clauses, IsA(inner_path, IndexPath));
  
         return pathnode;
  }
@@ -350,10 +453,13 @@ create_mergejoin_path(RelOptInfo *joinrel,
         pathnode->path_mergeclauses = mergeclauses;
         pathnode->outersortkeys = outersortkeys;
         pathnode->innersortkeys = innersortkeys;
-       pathnode->jpath.path.path_cost = cost_mergejoin(outer_path,
-                                                                                                       inner_path,
-                                                                                                       outersortkeys,
-                                                                                                       innersortkeys);
+
+       cost_mergejoin(&pathnode->jpath.path,
+                                  outer_path,
+                                  inner_path,
+                                  restrict_clauses,
+                                  outersortkeys,
+                                  innersortkeys);
  
         return pathnode;
  }
@@ -388,9 +494,12 @@ create_hashjoin_path(RelOptInfo *joinrel,
         /* A hashjoin never has pathkeys, since its ordering is unpredictable */
         pathnode->jpath.path.pathkeys = NIL;
         pathnode->path_hashclauses = hashclauses;
-       pathnode->jpath.path.path_cost = cost_hashjoin(outer_path,
-                                                                                                  inner_path,
-                                                                                                  innerdisbursion);
+
+       cost_hashjoin(&pathnode->jpath.path,
+                                 outer_path,
+                                 inner_path,
+                                 restrict_clauses,
+                                 innerdisbursion);
  
         return pathnode;
  }
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c

index d81eebfbb1317113fd58dc293302a47e4ae4be76..8663cdb0241de0d4aaa475cbfca59a20b9514ee1 100644 (file)
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.46 2000/01/26 05:56:40 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.47 2000/02/15 20:49:20 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -118,6 +118,7 @@ find_secondary_indexes(Query *root, Index relid)
                 }
                 else
                         info->indpred = NIL;
+               info->lossy = index->indislossy;
  
                 for (i = 0; i < INDEX_MAX_KEYS; i++)
                         info->indexkeys[i] = index->indkey[i];
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c

index d22daa0f638c7677abe51fa0ffebd12c69c5fe4a..f11dd60d243f3fe7f1a10a78eac2b4156a9bf7bc 100644 (file)
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.23 2000/02/07 04:41:02 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.24 2000/02/15 20:49:21 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -62,12 +62,14 @@ get_base_rel(Query *root, int relid)
         rel->width = 0;
         rel->targetlist = NIL;
         rel->pathlist = NIL;
-       rel->cheapestpath = (Path *) NULL;
+       rel->cheapest_startup_path = NULL;
+       rel->cheapest_total_path = NULL;
         rel->pruneable = true;
         rel->indexed = false;
         rel->pages = 0;
         rel->tuples = 0;
         rel->baserestrictinfo = NIL;
+       rel->baserestrictcost = 0;
         rel->joininfo = NIL;
         rel->innerjoin = NIL;
  
@@ -180,12 +182,14 @@ get_join_rel(Query *root,
         joinrel->width = 0;
         joinrel->targetlist = NIL;
         joinrel->pathlist = NIL;
-       joinrel->cheapestpath = (Path *) NULL;
+       joinrel->cheapest_startup_path = NULL;
+       joinrel->cheapest_total_path = NULL;
         joinrel->pruneable = true;
         joinrel->indexed = false;
         joinrel->pages = 0;
         joinrel->tuples = 0;
         joinrel->baserestrictinfo = NIL;
+       joinrel->baserestrictcost = 0;
         joinrel->joininfo = NIL;
         joinrel->innerjoin = NIL;
  
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index ab41413432c5d6d5481cf0799ee60fc0ccc3d7fa..30106744ded9f3e091e5dbbb5260eb3b62f69d3a 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.54 2000/01/26 05:57:14 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.55 2000/02/15 20:49:21 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -756,7 +756,9 @@ getattstatistics(Oid relid,
  static void
  genericcostestimate(Query *root, RelOptInfo *rel,
                                         IndexOptInfo *index, List *indexQuals,
-                                       Cost *indexAccessCost, Selectivity *indexSelectivity)
+                                       Cost *indexStartupCost,
+                                       Cost *indexTotalCost,
+                                       Selectivity *indexSelectivity)
  {
         double numIndexTuples;
         double numIndexPages;
@@ -771,8 +773,17 @@ genericcostestimate(Query *root, RelOptInfo *rel,
         /* Estimate the number of index pages that will be retrieved */
         numIndexPages = *indexSelectivity * index->pages;
  
-       /* Compute the index access cost */
-    *indexAccessCost = numIndexPages + cpu_index_page_weight * numIndexTuples;
+       /*
+        * Compute the index access cost.
+        *
+        * Our generic assumption is that the index pages will be read
+        * sequentially, so they have cost 1.0 each, not random_page_cost.
+        * Also, we charge for evaluation of the indexquals at each index tuple.
+        * All the costs are assumed to be paid incrementally during the scan.
+        */
+    *indexStartupCost = 0;
+    *indexTotalCost = numIndexPages +
+               (cpu_index_tuple_cost + cost_qual_eval(indexQuals)) * numIndexTuples;
  }
  
  /*
@@ -782,35 +793,43 @@ genericcostestimate(Query *root, RelOptInfo *rel,
  void
  btcostestimate(Query *root, RelOptInfo *rel,
                            IndexOptInfo *index, List *indexQuals,
-                          Cost *indexAccessCost, Selectivity *indexSelectivity)
+                          Cost *indexStartupCost,
+                          Cost *indexTotalCost,
+                          Selectivity *indexSelectivity)
  {
         genericcostestimate(root, rel, index, indexQuals,
-                                               indexAccessCost, indexSelectivity);
+                                               indexStartupCost, indexTotalCost, indexSelectivity);
  }
  
  void
  rtcostestimate(Query *root, RelOptInfo *rel,
                            IndexOptInfo *index, List *indexQuals,
-                          Cost *indexAccessCost, Selectivity *indexSelectivity)
+                          Cost *indexStartupCost,
+                          Cost *indexTotalCost,
+                          Selectivity *indexSelectivity)
  {
         genericcostestimate(root, rel, index, indexQuals,
-                                               indexAccessCost, indexSelectivity);
+                                               indexStartupCost, indexTotalCost, indexSelectivity);
  }
  
  void
  hashcostestimate(Query *root, RelOptInfo *rel,
                                  IndexOptInfo *index, List *indexQuals,
-                                Cost *indexAccessCost, Selectivity *indexSelectivity)
+                                Cost *indexStartupCost,
+                                Cost *indexTotalCost,
+                                Selectivity *indexSelectivity)
  {
         genericcostestimate(root, rel, index, indexQuals,
-                                               indexAccessCost, indexSelectivity);
+                                               indexStartupCost, indexTotalCost, indexSelectivity);
  }
  
  void
  gistcostestimate(Query *root, RelOptInfo *rel,
                                  IndexOptInfo *index, List *indexQuals,
-                                Cost *indexAccessCost, Selectivity *indexSelectivity)
+                                Cost *indexStartupCost,
+                                Cost *indexTotalCost,
+                                Selectivity *indexSelectivity)
  {
         genericcostestimate(root, rel, index, indexQuals,
-                                               indexAccessCost, indexSelectivity);
+                                               indexStartupCost, indexTotalCost, indexSelectivity);
  }
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c

index 50a15fc7d0b721ad17d92505b5b3482a2fdce90a..8f8c2fad8c7dd0c94a1cb85e02c3ce529727d9f3 100644 (file)
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -3,7 +3,7 @@
   *
   * Copyright 2000 by PostgreSQL Global Development Group
   *
- * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.10 2000/02/07 23:10:07 petere Exp $
+ * $Header: /cvsroot/pgsql/src/bin/psql/tab-complete.c,v 1.11 2000/02/15 20:49:22 tgl Exp $
   */
  
  /*-----------
@@ -172,8 +172,30 @@ char ** psql_completion(char *text, int start, int end)
      };
  
      static char * pgsql_variables[] = {
-        "Client_Encoding", "Names", "DateStyle", "Server_Encoding", "TimeZone",
-        "TRANSACTION", "Cost_Heap", "Cost_Index", "GEQO", "KSQO", "Query_Limit",
+               /* these SET arguments are known in gram.y */
+               "TRANSACTION ISOLATION LEVEL",
+               "NAMES",
+               /* rest should match table in src/backend/commands/variable.c */
+               "DateStyle",
+               "TimeZone",
+               "effective_cache_size",
+               "random_page_cost",
+               "cpu_tuple_cost",
+               "cpu_index_tuple_cost",
+               "cpu_operator_cost",
+               "enable_seqscan",
+               "enable_indexscan",
+               "enable_tidscan",
+               "enable_sort",
+               "enable_nestloop",
+               "enable_mergejoin",
+               "enable_hashjoin",
+               "GEQO",
+               "client_encoding",
+               "server_encoding",
+               "KSQO",
+               "XactIsoLevel",
+               "PG_Options",
          NULL
      };
  
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index d6d8ff6ffba407164c55fa4f40c4ec0e8dcedd39..bf93830ca21ccd3ca45352e8815fa2f7fd7a2640 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: catversion.h,v 1.13 2000/01/27 18:11:40 tgl Exp $
+ * $Id: catversion.h,v 1.14 2000/02/15 20:49:23 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,6 +53,6 @@
   */
  
  /*                          yyyymmddN */
-#define CATALOG_VERSION_NO  200001271
+#define CATALOG_VERSION_NO  200002151
  
  #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index 064232d62ccaaae15666415d52ab2bc1b10ac1bc..87107b1df367553325787ed1c12876d32a2f74ae 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: pg_proc.h,v 1.120 2000/02/10 19:51:46 momjian Exp $
+ * $Id: pg_proc.h,v 1.121 2000/02/15 20:49:23 tgl Exp $
   *
   * NOTES
   *       The script catalog/genbki.sh reads this file and generates .bki
@@ -212,9 +212,9 @@ DESCR("not equal");
  DATA(insert OID =  89 (  version                  PGUID 11 f t f 0 f 25 "" 100 0 0 100 version - ));
  DESCR("PostgreSQL version string");
  
-DATA(insert OID = 1265 (  rtcostestimate   PGUID 11 f t f 6 f 0 "0 0 0 0 0 0" 100 0 0 100  rtcostestimate - ));
+DATA(insert OID = 1265 (  rtcostestimate   PGUID 11 f t f 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100  rtcostestimate - ));
  DESCR("r-tree cost estimator");
-DATA(insert OID = 1268 (  btcostestimate   PGUID 11 f t f 6 f 0 "0 0 0 0 0 0" 100 0 0 100  btcostestimate - ));
+DATA(insert OID = 1268 (  btcostestimate   PGUID 11 f t f 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100  btcostestimate - ));
  DESCR("btree cost estimator");
  
  /* OIDS 100 - 199 */
@@ -796,7 +796,7 @@ DESCR("convert name to char()");
  DATA(insert OID =  409 (  bpchar_name     PGUID 11 f t t 1 f   19 "1042" 100 0 0 100   bpchar_name - ));
  DESCR("convert char() to name");
  
-DATA(insert OID =  438 (  hashcostestimate PGUID 11 f t f 6 f 0 "0 0 0 0 0 0" 100 0 0 100  hashcostestimate - ));
+DATA(insert OID =  438 (  hashcostestimate PGUID 11 f t f 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100  hashcostestimate - ));
  DESCR("hash index cost estimator");
  
  DATA(insert OID = 440 (  hashgettuple     PGUID 11 f t f 2 f 23 "0" 100 0 0 100  hashgettuple - ));
@@ -1031,7 +1031,7 @@ DESCR("larger of two");
  DATA(insert OID = 771 (  int2smaller      PGUID 11 f t t 2 f 21 "21 21" 100 0 0 100  int2smaller - ));
  DESCR("smaller of two");
  
-DATA(insert OID = 772 (  gistcostestimate  PGUID 11 f t f 6 f 0 "0 0 0 0 0 0" 100 0 0 100  gistcostestimate - ));
+DATA(insert OID = 772 (  gistcostestimate  PGUID 11 f t f 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100  gistcostestimate - ));
  DESCR("gist cost estimator");
  DATA(insert OID = 774 (  gistgettuple     PGUID 11 f t f 2 f 23 "0" 100 0 0 100  gistgettuple - ));
  DESCR("gist(internal)");
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h

index c1d6c4c1ca5f427f803564de7fd2927f48bcfe5d..161b53c25af97db7ad2d96d18aaccde3f7c75f94 100644 (file)
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodes.h,v 1.63 2000/01/26 05:58:16 momjian Exp $
+ * $Id: nodes.h,v 1.64 2000/02/15 20:49:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -257,6 +257,9 @@ typedef struct Node
         (IsA(t, Noname) || IsA(t, Material) || IsA(t, Sort) || \
          IsA(t, Unique))
  
+#define IsA_Value(t) \
+       (IsA(t, Integer) || IsA(t, Float) || IsA(t, String))
+
  /* ----------------------------------------------------------------
   *                                       extern declarations follow
   * ----------------------------------------------------------------
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index 6eb47618c5e02ee14961e94ea4722291851ed187..df7bec10f0008912ca33a9ec008fbd73a4d4cf1e 100644 (file)
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: parsenodes.h,v 1.98 2000/02/15 03:38:14 thomas Exp $
+ * $Id: parsenodes.h,v 1.99 2000/02/15 20:49:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -72,6 +72,7 @@ typedef struct Query
         /* internal to planner */
         List       *base_rel_list;      /* list of base-relation RelOptInfos */
         List       *join_rel_list;      /* list of join-relation RelOptInfos */
+       List       *equi_key_list;      /* list of lists of equijoined PathKeyItems */
         List       *query_pathkeys; /* pathkeys for query_planner()'s result */
  } Query;
  
@@ -1124,7 +1125,6 @@ typedef struct RangeTblEntry
  {
         NodeTag         type;
         char       *relname;            /* real name of the relation */
-//     char       *refname;            /* reference name (given in FROM clause) */
  #ifndef DISABLE_JOIN_SYNTAX
         Attr       *ref;                        /* reference names (given in FROM clause) */
  #endif
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h

index ff83431e580841812a81e70e7013392822d0776a..2731c57948cc13db433cd55f81a9b417ffc7cba2 100644 (file)
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: plannodes.h,v 1.37 2000/01/27 18:11:44 tgl Exp $
+ * $Id: plannodes.h,v 1.38 2000/02/15 20:49:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -65,10 +65,15 @@ typedef struct Plan
  {
         NodeTag         type;
  
-       /* planner's estimates of cost and result size */
-       Cost            cost;
-       double          plan_rows;
-       int                     plan_width;
+       /* estimated execution costs for plan (see costsize.c for more info) */
+       Cost            startup_cost;   /* cost expended before fetching any tuples */
+       Cost            total_cost;             /* total cost (assuming all tuples fetched) */
+
+       /* planner's estimate of result size (note: LIMIT, if any, is not
+        * considered in setting plan_rows)
+        */
+       double          plan_rows;              /* number of rows plan is expected to emit */
+       int                     plan_width;             /* average row width in bytes */
  
         EState     *state;                      /* at execution time, state's of
                                                                  * individual nodes point to one EState
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index 529aa5cea7a8c88d0e64c102739cc48d8946ca82..3efdaa5b32562da5c1689dcd02e3d35e541f602a 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,13 +7,14 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: relation.h,v 1.43 2000/02/07 04:41:02 tgl Exp $
+ * $Id: relation.h,v 1.44 2000/02/15 20:49:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #ifndef RELATION_H
  #define RELATION_H
  
+#include "access/sdir.h"
  #include "nodes/parsenodes.h"
  
  /*
@@ -25,6 +26,12 @@
  
  typedef List *Relids;
  
+/*
+ * When looking for a "cheapest path", this enum specifies whether we want
+ * cheapest startup cost or cheapest total cost.
+ */
+typedef enum CostSelector { STARTUP_COST, TOTAL_COST } CostSelector;
+
  /*
   * RelOptInfo
   *             Per-relation information for planning/optimization
@@ -38,10 +45,14 @@ typedef List *Relids;
   *                        clauses have been applied (ie, output rows of a plan for it)
   *             width - avg. number of bytes per tuple in the relation after the
   *                             appropriate projections have been done (ie, output width)
- *             targetlist - List of TargetList nodes
+ *             targetlist - List of TargetEntry nodes for the attributes we need
+ *                                      to output from this relation
   *             pathlist - List of Path nodes, one for each potentially useful
   *                                method of generating the relation
- *             cheapestpath -  least expensive Path (regardless of ordering)
+ *             cheapest_startup_path - the pathlist member with lowest startup cost
+ *                                                             (regardless of its ordering)
+ *             cheapest_total_path - the pathlist member with lowest total cost
+ *                                                       (regardless of its ordering)
   *             pruneable - flag to let the planner know whether it can prune the
   *                                     pathlist of this RelOptInfo or not.
   *
@@ -57,6 +68,8 @@ typedef List *Relids;
   *             baserestrictinfo - List of RestrictInfo nodes, containing info about
   *                                     each qualification clause in which this relation
   *                                     participates (only used for base rels)
+ *             baserestrictcost - Estimated cost of evaluating the baserestrictinfo
+ *                                     clauses at a single tuple (only used for base rels)
   *             joininfo  - List of JoinInfo nodes, containing info about each join
   *                                     clause in which this relation participates
   *             innerjoin - List of Path nodes that represent indices that may be used
@@ -74,6 +87,10 @@ typedef List *Relids;
   * (field joinrestrictinfo), not in the parent relation.  But it's OK for
   * the RelOptInfo to store the joininfo lists, because those are the same
   * for a given rel no matter how we form it.
+ *
+ * We store baserestrictcost in the RelOptInfo (for base relations) because
+ * we know we will need it at least once (to price the sequential scan)
+ * and may need it multiple times to price index scans.
   */
  
  typedef struct RelOptInfo
@@ -90,7 +107,8 @@ typedef struct RelOptInfo
         /* materialization information */
         List       *targetlist;
         List       *pathlist;           /* Path structures */
-       struct Path *cheapestpath;
+       struct Path *cheapest_startup_path;
+       struct Path *cheapest_total_path;
         bool            pruneable;
  
         /* statistics from pg_class (only valid if it's a base rel!) */
@@ -100,6 +118,7 @@ typedef struct RelOptInfo
  
         /* used by various scans and joins: */
         List       *baserestrictinfo; /* RestrictInfo structures (if base rel) */
+       Cost            baserestrictcost; /* cost of evaluating the above */
         List       *joininfo;           /* JoinInfo structures */
         List       *innerjoin;          /* potential indexscans for nestloop joins */
         /* innerjoin indexscans are not in the main pathlist because they are
@@ -126,6 +145,7 @@ typedef struct RelOptInfo
   *             amcostestimate - OID of the relam's cost estimator
   *             indproc   - OID of the function if a functional index, else 0
   *             indpred   - index predicate if a partial index, else NULL
+ *             lossy     - true if index is lossy (may return non-matching tuples)
   *
   *             NB. the last element of the arrays classlist, indexkeys and ordering
   *                     is always 0.
@@ -151,6 +171,7 @@ typedef struct IndexOptInfo
  
         Oid                     indproc;                /* if a functional index */
         List       *indpred;            /* if a partial index */
+       bool            lossy;                  /* if a lossy index */
  } IndexOptInfo;
  
  /*
@@ -190,7 +211,9 @@ typedef struct Path
  
         RelOptInfo *parent;                     /* the relation this path can build */
  
-       Cost            path_cost;              /* estimated execution cost of path */
+       /* estimated execution costs for path (see costsize.c for more info) */
+       Cost            startup_cost;   /* cost expended before fetching any tuples */
+       Cost            total_cost;             /* total cost (assuming all tuples fetched) */
  
         NodeTag         pathtype;               /* tag identifying scan/join method */
         /* XXX why is pathtype separate from the NodeTag? */
@@ -207,27 +230,34 @@ typedef struct Path
   * the same tuple more than once, even if it is matched in multiple scans.)
   *
   * 'indexid' is a list of index relation OIDs, one per scan to be performed.
+ *
   * 'indexqual' is a list of index qualifications, also one per scan.
   * Each entry in 'indexqual' is a sublist of qualification expressions with
   * implicit AND semantics across the sublist items.  Only expressions that
   * are usable as indexquals (as determined by indxpath.c) may appear here.
- *
   * NOTE that the semantics of the top-level list in 'indexqual' is OR
   * combination, while the sublists are implicitly AND combinations!
+ *
+ * 'indexscandir' is one of:
+ *             ForwardScanDirection: forward scan of an ordered index
+ *             BackwardScanDirection: backward scan of an ordered index
+ *             NoMovementScanDirection: scan of an unordered index, or don't care
+ * (The executor doesn't care whether it gets ForwardScanDirection or
+ * NoMovementScanDirection for an indexscan, but the planner wants to
+ * distinguish ordered from unordered indexes for building pathkeys.)
+ *
+ * 'joinrelids' is only used in IndexPaths that are constructed for use
+ * as the inner path of a nestloop join.  These paths have indexquals
+ * that refer to values of other rels, so those other rels must be
+ * included in the outer joinrel in order to make a usable join.
   *----------
   */
-
  typedef struct IndexPath
  {
         Path            path;
         List       *indexid;
         List       *indexqual;
-       /*
-        * joinrelids is only used in IndexPaths that are constructed for use
-        * as the inner path of a nestloop join.  These paths have indexquals
-        * that refer to values of other rels, so those other rels must be
-        * included in the outer joinrel in order to make a usable join.
-        */
+       ScanDirection indexscandir;
         Relids          joinrelids;                     /* other rels mentioned in indexqual */
  } IndexPath;
  
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index 79153c01d83e421466cf4b4487e1e6d392861b03..960a2ea9e9aafd72d9a79689ff6581cf0e6c276b 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: cost.h,v 1.29 2000/02/07 04:41:04 tgl Exp $
+ * $Id: cost.h,v 1.30 2000/02/15 20:49:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -17,9 +17,12 @@
  #include "nodes/relation.h"
  
  /* defaults for costsize.c's Cost parameters */
-/* NB: cost-estimation code should use the variables, not the constants! */
-#define CPU_PAGE_WEIGHT  0.033
-#define CPU_INDEX_PAGE_WEIGHT  0.017
+/* NB: cost-estimation code should use the variables, not these constants! */
+#define DEFAULT_EFFECTIVE_CACHE_SIZE  1000.0   /* measured in pages */
+#define DEFAULT_RANDOM_PAGE_COST  4.0
+#define DEFAULT_CPU_TUPLE_COST  0.01
+#define DEFAULT_CPU_INDEX_TUPLE_COST 0.001
+#define DEFAULT_CPU_OPERATOR_COST  0.0025
  
  /* defaults for function attributes used for expensive function calculations */
  #define BYTE_PCT 100
@@ -33,8 +36,12 @@
   *       routines to compute costs and sizes
   */
  
-extern Cost cpu_page_weight;
-extern Cost cpu_index_page_weight;
+/* parameter variables and flags */
+extern double effective_cache_size;
+extern Cost random_page_cost;
+extern Cost cpu_tuple_cost;
+extern Cost cpu_index_tuple_cost;
+extern Cost cpu_operator_cost;
  extern Cost disable_cost;
  extern bool enable_seqscan;
  extern bool enable_indexscan;
@@ -44,17 +51,20 @@ extern bool enable_nestloop;
  extern bool enable_mergejoin;
  extern bool enable_hashjoin;
  
-extern Cost cost_seqscan(RelOptInfo *baserel);
-extern Cost cost_index(Query *root, RelOptInfo *baserel, IndexOptInfo *index,
+extern void cost_seqscan(Path *path, RelOptInfo *baserel);
+extern void cost_index(Path *path, Query *root,
+                                          RelOptInfo *baserel, IndexOptInfo *index,
                                            List *indexQuals, bool is_injoin);
-extern Cost cost_tidscan(RelOptInfo *baserel, List *tideval);
-extern Cost cost_sort(List *pathkeys, double tuples, int width);
-extern Cost cost_nestloop(Path *outer_path, Path *inner_path,
-                                                 bool is_indexjoin);
-extern Cost cost_mergejoin(Path *outer_path, Path *inner_path,
+extern void cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval);
+extern void cost_sort(Path *path, List *pathkeys, double tuples, int width);
+extern void cost_nestloop(Path *path, Path *outer_path, Path *inner_path,
+                                                 List *restrictlist, bool is_indexjoin);
+extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
+                                                  List *restrictlist,
                                                    List *outersortkeys, List *innersortkeys);
-extern Cost cost_hashjoin(Path *outer_path, Path *inner_path,
-                                                 Selectivity innerdisbursion);
+extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path,
+                                                 List *restrictlist, Selectivity innerdisbursion);
+extern Cost cost_qual_eval(List *quals);
  extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
  extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
                                                                            RelOptInfo *outer_rel,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index eefb2553b3d6649aefad807b2404693189980f89..e59848278f4f626c011a0277e984e8451c6bb5b0 100644 (file)
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: pathnode.h,v 1.25 2000/02/07 04:41:04 tgl Exp $
+ * $Id: pathnode.h,v 1.26 2000/02/15 20:49:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -19,15 +19,18 @@
  /*
   * prototypes for pathnode.c
   */
-extern bool path_is_cheaper(Path *path1, Path *path2);
-extern Path *set_cheapest(RelOptInfo *parent_rel, List *pathlist);
+extern int compare_path_costs(Path *path1, Path *path2,
+                                                         CostSelector criterion);
+extern int compare_fractional_path_costs(Path *path1, Path *path2,
+                                                                                double fraction);
+extern void set_cheapest(RelOptInfo *parent_rel);
  extern void add_path(RelOptInfo *parent_rel, Path *new_path);
-extern void add_pathlist(RelOptInfo *parent_rel, List *new_paths);
  
  extern Path *create_seqscan_path(RelOptInfo *rel);
  extern IndexPath *create_index_path(Query *root, RelOptInfo *rel,
                                                                         IndexOptInfo *index,
-                                                                       List *restriction_clauses);
+                                                                       List *restriction_clauses,
+                                                                       ScanDirection indexscandir);
  extern TidPath *create_tidscan_path(RelOptInfo *rel, List *tideval);
  
  extern NestPath *create_nestloop_path(RelOptInfo *joinrel,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h

index 256aac90d754c2dd6db3ac1784f3788bd9b787dd..d7a0cc2d54602f4c6b21977d2adb13f2314cc3fc 100644 (file)
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: paths.h,v 1.42 2000/02/07 04:41:04 tgl Exp $
+ * $Id: paths.h,v 1.43 2000/02/15 20:49:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -33,9 +33,9 @@ extern RelOptInfo *make_one_rel(Query *root);
   * indxpath.c
   *       routines to generate index paths
   */
-extern List *create_index_paths(Query *root, RelOptInfo *rel, List *indices,
-                                                               List *restrictinfo_list,
-                                                               List *joininfo_list);
+extern void create_index_paths(Query *root, RelOptInfo *rel, List *indices,
+                                                          List *restrictinfo_list,
+                                                          List *joininfo_list);
  extern Oid indexable_operator(Expr *clause, Oid opclass, Oid relam,
                                                           bool indexkey_on_left);
  extern List *extract_or_indexqual_conditions(RelOptInfo *rel,
@@ -47,14 +47,14 @@ extern List *expand_indexqual_conditions(List *indexquals);
   * orindxpath.c
   *       additional routines for indexable OR clauses
   */
-extern List *create_or_index_paths(Query *root, RelOptInfo *rel,
-                                                                  List *clauses);
+extern void create_or_index_paths(Query *root, RelOptInfo *rel,
+                                                                 List *clauses);
  
  /*
   * tidpath.h
   *       routines to generate tid paths
   */
-extern List *create_tidscan_paths(Query *root, RelOptInfo *rel);
+extern void create_tidscan_paths(Query *root, RelOptInfo *rel);
  
  /*
   * joinpath.c
@@ -89,20 +89,27 @@ typedef enum
         PATHKEYS_DIFFERENT                      /* neither pathkey includes the other */
  } PathKeysComparison;
  
+extern void add_equijoined_keys(Query *root, RestrictInfo *restrictinfo);
+extern List *canonicalize_pathkeys(Query *root, List *pathkeys);
  extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
  extern bool pathkeys_contained_in(List *keys1, List *keys2);
  extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
-                                                                                       bool indexpaths_only);
+                                                                                       CostSelector cost_criterion);
+extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
+                                                                                                          List *pathkeys,
+                                                                                                          double fraction);
  extern List *build_index_pathkeys(Query *root, RelOptInfo *rel,
-                                                                 IndexOptInfo *index);
+                                                                 IndexOptInfo *index,
+                                                                 ScanDirection scandir);
  extern List *build_join_pathkeys(List *outer_pathkeys,
-                                                                List *join_rel_tlist, List *joinclauses);
-extern bool commute_pathkeys(List *pathkeys);
+                                                                List *join_rel_tlist,
+                                                                List *equi_key_list);
  extern List *make_pathkeys_for_sortclauses(List *sortclauses,
                                                                                    List *tlist);
  extern List *find_mergeclauses_for_pathkeys(List *pathkeys,
                                                                                         List *restrictinfos);
-extern List *make_pathkeys_for_mergeclauses(List *mergeclauses,
+extern List *make_pathkeys_for_mergeclauses(Query *root,
+                                                                                       List *mergeclauses,
                                                                                         List *tlist);
  
  #endif  /* PATHS_H */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h

index 340f54485cb7c0470cd24d2328d5d90a4530104b..2d61a035bf86ea72d77a42f5a17aa6873938dc46 100644 (file)
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: planmain.h,v 1.37 2000/01/27 18:11:45 tgl Exp $
+ * $Id: planmain.h,v 1.38 2000/02/15 20:49:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -20,7 +20,8 @@
  /*
   * prototypes for plan/planmain.c
   */
-extern Plan *query_planner(Query *root, List *tlist, List *qual);
+extern Plan *query_planner(Query *root, List *tlist, List *qual,
+                                                  double tuple_fraction);
  
  /*
   * prototypes for plan/createplan.c
@@ -29,7 +30,7 @@ extern Plan *create_plan(Query *root, Path *best_path);
  extern SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
  extern Sort *make_sort(List *tlist, Oid nonameid, Plan *lefttree,
                   int keycount);
-extern Agg *make_agg(List *tlist, Plan *lefttree);
+extern Agg *make_agg(List *tlist, List *qual, Plan *lefttree);
  extern Group *make_group(List *tlist, bool tuplePerGroup, int ngrp,
                    AttrNumber *grpColIdx, Plan *lefttree);
  extern Noname *make_noname(List *tlist, List *pathkeys, Plan *subplan);
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h

index 00a6e55dfd0affdb4ed3891a9601bf0a1a14776b..c06f41b852ebd022ceec1bf135559783d7b767b1 100644 (file)
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: planner.h,v 1.13 2000/01/26 05:58:21 momjian Exp $
+ * $Id: planner.h,v 1.14 2000/02/15 20:49:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -21,7 +21,7 @@
  #include "nodes/plannodes.h"
  
  extern Plan *planner(Query *parse);
-extern Plan *union_planner(Query *parse);
+extern Plan *union_planner(Query *parse, double tuple_fraction);
  extern void pg_checkretval(Oid rettype, List *querytree_list);
  
  #endif  /* PLANNER_H */
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h

index cb7ab0e802736f18a0e0f968da9c920510af9c58..7f9dcc6c4696757a93532358dadb7c947f717a95 100644 (file)
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: builtins.h,v 1.100 2000/02/10 19:51:52 momjian Exp $
+ * $Id: builtins.h,v 1.101 2000/02/15 20:49:27 tgl Exp $
   *
   * NOTES
   *       This should normally only be included by fmgr.h.
@@ -403,19 +403,23 @@ extern bool convert_to_scalar(Datum value, Oid typid, double *scaleval);
  
  extern void btcostestimate(Query *root, RelOptInfo *rel,
                                                    IndexOptInfo *index, List *indexQuals,
-                                                  Cost *indexAccessCost,
+                                                  Cost *indexStartupCost,
+                                                  Cost *indexTotalCost,
                                                    Selectivity *indexSelectivity);
  extern void rtcostestimate(Query *root, RelOptInfo *rel,
                                                    IndexOptInfo *index, List *indexQuals,
-                                                  Cost *indexAccessCost,
+                                                  Cost *indexStartupCost,
+                                                  Cost *indexTotalCost,
                                                    Selectivity *indexSelectivity);
  extern void hashcostestimate(Query *root, RelOptInfo *rel,
                                                          IndexOptInfo *index, List *indexQuals,
-                                                        Cost *indexAccessCost,
+                                                        Cost *indexStartupCost,
+                                                        Cost *indexTotalCost,
                                                          Selectivity *indexSelectivity);
  extern void gistcostestimate(Query *root, RelOptInfo *rel,
                                                          IndexOptInfo *index, List *indexQuals,
-                                                        Cost *indexAccessCost,
+                                                        Cost *indexStartupCost,
+                                                        Cost *indexTotalCost,
                                                          Selectivity *indexSelectivity);
  
  /* tid.c */
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c

index 94bf6bf0b866fdfbb437c81bb9d3bdf063e2bf89..9202cb23c965431b284de3b0026b55a519a186ac 100644 (file)
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-connect.c,v 1.118 2000/02/07 23:10:09 petere Exp $
+ *       $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-connect.c,v 1.119 2000/02/15 20:49:28 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -172,12 +172,6 @@ static struct EnvironmentOptions
         },
  #endif
         /* internal performance-related settings */
-       {
-               "PGCOSTHEAP", "cost_heap"
-       },
-       {
-               "PGCOSTINDEX", "cost_index"
-       },
         {
                 "PGGEQO", "geqo"
         },
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out

index 53561ab33c8d94903750c89d0354ae6781da8859..174925df849c77702856a39183d120f827ce1cf5 100644 (file)
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -93,8 +93,11 @@ SELECT * FROM tmp;
  DROP TABLE tmp;
  --
  -- rename -
---   should preserve indices
+--   should preserve indices, which we can check by seeing if a SELECT
+--   chooses an indexscan; however, in the absence of vacuum statistics
+--   it might not.  Therefore, vacuum first.
  --
+VACUUM ANALYZE tenk1;
  ALTER TABLE tenk1 RENAME TO ten_k;
  -- 20 values, sorted 
  SELECT unique1 FROM ten_k WHERE unique1 < 20;
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out

index d6953ab5a571b6a54a16bee590e8182a299292ec..ed6cbac1df06d61e884e60393f32683154d17373 100644 (file)
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -4,7 +4,8 @@
  -- btree index
  -- awk '{if($1<10){print;}else{next;}}' onek.data | sort +0n -1
  --
-SELECT onek.* WHERE onek.unique1 < 10;
+SELECT onek.* WHERE onek.unique1 < 10
+   ORDER BY onek.unique1;
   unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 
  ---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
         0 |     998 |   0 |    0 |   0 |      0 |       0 |        0 |           0 |         0 |        0 |   0 |    1 | AAAAAA   | KMBAAA   | OOOOxx
diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql

index bef443b9302553e9017a37a2acfdd92e30bc6d17..5ba66c46e8f62d3d4cd3acdfe6b5e2eb6b8cf1f9 100644 (file)
--- a/src/test/regress/sql/alter_table.sql
+++ b/src/test/regress/sql/alter_table.sql
@@ -141,8 +141,12 @@ DROP TABLE tmp;
  
  --
  -- rename -
---   should preserve indices
+--   should preserve indices, which we can check by seeing if a SELECT
+--   chooses an indexscan; however, in the absence of vacuum statistics
+--   it might not.  Therefore, vacuum first.
  --
+VACUUM ANALYZE tenk1;
+
  ALTER TABLE tenk1 RENAME TO ten_k;
  
  -- 20 values, sorted 
diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql

index 3d5e66c98cd3d6709069a4e31aa0667506313960..42b664eaaee200cc465d70a32817a359474564cd 100644 (file)
--- a/src/test/regress/sql/select.sql
+++ b/src/test/regress/sql/select.sql
@@ -5,7 +5,8 @@
  -- btree index
  -- awk '{if($1<10){print;}else{next;}}' onek.data | sort +0n -1
  --
-SELECT onek.* WHERE onek.unique1 < 10;
+SELECT onek.* WHERE onek.unique1 < 10
+   ORDER BY onek.unique1;
  
  --
  -- awk '{if($1<20){print $1,$14;}else{next;}}' onek.data | sort +0nr -1
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 15 Feb 2000 20:49:31 +0000 (20:49 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 15 Feb 2000 20:49:31 +0000 (20:49 +0000)
doc/src/sgml/libpq++.sgml		patch \| blob \| history
doc/src/sgml/libpq.sgml		patch \| blob \| history
doc/src/sgml/ref/set.sgml		patch \| blob \| history
doc/src/sgml/ref/show.sgml		patch \| blob \| history
src/backend/commands/explain.c		patch \| blob \| history
src/backend/commands/variable.c		patch \| blob \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| history
src/backend/nodes/freefuncs.c		patch \| blob \| history
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/nodes/print.c		patch \| blob \| history
src/backend/nodes/readfuncs.c		patch \| blob \| history
src/backend/optimizer/README		patch \| blob \| history
src/backend/optimizer/geqo/geqo_eval.c		patch \| blob \| history
src/backend/optimizer/geqo/geqo_misc.c		patch \| blob \| history
src/backend/optimizer/path/allpaths.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/backend/optimizer/path/indxpath.c		patch \| blob \| history
src/backend/optimizer/path/joinpath.c		patch \| blob \| history
src/backend/optimizer/path/orindxpath.c		patch \| blob \| history
src/backend/optimizer/path/pathkeys.c		patch \| blob \| history
src/backend/optimizer/path/tidpath.c		patch \| blob \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| history
src/backend/optimizer/plan/initsplan.c		patch \| blob \| history
src/backend/optimizer/plan/planmain.c		patch \| blob \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| history
src/backend/optimizer/plan/subselect.c		patch \| blob \| history
src/backend/optimizer/prep/prepunion.c		patch \| blob \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| history
src/backend/optimizer/util/plancat.c		patch \| blob \| history
src/backend/optimizer/util/relnode.c		patch \| blob \| history
src/backend/utils/adt/selfuncs.c		patch \| blob \| history
src/bin/psql/tab-complete.c		patch \| blob \| history
src/include/catalog/catversion.h		patch \| blob \| history
src/include/catalog/pg_proc.h		patch \| blob \| history
src/include/nodes/nodes.h		patch \| blob \| history
src/include/nodes/parsenodes.h		patch \| blob \| history
src/include/nodes/plannodes.h		patch \| blob \| history
src/include/nodes/relation.h		patch \| blob \| history
src/include/optimizer/cost.h		patch \| blob \| history
src/include/optimizer/pathnode.h		patch \| blob \| history
src/include/optimizer/paths.h		patch \| blob \| history
src/include/optimizer/planmain.h		patch \| blob \| history
src/include/optimizer/planner.h		patch \| blob \| history
src/include/utils/builtins.h		patch \| blob \| history
src/interfaces/libpq/fe-connect.c		patch \| blob \| history
src/test/regress/expected/alter_table.out		patch \| blob \| history
src/test/regress/expected/select.out		patch \| blob \| history
src/test/regress/sql/alter_table.sql		patch \| blob \| history
src/test/regress/sql/select.sql		patch \| blob \| history