granicus.if.org Git - postgresql/commitdiff
Create core infrastructure for KNNGIST.
author: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 3 Dec 2010 01:50:48 +0000 (20:50 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 3 Dec 2010 01:51:37 +0000 (20:51 -0500)
This is a heavily revised version of builtin_knngist_core-0.9.  The
ordering operators are no longer mixed in with actual quals, which would
have confused not only humans but significant parts of the planner.
Instead, ordering operators are carried separately throughout planning and
execution.

Since the API for ambeginscan and amrescan functions had to be changed
anyway, this commit takes the opportunity to rationalize that a bit.
RelationGetIndexScan no longer forces a premature index_rescan call;
instead, callers of index_beginscan must call index_rescan too.  Aside from
making the AM-side initialization logic a bit less peculiar, this has the
advantage that we do not make a useless extra amrescan call when there are
runtime key values.  AMs formerly could not assume that the key values
passed to amrescan were actually valid; now they can.

Teodor Sigaev and Tom Lane

40 files changed:
doc/src/sgml/catalogs.sgml
doc/src/sgml/indexam.sgml
src/backend/access/gin/ginscan.c
src/backend/access/gist/gistscan.c
src/backend/access/hash/hash.c
src/backend/access/index/genam.c
src/backend/access/index/indexam.c
src/backend/access/nbtree/nbtree.c
src/backend/commands/cluster.c
src/backend/commands/explain.c
src/backend/executor/execQual.c
src/backend/executor/execUtils.c
src/backend/executor/nodeBitmapIndexscan.c
src/backend/executor/nodeIndexscan.c
src/backend/executor/nodeMergejoin.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/plan/setrefs.c
src/backend/optimizer/plan/subselect.c
src/backend/optimizer/util/pathnode.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/cache/lsyscache.c
src/include/access/genam.h
src/include/access/relscan.h
src/include/access/skey.h
src/include/catalog/catversion.h
src/include/catalog/pg_am.h
src/include/catalog/pg_proc.h
src/include/executor/nodeIndexscan.h
src/include/nodes/execnodes.h
src/include/nodes/plannodes.h
src/include/nodes/relation.h
src/include/optimizer/cost.h
src/include/optimizer/pathnode.h
src/include/pg_config_manual.h
src/include/utils/lsyscache.h

index 54a6dcc102cf256f87c7ef4fc845724efec8d353..217a04e4998b767884253b62af8b1e712c9a9a07 100644 (file)
       <entry><structfield>ambeginscan</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Start new scan</quote> function</entry>
+      <entry><quote>Prepare for index scan</quote> function</entry>
      </row>
 
      <row>
       <entry><structfield>amrescan</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Restart this scan</quote> function</entry>
+      <entry><quote>(Re)start index scan</quote> function</entry>
      </row>
 
      <row>
       <entry><structfield>amendscan</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>End this scan</quote> function</entry>
+      <entry><quote>Clean up after index scan</quote> function</entry>
      </row>
 
      <row>
index 925aac4571ff6f813fdb44ae3953be1e41e8b38c..d0905eb3e2851c16105c89cb8cfc34becdce266f 100644 (file)
@@ -268,6 +268,7 @@ void
 amcostestimate (PlannerInfo *root,
                 IndexOptInfo *index,
                 List *indexQuals,
+                List *indexOrderBys,
                 RelOptInfo *outer_rel,
                 Cost *indexStartupCost,
                 Cost *indexTotalCost,
@@ -318,19 +319,42 @@ amoptions (ArrayType *reloptions,
 IndexScanDesc
 ambeginscan (Relation indexRelation,
              int nkeys,
-             ScanKey key);
+             int norderbys);
 </programlisting>
-   Begin a new scan.  The <literal>key</> array (of length <literal>nkeys</>)
-   describes the scan key(s) for the index scan.  The result must be a
-   palloc'd struct. For implementation reasons the index access method
+   Prepare for an index scan.  The <literal>nkeys</> and <literal>norderbys</>
+   parameters indicate the number of quals and ordering operators that will be
+   used in the scan; these may be useful for space allocation purposes.
+   Note that the actual values of the scan keys aren't provided yet.
+   The result must be a palloc'd struct.
+   For implementation reasons the index access method
    <emphasis>must</> create this struct by calling
    <function>RelationGetIndexScan()</>.  In most cases
-   <function>ambeginscan</> itself does little beyond making that call;
+   <function>ambeginscan</> does little beyond making that call and perhaps
+   acquiring locks;
    the interesting parts of index-scan startup are in <function>amrescan</>.
   </para>
 
   <para>
 <programlisting>
+void
+amrescan (IndexScanDesc scan,
+          ScanKey keys,
+          int nkeys,
+          ScanKey orderbys,
+          int norderbys);
+</programlisting>
+   Start or restart an indexscan, possibly with new scan keys.  (To restart
+   using previously-passed keys, NULL is passed for <literal>keys</> and/or
+   <literal>orderbys</>.)  Note that it is not allowed for
+   the number of keys or order-by operators to be larger than
+   what was passed to <function>ambeginscan</>.  In practice the restart
+   feature is used when a new outer tuple is selected by a nested-loop join
+   and so a new key comparison value is needed, but the scan key structure
+   remains the same.
+  </para>
+
+  <para>
+<programlisting>
 boolean
 amgettuple (IndexScanDesc scan,
             ScanDirection direction);
@@ -393,22 +417,6 @@ amgetbitmap (IndexScanDesc scan,
   <para>
 <programlisting>
 void
-amrescan (IndexScanDesc scan,
-          ScanKey key);
-</programlisting>
-   Restart the given scan, possibly with new scan keys (to continue using
-   the old keys, NULL is passed for <literal>key</>).  Note that it is not
-   possible for the number of keys to be changed.  In practice the restart
-   feature is used when a new outer tuple is selected by a nested-loop join
-   and so a new key comparison value is needed, but the scan key structure
-   remains the same.  This function is also called by
-   <function>RelationGetIndexScan()</>, so it is used for initial setup
-   of an index scan as well as rescanning.
-  </para>
-
-  <para>
-<programlisting>
-void
 amendscan (IndexScanDesc scan);
 </programlisting>
    End a scan and release resources.  The <literal>scan</> struct itself
@@ -820,8 +828,9 @@ amrestrpos (IndexScanDesc scan);
   <title>Index Cost Estimation Functions</title>
 
   <para>
-   The <function>amcostestimate</> function is given a list of WHERE clauses that have
-   been determined to be usable with the index.  It must return estimates
+   The <function>amcostestimate</> function is given information describing
+   a possible index scan, including lists of WHERE and ORDER BY clauses that
+   have been determined to be usable with the index.  It must return estimates
    of the cost of accessing the index and the selectivity of the WHERE
    clauses (that is, the fraction of parent-table rows that will be
    retrieved during the index scan).  For simple cases, nearly all the
@@ -839,6 +848,7 @@ void
 amcostestimate (PlannerInfo *root,
                 IndexOptInfo *index,
                 List *indexQuals,
+                List *indexOrderBys,
                 RelOptInfo *outer_rel,
                 Cost *indexStartupCost,
                 Cost *indexTotalCost,
@@ -846,7 +856,7 @@ amcostestimate (PlannerInfo *root,
                 double *indexCorrelation);
 </programlisting>
 
-   The first four parameters are inputs:
+   The first five parameters are inputs:
 
    <variablelist>
     <varlistentry>
@@ -873,6 +883,17 @@ amcostestimate (PlannerInfo *root,
       <para>
        List of index qual clauses (implicitly ANDed);
        a <symbol>NIL</> list indicates no qualifiers are available.
+       Note that the list contains expression trees with RestrictInfo nodes
+       at the top, not ScanKeys.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><parameter>indexOrderBys</></term>
+     <listitem>
+      <para>
+       List of indexable ORDER BY operators, or <symbol>NIL</> if none.
        Note that the list contains expression trees, not ScanKeys.
       </para>
      </listitem>
index a6604c4c9349a1c0968dde3dd6b09530bd853fae..3a5e52dc383c68e90d77d3e33f667f713559cab9 100644 (file)
@@ -26,11 +26,28 @@ Datum
 ginbeginscan(PG_FUNCTION_ARGS)
 {
        Relation        rel = (Relation) PG_GETARG_POINTER(0);
-       int                     keysz = PG_GETARG_INT32(1);
-       ScanKey         scankey = (ScanKey) PG_GETARG_POINTER(2);
+       int                     nkeys = PG_GETARG_INT32(1);
+       int                     norderbys = PG_GETARG_INT32(2);
        IndexScanDesc scan;
+       GinScanOpaque so;
+
+       /* no order by operators allowed */
+       Assert(norderbys == 0);
+
+       scan = RelationGetIndexScan(rel, nkeys, norderbys);
+
+       /* allocate private workspace */
+       so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData));
+       so->keys = NULL;
+       so->nkeys = 0;
+       so->tempCtx = AllocSetContextCreate(CurrentMemoryContext,
+                                                                               "Gin scan temporary context",
+                                                                               ALLOCSET_DEFAULT_MINSIZE,
+                                                                               ALLOCSET_DEFAULT_INITSIZE,
+                                                                               ALLOCSET_DEFAULT_MAXSIZE);
+       initGinState(&so->ginstate, scan->indexRelation);
 
-       scan = RelationGetIndexScan(rel, keysz, scankey);
+       scan->opaque = so;
 
        PG_RETURN_POINTER(scan);
 }
@@ -241,27 +258,10 @@ ginrescan(PG_FUNCTION_ARGS)
 {
        IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
        ScanKey         scankey = (ScanKey) PG_GETARG_POINTER(1);
-       GinScanOpaque so;
-
-       so = (GinScanOpaque) scan->opaque;
-
-       if (so == NULL)
-       {
-               /* if called from ginbeginscan */
-               so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData));
-               so->tempCtx = AllocSetContextCreate(CurrentMemoryContext,
-                                                                                       "Gin scan temporary context",
-                                                                                       ALLOCSET_DEFAULT_MINSIZE,
-                                                                                       ALLOCSET_DEFAULT_INITSIZE,
-                                                                                       ALLOCSET_DEFAULT_MAXSIZE);
-               initGinState(&so->ginstate, scan->indexRelation);
-               scan->opaque = so;
-       }
-       else
-       {
-               freeScanKeys(so->keys, so->nkeys);
-       }
+       /* remaining arguments are ignored */
+       GinScanOpaque so = (GinScanOpaque) scan->opaque;
 
+       freeScanKeys(so->keys, so->nkeys);
        so->keys = NULL;
 
        if (scankey && scan->numberOfKeys > 0)
@@ -280,14 +280,11 @@ ginendscan(PG_FUNCTION_ARGS)
        IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
        GinScanOpaque so = (GinScanOpaque) scan->opaque;
 
-       if (so != NULL)
-       {
-               freeScanKeys(so->keys, so->nkeys);
+       freeScanKeys(so->keys, so->nkeys);
 
-               MemoryContextDelete(so->tempCtx);
+       MemoryContextDelete(so->tempCtx);
 
-               pfree(so);
-       }
+       pfree(so);
 
        PG_RETURN_VOID();
 }
index 21f4ea54b7dc8b301779aea1aa22738da0ea4fc9..106714511a815e072e43047039802558eb44b3a9 100644 (file)
@@ -28,10 +28,24 @@ gistbeginscan(PG_FUNCTION_ARGS)
 {
        Relation        r = (Relation) PG_GETARG_POINTER(0);
        int                     nkeys = PG_GETARG_INT32(1);
-       ScanKey         key = (ScanKey) PG_GETARG_POINTER(2);
+       int                     norderbys = PG_GETARG_INT32(2);
        IndexScanDesc scan;
+       GISTScanOpaque so;
+
+       /* no order by operators allowed */
+       Assert(norderbys == 0);
+
+       scan = RelationGetIndexScan(r, nkeys, norderbys);
+
+       /* initialize opaque data */
+       so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData));
+       so->stack = NULL;
+       so->tempCxt = createTempGistContext();
+       so->curbuf = InvalidBuffer;
+       so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
+       initGISTstate(so->giststate, scan->indexRelation);
 
-       scan = RelationGetIndexScan(r, nkeys, key);
+       scan->opaque = so;
 
        PG_RETURN_POINTER(scan);
 }
@@ -41,33 +55,18 @@ gistrescan(PG_FUNCTION_ARGS)
 {
        IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
        ScanKey         key = (ScanKey) PG_GETARG_POINTER(1);
-       GISTScanOpaque so;
+       /* remaining arguments are ignored */
+       GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
        int                     i;
 
-       so = (GISTScanOpaque) scan->opaque;
-       if (so != NULL)
+       /* rescan an existing indexscan --- reset state */
+       gistfreestack(so->stack);
+       so->stack = NULL;
+       /* drop pins on buffers -- no locks held */
+       if (BufferIsValid(so->curbuf))
        {
-               /* rescan an existing indexscan --- reset state */
-               gistfreestack(so->stack);
-               so->stack = NULL;
-               /* drop pins on buffers -- no locks held */
-               if (BufferIsValid(so->curbuf))
-               {
-                       ReleaseBuffer(so->curbuf);
-                       so->curbuf = InvalidBuffer;
-               }
-       }
-       else
-       {
-               /* initialize opaque data */
-               so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData));
-               so->stack = NULL;
-               so->tempCxt = createTempGistContext();
+               ReleaseBuffer(so->curbuf);
                so->curbuf = InvalidBuffer;
-               so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
-               initGISTstate(so->giststate, scan->indexRelation);
-
-               scan->opaque = so;
        }
 
        /*
@@ -130,21 +129,16 @@ Datum
 gistendscan(PG_FUNCTION_ARGS)
 {
        IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
-       GISTScanOpaque so;
-
-       so = (GISTScanOpaque) scan->opaque;
-
-       if (so != NULL)
-       {
-               gistfreestack(so->stack);
-               if (so->giststate != NULL)
-                       freeGISTstate(so->giststate);
-               /* drop pins on buffers -- we aren't holding any locks */
-               if (BufferIsValid(so->curbuf))
-                       ReleaseBuffer(so->curbuf);
-               MemoryContextDelete(so->tempCxt);
-               pfree(scan->opaque);
-       }
+       GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
+
+       gistfreestack(so->stack);
+       if (so->giststate != NULL)
+               freeGISTstate(so->giststate);
+       /* drop pins on buffers -- we aren't holding any locks */
+       if (BufferIsValid(so->curbuf))
+               ReleaseBuffer(so->curbuf);
+       MemoryContextDelete(so->tempCxt);
+       pfree(so);
 
        PG_RETURN_VOID();
 }
index bb46446d713b6b8ed60e0ab7c15d09a4543b36e9..e53ec3d5eaa2f6f9ba22a224855f60a996cffa15 100644 (file)
@@ -366,12 +366,16 @@ Datum
 hashbeginscan(PG_FUNCTION_ARGS)
 {
        Relation        rel = (Relation) PG_GETARG_POINTER(0);
-       int                     keysz = PG_GETARG_INT32(1);
-       ScanKey         scankey = (ScanKey) PG_GETARG_POINTER(2);
+       int                     nkeys = PG_GETARG_INT32(1);
+       int                     norderbys = PG_GETARG_INT32(2);
        IndexScanDesc scan;
        HashScanOpaque so;
 
-       scan = RelationGetIndexScan(rel, keysz, scankey);
+       /* no order by operators allowed */
+       Assert(norderbys == 0);
+
+       scan = RelationGetIndexScan(rel, nkeys, norderbys);
+
        so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
        so->hashso_bucket_valid = false;
        so->hashso_bucket_blkno = 0;
@@ -396,26 +400,23 @@ hashrescan(PG_FUNCTION_ARGS)
 {
        IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
        ScanKey         scankey = (ScanKey) PG_GETARG_POINTER(1);
+       /* remaining arguments are ignored */
        HashScanOpaque so = (HashScanOpaque) scan->opaque;
        Relation        rel = scan->indexRelation;
 
-       /* if we are called from beginscan, so is still NULL */
-       if (so)
-       {
-               /* release any pin we still hold */
-               if (BufferIsValid(so->hashso_curbuf))
-                       _hash_dropbuf(rel, so->hashso_curbuf);
-               so->hashso_curbuf = InvalidBuffer;
-
-               /* release lock on bucket, too */
-               if (so->hashso_bucket_blkno)
-                       _hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE);
-               so->hashso_bucket_blkno = 0;
-
-               /* set position invalid (this will cause _hash_first call) */
-               ItemPointerSetInvalid(&(so->hashso_curpos));
-               ItemPointerSetInvalid(&(so->hashso_heappos));
-       }
+       /* release any pin we still hold */
+       if (BufferIsValid(so->hashso_curbuf))
+               _hash_dropbuf(rel, so->hashso_curbuf);
+       so->hashso_curbuf = InvalidBuffer;
+
+       /* release lock on bucket, too */
+       if (so->hashso_bucket_blkno)
+               _hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE);
+       so->hashso_bucket_blkno = 0;
+
+       /* set position invalid (this will cause _hash_first call) */
+       ItemPointerSetInvalid(&(so->hashso_curpos));
+       ItemPointerSetInvalid(&(so->hashso_heappos));
 
        /* Update scan key, if a new one is given */
        if (scankey && scan->numberOfKeys > 0)
@@ -423,8 +424,7 @@ hashrescan(PG_FUNCTION_ARGS)
                memmove(scan->keyData,
                                scankey,
                                scan->numberOfKeys * sizeof(ScanKeyData));
-               if (so)
-                       so->hashso_bucket_valid = false;
+               so->hashso_bucket_valid = false;
        }
 
        PG_RETURN_VOID();
index cd0212aa94d8eb067e445439ccc3c28960384d30..d0eaa36b3b5d9dab8a181efbc23c5d13702edbf2 100644 (file)
 /* ----------------
  *     RelationGetIndexScan -- Create and fill an IndexScanDesc.
  *
- *             This routine creates an index scan structure and sets its contents
- *             up correctly. This routine calls AMrescan to set up the scan with
- *             the passed key.
+ *             This routine creates an index scan structure and sets up initial
+ *             contents for it.
  *
  *             Parameters:
  *                             indexRelation -- index relation for scan.
- *                             nkeys -- count of scan keys.
- *                             key -- array of scan keys to restrict the index scan.
+ *                             nkeys -- count of scan keys (index qual conditions).
+ *                             norderbys -- count of index order-by operators.
  *
  *             Returns:
  *                             An initialized IndexScanDesc.
  * ----------------
  */
 IndexScanDesc
-RelationGetIndexScan(Relation indexRelation,
-                                        int nkeys, ScanKey key)
+RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
 {
        IndexScanDesc scan;
 
@@ -82,15 +80,19 @@ RelationGetIndexScan(Relation indexRelation,
        scan->indexRelation = indexRelation;
        scan->xs_snapshot = SnapshotNow;        /* may be set later */
        scan->numberOfKeys = nkeys;
+       scan->numberOfOrderBys = norderbys;
 
        /*
-        * We allocate the key space here, but the AM is responsible for actually
-        * filling it from the passed key array.
+        * We allocate key workspace here, but it won't get filled until amrescan.
         */
        if (nkeys > 0)
                scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
        else
                scan->keyData = NULL;
+       if (norderbys > 0)
+               scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys);
+       else
+               scan->orderByData = NULL;
 
        /*
         * During recovery we ignore killed tuples and don't bother to kill them
@@ -115,11 +117,6 @@ RelationGetIndexScan(Relation indexRelation,
        scan->xs_next_hot = InvalidOffsetNumber;
        scan->xs_prev_xmax = InvalidTransactionId;
 
-       /*
-        * Let the AM fill in the key and any opaque data it wants.
-        */
-       index_rescan(scan, key);
-
        return scan;
 }
 
@@ -140,6 +137,8 @@ IndexScanEnd(IndexScanDesc scan)
 {
        if (scan->keyData != NULL)
                pfree(scan->keyData);
+       if (scan->orderByData != NULL)
+               pfree(scan->orderByData);
 
        pfree(scan);
 }
@@ -286,7 +285,8 @@ systable_beginscan(Relation heapRelation,
                }
 
                sysscan->iscan = index_beginscan(heapRelation, irel,
-                                                                                snapshot, nkeys, key);
+                                                                                snapshot, nkeys, 0);
+               index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
                sysscan->scan = NULL;
        }
        else
@@ -450,7 +450,8 @@ systable_beginscan_ordered(Relation heapRelation,
        }
 
        sysscan->iscan = index_beginscan(heapRelation, indexRelation,
-                                                                        snapshot, nkeys, key);
+                                                                        snapshot, nkeys, 0);
+       index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
        sysscan->scan = NULL;
 
        return sysscan;
index d151ffda8c06d5e76d8903857efd46ac99a38b4a..8c79c6149b60df2e0e9b8484849741da44c81da5 100644 (file)
@@ -114,7 +114,7 @@ do { \
 } while(0)
 
 static IndexScanDesc index_beginscan_internal(Relation indexRelation,
-                                                int nkeys, ScanKey key);
+                                                int nkeys, int norderbys);
 
 
 /* ----------------------------------------------------------------
@@ -213,11 +213,11 @@ IndexScanDesc
 index_beginscan(Relation heapRelation,
                                Relation indexRelation,
                                Snapshot snapshot,
-                               int nkeys, ScanKey key)
+                               int nkeys, int norderbys)
 {
        IndexScanDesc scan;
 
-       scan = index_beginscan_internal(indexRelation, nkeys, key);
+       scan = index_beginscan_internal(indexRelation, nkeys, norderbys);
 
        /*
         * Save additional parameters into the scandesc.  Everything else was set
@@ -238,11 +238,11 @@ index_beginscan(Relation heapRelation,
 IndexScanDesc
 index_beginscan_bitmap(Relation indexRelation,
                                           Snapshot snapshot,
-                                          int nkeys, ScanKey key)
+                                          int nkeys)
 {
        IndexScanDesc scan;
 
-       scan = index_beginscan_internal(indexRelation, nkeys, key);
+       scan = index_beginscan_internal(indexRelation, nkeys, 0);
 
        /*
         * Save additional parameters into the scandesc.  Everything else was set
@@ -258,7 +258,7 @@ index_beginscan_bitmap(Relation indexRelation,
  */
 static IndexScanDesc
 index_beginscan_internal(Relation indexRelation,
-                                                int nkeys, ScanKey key)
+                                                int nkeys, int norderbys)
 {
        IndexScanDesc scan;
        FmgrInfo   *procedure;
@@ -278,7 +278,7 @@ index_beginscan_internal(Relation indexRelation,
                DatumGetPointer(FunctionCall3(procedure,
                                                                          PointerGetDatum(indexRelation),
                                                                          Int32GetDatum(nkeys),
-                                                                         PointerGetDatum(key)));
+                                                                         Int32GetDatum(norderbys)));
 
        return scan;
 }
@@ -286,23 +286,28 @@ index_beginscan_internal(Relation indexRelation,
 /* ----------------
  *             index_rescan  - (re)start a scan of an index
  *
- * The caller may specify a new set of scankeys (but the number of keys
- * cannot change).     To restart the scan without changing keys, pass NULL
- * for the key array.
- *
- * Note that this is also called when first starting an indexscan;
- * see RelationGetIndexScan.  Keys *must* be passed in that case,
- * unless scan->numberOfKeys is zero.
+ * During a restart, the caller may specify a new set of scankeys and/or
+ * orderbykeys; but the number of keys cannot differ from what index_beginscan
+ * was told.  (Later we might relax that to "must not exceed", but currently
+ * the index AMs tend to assume that scan->numberOfKeys is what to believe.)
+ * To restart the scan without changing keys, pass NULL for the key arrays.
+ * (Of course, keys *must* be passed on the first call, unless
+ * scan->numberOfKeys is zero.)
  * ----------------
  */
 void
-index_rescan(IndexScanDesc scan, ScanKey key)
+index_rescan(IndexScanDesc scan,
+                        ScanKey keys, int nkeys,
+                        ScanKey orderbys, int norderbys)
 {
        FmgrInfo   *procedure;
 
        SCAN_CHECKS;
        GET_SCAN_PROCEDURE(amrescan);
 
+       Assert(nkeys == scan->numberOfKeys);
+       Assert(norderbys == scan->numberOfOrderBys);
+
        /* Release any held pin on a heap page */
        if (BufferIsValid(scan->xs_cbuf))
        {
@@ -314,9 +319,12 @@ index_rescan(IndexScanDesc scan, ScanKey key)
 
        scan->kill_prior_tuple = false;         /* for safety */
 
-       FunctionCall2(procedure,
+       FunctionCall5(procedure,
                                  PointerGetDatum(scan),
-                                 PointerGetDatum(key));
+                                 PointerGetDatum(keys),
+                                 Int32GetDatum(nkeys),
+                                 PointerGetDatum(orderbys),
+                                 Int32GetDatum(norderbys));
 }
 
 /* ----------------
index 46aeb9e6adb1953b64e0451ed204158b11603c09..655a40090e98cb27bd82f9c1f9dee1aa72913c49 100644 (file)
@@ -337,12 +337,27 @@ Datum
 btbeginscan(PG_FUNCTION_ARGS)
 {
        Relation        rel = (Relation) PG_GETARG_POINTER(0);
-       int                     keysz = PG_GETARG_INT32(1);
-       ScanKey         scankey = (ScanKey) PG_GETARG_POINTER(2);
+       int                     nkeys = PG_GETARG_INT32(1);
+       int                     norderbys = PG_GETARG_INT32(2);
        IndexScanDesc scan;
+       BTScanOpaque so;
+
+       /* no order by operators allowed */
+       Assert(norderbys == 0);
 
        /* get the scan */
-       scan = RelationGetIndexScan(rel, keysz, scankey);
+       scan = RelationGetIndexScan(rel, nkeys, norderbys);
+
+       /* allocate private workspace */
+       so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
+       so->currPos.buf = so->markPos.buf = InvalidBuffer;
+       if (scan->numberOfKeys > 0)
+               so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
+       else
+               so->keyData = NULL;
+       so->killedItems = NULL;         /* until needed */
+       so->numKilled = 0;
+       scan->opaque = so;
 
        PG_RETURN_POINTER(scan);
 }
@@ -355,22 +370,8 @@ btrescan(PG_FUNCTION_ARGS)
 {
        IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
        ScanKey         scankey = (ScanKey) PG_GETARG_POINTER(1);
-       BTScanOpaque so;
-
-       so = (BTScanOpaque) scan->opaque;
-
-       if (so == NULL)                         /* if called from btbeginscan */
-       {
-               so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
-               so->currPos.buf = so->markPos.buf = InvalidBuffer;
-               if (scan->numberOfKeys > 0)
-                       so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
-               else
-                       so->keyData = NULL;
-               so->killedItems = NULL; /* until needed */
-               so->numKilled = 0;
-               scan->opaque = so;
-       }
+       /* remaining arguments are ignored */
+       BTScanOpaque so = (BTScanOpaque) scan->opaque;
 
        /* we aren't holding any read locks, but gotta drop the pins */
        if (BTScanPosIsValid(so->currPos))
index bb7cd746b1b51621a7586e3febdcb134cafadd3a..e1dbd6d985bd88496a27b76c86488493c8d0f61d 100644 (file)
@@ -875,8 +875,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
        if (OldIndex != NULL && !use_sort)
        {
                heapScan = NULL;
-               indexScan = index_beginscan(OldHeap, OldIndex,
-                                                                       SnapshotAny, 0, (ScanKey) NULL);
+               indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
+               index_rescan(indexScan, NULL, 0, NULL, 0);
        }
        else
        {
index a5e44c046f745fdc38e9d299dccfc7185e7a695c..81885b4fb74bcb97c4e44d8528a57d6481ed2054 100644 (file)
@@ -1017,6 +1017,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
                case T_IndexScan:
                        show_scan_qual(((IndexScan *) plan)->indexqualorig,
                                                   "Index Cond", planstate, ancestors, es);
+                       show_scan_qual(((IndexScan *) plan)->indexorderbyorig,
+                                                  "Order By", planstate, ancestors, es);
                        show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
                        break;
                case T_BitmapIndexScan:
index 27ea91c0140616e7c5c0866b271106f93db0bfe8..6bac6d062360e33e1f3e2e1e4505eca9d43343ca 100644 (file)
@@ -4694,7 +4694,7 @@ ExecInitExpr(Expr *node, PlanState *parent)
                                        Oid                     righttype;
                                        Oid                     proc;
 
-                                       get_op_opfamily_properties(opno, opfamily,
+                                       get_op_opfamily_properties(opno, opfamily, false,
                                                                                           &strategy,
                                                                                           &lefttype,
                                                                                           &righttype);
index 57806ca8f0f2f9e2be2e32cccc3bb9087fe0e6d1..6ad0f1e52addf7c665914a68c0e02c17ddaa157a 100644 (file)
@@ -1211,8 +1211,8 @@ check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
 retry:
        conflict = false;
        found_self = false;
-       index_scan = index_beginscan(heap, index, &DirtySnapshot,
-                                                                index_natts, scankeys);
+       index_scan = index_beginscan(heap, index, &DirtySnapshot, index_natts, 0);
+       index_rescan(index_scan, scankeys, index_natts, NULL, 0);
 
        while ((tup = index_getnext(index_scan,
                                                                ForwardScanDirection)) != NULL)
index 97ce0dde2947ad6823dd01c7214b50ce98cd5fd0..573e294882cb63021bbfc72c7f76a2deec27a37e 100644 (file)
@@ -95,7 +95,9 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node)
                doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys,
                                                                                   node->biss_NumArrayKeys);
                if (doscan)                             /* reset index scan */
-                       index_rescan(node->biss_ScanDesc, node->biss_ScanKeys);
+                       index_rescan(node->biss_ScanDesc,
+                                                node->biss_ScanKeys, node->biss_NumScanKeys,
+                                                NULL, 0);
        }
 
        /* must provide our own instrumentation support */
@@ -147,7 +149,9 @@ ExecReScanBitmapIndexScan(BitmapIndexScanState *node)
 
        /* reset index scan */
        if (node->biss_RuntimeKeysReady)
-               index_rescan(node->biss_ScanDesc, node->biss_ScanKeys);
+               index_rescan(node->biss_ScanDesc,
+                                        node->biss_ScanKeys, node->biss_NumScanKeys,
+                                        NULL, 0);
 }
 
 /* ----------------------------------------------------------------
@@ -256,6 +260,8 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
         * Initialize index-specific scan state
         */
        indexstate->biss_RuntimeKeysReady = false;
+       indexstate->biss_RuntimeKeys = NULL;
+       indexstate->biss_NumRuntimeKeys = 0;
 
        /*
         * build the index scan keys from the index qualification
@@ -264,6 +270,7 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
                                                   indexstate->biss_RelationDesc,
                                                   node->scan.scanrelid,
                                                   node->indexqual,
+                                                  false,
                                                   &indexstate->biss_ScanKeys,
                                                   &indexstate->biss_NumScanKeys,
                                                   &indexstate->biss_RuntimeKeys,
@@ -297,8 +304,17 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
        indexstate->biss_ScanDesc =
                index_beginscan_bitmap(indexstate->biss_RelationDesc,
                                                           estate->es_snapshot,
-                                                          indexstate->biss_NumScanKeys,
-                                                          indexstate->biss_ScanKeys);
+                                                          indexstate->biss_NumScanKeys);
+
+       /*
+        * If no run-time keys to calculate, go ahead and pass the scankeys to
+        * the index AM.
+        */
+       if (indexstate->biss_NumRuntimeKeys == 0 &&
+               indexstate->biss_NumArrayKeys == 0)
+               index_rescan(indexstate->biss_ScanDesc,
+                                        indexstate->biss_ScanKeys, indexstate->biss_NumScanKeys,
+                                        NULL, 0);
 
        /*
         * all done.
index ee5fc72c209ef62cdf33bfa1fe6f1601ffb97743..3aed2960d3fa017ab2da7ac2af345baaee33cd24 100644 (file)
@@ -181,7 +181,9 @@ ExecReScanIndexScan(IndexScanState *node)
        node->iss_RuntimeKeysReady = true;
 
        /* reset index scan */
-       index_rescan(node->iss_ScanDesc, node->iss_ScanKeys);
+       index_rescan(node->iss_ScanDesc,
+                                node->iss_ScanKeys, node->iss_NumScanKeys,
+                                node->iss_OrderByKeys, node->iss_NumOrderByKeys);
 
        ExecScanReScan(&node->ss);
 }
@@ -480,10 +482,11 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
         * initialize child expressions
         *
         * Note: we don't initialize all of the indexqual expression, only the
-        * sub-parts corresponding to runtime keys (see below).  The indexqualorig
-        * expression is always initialized even though it will only be used in
-        * some uncommon cases --- would be nice to improve that.  (Problem is
-        * that any SubPlans present in the expression must be found now...)
+        * sub-parts corresponding to runtime keys (see below).  Likewise for
+        * indexorderby, if any.  But the indexqualorig expression is always
+        * initialized even though it will only be used in some uncommon cases ---
+        * would be nice to improve that.  (Problem is that any SubPlans present
+        * in the expression must be found now...)
         */
        indexstate->ss.ps.targetlist = (List *)
                ExecInitExpr((Expr *) node->scan.plan.targetlist,
@@ -543,6 +546,8 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
         * Initialize index-specific scan state
         */
        indexstate->iss_RuntimeKeysReady = false;
+       indexstate->iss_RuntimeKeys = NULL;
+       indexstate->iss_NumRuntimeKeys = 0;
 
        /*
         * build the index scan keys from the index qualification
@@ -551,6 +556,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
                                                   indexstate->iss_RelationDesc,
                                                   node->scan.scanrelid,
                                                   node->indexqual,
+                                                  false,
                                                   &indexstate->iss_ScanKeys,
                                                   &indexstate->iss_NumScanKeys,
                                                   &indexstate->iss_RuntimeKeys,
@@ -558,6 +564,21 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
                                                   NULL,        /* no ArrayKeys */
                                                   NULL);
 
+       /*
+        * any ORDER BY exprs have to be turned into scankeys in the same way
+        */
+       ExecIndexBuildScanKeys((PlanState *) indexstate,
+                                                  indexstate->iss_RelationDesc,
+                                                  node->scan.scanrelid,
+                                                  node->indexorderby,
+                                                  true,
+                                                  &indexstate->iss_OrderByKeys,
+                                                  &indexstate->iss_NumOrderByKeys,
+                                                  &indexstate->iss_RuntimeKeys,
+                                                  &indexstate->iss_NumRuntimeKeys,
+                                                  NULL,        /* no ArrayKeys */
+                                                  NULL);
+
        /*
         * If we have runtime keys, we need an ExprContext to evaluate them. The
         * node's standard context won't do because we want to reset that context
@@ -584,7 +605,16 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
                                                                                           indexstate->iss_RelationDesc,
                                                                                           estate->es_snapshot,
                                                                                           indexstate->iss_NumScanKeys,
-                                                                                          indexstate->iss_ScanKeys);
+                                                                                          indexstate->iss_NumOrderByKeys);
+
+       /*
+        * If no run-time keys to calculate, go ahead and pass the scankeys to
+        * the index AM.
+        */
+       if (indexstate->iss_NumRuntimeKeys == 0)
+               index_rescan(indexstate->iss_ScanDesc,
+                                        indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys,
+                                        indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys);
 
        /*
         * all done.
@@ -624,12 +654,20 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
  * 5. NullTest ("indexkey IS NULL/IS NOT NULL").  We just fill in the
  * ScanKey properly.
  *
+ * This code is also used to prepare ORDER BY expressions for amcanorderbyop
+ * indexes.  The behavior is exactly the same, except that we have to look up
+ * the operator differently.  Note that only cases 1 and 2 are currently
+ * possible for ORDER BY.
+ *
  * Input params are:
  *
  * planstate: executor state node we are working for
  * index: the index we are building scan keys for
  * scanrelid: varno of the index's relation within current query
- * quals: indexquals expressions
+ * quals: indexquals (or indexorderbys) expressions
+ * isorderby: true if processing ORDER BY exprs, false if processing quals
+ * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none
+ * *numRuntimeKeys: number of pre-existing runtime keys
  *
  * Output params are:
  *
@@ -645,7 +683,8 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
  */
 void
 ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
-                                          List *quals, ScanKey *scanKeys, int *numScanKeys,
+                                          List *quals, bool isorderby,
+                                          ScanKey *scanKeys, int *numScanKeys,
                                           IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
                                           IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
 {
@@ -654,42 +693,30 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
        IndexRuntimeKeyInfo *runtime_keys;
        IndexArrayKeyInfo *array_keys;
        int                     n_scan_keys;
-       int                     extra_scan_keys;
        int                     n_runtime_keys;
+       int                     max_runtime_keys;
        int                     n_array_keys;
        int                     j;
 
+       /* Allocate array for ScanKey structs: one per qual */
+       n_scan_keys = list_length(quals);
+       scan_keys = (ScanKey) palloc(n_scan_keys * sizeof(ScanKeyData));
+
        /*
-        * If there are any RowCompareExpr quals, we need extra ScanKey entries
-        * for them, and possibly extra runtime-key entries.  Count up what's
-        * needed.      (The subsidiary ScanKey arrays for the RowCompareExprs could
-        * be allocated as separate chunks, but we have to count anyway to make
-        * runtime_keys large enough, so might as well just do one palloc.)
+        * runtime_keys array is dynamically resized as needed.  We handle it
+        * this way so that the same runtime keys array can be shared between
+        * indexquals and indexorderbys, which will be processed in separate
+        * calls of this function.  Caller must be sure to pass in NULL/0 for
+        * first call.
         */
-       n_scan_keys = list_length(quals);
-       extra_scan_keys = 0;
-       foreach(qual_cell, quals)
-       {
-               if (IsA(lfirst(qual_cell), RowCompareExpr))
-                       extra_scan_keys +=
-                               list_length(((RowCompareExpr *) lfirst(qual_cell))->opnos);
-       }
-       scan_keys = (ScanKey)
-               palloc((n_scan_keys + extra_scan_keys) * sizeof(ScanKeyData));
-       /* Allocate these arrays as large as they could possibly need to be */
-       runtime_keys = (IndexRuntimeKeyInfo *)
-               palloc((n_scan_keys + extra_scan_keys) * sizeof(IndexRuntimeKeyInfo));
+       runtime_keys = *runtimeKeys;
+       n_runtime_keys = max_runtime_keys = *numRuntimeKeys;
+
+       /* Allocate array_keys as large as it could possibly need to be */
        array_keys = (IndexArrayKeyInfo *)
                palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo));
-       n_runtime_keys = 0;
        n_array_keys = 0;
 
-       /*
-        * Below here, extra_scan_keys is index of first cell to use for next
-        * RowCompareExpr
-        */
-       extra_scan_keys = n_scan_keys;
-
        /*
         * for each opclause in the given qual, convert the opclause into a single
         * scan key
@@ -742,11 +769,14 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
                         */
                        opfamily = index->rd_opfamily[varattno - 1];
 
-                       get_op_opfamily_properties(opno, opfamily,
+                       get_op_opfamily_properties(opno, opfamily, isorderby,
                                                                           &op_strategy,
                                                                           &op_lefttype,
                                                                           &op_righttype);
 
+                       if (isorderby)
+                               flags |= SK_ORDER_BY;
+
                        /*
                         * rightop is the constant or variable comparison value
                         */
@@ -767,6 +797,21 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
                        else
                        {
                                /* Need to treat this one as a runtime key */
+                               if (n_runtime_keys >= max_runtime_keys)
+                               {
+                                       if (max_runtime_keys == 0)
+                                       {
+                                               max_runtime_keys = 8;
+                                               runtime_keys = (IndexRuntimeKeyInfo *)
+                                                       palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
+                                       }
+                                       else
+                                       {
+                                               max_runtime_keys *= 2;
+                                               runtime_keys = (IndexRuntimeKeyInfo *)
+                                                       repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
+                                       }
+                               }
                                runtime_keys[n_runtime_keys].scan_key = this_scan_key;
                                runtime_keys[n_runtime_keys].key_expr =
                                        ExecInitExpr(rightop, planstate);
@@ -794,12 +839,19 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
                        ListCell   *largs_cell = list_head(rc->largs);
                        ListCell   *rargs_cell = list_head(rc->rargs);
                        ListCell   *opnos_cell = list_head(rc->opnos);
-                       ScanKey         first_sub_key = &scan_keys[extra_scan_keys];
+                       ScanKey         first_sub_key;
+                       int                     n_sub_key;
+
+                       Assert(!isorderby);
+
+                       first_sub_key = (ScanKey)
+                               palloc(list_length(rc->opnos) * sizeof(ScanKeyData));
+                       n_sub_key = 0;
 
                        /* Scan RowCompare columns and generate subsidiary ScanKey items */
                        while (opnos_cell != NULL)
                        {
-                               ScanKey         this_sub_key = &scan_keys[extra_scan_keys];
+                               ScanKey         this_sub_key = &first_sub_key[n_sub_key];
                                int                     flags = SK_ROW_MEMBER;
                                Datum           scanvalue;
 
@@ -832,7 +884,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
                                        elog(ERROR, "bogus RowCompare index qualification");
                                opfamily = index->rd_opfamily[varattno - 1];
 
-                               get_op_opfamily_properties(opno, opfamily,
+                               get_op_opfamily_properties(opno, opfamily, isorderby,
                                                                                   &op_strategy,
                                                                                   &op_lefttype,
                                                                                   &op_righttype);
@@ -866,6 +918,21 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
                                else
                                {
                                        /* Need to treat this one as a runtime key */
+                                       if (n_runtime_keys >= max_runtime_keys)
+                                       {
+                                               if (max_runtime_keys == 0)
+                                               {
+                                                       max_runtime_keys = 8;
+                                                       runtime_keys = (IndexRuntimeKeyInfo *)
+                                                               palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
+                                               }
+                                               else
+                                               {
+                                                       max_runtime_keys *= 2;
+                                                       runtime_keys = (IndexRuntimeKeyInfo *)
+                                                               repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
+                                               }
+                                       }
                                        runtime_keys[n_runtime_keys].scan_key = this_sub_key;
                                        runtime_keys[n_runtime_keys].key_expr =
                                                ExecInitExpr(rightop, planstate);
@@ -885,11 +952,11 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
                                                                           op_righttype,        /* strategy subtype */
                                                                           opfuncid,            /* reg proc to use */
                                                                           scanvalue);          /* constant */
-                               extra_scan_keys++;
+                               n_sub_key++;
                        }
 
                        /* Mark the last subsidiary scankey correctly */
-                       scan_keys[extra_scan_keys - 1].sk_flags |= SK_ROW_END;
+                       first_sub_key[n_sub_key - 1].sk_flags |= SK_ROW_END;
 
                        /*
                         * We don't use ScanKeyEntryInitialize for the header because it
@@ -907,6 +974,8 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
                        /* indexkey op ANY (array-expression) */
                        ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
 
+                       Assert(!isorderby);
+
                        Assert(saop->useOr);
                        opno = saop->opno;
                        opfuncid = saop->opfuncid;
@@ -935,7 +1004,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
                         */
                        opfamily = index->rd_opfamily[varattno - 1];
 
-                       get_op_opfamily_properties(opno, opfamily,
+                       get_op_opfamily_properties(opno, opfamily, isorderby,
                                                                           &op_strategy,
                                                                           &op_lefttype,
                                                                           &op_righttype);
@@ -973,6 +1042,8 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
                        NullTest   *ntest = (NullTest *) clause;
                        int                     flags;
 
+                       Assert(!isorderby);
+
                        /*
                         * argument should be the index key Var, possibly relabeled
                         */
@@ -1020,12 +1091,9 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
                                 (int) nodeTag(clause));
        }
 
+       Assert(n_runtime_keys <= max_runtime_keys);
+
        /* Get rid of any unused arrays */
-       if (n_runtime_keys == 0)
-       {
-               pfree(runtime_keys);
-               runtime_keys = NULL;
-       }
        if (n_array_keys == 0)
        {
                pfree(array_keys);
index e8ce5bc02b39644011064a2f9be83f01a6402fc7..98d1615514bfdfdff7299b24198a4bcac15c41f7 100644 (file)
@@ -201,7 +201,7 @@ MJExamineQuals(List *mergeclauses,
                clause->rexpr = ExecInitExpr((Expr *) lsecond(qual->args), parent);
 
                /* Extract the operator's declared left/right datatypes */
-               get_op_opfamily_properties(qual->opno, opfamily,
+               get_op_opfamily_properties(qual->opno, opfamily, false,
                                                                   &op_strategy,
                                                                   &op_lefttype,
                                                                   &op_righttype);
index 0e0b4dc598a1502d0316557f38f4752b203dc323..4506518768d85bcde2698439d55c34d3a07dbb3b 100644 (file)
@@ -363,6 +363,8 @@ _copyIndexScan(IndexScan *from)
        COPY_SCALAR_FIELD(indexid);
        COPY_NODE_FIELD(indexqual);
        COPY_NODE_FIELD(indexqualorig);
+       COPY_NODE_FIELD(indexorderby);
+       COPY_NODE_FIELD(indexorderbyorig);
        COPY_SCALAR_FIELD(indexorderdir);
 
        return newnode;
index afbfccabda50c1b8c30735e5fb469f4deeb4afa6..5d09e16477d18f3e56328bc3d020846921d98cf9 100644 (file)
@@ -439,6 +439,8 @@ _outIndexScan(StringInfo str, IndexScan *node)
        WRITE_OID_FIELD(indexid);
        WRITE_NODE_FIELD(indexqual);
        WRITE_NODE_FIELD(indexqualorig);
+       WRITE_NODE_FIELD(indexorderby);
+       WRITE_NODE_FIELD(indexorderbyorig);
        WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
 }
 
@@ -1424,6 +1426,7 @@ _outIndexPath(StringInfo str, IndexPath *node)
        WRITE_NODE_FIELD(indexinfo);
        WRITE_NODE_FIELD(indexclauses);
        WRITE_NODE_FIELD(indexquals);
+       WRITE_NODE_FIELD(indexorderbys);
        WRITE_BOOL_FIELD(isjoininner);
        WRITE_ENUM_FIELD(indexscandir, ScanDirection);
        WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
index 0724f9a6c9cefdfac57661556b71d24c1b2d94ac..e6edbdb1e845a35203072ac5648f580a01176905 100644 (file)
@@ -209,6 +209,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
  *
  * 'index' is the index to be used
  * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
+ * 'indexOrderBys' is the list of ORDER BY operators for amcanorderbyop indexes
  * 'outer_rel' is the outer relation when we are considering using the index
  *             scan as the inside of a nestloop join (hence, some of the indexQuals
  *             are join clauses, and we should expect repeated scans of the index);
@@ -218,18 +219,19 @@ cost_seqscan(Path *path, PlannerInfo *root,
  * additional fields of the IndexPath besides startup_cost and total_cost.
  * These fields are needed if the IndexPath is used in a BitmapIndexScan.
  *
+ * indexQuals is a list of RestrictInfo nodes, but indexOrderBys is a list of
+ * bare expressions.
+ *
  * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
  * Any additional quals evaluated as qpquals may reduce the number of returned
  * tuples, but they won't reduce the number of tuples we have to fetch from
  * the table, so they don't reduce the scan cost.
- *
- * NOTE: as of 8.0, indexQuals is a list of RestrictInfo nodes, where formerly
- * it was a list of bare clause expressions.
  */
 void
 cost_index(IndexPath *path, PlannerInfo *root,
                   IndexOptInfo *index,
                   List *indexQuals,
+                  List *indexOrderBys,
                   RelOptInfo *outer_rel)
 {
        RelOptInfo *baserel = index->rel;
@@ -263,10 +265,11 @@ cost_index(IndexPath *path, PlannerInfo *root,
         * the fraction of main-table tuples we will have to retrieve) and its
         * correlation to the main-table tuple order.
         */
-       OidFunctionCall8(index->amcostestimate,
+       OidFunctionCall9(index->amcostestimate,
                                         PointerGetDatum(root),
                                         PointerGetDatum(index),
                                         PointerGetDatum(indexQuals),
+                                        PointerGetDatum(indexOrderBys),
                                         PointerGetDatum(outer_rel),
                                         PointerGetDatum(&indexStartupCost),
                                         PointerGetDatum(&indexTotalCost),
index f73e0e6dc6007ce768828480f74621752aaf57d2..90ccb3928b99061a64dce59d2ea214757b2e5255 100644 (file)
@@ -89,6 +89,9 @@ static bool match_rowcompare_to_indexcol(IndexOptInfo *index,
                                                         Oid opfamily,
                                                         RowCompareExpr *clause,
                                                         Relids outer_relids);
+static List *match_index_to_pathkeys(IndexOptInfo *index, List *pathkeys);
+static Expr *match_clause_to_ordering_op(IndexOptInfo *index,
+                                                       int indexcol, Expr *clause, Oid pk_opfamily);
 static Relids indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel);
 static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel,
                                  Relids outer_relids);
@@ -286,6 +289,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
                IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
                IndexPath  *ipath;
                List       *restrictclauses;
+               List       *orderbyclauses;
                List       *index_pathkeys;
                List       *useful_pathkeys;
                bool            useful_predicate;
@@ -388,9 +392,24 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
                                                                                                  ForwardScanDirection);
                        useful_pathkeys = truncate_useless_pathkeys(root, rel,
                                                                                                                index_pathkeys);
+                       orderbyclauses = NIL;
+               }
+               else if (index->amcanorderbyop && possibly_useful_pathkeys &&
+                                istoplevel && outer_rel == NULL && scantype != ST_BITMAPSCAN)
+               {
+                       /* see if we can generate ordering operators for query_pathkeys */
+                       orderbyclauses = match_index_to_pathkeys(index,
+                                                                                                        root->query_pathkeys);
+                       if (orderbyclauses)
+                               useful_pathkeys = root->query_pathkeys;
+                       else
+                               useful_pathkeys = NIL;
                }
                else
+               {
                        useful_pathkeys = NIL;
+                       orderbyclauses = NIL;
+               }
 
                /*
                 * 3. Generate an indexscan path if there are relevant restriction
@@ -402,6 +421,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
                {
                        ipath = create_index_path(root, index,
                                                                          restrictclauses,
+                                                                         orderbyclauses,
                                                                          useful_pathkeys,
                                                                          index_is_ordered ?
                                                                          ForwardScanDirection :
@@ -425,6 +445,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
                        {
                                ipath = create_index_path(root, index,
                                                                                  restrictclauses,
+                                                                                 NIL,
                                                                                  useful_pathkeys,
                                                                                  BackwardScanDirection,
                                                                                  outer_rel);
@@ -1384,6 +1405,179 @@ match_rowcompare_to_indexcol(IndexOptInfo *index,
 }
 
 
+/****************************************************************************
+ *                             ----  ROUTINES TO CHECK ORDERING OPERATORS  ----
+ ****************************************************************************/
+
+/*
+ * match_index_to_pathkeys
+ *             Test whether an index can produce output ordered according to the
+ *             given pathkeys using "ordering operators".
+ *
+ * If it can, return a list of suitable ORDER BY expressions, each of the form
+ * "indexedcol operator pseudoconstant".  If not, return NIL.
+ *
+ * 'index' is the index under consideration; unless its amcanorderbyop flag
+ *             is set we return NIL at once.
+ * 'pathkeys' is the list of PathKeys describing the desired ordering.
+ *
+ * On success the result holds exactly one expression per pathkey, in pathkey
+ * order.  For each pathkey we take the first equivalence-class member and
+ * index column (members in the outer loop, columns in the inner loop) that
+ * match_clause_to_ordering_op accepts.  Note that an empty 'pathkeys' list
+ * also yields NIL, which the caller cannot tell apart from a failed match.
+ */
+static List *
+match_index_to_pathkeys(IndexOptInfo *index, List *pathkeys)
+{
+       List       *orderbyexprs = NIL;
+       ListCell   *lc1;
+
+       /* Only indexes with the amcanorderbyop property are interesting here */
+       if (!index->amcanorderbyop)
+               return NIL;
+
+       foreach(lc1, pathkeys)
+       {
+               PathKey    *pathkey = (PathKey *) lfirst(lc1);
+               bool            found = false;
+               ListCell   *lc2;
+
+               /*
+                * Note: for any failure to match, we just return NIL immediately.
+                * There is no value in matching just some of the pathkeys.
+                * (Expressions already appended to orderbyexprs are simply
+                * abandoned in that case.)
+                */
+
+               /*
+                * Pathkey must request default sort order for the target opfamily.
+                * That is tested here as: btree "less than" strategy, and nulls
+                * not sorted first.
+                */
+               if (pathkey->pk_strategy != BTLessStrategyNumber ||
+                       pathkey->pk_nulls_first)
+                       return NIL;
+
+               /* If eclass is volatile, no hope of using an indexscan */
+               if (pathkey->pk_eclass->ec_has_volatile)
+                       return NIL;
+
+               /* Try to match eclass member expression(s) to index */
+               foreach(lc2, pathkey->pk_eclass->ec_members)
+               {
+                       EquivalenceMember *member = (EquivalenceMember *) lfirst(lc2);
+                       int             indexcol;
+
+                       /*
+                        * No possibility of match if it references other relations.
+                        * The relid sets must be exactly equal, so a member that
+                        * references no relation at all is skipped as well.
+                        */
+                       if (!bms_equal(member->em_relids, index->rel->relids))
+                               continue;
+
+                       for (indexcol = 0; indexcol < index->ncolumns; indexcol++)      /* try each index column in turn */
+                       {
+                               Expr   *expr;
+
+                               expr = match_clause_to_ordering_op(index,
+                                                                                                  indexcol,
+                                                                                                  member->em_expr,
+                                                                                                  pathkey->pk_opfamily);
+                               if (expr)
+                               {
+                                       orderbyexprs = lappend(orderbyexprs, expr);
+                                       found = true;
+                                       break;          /* first matching column wins */
+                               }
+                       }
+
+                       if (found)                      /* don't want to look at remaining members */
+                               break;
+               }
+
+               if (!found)                             /* fail if no match for this pathkey */
+                       return NIL;
+       }
+
+       return orderbyexprs;            /* success! */
+}
+
+/*
+ * match_clause_to_ordering_op
+ *       Determines whether an ordering operator expression matches an
+ *       index column.
+ *
+ *       This is similar to, but simpler than, match_clause_to_indexcol.
+ *       We only care about simple OpExpr cases.  The input is a bare
+ *       expression that is being ordered by, which must be of the form
+ *       (indexkey op const) or (const op indexkey) where op is an ordering
+ *       operator for the column's opfamily.
+ *
+ * 'index' is the index of interest.
+ * 'indexcol' is a column number of 'index' (counting from 0).
+ * 'clause' is the ordering expression to be tested.
+ * 'pk_opfamily' is the btree opfamily describing the required sort order.
+ *
+ * If successful, return 'clause' as-is if the indexkey is on the left,
+ * otherwise a commuted copy of 'clause'.  If no match, return NULL.
+ *
+ * The operand that is not the index key must contain no Vars and no volatile
+ * functions.  When the index key is on the right and the operator has no
+ * commutator, there is no match.  A commuted result is a flat copy of the
+ * OpExpr: its args list is rebuilt from the original operand nodes, which
+ * are shared with 'clause' rather than copied.
+ */
+static Expr *
+match_clause_to_ordering_op(IndexOptInfo *index,
+                                                       int indexcol,
+                                                       Expr *clause,
+                                                       Oid pk_opfamily)
+{
+       Oid                     opfamily = index->opfamily[indexcol];
+       Node       *leftop,
+                          *rightop;
+       Oid                     expr_op;
+       Oid                     sortfamily;
+       bool            commuted;
+
+       /*
+        * Clause must be a binary opclause.
+        * (Both operands have to be present, else we give up.)
+        */
+       if (!is_opclause(clause))
+               return NULL;
+       leftop = get_leftop(clause);
+       rightop = get_rightop(clause);
+       if (!leftop || !rightop)
+               return NULL;
+       expr_op = ((OpExpr *) clause)->opno;
+
+       /*
+        * Check for clauses of the form: (indexkey operator constant) or
+        * (constant operator indexkey).
+        * "Constant" here means: no Vars and no volatile functions.
+        */
+       if (match_index_to_operand(leftop, indexcol, index) &&
+               !contain_var_clause(rightop) &&
+               !contain_volatile_functions(rightop))
+       {
+               commuted = false;
+       }
+       else if (match_index_to_operand(rightop, indexcol, index) &&
+                        !contain_var_clause(leftop) &&
+                        !contain_volatile_functions(leftop))
+       {
+               /* Might match, but we need a commuted operator */
+               expr_op = get_commutator(expr_op);
+               if (expr_op == InvalidOid)
+                       return NULL;
+               commuted = true;
+       }
+       else
+               return NULL;
+
+       /*
+        * Is the (commuted) operator an ordering operator for the opfamily?
+        * And if so, does it yield the right sorting semantics?
+        *
+        * A failed lookup gives InvalidOid, which is rejected by the same
+        * comparison (assuming pk_opfamily is a valid OID --- TODO confirm).
+        */
+       sortfamily = get_op_opfamily_sortfamily(expr_op, opfamily);
+       if (sortfamily != pk_opfamily)
+               return NULL;
+
+       /* We have a match.  Return clause or a commuted version thereof. */
+       if (commuted)
+       {
+               OpExpr     *newclause = makeNode(OpExpr);
+
+               /* flat-copy all the fields of clause */
+               memcpy(newclause, clause, sizeof(OpExpr));
+
+               /* commute it */
+               newclause->opno = expr_op;
+               newclause->opfuncid = InvalidOid;       /* copied value went with the old opno; presumably set again later */
+               newclause->args = list_make2(rightop, leftop);  /* index key now on the left */
+
+               clause = (Expr *) newclause;
+       }
+
+       return clause;
+}
+
+
 /****************************************************************************
  *                             ----  ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS      ----
  ****************************************************************************/
@@ -2630,7 +2824,7 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo,
        expr_op = linitial_oid(clause->opnos);
        if (!var_on_left)
                expr_op = get_commutator(expr_op);
-       get_op_opfamily_properties(expr_op, index->opfamily[indexcol],
+       get_op_opfamily_properties(expr_op, index->opfamily[indexcol], false,
                                                           &op_strategy,
                                                           &op_lefttype,
                                                           &op_righttype);
@@ -2698,7 +2892,7 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo,
                        break;
 
                /* Add opfamily and datatypes to lists */
-               get_op_opfamily_properties(expr_op, index->opfamily[i],
+               get_op_opfamily_properties(expr_op, index->opfamily[i], false,
                                                                   &op_strategy,
                                                                   &op_lefttype,
                                                                   &op_righttype);
index 41ad512a2963e91b1efc9371c6885a8596b4c938..1bbf35ed74d193f1e0dd3022480572c9345a0b44 100644 (file)
@@ -81,6 +81,8 @@ static Node *replace_nestloop_params(PlannerInfo *root, Node *expr);
 static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root);
 static List *fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
                                                 List *indexquals);
+static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path,
+                                                       List *indexorderbys);
 static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index);
 static List *get_switched_clauses(List *clauses, Relids outerrelids);
 static List *order_qual_clauses(PlannerInfo *root, List *clauses);
@@ -89,6 +91,7 @@ static void copy_plan_costsize(Plan *dest, Plan *src);
 static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
 static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
                           Oid indexid, List *indexqual, List *indexqualorig,
+                          List *indexorderby, List *indexorderbyorig,
                           ScanDirection indexscandir);
 static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid,
                                          List *indexqual,
@@ -1028,11 +1031,13 @@ create_indexscan_plan(PlannerInfo *root,
                                          List *scan_clauses)
 {
        List       *indexquals = best_path->indexquals;
+       List       *indexorderbys = best_path->indexorderbys;
        Index           baserelid = best_path->path.parent->relid;
        Oid                     indexoid = best_path->indexinfo->indexoid;
        List       *qpqual;
        List       *stripped_indexquals;
        List       *fixed_indexquals;
+       List       *fixed_indexorderbys;
        ListCell   *l;
        IndexScan  *scan_plan;
 
@@ -1052,6 +1057,11 @@ create_indexscan_plan(PlannerInfo *root,
         */
        fixed_indexquals = fix_indexqual_references(root, best_path, indexquals);
 
+       /*
+        * Likewise fix up index attr references in the ORDER BY expressions.
+        */
+       fixed_indexorderbys = fix_indexorderby_references(root, best_path, indexorderbys);
+
        /*
         * If this is an innerjoin scan, the indexclauses will contain join
         * clauses that are not present in scan_clauses (since the passed-in value
@@ -1123,11 +1133,12 @@ create_indexscan_plan(PlannerInfo *root,
 
        /*
         * We have to replace any outer-relation variables with nestloop params
-        * in the indexqualorig and qpqual expressions.  A bit annoying to have to
-        * do this separately from the processing in fix_indexqual_references ---
-        * rethink this when generalizing the inner indexscan support.  But note
-        * we can't really do this earlier because it'd break the comparisons to
-        * predicates above ... (or would it?  Those wouldn't have outer refs)
+        * in the indexqualorig, qpqual, and indexorderbyorig expressions.  A bit
+        * annoying to have to do this separately from the processing in
+        * fix_indexqual_references --- rethink this when generalizing the inner
+        * indexscan support.  But note we can't really do this earlier because
+        * it'd break the comparisons to predicates above ... (or would it?  Those
+        * wouldn't have outer refs)
         */
        if (best_path->isjoininner)
        {
@@ -1135,6 +1146,8 @@ create_indexscan_plan(PlannerInfo *root,
                        replace_nestloop_params(root, (Node *) stripped_indexquals);
                qpqual = (List *)
                        replace_nestloop_params(root, (Node *) qpqual);
+               indexorderbys = (List *)
+                       replace_nestloop_params(root, (Node *) indexorderbys);
        }
 
        /* Finally ready to build the plan node */
@@ -1144,6 +1157,8 @@ create_indexscan_plan(PlannerInfo *root,
                                                           indexoid,
                                                           fixed_indexquals,
                                                           stripped_indexquals,
+                                                          fixed_indexorderbys,
+                                                          indexorderbys,
                                                           best_path->indexscandir);
 
        copy_path_costsize(&scan_plan->scan.plan, &best_path->path);
@@ -2394,6 +2409,63 @@ fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
        return fixed_indexquals;
 }
 
+/*
+ * fix_indexorderby_references
+ *       Adjust indexorderby clauses to the form the executor's index
+ *       machinery needs.
+ *
+ * This is a simplified version of fix_indexqual_references.  The input does
+ * not have RestrictInfo nodes, and we assume that indxpath.c already
+ * commuted the clauses to put the index keys on the left.  Also, we don't
+ * bother to support any cases except simple OpExprs, since nothing else
+ * is allowed for ordering operators.
+ *
+ * 'index_path' supplies the IndexOptInfo used to identify index columns.
+ * 'indexorderbys' is the list of bare ordering expressions to convert.
+ *
+ * Returns a new list holding one converted clause per input clause, in the
+ * same order.  Each clause is copied before its left operand is replaced,
+ * so the input list is left as it was.  Raises an error (elog ERROR) for
+ * any clause that is not an OpExpr with exactly two arguments.
+ */
+static List *
+fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path,
+                                                       List *indexorderbys)
+{
+       IndexOptInfo *index = index_path->indexinfo;
+       List       *fixed_indexorderbys;
+       ListCell   *l;
+
+       fixed_indexorderbys = NIL;
+
+       foreach(l, indexorderbys)
+       {
+               Node       *clause = (Node *) lfirst(l);
+
+               /*
+                * Replace any outer-relation variables with nestloop params.
+                *
+                * This also makes a copy of the clause, so it's safe to modify it
+                * in-place below.
+                */
+               clause = replace_nestloop_params(root, clause);
+
+               if (IsA(clause, OpExpr))
+               {
+                       OpExpr     *op = (OpExpr *) clause;
+
+                       if (list_length(op->args) != 2)
+                               elog(ERROR, "indexorderby clause is not binary opclause");
+
+                       /*
+                        * Now, determine which index attribute this is and change the
+                        * indexkey operand as needed.
+                        *
+                        * Only the first (left) argument is touched; the right-hand
+                        * argument is kept as returned by replace_nestloop_params.
+                        */
+                       linitial(op->args) = fix_indexqual_operand(linitial(op->args),
+                                                                                                          index);
+               }
+               else
+                       elog(ERROR, "unsupported indexorderby type: %d",
+                                (int) nodeTag(clause));
+
+               fixed_indexorderbys = lappend(fixed_indexorderbys, clause);
+       }
+
+       return fixed_indexorderbys;
+}
+
 /*
  * fix_indexqual_operand
  *       Convert an indexqual expression to a Var referencing the index column.
@@ -2685,6 +2757,8 @@ make_indexscan(List *qptlist,
                           Oid indexid,
                           List *indexqual,
                           List *indexqualorig,
+                          List *indexorderby,
+                          List *indexorderbyorig,
                           ScanDirection indexscandir)
 {
        IndexScan  *node = makeNode(IndexScan);
@@ -2699,6 +2773,8 @@ make_indexscan(List *qptlist,
        node->indexid = indexid;
        node->indexqual = indexqual;
        node->indexqualorig = indexqualorig;
+       node->indexorderby = indexorderby;
+       node->indexorderbyorig = indexorderbyorig;
        node->indexorderdir = indexscandir;
 
        return node;
index a1e59005921eb9fe7b076c24d8c8c25317d3958e..6d0b3dbce95695e14fc2fe180874a66ceb6c1549 100644 (file)
@@ -3135,7 +3135,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
 
        /* Estimate the cost of index scan */
        indexScanPath = create_index_path(root, indexInfo,
-                                                                         NIL, NIL,
+                                                                         NIL, NIL, NIL,
                                                                          ForwardScanDirection, NULL);
 
        return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
index 9aef7fc35a2eb325e21ce95a958e759a8f0b619b..0074679207ac89ea4fd5e042f9be5ad74e559816 100644 (file)
@@ -301,6 +301,10 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset)
                                        fix_scan_list(glob, splan->indexqual, rtoffset);
                                splan->indexqualorig =
                                        fix_scan_list(glob, splan->indexqualorig, rtoffset);
+                               splan->indexorderby =
+                                       fix_scan_list(glob, splan->indexorderby, rtoffset);
+                               splan->indexorderbyorig =
+                                       fix_scan_list(glob, splan->indexorderbyorig, rtoffset);
                        }
                        break;
                case T_BitmapIndexScan:
index 754753cc12d9a8f7f685f4ad1b92ce1b60bef3df..39ef420284d3cdb7b14a3db807113767eb66b2a7 100644 (file)
@@ -1942,10 +1942,13 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
                case T_IndexScan:
                        finalize_primnode((Node *) ((IndexScan *) plan)->indexqual,
                                                          &context);
+                       finalize_primnode((Node *) ((IndexScan *) plan)->indexorderby,
+                                                         &context);
 
                        /*
                         * we need not look at indexqualorig, since it will have the same
-                        * param references as indexqual.
+                        * param references as indexqual.  Likewise, we can ignore
+                        * indexorderbyorig.
                         */
                        context.paramids = bms_add_members(context.paramids, scan_params);
                        break;
index 231d221b21e24661405cc58e9d9da776b2afb879..2439d814ce879b935be1c4a04676ea97908e469c 100644 (file)
@@ -414,6 +414,8 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel)
  * 'index' is a usable index.
  * 'clause_groups' is a list of lists of RestrictInfo nodes
  *                     to be used as index qual conditions in the scan.
+ * 'indexorderbys' is a list of bare expressions (no RestrictInfos)
+ *                     to be used as index ordering operators in the scan.
  * 'pathkeys' describes the ordering of the path.
  * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
  *                     for an ordered index, or NoMovementScanDirection for
@@ -427,6 +429,7 @@ IndexPath *
 create_index_path(PlannerInfo *root,
                                  IndexOptInfo *index,
                                  List *clause_groups,
+                                 List *indexorderbys,
                                  List *pathkeys,
                                  ScanDirection indexscandir,
                                  RelOptInfo *outer_rel)
@@ -463,6 +466,7 @@ create_index_path(PlannerInfo *root,
        pathnode->indexinfo = index;
        pathnode->indexclauses = allclauses;
        pathnode->indexquals = indexquals;
+       pathnode->indexorderbys = indexorderbys;
 
        pathnode->isjoininner = (outer_rel != NULL);
        pathnode->indexscandir = indexscandir;
@@ -504,7 +508,7 @@ create_index_path(PlannerInfo *root,
                pathnode->rows = rel->rows;
        }
 
-       cost_index(pathnode, root, index, indexquals, outer_rel);
+       cost_index(pathnode, root, index, indexquals, indexorderbys, outer_rel);
 
        return pathnode;
 }
index 95397aa7cee3132db72138443bd7181ff5c32889..ef87f724ae99d0cb6c5a0ef8faf2d554767986af 100644 (file)
@@ -2631,7 +2631,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
        examine_variable(root, right, 0, &rightvar);
 
        /* Extract the operator's declared left/right datatypes */
-       get_op_opfamily_properties(opno, opfamily,
+       get_op_opfamily_properties(opno, opfamily, false,
                                                           &op_strategy,
                                                           &op_lefttype,
                                                           &op_righttype);
@@ -4646,7 +4646,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                        if (min)
                        {
                                index_scan = index_beginscan(heapRel, indexRel, SnapshotNow,
-                                                                                        1, scankeys);
+                                                                                        1, 0);
+                               index_rescan(index_scan, scankeys, 1, NULL, 0);
 
                                /* Fetch first tuple in sortop's direction */
                                if ((tup = index_getnext(index_scan,
@@ -4677,7 +4678,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                        if (max && have_data)
                        {
                                index_scan = index_beginscan(heapRel, indexRel, SnapshotNow,
-                                                                                        1, scankeys);
+                                                                                        1, 0);
+                               index_rescan(index_scan, scankeys, 1, NULL, 0);
 
                                /* Fetch first tuple in reverse direction */
                                if ((tup = index_getnext(index_scan,
@@ -5644,7 +5646,9 @@ string_to_bytea_const(const char *str, size_t str_len)
 
 static void
 genericcostestimate(PlannerInfo *root,
-                                       IndexOptInfo *index, List *indexQuals,
+                                       IndexOptInfo *index,
+                                       List *indexQuals,
+                                       List *indexOrderBys,
                                        RelOptInfo *outer_rel,
                                        double numIndexTuples,
                                        Cost *indexStartupCost,
@@ -5856,7 +5860,8 @@ genericcostestimate(PlannerInfo *root,
         * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
         * indexqual operator.  Because we have numIndexTuples as a per-scan
         * number, we have to multiply by num_sa_scans to get the correct result
-        * for ScalarArrayOpExpr cases.
+        * for ScalarArrayOpExpr cases.  Similarly add in costs for any index
+        * ORDER BY expressions.
         *
         * Note: this neglects the possible costs of rechecking lossy operators
         * and OR-clause expressions.  Detecting that that might be needed seems
@@ -5864,11 +5869,15 @@ genericcostestimate(PlannerInfo *root,
         * inaccuracies here ...
         */
        cost_qual_eval(&index_qual_cost, indexQuals, root);
-       qual_op_cost = cpu_operator_cost * list_length(indexQuals);
-       qual_arg_cost = index_qual_cost.startup +
-               index_qual_cost.per_tuple - qual_op_cost;
+       qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
+       cost_qual_eval(&index_qual_cost, indexOrderBys, root);
+       qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+       qual_op_cost = cpu_operator_cost *
+               (list_length(indexQuals) + list_length(indexOrderBys));
+       qual_arg_cost -= qual_op_cost;
        if (qual_arg_cost < 0)          /* just in case... */
                qual_arg_cost = 0;
+
        *indexStartupCost = qual_arg_cost;
        *indexTotalCost += qual_arg_cost;
        *indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
@@ -5901,11 +5910,12 @@ btcostestimate(PG_FUNCTION_ARGS)
        PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
        List       *indexQuals = (List *) PG_GETARG_POINTER(2);
-       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+       List       *indexOrderBys = (List *) PG_GETARG_POINTER(3);
+       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
+       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
+       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
+       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
+       double     *indexCorrelation = (double *) PG_GETARG_POINTER(8);
        Oid                     relid;
        AttrNumber      colnum;
        VariableStatData vardata;
@@ -6082,7 +6092,8 @@ btcostestimate(PG_FUNCTION_ARGS)
                numIndexTuples = rint(numIndexTuples / num_sa_scans);
        }
 
-       genericcostestimate(root, index, indexQuals, outer_rel, numIndexTuples,
+       genericcostestimate(root, index, indexQuals, indexOrderBys,
+                                               outer_rel, numIndexTuples,
                                                indexStartupCost, indexTotalCost,
                                                indexSelectivity, indexCorrelation);
 
@@ -6206,13 +6217,14 @@ hashcostestimate(PG_FUNCTION_ARGS)
        PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
        List       *indexQuals = (List *) PG_GETARG_POINTER(2);
-       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
-
-       genericcostestimate(root, index, indexQuals, outer_rel, 0.0,
+       List       *indexOrderBys = (List *) PG_GETARG_POINTER(3);
+       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
+       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
+       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
+       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
+       double     *indexCorrelation = (double *) PG_GETARG_POINTER(8);
+
+       genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0,
                                                indexStartupCost, indexTotalCost,
                                                indexSelectivity, indexCorrelation);
 
@@ -6225,13 +6237,14 @@ gistcostestimate(PG_FUNCTION_ARGS)
        PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
        List       *indexQuals = (List *) PG_GETARG_POINTER(2);
-       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
-
-       genericcostestimate(root, index, indexQuals, outer_rel, 0.0,
+       List       *indexOrderBys = (List *) PG_GETARG_POINTER(3);
+       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
+       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
+       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
+       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
+       double     *indexCorrelation = (double *) PG_GETARG_POINTER(8);
+
+       genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0,
                                                indexStartupCost, indexTotalCost,
                                                indexSelectivity, indexCorrelation);
 
@@ -6262,11 +6275,12 @@ gincostestimate(PG_FUNCTION_ARGS)
        PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
        IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
        List       *indexQuals = (List *) PG_GETARG_POINTER(2);
-       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
-       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-       double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+       List       *indexOrderBys = (List *) PG_GETARG_POINTER(3);
+       RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
+       Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
+       Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
+       Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
+       double     *indexCorrelation = (double *) PG_GETARG_POINTER(8);
        ListCell           *l;
        int32              nfullscan = 0;
        List               *selectivityQuals;
@@ -6432,7 +6446,7 @@ gincostestimate(PG_FUNCTION_ARGS)
                 * Get the operator's strategy number and declared input data types
                 * within the index opfamily.
                 */
-               get_op_opfamily_properties(clause_op, index->opfamily[indexcol],
+               get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
                                                                   &strategy_op, &lefttype, &righttype);
 
                /*
@@ -6581,15 +6595,18 @@ gincostestimate(PG_FUNCTION_ARGS)
         * Add on index qual eval costs, much as in genericcostestimate
         */
        cost_qual_eval(&index_qual_cost, indexQuals, root);
-       qual_op_cost = cpu_operator_cost * list_length(indexQuals);
-       qual_arg_cost = index_qual_cost.startup +
-               index_qual_cost.per_tuple - qual_op_cost;
+       qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
+       cost_qual_eval(&index_qual_cost, indexOrderBys, root);
+       qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+       qual_op_cost = cpu_operator_cost *
+               (list_length(indexQuals) + list_length(indexOrderBys));
+       qual_arg_cost -= qual_op_cost;
        if (qual_arg_cost < 0)      /* just in case... */
                qual_arg_cost = 0;
 
        *indexStartupCost += qual_arg_cost;
        *indexTotalCost += qual_arg_cost;
-       *indexTotalCost += ( numTuples * *indexSelectivity ) * (cpu_index_tuple_cost + qual_op_cost);
+       *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
 
        PG_RETURN_VOID();
 }
index 9beae0d9ef18d47e153b0cca816a54455ee8cbdd..cbdfe05031f40b8e458166dd22a2da60e3b2f80c 100644 (file)
@@ -85,19 +85,42 @@ get_op_opfamily_strategy(Oid opno, Oid opfamily)
        return result;
 }
 
+/*
+ * get_op_opfamily_sortfamily
+ *
+ *             Look up opno as an ordering operator (AMOP_ORDER) of the specified
+ *             opfamily; return its amopsortfamily OID, or InvalidOid if not found.
+ */
+Oid
+get_op_opfamily_sortfamily(Oid opno, Oid opfamily)
+{
+       HeapTuple       tp;
+       Form_pg_amop amop_tup;
+       Oid                     result;
+
+       tp = SearchSysCache3(AMOPOPID,
+                                                ObjectIdGetDatum(opno),
+                                                CharGetDatum(AMOP_ORDER),
+                                                ObjectIdGetDatum(opfamily));
+       if (!HeapTupleIsValid(tp))
+               return InvalidOid;              /* no AMOP_ORDER entry; not an error */
+       amop_tup = (Form_pg_amop) GETSTRUCT(tp);
+       result = amop_tup->amopsortfamily;      /* copy out before release */
+       ReleaseSysCache(tp);
+       return result;
+}
+
 /*
  * get_op_opfamily_properties
  *
  *             Get the operator's strategy number and declared input data types
  *             within the specified opfamily.
  *
- * This function only considers search operators, not ordering operators.
- *
  * Caller should already have verified that opno is a member of opfamily,
  * therefore we raise an error if the tuple is not found.
  */
 void
-get_op_opfamily_properties(Oid opno, Oid opfamily,
+get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op,
                                                   int *strategy,
                                                   Oid *lefttype,
                                                   Oid *righttype)
@@ -107,7 +130,7 @@ get_op_opfamily_properties(Oid opno, Oid opfamily,
 
        tp = SearchSysCache3(AMOPOPID,
                                                 ObjectIdGetDatum(opno),
-                                                CharGetDatum(AMOP_SEARCH),
+                                                CharGetDatum(ordering_op ? AMOP_ORDER : AMOP_SEARCH),
                                                 ObjectIdGetDatum(opfamily));
        if (!HeapTupleIsValid(tp))
                elog(ERROR, "operator %u is not a member of opfamily %u",
index 48380ef32fef2103356fcb7d1286ccc254807b24..896fb75fddb1549628af744d063d91300abd5aec 100644 (file)
@@ -135,11 +135,13 @@ extern bool index_insert(Relation indexRelation,
 extern IndexScanDesc index_beginscan(Relation heapRelation,
                                Relation indexRelation,
                                Snapshot snapshot,
-                               int nkeys, ScanKey key);
+                               int nkeys, int norderbys);
 extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation,
                                           Snapshot snapshot,
-                                          int nkeys, ScanKey key);
-extern void index_rescan(IndexScanDesc scan, ScanKey key);
+                                          int nkeys);
+extern void index_rescan(IndexScanDesc scan,
+                        ScanKey keys, int nkeys,
+                        ScanKey orderbys, int norderbys);
 extern void index_endscan(IndexScanDesc scan);
 extern void index_markpos(IndexScanDesc scan);
 extern void index_restrpos(IndexScanDesc scan);
@@ -161,7 +163,7 @@ extern FmgrInfo *index_getprocinfo(Relation irel, AttrNumber attnum,
  * index access method support routines (in genam.c)
  */
 extern IndexScanDesc RelationGetIndexScan(Relation indexRelation,
-                                        int nkeys, ScanKey key);
+                                        int nkeys, int norderbys);
 extern void IndexScanEnd(IndexScanDesc scan);
 extern char *BuildIndexValueDescription(Relation indexRelation,
                                                   Datum *values, bool *isnull);
index b4ec01ed12dfdcb0f0745f9c3f917a5c9bad61c1..f412fc3844498081fc9e4913d052195248799414 100644 (file)
@@ -62,8 +62,10 @@ typedef struct IndexScanDescData
        Relation        heapRelation;   /* heap relation descriptor, or NULL */
        Relation        indexRelation;  /* index relation descriptor */
        Snapshot        xs_snapshot;    /* snapshot to see */
-       int                     numberOfKeys;   /* number of scan keys */
-       ScanKey         keyData;                /* array of scan key descriptors */
+       int                     numberOfKeys;   /* number of index qualifier conditions */
+       int                     numberOfOrderBys;       /* number of ordering operators */
+       ScanKey         keyData;                        /* array of index qualifier descriptors */
+       ScanKey         orderByData;            /* array of ordering op descriptors */
 
        /* signaling to index AM about killing index tuples */
        bool            kill_prior_tuple;               /* last-returned tuple is dead */
index fcf81ba6abf8968825c93011ed1c6c48a4f024ea..c30a44bde377e3ce0584bd0f25553594cf88431c 100644 (file)
@@ -60,6 +60,11 @@ typedef uint16 StrategyNumber;
  * supported only for index scans, not heap scans; and not all index AMs
  * support them.
  *
+ * A ScanKey can also represent an ordering operator invocation, that is
+ * an ordering requirement "ORDER BY indexedcol op constant".  This looks
+ * the same as a comparison operator, except that the operator doesn't
+ * (usually) yield boolean.  We mark such ScanKeys with SK_ORDER_BY.
+ *
  * Note: in some places, ScanKeys are used as a convenient representation
  * for the invocation of an access method support procedure.  In this case
  * sk_strategy/sk_subtype are not meaningful, and sk_func may refer to a
@@ -122,6 +127,7 @@ typedef ScanKeyData *ScanKey;
 #define SK_SEARCHNULL          0x0020          /* scankey represents "col IS NULL" */
 #define SK_SEARCHNOTNULL       0x0040          /* scankey represents "col IS NOT
                                                                                 * NULL" */
+#define SK_ORDER_BY                    0x0080          /* scankey is for ORDER BY op */
 
 
 /*
index 8698a43371fbc4ffde7bfafb3daf3e38977ab2ce..f28162b439d4fe17dc4ad4cce23acdd42ee78b40 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201011241
+#define CATALOG_VERSION_NO     201012021
 
 #endif
index 5a18dee0bdbbbe8d1bd03c005ec2f86cba771614..a729690affe865dfae7c7b08405af45cf183d4d6 100644 (file)
@@ -52,11 +52,11 @@ CATALOG(pg_am,2601)
        bool            amclusterable;  /* does AM support cluster command? */
        Oid                     amkeytype;              /* type of data in index, or InvalidOid */
        regproc         aminsert;               /* "insert this tuple" function */
-       regproc         ambeginscan;    /* "start new scan" function */
+       regproc         ambeginscan;    /* "prepare for index scan" function */
        regproc         amgettuple;             /* "next valid tuple" function, or 0 */
        regproc         amgetbitmap;    /* "fetch all valid tuples" function, or 0 */
-       regproc         amrescan;               /* "restart this scan" function */
-       regproc         amendscan;              /* "end this scan" function */
+       regproc         amrescan;               /* "(re)start index scan" function */
+       regproc         amendscan;              /* "end index scan" function */
        regproc         ammarkpos;              /* "mark current scan position" function */
        regproc         amrestrpos;             /* "restore marked scan position" function */
        regproc         ambuild;                /* "build new index" function */
index 25a391282a379084401e6a0a780d1340076cb004..611adef83c2a59dbd861e9523a25c9ebbd105b44 100644 (file)
@@ -679,7 +679,7 @@ DATA(insert OID = 331 (  btinsert              PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16 "2
 DESCR("btree(internal)");
 DATA(insert OID = 333 (  btbeginscan      PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_       btbeginscan _null_ _null_ _null_ ));
 DESCR("btree(internal)");
-DATA(insert OID = 334 (  btrescan                 PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ btrescan _null_ _null_ _null_ ));
+DATA(insert OID = 334 (  btrescan                 PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ btrescan _null_ _null_ _null_ ));
 DESCR("btree(internal)");
 DATA(insert OID = 335 (  btendscan                PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ btendscan _null_ _null_ _null_ ));
 DESCR("btree(internal)");
@@ -693,7 +693,7 @@ DATA(insert OID = 332 (  btbulkdelete          PGNSP PGUID 12 1 0 0 f f f t f v 4 0 22
 DESCR("btree(internal)");
 DATA(insert OID = 972 (  btvacuumcleanup   PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ btvacuumcleanup _null_ _null_ _null_ ));
 DESCR("btree(internal)");
-DATA(insert OID = 1268 (  btcostestimate   PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_     btcostestimate _null_ _null_ _null_ ));
+DATA(insert OID = 1268 (  btcostestimate   PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_        btcostestimate _null_ _null_ _null_ ));
 DESCR("btree(internal)");
 DATA(insert OID = 2785 (  btoptions               PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  btoptions _null_ _null_ _null_ ));
 DESCR("btree(internal)");
@@ -798,7 +798,7 @@ DATA(insert OID = 441 (  hashinsert            PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16
 DESCR("hash(internal)");
 DATA(insert OID = 443 (  hashbeginscan    PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_       hashbeginscan _null_ _null_ _null_ ));
 DESCR("hash(internal)");
-DATA(insert OID = 444 (  hashrescan               PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ hashrescan _null_ _null_ _null_ ));
+DATA(insert OID = 444 (  hashrescan               PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ hashrescan _null_ _null_ _null_ ));
 DESCR("hash(internal)");
 DATA(insert OID = 445 (  hashendscan      PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ hashendscan _null_ _null_ _null_ ));
 DESCR("hash(internal)");
@@ -812,7 +812,7 @@ DATA(insert OID = 442 (  hashbulkdelete    PGNSP PGUID 12 1 0 0 f f f t f v 4 0
 DESCR("hash(internal)");
 DATA(insert OID = 425 (  hashvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ hashvacuumcleanup _null_ _null_ _null_ ));
 DESCR("hash(internal)");
-DATA(insert OID = 438 (  hashcostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_     hashcostestimate _null_ _null_ _null_ ));
+DATA(insert OID = 438 (  hashcostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_        hashcostestimate _null_ _null_ _null_ ));
 DESCR("hash(internal)");
 DATA(insert OID = 2786 (  hashoptions     PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  hashoptions _null_ _null_ _null_ ));
 DESCR("hash(internal)");
@@ -1094,7 +1094,7 @@ DATA(insert OID = 775 (  gistinsert                  PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16
 DESCR("gist(internal)");
 DATA(insert OID = 777 (  gistbeginscan    PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_       gistbeginscan _null_ _null_ _null_ ));
 DESCR("gist(internal)");
-DATA(insert OID = 778 (  gistrescan               PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ gistrescan _null_ _null_ _null_ ));
+DATA(insert OID = 778 (  gistrescan               PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gistrescan _null_ _null_ _null_ ));
 DESCR("gist(internal)");
 DATA(insert OID = 779 (  gistendscan      PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ gistendscan _null_ _null_ _null_ ));
 DESCR("gist(internal)");
@@ -1108,7 +1108,7 @@ DATA(insert OID = 776 (  gistbulkdelete    PGNSP PGUID 12 1 0 0 f f f t f v 4 0
 DESCR("gist(internal)");
 DATA(insert OID = 2561 (  gistvacuumcleanup   PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ gistvacuumcleanup _null_ _null_ _null_ ));
 DESCR("gist(internal)");
-DATA(insert OID = 772 (  gistcostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_     gistcostestimate _null_ _null_ _null_ ));
+DATA(insert OID = 772 (  gistcostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_        gistcostestimate _null_ _null_ _null_ ));
 DESCR("gist(internal)");
 DATA(insert OID = 2787 (  gistoptions     PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  gistoptions _null_ _null_ _null_ ));
 DESCR("gist(internal)");
@@ -4335,7 +4335,7 @@ DATA(insert OID = 2732 (  gininsert                  PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16
 DESCR("gin(internal)");
 DATA(insert OID = 2733 (  ginbeginscan    PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_       ginbeginscan _null_ _null_ _null_ ));
 DESCR("gin(internal)");
-DATA(insert OID = 2734 (  ginrescan               PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ ginrescan _null_ _null_ _null_ ));
+DATA(insert OID = 2734 (  ginrescan               PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ ginrescan _null_ _null_ _null_ ));
 DESCR("gin(internal)");
 DATA(insert OID = 2735 (  ginendscan      PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ ginendscan _null_ _null_ _null_ ));
 DESCR("gin(internal)");
@@ -4349,7 +4349,7 @@ DATA(insert OID = 2739 (  ginbulkdelete    PGNSP PGUID 12 1 0 0 f f f t f v 4 0
 DESCR("gin(internal)");
 DATA(insert OID = 2740 (  ginvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ ginvacuumcleanup _null_ _null_ _null_ ));
 DESCR("gin(internal)");
-DATA(insert OID = 2741 (  gincostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_     gincostestimate _null_ _null_ _null_ ));
+DATA(insert OID = 2741 (  gincostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_        gincostestimate _null_ _null_ _null_ ));
 DESCR("gin(internal)");
 DATA(insert OID = 2788 (  ginoptions      PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  ginoptions _null_ _null_ _null_ ));
 DESCR("gin(internal)");
index 48d35e4a48a72e18d8119739e382c3384d548acb..d1e0f380c0aca43bbdf2357156176a5277b854df 100644 (file)
@@ -25,8 +25,8 @@ extern void ExecReScanIndexScan(IndexScanState *node);
 
 /* routines exported to share code with nodeBitmapIndexscan.c */
 extern void ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
-                                          Index scanrelid,
-                                          List *quals, ScanKey *scanKeys, int *numScanKeys,
+                                          Index scanrelid, List *quals, bool isorderby,
+                                          ScanKey *scanKeys, int *numScanKeys,
                                           IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
                                           IndexArrayKeyInfo **arrayKeys, int *numArrayKeys);
 extern void ExecIndexEvalRuntimeKeys(ExprContext *econtext,
index 89f8e202e356e94d49d1f6a7b3729867c5d7e0c0..d669c24b981f11b1ad480c7c56cb22f33e613fac 100644 (file)
@@ -1182,10 +1182,12 @@ typedef struct
  *      IndexScanState information
  *
  *             indexqualorig      execution state for indexqualorig expressions
- *             ScanKeys                   Skey structures to scan index rel
- *             NumScanKeys                number of Skey structs
+ *             ScanKeys                   Skey structures for index quals
+ *             NumScanKeys                number of ScanKeys
+ *             OrderByKeys                Skey structures for index ordering operators
+ *             NumOrderByKeys     number of OrderByKeys
  *             RuntimeKeys                info about Skeys that must be evaluated at runtime
- *             NumRuntimeKeys     number of RuntimeKeys structs
+ *             NumRuntimeKeys     number of RuntimeKeys
  *             RuntimeKeysReady   true if runtime Skeys have been computed
  *             RuntimeContext     expr context for evaling runtime Skeys
  *             RelationDesc       index relation descriptor
@@ -1198,6 +1200,8 @@ typedef struct IndexScanState
        List       *indexqualorig;
        ScanKey         iss_ScanKeys;
        int                     iss_NumScanKeys;
+       ScanKey         iss_OrderByKeys;
+       int                     iss_NumOrderByKeys;
        IndexRuntimeKeyInfo *iss_RuntimeKeys;
        int                     iss_NumRuntimeKeys;
        bool            iss_RuntimeKeysReady;
@@ -1210,12 +1214,12 @@ typedef struct IndexScanState
  *      BitmapIndexScanState information
  *
  *             result                     bitmap to return output into, or NULL
- *             ScanKeys                   Skey structures to scan index rel
- *             NumScanKeys                number of Skey structs
+ *             ScanKeys                   Skey structures for index quals
+ *             NumScanKeys                number of ScanKeys
  *             RuntimeKeys                info about Skeys that must be evaluated at runtime
- *             NumRuntimeKeys     number of RuntimeKeys structs
+ *             NumRuntimeKeys     number of RuntimeKeys
  *             ArrayKeys                  info about Skeys that come from ScalarArrayOpExprs
- *             NumArrayKeys       number of ArrayKeys structs
+ *             NumArrayKeys       number of ArrayKeys
  *             RuntimeKeysReady   true if runtime Skeys have been computed
  *             RuntimeContext     expr context for evaling runtime Skeys
  *             RelationDesc       index relation descriptor
index fec4acea342b17b3539b0ca5977c92402f80af8f..b89eb55ad767ae138d3cb2ba8984221ae6e00586 100644 (file)
@@ -271,7 +271,10 @@ typedef Scan SeqScan;
  * be of the form (indexkey OP comparisonval) or (comparisonval OP indexkey).
  * The indexkey is a Var or expression referencing column(s) of the index's
  * base table. The comparisonval might be any expression, but it won't use
- * any columns of the base table.
+ * any columns of the base table.  The expressions are ordered by index
+ * column position (but items referencing the same index column can appear
+ * in any order).  indexqualorig is used at runtime only if we have to recheck
+ * a lossy indexqual.
  *
  * indexqual has the same form, but the expressions have been commuted if
  * necessary to put the indexkeys on the left, and the indexkeys are replaced
@@ -280,14 +283,26 @@ typedef Scan SeqScan;
  * table).     This is a bit hokey ... would be cleaner to use a special-purpose
  * node type that could not be mistaken for a regular Var.     But it will do
  * for now.
+ *
+ * indexorderbyorig is similarly the original form of any ORDER BY expressions
+ * that are being implemented by the index, while indexorderby is modified to
+ * have index column Vars on the left-hand side.  Here, multiple expressions
+ * must appear in exactly the ORDER BY order, and this is not necessarily the
+ * index column order.  Only the expressions are provided, not the auxiliary
+ * sort-order information from the ORDER BY SortGroupClauses; it's assumed
+ * that the sort ordering is fully determinable from the top-level operators.
+ * indexorderbyorig is unused at run time, but is needed for EXPLAIN.
+ * (Note these fields are used for amcanorderbyop cases, not amcanorder cases.)
  * ----------------
  */
 typedef struct IndexScan
 {
        Scan            scan;
        Oid                     indexid;                /* OID of index to scan */
-       List       *indexqual;          /* list of index quals (OpExprs) */
+       List       *indexqual;          /* list of index quals (usually OpExprs) */
        List       *indexqualorig;      /* the same in original form */
+       List       *indexorderby;               /* list of index ORDER BY exprs */
+       List       *indexorderbyorig;   /* the same in original form */
        ScanDirection indexorderdir;    /* forward or backward or don't care */
 } IndexScan;
 
index d084338f356206c354a0a85179ea358862eab5bb..e7ebcfcc81ab64053f630c08604127d136007ffb 100644 (file)
@@ -631,6 +631,13 @@ typedef struct Path
  * indexable operators appear in 'indexclauses', they are replaced by the
  * derived indexscannable conditions in 'indexquals'.
  *
+ * 'indexorderbys', if not NIL, is a list of ORDER BY expressions that have
+ * been found to be usable as ordering operators for an amcanorderbyop index.
+ * Note that these are not RestrictInfos, just bare expressions, since they
+ * generally won't yield booleans.  The list will match the path's pathkeys.
+ * Also, unlike the case for quals, it's guaranteed that each expression has
+ * the index key on the left side of the operator.
+ *
  * 'isjoininner' is TRUE if the path is a nestloop inner scan (that is,
  * some of the index conditions are join rather than restriction clauses).
  * Note that the path costs will be calculated differently from a plain
@@ -663,6 +670,7 @@ typedef struct IndexPath
        IndexOptInfo *indexinfo;
        List       *indexclauses;
        List       *indexquals;
+       List       *indexorderbys;
        bool            isjoininner;
        ScanDirection indexscandir;
        Cost            indextotalcost;
index 8df1b95abe7fc7e9e378591a0af4826d9a78a439..48de2a989f08091b41a2018cde7f0b67bd73fe5e 100644 (file)
@@ -67,7 +67,7 @@ extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
                                        double index_pages, PlannerInfo *root);
 extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
 extern void cost_index(IndexPath *path, PlannerInfo *root, IndexOptInfo *index,
-                  List *indexQuals, RelOptInfo *outer_rel);
+                  List *indexQuals, List *indexOrderBys, RelOptInfo *outer_rel);
 extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
                                          Path *bitmapqual, RelOptInfo *outer_rel);
 extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root);
index 53ebe5756b73f5ce8c99d79a982264227fb1fc95..2dde5e07ef548d425266546db20a01ab8309243a 100644 (file)
@@ -31,6 +31,7 @@ extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel);
 extern IndexPath *create_index_path(PlannerInfo *root,
                                  IndexOptInfo *index,
                                  List *clause_groups,
+                                 List *indexorderbys,
                                  List *pathkeys,
                                  ScanDirection indexscandir,
                                  RelOptInfo *outer_rel);
index 62d15cca3663e10b8ef37663f427b4c1d986ba9e..5f41adfcc222c6127a9ca417274972f909c587ba 100644 (file)
@@ -22,7 +22,7 @@
 /*
  * Maximum number of arguments to a function.
  *
- * The minimum value is 8 (index cost estimation uses 8-argument functions).
+ * The minimum value is 9 (index cost estimation uses 9-argument functions).
  * The maximum possible value is around 600 (limited by index tuple size in
  * pg_proc's index; BLCKSZ larger than 8K would allow more).  Values larger
  * than needed will waste memory and processing time, but do not directly
index b6104d7decaec7bb033ae217ec0322082ee484ac..7bf3f360f5bd1cfe3fcca32be8afc3226b79d224 100644 (file)
@@ -32,7 +32,8 @@ extern PGDLLIMPORT get_attavgwidth_hook_type get_attavgwidth_hook;
 
 extern bool op_in_opfamily(Oid opno, Oid opfamily);
 extern int     get_op_opfamily_strategy(Oid opno, Oid opfamily);
-extern void get_op_opfamily_properties(Oid opno, Oid opfamily,
+extern Oid     get_op_opfamily_sortfamily(Oid opno, Oid opfamily);
+extern void get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op,
                                                   int *strategy,
                                                   Oid *lefttype,
                                                   Oid *righttype);