Repair some REINDEX problems per recent discussions. The relcache is

[postgresql] / src / backend / executor / nodeIndexscan.c
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c

index c731cb1717be9f3e618d6e17fac734ee12f87790..6ab2f0a47bd7f2eaf71b1dd6d7dc1ab1d91278a4 100644 (file)
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -1,20 +1,19 @@
  /*-------------------------------------------------------------------------
   *
- * nodeIndexscan.c--
+ * nodeIndexscan.c
   *       Routines to support indexes and indexed scans of relations
   *
- * Copyright (c) 1994, Regents of the University of California
+ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.31 1999/02/03 21:16:14 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.84 2003/09/24 18:54:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  /*
   * INTERFACE ROUTINES
- *             ExecInsertIndexTuples   inserts tuples into indices on result relation
- *
   *             ExecIndexScan                   scans a relation using indices
   *             ExecIndexNext                   using index to retrieve next tuple
   *             ExecInitIndexScan               creates and initializes state info.
@@ -22,40 +21,58 @@
   *             ExecEndIndexScan                releases all storage.
   *             ExecIndexMarkPos                marks scan position.
   *             ExecIndexRestrPos               restores scan position.
- *
- *      NOTES
- *             the code supporting ExecInsertIndexTuples should be
- *             collected and merged with the genam stuff.
- *
   */
  #include "postgres.h"
  
-#include "executor/executor.h"
+#include "access/genam.h"
+#include "access/heapam.h"
  #include "executor/execdebug.h"
  #include "executor/nodeIndexscan.h"
-
-#include "optimizer/clauses.h" /* for get_op, get_leftop, get_rightop */
-#include "parser/parsetree.h"  /* for rt_fetch() */
-
-#include "access/skey.h"
-#include "access/heapam.h"
-#include "access/genam.h"
-#include "utils/palloc.h"
-#include "utils/mcxt.h"
-#include "catalog/index.h"
-#include "storage/bufmgr.h"
-#include "storage/lmgr.h"
+#include "miscadmin.h"
  #include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "parser/parsetree.h"
+
  
-/* ----------------
- *             Misc stuff to move to executor.h soon -cim 6/5/90
- * ----------------
- */
  #define NO_OP                  0
  #define LEFT_OP                        1
  #define RIGHT_OP               2
  
-static TupleTableSlot *IndexNext(IndexScan *node);
+/*
+ * In a multiple-index plan, we must take care to return any given tuple
+ * only once, even if it matches conditions of several index scans.  Our
+ * preferred way to do this is to record already-returned tuples in a hash
+ * table (using the TID as unique identifier).  However, in a very large
+ * scan this could conceivably run out of memory.  We limit the hash table
+ * to no more than SortMem KB; if it grows past that, we fall back to the
+ * pre-7.4 technique: evaluate the prior-scan index quals again for each
+ * tuple (which is space-efficient, but slow).
+ *
+ * When scanning backwards, we use scannum to determine when to emit the
+ * tuple --- we have to re-emit a tuple in the same scan as it was first
+ * encountered.
+ *
+ * Note: this code would break if the planner were ever to create a multiple
+ * index plan with overall backwards direction, because the hashtable code
+ * will emit a tuple the first time it is encountered (which would be the
+ * highest scan in which it matches the index), but the evaluate-the-quals
+ * code will emit a tuple in the lowest-numbered scan in which it's valid.
+ * This could be fixed at need by making the evaluate-the-quals case more
+ * complex.  Currently the planner will never create such a plan (since it
+ * considers multi-index plans unordered anyway), so there's no need for
+ * more complexity.
+ */
+typedef struct
+{
+       /* tid is the hash key and so must be first! */
+       ItemPointerData tid;            /* TID of a tuple we've returned */
+       int                     scannum;                /* number of scan we returned it in */
+} DupHashTabEntry;
+
+
+static TupleTableSlot *IndexNext(IndexScanState *node);
+static void create_duphash(IndexScanState *node);
+
  
  /* ----------------------------------------------------------------
   *             IndexNext
@@ -80,137 +97,208 @@ static TupleTableSlot *IndexNext(IndexScan *node);
   * ----------------------------------------------------------------
   */
  static TupleTableSlot *
-IndexNext(IndexScan *node)
+IndexNext(IndexScanState *node)
  {
         EState     *estate;
-       CommonScanState *scanstate;
-       IndexScanState *indexstate;
+       ExprContext *econtext;
         ScanDirection direction;
-       Snapshot        snapshot;
         IndexScanDescPtr scanDescs;
         IndexScanDesc scandesc;
-       Relation        heapRelation;
-       RetrieveIndexResult result;
-       HeapTuple               tuple;
+       Index           scanrelid;
+       HeapTuple       tuple;
         TupleTableSlot *slot;
-       Buffer          buffer = InvalidBuffer;
         int                     numIndices;
+       bool            bBackward;
+       int                     indexNumber;
  
-       /* ----------------
-        *      extract necessary information from index scan node
-        * ----------------
+       /*
+        * extract necessary information from index scan node
          */
-       estate = node->scan.plan.state;
+       estate = node->ss.ps.state;
         direction = estate->es_direction;
-       snapshot = estate->es_snapshot;
-       scanstate = node->scan.scanstate;
-       indexstate = node->indxstate;
-       scanDescs = indexstate->iss_ScanDescs;
-       heapRelation = scanstate->css_currentRelation;
-       numIndices = indexstate->iss_NumIndices;
-       slot = scanstate->css_ScanTupleSlot;
+       if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indxorderdir))
+       {
+               if (ScanDirectionIsForward(direction))
+                       direction = BackwardScanDirection;
+               else if (ScanDirectionIsBackward(direction))
+                       direction = ForwardScanDirection;
+       }
+       scanDescs = node->iss_ScanDescs;
+       numIndices = node->iss_NumIndices;
+       econtext = node->ss.ps.ps_ExprContext;
+       slot = node->ss.ss_ScanTupleSlot;
+       scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;
  
         /*
          * Check if we are evaluating PlanQual for tuple of this relation.
-        * Additional checking is not good, but no other way for now.
-        * We could introduce new nodes for this case and handle
-        * IndexScan --> NewNode switching in Init/ReScan plan...
+        * Additional checking is not good, but no other way for now. We could
+        * introduce new nodes for this case and handle IndexScan --> NewNode
+        * switching in Init/ReScan plan...
          */
-       if (estate->es_evTuple != NULL && 
-               estate->es_evTuple[node->scan.scanrelid - 1] != NULL)
+       if (estate->es_evTuple != NULL &&
+               estate->es_evTuple[scanrelid - 1] != NULL)
         {
-               int             iptr;
+               List       *qual;
  
-               slot->ttc_buffer = InvalidBuffer;
-               slot->ttc_shouldFree = false;
-               if (estate->es_evTupleNull[node->scan.scanrelid - 1])
-               {
-                       slot->val = NULL;       /* must not free tuple! */
-                       return (slot);
-               }
-               slot->val = estate->es_evTuple[node->scan.scanrelid - 1];
-               for (iptr = 0; iptr < numIndices; iptr++)
+               ExecClearTuple(slot);
+               if (estate->es_evTupleNull[scanrelid - 1])
+                       return slot;            /* return empty slot */
+
+               ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
+                                          slot, InvalidBuffer, false);
+
+               /* Does the tuple meet any of the OR'd indxqual conditions? */
+               econtext->ecxt_scantuple = slot;
+
+               ResetExprContext(econtext);
+
+               foreach(qual, node->indxqualorig)
                 {
-                       scanstate->cstate.cs_ExprContext->ecxt_scantuple = slot;
-                       if (ExecQual(nth(iptr, node->indxqualorig),
-                                                scanstate->cstate.cs_ExprContext))
+                       if (ExecQual((List *) lfirst(qual), econtext, false))
                                 break;
                 }
-               if (iptr == numIndices) /* would not be returned by indices */
+               if (qual == NIL)                /* would not be returned by indices */
                         slot->val = NULL;
+
                 /* Flag for the next call that no more tuples */
-               estate->es_evTupleNull[node->scan.scanrelid - 1] = true;
-               return (slot);
-       }
+               estate->es_evTupleNull[scanrelid - 1] = true;
  
-       tuple = &(indexstate->iss_htup);
+               return slot;
+       }
  
-       /* ----------------
-        *      ok, now that we have what we need, fetch an index tuple.
-        *      if scanning this index succeeded then return the
-        *      appropriate heap tuple.. else return NULL.
-        * ----------------
+       /*
+        * OK, now that we have what we need, fetch an index tuple.  If
+        * scanning this index succeeded, then return the appropriate heap
+        * tuple; else return NULL.
          */
-       while (indexstate->iss_IndexPtr < numIndices)
+       bBackward = ScanDirectionIsBackward(direction);
+       if (bBackward)
         {
-               scandesc = scanDescs[indexstate->iss_IndexPtr];
-               while ((result = index_getnext(scandesc, direction)) != NULL)
+               indexNumber = numIndices - node->iss_IndexPtr - 1;
+               if (indexNumber < 0)
                 {
-                       tuple->t_self = result->heap_iptr;
-                       heap_fetch(heapRelation, snapshot, tuple, &buffer);
-                       pfree(result);
-
-                       if (tuple->t_data != NULL)
+                       indexNumber = 0;
+                       node->iss_IndexPtr = numIndices - 1;
+               }
+       }
+       else
+       {
+               if ((indexNumber = node->iss_IndexPtr) < 0)
+               {
+                       indexNumber = 0;
+                       node->iss_IndexPtr = 0;
+               }
+       }
+       while (indexNumber < numIndices)
+       {
+               scandesc = scanDescs[node->iss_IndexPtr];
+               while ((tuple = index_getnext(scandesc, direction)) != NULL)
+               {
+                       /*
+                        * Store the scanned tuple in the scan tuple slot of the scan
+                        * state.  Note: we pass 'false' because tuples returned by
+                        * amgetnext are pointers onto disk pages and must not be
+                        * pfree()'d.
+                        */
+                       ExecStoreTuple(tuple,           /* tuple to store */
+                                                  slot,        /* slot to store in */
+                                                  scandesc->xs_cbuf,   /* buffer containing tuple */
+                                                  false);              /* don't pfree */
+
+                       /*
+                        * If it's a multiple-index scan, make sure not to double-report
+                        * a tuple matched by more than one index.  (See notes above.)
+                        */
+                       if (numIndices > 1)
                         {
-                               bool            prev_matches = false;
-                               int                     prev_index;
-
-                               /* ----------------
-                                *      store the scanned tuple in the scan tuple slot of
-                                *      the scan state.  Eventually we will only do this and not
-                                *      return a tuple.  Note: we pass 'false' because tuples
-                                *      returned by amgetnext are pointers onto disk pages and
-                                *      were not created with palloc() and so should not be pfree()'d.
-                                * ----------------
-                                */
-                               ExecStoreTuple(tuple,   /* tuple to store */
-                                                          slot,        /* slot to store in */
-                                                          buffer,      /* buffer associated with tuple  */
-                                                          false);      /* don't pfree */
-
-                               /*
-                                * We must check to see if the current tuple would have
-                                * been matched by an earlier index, so we don't double
-                                * report it. We do this by passing the tuple through
-                                * ExecQual and look for failure with all previous
-                                * qualifications.
-                                */
-                               for (prev_index = 0; prev_index < indexstate->iss_IndexPtr;
-                                        prev_index++)
+                               /* First try the hash table */
+                               if (node->iss_DupHash)
+                               {
+                                       DupHashTabEntry *entry;
+                                       bool    found;
+
+                                       entry = (DupHashTabEntry *)
+                                               hash_search(node->iss_DupHash,
+                                                                       &tuple->t_data->t_ctid,
+                                                                       HASH_ENTER,
+                                                                       &found);
+                                       if (entry == NULL ||
+                                               node->iss_DupHash->hctl->nentries > node->iss_MaxHash)
+                                       {
+                                               /* out of memory (either hard or soft limit) */
+                                               /* release hash table and fall thru to old code */
+                                               hash_destroy(node->iss_DupHash);
+                                               node->iss_DupHash = NULL;
+                                       }
+                                       else if (found)
+                                       {
+                                               /* pre-existing entry */
+
+                                               /*
+                                                * It's duplicate if first emitted in a different
+                                                * scan.  If same scan, we must be backing up, so
+                                                * okay to emit again.
+                                                */
+                                               if (entry->scannum != node->iss_IndexPtr)
+                                               {
+                                                       /* Dup, so drop it and loop back for another */
+                                                       ExecClearTuple(slot);
+                                                       continue;
+                                               }
+                                       }
+                                       else
+                                       {
+                                               /* new entry, finish filling it in */
+                                               entry->scannum = node->iss_IndexPtr;
+                                       }
+                               }
+                               /* If hash table has overflowed, do it the hard way */
+                               if (node->iss_DupHash == NULL &&
+                                       node->iss_IndexPtr > 0)
                                 {
-                                       scanstate->cstate.cs_ExprContext->ecxt_scantuple = slot;
-                                       if (ExecQual(nth(prev_index, node->indxqualorig),
-                                                                scanstate->cstate.cs_ExprContext))
+                                       bool            prev_matches = false;
+                                       int                     prev_index;
+                                       List       *qual;
+
+                                       econtext->ecxt_scantuple = slot;
+                                       ResetExprContext(econtext);
+                                       qual = node->indxqualorig;
+                                       for (prev_index = 0;
+                                                prev_index < node->iss_IndexPtr;
+                                                prev_index++)
+                                       {
+                                               if (ExecQual((List *) lfirst(qual), econtext, false))
+                                               {
+                                                       prev_matches = true;
+                                                       break;
+                                               }
+                                               qual = lnext(qual);
+                                       }
+                                       if (prev_matches)
                                         {
-                                               prev_matches = true;
-                                               break;
+                                               /* Dup, so drop it and loop back for another */
+                                               ExecClearTuple(slot);
+                                               continue;
                                         }
                                 }
-                               if (!prev_matches)
-                                       return slot;
-                               else
-                                       ExecClearTuple(slot);
                         }
-                       if (BufferIsValid(buffer))
-                               ReleaseBuffer(buffer);
+
+                       return slot;            /* OK to return tuple */
+               }
+
+               if (indexNumber < numIndices)
+               {
+                       indexNumber++;
+                       if (bBackward)
+                               node->iss_IndexPtr--;
+                       else
+                               node->iss_IndexPtr++;
                 }
-               if (indexstate->iss_IndexPtr < numIndices)
-                       indexstate->iss_IndexPtr++;
         }
-       /* ----------------
-        *      if we get here it means the index scan failed so we
-        *      are at the end of the scan..
-        * ----------------
+
+       /*
+        * If we get here, it means the index scan failed, so we are at the
+        * end of the scan.
          */
         return ExecClearTuple(slot);
  }
@@ -237,13 +325,19 @@ IndexNext(IndexScan *node)
   * ----------------------------------------------------------------
   */
  TupleTableSlot *
-ExecIndexScan(IndexScan *node)
+ExecIndexScan(IndexScanState *node)
  {
-       /* ----------------
-        *      use IndexNext as access method
-        * ----------------
+       /*
+        * If we have runtime keys and they've not already been set up, do it
+        * now.
+        */
+       if (node->iss_RuntimeKeyInfo && !node->iss_RuntimeKeysReady)
+               ExecReScan((PlanState *) node, NULL);
+
+       /*
+        * use IndexNext as access method
          */
-       return ExecScan(&node->scan, IndexNext);
+       return ExecScan(&node->ss, (ExecScanAccessMtd) IndexNext);
  }
  
  /* ----------------------------------------------------------------
@@ -252,97 +346,99 @@ ExecIndexScan(IndexScan *node)
   *             Recalculates the value of the scan keys whose value depends on
   *             information known at runtime and rescans the indexed relation.
   *             Updating the scan key was formerly done separately in
- *             ExecUpdateIndexScanKeys. Integrating it into ReScan
- *             makes rescans of indices and
- *             relations/general streams more uniform.
+ *             ExecUpdateIndexScanKeys. Integrating it into ReScan makes
+ *             rescans of indices and relations/general streams more uniform.
   *
   * ----------------------------------------------------------------
   */
  void
-ExecIndexReScan(IndexScan *node, ExprContext *exprCtxt, Plan *parent)
+ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
  {
         EState     *estate;
-       IndexScanState *indexstate;
-       ScanDirection direction;
+       ExprContext *econtext;
+       int                     numIndices;
         IndexScanDescPtr scanDescs;
         ScanKey    *scanKeys;
-       IndexScanDesc scan;
-       ScanKey         skey;
-       int                     numIndices;
-       int                     i;
-
-       Pointer    *runtimeKeyInfo;
+       ExprState ***runtimeKeyInfo;
         int                *numScanKeys;
-       List       *indxqual;
-       List       *qual;
-       int                     n_keys;
-       ScanKey         scan_keys;
-       int                *run_keys;
+       Index           scanrelid;
+       int                     i;
         int                     j;
-       Expr       *clause;
-       Node       *scanexpr;
-       Datum           scanvalue;
-       bool            isNull;
-       bool            isDone;
-
-       indexstate = node->indxstate;
-       estate = node->scan.plan.state;
-       direction = estate->es_direction;
-       numIndices = indexstate->iss_NumIndices;
-       scanDescs = indexstate->iss_ScanDescs;
-       scanKeys = indexstate->iss_ScanKeys;
-       runtimeKeyInfo = (Pointer *) indexstate->iss_RuntimeKeyInfo;
-       indxqual = node->indxqual;
-       numScanKeys = indexstate->iss_NumScanKeys;
-       indexstate->iss_IndexPtr = 0;
  
-       /* If this is re-scanning of PlanQual ... */
-       if (estate->es_evTuple != NULL && 
-               estate->es_evTuple[node->scan.scanrelid - 1] != NULL)
+       estate = node->ss.ps.state;
+       econtext = node->iss_RuntimeContext;            /* context for runtime
+                                                                                                * keys */
+       numIndices = node->iss_NumIndices;
+       scanDescs = node->iss_ScanDescs;
+       scanKeys = node->iss_ScanKeys;
+       runtimeKeyInfo = node->iss_RuntimeKeyInfo;
+       numScanKeys = node->iss_NumScanKeys;
+       scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;
+
+       if (econtext)
         {
-               estate->es_evTupleNull[node->scan.scanrelid - 1] = false;
-               return;
-       }
+               /*
+                * If we are being passed an outer tuple, save it for runtime key
+                * calc.  We also need to link it into the "regular" per-tuple
+                * econtext, so it can be used during indexqualorig evaluations.
+                */
+               if (exprCtxt != NULL)
+               {
+                       ExprContext *stdecontext;
  
-       /* it's possible in subselects */
-       if (exprCtxt == NULL)
-               exprCtxt = node->scan.scanstate->cstate.cs_ExprContext;
+                       econtext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
+                       stdecontext = node->ss.ps.ps_ExprContext;
+                       stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
+               }
  
-       node->scan.scanstate->cstate.cs_ExprContext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
+               /*
+                * Reset the runtime-key context so we don't leak memory as each
+                * outer tuple is scanned.  Note this assumes that we will
+                * recalculate *all* runtime keys on each call.
+                */
+               ResetExprContext(econtext);
+       }
  
         /*
-        * get the index qualifications and recalculate the appropriate values
+        * If we are doing runtime key calculations (ie, the index keys depend
+        * on data from an outer scan), compute the new key values
          */
-       for (i = 0; i < numIndices; i++)
+       if (runtimeKeyInfo)
         {
-               qual = nth(i, indxqual);
-               n_keys = numScanKeys[i];
-               scan_keys = (ScanKey) scanKeys[i];
-
-               if (runtimeKeyInfo)
+               for (i = 0; i < numIndices; i++)
                 {
-                       run_keys = (int *) runtimeKeyInfo[i];
+                       int                     n_keys;
+                       ScanKey         scan_keys;
+                       ExprState **run_keys;
+
+                       n_keys = numScanKeys[i];
+                       scan_keys = scanKeys[i];
+                       run_keys = runtimeKeyInfo[i];
+
                         for (j = 0; j < n_keys; j++)
                         {
-
                                 /*
                                  * If we have a run-time key, then extract the run-time
                                  * expression and evaluate it with respect to the current
                                  * outer tuple.  We then stick the result into the scan
                                  * key.
+                                *
+                                * Note: the result of the eval could be a pass-by-ref value
+                                * that's stored in the outer scan's tuple, not in
+                                * econtext->ecxt_per_tuple_memory.  We assume that the
+                                * outer tuple will stay put throughout our scan.  If this
+                                * is wrong, we could copy the result into our context
+                                * explicitly, but I think that's not necessary...
                                  */
-                               if (run_keys[j] != NO_OP)
+                               if (run_keys[j] != NULL)
                                 {
-                                       clause = nth(j, qual);
-                                       scanexpr = (run_keys[j] == RIGHT_OP) ?
-                                               (Node *) get_rightop(clause) : (Node *) get_leftop(clause);
-
-                                       /*
-                                        * pass in isDone but ignore it.  We don't iterate in
-                                        * quals
-                                        */
-                                       scanvalue = (Datum)
-                                               ExecEvalExpr(scanexpr, exprCtxt, &isNull, &isDone);
+                                       Datum           scanvalue;
+                                       bool            isNull;
+
+                                       scanvalue = ExecEvalExprSwitchContext(run_keys[j],
+                                                                                                                 econtext,
+                                                                                                                 &isNull,
+                                                                                                                 NULL);
                                         scan_keys[j].sk_argument = scanvalue;
                                         if (isNull)
                                                 scan_keys[j].sk_flags |= SK_ISNULL;
@@ -351,101 +447,100 @@ ExecIndexReScan(IndexScan *node, ExprContext *exprCtxt, Plan *parent)
                                 }
                         }
                 }
-               scan = scanDescs[i];
-               skey = scanKeys[i];
-               index_rescan(scan, direction, skey);
+
+               node->iss_RuntimeKeysReady = true;
+       }
+
+       /* If this is re-scanning of PlanQual ... */
+       if (estate->es_evTuple != NULL &&
+               estate->es_evTuple[scanrelid - 1] != NULL)
+       {
+               estate->es_evTupleNull[scanrelid - 1] = false;
+               return;
+       }
+
+       /* reset hash table */
+       if (numIndices > 1)
+       {
+               if (node->iss_DupHash)
+                       hash_destroy(node->iss_DupHash);
+               create_duphash(node);
+       }
+
+       /* reset index scans */
+       if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indxorderdir))
+               node->iss_IndexPtr = numIndices;
+       else
+               node->iss_IndexPtr = -1;
+
+       for (i = 0; i < numIndices; i++)
+       {
+               IndexScanDesc scan = scanDescs[i];
+               ScanKey         skey = scanKeys[i];
+
+               index_rescan(scan, skey);
         }
-       /* ----------------
-        *      perhaps return something meaningful
-        * ----------------
-        */
-       return;
  }
  
  /* ----------------------------------------------------------------
   *             ExecEndIndexScan
- *
- * old comments
- *             Releases any storage allocated through C routines.
- *             Returns nothing.
   * ----------------------------------------------------------------
   */
  void
-ExecEndIndexScan(IndexScan *node)
+ExecEndIndexScan(IndexScanState *node)
  {
-       CommonScanState *scanstate;
-       IndexScanState *indexstate;
-       Pointer    *runtimeKeyInfo;
-       ScanKey    *scanKeys;
-       List       *indxqual;
-       int                *numScanKeys;
         int                     numIndices;
+       RelationPtr indexRelationDescs;
+       IndexScanDescPtr indexScanDescs;
+       Relation        relation;
         int                     i;
  
-       scanstate = node->scan.scanstate;
-       indexstate = node->indxstate;
-       indxqual = node->indxqual;
-       runtimeKeyInfo = (Pointer *) indexstate->iss_RuntimeKeyInfo;
-
-       /* ----------------
-        *      extract information from the node
-        * ----------------
+       /*
+        * extract information from the node
          */
-       numIndices = indexstate->iss_NumIndices;
-       scanKeys = indexstate->iss_ScanKeys;
-       numScanKeys = indexstate->iss_NumScanKeys;
+       numIndices = node->iss_NumIndices;
+       indexRelationDescs = node->iss_RelationDescs;
+       indexScanDescs = node->iss_ScanDescs;
+       relation = node->ss.ss_currentRelation;
  
-       /* ----------------
-        *      Free the projection info and the scan attribute info
-        *
-        *      Note: we don't ExecFreeResultType(scanstate)
-        *                because the rule manager depends on the tupType
-        *                returned by ExecMain().  So for now, this
-        *                is freed at end-transaction time.  -cim 6/2/91
-        * ----------------
+       /*
+        * Free the exprcontext(s)
          */
-       ExecFreeProjectionInfo(&scanstate->cstate);
+       ExecFreeExprContext(&node->ss.ps);
+       if (node->iss_RuntimeContext)
+               FreeExprContext(node->iss_RuntimeContext);
  
-       /* ----------------
-        *      close the heap and index relations
-        * ----------------
+       /*
+        * clear out tuple table slots
          */
-       ExecCloseR((Plan *) node);
+       ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
+       ExecClearTuple(node->ss.ss_ScanTupleSlot);
  
-       /* ----------------
-        *      free the scan keys used in scanning the indices
-        * ----------------
+       /* drop hash table */
+       if (node->iss_DupHash)
+               hash_destroy(node->iss_DupHash);
+
+       /*
+        * close the index relations
          */
         for (i = 0; i < numIndices; i++)
         {
-               if (scanKeys[i] != NULL)
-                       pfree(scanKeys[i]);
-       }
-       pfree(scanKeys);
-       pfree(numScanKeys);
-
-       if (runtimeKeyInfo)
-       {
-               for (i = 0; i < numIndices; i++)
-               {
-                       List       *qual;
-                       int                     n_keys;
+               if (indexScanDescs[i] != NULL)
+                       index_endscan(indexScanDescs[i]);
  
-                       qual = nth(i, indxqual);
-                       n_keys = length(qual);
-                       if (n_keys > 0)
-                               pfree(runtimeKeyInfo[i]);
-               }
-               pfree(runtimeKeyInfo);
+               if (indexRelationDescs[i] != NULL)
+                       index_close(indexRelationDescs[i]);
         }
  
-       /* ----------------
-        *      clear out tuple table slots
-        * ----------------
+       /*
+        * close the heap relation.
+        *
+        * Currently, we do not release the AccessShareLock acquired by
+        * ExecInitIndexScan.  This lock should be held till end of
+        * transaction. (There is a faction that considers this too much
+        * locking, however.)
          */
-       ExecClearTuple(scanstate->cstate.cs_ResultTupleSlot);
-       ExecClearTuple(scanstate->css_ScanTupleSlot);
-/*       ExecClearTuple(scanstate->css_RawTupleSlot); */
+       heap_close(relation, NoLock);
  }
  
  /* ----------------------------------------------------------------
@@ -457,22 +552,20 @@ ExecEndIndexScan(IndexScan *node)
   * ----------------------------------------------------------------
   */
  void
-ExecIndexMarkPos(IndexScan *node)
+ExecIndexMarkPos(IndexScanState *node)
  {
-       IndexScanState *indexstate;
         IndexScanDescPtr indexScanDescs;
         IndexScanDesc scanDesc;
         int                     indexPtr;
  
-       indexstate = node->indxstate;
-       indexPtr = indexstate->iss_MarkIndexPtr = indexstate->iss_IndexPtr;
-       indexScanDescs = indexstate->iss_ScanDescs;
-       scanDesc = indexScanDescs[indexPtr];
+       indexPtr = node->iss_MarkIndexPtr = node->iss_IndexPtr; /* remember which index is current as the mark */
+       if (indexPtr >= 0 && indexPtr < node->iss_NumIndices)   /* -1 or iss_NumIndices: no scan descriptor to mark */
+       {
+               indexScanDescs = node->iss_ScanDescs;
+               scanDesc = indexScanDescs[indexPtr];
  
-#if 0
-       IndexScanMarkPosition(scanDesc);
-#endif
-       index_markpos(scanDesc);
+               index_markpos(scanDesc);        /* mark the position within that index's scan */
+       }
  }
  
  /* ----------------------------------------------------------------
@@ -481,32 +574,28 @@ ExecIndexMarkPos(IndexScan *node)
   * old comments
   *             Restores scan position by restoring the current index.
   *             Returns nothing.
- *
- *             XXX Assumes previously marked scan position belongs to current index
   * ----------------------------------------------------------------
   */
  void
-ExecIndexRestrPos(IndexScan *node)
+ExecIndexRestrPos(IndexScanState *node)
  {
-       IndexScanState *indexstate;
         IndexScanDescPtr indexScanDescs;
         IndexScanDesc scanDesc;
         int                     indexPtr;
  
-       indexstate = node->indxstate;
-       indexPtr = indexstate->iss_IndexPtr = indexstate->iss_MarkIndexPtr;
-       indexScanDescs = indexstate->iss_ScanDescs;
-       scanDesc = indexScanDescs[indexPtr];
+       indexPtr = node->iss_IndexPtr = node->iss_MarkIndexPtr; /* make the marked index the current one again */
+       if (indexPtr >= 0 && indexPtr < node->iss_NumIndices)   /* -1 or iss_NumIndices: no scan descriptor to restore */
+       {
+               indexScanDescs = node->iss_ScanDescs;
+               scanDesc = indexScanDescs[indexPtr];
  
-#if 0
-       IndexScanRestorePosition(scanDesc);
-#endif
-       index_restrpos(scanDesc);
+               index_restrpos(scanDesc);       /* restore the marked position within that index's scan */
+       }
  }
  
  /* ----------------------------------------------------------------
   *             ExecInitIndexScan
-  *
+ *
   *             Initializes the index scan's state information, creates
   *             scan keys, and opens the base and index relations.
   *
@@ -516,543 +605,441 @@ ExecIndexRestrPos(IndexScan *node)
   *
   * old comments
   *             Creates the run-time state information for the node and
- *             sets the relation id to contain relevant decriptors.
+ *             sets the relation id to contain relevant descriptors.
   *
   *             Parameters:
   *               node: IndexNode node produced by the planner.
   *               estate: the execution state initialized in InitPlan.
   * ----------------------------------------------------------------
   */
-bool
-ExecInitIndexScan(IndexScan *node, EState *estate, Plan *parent)
+IndexScanState *
+ExecInitIndexScan(IndexScan *node, EState *estate)
  {
         IndexScanState *indexstate;
-       CommonScanState *scanstate;
         List       *indxqual;
         List       *indxid;
+       List       *listscan;
         int                     i;
         int                     numIndices;
         int                     indexPtr;
         ScanKey    *scanKeys;
         int                *numScanKeys;
-       RelationPtr relationDescs;
+       RelationPtr indexDescs;
         IndexScanDescPtr scanDescs;
-       Pointer    *runtimeKeyInfo;
+       ExprState ***runtimeKeyInfo;
         bool            have_runtime_keys;
-       List       *rangeTable;
         RangeTblEntry *rtentry;
         Index           relid;
         Oid                     reloid;
-
         Relation        currentRelation;
-       HeapScanDesc currentScanDesc;
-       ScanDirection direction;
-       int                     baseid;
  
-       List       *execParam = NULL;
-
-       /* ----------------
-        *      assign execution state to node
-        * ----------------
+       /*
+        * create state structure
          */
-       node->scan.plan.state = estate;
+       indexstate = makeNode(IndexScanState);
+       indexstate->ss.ps.plan = (Plan *) node;
+       indexstate->ss.ps.state = estate;
  
-       /* --------------------------------
-        *      Part 1)  initialize scan state
+       /*
+        * Miscellaneous initialization
          *
-        *      create new CommonScanState for node
-        * --------------------------------
+        * create expression context for node
          */
-       scanstate = makeNode(CommonScanState);
-/*
-       scanstate->ss_ProcOuterFlag = false;
-       scanstate->ss_OldRelId = 0;
-*/
-
-       node->scan.scanstate = scanstate;
+       ExecAssignExprContext(estate, &indexstate->ss.ps);
  
-       /* ----------------
-        *      assign node's base_id .. we don't use AssignNodeBaseid() because
-        *      the increment is done later on after we assign the index scan's
-        *      scanstate.      see below.
-        * ----------------
+       /*
+        * initialize child expressions
          */
-       baseid = estate->es_BaseId;
-/*       scanstate->csstate.cstate.bnode.base_id = baseid; */
-       scanstate->cstate.cs_base_id = baseid;
+       indexstate->ss.ps.targetlist = (List *)
+               ExecInitExpr((Expr *) node->scan.plan.targetlist,
+                                        (PlanState *) indexstate);
+       indexstate->ss.ps.qual = (List *)
+               ExecInitExpr((Expr *) node->scan.plan.qual,
+                                        (PlanState *) indexstate);
+       indexstate->indxqual = (List *)
+               ExecInitExpr((Expr *) node->indxqual,
+                                        (PlanState *) indexstate);
+       indexstate->indxqualorig = (List *)
+               ExecInitExpr((Expr *) node->indxqualorig,
+                                        (PlanState *) indexstate);
+
+#define INDEXSCAN_NSLOTS 2
  
-       /* ----------------
-        *      create expression context for node
-        * ----------------
+       /*
+        * tuple table initialization
          */
-       ExecAssignExprContext(estate, &scanstate->cstate);
+       ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
+       ExecInitScanTupleSlot(estate, &indexstate->ss);
  
-#define INDEXSCAN_NSLOTS 3
-       /* ----------------
-        *      tuple table initialization
-        * ----------------
-        */
-       ExecInitResultTupleSlot(estate, &scanstate->cstate);
-       ExecInitScanTupleSlot(estate, scanstate);
-/*       ExecInitRawTupleSlot(estate, scanstate); */
-
-       /* ----------------
-        *      initialize projection info.  result type comes from scan desc
-        *      below..
-        * ----------------
+       /*
+        * Initialize index-specific scan state
          */
-       ExecAssignProjectionInfo((Plan *) node, &scanstate->cstate);
-
-       /* --------------------------------
-         *  Part 2)  initialize index scan state
-         *
-         *  create new IndexScanState for node
-         * --------------------------------
-         */
-       indexstate = makeNode(IndexScanState);
         indexstate->iss_NumIndices = 0;
-       indexstate->iss_IndexPtr = 0;
+       indexstate->iss_IndexPtr = -1;
         indexstate->iss_ScanKeys = NULL;
         indexstate->iss_NumScanKeys = NULL;
         indexstate->iss_RuntimeKeyInfo = NULL;
+       indexstate->iss_RuntimeContext = NULL;
+       indexstate->iss_RuntimeKeysReady = false;
         indexstate->iss_RelationDescs = NULL;
         indexstate->iss_ScanDescs = NULL;
  
-       node->indxstate = indexstate;
-
-       /* ----------------
-        *      assign base id to index scan state also
-        * ----------------
-        */
-       indexstate->cstate.cs_base_id = baseid;
-       baseid++;
-       estate->es_BaseId = baseid;
-
-       /* ----------------
-        *      get the index node information
-        * ----------------
+       /*
+        * get the index node information
          */
         indxid = node->indxid;
-       indxqual = node->indxqual;
         numIndices = length(indxid);
-       indexPtr = 0;
+       indexPtr = -1;
  
         CXT1_printf("ExecInitIndexScan: context is %d\n", CurrentMemoryContext);
  
-       /* ----------------
-        *      scanKeys is used to keep track of the ScanKey's. This is needed
-        *      because a single scan may use several indices and each index has
-        *      its own ScanKey.
-        * ----------------
+       /*
+        * scanKeys is used to keep track of the ScanKey's. This is needed
+        * because a single scan may use several indices and each index has
+        * its own ScanKey.
          */
         numScanKeys = (int *) palloc(numIndices * sizeof(int));
         scanKeys = (ScanKey *) palloc(numIndices * sizeof(ScanKey));
-       relationDescs = (RelationPtr) palloc(numIndices * sizeof(Relation));
+       indexDescs = (RelationPtr) palloc(numIndices * sizeof(Relation));
         scanDescs = (IndexScanDescPtr) palloc(numIndices * sizeof(IndexScanDesc));
  
-       /* ----------------
-        *      initialize runtime key info.
-        * ----------------
+       /*
+        * initialize space for runtime key info (may not be needed)
          */
         have_runtime_keys = false;
-       runtimeKeyInfo = (Pointer *)
-               palloc(numIndices * sizeof(Pointer));
+       runtimeKeyInfo = (ExprState ***) palloc0(numIndices * sizeof(ExprState **));
  
-       /* ----------------
-        *      build the index scan keys from the index qualification
-        * ----------------
+       /*
+        * build the index scan keys from the index qualification
          */
+       indxqual = node->indxqual;
         for (i = 0; i < numIndices; i++)
         {
                 int                     j;
                 List       *qual;
                 int                     n_keys;
                 ScanKey         scan_keys;
-               int                *run_keys;
+               ExprState **run_keys;
  
-               qual = nth(i, indxqual);
+               qual = lfirst(indxqual);
+               indxqual = lnext(indxqual);
                 n_keys = length(qual);
-               scan_keys = (n_keys <= 0) ? NULL :
+               scan_keys = (n_keys <= 0) ? (ScanKey) NULL :
                         (ScanKey) palloc(n_keys * sizeof(ScanKeyData));
-               run_keys = (n_keys <= 0) ? NULL :
-                       (int *) palloc(n_keys * sizeof(int));
+               run_keys = (n_keys <= 0) ? (ExprState **) NULL :
+                       (ExprState **) palloc(n_keys * sizeof(ExprState *));
  
-               CXT1_printf("ExecInitIndexScan: context is %d\n",
-                                       CurrentMemoryContext);
+               CXT1_printf("ExecInitIndexScan: context is %d\n", CurrentMemoryContext);
  
-               /* ----------------
-                *      for each opclause in the given qual,
-                *      convert each qual's opclause into a single scan key
-                * ----------------
+               /*
+                * for each opclause in the given qual, convert each qual's
+                * opclause into a single scan key
                  */
+               listscan = qual;
                 for (j = 0; j < n_keys; j++)
                 {
-                       Expr       *clause; /* one part of index qual */
-                       Oper       *op;         /* operator used in scan.. */
-                       Node       *leftop; /* expr on lhs of operator */
-                       Node       *rightop;/* expr on rhs ... */
+                       OpExpr     *clause; /* one clause of index qual */
+                       Expr       *leftop; /* expr on lhs of operator */
+                       Expr       *rightop;    /* expr on rhs ... */
                         bits16          flags = 0;
  
-                       int                     scanvar;/* which var identifies varattno */
+                       int                     scanvar;        /* which var identifies varattno */
                         AttrNumber      varattno = 0;   /* att number used in scan */
-                       Oid                     opid;   /* operator id used in scan */
+                       Oid                     opfuncid;               /* operator proc id used in scan */
                         Datum           scanvalue = 0;  /* value used in scan (if const) */
  
-                       /* ----------------
-                        *      extract clause information from the qualification
-                        * ----------------
+                       /*
+                        * extract clause information from the qualification
                          */
-                       clause = nth(j, qual);
+                       clause = (OpExpr *) lfirst(listscan);
+                       listscan = lnext(listscan);
  
-                       op = (Oper *) clause->oper;
-                       if (!IsA(op, Oper))
-                               elog(ERROR, "ExecInitIndexScan: op not an Oper!");
+                       if (!IsA(clause, OpExpr))
+                               elog(ERROR, "indxqual is not an OpExpr");
  
-                       opid = op->opid;
+                       opfuncid = clause->opfuncid;
  
-                       /* ----------------
-                        *      Here we figure out the contents of the index qual.
-                        *      The usual case is (op var const) or (op const var)
-                        *      which means we form a scan key for the attribute
-                        *      listed in the var node and use the value of the const.
+                       /*
+                        * Here we figure out the contents of the index qual. The
+                        * usual case is (var op const) or (const op var) which means
+                        * we form a scan key for the attribute listed in the var node
+                        * and use the value of the const.
+                        *
+                        * If we don't have a const node, then it means that one of the
+                        * var nodes refers to the "scan" tuple and is used to
+                        * determine which attribute to scan, and the other expression
+                        * is used to calculate the value used in scanning the index.
+                        *
+                        * This means our index scan's scan key is a function of
+                        * information obtained during the execution of the plan in
+                        * which case we need to recalculate the index scan key at run
+                        * time.
                          *
-                        *      If we don't have a const node, then it means that
-                        *      one of the var nodes refers to the "scan" tuple and
-                        *      is used to determine which attribute to scan, and the
-                        *      other expression is used to calculate the value used in
-                        *      scanning the index.
+                        * Hence, we set have_runtime_keys to true and place the
+                        * appropriate subexpression in run_keys. The corresponding
+                        * scan key values are recomputed at run time.
                          *
-                        *      This means our index scan's scan key is a function of
-                        *      information obtained during the execution of the plan
-                        *      in which case we need to recalculate the index scan key
-                        *      at run time.
+                        * XXX Although this code *thinks* it can handle an indexqual
+                        * with the indexkey on either side, in fact it cannot.
+                        * Indexscans only work with quals that have the indexkey on
+                        * the left (the planner/optimizer makes sure it never passes
+                        * anything else).      The reason: the scankey machinery has no
+                        * provision for distinguishing which side of the operator is
+                        * the indexed attribute and which is the compared-to
+                        * constant. It just assumes that the attribute is on the left
+                        * :-(
                          *
-                        *      Hence, we set have_runtime_keys to true and then set
-                        *      the appropriate flag in run_keys to LEFT_OP or RIGHT_OP.
-                        *      The corresponding scan keys are recomputed at run time.
-                        * ----------------
+                        * I am leaving this code able to support both ways, even though
+                        * half of it is dead code, on the off chance that someone
+                        * will fix the scankey machinery someday --- tgl 8/11/99.
                          */
  
                         scanvar = NO_OP;
+                       run_keys[j] = NULL;
  
-                       /* ----------------
-                        *      determine information in leftop
-                        * ----------------
+                       /*
+                        * determine information in leftop
                          */
-                       leftop = (Node *) get_leftop(clause);
+                       leftop = (Expr *) get_leftop((Expr *) clause);
  
-                       if (IsA(leftop, Var) &&var_is_rel((Var *) leftop))
+                       if (leftop && IsA(leftop, RelabelType))
+                               leftop = ((RelabelType *) leftop)->arg;
+
+                       Assert(leftop != NULL);
+
+                       if (IsA(leftop, Var) &&
+                               var_is_rel((Var *) leftop))
                         {
-                               /* ----------------
-                                *      if the leftop is a "rel-var", then it means
-                                *      that it is a var node which tells us which
-                                *      attribute to use for our scan key.
-                                * ----------------
+                               /*
+                                * if the leftop is a "rel-var", then it means that it is
+                                * a var node which tells us which attribute to use for
+                                * our scan key.
                                  */
                                 varattno = ((Var *) leftop)->varattno;
                                 scanvar = LEFT_OP;
                         }
                         else if (IsA(leftop, Const))
                         {
-                               /* ----------------
-                                *      if the leftop is a const node then it means
-                                *      it identifies the value to place in our scan key.
-                                * ----------------
+                               /*
+                                * if the leftop is a const node then it means it
+                                * identifies the value to place in our scan key.
                                  */
-                               run_keys[j] = NO_OP;
                                 scanvalue = ((Const *) leftop)->constvalue;
-                       }
-                       else if (IsA(leftop, Param))
-                       {
-                               bool            isnull;
-
-                               /* ----------------
-                                *      if the leftop is a Param node then it means
-                                *      it identifies the value to place in our scan key.
-                                * ----------------
-                                */
-
-                               /* Life was so easy before ... subselects */
-                               if (((Param *) leftop)->paramkind == PARAM_EXEC)
-                               {
-                                       have_runtime_keys = true;
-                                       run_keys[j] = LEFT_OP;
-                                       execParam = lappendi(execParam, ((Param *) leftop)->paramid);
-                               }
-                               else
-                               {
-                                       scanvalue = ExecEvalParam((Param *) leftop,
-                                                                               scanstate->cstate.cs_ExprContext,
-                                                                                         &isnull);
-                                       if (isnull)
-                                               flags |= SK_ISNULL;
-
-                                       run_keys[j] = NO_OP;
-                               }
-                       }
-                       else if (leftop != NULL &&
-                                        is_funcclause(leftop) &&
-                                        var_is_rel(lfirst(((Expr *) leftop)->args)))
-                       {
-                               /* ----------------
-                                *      if the leftop is a func node then it means
-                                *      it identifies the value to place in our scan key.
-                                *      Since functional indices have only one attribute
-                                *      the attno must always be set to 1.
-                                * ----------------
-                                */
-                               varattno = 1;
-                               scanvar = LEFT_OP;
-
+                               if (((Const *) leftop)->constisnull)
+                                       flags |= SK_ISNULL;
                         }
                         else
                         {
-                               /* ----------------
-                                *      otherwise, the leftop contains information usable
-                                *      at runtime to figure out the value to place in our
-                                *      scan key.
-                                * ----------------
+                               /*
+                                * otherwise, the leftop contains an expression evaluable
+                                * at runtime to figure out the value to place in our scan
+                                * key.
                                  */
                                 have_runtime_keys = true;
-                               run_keys[j] = LEFT_OP;
-                               scanvalue = Int32GetDatum((int32) true);
+                               run_keys[j] = ExecInitExpr(leftop, (PlanState *) indexstate);
                         }
  
-                       /* ----------------
-                        *      now determine information in rightop
-                        * ----------------
+                       /*
+                        * now determine information in rightop
                          */
-                       rightop = (Node *) get_rightop(clause);
+                       rightop = (Expr *) get_rightop((Expr *) clause);
+
+                       if (rightop && IsA(rightop, RelabelType))
+                               rightop = ((RelabelType *) rightop)->arg;
  
-                       if (IsA(rightop, Var) &&var_is_rel((Var *) rightop))
+                       Assert(rightop != NULL);
+
+                       if (IsA(rightop, Var) &&
+                               var_is_rel((Var *) rightop))
                         {
-                               /* ----------------
-                                *      here we make sure only one op identifies the
-                                *      scan-attribute...
-                                * ----------------
+                               /*
+                                * here we make sure only one op identifies the
+                                * scan-attribute...
                                  */
                                 if (scanvar == LEFT_OP)
-                                       elog(ERROR, "ExecInitIndexScan: %s",
-                                                "both left and right op's are rel-vars");
-
-                               /* ----------------
-                                *      if the rightop is a "rel-var", then it means
-                                *      that it is a var node which tells us which
-                                *      attribute to use for our scan key.
-                                * ----------------
+                                       elog(ERROR, "both left and right operands are rel-vars");
+
+                               /*
+                                * if the rightop is a "rel-var", then it means that it is
+                                * a var node which tells us which attribute to use for
+                                * our scan key.
                                  */
                                 varattno = ((Var *) rightop)->varattno;
                                 scanvar = RIGHT_OP;
-
                         }
                         else if (IsA(rightop, Const))
                         {
-                               /* ----------------
-                                *      if the leftop is a const node then it means
-                                *      it identifies the value to place in our scan key.
-                                * ----------------
+                               /*
+                                * if the rightop is a const node then it means it
+                                * identifies the value to place in our scan key.
                                  */
-                               run_keys[j] = NO_OP;
                                 scanvalue = ((Const *) rightop)->constvalue;
-                       }
-                       else if (IsA(rightop, Param))
-                       {
-                               bool            isnull;
-
-                               /* ----------------
-                                *      if the rightop is a Param node then it means
-                                *      it identifies the value to place in our scan key.
-                                * ----------------
-                                */
-
-                               /* Life was so easy before ... subselects */
-                               if (((Param *) rightop)->paramkind == PARAM_EXEC)
-                               {
-                                       have_runtime_keys = true;
-                                       run_keys[j] = RIGHT_OP;
-                                       execParam = lappendi(execParam, ((Param *) rightop)->paramid);
-                               }
-                               else
-                               {
-                                       scanvalue = ExecEvalParam((Param *) rightop,
-                                                                               scanstate->cstate.cs_ExprContext,
-                                                                                         &isnull);
-                                       if (isnull)
-                                               flags |= SK_ISNULL;
-
-                                       run_keys[j] = NO_OP;
-                               }
-                       }
-                       else if (rightop != NULL &&
-                                        is_funcclause(rightop) &&
-                                        var_is_rel(lfirst(((Expr *) rightop)->args)))
-                       {
-                               /* ----------------
-                                *      if the rightop is a func node then it means
-                                *      it identifies the value to place in our scan key.
-                                *      Since functional indices have only one attribute
-                                *      the attno must always be set to 1.
-                                * ----------------
-                                */
-                               if (scanvar == LEFT_OP)
-                                       elog(ERROR, "ExecInitIndexScan: %s",
-                                                "both left and right ops are rel-vars");
-
-                               varattno = 1;
-                               scanvar = RIGHT_OP;
-
+                               if (((Const *) rightop)->constisnull)
+                                       flags |= SK_ISNULL;
                         }
                         else
                         {
-                               /* ----------------
-                                *      otherwise, the leftop contains information usable
-                                *      at runtime to figure out the value to place in our
-                                *      scan key.
-                                * ----------------
+                               /*
+                                * otherwise, the rightop contains an expression evaluable
+                                * at runtime to figure out the value to place in our scan
+                                * key.
                                  */
                                 have_runtime_keys = true;
-                               run_keys[j] = RIGHT_OP;
-                               scanvalue = Int32GetDatum((int32) true);
+                               run_keys[j] = ExecInitExpr(rightop, (PlanState *) indexstate);
                         }
  
-                       /* ----------------
-                        *      now check that at least one op tells us the scan
-                        *      attribute...
-                        * ----------------
+                       /*
+                        * now check that at least one op tells us the scan
+                        * attribute...
                          */
                         if (scanvar == NO_OP)
-                               elog(ERROR, "ExecInitIndexScan: %s",
-                                        "neither leftop nor rightop refer to scan relation");
+                               elog(ERROR, "neither left nor right operand refer to scan relation");
  
-                       /* ----------------
-                        *      initialize the scan key's fields appropriately
-                        * ----------------
+                       /*
+                        * initialize the scan key's fields appropriately
                          */
                         ScanKeyEntryInitialize(&scan_keys[j],
                                                                    flags,
                                                                    varattno,    /* attribute number to
                                                                                                  * scan */
-                                                                  (RegProcedure) opid, /* reg proc to use */
-                                                                  (Datum) scanvalue);  /* constant */
+                                                                  opfuncid,    /* reg proc to use */
+                                                                  scanvalue);  /* constant */
                 }
  
-               /* ----------------
-                *      store the key information into our array.
-                * ----------------
+               /*
+                * store the key information into our arrays.
                  */
                 numScanKeys[i] = n_keys;
                 scanKeys[i] = scan_keys;
-               runtimeKeyInfo[i] = (Pointer) run_keys;
+               runtimeKeyInfo[i] = run_keys;
         }
  
         indexstate->iss_NumIndices = numIndices;
+       if (ScanDirectionIsBackward(node->indxorderdir))
+               indexPtr = numIndices;
         indexstate->iss_IndexPtr = indexPtr;
         indexstate->iss_ScanKeys = scanKeys;
         indexstate->iss_NumScanKeys = numScanKeys;
  
-       /* ----------------
-        *      If all of our keys have the form (op var const) , then we have no
-        *      runtime keys so we store NULL in the runtime key info.
-        *      Otherwise runtime key info contains an array of pointers
-        *      (one for each index) to arrays of flags (one for each key)
-        *      which indicate that the qual needs to be evaluated at runtime.
-        *      -cim 10/24/89
-        * ----------------
+       /*
+        * If all of our keys have the form (op var const), then we have no
+        * runtime keys so we store NULL in the runtime key info. Otherwise
+        * runtime key info contains an array of pointers (one for each index)
+        * to arrays of flags (one for each key) which indicate that the qual
+        * needs to be evaluated at runtime. -cim 10/24/89
+        *
+        * If we do have runtime keys, we need an ExprContext to evaluate them;
+        * the node's standard context won't do because we want to reset that
+        * context for every tuple.  So, build another context just like the
+        * other one... -tgl 7/11/00
          */
         if (have_runtime_keys)
-               indexstate->iss_RuntimeKeyInfo = (Pointer) runtimeKeyInfo;
+       {
+               ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
+
+               ExecAssignExprContext(estate, &indexstate->ss.ps);
+               indexstate->iss_RuntimeKeyInfo = runtimeKeyInfo;
+               indexstate->iss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
+               indexstate->ss.ps.ps_ExprContext = stdecontext;
+       }
         else
+       {
                 indexstate->iss_RuntimeKeyInfo = NULL;
+               indexstate->iss_RuntimeContext = NULL;
+               /* Get rid of the speculatively-allocated flag arrays, too */
+               for (i = 0; i < numIndices; i++)
+               {
+                       if (runtimeKeyInfo[i] != NULL)
+                               pfree(runtimeKeyInfo[i]);
+               }
+               pfree(runtimeKeyInfo);
+       }
  
-       /* ----------------
-        *      get the range table and direction information
-        *      from the execution state (these are needed to
-        *      open the relations).
-        * ----------------
-        */
-       rangeTable = estate->es_range_table;
-       direction = estate->es_direction;
-
-       /* ----------------
-        *      open the base relation
-        * ----------------
+       /*
+        * open the base relation and acquire AccessShareLock on it.
          */
         relid = node->scan.scanrelid;
-       rtentry = rt_fetch(relid, rangeTable);
+       rtentry = rt_fetch(relid, estate->es_range_table);
         reloid = rtentry->relid;
  
-       ExecOpenScanR(reloid,           /* relation */
-                                 0,                    /* nkeys */
-                                 (ScanKey) NULL,               /* scan key */
-                                 0,                    /* is index */
-                                 direction,    /* scan direction */
-                                 estate->es_snapshot,  /* */
-                                 &currentRelation,             /* return: rel desc */
-                                 (Pointer *) &currentScanDesc);                /* return: scan desc */
-
-       scanstate->css_currentRelation = currentRelation;
-       scanstate->css_currentScanDesc = currentScanDesc;
+       currentRelation = heap_open(reloid, AccessShareLock);
  
+       indexstate->ss.ss_currentRelation = currentRelation;
+       indexstate->ss.ss_currentScanDesc = NULL;       /* no heap scan here */
  
-       /* ----------------
-        *      get the scan type from the relation descriptor.
-        * ----------------
-        */
-       ExecAssignScanType(scanstate, RelationGetDescr(currentRelation));
-       ExecAssignResultTypeFromTL((Plan *) node, &scanstate->cstate);
-
-       /* ----------------
-        *      index scans don't have subtrees..
-        * ----------------
+       /*
+        * get the scan type from the relation descriptor.
          */
-/*       scanstate->ss_ProcOuterFlag = false; */
+       ExecAssignScanType(&indexstate->ss, RelationGetDescr(currentRelation), false);
  
-       /* ----------------
-        *      open the index relations and initialize
-        *      relation and scan descriptors.
-        * ----------------
+       /*
+        * open the index relations and initialize relation and scan
+        * descriptors.  Note we acquire no locks here; the index machinery
+        * does its own locks and unlocks.      (We rely on having AccessShareLock
+        * on the parent table to ensure the index won't go away!)
          */
+       listscan = indxid;
         for (i = 0; i < numIndices; i++)
         {
-               Oid                     indexOid;
-
-               indexOid = (Oid) nthi(i, indxid);
-
-               if (indexOid != 0)
-               {
-                       ExecOpenScanR(indexOid,         /* relation */
-                                                 numScanKeys[i],               /* nkeys */
-                                                 scanKeys[i],  /* scan key */
-                                                 true, /* is index */
-                                                 direction,    /* scan direction */
-                                                 estate->es_snapshot,
-                                                 &(relationDescs[i]),  /* return: rel desc */
-                                                 (Pointer *) &(scanDescs[i]));
-                       /* return: scan desc */
-               }
+               Oid                     indexOid = lfirsto(listscan);
+
+               indexDescs[i] = index_open(indexOid);
+               scanDescs[i] = index_beginscan(currentRelation,
+                                                                          indexDescs[i],
+                                                                          estate->es_snapshot,
+                                                                          numScanKeys[i],
+                                                                          scanKeys[i]);
+               listscan = lnext(listscan);
         }
  
-       indexstate->iss_RelationDescs = relationDescs;
+       indexstate->iss_RelationDescs = indexDescs;
         indexstate->iss_ScanDescs = scanDescs;
  
-       indexstate->cstate.cs_TupFromTlist = false;
+       /*
+        * Initialize result tuple type and projection info.
+        */
+       ExecAssignResultTypeFromTL(&indexstate->ss.ps);
+       ExecAssignScanProjectionInfo(&indexstate->ss);
  
         /*
-        * if there are some PARAM_EXEC in skankeys then force index rescan on
-        * first scan.
+        * Initialize hash table if needed.
          */
-       ((Plan *) node)->chgParam = execParam;
+       if (numIndices > 1)
+               create_duphash(indexstate);
+       else
+               indexstate->iss_DupHash = NULL;
  
-       /* ----------------
-        *      all done.
-        * ----------------
+       /*
+        * all done.
          */
-       return TRUE;
+       return indexstate;
+}
+/*
+ * create_duphash
+ *
+ * Build the hash table stored in node->iss_DupHash and compute
+ * node->iss_MaxHash.  The scan-initialization code calls this only when
+ * numIndices > 1; otherwise it sets iss_DupHash to NULL.
+ *
+ * The table is named "DupHashTable" and is initially sized from
+ * ceil(plan_rows) of the node's plan.  If hash_create returns NULL an
+ * out-of-memory ERROR is reported.
+ *
+ * iss_MaxHash is the number of entries whose MAXALIGN'd HASHELEMENT plus
+ * MAXALIGN'd DupHashTabEntry fit in SortMem * 1024 bytes.
+ * NOTE(review): SortMem is presumably expressed in kilobytes, and the keys
+ * are presumably heap tuple item pointers -- confirm against the callers
+ * and the DupHashTabEntry definition.  How the limit is enforced is not
+ * visible in this function.
+ */
+static void
+create_duphash(IndexScanState *node)
+{
+       HASHCTL         hash_ctl;
+       /*
+        * Describe the table: fixed-size keys of SizeOfIptrData bytes, entries
+        * of type DupHashTabEntry, tag_hash as the hash function, and storage
+        * taken from the memory context that is current at this call.  The
+        * struct is zeroed first so unset fields are 0.
+        */
+       MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+       hash_ctl.keysize = SizeOfIptrData;
+       hash_ctl.entrysize = sizeof(DupHashTabEntry);
+       hash_ctl.hash = tag_hash;
+       hash_ctl.hcxt = CurrentMemoryContext;
+       node->iss_DupHash = hash_create("DupHashTable",
+                                                                       (long) ceil(node->ss.ps.plan->plan_rows),
+                                                                       &hash_ctl,
+                                                                       HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+       if (node->iss_DupHash == NULL)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
+       node->iss_MaxHash = (SortMem * 1024L) /
+               (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(DupHashTabEntry)));
  }
  
  int
  ExecCountSlotsIndexScan(IndexScan *node)
  {
         return ExecCountSlotsNode(outerPlan((Plan *) node)) +
-       ExecCountSlotsNode(innerPlan((Plan *) node)) + INDEXSCAN_NSLOTS;
+               ExecCountSlotsNode(innerPlan((Plan *) node)) + INDEXSCAN_NSLOTS;        /* outer-plan count +
+                                                                                        * inner-plan count +
+                                                                                        * INDEXSCAN_NSLOTS */
  }