granicus.if.org Git - postgresql/commitdiff
Tweak processing of multiple-index-scan plans to reduce overhead when
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 22 Aug 2003 20:26:43 +0000 (20:26 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 22 Aug 2003 20:26:43 +0000 (20:26 +0000)
handling many-way scans: instead of re-evaluating all prior indexscan
quals to see if a tuple has been fetched more than once, use a hash table
indexed by tuple CTID.  But fall back to the old way if the hash table
grows to exceed SortMem.
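
For illustration only (not part of the commit): the sketch below shows the core idea as a self-contained C program. It remembers each returned tuple's TID (block number plus line offset) in a small hash set and reports duplicates, signalling the caller to fall back once a fixed budget is exhausted, much as the executor falls back to re-evaluating prior-scan quals. All names here (TidEntry, tid_seen, TABLE_SIZE) are hypothetical; the committed code uses dynahash (hash_create/hash_search) keyed on the tuple's ItemPointerData and also records the scan number, so that backing up within the same scan can legitimately re-emit a tuple.

/*
 * Hypothetical standalone sketch of TID-based duplicate detection with a
 * fallback signal when the table would exceed its budget.  Not PostgreSQL
 * code; see the diff below for the real implementation.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct
{
	uint32_t	block;			/* heap block number */
	uint16_t	offset;			/* line pointer offset within the block */
	bool		used;			/* slot occupied? */
} TidEntry;

#define TABLE_SIZE	1024		/* stand-in for the SortMem-derived cap */

static TidEntry table[TABLE_SIZE];
static int	nentries = 0;

/*
 * Return true if this TID was seen before, false if newly recorded.
 * Sets *overflow when the budget is exhausted; the caller would then
 * destroy the table and re-check the quals of earlier scans instead.
 */
static bool
tid_seen(uint32_t block, uint16_t offset, bool *overflow)
{
	uint32_t	h = ((block * 2654435761u) ^ offset) % TABLE_SIZE;

	*overflow = false;
	for (;;)
	{
		TidEntry   *e = &table[h];

		if (!e->used)
		{
			if (nentries >= TABLE_SIZE - 1)
			{
				*overflow = true;	/* budget exceeded: fall back */
				return false;
			}
			e->block = block;
			e->offset = offset;
			e->used = true;
			nentries++;
			return false;		/* first time we see this TID */
		}
		if (e->block == block && e->offset == offset)
			return true;		/* duplicate: caller skips the tuple */
		h = (h + 1) % TABLE_SIZE;	/* linear probing */
	}
}

int
main(void)
{
	bool		overflow;

	printf("%d\n", tid_seen(42, 7, &overflow));		/* 0: new */
	printf("%d\n", tid_seen(42, 7, &overflow));		/* 1: duplicate */
	printf("%d\n", tid_seen(42, 8, &overflow));		/* 0: new */
	return 0;
}

In the actual patch the budget is not a fixed slot count: create_duphash (in the diff below) divides SortMem, expressed in kilobytes, by the per-entry overhead of the dynahash table to compute iss_MaxHash.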

src/backend/executor/nodeIndexscan.c
src/include/nodes/execnodes.h

index bfbcaf208de208786af37f7591f3da41aee85e63..f93802269da7ee3c09cdb3034e735210fc5fe1fe 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.82 2003/08/04 02:39:59 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.83 2003/08/22 20:26:43 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "access/heapam.h"
 #include "executor/execdebug.h"
 #include "executor/nodeIndexscan.h"
+#include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "parser/parsetree.h"
 
-/* ----------------
- *             Misc stuff to move to executor.h soon -cim 6/5/90
- * ----------------
- */
+
 #define NO_OP                  0
 #define LEFT_OP                        1
 #define RIGHT_OP               2
 
+/*
+ * In a multiple-index plan, we must take care to return any given tuple
+ * only once, even if it matches conditions of several index scans.  Our
+ * preferred way to do this is to record already-returned tuples in a hash
+ * table (using the TID as unique identifier).  However, in a very large
+ * scan this could conceivably run out of memory.  We limit the hash table
+ * to no more than SortMem KB; if it grows past that, we fall back to the
+ * pre-7.4 technique: evaluate the prior-scan index quals again for each
+ * tuple (which is space-efficient, but slow).
+ *
+ * When scanning backwards, we use scannum to determine when to emit the
+ * tuple --- we have to re-emit a tuple in the same scan as it was first
+ * encountered.
+ *
+ * Note: this code would break if the planner were ever to create a multiple
+ * index plan with overall backwards direction, because the hashtable code
+ * will emit a tuple the first time it is encountered (which would be the
+ * highest scan in which it matches the index), but the evaluate-the-quals
+ * code will emit a tuple in the lowest-numbered scan in which it's valid.
+ * This could be fixed at need by making the evaluate-the-quals case more
+ * complex.  Currently the planner will never create such a plan (since it
+ * considers multi-index plans unordered anyway), so there's no need for
+ * more complexity.
+ */
+typedef struct
+{
+       /* tid is the hash key and so must be first! */
+       ItemPointerData tid;            /* TID of a tuple we've returned */
+       int                     scannum;                /* number of scan we returned it in */
+} DupHashTabEntry;
+
+
 static TupleTableSlot *IndexNext(IndexScanState *node);
+static void create_duphash(IndexScanState *node);
+
 
 /* ----------------------------------------------------------------
  *             IndexNext
@@ -163,7 +195,7 @@ IndexNext(IndexScanState *node)
                while ((tuple = index_getnext(scandesc, direction)) != NULL)
                {
                        /*
-                        * store the scanned tuple in the scan tuple slot of the scan
+                        * Store the scanned tuple in the scan tuple slot of the scan
                         * state.  Note: we pass 'false' because tuples returned by
                         * amgetnext are pointers onto disk pages and must not be
                         * pfree()'d.
@@ -174,36 +206,80 @@ IndexNext(IndexScanState *node)
                                                   false);              /* don't pfree */
 
                        /*
-                        * We must check to see if the current tuple was already
-                        * matched by an earlier index, so we don't double-report it.
-                        * We do this by passing the tuple through ExecQual and
-                        * checking for failure with all previous qualifications.
+                        * If it's a multiple-index scan, make sure not to double-report
+                        * a tuple matched by more than one index.  (See notes above.)
                         */
-                       if (node->iss_IndexPtr > 0)
+                       if (numIndices > 1)
                        {
-                               bool            prev_matches = false;
-                               int                     prev_index;
-                               List       *qual;
-
-                               econtext->ecxt_scantuple = slot;
-                               ResetExprContext(econtext);
-                               qual = node->indxqualorig;
-                               for (prev_index = 0;
-                                        prev_index < node->iss_IndexPtr;
-                                        prev_index++)
+                               /* First try the hash table */
+                               if (node->iss_DupHash)
                                {
-                                       if (ExecQual((List *) lfirst(qual), econtext, false))
+                                       DupHashTabEntry *entry;
+                                       bool    found;
+
+                                       entry = (DupHashTabEntry *)
+                                               hash_search(node->iss_DupHash,
+                                                                       &tuple->t_data->t_ctid,
+                                                                       HASH_ENTER,
+                                                                       &found);
+                                       if (entry == NULL ||
+                                               node->iss_DupHash->hctl->nentries > node->iss_MaxHash)
+                                       {
+                                               /* out of memory (either hard or soft limit) */
+                                               /* release hash table and fall thru to old code */
+                                               hash_destroy(node->iss_DupHash);
+                                               node->iss_DupHash = NULL;
+                                       }
+                                       else if (found)
                                        {
-                                               prev_matches = true;
-                                               break;
+                                               /* pre-existing entry */
+
+                                               /*
+                                                * It's duplicate if first emitted in a different
+                                                * scan.  If same scan, we must be backing up, so
+                                                * okay to emit again.
+                                                */
+                                               if (entry->scannum != node->iss_IndexPtr)
+                                               {
+                                                       /* Dup, so drop it and loop back for another */
+                                                       ExecClearTuple(slot);
+                                                       continue;
+                                               }
+                                       }
+                                       else
+                                       {
+                                               /* new entry, finish filling it in */
+                                               entry->scannum = node->iss_IndexPtr;
                                        }
-                                       qual = lnext(qual);
                                }
-                               if (prev_matches)
+                               /* If hash table has overflowed, do it the hard way */
+                               if (node->iss_DupHash == NULL &&
+                                       node->iss_IndexPtr > 0)
                                {
-                                       /* Duplicate, so drop it and loop back for another */
-                                       ExecClearTuple(slot);
-                                       continue;
+                                       bool            prev_matches = false;
+                                       int                     prev_index;
+                                       List       *qual;
+
+                                       econtext->ecxt_scantuple = slot;
+                                       ResetExprContext(econtext);
+                                       qual = node->indxqualorig;
+                                       for (prev_index = 0;
+                                                prev_index < node->iss_IndexPtr;
+                                                prev_index++)
+                                       {
+                                               if (ExecQual((List *) lfirst(qual), econtext, false))
+                                               {
+                                                       prev_matches = true;
+                                                       break;
+                                               }
+                                               qual = lnext(qual);
+                                       }
+                                       if (prev_matches)
+                                       {
+                                               /* Dup, so drop it and loop back for another */
+                                               ExecClearTuple(slot);
+                                               continue;
+                                       }
                                }
                        }
 
@@ -383,6 +459,14 @@ ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
                return;
        }
 
+       /* reset hash table */
+       if (numIndices > 1)
+       {
+               if (node->iss_DupHash)
+                       hash_destroy(node->iss_DupHash);
+               create_duphash(node);
+       }
+
        /* reset index scans */
        if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indxorderdir))
                node->iss_IndexPtr = numIndices;
@@ -432,6 +516,10 @@ ExecEndIndexScan(IndexScanState *node)
        ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
        ExecClearTuple(node->ss.ss_ScanTupleSlot);
 
+       /* drop hash table */
+       if (node->iss_DupHash)
+               hash_destroy(node->iss_DupHash);
+
        /*
         * close the index relations
         */
@@ -507,7 +595,7 @@ ExecIndexRestrPos(IndexScanState *node)
 
 /* ----------------------------------------------------------------
  *             ExecInitIndexScan
- *
+ *
  *             Initializes the index scan's state information, creates
  *             scan keys, and opens the base and index relations.
  *
@@ -919,12 +1007,42 @@ ExecInitIndexScan(IndexScan *node, EState *estate)
        ExecAssignResultTypeFromTL(&indexstate->ss.ps);
        ExecAssignScanProjectionInfo(&indexstate->ss);
 
+       /*
+        * Initialize hash table if needed.
+        */
+       if (numIndices > 1)
+               create_duphash(indexstate);
+       else
+               indexstate->iss_DupHash = NULL;
+
        /*
         * all done.
         */
        return indexstate;
 }
 
+static void
+create_duphash(IndexScanState *node)
+{
+       HASHCTL         hash_ctl;
+
+       MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+       hash_ctl.keysize = SizeOfIptrData;
+       hash_ctl.entrysize = sizeof(DupHashTabEntry);
+       hash_ctl.hash = tag_hash;
+       hash_ctl.hcxt = CurrentMemoryContext;
+       node->iss_DupHash = hash_create("DupHashTable",
+                                                                       (long) ceil(node->ss.ps.plan->plan_rows),
+                                                                       &hash_ctl,
+                                                                       HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+       if (node->iss_DupHash == NULL)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
+       node->iss_MaxHash = (SortMem * 1024L) /
+               (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(DupHashTabEntry)));
+}
+
 int
 ExecCountSlotsIndexScan(IndexScan *node)
 {
index 8d180009bfd97cb3f2ce34a123fee8de312f9934..97609f583857be569ef7dc6865e255e37bd09ce2 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: execnodes.h,v 1.104 2003/08/19 01:13:41 tgl Exp $
+ * $Id: execnodes.h,v 1.105 2003/08/22 20:26:43 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -768,6 +768,8 @@ typedef ScanState SeqScanState;
  *             RuntimeKeysReady   true if runtime Skeys have been computed
  *             RelationDescs      ptr to array of relation descriptors
  *             ScanDescs                  ptr to array of scan descriptors
+ *             DupHash                    hashtable for recognizing dups in multiple scan
+ *             MaxHash                    max # entries we will allow in hashtable
  * ----------------
  */
 typedef struct IndexScanState
@@ -785,6 +787,8 @@ typedef struct IndexScanState
        bool            iss_RuntimeKeysReady;
        RelationPtr iss_RelationDescs;
        IndexScanDescPtr iss_ScanDescs;
+       HTAB       *iss_DupHash;
+       long            iss_MaxHash;
 } IndexScanState;
 
 /* ----------------