]> granicus.if.org Git - postgresql/blob - src/backend/executor/nodeIndexscan.c
Tweak processing of multiple-index-scan plans to reduce overhead when
[postgresql] / src / backend / executor / nodeIndexscan.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeIndexscan.c
4  *        Routines to support indexes and indexed scans of relations
5  *
6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.83 2003/08/22 20:26:43 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *              ExecIndexScan                   scans a relation using indices
18  *              ExecIndexNext                   using index to retrieve next tuple
19  *              ExecInitIndexScan               creates and initializes state info.
20  *              ExecIndexReScan                 rescans the indexed relation.
21  *              ExecEndIndexScan                releases all storage.
22  *              ExecIndexMarkPos                marks scan position.
23  *              ExecIndexRestrPos               restores scan position.
24  */
25 #include "postgres.h"
26
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "executor/execdebug.h"
30 #include "executor/nodeIndexscan.h"
31 #include "miscadmin.h"
32 #include "nodes/nodeFuncs.h"
33 #include "optimizer/clauses.h"
34 #include "parser/parsetree.h"
35
36
37 #define NO_OP                   0
38 #define LEFT_OP                 1
39 #define RIGHT_OP                2
40
41 /*
42  * In a multiple-index plan, we must take care to return any given tuple
43  * only once, even if it matches conditions of several index scans.  Our
44  * preferred way to do this is to record already-returned tuples in a hash
45  * table (using the TID as unique identifier).  However, in a very large
46  * scan this could conceivably run out of memory.  We limit the hash table
47  * to no more than SortMem KB; if it grows past that, we fall back to the
48  * pre-7.4 technique: evaluate the prior-scan index quals again for each
49  * tuple (which is space-efficient, but slow).
50  *
51  * When scanning backwards, we use scannum to determine when to emit the
52  * tuple --- we have to re-emit a tuple in the same scan as it was first
53  * encountered.
54  *
55  * Note: this code would break if the planner were ever to create a multiple
56  * index plan with overall backwards direction, because the hashtable code
57  * will emit a tuple the first time it is encountered (which would be the
58  * highest scan in which it matches the index), but the evaluate-the-quals
59  * code will emit a tuple in the lowest-numbered scan in which it's valid.
60  * This could be fixed at need by making the evaluate-the-quals case more
61  * complex.  Currently the planner will never create such a plan (since it
62  * considers multi-index plans unordered anyway), so there's no need for
63  * more complexity.
64  */
65 typedef struct
66 {
67         /* tid is the hash key and so must be first! */
68         ItemPointerData tid;            /* TID of a tuple we've returned */
69         int                     scannum;                /* number of scan we returned it in */
70 } DupHashTabEntry;
71
72
73 static TupleTableSlot *IndexNext(IndexScanState *node);
74 static void create_duphash(IndexScanState *node);
75
76
77 /* ----------------------------------------------------------------
78  *              IndexNext
79  *
80  *              Retrieve a tuple from the IndexScan node's currentRelation
81  *              using the indices in the IndexScanState information.
82  *
83  *              note: the old code mentions 'Primary indices'.  to my knowledge
84  *              we only support a single secondary index. -cim 9/11/89
85  *
86  * old comments:
87  *              retrieve a tuple from relation using the indices given.
88  *              The indices are used in the order they appear in 'indices'.
89  *              The indices may be primary or secondary indices:
90  *                * primary index --    scan the relation 'relID' using keys supplied.
91  *                * secondary index --  scan the index relation to get the 'tid' for
92  *                                                              a tuple in the relation 'relID'.
93  *              If the current index(pointed by 'indexPtr') fails to return a
94  *              tuple, the next index in the indices is used.
95  *
96  *                bug fix so that it should retrieve on a null scan key.
97  * ----------------------------------------------------------------
98  */
99 static TupleTableSlot *
100 IndexNext(IndexScanState *node)
101 {
102         EState     *estate;
103         ExprContext *econtext;
104         ScanDirection direction;
105         IndexScanDescPtr scanDescs;
106         IndexScanDesc scandesc;
107         Index           scanrelid;
108         HeapTuple       tuple;
109         TupleTableSlot *slot;
110         int                     numIndices;
111         bool            bBackward;
112         int                     indexNumber;
113
114         /*
115          * extract necessary information from index scan node
116          */
117         estate = node->ss.ps.state;
118         direction = estate->es_direction;
119         if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indxorderdir))
120         {
121                 if (ScanDirectionIsForward(direction))
122                         direction = BackwardScanDirection;
123                 else if (ScanDirectionIsBackward(direction))
124                         direction = ForwardScanDirection;
125         }
126         scanDescs = node->iss_ScanDescs;
127         numIndices = node->iss_NumIndices;
128         econtext = node->ss.ps.ps_ExprContext;
129         slot = node->ss.ss_ScanTupleSlot;
130         scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;
131
132         /*
133          * Check if we are evaluating PlanQual for tuple of this relation.
134          * Additional checking is not good, but no other way for now. We could
135          * introduce new nodes for this case and handle IndexScan --> NewNode
136          * switching in Init/ReScan plan...
137          */
138         if (estate->es_evTuple != NULL &&
139                 estate->es_evTuple[scanrelid - 1] != NULL)
140         {
141                 List       *qual;
142
143                 ExecClearTuple(slot);
144                 if (estate->es_evTupleNull[scanrelid - 1])
145                         return slot;            /* return empty slot */
146
147                 ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
148                                            slot, InvalidBuffer, false);
149
150                 /* Does the tuple meet any of the OR'd indxqual conditions? */
151                 econtext->ecxt_scantuple = slot;
152
153                 ResetExprContext(econtext);
154
155                 foreach(qual, node->indxqualorig)
156                 {
157                         if (ExecQual((List *) lfirst(qual), econtext, false))
158                                 break;
159                 }
160                 if (qual == NIL)                /* would not be returned by indices */
161                         slot->val = NULL;
162
163                 /* Flag for the next call that no more tuples */
164                 estate->es_evTupleNull[scanrelid - 1] = true;
165
166                 return slot;
167         }
168
169         /*
170          * ok, now that we have what we need, fetch an index tuple. if
171          * scanning this index succeeded then return the appropriate heap
172          * tuple.. else return NULL.
173          */
174         bBackward = ScanDirectionIsBackward(direction);
175         if (bBackward)
176         {
177                 indexNumber = numIndices - node->iss_IndexPtr - 1;
178                 if (indexNumber < 0)
179                 {
180                         indexNumber = 0;
181                         node->iss_IndexPtr = numIndices - 1;
182                 }
183         }
184         else
185         {
186                 if ((indexNumber = node->iss_IndexPtr) < 0)
187                 {
188                         indexNumber = 0;
189                         node->iss_IndexPtr = 0;
190                 }
191         }
192         while (indexNumber < numIndices)
193         {
194                 scandesc = scanDescs[node->iss_IndexPtr];
195                 while ((tuple = index_getnext(scandesc, direction)) != NULL)
196                 {
197                         /*
198                          * Store the scanned tuple in the scan tuple slot of the scan
199                          * state.  Note: we pass 'false' because tuples returned by
200                          * amgetnext are pointers onto disk pages and must not be
201                          * pfree()'d.
202                          */
203                         ExecStoreTuple(tuple,           /* tuple to store */
204                                                    slot,        /* slot to store in */
205                                                    scandesc->xs_cbuf,   /* buffer containing tuple */
206                                                    false);              /* don't pfree */
207
208                         /*
209                          * If it's a multiple-index scan, make sure not to double-report
210                          * a tuple matched by more than one index.  (See notes above.)
211                          */
212                         if (numIndices > 1)
213                         {
214                                 /* First try the hash table */
215                                 if (node->iss_DupHash)
216                                 {
217                                         DupHashTabEntry *entry;
218                                         bool    found;
219
220                                         entry = (DupHashTabEntry *)
221                                                 hash_search(node->iss_DupHash,
222                                                                         &tuple->t_data->t_ctid,
223                                                                         HASH_ENTER,
224                                                                         &found);
225                                         if (entry == NULL ||
226                                                 node->iss_DupHash->hctl->nentries > node->iss_MaxHash)
227                                         {
228                                                 /* out of memory (either hard or soft limit) */
229                                                 /* release hash table and fall thru to old code */
230                                                 hash_destroy(node->iss_DupHash);
231                                                 node->iss_DupHash = NULL;
232                                         }
233                                         else if (found)
234                                         {
235                                                 /* pre-existing entry */
236
237                                                 /*
238                                                  * It's duplicate if first emitted in a different
239                                                  * scan.  If same scan, we must be backing up, so
240                                                  * okay to emit again.
241                                                  */
242                                                 if (entry->scannum != node->iss_IndexPtr)
243                                                 {
244                                                         /* Dup, so drop it and loop back for another */
245                                                         ExecClearTuple(slot);
246                                                         continue;
247                                                 }
248                                         }
249                                         else
250                                         {
251                                                 /* new entry, finish filling it in */
252                                                 entry->scannum = node->iss_IndexPtr;
253                                         }
254                                 }
255                                 /* If hash table has overflowed, do it the hard way */
256                                 if (node->iss_DupHash == NULL &&
257                                         node->iss_IndexPtr > 0)
258                                 {
259                                         bool            prev_matches = false;
260                                         int                     prev_index;
261                                         List       *qual;
262
263                                         econtext->ecxt_scantuple = slot;
264                                         ResetExprContext(econtext);
265                                         qual = node->indxqualorig;
266                                         for (prev_index = 0;
267                                                  prev_index < node->iss_IndexPtr;
268                                                  prev_index++)
269                                         {
270                                                 if (ExecQual((List *) lfirst(qual), econtext, false))
271                                                 {
272                                                         prev_matches = true;
273                                                         break;
274                                                 }
275                                                 qual = lnext(qual);
276                                         }
277                                         if (prev_matches)
278                                         {
279                                                 /* Dup, so drop it and loop back for another */
280                                                 ExecClearTuple(slot);
281                                                 continue;
282                                         }
283                                 }
284                         }
285
286                         return slot;            /* OK to return tuple */
287                 }
288
289                 if (indexNumber < numIndices)
290                 {
291                         indexNumber++;
292                         if (bBackward)
293                                 node->iss_IndexPtr--;
294                         else
295                                 node->iss_IndexPtr++;
296                 }
297         }
298
299         /*
300          * if we get here it means the index scan failed so we are at the end
301          * of the scan..
302          */
303         return ExecClearTuple(slot);
304 }
305
306 /* ----------------------------------------------------------------
307  *              ExecIndexScan(node)
308  *
309  * old comments:
310  *              Scans the relation using primary or secondary indices and returns
311  *                 the next qualifying tuple in the direction specified.
312  *              It calls ExecScan() and passes it the access methods which returns
313  *              the next tuple using the indices.
314  *
315  *              Conditions:
316  *                -- the "cursor" maintained by the AMI is positioned at the tuple
317  *                       returned previously.
318  *
319  *              Initial States:
320  *                -- the relation indicated is opened for scanning so that the
321  *                       "cursor" is positioned before the first qualifying tuple.
322  *                -- all index realtions are opened for scanning.
323  *                -- indexPtr points to the first index.
324  *                -- state variable ruleFlag = nil.
325  * ----------------------------------------------------------------
326  */
327 TupleTableSlot *
328 ExecIndexScan(IndexScanState *node)
329 {
330         /*
331          * If we have runtime keys and they've not already been set up, do it
332          * now.
333          */
334         if (node->iss_RuntimeKeyInfo && !node->iss_RuntimeKeysReady)
335                 ExecReScan((PlanState *) node, NULL);
336
337         /*
338          * use IndexNext as access method
339          */
340         return ExecScan(&node->ss, (ExecScanAccessMtd) IndexNext);
341 }
342
343 /* ----------------------------------------------------------------
344  *              ExecIndexReScan(node)
345  *
346  *              Recalculates the value of the scan keys whose value depends on
347  *              information known at runtime and rescans the indexed relation.
348  *              Updating the scan key was formerly done separately in
349  *              ExecUpdateIndexScanKeys. Integrating it into ReScan makes
350  *              rescans of indices and relations/general streams more uniform.
351  *
352  * ----------------------------------------------------------------
353  */
354 void
355 ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
356 {
357         EState     *estate;
358         ExprContext *econtext;
359         int                     numIndices;
360         IndexScanDescPtr scanDescs;
361         ScanKey    *scanKeys;
362         ExprState ***runtimeKeyInfo;
363         int                *numScanKeys;
364         Index           scanrelid;
365         int                     i;
366         int                     j;
367
368         estate = node->ss.ps.state;
369         econtext = node->iss_RuntimeContext;            /* context for runtime
370                                                                                                  * keys */
371         numIndices = node->iss_NumIndices;
372         scanDescs = node->iss_ScanDescs;
373         scanKeys = node->iss_ScanKeys;
374         runtimeKeyInfo = node->iss_RuntimeKeyInfo;
375         numScanKeys = node->iss_NumScanKeys;
376         scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;
377
378         if (econtext)
379         {
380                 /*
381                  * If we are being passed an outer tuple, save it for runtime key
382                  * calc.  We also need to link it into the "regular" per-tuple
383                  * econtext, so it can be used during indexqualorig evaluations.
384                  */
385                 if (exprCtxt != NULL)
386                 {
387                         ExprContext *stdecontext;
388
389                         econtext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
390                         stdecontext = node->ss.ps.ps_ExprContext;
391                         stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
392                 }
393
394                 /*
395                  * Reset the runtime-key context so we don't leak memory as each
396                  * outer tuple is scanned.      Note this assumes that we will
397                  * recalculate *all* runtime keys on each call.
398                  */
399                 ResetExprContext(econtext);
400         }
401
402         /*
403          * If we are doing runtime key calculations (ie, the index keys depend
404          * on data from an outer scan), compute the new key values
405          */
406         if (runtimeKeyInfo)
407         {
408                 for (i = 0; i < numIndices; i++)
409                 {
410                         int                     n_keys;
411                         ScanKey         scan_keys;
412                         ExprState **run_keys;
413
414                         n_keys = numScanKeys[i];
415                         scan_keys = scanKeys[i];
416                         run_keys = runtimeKeyInfo[i];
417
418                         for (j = 0; j < n_keys; j++)
419                         {
420                                 /*
421                                  * If we have a run-time key, then extract the run-time
422                                  * expression and evaluate it with respect to the current
423                                  * outer tuple.  We then stick the result into the scan
424                                  * key.
425                                  *
426                                  * Note: the result of the eval could be a pass-by-ref value
427                                  * that's stored in the outer scan's tuple, not in
428                                  * econtext->ecxt_per_tuple_memory.  We assume that the
429                                  * outer tuple will stay put throughout our scan.  If this
430                                  * is wrong, we could copy the result into our context
431                                  * explicitly, but I think that's not necessary...
432                                  */
433                                 if (run_keys[j] != NULL)
434                                 {
435                                         Datum           scanvalue;
436                                         bool            isNull;
437
438                                         scanvalue = ExecEvalExprSwitchContext(run_keys[j],
439                                                                                                                   econtext,
440                                                                                                                   &isNull,
441                                                                                                                   NULL);
442                                         scan_keys[j].sk_argument = scanvalue;
443                                         if (isNull)
444                                                 scan_keys[j].sk_flags |= SK_ISNULL;
445                                         else
446                                                 scan_keys[j].sk_flags &= ~SK_ISNULL;
447                                 }
448                         }
449                 }
450
451                 node->iss_RuntimeKeysReady = true;
452         }
453
454         /* If this is re-scanning of PlanQual ... */
455         if (estate->es_evTuple != NULL &&
456                 estate->es_evTuple[scanrelid - 1] != NULL)
457         {
458                 estate->es_evTupleNull[scanrelid - 1] = false;
459                 return;
460         }
461
462         /* reset hash table */
463         if (numIndices > 1)
464         {
465                 if (node->iss_DupHash)
466                         hash_destroy(node->iss_DupHash);
467                 create_duphash(node);
468         }
469
470         /* reset index scans */
471         if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indxorderdir))
472                 node->iss_IndexPtr = numIndices;
473         else
474                 node->iss_IndexPtr = -1;
475
476         for (i = 0; i < numIndices; i++)
477         {
478                 IndexScanDesc scan = scanDescs[i];
479                 ScanKey         skey = scanKeys[i];
480
481                 index_rescan(scan, skey);
482         }
483 }
484
485 /* ----------------------------------------------------------------
486  *              ExecEndIndexScan
487  * ----------------------------------------------------------------
488  */
489 void
490 ExecEndIndexScan(IndexScanState *node)
491 {
492         int                     numIndices;
493         RelationPtr indexRelationDescs;
494         IndexScanDescPtr indexScanDescs;
495         Relation        relation;
496         int                     i;
497
498         /*
499          * extract information from the node
500          */
501         numIndices = node->iss_NumIndices;
502         indexRelationDescs = node->iss_RelationDescs;
503         indexScanDescs = node->iss_ScanDescs;
504         relation = node->ss.ss_currentRelation;
505
506         /*
507          * Free the exprcontext(s)
508          */
509         ExecFreeExprContext(&node->ss.ps);
510         if (node->iss_RuntimeContext)
511                 FreeExprContext(node->iss_RuntimeContext);
512
513         /*
514          * clear out tuple table slots
515          */
516         ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
517         ExecClearTuple(node->ss.ss_ScanTupleSlot);
518
519         /* drop hash table */
520         if (node->iss_DupHash)
521                 hash_destroy(node->iss_DupHash);
522
523         /*
524          * close the index relations
525          */
526         for (i = 0; i < numIndices; i++)
527         {
528                 if (indexScanDescs[i] != NULL)
529                         index_endscan(indexScanDescs[i]);
530
531                 if (indexRelationDescs[i] != NULL)
532                         index_close(indexRelationDescs[i]);
533         }
534
535         /*
536          * close the heap relation.
537          *
538          * Currently, we do not release the AccessShareLock acquired by
539          * ExecInitIndexScan.  This lock should be held till end of
540          * transaction. (There is a faction that considers this too much
541          * locking, however.)
542          */
543         heap_close(relation, NoLock);
544 }
545
546 /* ----------------------------------------------------------------
547  *              ExecIndexMarkPos
548  *
549  * old comments
550  *              Marks scan position by marking the current index.
551  *              Returns nothing.
552  * ----------------------------------------------------------------
553  */
554 void
555 ExecIndexMarkPos(IndexScanState *node)
556 {
557         IndexScanDescPtr indexScanDescs;
558         IndexScanDesc scanDesc;
559         int                     indexPtr;
560
561         indexPtr = node->iss_MarkIndexPtr = node->iss_IndexPtr;
562         if (indexPtr >= 0 && indexPtr < node->iss_NumIndices)
563         {
564                 indexScanDescs = node->iss_ScanDescs;
565                 scanDesc = indexScanDescs[indexPtr];
566
567                 index_markpos(scanDesc);
568         }
569 }
570
571 /* ----------------------------------------------------------------
572  *              ExecIndexRestrPos
573  *
574  * old comments
575  *              Restores scan position by restoring the current index.
576  *              Returns nothing.
577  * ----------------------------------------------------------------
578  */
579 void
580 ExecIndexRestrPos(IndexScanState *node)
581 {
582         IndexScanDescPtr indexScanDescs;
583         IndexScanDesc scanDesc;
584         int                     indexPtr;
585
586         indexPtr = node->iss_IndexPtr = node->iss_MarkIndexPtr;
587         if (indexPtr >= 0 && indexPtr < node->iss_NumIndices)
588         {
589                 indexScanDescs = node->iss_ScanDescs;
590                 scanDesc = indexScanDescs[indexPtr];
591
592                 index_restrpos(scanDesc);
593         }
594 }
595
596 /* ----------------------------------------------------------------
597  *              ExecInitIndexScan
598  *
599  *              Initializes the index scan's state information, creates
600  *              scan keys, and opens the base and index relations.
601  *
602  *              Note: index scans have 2 sets of state information because
603  *                        we have to keep track of the base relation and the
604  *                        index relations.
605  *
606  * old comments
607  *              Creates the run-time state information for the node and
608  *              sets the relation id to contain relevant descriptors.
609  *
610  *              Parameters:
611  *                node: IndexNode node produced by the planner.
612  *                estate: the execution state initialized in InitPlan.
613  * ----------------------------------------------------------------
614  */
615 IndexScanState *
616 ExecInitIndexScan(IndexScan *node, EState *estate)
617 {
618         IndexScanState *indexstate;
619         List       *indxqual;
620         List       *indxid;
621         List       *listscan;
622         int                     i;
623         int                     numIndices;
624         int                     indexPtr;
625         ScanKey    *scanKeys;
626         int                *numScanKeys;
627         RelationPtr indexDescs;
628         IndexScanDescPtr scanDescs;
629         ExprState ***runtimeKeyInfo;
630         bool            have_runtime_keys;
631         RangeTblEntry *rtentry;
632         Index           relid;
633         Oid                     reloid;
634         Relation        currentRelation;
635
636         /*
637          * create state structure
638          */
639         indexstate = makeNode(IndexScanState);
640         indexstate->ss.ps.plan = (Plan *) node;
641         indexstate->ss.ps.state = estate;
642
643         /*
644          * Miscellaneous initialization
645          *
646          * create expression context for node
647          */
648         ExecAssignExprContext(estate, &indexstate->ss.ps);
649
650         /*
651          * initialize child expressions
652          */
653         indexstate->ss.ps.targetlist = (List *)
654                 ExecInitExpr((Expr *) node->scan.plan.targetlist,
655                                          (PlanState *) indexstate);
656         indexstate->ss.ps.qual = (List *)
657                 ExecInitExpr((Expr *) node->scan.plan.qual,
658                                          (PlanState *) indexstate);
659         indexstate->indxqual = (List *)
660                 ExecInitExpr((Expr *) node->indxqual,
661                                          (PlanState *) indexstate);
662         indexstate->indxqualorig = (List *)
663                 ExecInitExpr((Expr *) node->indxqualorig,
664                                          (PlanState *) indexstate);
665
666 #define INDEXSCAN_NSLOTS 2
667
668         /*
669          * tuple table initialization
670          */
671         ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
672         ExecInitScanTupleSlot(estate, &indexstate->ss);
673
674         /*
675          * Initialize index-specific scan state
676          */
677         indexstate->iss_NumIndices = 0;
678         indexstate->iss_IndexPtr = -1;
679         indexstate->iss_ScanKeys = NULL;
680         indexstate->iss_NumScanKeys = NULL;
681         indexstate->iss_RuntimeKeyInfo = NULL;
682         indexstate->iss_RuntimeContext = NULL;
683         indexstate->iss_RuntimeKeysReady = false;
684         indexstate->iss_RelationDescs = NULL;
685         indexstate->iss_ScanDescs = NULL;
686
687         /*
688          * get the index node information
689          */
690         indxid = node->indxid;
691         numIndices = length(indxid);
692         indexPtr = -1;
693
694         CXT1_printf("ExecInitIndexScan: context is %d\n", CurrentMemoryContext);
695
696         /*
697          * scanKeys is used to keep track of the ScanKey's. This is needed
698          * because a single scan may use several indices and each index has
699          * its own ScanKey.
700          */
701         numScanKeys = (int *) palloc(numIndices * sizeof(int));
702         scanKeys = (ScanKey *) palloc(numIndices * sizeof(ScanKey));
703         indexDescs = (RelationPtr) palloc(numIndices * sizeof(Relation));
704         scanDescs = (IndexScanDescPtr) palloc(numIndices * sizeof(IndexScanDesc));
705
706         /*
707          * initialize space for runtime key info (may not be needed)
708          */
709         have_runtime_keys = false;
710         runtimeKeyInfo = (ExprState ***) palloc0(numIndices * sizeof(ExprState **));
711
712         /*
713          * build the index scan keys from the index qualification
714          */
715         indxqual = node->indxqual;
716         for (i = 0; i < numIndices; i++)
717         {
718                 int                     j;
719                 List       *qual;
720                 int                     n_keys;
721                 ScanKey         scan_keys;
722                 ExprState **run_keys;
723
724                 qual = lfirst(indxqual);
725                 indxqual = lnext(indxqual);
726                 n_keys = length(qual);
727                 scan_keys = (n_keys <= 0) ? (ScanKey) NULL :
728                         (ScanKey) palloc(n_keys * sizeof(ScanKeyData));
729                 run_keys = (n_keys <= 0) ? (ExprState **) NULL :
730                         (ExprState **) palloc(n_keys * sizeof(ExprState *));
731
732                 CXT1_printf("ExecInitIndexScan: context is %d\n", CurrentMemoryContext);
733
734                 /*
735                  * for each opclause in the given qual, convert each qual's
736                  * opclause into a single scan key
737                  */
738                 listscan = qual;
739                 for (j = 0; j < n_keys; j++)
740                 {
741                         OpExpr     *clause; /* one clause of index qual */
742                         Expr       *leftop; /* expr on lhs of operator */
743                         Expr       *rightop;    /* expr on rhs ... */
744                         bits16          flags = 0;
745
746                         int                     scanvar;        /* which var identifies varattno */
747                         AttrNumber      varattno = 0;   /* att number used in scan */
748                         Oid                     opfuncid;               /* operator id used in scan */
749                         Datum           scanvalue = 0;  /* value used in scan (if const) */
750
751                         /*
752                          * extract clause information from the qualification
753                          */
754                         clause = (OpExpr *) lfirst(listscan);
755                         listscan = lnext(listscan);
756
757                         if (!IsA(clause, OpExpr))
758                                 elog(ERROR, "indxqual is not an OpExpr");
759
760                         opfuncid = clause->opfuncid;
761
762                         /*
763                          * Here we figure out the contents of the index qual. The
764                          * usual case is (var op const) or (const op var) which means
765                          * we form a scan key for the attribute listed in the var node
766                          * and use the value of the const.
767                          *
768                          * If we don't have a const node, then it means that one of the
769                          * var nodes refers to the "scan" tuple and is used to
770                          * determine which attribute to scan, and the other expression
771                          * is used to calculate the value used in scanning the index.
772                          *
773                          * This means our index scan's scan key is a function of
774                          * information obtained during the execution of the plan in
775                          * which case we need to recalculate the index scan key at run
776                          * time.
777                          *
778                          * Hence, we set have_runtime_keys to true and place the
779                          * appropriate subexpression in run_keys. The corresponding
780                          * scan key values are recomputed at run time.
781                          *
782                          * XXX Although this code *thinks* it can handle an indexqual
783                          * with the indexkey on either side, in fact it cannot.
784                          * Indexscans only work with quals that have the indexkey on
785                          * the left (the planner/optimizer makes sure it never passes
786                          * anything else).      The reason: the scankey machinery has no
787                          * provision for distinguishing which side of the operator is
788                          * the indexed attribute and which is the compared-to
789                          * constant. It just assumes that the attribute is on the left
790                          * :-(
791                          *
792                          * I am leaving this code able to support both ways, even though
793                          * half of it is dead code, on the off chance that someone
794                          * will fix the scankey machinery someday --- tgl 8/11/99.
795                          */
796
797                         scanvar = NO_OP;
798                         run_keys[j] = NULL;
799
800                         /*
801                          * determine information in leftop
802                          */
803                         leftop = (Expr *) get_leftop((Expr *) clause);
804
805                         if (leftop && IsA(leftop, RelabelType))
806                                 leftop = ((RelabelType *) leftop)->arg;
807
808                         Assert(leftop != NULL);
809
810                         if (IsA(leftop, Var) &&
811                                 var_is_rel((Var *) leftop))
812                         {
813                                 /*
814                                  * if the leftop is a "rel-var", then it means that it is
815                                  * a var node which tells us which attribute to use for
816                                  * our scan key.
817                                  */
818                                 varattno = ((Var *) leftop)->varattno;
819                                 scanvar = LEFT_OP;
820                         }
821                         else if (IsA(leftop, Const))
822                         {
823                                 /*
824                                  * if the leftop is a const node then it means it
825                                  * identifies the value to place in our scan key.
826                                  */
827                                 scanvalue = ((Const *) leftop)->constvalue;
828                                 if (((Const *) leftop)->constisnull)
829                                         flags |= SK_ISNULL;
830                         }
831                         else
832                         {
833                                 /*
834                                  * otherwise, the leftop contains an expression evaluable
835                                  * at runtime to figure out the value to place in our scan
836                                  * key.
837                                  */
838                                 have_runtime_keys = true;
839                                 run_keys[j] = ExecInitExpr(leftop, (PlanState *) indexstate);
840                         }
841
842                         /*
843                          * now determine information in rightop
844                          */
845                         rightop = (Expr *) get_rightop((Expr *) clause);
846
847                         if (rightop && IsA(rightop, RelabelType))
848                                 rightop = ((RelabelType *) rightop)->arg;
849
850                         Assert(rightop != NULL);
851
852                         if (IsA(rightop, Var) &&
853                                 var_is_rel((Var *) rightop))
854                         {
855                                 /*
856                                  * here we make sure only one op identifies the
857                                  * scan-attribute...
858                                  */
859                                 if (scanvar == LEFT_OP)
860                                         elog(ERROR, "both left and right operands are rel-vars");
861
862                                 /*
863                                  * if the rightop is a "rel-var", then it means that it is
864                                  * a var node which tells us which attribute to use for
865                                  * our scan key.
866                                  */
867                                 varattno = ((Var *) rightop)->varattno;
868                                 scanvar = RIGHT_OP;
869                         }
870                         else if (IsA(rightop, Const))
871                         {
872                                 /*
873                                  * if the rightop is a const node then it means it
874                                  * identifies the value to place in our scan key.
875                                  */
876                                 scanvalue = ((Const *) rightop)->constvalue;
877                                 if (((Const *) rightop)->constisnull)
878                                         flags |= SK_ISNULL;
879                         }
880                         else
881                         {
882                                 /*
883                                  * otherwise, the rightop contains an expression evaluable
884                                  * at runtime to figure out the value to place in our scan
885                                  * key.
886                                  */
887                                 have_runtime_keys = true;
888                                 run_keys[j] = ExecInitExpr(rightop, (PlanState *) indexstate);
889                         }
890
891                         /*
892                          * now check that at least one op tells us the scan
893                          * attribute...
894                          */
895                         if (scanvar == NO_OP)
896                                 elog(ERROR, "neither left nor right operand refer to scan relation");
897
898                         /*
899                          * initialize the scan key's fields appropriately
900                          */
901                         ScanKeyEntryInitialize(&scan_keys[j],
902                                                                    flags,
903                                                                    varattno,    /* attribute number to
904                                                                                                  * scan */
905                                                                    opfuncid,    /* reg proc to use */
906                                                                    scanvalue);  /* constant */
907                 }
908
909                 /*
910                  * store the key information into our arrays.
911                  */
912                 numScanKeys[i] = n_keys;
913                 scanKeys[i] = scan_keys;
914                 runtimeKeyInfo[i] = run_keys;
915         }
916
917         indexstate->iss_NumIndices = numIndices;
918         if (ScanDirectionIsBackward(node->indxorderdir))
919                 indexPtr = numIndices;
920         indexstate->iss_IndexPtr = indexPtr;
921         indexstate->iss_ScanKeys = scanKeys;
922         indexstate->iss_NumScanKeys = numScanKeys;
923
924         /*
925          * If all of our keys have the form (op var const) , then we have no
926          * runtime keys so we store NULL in the runtime key info. Otherwise
927          * runtime key info contains an array of pointers (one for each index)
928          * to arrays of flags (one for each key) which indicate that the qual
929          * needs to be evaluated at runtime. -cim 10/24/89
930          *
931          * If we do have runtime keys, we need an ExprContext to evaluate them;
932          * the node's standard context won't do because we want to reset that
933          * context for every tuple.  So, build another context just like the
934          * other one... -tgl 7/11/00
935          */
936         if (have_runtime_keys)
937         {
938                 ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
939
940                 ExecAssignExprContext(estate, &indexstate->ss.ps);
941                 indexstate->iss_RuntimeKeyInfo = runtimeKeyInfo;
942                 indexstate->iss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
943                 indexstate->ss.ps.ps_ExprContext = stdecontext;
944         }
945         else
946         {
947                 indexstate->iss_RuntimeKeyInfo = NULL;
948                 indexstate->iss_RuntimeContext = NULL;
949                 /* Get rid of the speculatively-allocated flag arrays, too */
950                 for (i = 0; i < numIndices; i++)
951                 {
952                         if (runtimeKeyInfo[i] != NULL)
953                                 pfree(runtimeKeyInfo[i]);
954                 }
955                 pfree(runtimeKeyInfo);
956         }
957
958         /*
959          * open the base relation and acquire AccessShareLock on it.
960          */
961         relid = node->scan.scanrelid;
962         rtentry = rt_fetch(relid, estate->es_range_table);
963         reloid = rtentry->relid;
964
965         currentRelation = heap_open(reloid, AccessShareLock);
966
967         if (!RelationGetForm(currentRelation)->relhasindex)
968                 ereport(ERROR,
969                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
970                                  errmsg("indexes of relation %u were deactivated",
971                                                 reloid)));
972
973         indexstate->ss.ss_currentRelation = currentRelation;
974         indexstate->ss.ss_currentScanDesc = NULL;       /* no heap scan here */
975
976         /*
977          * get the scan type from the relation descriptor.
978          */
979         ExecAssignScanType(&indexstate->ss, RelationGetDescr(currentRelation), false);
980
981         /*
982          * open the index relations and initialize relation and scan
983          * descriptors.  Note we acquire no locks here; the index machinery
984          * does its own locks and unlocks.      (We rely on having AccessShareLock
985          * on the parent table to ensure the index won't go away!)
986          */
987         listscan = indxid;
988         for (i = 0; i < numIndices; i++)
989         {
990                 Oid                     indexOid = lfirsto(listscan);
991
992                 indexDescs[i] = index_open(indexOid);
993                 scanDescs[i] = index_beginscan(currentRelation,
994                                                                            indexDescs[i],
995                                                                            estate->es_snapshot,
996                                                                            numScanKeys[i],
997                                                                            scanKeys[i]);
998                 listscan = lnext(listscan);
999         }
1000
1001         indexstate->iss_RelationDescs = indexDescs;
1002         indexstate->iss_ScanDescs = scanDescs;
1003
1004         /*
1005          * Initialize result tuple type and projection info.
1006          */
1007         ExecAssignResultTypeFromTL(&indexstate->ss.ps);
1008         ExecAssignScanProjectionInfo(&indexstate->ss);
1009
1010         /*
1011          * Initialize hash table if needed.
1012          */
1013         if (numIndices > 1)
1014                 create_duphash(indexstate);
1015         else
1016                 indexstate->iss_DupHash = NULL;
1017
1018         /*
1019          * all done.
1020          */
1021         return indexstate;
1022 }
1023
1024 static void
1025 create_duphash(IndexScanState *node)
1026 {
1027         HASHCTL         hash_ctl;
1028
1029         MemSet(&hash_ctl, 0, sizeof(hash_ctl));
1030         hash_ctl.keysize = SizeOfIptrData;
1031         hash_ctl.entrysize = sizeof(DupHashTabEntry);
1032         hash_ctl.hash = tag_hash;
1033         hash_ctl.hcxt = CurrentMemoryContext;
1034         node->iss_DupHash = hash_create("DupHashTable",
1035                                                                         (long) ceil(node->ss.ps.plan->plan_rows),
1036                                                                         &hash_ctl,
1037                                                                         HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
1038         if (node->iss_DupHash == NULL)
1039                 ereport(ERROR,
1040                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1041                                  errmsg("out of memory")));
1042         node->iss_MaxHash = (SortMem * 1024L) /
1043                 (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(DupHashTabEntry)));
1044 }
1045
1046 int
1047 ExecCountSlotsIndexScan(IndexScan *node)
1048 {
1049         return ExecCountSlotsNode(outerPlan((Plan *) node)) +
1050                 ExecCountSlotsNode(innerPlan((Plan *) node)) + INDEXSCAN_NSLOTS;
1051 }