]> granicus.if.org Git - postgresql/blob - src/backend/executor/nodeIndexscan.c
Reimplement the linked list data structure used throughout the backend.
[postgresql] / src / backend / executor / nodeIndexscan.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeIndexscan.c
4  *        Routines to support indexes and indexed scans of relations
5  *
6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/executor/nodeIndexscan.c,v 1.94 2004/05/26 04:41:16 neilc Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *              ExecIndexScan                   scans a relation using indices
18  *              ExecIndexNext                   using index to retrieve next tuple
19  *              ExecInitIndexScan               creates and initializes state info.
20  *              ExecIndexReScan                 rescans the indexed relation.
21  *              ExecEndIndexScan                releases all storage.
22  *              ExecIndexMarkPos                marks scan position.
23  *              ExecIndexRestrPos               restores scan position.
24  */
25 #include "postgres.h"
26
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "executor/execdebug.h"
30 #include "executor/nodeIndexscan.h"
31 #include "miscadmin.h"
32 #include "nodes/nodeFuncs.h"
33 #include "optimizer/clauses.h"
34 #include "parser/parsetree.h"
35
36
37 /*
38  * In a multiple-index plan, we must take care to return any given tuple
39  * only once, even if it matches conditions of several index scans.  Our
40  * preferred way to do this is to record already-returned tuples in a hash
41  * table (using the TID as unique identifier).  However, in a very large
42  * scan this could conceivably run out of memory.  We limit the hash table
43  * to no more than work_mem KB; if it grows past that, we fall back to the
44  * pre-7.4 technique: evaluate the prior-scan index quals again for each
45  * tuple (which is space-efficient, but slow).
46  *
47  * When scanning backwards, we use scannum to determine when to emit the
48  * tuple --- we have to re-emit a tuple in the same scan as it was first
49  * encountered.
50  *
51  * Note: this code would break if the planner were ever to create a multiple
52  * index plan with overall backwards direction, because the hashtable code
53  * will emit a tuple the first time it is encountered (which would be the
54  * highest scan in which it matches the index), but the evaluate-the-quals
55  * code will emit a tuple in the lowest-numbered scan in which it's valid.
56  * This could be fixed at need by making the evaluate-the-quals case more
57  * complex.  Currently the planner will never create such a plan (since it
58  * considers multi-index plans unordered anyway), so there's no need for
59  * more complexity.
60  */
61 typedef struct
62 {
63         /* tid is the hash key and so must be first! */
64         ItemPointerData tid;            /* TID of a tuple we've returned */
65         int                     scannum;                /* number of scan we returned it in */
66 } DupHashTabEntry;
67
68
69 static TupleTableSlot *IndexNext(IndexScanState *node);
70 static void create_duphash(IndexScanState *node);
71
72
73 /* ----------------------------------------------------------------
74  *              IndexNext
75  *
76  *              Retrieve a tuple from the IndexScan node's currentRelation
77  *              using the indices in the IndexScanState information.
78  *
79  *              note: the old code mentions 'Primary indices'.  to my knowledge
80  *              we only support a single secondary index. -cim 9/11/89
81  *
82  * old comments:
83  *              retrieve a tuple from relation using the indices given.
84  *              The indices are used in the order they appear in 'indices'.
85  *              The indices may be primary or secondary indices:
86  *                * primary index --    scan the relation 'relID' using keys supplied.
87  *                * secondary index --  scan the index relation to get the 'tid' for
88  *                                                              a tuple in the relation 'relID'.
89  *              If the current index(pointed by 'indexPtr') fails to return a
90  *              tuple, the next index in the indices is used.
91  *
92  *                bug fix so that it should retrieve on a null scan key.
93  * ----------------------------------------------------------------
94  */
95 static TupleTableSlot *
96 IndexNext(IndexScanState *node)
97 {
98         EState     *estate;
99         ExprContext *econtext;
100         ScanDirection direction;
101         IndexScanDescPtr scanDescs;
102         List      **lossyQuals;
103         IndexScanDesc scandesc;
104         List       *lossyQual;
105         Index           scanrelid;
106         HeapTuple       tuple;
107         TupleTableSlot *slot;
108         int                     numIndices;
109         bool            bBackward;
110         int                     indexNumber;
111
112         /*
113          * extract necessary information from index scan node
114          */
115         estate = node->ss.ps.state;
116         direction = estate->es_direction;
117         if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indxorderdir))
118         {
119                 if (ScanDirectionIsForward(direction))
120                         direction = BackwardScanDirection;
121                 else if (ScanDirectionIsBackward(direction))
122                         direction = ForwardScanDirection;
123         }
124         scanDescs = node->iss_ScanDescs;
125         lossyQuals = node->iss_LossyQuals;
126         numIndices = node->iss_NumIndices;
127         econtext = node->ss.ps.ps_ExprContext;
128         slot = node->ss.ss_ScanTupleSlot;
129         scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;
130
131         /*
132          * Clear any reference to the previously returned tuple.  The idea here
133          * is to not have the tuple slot be the last holder of a pin on that
134          * tuple's buffer; if it is, we'll need a separate visit to the bufmgr
135          * to release the buffer.  By clearing here, we get to have the release
136          * done by ReleaseAndReadBuffer inside index_getnext.
137          */
138         ExecClearTuple(slot);
139
140         /*
141          * Check if we are evaluating PlanQual for tuple of this relation.
142          * Additional checking is not good, but no other way for now. We could
143          * introduce new nodes for this case and handle IndexScan --> NewNode
144          * switching in Init/ReScan plan...
145          */
146         if (estate->es_evTuple != NULL &&
147                 estate->es_evTuple[scanrelid - 1] != NULL)
148         {
149                 ListCell   *qual;
150
151                 if (estate->es_evTupleNull[scanrelid - 1])
152                         return slot;            /* return empty slot */
153
154                 ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
155                                            slot, InvalidBuffer, false);
156
157                 /* Does the tuple meet any of the OR'd indxqual conditions? */
158                 econtext->ecxt_scantuple = slot;
159
160                 ResetExprContext(econtext);
161
162                 foreach(qual, node->indxqualorig)
163                 {
164                         if (ExecQual((List *) lfirst(qual), econtext, false))
165                                 break;
166                 }
167                 if (qual == NULL)               /* would not be returned by indices */
168                         slot->val = NULL;
169
170                 /* Flag for the next call that no more tuples */
171                 estate->es_evTupleNull[scanrelid - 1] = true;
172
173                 return slot;
174         }
175
176         /*
177          * ok, now that we have what we need, fetch an index tuple. if
178          * scanning this index succeeded then return the appropriate heap
179          * tuple.. else return NULL.
180          */
181         bBackward = ScanDirectionIsBackward(direction);
182         if (bBackward)
183         {
184                 indexNumber = numIndices - node->iss_IndexPtr - 1;
185                 if (indexNumber < 0)
186                 {
187                         indexNumber = 0;
188                         node->iss_IndexPtr = numIndices - 1;
189                 }
190         }
191         else
192         {
193                 if ((indexNumber = node->iss_IndexPtr) < 0)
194                 {
195                         indexNumber = 0;
196                         node->iss_IndexPtr = 0;
197                 }
198         }
199         while (indexNumber < numIndices)
200         {
201                 scandesc = scanDescs[node->iss_IndexPtr];
202                 lossyQual = lossyQuals[node->iss_IndexPtr];
203
204                 while ((tuple = index_getnext(scandesc, direction)) != NULL)
205                 {
206                         /*
207                          * Store the scanned tuple in the scan tuple slot of the scan
208                          * state.  Note: we pass 'false' because tuples returned by
209                          * amgetnext are pointers onto disk pages and must not be
210                          * pfree()'d.
211                          */
212                         ExecStoreTuple(tuple,           /* tuple to store */
213                                                    slot,        /* slot to store in */
214                                                    scandesc->xs_cbuf,   /* buffer containing tuple */
215                                                    false);              /* don't pfree */
216
217                         /*
218                          * If any of the index operators involved in this scan are lossy,
219                          * recheck them by evaluating the original operator clauses.
220                          */
221                         if (lossyQual)
222                         {
223                                 econtext->ecxt_scantuple = slot;
224                                 ResetExprContext(econtext);
225                                 if (!ExecQual(lossyQual, econtext, false))
226                                 {
227                                         /* Fails lossy op, so drop it and loop back for another */
228                                         ExecClearTuple(slot);
229                                         continue;
230                                 }
231                         }
232
233                         /*
234                          * If it's a multiple-index scan, make sure not to double-report
235                          * a tuple matched by more than one index.  (See notes above.)
236                          */
237                         if (numIndices > 1)
238                         {
239                                 /* First try the hash table */
240                                 if (node->iss_DupHash)
241                                 {
242                                         DupHashTabEntry *entry;
243                                         bool    found;
244
245                                         entry = (DupHashTabEntry *)
246                                                 hash_search(node->iss_DupHash,
247                                                                         &tuple->t_data->t_ctid,
248                                                                         HASH_ENTER,
249                                                                         &found);
250                                         if (entry == NULL ||
251                                                 node->iss_DupHash->hctl->nentries > node->iss_MaxHash)
252                                         {
253                                                 /* out of memory (either hard or soft limit) */
254                                                 /* release hash table and fall thru to old code */
255                                                 hash_destroy(node->iss_DupHash);
256                                                 node->iss_DupHash = NULL;
257                                         }
258                                         else if (found)
259                                         {
260                                                 /* pre-existing entry */
261
262                                                 /*
263                                                  * It's duplicate if first emitted in a different
264                                                  * scan.  If same scan, we must be backing up, so
265                                                  * okay to emit again.
266                                                  */
267                                                 if (entry->scannum != node->iss_IndexPtr)
268                                                 {
269                                                         /* Dup, so drop it and loop back for another */
270                                                         ExecClearTuple(slot);
271                                                         continue;
272                                                 }
273                                         }
274                                         else
275                                         {
276                                                 /* new entry, finish filling it in */
277                                                 entry->scannum = node->iss_IndexPtr;
278                                         }
279                                 }
280                                 /* If hash table has overflowed, do it the hard way */
281                                 if (node->iss_DupHash == NULL &&
282                                         node->iss_IndexPtr > 0)
283                                 {
284                                         bool            prev_matches = false;
285                                         int                     prev_index;
286                                         ListCell   *qual;
287
288                                         econtext->ecxt_scantuple = slot;
289                                         ResetExprContext(econtext);
290                                         qual = list_head(node->indxqualorig);
291                                         for (prev_index = 0;
292                                                  prev_index < node->iss_IndexPtr;
293                                                  prev_index++)
294                                         {
295                                                 if (ExecQual((List *) lfirst(qual), econtext, false))
296                                                 {
297                                                         prev_matches = true;
298                                                         break;
299                                                 }
300                                                 qual = lnext(qual);
301                                         }
302                                         if (prev_matches)
303                                         {
304                                                 /* Dup, so drop it and loop back for another */
305                                                 ExecClearTuple(slot);
306                                                 continue;
307                                         }
308                                 }
309                         }
310
311                         return slot;            /* OK to return tuple */
312                 }
313
314                 if (indexNumber < numIndices)
315                 {
316                         indexNumber++;
317                         if (bBackward)
318                                 node->iss_IndexPtr--;
319                         else
320                                 node->iss_IndexPtr++;
321                 }
322         }
323
324         /*
325          * if we get here it means the index scan failed so we are at the end
326          * of the scan..
327          */
328         return ExecClearTuple(slot);
329 }
330
331 /* ----------------------------------------------------------------
332  *              ExecIndexScan(node)
333  *
334  * old comments:
335  *              Scans the relation using primary or secondary indices and returns
336  *                 the next qualifying tuple in the direction specified.
337  *              It calls ExecScan() and passes it the access methods which returns
338  *              the next tuple using the indices.
339  *
340  *              Conditions:
341  *                -- the "cursor" maintained by the AMI is positioned at the tuple
342  *                       returned previously.
343  *
344  *              Initial States:
345  *                -- the relation indicated is opened for scanning so that the
346  *                       "cursor" is positioned before the first qualifying tuple.
347  *                -- all index realtions are opened for scanning.
348  *                -- indexPtr points to the first index.
349  *                -- state variable ruleFlag = nil.
350  * ----------------------------------------------------------------
351  */
352 TupleTableSlot *
353 ExecIndexScan(IndexScanState *node)
354 {
355         /*
356          * If we have runtime keys and they've not already been set up, do it
357          * now.
358          */
359         if (node->iss_RuntimeKeyInfo && !node->iss_RuntimeKeysReady)
360                 ExecReScan((PlanState *) node, NULL);
361
362         /*
363          * use IndexNext as access method
364          */
365         return ExecScan(&node->ss, (ExecScanAccessMtd) IndexNext);
366 }
367
368 /* ----------------------------------------------------------------
369  *              ExecIndexReScan(node)
370  *
371  *              Recalculates the value of the scan keys whose value depends on
372  *              information known at runtime and rescans the indexed relation.
373  *              Updating the scan key was formerly done separately in
374  *              ExecUpdateIndexScanKeys. Integrating it into ReScan makes
375  *              rescans of indices and relations/general streams more uniform.
376  *
377  * ----------------------------------------------------------------
378  */
379 void
380 ExecIndexReScan(IndexScanState *node, ExprContext *exprCtxt)
381 {
382         EState     *estate;
383         ExprContext *econtext;
384         int                     numIndices;
385         IndexScanDescPtr scanDescs;
386         ScanKey    *scanKeys;
387         ExprState ***runtimeKeyInfo;
388         int                *numScanKeys;
389         Index           scanrelid;
390         int                     i;
391         int                     j;
392
393         estate = node->ss.ps.state;
394         econtext = node->iss_RuntimeContext;            /* context for runtime
395                                                                                                  * keys */
396         numIndices = node->iss_NumIndices;
397         scanDescs = node->iss_ScanDescs;
398         scanKeys = node->iss_ScanKeys;
399         runtimeKeyInfo = node->iss_RuntimeKeyInfo;
400         numScanKeys = node->iss_NumScanKeys;
401         scanrelid = ((IndexScan *) node->ss.ps.plan)->scan.scanrelid;
402
403         if (econtext)
404         {
405                 /*
406                  * If we are being passed an outer tuple, save it for runtime key
407                  * calc.  We also need to link it into the "regular" per-tuple
408                  * econtext, so it can be used during indexqualorig evaluations.
409                  */
410                 if (exprCtxt != NULL)
411                 {
412                         ExprContext *stdecontext;
413
414                         econtext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
415                         stdecontext = node->ss.ps.ps_ExprContext;
416                         stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
417                 }
418
419                 /*
420                  * Reset the runtime-key context so we don't leak memory as each
421                  * outer tuple is scanned.      Note this assumes that we will
422                  * recalculate *all* runtime keys on each call.
423                  */
424                 ResetExprContext(econtext);
425         }
426
427         /*
428          * If we are doing runtime key calculations (ie, the index keys depend
429          * on data from an outer scan), compute the new key values
430          */
431         if (runtimeKeyInfo)
432         {
433                 for (i = 0; i < numIndices; i++)
434                 {
435                         int                     n_keys;
436                         ScanKey         scan_keys;
437                         ExprState **run_keys;
438
439                         n_keys = numScanKeys[i];
440                         scan_keys = scanKeys[i];
441                         run_keys = runtimeKeyInfo[i];
442
443                         for (j = 0; j < n_keys; j++)
444                         {
445                                 /*
446                                  * If we have a run-time key, then extract the run-time
447                                  * expression and evaluate it with respect to the current
448                                  * outer tuple.  We then stick the result into the scan
449                                  * key.
450                                  *
451                                  * Note: the result of the eval could be a pass-by-ref value
452                                  * that's stored in the outer scan's tuple, not in
453                                  * econtext->ecxt_per_tuple_memory.  We assume that the
454                                  * outer tuple will stay put throughout our scan.  If this
455                                  * is wrong, we could copy the result into our context
456                                  * explicitly, but I think that's not necessary...
457                                  */
458                                 if (run_keys[j] != NULL)
459                                 {
460                                         Datum           scanvalue;
461                                         bool            isNull;
462
463                                         scanvalue = ExecEvalExprSwitchContext(run_keys[j],
464                                                                                                                   econtext,
465                                                                                                                   &isNull,
466                                                                                                                   NULL);
467                                         scan_keys[j].sk_argument = scanvalue;
468                                         if (isNull)
469                                                 scan_keys[j].sk_flags |= SK_ISNULL;
470                                         else
471                                                 scan_keys[j].sk_flags &= ~SK_ISNULL;
472                                 }
473                         }
474                 }
475
476                 node->iss_RuntimeKeysReady = true;
477         }
478
479         /* If this is re-scanning of PlanQual ... */
480         if (estate->es_evTuple != NULL &&
481                 estate->es_evTuple[scanrelid - 1] != NULL)
482         {
483                 estate->es_evTupleNull[scanrelid - 1] = false;
484                 return;
485         }
486
487         /* reset hash table */
488         if (numIndices > 1)
489         {
490                 if (node->iss_DupHash)
491                         hash_destroy(node->iss_DupHash);
492                 create_duphash(node);
493         }
494
495         /* reset index scans */
496         if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indxorderdir))
497                 node->iss_IndexPtr = numIndices;
498         else
499                 node->iss_IndexPtr = -1;
500
501         for (i = 0; i < numIndices; i++)
502         {
503                 IndexScanDesc scan = scanDescs[i];
504                 ScanKey         skey = scanKeys[i];
505
506                 index_rescan(scan, skey);
507         }
508 }
509
510 /* ----------------------------------------------------------------
511  *              ExecEndIndexScan
512  * ----------------------------------------------------------------
513  */
514 void
515 ExecEndIndexScan(IndexScanState *node)
516 {
517         int                     numIndices;
518         RelationPtr indexRelationDescs;
519         IndexScanDescPtr indexScanDescs;
520         Relation        relation;
521         int                     i;
522
523         /*
524          * extract information from the node
525          */
526         numIndices = node->iss_NumIndices;
527         indexRelationDescs = node->iss_RelationDescs;
528         indexScanDescs = node->iss_ScanDescs;
529         relation = node->ss.ss_currentRelation;
530
531         /*
532          * Free the exprcontext(s)
533          */
534         ExecFreeExprContext(&node->ss.ps);
535         if (node->iss_RuntimeContext)
536                 FreeExprContext(node->iss_RuntimeContext);
537
538         /*
539          * clear out tuple table slots
540          */
541         ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
542         ExecClearTuple(node->ss.ss_ScanTupleSlot);
543
544         /* drop hash table */
545         if (node->iss_DupHash)
546                 hash_destroy(node->iss_DupHash);
547
548         /*
549          * close the index relations
550          */
551         for (i = 0; i < numIndices; i++)
552         {
553                 if (indexScanDescs[i] != NULL)
554                         index_endscan(indexScanDescs[i]);
555
556                 if (indexRelationDescs[i] != NULL)
557                         index_close(indexRelationDescs[i]);
558         }
559
560         /*
561          * close the heap relation.
562          *
563          * Currently, we do not release the AccessShareLock acquired by
564          * ExecInitIndexScan.  This lock should be held till end of
565          * transaction. (There is a faction that considers this too much
566          * locking, however.)
567          */
568         heap_close(relation, NoLock);
569 }
570
571 /* ----------------------------------------------------------------
572  *              ExecIndexMarkPos
573  *
574  * old comments
575  *              Marks scan position by marking the current index.
576  *              Returns nothing.
577  * ----------------------------------------------------------------
578  */
579 void
580 ExecIndexMarkPos(IndexScanState *node)
581 {
582         IndexScanDescPtr indexScanDescs;
583         IndexScanDesc scanDesc;
584         int                     indexPtr;
585
586         indexPtr = node->iss_MarkIndexPtr = node->iss_IndexPtr;
587         if (indexPtr >= 0 && indexPtr < node->iss_NumIndices)
588         {
589                 indexScanDescs = node->iss_ScanDescs;
590                 scanDesc = indexScanDescs[indexPtr];
591
592                 index_markpos(scanDesc);
593         }
594 }
595
596 /* ----------------------------------------------------------------
597  *              ExecIndexRestrPos
598  *
599  * old comments
600  *              Restores scan position by restoring the current index.
601  *              Returns nothing.
602  * ----------------------------------------------------------------
603  */
604 void
605 ExecIndexRestrPos(IndexScanState *node)
606 {
607         IndexScanDescPtr indexScanDescs;
608         IndexScanDesc scanDesc;
609         int                     indexPtr;
610
611         indexPtr = node->iss_IndexPtr = node->iss_MarkIndexPtr;
612         if (indexPtr >= 0 && indexPtr < node->iss_NumIndices)
613         {
614                 indexScanDescs = node->iss_ScanDescs;
615                 scanDesc = indexScanDescs[indexPtr];
616
617                 index_restrpos(scanDesc);
618         }
619 }
620
621 /* ----------------------------------------------------------------
622  *              ExecInitIndexScan
623  *
624  *              Initializes the index scan's state information, creates
625  *              scan keys, and opens the base and index relations.
626  *
627  *              Note: index scans have 2 sets of state information because
628  *                        we have to keep track of the base relation and the
629  *                        index relations.
630  *
631  * old comments
632  *              Creates the run-time state information for the node and
633  *              sets the relation id to contain relevant descriptors.
634  *
635  *              Parameters:
636  *                node: IndexNode node produced by the planner.
637  *                estate: the execution state initialized in InitPlan.
638  * ----------------------------------------------------------------
639  */
640 IndexScanState *
641 ExecInitIndexScan(IndexScan *node, EState *estate)
642 {
643         IndexScanState *indexstate;
644         ListCell   *indxqual;
645         ListCell   *indxstrategy;
646         ListCell   *indxsubtype;
647         ListCell   *indxlossy;
648         ListCell   *indxid_item;
649         int                     i;
650         int                     numIndices;
651         int                     indexPtr;
652         ScanKey    *scanKeys;
653         int                *numScanKeys;
654         RelationPtr indexDescs;
655         IndexScanDescPtr scanDescs;
656         List      **lossyQuals;
657         ExprState ***runtimeKeyInfo;
658         bool            have_runtime_keys;
659         RangeTblEntry *rtentry;
660         Index           relid;
661         Oid                     reloid;
662         Relation        currentRelation;
663
664         /*
665          * create state structure
666          */
667         indexstate = makeNode(IndexScanState);
668         indexstate->ss.ps.plan = (Plan *) node;
669         indexstate->ss.ps.state = estate;
670
671         /*
672          * Miscellaneous initialization
673          *
674          * create expression context for node
675          */
676         ExecAssignExprContext(estate, &indexstate->ss.ps);
677
678         /*
679          * initialize child expressions
680          *
681          * Note: we don't initialize all of the indxqual expression, only the
682          * sub-parts corresponding to runtime keys (see below).  The indxqualorig
683          * expression is always initialized even though it will only be used in
684          * some uncommon cases --- would be nice to improve that.  (Problem is
685          * that any SubPlans present in the expression must be found now...)
686          */
687         indexstate->ss.ps.targetlist = (List *)
688                 ExecInitExpr((Expr *) node->scan.plan.targetlist,
689                                          (PlanState *) indexstate);
690         indexstate->ss.ps.qual = (List *)
691                 ExecInitExpr((Expr *) node->scan.plan.qual,
692                                          (PlanState *) indexstate);
693         indexstate->indxqualorig = (List *)
694                 ExecInitExpr((Expr *) node->indxqualorig,
695                                          (PlanState *) indexstate);
696
697 #define INDEXSCAN_NSLOTS 2
698
699         /*
700          * tuple table initialization
701          */
702         ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
703         ExecInitScanTupleSlot(estate, &indexstate->ss);
704
705         /*
706          * Initialize index-specific scan state
707          */
708         indexstate->iss_NumIndices = 0;
709         indexstate->iss_IndexPtr = -1;
710         indexstate->iss_ScanKeys = NULL;
711         indexstate->iss_NumScanKeys = NULL;
712         indexstate->iss_RuntimeKeyInfo = NULL;
713         indexstate->iss_RuntimeContext = NULL;
714         indexstate->iss_RuntimeKeysReady = false;
715         indexstate->iss_RelationDescs = NULL;
716         indexstate->iss_ScanDescs = NULL;
717         indexstate->iss_LossyQuals = NULL;
718
719         /*
720          * get the index node information
721          */
722         indxid_item = list_head(node->indxid);
723         numIndices = length(node->indxid);
724         indexPtr = -1;
725
726         CXT1_printf("ExecInitIndexScan: context is %d\n", CurrentMemoryContext);
727
728         /*
729          * scanKeys is used to keep track of the ScanKey's. This is needed
730          * because a single scan may use several indices and each index has
731          * its own ScanKey.
732          */
733         numScanKeys = (int *) palloc(numIndices * sizeof(int));
734         scanKeys = (ScanKey *) palloc(numIndices * sizeof(ScanKey));
735         indexDescs = (RelationPtr) palloc(numIndices * sizeof(Relation));
736         scanDescs = (IndexScanDescPtr) palloc(numIndices * sizeof(IndexScanDesc));
737         lossyQuals = (List **) palloc0(numIndices * sizeof(List *));
738
739         /*
740          * initialize space for runtime key info (may not be needed)
741          */
742         have_runtime_keys = false;
743         runtimeKeyInfo = (ExprState ***) palloc0(numIndices * sizeof(ExprState **));
744
745         /*
746          * build the index scan keys from the index qualification
747          */
748         indxqual = list_head(node->indxqual);
749         indxstrategy = list_head(node->indxstrategy);
750         indxsubtype = list_head(node->indxsubtype);
751         indxlossy = list_head(node->indxlossy);
752         for (i = 0; i < numIndices; i++)
753         {
754                 List       *quals;
755                 List       *strategies;
756                 List       *subtypes;
757                 List       *lossyflags;
758                 ListCell   *qual_cell;
759                 ListCell   *strategy_cell;
760                 ListCell   *subtype_cell;
761                 ListCell   *lossyflag_cell;
762                 int                     n_keys;
763                 ScanKey         scan_keys;
764                 ExprState **run_keys;
765                 int                     j;
766
767                 quals = (List *) lfirst(indxqual);
768                 indxqual = lnext(indxqual);
769                 strategies = (List *) lfirst(indxstrategy);
770                 indxstrategy = lnext(indxstrategy);
771                 subtypes = (List *) lfirst(indxsubtype);
772                 indxsubtype = lnext(indxsubtype);
773                 lossyflags = (List *) lfirst(indxlossy);
774                 indxlossy = lnext(indxlossy);
775                 n_keys = length(quals);
776                 scan_keys = (n_keys <= 0) ? NULL :
777                         (ScanKey) palloc(n_keys * sizeof(ScanKeyData));
778                 run_keys = (n_keys <= 0) ? NULL :
779                         (ExprState **) palloc(n_keys * sizeof(ExprState *));
780
781                 /*
782                  * for each opclause in the given qual, convert each qual's
783                  * opclause into a single scan key
784                  */
785                 qual_cell = list_head(quals);
786                 strategy_cell = list_head(strategies);
787                 subtype_cell = list_head(subtypes);
788                 lossyflag_cell = list_head(lossyflags);
789                 for (j = 0; j < n_keys; j++)
790                 {
791                         OpExpr     *clause;                     /* one clause of index qual */
792                         Expr       *leftop;                     /* expr on lhs of operator */
793                         Expr       *rightop;            /* expr on rhs ... */
794                         int                     flags = 0;
795                         AttrNumber      varattno;               /* att number used in scan */
796                         StrategyNumber strategy;        /* op's strategy number */
797                         Oid                     subtype;                /* op's strategy subtype */
798                         int                     lossy;                  /* op's recheck flag */
799                         RegProcedure opfuncid;          /* operator proc id used in scan */
800                         Datum           scanvalue;              /* value used in scan (if const) */
801
802                         /*
803                          * extract clause information from the qualification
804                          */
805                         clause = (OpExpr *) lfirst(qual_cell);
806                         qual_cell = lnext(qual_cell);
807                         strategy = lfirsti(strategy_cell);
808                         strategy_cell = lnext(strategy_cell);
809                         subtype = lfirsto(subtype_cell);
810                         subtype_cell = lnext(subtype_cell);
811                         lossy = lfirsti(lossyflag_cell);
812                         lossyflag_cell = lnext(lossyflag_cell);
813
814                         if (!IsA(clause, OpExpr))
815                                 elog(ERROR, "indxqual is not an OpExpr");
816
817                         opfuncid = clause->opfuncid;
818
819                         /*
820                          * Here we figure out the contents of the index qual. The
821                          * usual case is (var op const) which means we form a scan key
822                          * for the attribute listed in the var node and use the value of
823                          * the const as comparison data.
824                          *
825                          * If we don't have a const node, it means our scan key is a
826                          * function of information obtained during the execution of the
827                          * plan, in which case we need to recalculate the index scan key
828                          * at run time.  Hence, we set have_runtime_keys to true and place
829                          * the appropriate subexpression in run_keys. The corresponding
830                          * scan key values are recomputed at run time.
831                          */
832                         run_keys[j] = NULL;
833
834                         /*
835                          * determine information in leftop
836                          */
837                         leftop = (Expr *) get_leftop((Expr *) clause);
838
839                         if (leftop && IsA(leftop, RelabelType))
840                                 leftop = ((RelabelType *) leftop)->arg;
841
842                         Assert(leftop != NULL);
843
844                         if (!(IsA(leftop, Var) &&
845                                   var_is_rel((Var *) leftop)))
846                                 elog(ERROR, "indxqual doesn't have key on left side");
847
848                         varattno = ((Var *) leftop)->varattno;
849
850                         /*
851                          * now determine information in rightop
852                          */
853                         rightop = (Expr *) get_rightop((Expr *) clause);
854
855                         if (rightop && IsA(rightop, RelabelType))
856                                 rightop = ((RelabelType *) rightop)->arg;
857
858                         Assert(rightop != NULL);
859
860                         if (IsA(rightop, Const))
861                         {
862                                 /*
863                                  * if the rightop is a const node then it means it
864                                  * identifies the value to place in our scan key.
865                                  */
866                                 scanvalue = ((Const *) rightop)->constvalue;
867                                 if (((Const *) rightop)->constisnull)
868                                         flags |= SK_ISNULL;
869                         }
870                         else
871                         {
872                                 /*
873                                  * otherwise, the rightop contains an expression evaluable
874                                  * at runtime to figure out the value to place in our scan
875                                  * key.
876                                  */
877                                 have_runtime_keys = true;
878                                 run_keys[j] = ExecInitExpr(rightop, (PlanState *) indexstate);
879                                 scanvalue = (Datum) 0;
880                         }
881
882                         /*
883                          * initialize the scan key's fields appropriately
884                          */
885                         ScanKeyEntryInitialize(&scan_keys[j],
886                                                                    flags,
887                                                                    varattno,    /* attribute number to
888                                                                                                  * scan */
889                                                                    strategy,    /* op's strategy */
890                                                                    subtype,             /* strategy subtype */
891                                                                    opfuncid,    /* reg proc to use */
892                                                                    scanvalue);  /* constant */
893
894                         /*
895                          * If this operator is lossy, add its indxqualorig expression
896                          * to the list of quals to recheck.  The nth() calls here could
897                          * be avoided by chasing the lists in parallel to all the other
898                          * lists, but since lossy operators are very uncommon, it's
899                          * probably a waste of time to do so.
900                          */
901                         if (lossy)
902                         {
903                                 lossyQuals[i] = lappend(lossyQuals[i],
904                                                                                 nth(j,
905                                                                                         (List *) nth(i, indexstate->indxqualorig)));
906                         }
907                 }
908
909                 /*
910                  * store the key information into our arrays.
911                  */
912                 numScanKeys[i] = n_keys;
913                 scanKeys[i] = scan_keys;
914                 runtimeKeyInfo[i] = run_keys;
915         }
916
917         indexstate->iss_NumIndices = numIndices;
918         if (ScanDirectionIsBackward(node->indxorderdir))
919                 indexPtr = numIndices;
920         indexstate->iss_IndexPtr = indexPtr;
921         indexstate->iss_ScanKeys = scanKeys;
922         indexstate->iss_NumScanKeys = numScanKeys;
923
924         /*
925          * If all of our keys have the form (var op const), then we have no
926          * runtime keys so we store NULL in the runtime key info. Otherwise
927          * runtime key info contains an array of pointers (one for each index)
928          * to arrays of flags (one for each key) which indicate that the qual
929          * needs to be evaluated at runtime. -cim 10/24/89
930          *
931          * If we do have runtime keys, we need an ExprContext to evaluate them;
932          * the node's standard context won't do because we want to reset that
933          * context for every tuple.  So, build another context just like the
934          * other one... -tgl 7/11/00
935          */
936         if (have_runtime_keys)
937         {
938                 ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
939
940                 ExecAssignExprContext(estate, &indexstate->ss.ps);
941                 indexstate->iss_RuntimeKeyInfo = runtimeKeyInfo;
942                 indexstate->iss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
943                 indexstate->ss.ps.ps_ExprContext = stdecontext;
944         }
945         else
946         {
947                 indexstate->iss_RuntimeKeyInfo = NULL;
948                 indexstate->iss_RuntimeContext = NULL;
949                 /* Get rid of the speculatively-allocated flag arrays, too */
950                 for (i = 0; i < numIndices; i++)
951                 {
952                         if (runtimeKeyInfo[i] != NULL)
953                                 pfree(runtimeKeyInfo[i]);
954                 }
955                 pfree(runtimeKeyInfo);
956         }
957
958         /*
959          * open the base relation and acquire AccessShareLock on it.
960          */
961         relid = node->scan.scanrelid;
962         rtentry = rt_fetch(relid, estate->es_range_table);
963         reloid = rtentry->relid;
964
965         currentRelation = heap_open(reloid, AccessShareLock);
966
967         indexstate->ss.ss_currentRelation = currentRelation;
968         indexstate->ss.ss_currentScanDesc = NULL;       /* no heap scan here */
969
970         /*
971          * get the scan type from the relation descriptor.
972          */
973         ExecAssignScanType(&indexstate->ss, RelationGetDescr(currentRelation), false);
974
975         /*
976          * open the index relations and initialize relation and scan
977          * descriptors.  Note we acquire no locks here; the index machinery
978          * does its own locks and unlocks.      (We rely on having AccessShareLock
979          * on the parent table to ensure the index won't go away!)
980          */
981         for (i = 0; i < numIndices; i++)
982         {
983                 Oid                     indexOid = lfirsto(indxid_item);
984
985                 indexDescs[i] = index_open(indexOid);
986                 scanDescs[i] = index_beginscan(currentRelation,
987                                                                            indexDescs[i],
988                                                                            estate->es_snapshot,
989                                                                            numScanKeys[i],
990                                                                            scanKeys[i]);
991                 indxid_item = lnext(indxid_item);
992         }
993
994         indexstate->iss_RelationDescs = indexDescs;
995         indexstate->iss_ScanDescs = scanDescs;
996         indexstate->iss_LossyQuals = lossyQuals;
997
998         /*
999          * Initialize result tuple type and projection info.
1000          */
1001         ExecAssignResultTypeFromTL(&indexstate->ss.ps);
1002         ExecAssignScanProjectionInfo(&indexstate->ss);
1003
1004         /*
1005          * Initialize hash table if needed.
1006          */
1007         if (numIndices > 1)
1008                 create_duphash(indexstate);
1009         else
1010                 indexstate->iss_DupHash = NULL;
1011
1012         /*
1013          * all done.
1014          */
1015         return indexstate;
1016 }
1017
1018 static void
1019 create_duphash(IndexScanState *node)
1020 {
1021         HASHCTL         hash_ctl;
1022         long            nbuckets;
1023
1024         node->iss_MaxHash = (work_mem * 1024L) /
1025                 (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(DupHashTabEntry)));
1026         MemSet(&hash_ctl, 0, sizeof(hash_ctl));
1027         hash_ctl.keysize = SizeOfIptrData;
1028         hash_ctl.entrysize = sizeof(DupHashTabEntry);
1029         hash_ctl.hash = tag_hash;
1030         hash_ctl.hcxt = CurrentMemoryContext;
1031         nbuckets = (long) ceil(node->ss.ps.plan->plan_rows);
1032         if (nbuckets < 1)
1033                 nbuckets = 1;
1034         if (nbuckets > node->iss_MaxHash)
1035                 nbuckets = node->iss_MaxHash;
1036         node->iss_DupHash = hash_create("DupHashTable",
1037                                                                         nbuckets,
1038                                                                         &hash_ctl,
1039                                                                         HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
1040         if (node->iss_DupHash == NULL)
1041                 ereport(ERROR,
1042                                 (errcode(ERRCODE_OUT_OF_MEMORY),
1043                                  errmsg("out of memory")));
1044 }
1045
1046 int
1047 ExecCountSlotsIndexScan(IndexScan *node)
1048 {
1049         return ExecCountSlotsNode(outerPlan((Plan *) node)) +
1050                 ExecCountSlotsNode(innerPlan((Plan *) node)) + INDEXSCAN_NSLOTS;
1051 }