]> granicus.if.org Git - postgresql/blob - src/backend/access/gin/ginget.c
2a3991f1d12725edb57c06acb422a35c3f162905
[postgresql] / src / backend / access / gin / ginget.c
1 /*-------------------------------------------------------------------------
2  *
3  * ginget.c
4  *        fetch tuples from a GIN scan.
5  *
6  *
7  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *                      src/backend/access/gin/ginget.c
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16
17 #include "access/gin_private.h"
18 #include "access/relscan.h"
19 #include "miscadmin.h"
20 #include "utils/datum.h"
21 #include "utils/memutils.h"
22
23
24 typedef struct pendingPosition
25 {
26         Buffer          pendingBuffer;
27         OffsetNumber firstOffset;
28         OffsetNumber lastOffset;
29         ItemPointerData item;
30         bool       *hasMatchKey;
31 } pendingPosition;
32
33
34 /*
35  * Convenience function for invoking a key's consistentFn
36  */
37 static bool
38 callConsistentFn(GinState *ginstate, GinScanKey key)
39 {
40         /*
41          * If we're dealing with a dummy EVERYTHING key, we don't want to call the
42          * consistentFn; just claim it matches.
43          */
44         if (key->searchMode == GIN_SEARCH_MODE_EVERYTHING)
45         {
46                 key->recheckCurItem = false;
47                 return true;
48         }
49
50         /*
51          * Initialize recheckCurItem in case the consistentFn doesn't know it
52          * should set it.  The safe assumption in that case is to force recheck.
53          */
54         key->recheckCurItem = true;
55
56         return DatumGetBool(FunctionCall8Coll(&ginstate->consistentFn[key->attnum - 1],
57                                                                  ginstate->supportCollation[key->attnum - 1],
58                                                                                   PointerGetDatum(key->entryRes),
59                                                                                   UInt16GetDatum(key->strategy),
60                                                                                   key->query,
61                                                                                   UInt32GetDatum(key->nuserentries),
62                                                                                   PointerGetDatum(key->extra_data),
63                                                                            PointerGetDatum(&key->recheckCurItem),
64                                                                                   PointerGetDatum(key->queryValues),
65                                                                          PointerGetDatum(key->queryCategories)));
66 }
67
68 /*
69  * Tries to refind previously taken ItemPointer on a posting page.
70  */
71 static bool
72 findItemInPostingPage(Page page, ItemPointer item, OffsetNumber *off)
73 {
74         OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
75         int                     res;
76
77         if (GinPageGetOpaque(page)->flags & GIN_DELETED)
78                 /* page was deleted by concurrent vacuum */
79                 return false;
80
81         /*
82          * scan page to find equal or first greater value
83          */
84         for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
85         {
86                 res = ginCompareItemPointers(item, GinDataPageGetItemPointer(page, *off));
87
88                 if (res <= 0)
89                         return true;
90         }
91
92         return false;
93 }
94
95 /*
96  * Goes to the next page if current offset is outside of bounds
97  */
98 static bool
99 moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack)
100 {
101         Page            page = BufferGetPage(stack->buffer);
102
103         if (stack->off > PageGetMaxOffsetNumber(page))
104         {
105                 /*
106                  * We scanned the whole page, so we should take right page
107                  */
108                 if (GinPageRightMost(page))
109                         return false;           /* no more pages */
110
111                 stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
112                 stack->blkno = BufferGetBlockNumber(stack->buffer);
113                 stack->off = FirstOffsetNumber;
114         }
115
116         return true;
117 }
118
119 /*
120  * Scan all pages of a posting tree and save all its heap ItemPointers
121  * in scanEntry->matchBitmap
122  */
123 static void
124 scanPostingTree(Relation index, GinScanEntry scanEntry,
125                                 BlockNumber rootPostingTree)
126 {
127         GinBtreeStack *stack;
128         Buffer          buffer;
129         Page            page;
130
131         /* Descend to the leftmost leaf page */
132         stack = ginScanBeginPostingTree(index, rootPostingTree);
133         buffer = stack->buffer;
134         IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
135
136         freeGinBtreeStack(stack);
137
138         /*
139          * Loop iterates through all leaf pages of posting tree
140          */
141         for (;;)
142         {
143                 page = BufferGetPage(buffer);
144
145                 if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 &&
146                         GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
147                 {
148                         tbm_add_tuples(scanEntry->matchBitmap,
149                                                    GinDataPageGetItemPointer(page, FirstOffsetNumber),
150                                                    GinPageGetOpaque(page)->maxoff, false);
151                         scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff;
152                 }
153
154                 if (GinPageRightMost(page))
155                         break;                          /* no more pages */
156
157                 buffer = ginStepRight(buffer, index, GIN_SHARE);
158         }
159
160         UnlockReleaseBuffer(buffer);
161 }
162
163 /*
164  * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
165  * match the search entry.      This supports three different match modes:
166  *
167  * 1. Partial-match support: scan from current point until the
168  *        comparePartialFn says we're done.
169  * 2. SEARCH_MODE_ALL: scan from current point (which should be first
170  *        key for the current attnum) until we hit null items or end of attnum
171  * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
172  *        key for the current attnum) until we hit end of attnum
173  *
174  * Returns true if done, false if it's necessary to restart scan from scratch
175  */
176 static bool
177 collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
178                                    GinScanEntry scanEntry)
179 {
180         OffsetNumber attnum;
181         Form_pg_attribute attr;
182
183         /* Initialize empty bitmap result */
184         scanEntry->matchBitmap = tbm_create(work_mem * 1024L);
185
186         /* Null query cannot partial-match anything */
187         if (scanEntry->isPartialMatch &&
188                 scanEntry->queryCategory != GIN_CAT_NORM_KEY)
189                 return true;
190
191         /* Locate tupdesc entry for key column (for attbyval/attlen data) */
192         attnum = scanEntry->attnum;
193         attr = btree->ginstate->origTupdesc->attrs[attnum - 1];
194
195         for (;;)
196         {
197                 Page            page;
198                 IndexTuple      itup;
199                 Datum           idatum;
200                 GinNullCategory icategory;
201
202                 /*
203                  * stack->off points to the interested entry, buffer is already locked
204                  */
205                 if (moveRightIfItNeeded(btree, stack) == false)
206                         return true;
207
208                 page = BufferGetPage(stack->buffer);
209                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
210
211                 /*
212                  * If tuple stores another attribute then stop scan
213                  */
214                 if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
215                         return true;
216
217                 /* Safe to fetch attribute value */
218                 idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
219
220                 /*
221                  * Check for appropriate scan stop conditions
222                  */
223                 if (scanEntry->isPartialMatch)
224                 {
225                         int32           cmp;
226
227                         /*
228                          * In partial match, stop scan at any null (including
229                          * placeholders); partial matches never match nulls
230                          */
231                         if (icategory != GIN_CAT_NORM_KEY)
232                                 return true;
233
234                         /*----------
235                          * Check of partial match.
236                          * case cmp == 0 => match
237                          * case cmp > 0 => not match and finish scan
238                          * case cmp < 0 => not match and continue scan
239                          *----------
240                          */
241                         cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
242                                                            btree->ginstate->supportCollation[attnum - 1],
243                                                                                                   scanEntry->queryKey,
244                                                                                                   idatum,
245                                                                                  UInt16GetDatum(scanEntry->strategy),
246                                                                         PointerGetDatum(scanEntry->extra_data)));
247
248                         if (cmp > 0)
249                                 return true;
250                         else if (cmp < 0)
251                         {
252                                 stack->off++;
253                                 continue;
254                         }
255                 }
256                 else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
257                 {
258                         /*
259                          * In ALL mode, we are not interested in null items, so we can
260                          * stop if we get to a null-item placeholder (which will be the
261                          * last entry for a given attnum).      We do want to include NULL_KEY
262                          * and EMPTY_ITEM entries, though.
263                          */
264                         if (icategory == GIN_CAT_NULL_ITEM)
265                                 return true;
266                 }
267
268                 /*
269                  * OK, we want to return the TIDs listed in this entry.
270                  */
271                 if (GinIsPostingTree(itup))
272                 {
273                         BlockNumber rootPostingTree = GinGetPostingTree(itup);
274
275                         /*
276                          * We should unlock current page (but not unpin) during tree scan
277                          * to prevent deadlock with vacuum processes.
278                          *
279                          * We save current entry value (idatum) to be able to re-find our
280                          * tuple after re-locking
281                          */
282                         if (icategory == GIN_CAT_NORM_KEY)
283                                 idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
284
285                         LockBuffer(stack->buffer, GIN_UNLOCK);
286
287                         /* Collect all the TIDs in this entry's posting tree */
288                         scanPostingTree(btree->index, scanEntry, rootPostingTree);
289
290                         /*
291                          * We lock again the entry page and while it was unlocked insert
292                          * might have occurred, so we need to re-find our position.
293                          */
294                         LockBuffer(stack->buffer, GIN_SHARE);
295                         page = BufferGetPage(stack->buffer);
296                         if (!GinPageIsLeaf(page))
297                         {
298                                 /*
299                                  * Root page becomes non-leaf while we unlock it. We will
300                                  * start again, this situation doesn't occur often - root can
301                                  * became a non-leaf only once per life of index.
302                                  */
303                                 return false;
304                         }
305
306                         /* Search forward to re-find idatum */
307                         for (;;)
308                         {
309                                 Datum           newDatum;
310                                 GinNullCategory newCategory;
311
312                                 if (moveRightIfItNeeded(btree, stack) == false)
313                                         elog(ERROR, "lost saved point in index");       /* must not happen !!! */
314
315                                 page = BufferGetPage(stack->buffer);
316                                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
317
318                                 if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
319                                         elog(ERROR, "lost saved point in index");       /* must not happen !!! */
320                                 newDatum = gintuple_get_key(btree->ginstate, itup,
321                                                                                         &newCategory);
322
323                                 if (ginCompareEntries(btree->ginstate, attnum,
324                                                                           newDatum, newCategory,
325                                                                           idatum, icategory) == 0)
326                                         break;          /* Found! */
327
328                                 stack->off++;
329                         }
330
331                         if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
332                                 pfree(DatumGetPointer(idatum));
333                 }
334                 else
335                 {
336                         tbm_add_tuples(scanEntry->matchBitmap,
337                                                    GinGetPosting(itup), GinGetNPosting(itup), false);
338                         scanEntry->predictNumberResult += GinGetNPosting(itup);
339                 }
340
341                 /*
342                  * Done with this entry, go to the next
343                  */
344                 stack->off++;
345         }
346 }
347
348 /*
349  * Start* functions setup beginning state of searches: finds correct buffer and pins it.
350  */
351 static void
352 startScanEntry(GinState *ginstate, GinScanEntry entry)
353 {
354         GinBtreeData btreeEntry;
355         GinBtreeStack *stackEntry;
356         Page            page;
357         bool            needUnlock;
358
359 restartScanEntry:
360         entry->buffer = InvalidBuffer;
361         ItemPointerSetMin(&entry->curItem);
362         entry->offset = InvalidOffsetNumber;
363         entry->list = NULL;
364         entry->nlist = 0;
365         entry->matchBitmap = NULL;
366         entry->matchResult = NULL;
367         entry->reduceResult = FALSE;
368         entry->predictNumberResult = 0;
369
370         /*
371          * we should find entry, and begin scan of posting tree or just store
372          * posting list in memory
373          */
374         ginPrepareEntryScan(&btreeEntry, entry->attnum,
375                                                 entry->queryKey, entry->queryCategory,
376                                                 ginstate);
377         stackEntry = ginFindLeafPage(&btreeEntry, GIN_ROOT_BLKNO, true);
378         page = BufferGetPage(stackEntry->buffer);
379         needUnlock = TRUE;
380
381         entry->isFinished = TRUE;
382
383         if (entry->isPartialMatch ||
384                 entry->queryCategory == GIN_CAT_EMPTY_QUERY)
385         {
386                 /*
387                  * btreeEntry.findItem locates the first item >= given search key.
388                  * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
389                  * because of the way the GIN_CAT_EMPTY_QUERY category code is
390                  * assigned.)  We scan forward from there and collect all TIDs needed
391                  * for the entry type.
392                  */
393                 btreeEntry.findItem(&btreeEntry, stackEntry);
394                 if (collectMatchBitmap(&btreeEntry, stackEntry, entry) == false)
395                 {
396                         /*
397                          * GIN tree was seriously restructured, so we will cleanup all
398                          * found data and rescan. See comments near 'return false' in
399                          * collectMatchBitmap()
400                          */
401                         if (entry->matchBitmap)
402                         {
403                                 if (entry->matchIterator)
404                                         tbm_end_iterate(entry->matchIterator);
405                                 entry->matchIterator = NULL;
406                                 tbm_free(entry->matchBitmap);
407                                 entry->matchBitmap = NULL;
408                         }
409                         LockBuffer(stackEntry->buffer, GIN_UNLOCK);
410                         freeGinBtreeStack(stackEntry);
411                         goto restartScanEntry;
412                 }
413
414                 if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
415                 {
416                         entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
417                         entry->isFinished = FALSE;
418                 }
419         }
420         else if (btreeEntry.findItem(&btreeEntry, stackEntry))
421         {
422                 IndexTuple      itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
423
424                 if (GinIsPostingTree(itup))
425                 {
426                         BlockNumber rootPostingTree = GinGetPostingTree(itup);
427                         GinBtreeStack *stack;
428                         Page            page;
429
430                         /*
431                          * We should unlock entry page before touching posting tree to
432                          * prevent deadlocks with vacuum processes. Because entry is never
433                          * deleted from page and posting tree is never reduced to the
434                          * posting list, we can unlock page after getting BlockNumber of
435                          * root of posting tree.
436                          */
437                         LockBuffer(stackEntry->buffer, GIN_UNLOCK);
438                         needUnlock = FALSE;
439
440                         stack = ginScanBeginPostingTree(ginstate->index, rootPostingTree);
441                         entry->buffer = stack->buffer;
442
443                         /*
444                          * We keep buffer pinned because we need to prevent deletion of
445                          * page during scan. See GIN's vacuum implementation. RefCount is
446                          * increased to keep buffer pinned after freeGinBtreeStack() call.
447                          */
448                         IncrBufferRefCount(entry->buffer);
449
450                         page = BufferGetPage(entry->buffer);
451                         entry->predictNumberResult = stack->predictNumber * GinPageGetOpaque(page)->maxoff;
452
453                         /*
454                          * Keep page content in memory to prevent durable page locking
455                          */
456                         entry->list = (ItemPointerData *) palloc(BLCKSZ);
457                         entry->nlist = GinPageGetOpaque(page)->maxoff;
458                         memcpy(entry->list,
459                                    GinDataPageGetItemPointer(page, FirstOffsetNumber),
460                                    GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
461
462                         LockBuffer(entry->buffer, GIN_UNLOCK);
463                         freeGinBtreeStack(stack);
464                         entry->isFinished = FALSE;
465                 }
466                 else if (GinGetNPosting(itup) > 0)
467                 {
468                         entry->nlist = GinGetNPosting(itup);
469                         entry->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * entry->nlist);
470                         memcpy(entry->list, GinGetPosting(itup), sizeof(ItemPointerData) * entry->nlist);
471                         entry->isFinished = FALSE;
472                 }
473         }
474
475         if (needUnlock)
476                 LockBuffer(stackEntry->buffer, GIN_UNLOCK);
477         freeGinBtreeStack(stackEntry);
478 }
479
480 static void
481 startScanKey(GinState *ginstate, GinScanKey key)
482 {
483         ItemPointerSetMin(&key->curItem);
484         key->curItemMatches = false;
485         key->recheckCurItem = false;
486         key->isFinished = false;
487 }
488
489 static void
490 startScan(IndexScanDesc scan)
491 {
492         GinScanOpaque so = (GinScanOpaque) scan->opaque;
493         GinState   *ginstate = &so->ginstate;
494         uint32          i;
495
496         for (i = 0; i < so->totalentries; i++)
497                 startScanEntry(ginstate, so->entries[i]);
498
499         if (GinFuzzySearchLimit > 0)
500         {
501                 /*
502                  * If all of keys more than threshold we will try to reduce result, we
503                  * hope (and only hope, for intersection operation of array our
504                  * supposition isn't true), that total result will not more than
505                  * minimal predictNumberResult.
506                  */
507
508                 for (i = 0; i < so->totalentries; i++)
509                         if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
510                                 return;
511
512                 for (i = 0; i < so->totalentries; i++)
513                         if (so->entries[i]->predictNumberResult > so->totalentries * GinFuzzySearchLimit)
514                         {
515                                 so->entries[i]->predictNumberResult /= so->totalentries;
516                                 so->entries[i]->reduceResult = TRUE;
517                         }
518         }
519
520         for (i = 0; i < so->nkeys; i++)
521                 startScanKey(ginstate, so->keys + i);
522 }
523
524 /*
525  * Gets next ItemPointer from PostingTree. Note, that we copy
526  * page into GinScanEntry->list array and unlock page, but keep it pinned
527  * to prevent interference with vacuum
528  */
529 static void
530 entryGetNextItem(GinState *ginstate, GinScanEntry entry)
531 {
532         Page            page;
533
534         for (;;)
535         {
536                 if (entry->offset < entry->nlist)
537                 {
538                         entry->curItem = entry->list[entry->offset++];
539                         return;
540                 }
541
542                 LockBuffer(entry->buffer, GIN_SHARE);
543                 page = BufferGetPage(entry->buffer);
544                 for (;;)
545                 {
546                         /*
547                          * It's needed to go by right link. During that we should refind
548                          * first ItemPointer greater that stored
549                          */
550                         if (GinPageRightMost(page))
551                         {
552                                 UnlockReleaseBuffer(entry->buffer);
553                                 ItemPointerSetInvalid(&entry->curItem);
554                                 entry->buffer = InvalidBuffer;
555                                 entry->isFinished = TRUE;
556                                 return;
557                         }
558
559                         entry->buffer = ginStepRight(entry->buffer,
560                                                                                  ginstate->index,
561                                                                                  GIN_SHARE);
562                         page = BufferGetPage(entry->buffer);
563
564                         entry->offset = InvalidOffsetNumber;
565                         if (!ItemPointerIsValid(&entry->curItem) ||
566                                 findItemInPostingPage(page, &entry->curItem, &entry->offset))
567                         {
568                                 /*
569                                  * Found position equal to or greater than stored
570                                  */
571                                 entry->nlist = GinPageGetOpaque(page)->maxoff;
572                                 memcpy(entry->list,
573                                            GinDataPageGetItemPointer(page, FirstOffsetNumber),
574                                            GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData));
575
576                                 LockBuffer(entry->buffer, GIN_UNLOCK);
577
578                                 if (!ItemPointerIsValid(&entry->curItem) ||
579                                         ginCompareItemPointers(&entry->curItem,
580                                                                            entry->list + entry->offset - 1) == 0)
581                                 {
582                                         /*
583                                          * First pages are deleted or empty, or we found exact
584                                          * position, so break inner loop and continue outer one.
585                                          */
586                                         break;
587                                 }
588
589                                 /*
590                                  * Find greater than entry->curItem position, store it.
591                                  */
592                                 entry->curItem = entry->list[entry->offset - 1];
593
594                                 return;
595                         }
596                 }
597         }
598 }
599
600 #define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE))
601 #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
602
603 /*
604  * Sets entry->curItem to next heap item pointer for one entry of one scan key,
605  * or sets entry->isFinished to TRUE if there are no more.
606  *
607  * Item pointers must be returned in ascending order.
608  *
609  * Note: this can return a "lossy page" item pointer, indicating that the
610  * entry potentially matches all items on that heap page.  However, it is
611  * not allowed to return both a lossy page pointer and exact (regular)
612  * item pointers for the same page.  (Doing so would break the key-combination
613  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
614  * current implementation this is guaranteed by the behavior of tidbitmaps.
615  */
616 static void
617 entryGetItem(GinState *ginstate, GinScanEntry entry)
618 {
619         Assert(!entry->isFinished);
620
621         if (entry->matchBitmap)
622         {
623                 do
624                 {
625                         if (entry->matchResult == NULL ||
626                                 entry->offset >= entry->matchResult->ntuples)
627                         {
628                                 entry->matchResult = tbm_iterate(entry->matchIterator);
629
630                                 if (entry->matchResult == NULL)
631                                 {
632                                         ItemPointerSetInvalid(&entry->curItem);
633                                         tbm_end_iterate(entry->matchIterator);
634                                         entry->matchIterator = NULL;
635                                         entry->isFinished = TRUE;
636                                         break;
637                                 }
638
639                                 /*
640                                  * Reset counter to the beginning of entry->matchResult. Note:
641                                  * entry->offset is still greater than matchResult->ntuples if
642                                  * matchResult is lossy.  So, on next call we will get next
643                                  * result from TIDBitmap.
644                                  */
645                                 entry->offset = 0;
646                         }
647
648                         if (entry->matchResult->ntuples < 0)
649                         {
650                                 /*
651                                  * lossy result, so we need to check the whole page
652                                  */
653                                 ItemPointerSetLossyPage(&entry->curItem,
654                                                                                 entry->matchResult->blockno);
655
656                                 /*
657                                  * We might as well fall out of the loop; we could not
658                                  * estimate number of results on this page to support correct
659                                  * reducing of result even if it's enabled
660                                  */
661                                 break;
662                         }
663
664                         ItemPointerSet(&entry->curItem,
665                                                    entry->matchResult->blockno,
666                                                    entry->matchResult->offsets[entry->offset]);
667                         entry->offset++;
668                 } while (entry->reduceResult == TRUE && dropItem(entry));
669         }
670         else if (!BufferIsValid(entry->buffer))
671         {
672                 entry->offset++;
673                 if (entry->offset <= entry->nlist)
674                         entry->curItem = entry->list[entry->offset - 1];
675                 else
676                 {
677                         ItemPointerSetInvalid(&entry->curItem);
678                         entry->isFinished = TRUE;
679                 }
680         }
681         else
682         {
683                 do
684                 {
685                         entryGetNextItem(ginstate, entry);
686                 } while (entry->isFinished == FALSE &&
687                                  entry->reduceResult == TRUE &&
688                                  dropItem(entry));
689         }
690 }
691
692 /*
693  * Identify the "current" item among the input entry streams for this scan key,
694  * and test whether it passes the scan key qual condition.
695  *
696  * The current item is the smallest curItem among the inputs.  key->curItem
697  * is set to that value.  key->curItemMatches is set to indicate whether that
698  * TID passes the consistentFn test.  If so, key->recheckCurItem is set true
699  * iff recheck is needed for this item pointer (including the case where the
700  * item pointer is a lossy page pointer).
701  *
702  * If all entry streams are exhausted, sets key->isFinished to TRUE.
703  *
704  * Item pointers must be returned in ascending order.
705  *
706  * Note: this can return a "lossy page" item pointer, indicating that the
707  * key potentially matches all items on that heap page.  However, it is
708  * not allowed to return both a lossy page pointer and exact (regular)
709  * item pointers for the same page.  (Doing so would break the key-combination
710  * logic in scanGetItem.)
711  */
712 static void
713 keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
714 {
715         ItemPointerData minItem;
716         ItemPointerData curPageLossy;
717         uint32          i;
718         uint32          lossyEntry;
719         bool            haveLossyEntry;
720         GinScanEntry entry;
721         bool            res;
722         MemoryContext oldCtx;
723
724         Assert(!key->isFinished);
725
726         /*
727          * Find the minimum of the active entry curItems.
728          *
729          * Note: a lossy-page entry is encoded by a ItemPointer with max value for
730          * offset (0xffff), so that it will sort after any exact entries for the
731          * same page.  So we'll prefer to return exact pointers not lossy
732          * pointers, which is good.
733          */
734         ItemPointerSetMax(&minItem);
735
736         for (i = 0; i < key->nentries; i++)
737         {
738                 entry = key->scanEntry[i];
739                 if (entry->isFinished == FALSE &&
740                         ginCompareItemPointers(&entry->curItem, &minItem) < 0)
741                         minItem = entry->curItem;
742         }
743
744         if (ItemPointerIsMax(&minItem))
745         {
746                 /* all entries are finished */
747                 key->isFinished = TRUE;
748                 return;
749         }
750
751         /*
752          * We might have already tested this item; if so, no need to repeat work.
753          * (Note: the ">" case can happen, if minItem is exact but we previously
754          * had to set curItem to a lossy-page pointer.)
755          */
756         if (ginCompareItemPointers(&key->curItem, &minItem) >= 0)
757                 return;
758
759         /*
760          * OK, advance key->curItem and perform consistentFn test.
761          */
762         key->curItem = minItem;
763
764         /*
765          * Lossy-page entries pose a problem, since we don't know the correct
766          * entryRes state to pass to the consistentFn, and we also don't know what
767          * its combining logic will be (could be AND, OR, or even NOT). If the
768          * logic is OR then the consistentFn might succeed for all items in the
769          * lossy page even when none of the other entries match.
770          *
771          * If we have a single lossy-page entry then we check to see if the
772          * consistentFn will succeed with only that entry TRUE.  If so, we return
773          * a lossy-page pointer to indicate that the whole heap page must be
774          * checked.  (On subsequent calls, we'll do nothing until minItem is past
775          * the page altogether, thus ensuring that we never return both regular
776          * and lossy pointers for the same page.)
777          *
778          * This idea could be generalized to more than one lossy-page entry, but
779          * ideally lossy-page entries should be infrequent so it would seldom be
780          * the case that we have more than one at once.  So it doesn't seem worth
781          * the extra complexity to optimize that case. If we do find more than
782          * one, we just punt and return a lossy-page pointer always.
783          *
784          * Note that only lossy-page entries pointing to the current item's page
785          * should trigger this processing; we might have future lossy pages in the
786          * entry array, but they aren't relevant yet.
787          */
788         ItemPointerSetLossyPage(&curPageLossy,
789                                                         GinItemPointerGetBlockNumber(&key->curItem));
790
791         lossyEntry = 0;
792         haveLossyEntry = false;
793         for (i = 0; i < key->nentries; i++)
794         {
795                 entry = key->scanEntry[i];
796                 if (entry->isFinished == FALSE &&
797                         ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
798                 {
799                         if (haveLossyEntry)
800                         {
801                                 /* Multiple lossy entries, punt */
802                                 key->curItem = curPageLossy;
803                                 key->curItemMatches = true;
804                                 key->recheckCurItem = true;
805                                 return;
806                         }
807                         lossyEntry = i;
808                         haveLossyEntry = true;
809                 }
810         }
811
812         /* prepare for calling consistentFn in temp context */
813         oldCtx = MemoryContextSwitchTo(tempCtx);
814
815         if (haveLossyEntry)
816         {
817                 /* Single lossy-page entry, so see if whole page matches */
818                 memset(key->entryRes, FALSE, key->nentries);
819                 key->entryRes[lossyEntry] = TRUE;
820
821                 if (callConsistentFn(ginstate, key))
822                 {
823                         /* Yes, so clean up ... */
824                         MemoryContextSwitchTo(oldCtx);
825                         MemoryContextReset(tempCtx);
826
827                         /* and return lossy pointer for whole page */
828                         key->curItem = curPageLossy;
829                         key->curItemMatches = true;
830                         key->recheckCurItem = true;
831                         return;
832                 }
833         }
834
835         /*
836          * At this point we know that we don't need to return a lossy whole-page
837          * pointer, but we might have matches for individual exact item pointers,
838          * possibly in combination with a lossy pointer.  Our strategy if there's
839          * a lossy pointer is to try the consistentFn both ways and return a hit
840          * if it accepts either one (forcing the hit to be marked lossy so it will
841          * be rechecked).  An exception is that we don't need to try it both ways
842          * if the lossy pointer is in a "hidden" entry, because the consistentFn's
843          * result can't depend on that.
844          *
845          * Prepare entryRes array to be passed to consistentFn.
846          */
847         for (i = 0; i < key->nentries; i++)
848         {
849                 entry = key->scanEntry[i];
850                 if (entry->isFinished == FALSE &&
851                         ginCompareItemPointers(&entry->curItem, &key->curItem) == 0)
852                         key->entryRes[i] = TRUE;
853                 else
854                         key->entryRes[i] = FALSE;
855         }
856         if (haveLossyEntry)
857                 key->entryRes[lossyEntry] = TRUE;
858
859         res = callConsistentFn(ginstate, key);
860
861         if (!res && haveLossyEntry && lossyEntry < key->nuserentries)
862         {
863                 /* try the other way for the lossy item */
864                 key->entryRes[lossyEntry] = FALSE;
865
866                 res = callConsistentFn(ginstate, key);
867         }
868
869         key->curItemMatches = res;
870         /* If we matched a lossy entry, force recheckCurItem = true */
871         if (haveLossyEntry)
872                 key->recheckCurItem = true;
873
874         /* clean up after consistentFn calls */
875         MemoryContextSwitchTo(oldCtx);
876         MemoryContextReset(tempCtx);
877 }
878
879 /*
880  * Get next heap item pointer (after advancePast) from scan.
881  * Returns true if anything found.
882  * On success, *item and *recheck are set.
883  *
884  * Note: this is very nearly the same logic as in keyGetItem(), except
885  * that we know the keys are to be combined with AND logic, whereas in
886  * keyGetItem() the combination logic is known only to the consistentFn.
887  */
888 static bool
889 scanGetItem(IndexScanDesc scan, ItemPointer advancePast,
890                         ItemPointerData *item, bool *recheck)
891 {
892         GinScanOpaque so = (GinScanOpaque) scan->opaque;
893         GinState   *ginstate = &so->ginstate;
894         ItemPointerData myAdvancePast = *advancePast;
895         uint32          i;
896         bool            allFinished;
897         bool            match;
898
899         for (;;)
900         {
901                 /*
902                  * Advance any entries that are <= myAdvancePast.  In particular,
903                  * since entry->curItem was initialized with ItemPointerSetMin, this
904                  * ensures we fetch the first item for each entry on the first call.
905                  */
906                 allFinished = TRUE;
907
908                 for (i = 0; i < so->totalentries; i++)
909                 {
910                         GinScanEntry entry = so->entries[i];
911
912                         while (entry->isFinished == FALSE &&
913                                    ginCompareItemPointers(&entry->curItem,
914                                                                                   &myAdvancePast) <= 0)
915                                 entryGetItem(ginstate, entry);
916
917                         if (entry->isFinished == FALSE)
918                                 allFinished = FALSE;
919                 }
920
921                 if (allFinished)
922                 {
923                         /* all entries exhausted, so we're done */
924                         return false;
925                 }
926
927                 /*
928                  * Perform the consistentFn test for each scan key.  If any key
929                  * reports isFinished, meaning its subset of the entries is exhausted,
930                  * we can stop.  Otherwise, set *item to the minimum of the key
931                  * curItems.
932                  */
933                 ItemPointerSetMax(item);
934
935                 for (i = 0; i < so->nkeys; i++)
936                 {
937                         GinScanKey      key = so->keys + i;
938
939                         keyGetItem(&so->ginstate, so->tempCtx, key);
940
941                         if (key->isFinished)
942                                 return false;   /* finished one of keys */
943
944                         if (ginCompareItemPointers(&key->curItem, item) < 0)
945                                 *item = key->curItem;
946                 }
947
948                 Assert(!ItemPointerIsMax(item));
949
950                 /*----------
951                  * Now *item contains first ItemPointer after previous result.
952                  *
953                  * The item is a valid hit only if all the keys succeeded for either
954                  * that exact TID, or a lossy reference to the same page.
955                  *
956                  * This logic works only if a keyGetItem stream can never contain both
957                  * exact and lossy pointers for the same page.  Else we could have a
958                  * case like
959                  *
960                  *              stream 1                stream 2
961                  *              ...                             ...
962                  *              42/6                    42/7
963                  *              50/1                    42/0xffff
964                  *              ...                             ...
965                  *
966                  * We would conclude that 42/6 is not a match and advance stream 1,
967                  * thus never detecting the match to the lossy pointer in stream 2.
968                  * (keyGetItem has a similar problem versus entryGetItem.)
969                  *----------
970                  */
971                 match = true;
972                 for (i = 0; i < so->nkeys; i++)
973                 {
974                         GinScanKey      key = so->keys + i;
975
976                         if (key->curItemMatches)
977                         {
978                                 if (ginCompareItemPointers(item, &key->curItem) == 0)
979                                         continue;
980                                 if (ItemPointerIsLossyPage(&key->curItem) &&
981                                         GinItemPointerGetBlockNumber(&key->curItem) ==
982                                         GinItemPointerGetBlockNumber(item))
983                                         continue;
984                         }
985                         match = false;
986                         break;
987                 }
988
989                 if (match)
990                         break;
991
992                 /*
993                  * No hit.      Update myAdvancePast to this TID, so that on the next pass
994                  * we'll move to the next possible entry.
995                  */
996                 myAdvancePast = *item;
997         }
998
999         /*
1000          * We must return recheck = true if any of the keys are marked recheck.
1001          */
1002         *recheck = false;
1003         for (i = 0; i < so->nkeys; i++)
1004         {
1005                 GinScanKey      key = so->keys + i;
1006
1007                 if (key->recheckCurItem)
1008                 {
1009                         *recheck = true;
1010                         break;
1011                 }
1012         }
1013
1014         return TRUE;
1015 }
1016
1017
1018 /*
1019  * Functions for scanning the pending list
1020  */
1021
1022
1023 /*
1024  * Get ItemPointer of next heap row to be checked from pending list.
1025  * Returns false if there are no more. On pages with several heap rows
1026  * it returns each row separately, on page with part of heap row returns
1027  * per page data.  pos->firstOffset and pos->lastOffset are set to identify
1028  * the range of pending-list tuples belonging to this heap row.
1029  *
1030  * The pendingBuffer is presumed pinned and share-locked on entry, and is
1031  * pinned and share-locked on success exit.  On failure exit it's released.
1032  */
1033 static bool
1034 scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
1035 {
1036         OffsetNumber maxoff;
1037         Page            page;
1038         IndexTuple      itup;
1039
1040         ItemPointerSetInvalid(&pos->item);
1041         for (;;)
1042         {
1043                 page = BufferGetPage(pos->pendingBuffer);
1044
1045                 maxoff = PageGetMaxOffsetNumber(page);
1046                 if (pos->firstOffset > maxoff)
1047                 {
1048                         BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
1049
1050                         if (blkno == InvalidBlockNumber)
1051                         {
1052                                 UnlockReleaseBuffer(pos->pendingBuffer);
1053                                 pos->pendingBuffer = InvalidBuffer;
1054
1055                                 return false;
1056                         }
1057                         else
1058                         {
1059                                 /*
1060                                  * Here we must prevent deletion of next page by insertcleanup
1061                                  * process, which may be trying to obtain exclusive lock on
1062                                  * current page.  So, we lock next page before releasing the
1063                                  * current one
1064                                  */
1065                                 Buffer          tmpbuf = ReadBuffer(scan->indexRelation, blkno);
1066
1067                                 LockBuffer(tmpbuf, GIN_SHARE);
1068                                 UnlockReleaseBuffer(pos->pendingBuffer);
1069
1070                                 pos->pendingBuffer = tmpbuf;
1071                                 pos->firstOffset = FirstOffsetNumber;
1072                         }
1073                 }
1074                 else
1075                 {
1076                         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
1077                         pos->item = itup->t_tid;
1078                         if (GinPageHasFullRow(page))
1079                         {
1080                                 /*
1081                                  * find itempointer to the next row
1082                                  */
1083                                 for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
1084                                 {
1085                                         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
1086                                         if (!ItemPointerEquals(&pos->item, &itup->t_tid))
1087                                                 break;
1088                                 }
1089                         }
1090                         else
1091                         {
1092                                 /*
1093                                  * All itempointers are the same on this page
1094                                  */
1095                                 pos->lastOffset = maxoff + 1;
1096                         }
1097
1098                         /*
1099                          * Now pos->firstOffset points to the first tuple of current heap
1100                          * row, pos->lastOffset points to the first tuple of next heap row
1101                          * (or to the end of page)
1102                          */
1103                         break;
1104                 }
1105         }
1106
1107         return true;
1108 }
1109
1110 /*
1111  * Scan pending-list page from current tuple (off) up till the first of:
1112  * - match is found (then returns true)
1113  * - no later match is possible
1114  * - tuple's attribute number is not equal to entry's attrnum
1115  * - reach end of page
1116  *
1117  * datum[]/category[]/datumExtracted[] arrays are used to cache the results
1118  * of gintuple_get_key() on the current page.
1119  */
1120 static bool
1121 matchPartialInPendingList(GinState *ginstate, Page page,
1122                                                   OffsetNumber off, OffsetNumber maxoff,
1123                                                   GinScanEntry entry,
1124                                                   Datum *datum, GinNullCategory *category,
1125                                                   bool *datumExtracted)
1126 {
1127         IndexTuple      itup;
1128         int32           cmp;
1129
1130         /* Partial match to a null is not possible */
1131         if (entry->queryCategory != GIN_CAT_NORM_KEY)
1132                 return false;
1133
1134         while (off < maxoff)
1135         {
1136                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
1137
1138                 if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
1139                         return false;
1140
1141                 if (datumExtracted[off - 1] == false)
1142                 {
1143                         datum[off - 1] = gintuple_get_key(ginstate, itup,
1144                                                                                           &category[off - 1]);
1145                         datumExtracted[off - 1] = true;
1146                 }
1147
1148                 /* Once we hit nulls, no further match is possible */
1149                 if (category[off - 1] != GIN_CAT_NORM_KEY)
1150                         return false;
1151
1152                 /*----------
1153                  * Check partial match.
1154                  * case cmp == 0 => match
1155                  * case cmp > 0 => not match and end scan (no later match possible)
1156                  * case cmp < 0 => not match and continue scan
1157                  *----------
1158                  */
1159                 cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
1160                                                            ginstate->supportCollation[entry->attnum - 1],
1161                                                                                           entry->queryKey,
1162                                                                                           datum[off - 1],
1163                                                                                           UInt16GetDatum(entry->strategy),
1164                                                                                 PointerGetDatum(entry->extra_data)));
1165                 if (cmp == 0)
1166                         return true;
1167                 else if (cmp > 0)
1168                         return false;
1169
1170                 off++;
1171         }
1172
1173         return false;
1174 }
1175
1176 /*
1177  * Set up the entryRes array for each key by looking at
1178  * every entry for current heap row in pending list.
1179  *
1180  * Returns true if each scan key has at least one entryRes match.
1181  * This corresponds to the situations where the normal index search will
1182  * try to apply the key's consistentFn.  (A tuple not meeting that requirement
1183  * cannot be returned by the normal search since no entry stream will
1184  * source its TID.)
1185  *
1186  * The pendingBuffer is presumed pinned and share-locked on entry.
1187  */
1188 static bool
1189 collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
1190 {
1191         GinScanOpaque so = (GinScanOpaque) scan->opaque;
1192         OffsetNumber attrnum;
1193         Page            page;
1194         IndexTuple      itup;
1195         int                     i,
1196                                 j;
1197
1198         /*
1199          * Reset all entryRes and hasMatchKey flags
1200          */
1201         for (i = 0; i < so->nkeys; i++)
1202         {
1203                 GinScanKey      key = so->keys + i;
1204
1205                 memset(key->entryRes, FALSE, key->nentries);
1206         }
1207         memset(pos->hasMatchKey, FALSE, so->nkeys);
1208
1209         /*
1210          * Outer loop iterates over multiple pending-list pages when a single heap
1211          * row has entries spanning those pages.
1212          */
1213         for (;;)
1214         {
1215                 Datum           datum[BLCKSZ / sizeof(IndexTupleData)];
1216                 GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
1217                 bool            datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
1218
1219                 Assert(pos->lastOffset > pos->firstOffset);
1220                 memset(datumExtracted + pos->firstOffset - 1, 0,
1221                            sizeof(bool) * (pos->lastOffset - pos->firstOffset));
1222
1223                 page = BufferGetPage(pos->pendingBuffer);
1224
1225                 for (i = 0; i < so->nkeys; i++)
1226                 {
1227                         GinScanKey      key = so->keys + i;
1228
1229                         for (j = 0; j < key->nentries; j++)
1230                         {
1231                                 GinScanEntry entry = key->scanEntry[j];
1232                                 OffsetNumber StopLow = pos->firstOffset,
1233                                                         StopHigh = pos->lastOffset,
1234                                                         StopMiddle;
1235
1236                                 /* If already matched on earlier page, do no extra work */
1237                                 if (key->entryRes[j])
1238                                         continue;
1239
1240                                 /*
1241                                  * Interesting tuples are from pos->firstOffset to
1242                                  * pos->lastOffset and they are ordered by (attnum, Datum) as
1243                                  * it's done in entry tree.  So we can use binary search to
1244                                  * avoid linear scanning.
1245                                  */
1246                                 while (StopLow < StopHigh)
1247                                 {
1248                                         int                     res;
1249
1250                                         StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1251
1252                                         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
1253
1254                                         attrnum = gintuple_get_attrnum(&so->ginstate, itup);
1255
1256                                         if (key->attnum < attrnum)
1257                                         {
1258                                                 StopHigh = StopMiddle;
1259                                                 continue;
1260                                         }
1261                                         if (key->attnum > attrnum)
1262                                         {
1263                                                 StopLow = StopMiddle + 1;
1264                                                 continue;
1265                                         }
1266
1267                                         if (datumExtracted[StopMiddle - 1] == false)
1268                                         {
1269                                                 datum[StopMiddle - 1] =
1270                                                         gintuple_get_key(&so->ginstate, itup,
1271                                                                                          &category[StopMiddle - 1]);
1272                                                 datumExtracted[StopMiddle - 1] = true;
1273                                         }
1274
1275                                         if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
1276                                         {
1277                                                 /* special behavior depending on searchMode */
1278                                                 if (entry->searchMode == GIN_SEARCH_MODE_ALL)
1279                                                 {
1280                                                         /* match anything except NULL_ITEM */
1281                                                         if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
1282                                                                 res = -1;
1283                                                         else
1284                                                                 res = 0;
1285                                                 }
1286                                                 else
1287                                                 {
1288                                                         /* match everything */
1289                                                         res = 0;
1290                                                 }
1291                                         }
1292                                         else
1293                                         {
1294                                                 res = ginCompareEntries(&so->ginstate,
1295                                                                                                 entry->attnum,
1296                                                                                                 entry->queryKey,
1297                                                                                                 entry->queryCategory,
1298                                                                                                 datum[StopMiddle - 1],
1299                                                                                                 category[StopMiddle - 1]);
1300                                         }
1301
1302                                         if (res == 0)
1303                                         {
1304                                                 /*
1305                                                  * Found exact match (there can be only one, except in
1306                                                  * EMPTY_QUERY mode).
1307                                                  *
1308                                                  * If doing partial match, scan forward from here to
1309                                                  * end of page to check for matches.
1310                                                  *
1311                                                  * See comment above about tuple's ordering.
1312                                                  */
1313                                                 if (entry->isPartialMatch)
1314                                                         key->entryRes[j] =
1315                                                                 matchPartialInPendingList(&so->ginstate,
1316                                                                                                                   page,
1317                                                                                                                   StopMiddle,
1318                                                                                                                   pos->lastOffset,
1319                                                                                                                   entry,
1320                                                                                                                   datum,
1321                                                                                                                   category,
1322                                                                                                                   datumExtracted);
1323                                                 else
1324                                                         key->entryRes[j] = true;
1325
1326                                                 /* done with binary search */
1327                                                 break;
1328                                         }
1329                                         else if (res < 0)
1330                                                 StopHigh = StopMiddle;
1331                                         else
1332                                                 StopLow = StopMiddle + 1;
1333                                 }
1334
1335                                 if (StopLow >= StopHigh && entry->isPartialMatch)
1336                                 {
1337                                         /*
1338                                          * No exact match on this page.  If doing partial match,
1339                                          * scan from the first tuple greater than target value to
1340                                          * end of page.  Note that since we don't remember whether
1341                                          * the comparePartialFn told us to stop early on a
1342                                          * previous page, we will uselessly apply comparePartialFn
1343                                          * to the first tuple on each subsequent page.
1344                                          */
1345                                         key->entryRes[j] =
1346                                                 matchPartialInPendingList(&so->ginstate,
1347                                                                                                   page,
1348                                                                                                   StopHigh,
1349                                                                                                   pos->lastOffset,
1350                                                                                                   entry,
1351                                                                                                   datum,
1352                                                                                                   category,
1353                                                                                                   datumExtracted);
1354                                 }
1355
1356                                 pos->hasMatchKey[i] |= key->entryRes[j];
1357                         }
1358                 }
1359
1360                 /* Advance firstOffset over the scanned tuples */
1361                 pos->firstOffset = pos->lastOffset;
1362
1363                 if (GinPageHasFullRow(page))
1364                 {
1365                         /*
1366                          * We have examined all pending entries for the current heap row.
1367                          * Break out of loop over pages.
1368                          */
1369                         break;
1370                 }
1371                 else
1372                 {
1373                         /*
1374                          * Advance to next page of pending entries for the current heap
1375                          * row.  Complain if there isn't one.
1376                          */
1377                         ItemPointerData item = pos->item;
1378
1379                         if (scanGetCandidate(scan, pos) == false ||
1380                                 !ItemPointerEquals(&pos->item, &item))
1381                                 elog(ERROR, "could not find additional pending pages for same heap tuple");
1382                 }
1383         }
1384
1385         /*
1386          * Now return "true" if all scan keys have at least one matching datum
1387          */
1388         for (i = 0; i < so->nkeys; i++)
1389         {
1390                 if (pos->hasMatchKey[i] == false)
1391                         return false;
1392         }
1393
1394         return true;
1395 }
1396
1397 /*
1398  * Collect all matched rows from pending list into bitmap
1399  */
1400 static void
1401 scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
1402 {
1403         GinScanOpaque so = (GinScanOpaque) scan->opaque;
1404         MemoryContext oldCtx;
1405         bool            recheck,
1406                                 match;
1407         int                     i;
1408         pendingPosition pos;
1409         Buffer          metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
1410         BlockNumber blkno;
1411
1412         *ntids = 0;
1413
1414         LockBuffer(metabuffer, GIN_SHARE);
1415         blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
1416
1417         /*
1418          * fetch head of list before unlocking metapage. head page must be pinned
1419          * to prevent deletion by vacuum process
1420          */
1421         if (blkno == InvalidBlockNumber)
1422         {
1423                 /* No pending list, so proceed with normal scan */
1424                 UnlockReleaseBuffer(metabuffer);
1425                 return;
1426         }
1427
1428         pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
1429         LockBuffer(pos.pendingBuffer, GIN_SHARE);
1430         pos.firstOffset = FirstOffsetNumber;
1431         UnlockReleaseBuffer(metabuffer);
1432         pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
1433
1434         /*
1435          * loop for each heap row. scanGetCandidate returns full row or row's
1436          * tuples from first page.
1437          */
1438         while (scanGetCandidate(scan, &pos))
1439         {
1440                 /*
1441                  * Check entries in tuple and set up entryRes array.
1442                  *
1443                  * If pending tuples belonging to the current heap row are spread
1444                  * across several pages, collectMatchesForHeapRow will read all of
1445                  * those pages.
1446                  */
1447                 if (!collectMatchesForHeapRow(scan, &pos))
1448                         continue;
1449
1450                 /*
1451                  * Matching of entries of one row is finished, so check row using
1452                  * consistent functions.
1453                  */
1454                 oldCtx = MemoryContextSwitchTo(so->tempCtx);
1455                 recheck = false;
1456                 match = true;
1457
1458                 for (i = 0; i < so->nkeys; i++)
1459                 {
1460                         GinScanKey      key = so->keys + i;
1461
1462                         if (!callConsistentFn(&so->ginstate, key))
1463                         {
1464                                 match = false;
1465                                 break;
1466                         }
1467                         recheck |= key->recheckCurItem;
1468                 }
1469
1470                 MemoryContextSwitchTo(oldCtx);
1471                 MemoryContextReset(so->tempCtx);
1472
1473                 if (match)
1474                 {
1475                         tbm_add_tuples(tbm, &pos.item, 1, recheck);
1476                         (*ntids)++;
1477                 }
1478         }
1479
1480         pfree(pos.hasMatchKey);
1481 }
1482
1483
1484 #define GinIsNewKey(s)          ( ((GinScanOpaque) scan->opaque)->keys == NULL )
1485 #define GinIsVoidRes(s)         ( ((GinScanOpaque) scan->opaque)->isVoidRes )
1486
1487 Datum
1488 gingetbitmap(PG_FUNCTION_ARGS)
1489 {
1490         IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
1491         TIDBitmap  *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
1492         int64           ntids;
1493         ItemPointerData iptr;
1494         bool            recheck;
1495
1496         /*
1497          * Set up the scan keys, and check for unsatisfiable query.
1498          */
1499         if (GinIsNewKey(scan))
1500                 ginNewScanKey(scan);
1501
1502         if (GinIsVoidRes(scan))
1503                 PG_RETURN_INT64(0);
1504
1505         ntids = 0;
1506
1507         /*
1508          * First, scan the pending list and collect any matching entries into the
1509          * bitmap.      After we scan a pending item, some other backend could post it
1510          * into the main index, and so we might visit it a second time during the
1511          * main scan.  This is okay because we'll just re-set the same bit in the
1512          * bitmap.      (The possibility of duplicate visits is a major reason why GIN
1513          * can't support the amgettuple API, however.) Note that it would not do
1514          * to scan the main index before the pending list, since concurrent
1515          * cleanup could then make us miss entries entirely.
1516          */
1517         scanPendingInsert(scan, tbm, &ntids);
1518
1519         /*
1520          * Now scan the main index.
1521          */
1522         startScan(scan);
1523
1524         ItemPointerSetMin(&iptr);
1525
1526         for (;;)
1527         {
1528                 CHECK_FOR_INTERRUPTS();
1529
1530                 if (!scanGetItem(scan, &iptr, &iptr, &recheck))
1531                         break;
1532
1533                 if (ItemPointerIsLossyPage(&iptr))
1534                         tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
1535                 else
1536                         tbm_add_tuples(tbm, &iptr, 1, recheck);
1537                 ntids++;
1538         }
1539
1540         PG_RETURN_INT64(ntids);
1541 }