]> granicus.if.org Git - postgresql/blob - src/backend/access/gist/gistget.c
KNNGIST, otherwise known as order-by-operator support for GIST.
[postgresql] / src / backend / access / gist / gistget.c
1 /*-------------------------------------------------------------------------
2  *
3  * gistget.c
4  *        fetch tuples from a GiST scan.
5  *
6  *
7  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *        src/backend/access/gist/gistget.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "access/gist_private.h"
18 #include "access/relscan.h"
19 #include "executor/execdebug.h"
20 #include "miscadmin.h"
21 #include "pgstat.h"
22 #include "storage/bufmgr.h"
23 #include "utils/builtins.h"
24 #include "utils/memutils.h"
25
26
27 /*
28  * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
29  *
30  * The index tuple might represent either a heap tuple or a lower index page,
31  * depending on whether the containing page is a leaf page or not.
32  *
33  * On success return for a heap tuple, *recheck_p is set to indicate
34  * whether recheck is needed.  We recheck if any of the consistent() functions
35  * request it.  recheck is not interesting when examining a non-leaf entry,
36  * since we must visit the lower index page if there's any doubt.
37  *
38  * If we are doing an ordered scan, so->distances[] is filled with distance
39  * data from the distance() functions before returning success.
40  *
41  * We must decompress the key in the IndexTuple before passing it to the
42  * sk_funcs (which actually are the opclass Consistent or Distance methods).
43  *
44  * Note that this function is always invoked in a short-lived memory context,
45  * so we don't need to worry about cleaning up allocated memory, either here
46  * or in the implementation of any Consistent or Distance methods.
47  */
48 static bool
49 gistindex_keytest(IndexScanDesc scan,
50                                   IndexTuple tuple,
51                                   Page page,
52                                   OffsetNumber offset,
53                                   bool *recheck_p)
54 {
55         GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
56         GISTSTATE  *giststate = so->giststate;
57         ScanKey         key = scan->keyData;
58         int                     keySize = scan->numberOfKeys;
59         double     *distance_p;
60         Relation        r = scan->indexRelation;
61
62         *recheck_p = false;
63
64         /*
65          * If it's a leftover invalid tuple from pre-9.1, treat it as a match
66          * with minimum possible distances.  This means we'll always follow it
67          * to the referenced page.
68          */
69         if (GistTupleIsInvalid(tuple))
70         {
71                 int             i;
72
73                 for (i = 0; i < scan->numberOfOrderBys; i++)
74                         so->distances[i] = -get_float8_infinity();
75                 *recheck_p = true;              /* probably unnecessary */
76                 return true;
77         }
78
79         /* Check whether it matches according to the Consistent functions */
80         while (keySize > 0)
81         {
82                 Datum           datum;
83                 bool            isNull;
84
85                 datum = index_getattr(tuple,
86                                                           key->sk_attno,
87                                                           giststate->tupdesc,
88                                                           &isNull);
89
90                 if (key->sk_flags & SK_ISNULL)
91                 {
92                         /*
93                          * On non-leaf page we can't conclude that child hasn't NULL
94                          * values because of assumption in GiST: union (VAL, NULL) is VAL.
95                          * But if on non-leaf page key IS NULL, then all children are
96                          * NULL.
97                          */
98                         if (key->sk_flags & SK_SEARCHNULL)
99                         {
100                                 if (GistPageIsLeaf(page) && !isNull)
101                                         return false;
102                         }
103                         else
104                         {
105                                 Assert(key->sk_flags & SK_SEARCHNOTNULL);
106                                 if (isNull)
107                                         return false;
108                         }
109                 }
110                 else if (isNull)
111                 {
112                         return false;
113                 }
114                 else
115                 {
116                         Datum           test;
117                         bool            recheck;
118                         GISTENTRY       de;
119
120                         gistdentryinit(giststate, key->sk_attno - 1, &de,
121                                                    datum, r, page, offset,
122                                                    FALSE, isNull);
123
124                         /*
125                          * Call the Consistent function to evaluate the test.  The
126                          * arguments are the index datum (as a GISTENTRY*), the comparison
127                          * datum, the comparison operator's strategy number and subtype
128                          * from pg_amop, and the recheck flag.
129                          *
130                          * (Presently there's no need to pass the subtype since it'll
131                          * always be zero, but might as well pass it for possible future
132                          * use.)
133                          *
134                          * We initialize the recheck flag to true (the safest assumption)
135                          * in case the Consistent function forgets to set it.
136                          */
137                         recheck = true;
138
139                         test = FunctionCall5(&key->sk_func,
140                                                                  PointerGetDatum(&de),
141                                                                  key->sk_argument,
142                                                                  Int32GetDatum(key->sk_strategy),
143                                                                  ObjectIdGetDatum(key->sk_subtype),
144                                                                  PointerGetDatum(&recheck));
145
146                         if (!DatumGetBool(test))
147                                 return false;
148                         *recheck_p |= recheck;
149                 }
150
151                 key++;
152                 keySize--;
153         }
154
155         /* OK, it passes --- now let's compute the distances */
156         key = scan->orderByData;
157         distance_p = so->distances;
158         keySize = scan->numberOfOrderBys;
159         while (keySize > 0)
160         {
161                 Datum           datum;
162                 bool            isNull;
163
164                 datum = index_getattr(tuple,
165                                                           key->sk_attno,
166                                                           giststate->tupdesc,
167                                                           &isNull);
168
169                 if ((key->sk_flags & SK_ISNULL) || isNull)
170                 {
171                         /* Assume distance computes as null and sorts to the end */
172                         *distance_p = get_float8_infinity();
173                 }
174                 else
175                 {
176                         Datum           dist;
177                         GISTENTRY       de;
178
179                         gistdentryinit(giststate, key->sk_attno - 1, &de,
180                                                    datum, r, page, offset,
181                                                    FALSE, isNull);
182
183                         /*
184                          * Call the Distance function to evaluate the distance.  The
185                          * arguments are the index datum (as a GISTENTRY*), the comparison
186                          * datum, and the ordering operator's strategy number and subtype
187                          * from pg_amop.
188                          *
189                          * (Presently there's no need to pass the subtype since it'll
190                          * always be zero, but might as well pass it for possible future
191                          * use.)
192                          *
193                          * Note that Distance functions don't get a recheck argument.
194                          * We can't tolerate lossy distance calculations on leaf tuples;
195                          * there is no opportunity to re-sort the tuples afterwards.
196                          */
197                         dist = FunctionCall4(&key->sk_func,
198                                                                  PointerGetDatum(&de),
199                                                                  key->sk_argument,
200                                                                  Int32GetDatum(key->sk_strategy),
201                                                                  ObjectIdGetDatum(key->sk_subtype));
202
203                         *distance_p = DatumGetFloat8(dist);
204                 }
205
206                 key++;
207                 distance_p++;
208                 keySize--;
209         }
210
211         return true;
212 }
213
214 /*
215  * Scan all items on the GiST index page identified by *pageItem, and insert
216  * them into the queue (or directly to output areas)
217  *
218  * scan: index scan we are executing
219  * pageItem: search queue item identifying an index page to scan
220  * myDistances: distances array associated with pageItem, or NULL at the root
221  * tbm: if not NULL, gistgetbitmap's output bitmap
222  * ntids: if not NULL, gistgetbitmap's output tuple counter
223  *
224  * If tbm/ntids aren't NULL, we are doing an amgetbitmap scan, and heap
225  * tuples should be reported directly into the bitmap.  If they are NULL,
226  * we're doing a plain or ordered indexscan.  For a plain indexscan, heap
227  * tuple TIDs are returned into so->pageData[].  For an ordered indexscan,
228  * heap tuple TIDs are pushed into individual search queue items.
229  *
230  * If we detect that the index page has split since we saw its downlink
231  * in the parent, we push its new right sibling onto the queue so the
232  * sibling will be processed next.
233  */
234 static void
235 gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
236                          TIDBitmap *tbm, int64 *ntids)
237 {
238         GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
239         Buffer          buffer;
240         Page            page;
241         GISTPageOpaque opaque;
242         OffsetNumber maxoff;
243         OffsetNumber i;
244         GISTSearchTreeItem *tmpItem = so->tmpTreeItem;
245         bool            isNew;
246         MemoryContext oldcxt;
247
248         Assert(!GISTSearchItemIsHeap(*pageItem));
249
250         buffer = ReadBuffer(scan->indexRelation, pageItem->blkno);
251         LockBuffer(buffer, GIST_SHARE);
252         gistcheckpage(scan->indexRelation, buffer);
253         page = BufferGetPage(buffer);
254         opaque = GistPageGetOpaque(page);
255
256         /* check if page split occurred since visit to parent */
257         if (!XLogRecPtrIsInvalid(pageItem->data.parentlsn) &&
258                 XLByteLT(pageItem->data.parentlsn, opaque->nsn) &&
259                 opaque->rightlink != InvalidBlockNumber /* sanity check */ )
260         {
261                 /* There was a page split, follow right link to add pages */
262                 GISTSearchItem *item;
263
264                 /* This can't happen when starting at the root */
265                 Assert(myDistances != NULL);
266
267                 oldcxt = MemoryContextSwitchTo(so->queueCxt);
268
269                 /* Create new GISTSearchItem for the right sibling index page */
270                 item = palloc(sizeof(GISTSearchItem));
271                 item->next = NULL;
272                 item->blkno = opaque->rightlink;
273                 item->data.parentlsn = pageItem->data.parentlsn;
274
275                 /* Insert it into the queue using same distances as for this page */
276                 tmpItem->head = item;
277                 tmpItem->lastHeap = NULL;
278                 memcpy(tmpItem->distances, myDistances,
279                            sizeof(double) * scan->numberOfOrderBys);
280
281                 (void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew);
282
283                 MemoryContextSwitchTo(oldcxt);
284         }
285
286         so->nPageData = so->curPageData = 0;
287
288         /*
289          * check all tuples on page
290          */
291         maxoff = PageGetMaxOffsetNumber(page);
292         for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
293         {
294                 IndexTuple      it = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
295                 bool            match;
296                 bool            recheck;
297
298                 /*
299                  * Must call gistindex_keytest in tempCxt, and clean up any leftover
300                  * junk afterward.
301                  */
302                 oldcxt = MemoryContextSwitchTo(so->tempCxt);
303
304                 match = gistindex_keytest(scan, it, page, i, &recheck);
305
306                 MemoryContextSwitchTo(oldcxt);
307                 MemoryContextReset(so->tempCxt);
308
309                 /* Ignore tuple if it doesn't match */
310                 if (!match)
311                         continue;
312
313                 if (tbm && GistPageIsLeaf(page))
314                 {
315                         /*
316                          * getbitmap scan, so just push heap tuple TIDs into the bitmap
317                          * without worrying about ordering
318                          */
319                         tbm_add_tuples(tbm, &it->t_tid, 1, recheck);
320                         (*ntids)++;
321                 }
322                 else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page))
323                 {
324                         /*
325                          * Non-ordered scan, so report heap tuples in so->pageData[]
326                          */
327                         so->pageData[so->nPageData].heapPtr = it->t_tid;
328                         so->pageData[so->nPageData].recheck = recheck;
329                         so->nPageData++;
330                 }
331                 else
332                 {
333                         /*
334                          * Must push item into search queue.  We get here for any lower
335                          * index page, and also for heap tuples if doing an ordered
336                          * search.
337                          */
338                         GISTSearchItem *item;
339
340                         oldcxt = MemoryContextSwitchTo(so->queueCxt);
341
342                         /* Create new GISTSearchItem for this item */
343                         item = palloc(sizeof(GISTSearchItem));
344                         item->next = NULL;
345
346                         if (GistPageIsLeaf(page))
347                         {
348                                 /* Creating heap-tuple GISTSearchItem */
349                                 item->blkno = InvalidBlockNumber;
350                                 item->data.heap.heapPtr = it->t_tid;
351                                 item->data.heap.recheck = recheck;
352                         }
353                         else
354                         {
355                                 /* Creating index-page GISTSearchItem */
356                                 item->blkno = ItemPointerGetBlockNumber(&it->t_tid);
357                                 /* lsn of current page is lsn of parent page for child */
358                                 item->data.parentlsn = PageGetLSN(page);
359                         }
360
361                         /* Insert it into the queue using new distance data */
362                         tmpItem->head = item;
363                         tmpItem->lastHeap = GISTSearchItemIsHeap(*item) ? item : NULL;
364                         memcpy(tmpItem->distances, so->distances,
365                                    sizeof(double) * scan->numberOfOrderBys);
366
367                         (void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew);
368
369                         MemoryContextSwitchTo(oldcxt);
370                 }
371         }
372
373         UnlockReleaseBuffer(buffer);
374 }
375
376 /*
377  * Extract next item (in order) from search queue
378  *
379  * Returns a GISTSearchItem or NULL.  Caller must pfree item when done with it.
380  *
381  * NOTE: on successful return, so->curTreeItem is the GISTSearchTreeItem that
382  * contained the result item.  Callers can use so->curTreeItem->distances as
383  * the distances value for the item.
384  */
385 static GISTSearchItem *
386 getNextGISTSearchItem(GISTScanOpaque so)
387 {
388         for (;;)
389         {
390                 GISTSearchItem *item;
391
392                 /* Update curTreeItem if we don't have one */
393                 if (so->curTreeItem == NULL)
394                 {
395                         so->curTreeItem = (GISTSearchTreeItem *) rb_leftmost(so->queue);
396                         /* Done when tree is empty */
397                         if (so->curTreeItem == NULL)
398                                 break;
399                 }
400
401                 item = so->curTreeItem->head;
402                 if (item != NULL)
403                 {
404                         /* Delink item from chain */
405                         so->curTreeItem->head = item->next;
406                         /* Return item; caller is responsible to pfree it */
407                         return item;
408                 }
409
410                 /* curTreeItem is exhausted, so remove it from rbtree */
411                 rb_delete(so->queue, (RBNode *) so->curTreeItem);
412                 so->curTreeItem = NULL;
413         }
414
415         return NULL;
416 }
417
418 /*
419  * Fetch next heap tuple in an ordered search
420  */
421 static bool
422 getNextNearest(IndexScanDesc scan)
423 {
424         GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
425         bool            res = false;
426
427         do
428         {
429                 GISTSearchItem *item = getNextGISTSearchItem(so);
430
431                 if (!item)
432                         break;
433
434                 if (GISTSearchItemIsHeap(*item))
435                 {
436                         /* found a heap item at currently minimal distance */
437                         scan->xs_ctup.t_self = item->data.heap.heapPtr;
438                         scan->xs_recheck = item->data.heap.recheck;
439                         res = true;
440                 }
441                 else
442                 {
443                         /* visit an index page, extract its items into queue */
444                         CHECK_FOR_INTERRUPTS();
445
446                         gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL);
447                 }
448
449                 pfree(item);
450         } while (!res);
451
452         return res;
453 }
454
455 /*
456  * gistgettuple() -- Get the next tuple in the scan
457  */
458 Datum
459 gistgettuple(PG_FUNCTION_ARGS)
460 {
461         IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
462         GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
463
464         if (!so->qual_ok)
465                 PG_RETURN_BOOL(false);
466
467         if (so->firstCall)
468         {
469                 /* Begin the scan by processing the root page */
470                 GISTSearchItem fakeItem;
471
472                 pgstat_count_index_scan(scan->indexRelation);
473
474                 so->firstCall = false;
475                 so->curTreeItem = NULL;
476                 so->curPageData = so->nPageData = 0;
477
478                 fakeItem.blkno = GIST_ROOT_BLKNO;
479                 memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
480                 gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
481         }
482
483         if (scan->numberOfOrderBys > 0)
484         {
485                 /* Must fetch tuples in strict distance order */
486                 PG_RETURN_BOOL(getNextNearest(scan));
487         }
488         else
489         {
490                 /* Fetch tuples index-page-at-a-time */
491                 for (;;)
492                 {
493                         if (so->curPageData < so->nPageData)
494                         {
495                                 /* continuing to return tuples from a leaf page */
496                                 scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr;
497                                 scan->xs_recheck = so->pageData[so->curPageData].recheck;
498                                 so->curPageData++;
499                                 PG_RETURN_BOOL(true);
500                         }
501
502                         /* find and process the next index page */
503                         do
504                         {
505                                 GISTSearchItem *item = getNextGISTSearchItem(so);
506
507                                 if (!item)
508                                         PG_RETURN_BOOL(false);
509
510                                 CHECK_FOR_INTERRUPTS();
511
512                                 /*
513                                  * While scanning a leaf page, ItemPointers of matching heap
514                                  * tuples are stored in so->pageData.  If there are any on
515                                  * this page, we fall out of the inner "do" and loop around
516                                  * to return them.
517                                  */
518                                 gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL);
519
520                                 pfree(item);
521                         } while (so->nPageData == 0);
522                 }
523         }
524
525         PG_RETURN_BOOL(false);          /* keep compiler quiet */
526 }
527
528 /*
529  * gistgetbitmap() -- Get a bitmap of all heap tuple locations
530  */
531 Datum
532 gistgetbitmap(PG_FUNCTION_ARGS)
533 {
534         IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
535         TIDBitmap  *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
536         GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
537         int64           ntids = 0;
538         GISTSearchItem fakeItem;
539
540         if (!so->qual_ok)
541                 PG_RETURN_INT64(0);
542
543         pgstat_count_index_scan(scan->indexRelation);
544
545         /* Begin the scan by processing the root page */
546         so->curTreeItem = NULL;
547         so->curPageData = so->nPageData = 0;
548
549         fakeItem.blkno = GIST_ROOT_BLKNO;
550         memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
551         gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
552
553         /*
554          * While scanning a leaf page, ItemPointers of matching heap tuples will
555          * be stored directly into tbm, so we don't need to deal with them here.
556          */
557         for (;;)
558         {
559                 GISTSearchItem *item = getNextGISTSearchItem(so);
560
561                 if (!item)
562                         break;
563
564                 CHECK_FOR_INTERRUPTS();
565
566                 gistScanPage(scan, item, so->curTreeItem->distances, tbm, &ntids);
567
568                 pfree(item);
569         }
570
571         PG_RETURN_INT64(ntids);
572 }