]> granicus.if.org Git - postgresql/blob - src/backend/executor/nodeSamplescan.c
Update copyright for 2019
[postgresql] / src / backend / executor / nodeSamplescan.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeSamplescan.c
4  *        Support routines for sample scans of relations (table sampling).
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/executor/nodeSamplescan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "access/hash.h"
18 #include "access/relscan.h"
19 #include "access/tsmapi.h"
20 #include "executor/executor.h"
21 #include "executor/nodeSamplescan.h"
22 #include "miscadmin.h"
23 #include "pgstat.h"
24 #include "storage/predicate.h"
25 #include "utils/builtins.h"
26 #include "utils/rel.h"
27 #include "utils/tqual.h"
28
29 static TupleTableSlot *SampleNext(SampleScanState *node);
30 static void tablesample_init(SampleScanState *scanstate);
31 static HeapTuple tablesample_getnext(SampleScanState *scanstate);
32 static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
33                                    HeapScanDesc scan);
34
35 /* ----------------------------------------------------------------
36  *                                              Scan Support
37  * ----------------------------------------------------------------
38  */
39
40 /* ----------------------------------------------------------------
41  *              SampleNext
42  *
43  *              This is a workhorse for ExecSampleScan
44  * ----------------------------------------------------------------
45  */
46 static TupleTableSlot *
47 SampleNext(SampleScanState *node)
48 {
49         HeapTuple       tuple;
50         TupleTableSlot *slot;
51
52         /*
53          * if this is first call within a scan, initialize
54          */
55         if (!node->begun)
56                 tablesample_init(node);
57
58         /*
59          * get the next tuple, and store it in our result slot
60          */
61         tuple = tablesample_getnext(node);
62
63         slot = node->ss.ss_ScanTupleSlot;
64
65         if (tuple)
66                 ExecStoreBufferHeapTuple(tuple, /* tuple to store */
67                                                                  slot,  /* slot to store in */
68                                                                  node->ss.ss_currentScanDesc->rs_cbuf); /* tuple's buffer */
69         else
70                 ExecClearTuple(slot);
71
72         return slot;
73 }
74
75 /*
76  * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
77  */
78 static bool
79 SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
80 {
81         /*
82          * No need to recheck for SampleScan, since like SeqScan we don't pass any
83          * checkable keys to heap_beginscan.
84          */
85         return true;
86 }
87
88 /* ----------------------------------------------------------------
89  *              ExecSampleScan(node)
90  *
91  *              Scans the relation using the sampling method and returns
92  *              the next qualifying tuple.
93  *              We call the ExecScan() routine and pass it the appropriate
94  *              access method functions.
95  * ----------------------------------------------------------------
96  */
97 static TupleTableSlot *
98 ExecSampleScan(PlanState *pstate)
99 {
100         SampleScanState *node = castNode(SampleScanState, pstate);
101
102         return ExecScan(&node->ss,
103                                         (ExecScanAccessMtd) SampleNext,
104                                         (ExecScanRecheckMtd) SampleRecheck);
105 }
106
107 /* ----------------------------------------------------------------
108  *              ExecInitSampleScan
109  * ----------------------------------------------------------------
110  */
111 SampleScanState *
112 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
113 {
114         SampleScanState *scanstate;
115         TableSampleClause *tsc = node->tablesample;
116         TsmRoutine *tsm;
117
118         Assert(outerPlan(node) == NULL);
119         Assert(innerPlan(node) == NULL);
120
121         /*
122          * create state structure
123          */
124         scanstate = makeNode(SampleScanState);
125         scanstate->ss.ps.plan = (Plan *) node;
126         scanstate->ss.ps.state = estate;
127         scanstate->ss.ps.ExecProcNode = ExecSampleScan;
128
129         /*
130          * Miscellaneous initialization
131          *
132          * create expression context for node
133          */
134         ExecAssignExprContext(estate, &scanstate->ss.ps);
135
136         /*
137          * open the scan relation
138          */
139         scanstate->ss.ss_currentRelation =
140                 ExecOpenScanRelation(estate,
141                                                          node->scan.scanrelid,
142                                                          eflags);
143
144         /* we won't set up the HeapScanDesc till later */
145         scanstate->ss.ss_currentScanDesc = NULL;
146
147         /* and create slot with appropriate rowtype */
148         ExecInitScanTupleSlot(estate, &scanstate->ss,
149                                                   RelationGetDescr(scanstate->ss.ss_currentRelation),
150                                                   &TTSOpsBufferHeapTuple);
151
152         /*
153          * Initialize result type and projection.
154          */
155         ExecInitResultTypeTL(&scanstate->ss.ps);
156         ExecAssignScanProjectionInfo(&scanstate->ss);
157
158         /*
159          * initialize child expressions
160          */
161         scanstate->ss.ps.qual =
162                 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
163
164         scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
165         scanstate->repeatable =
166                 ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
167
168         /*
169          * If we don't have a REPEATABLE clause, select a random seed.  We want to
170          * do this just once, since the seed shouldn't change over rescans.
171          */
172         if (tsc->repeatable == NULL)
173                 scanstate->seed = random();
174
175         /*
176          * Finally, initialize the TABLESAMPLE method handler.
177          */
178         tsm = GetTsmRoutine(tsc->tsmhandler);
179         scanstate->tsmroutine = tsm;
180         scanstate->tsm_state = NULL;
181
182         if (tsm->InitSampleScan)
183                 tsm->InitSampleScan(scanstate, eflags);
184
185         /* We'll do BeginSampleScan later; we can't evaluate params yet */
186         scanstate->begun = false;
187
188         return scanstate;
189 }
190
191 /* ----------------------------------------------------------------
192  *              ExecEndSampleScan
193  *
194  *              frees any storage allocated through C routines.
195  * ----------------------------------------------------------------
196  */
197 void
198 ExecEndSampleScan(SampleScanState *node)
199 {
200         /*
201          * Tell sampling function that we finished the scan.
202          */
203         if (node->tsmroutine->EndSampleScan)
204                 node->tsmroutine->EndSampleScan(node);
205
206         /*
207          * Free the exprcontext
208          */
209         ExecFreeExprContext(&node->ss.ps);
210
211         /*
212          * clean out the tuple table
213          */
214         if (node->ss.ps.ps_ResultTupleSlot)
215                 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
216         ExecClearTuple(node->ss.ss_ScanTupleSlot);
217
218         /*
219          * close heap scan
220          */
221         if (node->ss.ss_currentScanDesc)
222                 heap_endscan(node->ss.ss_currentScanDesc);
223 }
224
225 /* ----------------------------------------------------------------
226  *              ExecReScanSampleScan
227  *
228  *              Rescans the relation.
229  *
230  * ----------------------------------------------------------------
231  */
232 void
233 ExecReScanSampleScan(SampleScanState *node)
234 {
235         /* Remember we need to do BeginSampleScan again (if we did it at all) */
236         node->begun = false;
237
238         ExecScanReScan(&node->ss);
239 }
240
241
242 /*
243  * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
244  */
245 static void
246 tablesample_init(SampleScanState *scanstate)
247 {
248         TsmRoutine *tsm = scanstate->tsmroutine;
249         ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
250         Datum      *params;
251         Datum           datum;
252         bool            isnull;
253         uint32          seed;
254         bool            allow_sync;
255         int                     i;
256         ListCell   *arg;
257
258         params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
259
260         i = 0;
261         foreach(arg, scanstate->args)
262         {
263                 ExprState  *argstate = (ExprState *) lfirst(arg);
264
265                 params[i] = ExecEvalExprSwitchContext(argstate,
266                                                                                           econtext,
267                                                                                           &isnull);
268                 if (isnull)
269                         ereport(ERROR,
270                                         (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
271                                          errmsg("TABLESAMPLE parameter cannot be null")));
272                 i++;
273         }
274
275         if (scanstate->repeatable)
276         {
277                 datum = ExecEvalExprSwitchContext(scanstate->repeatable,
278                                                                                   econtext,
279                                                                                   &isnull);
280                 if (isnull)
281                         ereport(ERROR,
282                                         (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
283                                          errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
284
285                 /*
286                  * The REPEATABLE parameter has been coerced to float8 by the parser.
287                  * The reason for using float8 at the SQL level is that it will
288                  * produce unsurprising results both for users used to databases that
289                  * accept only integers in the REPEATABLE clause and for those who
290                  * might expect that REPEATABLE works like setseed() (a float in the
291                  * range from -1 to 1).
292                  *
293                  * We use hashfloat8() to convert the supplied value into a suitable
294                  * seed.  For regression-testing purposes, that has the convenient
295                  * property that REPEATABLE(0) gives a machine-independent result.
296                  */
297                 seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
298         }
299         else
300         {
301                 /* Use the seed selected by ExecInitSampleScan */
302                 seed = scanstate->seed;
303         }
304
305         /* Set default values for params that BeginSampleScan can adjust */
306         scanstate->use_bulkread = true;
307         scanstate->use_pagemode = true;
308
309         /* Let tablesample method do its thing */
310         tsm->BeginSampleScan(scanstate,
311                                                  params,
312                                                  list_length(scanstate->args),
313                                                  seed);
314
315         /* We'll use syncscan if there's no NextSampleBlock function */
316         allow_sync = (tsm->NextSampleBlock == NULL);
317
318         /* Now we can create or reset the HeapScanDesc */
319         if (scanstate->ss.ss_currentScanDesc == NULL)
320         {
321                 scanstate->ss.ss_currentScanDesc =
322                         heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
323                                                                         scanstate->ss.ps.state->es_snapshot,
324                                                                         0, NULL,
325                                                                         scanstate->use_bulkread,
326                                                                         allow_sync,
327                                                                         scanstate->use_pagemode);
328         }
329         else
330         {
331                 heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
332                                                            scanstate->use_bulkread,
333                                                            allow_sync,
334                                                            scanstate->use_pagemode);
335         }
336
337         pfree(params);
338
339         /* And we're initialized. */
340         scanstate->begun = true;
341 }
342
343 /*
344  * Get next tuple from TABLESAMPLE method.
345  *
346  * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
347  * perhaps be better to refactor to share more code.
348  */
349 static HeapTuple
350 tablesample_getnext(SampleScanState *scanstate)
351 {
352         TsmRoutine *tsm = scanstate->tsmroutine;
353         HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
354         HeapTuple       tuple = &(scan->rs_ctup);
355         Snapshot        snapshot = scan->rs_snapshot;
356         bool            pagemode = scan->rs_pageatatime;
357         BlockNumber blockno;
358         Page            page;
359         bool            all_visible;
360         OffsetNumber maxoffset;
361
362         if (!scan->rs_inited)
363         {
364                 /*
365                  * return null immediately if relation is empty
366                  */
367                 if (scan->rs_nblocks == 0)
368                 {
369                         Assert(!BufferIsValid(scan->rs_cbuf));
370                         tuple->t_data = NULL;
371                         return NULL;
372                 }
373                 if (tsm->NextSampleBlock)
374                 {
375                         blockno = tsm->NextSampleBlock(scanstate);
376                         if (!BlockNumberIsValid(blockno))
377                         {
378                                 tuple->t_data = NULL;
379                                 return NULL;
380                         }
381                 }
382                 else
383                         blockno = scan->rs_startblock;
384                 Assert(blockno < scan->rs_nblocks);
385                 heapgetpage(scan, blockno);
386                 scan->rs_inited = true;
387         }
388         else
389         {
390                 /* continue from previously returned page/tuple */
391                 blockno = scan->rs_cblock;      /* current page */
392         }
393
394         /*
395          * When not using pagemode, we must lock the buffer during tuple
396          * visibility checks.
397          */
398         if (!pagemode)
399                 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
400
401         page = (Page) BufferGetPage(scan->rs_cbuf);
402         all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
403         maxoffset = PageGetMaxOffsetNumber(page);
404
405         for (;;)
406         {
407                 OffsetNumber tupoffset;
408                 bool            finished;
409
410                 CHECK_FOR_INTERRUPTS();
411
412                 /* Ask the tablesample method which tuples to check on this page. */
413                 tupoffset = tsm->NextSampleTuple(scanstate,
414                                                                                  blockno,
415                                                                                  maxoffset);
416
417                 if (OffsetNumberIsValid(tupoffset))
418                 {
419                         ItemId          itemid;
420                         bool            visible;
421
422                         /* Skip invalid tuple pointers. */
423                         itemid = PageGetItemId(page, tupoffset);
424                         if (!ItemIdIsNormal(itemid))
425                                 continue;
426
427                         tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
428                         tuple->t_len = ItemIdGetLength(itemid);
429                         ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
430
431                         if (all_visible)
432                                 visible = true;
433                         else
434                                 visible = SampleTupleVisible(tuple, tupoffset, scan);
435
436                         /* in pagemode, heapgetpage did this for us */
437                         if (!pagemode)
438                                 CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
439                                                                                                 scan->rs_cbuf, snapshot);
440
441                         if (visible)
442                         {
443                                 /* Found visible tuple, return it. */
444                                 if (!pagemode)
445                                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
446                                 break;
447                         }
448                         else
449                         {
450                                 /* Try next tuple from same page. */
451                                 continue;
452                         }
453                 }
454
455                 /*
456                  * if we get here, it means we've exhausted the items on this page and
457                  * it's time to move to the next.
458                  */
459                 if (!pagemode)
460                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
461
462                 if (tsm->NextSampleBlock)
463                 {
464                         blockno = tsm->NextSampleBlock(scanstate);
465                         Assert(!scan->rs_syncscan);
466                         finished = !BlockNumberIsValid(blockno);
467                 }
468                 else
469                 {
470                         /* Without NextSampleBlock, just do a plain forward seqscan. */
471                         blockno++;
472                         if (blockno >= scan->rs_nblocks)
473                                 blockno = 0;
474
475                         /*
476                          * Report our new scan position for synchronization purposes.
477                          *
478                          * Note: we do this before checking for end of scan so that the
479                          * final state of the position hint is back at the start of the
480                          * rel.  That's not strictly necessary, but otherwise when you run
481                          * the same query multiple times the starting position would shift
482                          * a little bit backwards on every invocation, which is confusing.
483                          * We don't guarantee any specific ordering in general, though.
484                          */
485                         if (scan->rs_syncscan)
486                                 ss_report_location(scan->rs_rd, blockno);
487
488                         finished = (blockno == scan->rs_startblock);
489                 }
490
491                 /*
492                  * Reached end of scan?
493                  */
494                 if (finished)
495                 {
496                         if (BufferIsValid(scan->rs_cbuf))
497                                 ReleaseBuffer(scan->rs_cbuf);
498                         scan->rs_cbuf = InvalidBuffer;
499                         scan->rs_cblock = InvalidBlockNumber;
500                         tuple->t_data = NULL;
501                         scan->rs_inited = false;
502                         return NULL;
503                 }
504
505                 Assert(blockno < scan->rs_nblocks);
506                 heapgetpage(scan, blockno);
507
508                 /* Re-establish state for new page */
509                 if (!pagemode)
510                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
511
512                 page = (Page) BufferGetPage(scan->rs_cbuf);
513                 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
514                 maxoffset = PageGetMaxOffsetNumber(page);
515         }
516
517         /* Count successfully-fetched tuples as heap fetches */
518         pgstat_count_heap_getnext(scan->rs_rd);
519
520         return &(scan->rs_ctup);
521 }
522
523 /*
524  * Check visibility of the tuple.
525  */
526 static bool
527 SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
528 {
529         if (scan->rs_pageatatime)
530         {
531                 /*
532                  * In pageatatime mode, heapgetpage() already did visibility checks,
533                  * so just look at the info it left in rs_vistuples[].
534                  *
535                  * We use a binary search over the known-sorted array.  Note: we could
536                  * save some effort if we insisted that NextSampleTuple select tuples
537                  * in increasing order, but it's not clear that there would be enough
538                  * gain to justify the restriction.
539                  */
540                 int                     start = 0,
541                                         end = scan->rs_ntuples - 1;
542
543                 while (start <= end)
544                 {
545                         int                     mid = (start + end) / 2;
546                         OffsetNumber curoffset = scan->rs_vistuples[mid];
547
548                         if (tupoffset == curoffset)
549                                 return true;
550                         else if (tupoffset < curoffset)
551                                 end = mid - 1;
552                         else
553                                 start = mid + 1;
554                 }
555
556                 return false;
557         }
558         else
559         {
560                 /* Otherwise, we have to check the tuple individually. */
561                 return HeapTupleSatisfiesVisibility(tuple,
562                                                                                         scan->rs_snapshot,
563                                                                                         scan->rs_cbuf);
564         }
565 }