]> granicus.if.org Git - postgresql/blob - src/backend/executor/nodeSamplescan.c
Don't include heapam.h from other headers.
[postgresql] / src / backend / executor / nodeSamplescan.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeSamplescan.c
4  *        Support routines for sample scans of relations (table sampling).
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/executor/nodeSamplescan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "access/hash.h"
18 #include "access/heapam.h"
19 #include "access/relscan.h"
20 #include "access/tsmapi.h"
21 #include "executor/executor.h"
22 #include "executor/nodeSamplescan.h"
23 #include "miscadmin.h"
24 #include "pgstat.h"
25 #include "storage/predicate.h"
26 #include "utils/builtins.h"
27 #include "utils/rel.h"
28 #include "utils/tqual.h"
29
30 static TupleTableSlot *SampleNext(SampleScanState *node);
31 static void tablesample_init(SampleScanState *scanstate);
32 static HeapTuple tablesample_getnext(SampleScanState *scanstate);
33 static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
34                                    HeapScanDesc scan);
35
36 /* ----------------------------------------------------------------
37  *                                              Scan Support
38  * ----------------------------------------------------------------
39  */
40
41 /* ----------------------------------------------------------------
42  *              SampleNext
43  *
44  *              This is a workhorse for ExecSampleScan
45  * ----------------------------------------------------------------
46  */
47 static TupleTableSlot *
48 SampleNext(SampleScanState *node)
49 {
50         HeapTuple       tuple;
51         TupleTableSlot *slot;
52
53         /*
54          * if this is first call within a scan, initialize
55          */
56         if (!node->begun)
57                 tablesample_init(node);
58
59         /*
60          * get the next tuple, and store it in our result slot
61          */
62         tuple = tablesample_getnext(node);
63
64         slot = node->ss.ss_ScanTupleSlot;
65
66         if (tuple)
67                 ExecStoreBufferHeapTuple(tuple, /* tuple to store */
68                                                                  slot,  /* slot to store in */
69                                                                  node->ss.ss_currentScanDesc->rs_cbuf); /* tuple's buffer */
70         else
71                 ExecClearTuple(slot);
72
73         return slot;
74 }
75
76 /*
77  * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
78  */
79 static bool
80 SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
81 {
82         /*
83          * No need to recheck for SampleScan, since like SeqScan we don't pass any
84          * checkable keys to heap_beginscan.
85          */
86         return true;
87 }
88
89 /* ----------------------------------------------------------------
90  *              ExecSampleScan(node)
91  *
92  *              Scans the relation using the sampling method and returns
93  *              the next qualifying tuple.
94  *              We call the ExecScan() routine and pass it the appropriate
95  *              access method functions.
96  * ----------------------------------------------------------------
97  */
98 static TupleTableSlot *
99 ExecSampleScan(PlanState *pstate)
100 {
101         SampleScanState *node = castNode(SampleScanState, pstate);
102
103         return ExecScan(&node->ss,
104                                         (ExecScanAccessMtd) SampleNext,
105                                         (ExecScanRecheckMtd) SampleRecheck);
106 }
107
108 /* ----------------------------------------------------------------
109  *              ExecInitSampleScan
110  * ----------------------------------------------------------------
111  */
112 SampleScanState *
113 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
114 {
115         SampleScanState *scanstate;
116         TableSampleClause *tsc = node->tablesample;
117         TsmRoutine *tsm;
118
119         Assert(outerPlan(node) == NULL);
120         Assert(innerPlan(node) == NULL);
121
122         /*
123          * create state structure
124          */
125         scanstate = makeNode(SampleScanState);
126         scanstate->ss.ps.plan = (Plan *) node;
127         scanstate->ss.ps.state = estate;
128         scanstate->ss.ps.ExecProcNode = ExecSampleScan;
129
130         /*
131          * Miscellaneous initialization
132          *
133          * create expression context for node
134          */
135         ExecAssignExprContext(estate, &scanstate->ss.ps);
136
137         /*
138          * open the scan relation
139          */
140         scanstate->ss.ss_currentRelation =
141                 ExecOpenScanRelation(estate,
142                                                          node->scan.scanrelid,
143                                                          eflags);
144
145         /* we won't set up the HeapScanDesc till later */
146         scanstate->ss.ss_currentScanDesc = NULL;
147
148         /* and create slot with appropriate rowtype */
149         ExecInitScanTupleSlot(estate, &scanstate->ss,
150                                                   RelationGetDescr(scanstate->ss.ss_currentRelation),
151                                                   &TTSOpsBufferHeapTuple);
152
153         /*
154          * Initialize result type and projection.
155          */
156         ExecInitResultTypeTL(&scanstate->ss.ps);
157         ExecAssignScanProjectionInfo(&scanstate->ss);
158
159         /*
160          * initialize child expressions
161          */
162         scanstate->ss.ps.qual =
163                 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
164
165         scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
166         scanstate->repeatable =
167                 ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
168
169         /*
170          * If we don't have a REPEATABLE clause, select a random seed.  We want to
171          * do this just once, since the seed shouldn't change over rescans.
172          */
173         if (tsc->repeatable == NULL)
174                 scanstate->seed = random();
175
176         /*
177          * Finally, initialize the TABLESAMPLE method handler.
178          */
179         tsm = GetTsmRoutine(tsc->tsmhandler);
180         scanstate->tsmroutine = tsm;
181         scanstate->tsm_state = NULL;
182
183         if (tsm->InitSampleScan)
184                 tsm->InitSampleScan(scanstate, eflags);
185
186         /* We'll do BeginSampleScan later; we can't evaluate params yet */
187         scanstate->begun = false;
188
189         return scanstate;
190 }
191
192 /* ----------------------------------------------------------------
193  *              ExecEndSampleScan
194  *
195  *              frees any storage allocated through C routines.
196  * ----------------------------------------------------------------
197  */
198 void
199 ExecEndSampleScan(SampleScanState *node)
200 {
201         /*
202          * Tell sampling function that we finished the scan.
203          */
204         if (node->tsmroutine->EndSampleScan)
205                 node->tsmroutine->EndSampleScan(node);
206
207         /*
208          * Free the exprcontext
209          */
210         ExecFreeExprContext(&node->ss.ps);
211
212         /*
213          * clean out the tuple table
214          */
215         if (node->ss.ps.ps_ResultTupleSlot)
216                 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
217         ExecClearTuple(node->ss.ss_ScanTupleSlot);
218
219         /*
220          * close heap scan
221          */
222         if (node->ss.ss_currentScanDesc)
223                 heap_endscan(node->ss.ss_currentScanDesc);
224 }
225
226 /* ----------------------------------------------------------------
227  *              ExecReScanSampleScan
228  *
229  *              Rescans the relation.
230  *
231  * ----------------------------------------------------------------
232  */
233 void
234 ExecReScanSampleScan(SampleScanState *node)
235 {
236         /* Remember we need to do BeginSampleScan again (if we did it at all) */
237         node->begun = false;
238
239         ExecScanReScan(&node->ss);
240 }
241
242
243 /*
244  * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
245  */
246 static void
247 tablesample_init(SampleScanState *scanstate)
248 {
249         TsmRoutine *tsm = scanstate->tsmroutine;
250         ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
251         Datum      *params;
252         Datum           datum;
253         bool            isnull;
254         uint32          seed;
255         bool            allow_sync;
256         int                     i;
257         ListCell   *arg;
258
259         params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
260
261         i = 0;
262         foreach(arg, scanstate->args)
263         {
264                 ExprState  *argstate = (ExprState *) lfirst(arg);
265
266                 params[i] = ExecEvalExprSwitchContext(argstate,
267                                                                                           econtext,
268                                                                                           &isnull);
269                 if (isnull)
270                         ereport(ERROR,
271                                         (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
272                                          errmsg("TABLESAMPLE parameter cannot be null")));
273                 i++;
274         }
275
276         if (scanstate->repeatable)
277         {
278                 datum = ExecEvalExprSwitchContext(scanstate->repeatable,
279                                                                                   econtext,
280                                                                                   &isnull);
281                 if (isnull)
282                         ereport(ERROR,
283                                         (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
284                                          errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
285
286                 /*
287                  * The REPEATABLE parameter has been coerced to float8 by the parser.
288                  * The reason for using float8 at the SQL level is that it will
289                  * produce unsurprising results both for users used to databases that
290                  * accept only integers in the REPEATABLE clause and for those who
291                  * might expect that REPEATABLE works like setseed() (a float in the
292                  * range from -1 to 1).
293                  *
294                  * We use hashfloat8() to convert the supplied value into a suitable
295                  * seed.  For regression-testing purposes, that has the convenient
296                  * property that REPEATABLE(0) gives a machine-independent result.
297                  */
298                 seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
299         }
300         else
301         {
302                 /* Use the seed selected by ExecInitSampleScan */
303                 seed = scanstate->seed;
304         }
305
306         /* Set default values for params that BeginSampleScan can adjust */
307         scanstate->use_bulkread = true;
308         scanstate->use_pagemode = true;
309
310         /* Let tablesample method do its thing */
311         tsm->BeginSampleScan(scanstate,
312                                                  params,
313                                                  list_length(scanstate->args),
314                                                  seed);
315
316         /* We'll use syncscan if there's no NextSampleBlock function */
317         allow_sync = (tsm->NextSampleBlock == NULL);
318
319         /* Now we can create or reset the HeapScanDesc */
320         if (scanstate->ss.ss_currentScanDesc == NULL)
321         {
322                 scanstate->ss.ss_currentScanDesc =
323                         heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
324                                                                         scanstate->ss.ps.state->es_snapshot,
325                                                                         0, NULL,
326                                                                         scanstate->use_bulkread,
327                                                                         allow_sync,
328                                                                         scanstate->use_pagemode);
329         }
330         else
331         {
332                 heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
333                                                            scanstate->use_bulkread,
334                                                            allow_sync,
335                                                            scanstate->use_pagemode);
336         }
337
338         pfree(params);
339
340         /* And we're initialized. */
341         scanstate->begun = true;
342 }
343
344 /*
345  * Get next tuple from TABLESAMPLE method.
346  *
347  * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
348  * perhaps be better to refactor to share more code.
349  */
350 static HeapTuple
351 tablesample_getnext(SampleScanState *scanstate)
352 {
353         TsmRoutine *tsm = scanstate->tsmroutine;
354         HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
355         HeapTuple       tuple = &(scan->rs_ctup);
356         Snapshot        snapshot = scan->rs_snapshot;
357         bool            pagemode = scan->rs_pageatatime;
358         BlockNumber blockno;
359         Page            page;
360         bool            all_visible;
361         OffsetNumber maxoffset;
362
363         if (!scan->rs_inited)
364         {
365                 /*
366                  * return null immediately if relation is empty
367                  */
368                 if (scan->rs_nblocks == 0)
369                 {
370                         Assert(!BufferIsValid(scan->rs_cbuf));
371                         tuple->t_data = NULL;
372                         return NULL;
373                 }
374                 if (tsm->NextSampleBlock)
375                 {
376                         blockno = tsm->NextSampleBlock(scanstate);
377                         if (!BlockNumberIsValid(blockno))
378                         {
379                                 tuple->t_data = NULL;
380                                 return NULL;
381                         }
382                 }
383                 else
384                         blockno = scan->rs_startblock;
385                 Assert(blockno < scan->rs_nblocks);
386                 heapgetpage(scan, blockno);
387                 scan->rs_inited = true;
388         }
389         else
390         {
391                 /* continue from previously returned page/tuple */
392                 blockno = scan->rs_cblock;      /* current page */
393         }
394
395         /*
396          * When not using pagemode, we must lock the buffer during tuple
397          * visibility checks.
398          */
399         if (!pagemode)
400                 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
401
402         page = (Page) BufferGetPage(scan->rs_cbuf);
403         all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
404         maxoffset = PageGetMaxOffsetNumber(page);
405
406         for (;;)
407         {
408                 OffsetNumber tupoffset;
409                 bool            finished;
410
411                 CHECK_FOR_INTERRUPTS();
412
413                 /* Ask the tablesample method which tuples to check on this page. */
414                 tupoffset = tsm->NextSampleTuple(scanstate,
415                                                                                  blockno,
416                                                                                  maxoffset);
417
418                 if (OffsetNumberIsValid(tupoffset))
419                 {
420                         ItemId          itemid;
421                         bool            visible;
422
423                         /* Skip invalid tuple pointers. */
424                         itemid = PageGetItemId(page, tupoffset);
425                         if (!ItemIdIsNormal(itemid))
426                                 continue;
427
428                         tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
429                         tuple->t_len = ItemIdGetLength(itemid);
430                         ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
431
432                         if (all_visible)
433                                 visible = true;
434                         else
435                                 visible = SampleTupleVisible(tuple, tupoffset, scan);
436
437                         /* in pagemode, heapgetpage did this for us */
438                         if (!pagemode)
439                                 CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
440                                                                                                 scan->rs_cbuf, snapshot);
441
442                         if (visible)
443                         {
444                                 /* Found visible tuple, return it. */
445                                 if (!pagemode)
446                                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
447                                 break;
448                         }
449                         else
450                         {
451                                 /* Try next tuple from same page. */
452                                 continue;
453                         }
454                 }
455
456                 /*
457                  * if we get here, it means we've exhausted the items on this page and
458                  * it's time to move to the next.
459                  */
460                 if (!pagemode)
461                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
462
463                 if (tsm->NextSampleBlock)
464                 {
465                         blockno = tsm->NextSampleBlock(scanstate);
466                         Assert(!scan->rs_syncscan);
467                         finished = !BlockNumberIsValid(blockno);
468                 }
469                 else
470                 {
471                         /* Without NextSampleBlock, just do a plain forward seqscan. */
472                         blockno++;
473                         if (blockno >= scan->rs_nblocks)
474                                 blockno = 0;
475
476                         /*
477                          * Report our new scan position for synchronization purposes.
478                          *
479                          * Note: we do this before checking for end of scan so that the
480                          * final state of the position hint is back at the start of the
481                          * rel.  That's not strictly necessary, but otherwise when you run
482                          * the same query multiple times the starting position would shift
483                          * a little bit backwards on every invocation, which is confusing.
484                          * We don't guarantee any specific ordering in general, though.
485                          */
486                         if (scan->rs_syncscan)
487                                 ss_report_location(scan->rs_rd, blockno);
488
489                         finished = (blockno == scan->rs_startblock);
490                 }
491
492                 /*
493                  * Reached end of scan?
494                  */
495                 if (finished)
496                 {
497                         if (BufferIsValid(scan->rs_cbuf))
498                                 ReleaseBuffer(scan->rs_cbuf);
499                         scan->rs_cbuf = InvalidBuffer;
500                         scan->rs_cblock = InvalidBlockNumber;
501                         tuple->t_data = NULL;
502                         scan->rs_inited = false;
503                         return NULL;
504                 }
505
506                 Assert(blockno < scan->rs_nblocks);
507                 heapgetpage(scan, blockno);
508
509                 /* Re-establish state for new page */
510                 if (!pagemode)
511                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
512
513                 page = (Page) BufferGetPage(scan->rs_cbuf);
514                 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
515                 maxoffset = PageGetMaxOffsetNumber(page);
516         }
517
518         /* Count successfully-fetched tuples as heap fetches */
519         pgstat_count_heap_getnext(scan->rs_rd);
520
521         return &(scan->rs_ctup);
522 }
523
524 /*
525  * Check visibility of the tuple.
526  */
527 static bool
528 SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
529 {
530         if (scan->rs_pageatatime)
531         {
532                 /*
533                  * In pageatatime mode, heapgetpage() already did visibility checks,
534                  * so just look at the info it left in rs_vistuples[].
535                  *
536                  * We use a binary search over the known-sorted array.  Note: we could
537                  * save some effort if we insisted that NextSampleTuple select tuples
538                  * in increasing order, but it's not clear that there would be enough
539                  * gain to justify the restriction.
540                  */
541                 int                     start = 0,
542                                         end = scan->rs_ntuples - 1;
543
544                 while (start <= end)
545                 {
546                         int                     mid = (start + end) / 2;
547                         OffsetNumber curoffset = scan->rs_vistuples[mid];
548
549                         if (tupoffset == curoffset)
550                                 return true;
551                         else if (tupoffset < curoffset)
552                                 end = mid - 1;
553                         else
554                                 start = mid + 1;
555                 }
556
557                 return false;
558         }
559         else
560         {
561                 /* Otherwise, we have to check the tuple individually. */
562                 return HeapTupleSatisfiesVisibility(tuple,
563                                                                                         scan->rs_snapshot,
564                                                                                         scan->rs_cbuf);
565         }
566 }