1 /*-------------------------------------------------------------------------
4 * Support routines for sample scans of relations (table sampling).
6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/executor/nodeSamplescan.c
13 *-------------------------------------------------------------------------
17 #include "access/hash.h"
18 #include "access/relscan.h"
19 #include "access/tsmapi.h"
20 #include "executor/executor.h"
21 #include "executor/nodeSamplescan.h"
22 #include "miscadmin.h"
24 #include "storage/predicate.h"
25 #include "utils/builtins.h"
26 #include "utils/rel.h"
27 #include "utils/tqual.h"
/*
 * Forward declarations for the static routines defined later in this file.
 *
 * NOTE(review): the last prototype continues onto a line that is not visible
 * in this excerpt.
 */
29 static TupleTableSlot *SampleNext(SampleScanState *node);
30 static void tablesample_init(SampleScanState *scanstate);
31 static HeapTuple tablesample_getnext(SampleScanState *scanstate);
32 static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
35 /* ----------------------------------------------------------------
37 * ----------------------------------------------------------------
40 /* ----------------------------------------------------------------
43 * This is a workhorse for ExecSampleScan
44 * ----------------------------------------------------------------
/*
 * SampleNext
 *
 * Access-method callback handed to ExecScan() by ExecSampleScan().  It calls
 * tablesample_init() to set up the sampling method, fetches the next sampled
 * tuple with tablesample_getnext(), and stores that tuple in the node's scan
 * tuple slot (node->ss.ss_ScanTupleSlot).
 *
 * The tuple is stored together with the scan's current buffer (rs_cbuf) and
 * with the final argument false, so the slot does not pfree the tuple.
 *
 * NOTE(review): the braces, local declarations and the conditions guarding
 * the init call and the store are not visible in this excerpt; confirm them
 * against the full file before relying on this description.
 */
46 static TupleTableSlot *
47 SampleNext(SampleScanState *node)
53 * if this is first call within a scan, initialize
56 tablesample_init(node);
59 * get the next tuple, and store it in our result slot
61 tuple = tablesample_getnext(node);
/* The scan tuple slot is where the fetched tuple is stored below. */
63 slot = node->ss.ss_ScanTupleSlot;
66 ExecStoreTuple(tuple, /* tuple to store */
67 slot, /* slot to store in */
68 node->ss.ss_currentScanDesc->rs_cbuf, /* tuple's buffer */
69 false); /* don't pfree this pointer */
77 * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
/*
 * SampleRecheck
 *
 * Recheck callback handed to ExecScan() by ExecSampleScan().  No scan keys
 * are passed to heap_beginscan for a sample scan, so there is nothing to
 * re-verify for the given slot.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
80 SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
83 * No need to recheck for SampleScan, since like SeqScan we don't pass any
84 * checkable keys to heap_beginscan.
89 /* ----------------------------------------------------------------
90 * ExecSampleScan(node)
92 * Scans the relation using the sampling method and returns
93 * the next qualifying tuple.
94 * We call the ExecScan() routine and pass it the appropriate
95 * access method functions.
96 * ----------------------------------------------------------------
/*
 * ExecSampleScan
 *
 * ExecProcNode callback for sample-scan nodes (installed by
 * ExecInitSampleScan).  Downcasts the generic PlanState to SampleScanState
 * and delegates to ExecScan(), supplying SampleNext as the tuple-fetch
 * routine and SampleRecheck as the recheck routine.
 *
 * Returns whatever ExecScan() returns.
 */
98 static TupleTableSlot *
99 ExecSampleScan(PlanState *pstate)
/* castNode converts the generic PlanState pointer to the node-specific type. */
101 SampleScanState *node = castNode(SampleScanState, pstate);
103 return ExecScan(&node->ss,
104 (ExecScanAccessMtd) SampleNext,
105 (ExecScanRecheckMtd) SampleRecheck);
108 /* ----------------------------------------------------------------
110 * ----------------------------------------------------------------
/*
 * ExecInitSampleScan
 *
 * Builds the executor state for a SampleScan plan node:
 *	- allocates the SampleScanState and wires it to the plan, the EState and
 *	  the ExecSampleScan callback;
 *	- creates the expression context, opens the scan relation, and sets up
 *	  the scan tuple slot, result slot and projection;
 *	- initializes the qual, the TABLESAMPLE argument expressions and the
 *	  REPEATABLE expression;
 *	- picks a random seed when there is no REPEATABLE clause;
 *	- looks up the TABLESAMPLE method's routine table and calls its
 *	  InitSampleScan callback if one is provided.
 *
 * The heap scan descriptor is deliberately left NULL and "begun" is left
 * false here; BeginSampleScan is deferred because parameters cannot be
 * evaluated yet.
 *
 * node:   the SampleScan plan node (must have no outer or inner subplan).
 * estate: executor state the new node belongs to.
 * eflags: executor flags, passed through to InitSampleScan.
 *
 * NOTE(review): the declaration of "tsm", the last argument of
 * ExecOpenScanRelation and the return statement are not visible in this
 * excerpt.
 */
113 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
115 SampleScanState *scanstate;
116 TableSampleClause *tsc = node->tablesample;
/* A sample scan is a leaf node: it must not have child plans. */
119 Assert(outerPlan(node) == NULL);
120 Assert(innerPlan(node) == NULL);
123 * create state structure
125 scanstate = makeNode(SampleScanState);
126 scanstate->ss.ps.plan = (Plan *) node;
127 scanstate->ss.ps.state = estate;
/* Route execution of this node through ExecSampleScan. */
128 scanstate->ss.ps.ExecProcNode = ExecSampleScan;
131 * Miscellaneous initialization
133 * create expression context for node
135 ExecAssignExprContext(estate, &scanstate->ss.ps);
138 * Initialize scan relation.
140 * Get the relation object id from the relid'th entry in the range table,
141 * open that relation and acquire appropriate lock on it.
143 scanstate->ss.ss_currentRelation =
144 ExecOpenScanRelation(estate,
145 node->scan.scanrelid,
148 /* we won't set up the HeapScanDesc till later */
149 scanstate->ss.ss_currentScanDesc = NULL;
151 /* and create slot with appropriate rowtype */
152 ExecInitScanTupleSlot(estate, &scanstate->ss,
153 RelationGetDescr(scanstate->ss.ss_currentRelation));
156 * Initialize result slot, type and projection. tuple table and result
157 * tuple initialization
159 ExecInitResultTupleSlotTL(estate, &scanstate->ss.ps);
160 ExecAssignScanProjectionInfo(&scanstate->ss);
163 * initialize child expressions
165 scanstate->ss.ps.qual =
166 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
/* Expression state for the TABLESAMPLE arguments and the REPEATABLE clause. */
168 scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
169 scanstate->repeatable =
170 ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
173 * If we don't have a REPEATABLE clause, select a random seed. We want to
174 * do this just once, since the seed shouldn't change over rescans.
176 if (tsc->repeatable == NULL)
177 scanstate->seed = random();
180 * Finally, initialize the TABLESAMPLE method handler.
182 tsm = GetTsmRoutine(tsc->tsmhandler);
183 scanstate->tsmroutine = tsm;
184 scanstate->tsm_state = NULL;
/* InitSampleScan is optional; call it only when the method supplies one. */
186 if (tsm->InitSampleScan)
187 tsm->InitSampleScan(scanstate, eflags);
189 /* We'll do BeginSampleScan later; we can't evaluate params yet */
190 scanstate->begun = false;
195 /* ----------------------------------------------------------------
198 * frees any storage allocated through C routines.
199 * ----------------------------------------------------------------
/*
 * ExecEndSampleScan
 *
 * Shuts down a sample-scan node.  In order: notifies the TABLESAMPLE method
 * through its optional EndSampleScan callback, frees the expression context,
 * clears the result and scan tuple slots, ends the heap scan if one was
 * started, and closes the scan relation.
 */
202 ExecEndSampleScan(SampleScanState *node)
205 * Tell sampling function that we finished the scan.
207 if (node->tsmroutine->EndSampleScan)
208 node->tsmroutine->EndSampleScan(node);
211 * Free the exprcontext
213 ExecFreeExprContext(&node->ss.ps);
216 * clean out the tuple table
218 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
219 ExecClearTuple(node->ss.ss_ScanTupleSlot);
/* The scan descriptor is created lazily, so it may still be NULL here. */
224 if (node->ss.ss_currentScanDesc)
225 heap_endscan(node->ss.ss_currentScanDesc);
228 * close the heap relation.
230 ExecCloseScanRelation(node->ss.ss_currentRelation);
233 /* ----------------------------------------------------------------
234 * ExecReScanSampleScan
236 * Rescans the relation.
238 * ----------------------------------------------------------------
/*
 * ExecReScanSampleScan
 *
 * Prepares the node to be scanned again from the start by resetting the
 * generic scan state through ExecScanReScan().
 *
 * NOTE(review): the statement that records "BeginSampleScan must be done
 * again" is not visible in this excerpt; presumably it resets the node's
 * "begun" flag -- confirm against the full file.
 */
241 ExecReScanSampleScan(SampleScanState *node)
243 /* Remember we need to do BeginSampleScan again (if we did it at all) */
246 ExecScanReScan(&node->ss);
251 * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
/*
 * tablesample_init
 *
 * Prepares the TABLESAMPLE method for a (re)scan of the relation:
 *	1. evaluates every TABLESAMPLE argument expression into a palloc'd
 *	   Datum array; the visible error report says a parameter cannot be
 *	   null (ERRCODE_INVALID_TABLESAMPLE_ARGUMENT);
 *	2. determines the seed: when a REPEATABLE expression exists it is
 *	   evaluated and hashed with hashfloat8(), with a separate error for a
 *	   null value (ERRCODE_INVALID_TABLESAMPLE_REPEAT); otherwise the seed
 *	   chosen by ExecInitSampleScan is used;
 *	3. sets use_bulkread and use_pagemode to true, then lets the method's
 *	   BeginSampleScan callback adjust them;
 *	4. creates the heap scan descriptor on first use, or resets its
 *	   parameters when one already exists;
 *	5. marks the node as begun.
 *
 * NOTE(review): declarations of params, seed, datum, allow_sync, arg and i,
 * the null tests themselves, and some call arguments are not visible in this
 * excerpt.
 */
254 tablesample_init(SampleScanState *scanstate)
256 TsmRoutine *tsm = scanstate->tsmroutine;
257 ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
/* One Datum per TABLESAMPLE argument expression. */
266 params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
269 foreach(arg, scanstate->args)
271 ExprState *argstate = (ExprState *) lfirst(arg);
273 params[i] = ExecEvalExprSwitchContext(argstate,
278 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
279 errmsg("TABLESAMPLE parameter cannot be null")));
283 if (scanstate->repeatable)
285 datum = ExecEvalExprSwitchContext(scanstate->repeatable,
290 (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
291 errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
294 * The REPEATABLE parameter has been coerced to float8 by the parser.
295 * The reason for using float8 at the SQL level is that it will
296 * produce unsurprising results both for users used to databases that
297 * accept only integers in the REPEATABLE clause and for those who
298 * might expect that REPEATABLE works like setseed() (a float in the
299 * range from -1 to 1).
301 * We use hashfloat8() to convert the supplied value into a suitable
302 * seed. For regression-testing purposes, that has the convenient
303 * property that REPEATABLE(0) gives a machine-independent result.
305 seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
309 /* Use the seed selected by ExecInitSampleScan */
310 seed = scanstate->seed;
313 /* Set default values for params that BeginSampleScan can adjust */
314 scanstate->use_bulkread = true;
315 scanstate->use_pagemode = true;
317 /* Let tablesample method do its thing */
318 tsm->BeginSampleScan(scanstate,
320 list_length(scanstate->args),
323 /* We'll use syncscan if there's no NextSampleBlock function */
324 allow_sync = (tsm->NextSampleBlock == NULL);
326 /* Now we can create or reset the HeapScanDesc */
327 if (scanstate->ss.ss_currentScanDesc == NULL)
329 scanstate->ss.ss_currentScanDesc =
330 heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
331 scanstate->ss.ps.state->es_snapshot,
333 scanstate->use_bulkread,
335 scanstate->use_pagemode);
339 heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
340 scanstate->use_bulkread,
342 scanstate->use_pagemode);
347 /* And we're initialized. */
348 scanstate->begun = true;
352 * Get next tuple from TABLESAMPLE method.
354 * Note: an awful lot of this is copied-and-pasted from heapam.c. It would
355 * perhaps be better to refactor to share more code.
/*
 * tablesample_getnext
 *
 * Produces the next tuple selected by the TABLESAMPLE method, using the heap
 * scan descriptor held in the node.
 *
 * First call of a scan (rs_inited is false): the starting block comes from
 * the method's NextSampleBlock callback when it has one, otherwise from
 * rs_startblock; that page is read with heapgetpage().  Later calls resume
 * from the current block (rs_cblock).
 *
 * For each page, NextSampleTuple is asked repeatedly for an offset to
 * examine.  Offsets whose line pointer is not "normal" are skipped; the
 * remaining candidates are tested with SampleTupleVisible().  When the page
 * is exhausted, the next block is taken from NextSampleBlock, or, without
 * that callback, the scan advances as a plain forward sequential scan that
 * finishes on returning to rs_startblock.
 *
 * On success the function counts a heap fetch and returns a pointer to
 * scan->rs_ctup.  At end of scan the buffer is released, the scan position
 * is invalidated, tuple->t_data is set to NULL and rs_inited is cleared.
 *
 * NOTE(review): many lines (braces, loop headers, several local
 * declarations, pagemode tests, block-number arithmetic and the early
 * returns) are not visible in this excerpt; the exact conditions around the
 * LockBuffer calls and the value returned at end of scan should be confirmed
 * against the full file.
 */
358 tablesample_getnext(SampleScanState *scanstate)
360 TsmRoutine *tsm = scanstate->tsmroutine;
361 HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
/* Tuples are returned through the scan descriptor's own rs_ctup field. */
362 HeapTuple tuple = &(scan->rs_ctup);
363 Snapshot snapshot = scan->rs_snapshot;
364 bool pagemode = scan->rs_pageatatime;
368 OffsetNumber maxoffset;
370 if (!scan->rs_inited)
373 * return null immediately if relation is empty
375 if (scan->rs_nblocks == 0)
377 Assert(!BufferIsValid(scan->rs_cbuf));
378 tuple->t_data = NULL;
381 if (tsm->NextSampleBlock)
383 blockno = tsm->NextSampleBlock(scanstate);
384 if (!BlockNumberIsValid(blockno))
386 tuple->t_data = NULL;
391 blockno = scan->rs_startblock;
392 Assert(blockno < scan->rs_nblocks);
393 heapgetpage(scan, blockno);
394 scan->rs_inited = true;
398 /* continue from previously returned page/tuple */
399 blockno = scan->rs_cblock; /* current page */
403 * When not using pagemode, we must lock the buffer during tuple
407 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
409 page = (Page) BufferGetPage(scan->rs_cbuf);
410 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
411 maxoffset = PageGetMaxOffsetNumber(page);
415 OffsetNumber tupoffset;
418 CHECK_FOR_INTERRUPTS();
420 /* Ask the tablesample method which tuples to check on this page. */
421 tupoffset = tsm->NextSampleTuple(scanstate,
425 if (OffsetNumberIsValid(tupoffset))
430 /* Skip invalid tuple pointers. */
431 itemid = PageGetItemId(page, tupoffset);
432 if (!ItemIdIsNormal(itemid))
/* Point the scan's tuple at the item found on the page. */
435 tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
436 tuple->t_len = ItemIdGetLength(itemid);
437 ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
442 visible = SampleTupleVisible(tuple, tupoffset, scan);
444 /* in pagemode, heapgetpage did this for us */
446 CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
447 scan->rs_cbuf, snapshot);
451 /* Found visible tuple, return it. */
453 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
458 /* Try next tuple from same page. */
464 * if we get here, it means we've exhausted the items on this page and
465 * it's time to move to the next.
468 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
470 if (tsm->NextSampleBlock)
472 blockno = tsm->NextSampleBlock(scanstate);
473 Assert(!scan->rs_syncscan);
474 finished = !BlockNumberIsValid(blockno);
478 /* Without NextSampleBlock, just do a plain forward seqscan. */
480 if (blockno >= scan->rs_nblocks)
484 * Report our new scan position for synchronization purposes.
486 * Note: we do this before checking for end of scan so that the
487 * final state of the position hint is back at the start of the
488 * rel. That's not strictly necessary, but otherwise when you run
489 * the same query multiple times the starting position would shift
490 * a little bit backwards on every invocation, which is confusing.
491 * We don't guarantee any specific ordering in general, though.
493 if (scan->rs_syncscan)
494 ss_report_location(scan->rs_rd, blockno);
496 finished = (blockno == scan->rs_startblock);
500 * Reached end of scan?
504 if (BufferIsValid(scan->rs_cbuf))
505 ReleaseBuffer(scan->rs_cbuf);
506 scan->rs_cbuf = InvalidBuffer;
507 scan->rs_cblock = InvalidBlockNumber;
508 tuple->t_data = NULL;
509 scan->rs_inited = false;
513 Assert(blockno < scan->rs_nblocks);
514 heapgetpage(scan, blockno);
516 /* Re-establish state for new page */
518 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
520 page = (Page) BufferGetPage(scan->rs_cbuf);
521 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
522 maxoffset = PageGetMaxOffsetNumber(page);
525 /* Count successfully-fetched tuples as heap fetches */
526 pgstat_count_heap_getnext(scan->rs_rd);
528 return &(scan->rs_ctup);
532 * Check visibility of the tuple.
535 SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
537 if (scan->rs_pageatatime)
540 * In pageatatime mode, heapgetpage() already did visibility checks,
541 * so just look at the info it left in rs_vistuples[].
543 * We use a binary search over the known-sorted array. Note: we could
544 * save some effort if we insisted that NextSampleTuple select tuples
545 * in increasing order, but it's not clear that there would be enough
546 * gain to justify the restriction.
549 end = scan->rs_ntuples - 1;
553 int mid = (start + end) / 2;
554 OffsetNumber curoffset = scan->rs_vistuples[mid];
556 if (tupoffset == curoffset)
558 else if (tupoffset < curoffset)
568 /* Otherwise, we have to check the tuple individually. */
569 return HeapTupleSatisfiesVisibility(tuple,