1 /*-------------------------------------------------------------------------
4 * Support routines for sample scans of relations (table sampling).
6 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/executor/nodeSamplescan.c
13 *-------------------------------------------------------------------------
17 #include "access/hash.h"
18 #include "access/relscan.h"
19 #include "access/tsmapi.h"
20 #include "executor/executor.h"
21 #include "executor/nodeSamplescan.h"
22 #include "miscadmin.h"
24 #include "storage/predicate.h"
25 #include "utils/rel.h"
26 #include "utils/tqual.h"
28 static void InitScanRelation(SampleScanState *node, EState *estate, int eflags);
29 static TupleTableSlot *SampleNext(SampleScanState *node);
30 static void tablesample_init(SampleScanState *scanstate);
31 static HeapTuple tablesample_getnext(SampleScanState *scanstate);
32 static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
35 /* ----------------------------------------------------------------
37 * ----------------------------------------------------------------
40 /* ----------------------------------------------------------------
43 * This is a workhorse for ExecSampleScan
44 * ----------------------------------------------------------------
46 static TupleTableSlot *
47 SampleNext(SampleScanState *node)
53 * if this is first call within a scan, initialize
56 tablesample_init(node);
59 * get the next tuple, and store it in our result slot
61 tuple = tablesample_getnext(node);
63 slot = node->ss.ss_ScanTupleSlot;
66 ExecStoreTuple(tuple, /* tuple to store */
67 slot, /* slot to store in */
68 node->ss.ss_currentScanDesc->rs_cbuf, /* tuple's buffer */
69 false); /* don't pfree this pointer */
77 * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
80 SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
83 * No need to recheck for SampleScan, since like SeqScan we don't pass any
84 * checkable keys to heap_beginscan.
89 /* ----------------------------------------------------------------
90 * ExecSampleScan(node)
92 * Scans the relation using the sampling method and returns
93 * the next qualifying tuple.
94 * We call the ExecScan() routine and pass it the appropriate
95 * access method functions.
96 * ----------------------------------------------------------------
99 ExecSampleScan(SampleScanState *node)
101 return ExecScan((ScanState *) node,
102 (ExecScanAccessMtd) SampleNext,
103 (ExecScanRecheckMtd) SampleRecheck);
106 /* ----------------------------------------------------------------
109 * Set up to access the scan relation.
110 * ----------------------------------------------------------------
113 InitScanRelation(SampleScanState *node, EState *estate, int eflags)
115 Relation currentRelation;
118 * get the relation object id from the relid'th entry in the range table,
119 * open that relation and acquire appropriate lock on it.
121 currentRelation = ExecOpenScanRelation(estate,
122 ((SampleScan *) node->ss.ps.plan)->scan.scanrelid,
125 node->ss.ss_currentRelation = currentRelation;
127 /* we won't set up the HeapScanDesc till later */
128 node->ss.ss_currentScanDesc = NULL;
130 /* and report the scan tuple slot's rowtype */
131 ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
135 /* ----------------------------------------------------------------
137 * ----------------------------------------------------------------
140 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
142 SampleScanState *scanstate;
143 TableSampleClause *tsc = node->tablesample;
146 Assert(outerPlan(node) == NULL);
147 Assert(innerPlan(node) == NULL);
150 * create state structure
152 scanstate = makeNode(SampleScanState);
153 scanstate->ss.ps.plan = (Plan *) node;
154 scanstate->ss.ps.state = estate;
157 * Miscellaneous initialization
159 * create expression context for node
161 ExecAssignExprContext(estate, &scanstate->ss.ps);
164 * initialize child expressions
166 scanstate->ss.ps.targetlist = (List *)
167 ExecInitExpr((Expr *) node->scan.plan.targetlist,
168 (PlanState *) scanstate);
169 scanstate->ss.ps.qual = (List *)
170 ExecInitExpr((Expr *) node->scan.plan.qual,
171 (PlanState *) scanstate);
173 scanstate->args = (List *)
174 ExecInitExpr((Expr *) tsc->args,
175 (PlanState *) scanstate);
176 scanstate->repeatable =
177 ExecInitExpr(tsc->repeatable,
178 (PlanState *) scanstate);
181 * tuple table initialization
183 ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
184 ExecInitScanTupleSlot(estate, &scanstate->ss);
187 * initialize scan relation
189 InitScanRelation(scanstate, estate, eflags);
191 scanstate->ss.ps.ps_TupFromTlist = false;
194 * Initialize result tuple type and projection info.
196 ExecAssignResultTypeFromTL(&scanstate->ss.ps);
197 ExecAssignScanProjectionInfo(&scanstate->ss);
200 * If we don't have a REPEATABLE clause, select a random seed. We want to
201 * do this just once, since the seed shouldn't change over rescans.
203 if (tsc->repeatable == NULL)
204 scanstate->seed = random();
207 * Finally, initialize the TABLESAMPLE method handler.
209 tsm = GetTsmRoutine(tsc->tsmhandler);
210 scanstate->tsmroutine = tsm;
211 scanstate->tsm_state = NULL;
213 if (tsm->InitSampleScan)
214 tsm->InitSampleScan(scanstate, eflags);
216 /* We'll do BeginSampleScan later; we can't evaluate params yet */
217 scanstate->begun = false;
222 /* ----------------------------------------------------------------
225 * frees any storage allocated through C routines.
226 * ----------------------------------------------------------------
229 ExecEndSampleScan(SampleScanState *node)
232 * Tell sampling function that we finished the scan.
234 if (node->tsmroutine->EndSampleScan)
235 node->tsmroutine->EndSampleScan(node);
238 * Free the exprcontext
240 ExecFreeExprContext(&node->ss.ps);
243 * clean out the tuple table
245 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
246 ExecClearTuple(node->ss.ss_ScanTupleSlot);
251 if (node->ss.ss_currentScanDesc)
252 heap_endscan(node->ss.ss_currentScanDesc);
255 * close the heap relation.
257 ExecCloseScanRelation(node->ss.ss_currentRelation);
260 /* ----------------------------------------------------------------
261 * ExecReScanSampleScan
263 * Rescans the relation.
265 * ----------------------------------------------------------------
268 ExecReScanSampleScan(SampleScanState *node)
270 /* Remember we need to do BeginSampleScan again (if we did it at all) */
273 ExecScanReScan(&node->ss);
278 * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
281 tablesample_init(SampleScanState *scanstate)
283 TsmRoutine *tsm = scanstate->tsmroutine;
284 ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
293 params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
296 foreach(arg, scanstate->args)
298 ExprState *argstate = (ExprState *) lfirst(arg);
300 params[i] = ExecEvalExprSwitchContext(argstate,
306 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
307 errmsg("TABLESAMPLE parameter cannot be null")));
311 if (scanstate->repeatable)
313 datum = ExecEvalExprSwitchContext(scanstate->repeatable,
319 (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
320 errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
323 * The REPEATABLE parameter has been coerced to float8 by the parser.
324 * The reason for using float8 at the SQL level is that it will
325 * produce unsurprising results both for users used to databases that
326 * accept only integers in the REPEATABLE clause and for those who
327 * might expect that REPEATABLE works like setseed() (a float in the
328 * range from -1 to 1).
330 * We use hashfloat8() to convert the supplied value into a suitable
331 * seed. For regression-testing purposes, that has the convenient
332 * property that REPEATABLE(0) gives a machine-independent result.
334 seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
338 /* Use the seed selected by ExecInitSampleScan */
339 seed = scanstate->seed;
342 /* Set default values for params that BeginSampleScan can adjust */
343 scanstate->use_bulkread = true;
344 scanstate->use_pagemode = true;
346 /* Let tablesample method do its thing */
347 tsm->BeginSampleScan(scanstate,
349 list_length(scanstate->args),
352 /* We'll use syncscan if there's no NextSampleBlock function */
353 allow_sync = (tsm->NextSampleBlock == NULL);
355 /* Now we can create or reset the HeapScanDesc */
356 if (scanstate->ss.ss_currentScanDesc == NULL)
358 scanstate->ss.ss_currentScanDesc =
359 heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
360 scanstate->ss.ps.state->es_snapshot,
362 scanstate->use_bulkread,
364 scanstate->use_pagemode);
368 heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
369 scanstate->use_bulkread,
371 scanstate->use_pagemode);
376 /* And we're initialized. */
377 scanstate->begun = true;
381 * Get next tuple from TABLESAMPLE method.
383 * Note: an awful lot of this is copied-and-pasted from heapam.c. It would
384 * perhaps be better to refactor to share more code.
387 tablesample_getnext(SampleScanState *scanstate)
389 TsmRoutine *tsm = scanstate->tsmroutine;
390 HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
391 HeapTuple tuple = &(scan->rs_ctup);
392 Snapshot snapshot = scan->rs_snapshot;
393 bool pagemode = scan->rs_pageatatime;
397 OffsetNumber maxoffset;
399 if (!scan->rs_inited)
402 * return null immediately if relation is empty
404 if (scan->rs_nblocks == 0)
406 Assert(!BufferIsValid(scan->rs_cbuf));
407 tuple->t_data = NULL;
410 if (tsm->NextSampleBlock)
412 blockno = tsm->NextSampleBlock(scanstate);
413 if (!BlockNumberIsValid(blockno))
415 tuple->t_data = NULL;
420 blockno = scan->rs_startblock;
421 Assert(blockno < scan->rs_nblocks);
422 heapgetpage(scan, blockno);
423 scan->rs_inited = true;
427 /* continue from previously returned page/tuple */
428 blockno = scan->rs_cblock; /* current page */
432 * When not using pagemode, we must lock the buffer during tuple
436 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
438 page = (Page) BufferGetPage(scan->rs_cbuf);
439 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
440 maxoffset = PageGetMaxOffsetNumber(page);
444 OffsetNumber tupoffset;
447 CHECK_FOR_INTERRUPTS();
449 /* Ask the tablesample method which tuples to check on this page. */
450 tupoffset = tsm->NextSampleTuple(scanstate,
454 if (OffsetNumberIsValid(tupoffset))
459 /* Skip invalid tuple pointers. */
460 itemid = PageGetItemId(page, tupoffset);
461 if (!ItemIdIsNormal(itemid))
464 tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
465 tuple->t_len = ItemIdGetLength(itemid);
466 ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
471 visible = SampleTupleVisible(tuple, tupoffset, scan);
473 /* in pagemode, heapgetpage did this for us */
475 CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
476 scan->rs_cbuf, snapshot);
480 /* Found visible tuple, return it. */
482 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
487 /* Try next tuple from same page. */
493 * if we get here, it means we've exhausted the items on this page and
494 * it's time to move to the next.
497 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
499 if (tsm->NextSampleBlock)
501 blockno = tsm->NextSampleBlock(scanstate);
502 Assert(!scan->rs_syncscan);
503 finished = !BlockNumberIsValid(blockno);
507 /* Without NextSampleBlock, just do a plain forward seqscan. */
509 if (blockno >= scan->rs_nblocks)
513 * Report our new scan position for synchronization purposes.
515 * Note: we do this before checking for end of scan so that the
516 * final state of the position hint is back at the start of the
517 * rel. That's not strictly necessary, but otherwise when you run
518 * the same query multiple times the starting position would shift
519 * a little bit backwards on every invocation, which is confusing.
520 * We don't guarantee any specific ordering in general, though.
522 if (scan->rs_syncscan)
523 ss_report_location(scan->rs_rd, blockno);
525 finished = (blockno == scan->rs_startblock);
529 * Reached end of scan?
533 if (BufferIsValid(scan->rs_cbuf))
534 ReleaseBuffer(scan->rs_cbuf);
535 scan->rs_cbuf = InvalidBuffer;
536 scan->rs_cblock = InvalidBlockNumber;
537 tuple->t_data = NULL;
538 scan->rs_inited = false;
542 Assert(blockno < scan->rs_nblocks);
543 heapgetpage(scan, blockno);
545 /* Re-establish state for new page */
547 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
549 page = (Page) BufferGetPage(scan->rs_cbuf);
550 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
551 maxoffset = PageGetMaxOffsetNumber(page);
554 /* Count successfully-fetched tuples as heap fetches */
555 pgstat_count_heap_getnext(scan->rs_rd);
557 return &(scan->rs_ctup);
561 * Check visibility of the tuple.
564 SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
566 if (scan->rs_pageatatime)
569 * In pageatatime mode, heapgetpage() already did visibility checks,
570 * so just look at the info it left in rs_vistuples[].
572 * We use a binary search over the known-sorted array. Note: we could
573 * save some effort if we insisted that NextSampleTuple select tuples
574 * in increasing order, but it's not clear that there would be enough
575 * gain to justify the restriction.
578 end = scan->rs_ntuples - 1;
582 int mid = (start + end) / 2;
583 OffsetNumber curoffset = scan->rs_vistuples[mid];
585 if (tupoffset == curoffset)
587 else if (tupoffset < curoffset)
597 /* Otherwise, we have to check the tuple individually. */
598 return HeapTupleSatisfiesVisibility(tuple,