/*-------------------------------------------------------------------------
 *
 * nodeSamplescan.c
 *	  Support routines for sample scans of relations (table sampling).
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSamplescan.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/hash.h"
#include "access/relscan.h"
#include "access/tsmapi.h"
#include "executor/executor.h"
#include "executor/nodeSamplescan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/predicate.h"
#include "utils/builtins.h"
#include "utils/rel.h"
#include "utils/tqual.h"

29 static void InitScanRelation(SampleScanState *node, EState *estate, int eflags);
30 static TupleTableSlot *SampleNext(SampleScanState *node);
31 static void tablesample_init(SampleScanState *scanstate);
32 static HeapTuple tablesample_getnext(SampleScanState *scanstate);
33 static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
36 /* ----------------------------------------------------------------
38 * ----------------------------------------------------------------
41 /* ----------------------------------------------------------------
44 * This is a workhorse for ExecSampleScan
45 * ----------------------------------------------------------------
47 static TupleTableSlot *
48 SampleNext(SampleScanState *node)
54 * if this is first call within a scan, initialize
57 tablesample_init(node);
60 * get the next tuple, and store it in our result slot
62 tuple = tablesample_getnext(node);
64 slot = node->ss.ss_ScanTupleSlot;
67 ExecStoreTuple(tuple, /* tuple to store */
68 slot, /* slot to store in */
69 node->ss.ss_currentScanDesc->rs_cbuf, /* tuple's buffer */
70 false); /* don't pfree this pointer */
78 * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
81 SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
84 * No need to recheck for SampleScan, since like SeqScan we don't pass any
85 * checkable keys to heap_beginscan.
90 /* ----------------------------------------------------------------
91 * ExecSampleScan(node)
93 * Scans the relation using the sampling method and returns
94 * the next qualifying tuple.
95 * We call the ExecScan() routine and pass it the appropriate
96 * access method functions.
97 * ----------------------------------------------------------------
100 ExecSampleScan(SampleScanState *node)
102 return ExecScan((ScanState *) node,
103 (ExecScanAccessMtd) SampleNext,
104 (ExecScanRecheckMtd) SampleRecheck);
107 /* ----------------------------------------------------------------
110 * Set up to access the scan relation.
111 * ----------------------------------------------------------------
114 InitScanRelation(SampleScanState *node, EState *estate, int eflags)
116 Relation currentRelation;
119 * get the relation object id from the relid'th entry in the range table,
120 * open that relation and acquire appropriate lock on it.
122 currentRelation = ExecOpenScanRelation(estate,
123 ((SampleScan *) node->ss.ps.plan)->scan.scanrelid,
126 node->ss.ss_currentRelation = currentRelation;
128 /* we won't set up the HeapScanDesc till later */
129 node->ss.ss_currentScanDesc = NULL;
131 /* and report the scan tuple slot's rowtype */
132 ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
136 /* ----------------------------------------------------------------
138 * ----------------------------------------------------------------
141 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
143 SampleScanState *scanstate;
144 TableSampleClause *tsc = node->tablesample;
147 Assert(outerPlan(node) == NULL);
148 Assert(innerPlan(node) == NULL);
151 * create state structure
153 scanstate = makeNode(SampleScanState);
154 scanstate->ss.ps.plan = (Plan *) node;
155 scanstate->ss.ps.state = estate;
158 * Miscellaneous initialization
160 * create expression context for node
162 ExecAssignExprContext(estate, &scanstate->ss.ps);
165 * initialize child expressions
167 scanstate->ss.ps.targetlist = (List *)
168 ExecInitExpr((Expr *) node->scan.plan.targetlist,
169 (PlanState *) scanstate);
170 scanstate->ss.ps.qual = (List *)
171 ExecInitExpr((Expr *) node->scan.plan.qual,
172 (PlanState *) scanstate);
174 scanstate->args = (List *)
175 ExecInitExpr((Expr *) tsc->args,
176 (PlanState *) scanstate);
177 scanstate->repeatable =
178 ExecInitExpr(tsc->repeatable,
179 (PlanState *) scanstate);
182 * tuple table initialization
184 ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
185 ExecInitScanTupleSlot(estate, &scanstate->ss);
188 * initialize scan relation
190 InitScanRelation(scanstate, estate, eflags);
193 * Initialize result tuple type and projection info.
195 ExecAssignResultTypeFromTL(&scanstate->ss.ps);
196 ExecAssignScanProjectionInfo(&scanstate->ss);
199 * If we don't have a REPEATABLE clause, select a random seed. We want to
200 * do this just once, since the seed shouldn't change over rescans.
202 if (tsc->repeatable == NULL)
203 scanstate->seed = random();
206 * Finally, initialize the TABLESAMPLE method handler.
208 tsm = GetTsmRoutine(tsc->tsmhandler);
209 scanstate->tsmroutine = tsm;
210 scanstate->tsm_state = NULL;
212 if (tsm->InitSampleScan)
213 tsm->InitSampleScan(scanstate, eflags);
215 /* We'll do BeginSampleScan later; we can't evaluate params yet */
216 scanstate->begun = false;
221 /* ----------------------------------------------------------------
224 * frees any storage allocated through C routines.
225 * ----------------------------------------------------------------
228 ExecEndSampleScan(SampleScanState *node)
231 * Tell sampling function that we finished the scan.
233 if (node->tsmroutine->EndSampleScan)
234 node->tsmroutine->EndSampleScan(node);
237 * Free the exprcontext
239 ExecFreeExprContext(&node->ss.ps);
242 * clean out the tuple table
244 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
245 ExecClearTuple(node->ss.ss_ScanTupleSlot);
250 if (node->ss.ss_currentScanDesc)
251 heap_endscan(node->ss.ss_currentScanDesc);
254 * close the heap relation.
256 ExecCloseScanRelation(node->ss.ss_currentRelation);
259 /* ----------------------------------------------------------------
260 * ExecReScanSampleScan
262 * Rescans the relation.
264 * ----------------------------------------------------------------
267 ExecReScanSampleScan(SampleScanState *node)
269 /* Remember we need to do BeginSampleScan again (if we did it at all) */
272 ExecScanReScan(&node->ss);
277 * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
280 tablesample_init(SampleScanState *scanstate)
282 TsmRoutine *tsm = scanstate->tsmroutine;
283 ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
292 params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
295 foreach(arg, scanstate->args)
297 ExprState *argstate = (ExprState *) lfirst(arg);
299 params[i] = ExecEvalExprSwitchContext(argstate,
304 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
305 errmsg("TABLESAMPLE parameter cannot be null")));
309 if (scanstate->repeatable)
311 datum = ExecEvalExprSwitchContext(scanstate->repeatable,
316 (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
317 errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
320 * The REPEATABLE parameter has been coerced to float8 by the parser.
321 * The reason for using float8 at the SQL level is that it will
322 * produce unsurprising results both for users used to databases that
323 * accept only integers in the REPEATABLE clause and for those who
324 * might expect that REPEATABLE works like setseed() (a float in the
325 * range from -1 to 1).
327 * We use hashfloat8() to convert the supplied value into a suitable
328 * seed. For regression-testing purposes, that has the convenient
329 * property that REPEATABLE(0) gives a machine-independent result.
331 seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
335 /* Use the seed selected by ExecInitSampleScan */
336 seed = scanstate->seed;
339 /* Set default values for params that BeginSampleScan can adjust */
340 scanstate->use_bulkread = true;
341 scanstate->use_pagemode = true;
343 /* Let tablesample method do its thing */
344 tsm->BeginSampleScan(scanstate,
346 list_length(scanstate->args),
349 /* We'll use syncscan if there's no NextSampleBlock function */
350 allow_sync = (tsm->NextSampleBlock == NULL);
352 /* Now we can create or reset the HeapScanDesc */
353 if (scanstate->ss.ss_currentScanDesc == NULL)
355 scanstate->ss.ss_currentScanDesc =
356 heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
357 scanstate->ss.ps.state->es_snapshot,
359 scanstate->use_bulkread,
361 scanstate->use_pagemode);
365 heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
366 scanstate->use_bulkread,
368 scanstate->use_pagemode);
373 /* And we're initialized. */
374 scanstate->begun = true;
378 * Get next tuple from TABLESAMPLE method.
380 * Note: an awful lot of this is copied-and-pasted from heapam.c. It would
381 * perhaps be better to refactor to share more code.
384 tablesample_getnext(SampleScanState *scanstate)
386 TsmRoutine *tsm = scanstate->tsmroutine;
387 HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
388 HeapTuple tuple = &(scan->rs_ctup);
389 Snapshot snapshot = scan->rs_snapshot;
390 bool pagemode = scan->rs_pageatatime;
394 OffsetNumber maxoffset;
396 if (!scan->rs_inited)
399 * return null immediately if relation is empty
401 if (scan->rs_nblocks == 0)
403 Assert(!BufferIsValid(scan->rs_cbuf));
404 tuple->t_data = NULL;
407 if (tsm->NextSampleBlock)
409 blockno = tsm->NextSampleBlock(scanstate);
410 if (!BlockNumberIsValid(blockno))
412 tuple->t_data = NULL;
417 blockno = scan->rs_startblock;
418 Assert(blockno < scan->rs_nblocks);
419 heapgetpage(scan, blockno);
420 scan->rs_inited = true;
424 /* continue from previously returned page/tuple */
425 blockno = scan->rs_cblock; /* current page */
429 * When not using pagemode, we must lock the buffer during tuple
433 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
435 page = (Page) BufferGetPage(scan->rs_cbuf);
436 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
437 maxoffset = PageGetMaxOffsetNumber(page);
441 OffsetNumber tupoffset;
444 CHECK_FOR_INTERRUPTS();
446 /* Ask the tablesample method which tuples to check on this page. */
447 tupoffset = tsm->NextSampleTuple(scanstate,
451 if (OffsetNumberIsValid(tupoffset))
456 /* Skip invalid tuple pointers. */
457 itemid = PageGetItemId(page, tupoffset);
458 if (!ItemIdIsNormal(itemid))
461 tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
462 tuple->t_len = ItemIdGetLength(itemid);
463 ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
468 visible = SampleTupleVisible(tuple, tupoffset, scan);
470 /* in pagemode, heapgetpage did this for us */
472 CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
473 scan->rs_cbuf, snapshot);
477 /* Found visible tuple, return it. */
479 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
484 /* Try next tuple from same page. */
490 * if we get here, it means we've exhausted the items on this page and
491 * it's time to move to the next.
494 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
496 if (tsm->NextSampleBlock)
498 blockno = tsm->NextSampleBlock(scanstate);
499 Assert(!scan->rs_syncscan);
500 finished = !BlockNumberIsValid(blockno);
504 /* Without NextSampleBlock, just do a plain forward seqscan. */
506 if (blockno >= scan->rs_nblocks)
510 * Report our new scan position for synchronization purposes.
512 * Note: we do this before checking for end of scan so that the
513 * final state of the position hint is back at the start of the
514 * rel. That's not strictly necessary, but otherwise when you run
515 * the same query multiple times the starting position would shift
516 * a little bit backwards on every invocation, which is confusing.
517 * We don't guarantee any specific ordering in general, though.
519 if (scan->rs_syncscan)
520 ss_report_location(scan->rs_rd, blockno);
522 finished = (blockno == scan->rs_startblock);
526 * Reached end of scan?
530 if (BufferIsValid(scan->rs_cbuf))
531 ReleaseBuffer(scan->rs_cbuf);
532 scan->rs_cbuf = InvalidBuffer;
533 scan->rs_cblock = InvalidBlockNumber;
534 tuple->t_data = NULL;
535 scan->rs_inited = false;
539 Assert(blockno < scan->rs_nblocks);
540 heapgetpage(scan, blockno);
542 /* Re-establish state for new page */
544 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
546 page = (Page) BufferGetPage(scan->rs_cbuf);
547 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
548 maxoffset = PageGetMaxOffsetNumber(page);
551 /* Count successfully-fetched tuples as heap fetches */
552 pgstat_count_heap_getnext(scan->rs_rd);
554 return &(scan->rs_ctup);
558 * Check visibility of the tuple.
561 SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
563 if (scan->rs_pageatatime)
566 * In pageatatime mode, heapgetpage() already did visibility checks,
567 * so just look at the info it left in rs_vistuples[].
569 * We use a binary search over the known-sorted array. Note: we could
570 * save some effort if we insisted that NextSampleTuple select tuples
571 * in increasing order, but it's not clear that there would be enough
572 * gain to justify the restriction.
575 end = scan->rs_ntuples - 1;
579 int mid = (start + end) / 2;
580 OffsetNumber curoffset = scan->rs_vistuples[mid];
582 if (tupoffset == curoffset)
584 else if (tupoffset < curoffset)
594 /* Otherwise, we have to check the tuple individually. */
595 return HeapTupleSatisfiesVisibility(tuple,