]> granicus.if.org Git - postgresql/blob - src/backend/executor/nodeSamplescan.c
Allow tupleslots to have a fixed tupledesc, use in executor nodes.
[postgresql] / src / backend / executor / nodeSamplescan.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeSamplescan.c
4  *        Support routines for sample scans of relations (table sampling).
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/executor/nodeSamplescan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "access/hash.h"
18 #include "access/relscan.h"
19 #include "access/tsmapi.h"
20 #include "executor/executor.h"
21 #include "executor/nodeSamplescan.h"
22 #include "miscadmin.h"
23 #include "pgstat.h"
24 #include "storage/predicate.h"
25 #include "utils/builtins.h"
26 #include "utils/rel.h"
27 #include "utils/tqual.h"
28
29 static TupleTableSlot *SampleNext(SampleScanState *node);
30 static void tablesample_init(SampleScanState *scanstate);
31 static HeapTuple tablesample_getnext(SampleScanState *scanstate);
32 static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
33                                    HeapScanDesc scan);
34
35 /* ----------------------------------------------------------------
36  *                                              Scan Support
37  * ----------------------------------------------------------------
38  */
39
40 /* ----------------------------------------------------------------
41  *              SampleNext
42  *
43  *              This is a workhorse for ExecSampleScan
44  * ----------------------------------------------------------------
45  */
46 static TupleTableSlot *
47 SampleNext(SampleScanState *node)
48 {
49         HeapTuple       tuple;
50         TupleTableSlot *slot;
51
52         /*
53          * if this is first call within a scan, initialize
54          */
55         if (!node->begun)
56                 tablesample_init(node);
57
58         /*
59          * get the next tuple, and store it in our result slot
60          */
61         tuple = tablesample_getnext(node);
62
63         slot = node->ss.ss_ScanTupleSlot;
64
65         if (tuple)
66                 ExecStoreTuple(tuple,   /* tuple to store */
67                                            slot,        /* slot to store in */
68                                            node->ss.ss_currentScanDesc->rs_cbuf,        /* tuple's buffer */
69                                            false);      /* don't pfree this pointer */
70         else
71                 ExecClearTuple(slot);
72
73         return slot;
74 }
75
76 /*
77  * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
78  */
79 static bool
80 SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
81 {
82         /*
83          * No need to recheck for SampleScan, since like SeqScan we don't pass any
84          * checkable keys to heap_beginscan.
85          */
86         return true;
87 }
88
89 /* ----------------------------------------------------------------
90  *              ExecSampleScan(node)
91  *
92  *              Scans the relation using the sampling method and returns
93  *              the next qualifying tuple.
94  *              We call the ExecScan() routine and pass it the appropriate
95  *              access method functions.
96  * ----------------------------------------------------------------
97  */
98 static TupleTableSlot *
99 ExecSampleScan(PlanState *pstate)
100 {
101         SampleScanState *node = castNode(SampleScanState, pstate);
102
103         return ExecScan(&node->ss,
104                                         (ExecScanAccessMtd) SampleNext,
105                                         (ExecScanRecheckMtd) SampleRecheck);
106 }
107
108 /* ----------------------------------------------------------------
109  *              ExecInitSampleScan
110  * ----------------------------------------------------------------
111  */
112 SampleScanState *
113 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
114 {
115         SampleScanState *scanstate;
116         TableSampleClause *tsc = node->tablesample;
117         TsmRoutine *tsm;
118
119         Assert(outerPlan(node) == NULL);
120         Assert(innerPlan(node) == NULL);
121
122         /*
123          * create state structure
124          */
125         scanstate = makeNode(SampleScanState);
126         scanstate->ss.ps.plan = (Plan *) node;
127         scanstate->ss.ps.state = estate;
128         scanstate->ss.ps.ExecProcNode = ExecSampleScan;
129
130         /*
131          * Miscellaneous initialization
132          *
133          * create expression context for node
134          */
135         ExecAssignExprContext(estate, &scanstate->ss.ps);
136
137         /*
138          * Initialize scan relation.
139          *
140          * Get the relation object id from the relid'th entry in the range table,
141          * open that relation and acquire appropriate lock on it.
142          */
143         scanstate->ss.ss_currentRelation =
144                 ExecOpenScanRelation(estate,
145                                                          node->scan.scanrelid,
146                                                          eflags);
147
148         /* we won't set up the HeapScanDesc till later */
149         scanstate->ss.ss_currentScanDesc = NULL;
150
151         /* and create slot with appropriate rowtype */
152         ExecInitScanTupleSlot(estate, &scanstate->ss,
153                                                   RelationGetDescr(scanstate->ss.ss_currentRelation));
154
155         /*
156          * Initialize result slot, type and projection.
157          * tuple table and result tuple initialization
158          */
159         ExecInitResultTupleSlotTL(estate, &scanstate->ss.ps);
160         ExecAssignScanProjectionInfo(&scanstate->ss);
161
162         /*
163          * initialize child expressions
164          */
165         scanstate->ss.ps.qual =
166                 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
167
168         scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
169         scanstate->repeatable =
170                 ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
171
172         /*
173          * If we don't have a REPEATABLE clause, select a random seed.  We want to
174          * do this just once, since the seed shouldn't change over rescans.
175          */
176         if (tsc->repeatable == NULL)
177                 scanstate->seed = random();
178
179         /*
180          * Finally, initialize the TABLESAMPLE method handler.
181          */
182         tsm = GetTsmRoutine(tsc->tsmhandler);
183         scanstate->tsmroutine = tsm;
184         scanstate->tsm_state = NULL;
185
186         if (tsm->InitSampleScan)
187                 tsm->InitSampleScan(scanstate, eflags);
188
189         /* We'll do BeginSampleScan later; we can't evaluate params yet */
190         scanstate->begun = false;
191
192         return scanstate;
193 }
194
195 /* ----------------------------------------------------------------
196  *              ExecEndSampleScan
197  *
198  *              frees any storage allocated through C routines.
199  * ----------------------------------------------------------------
200  */
201 void
202 ExecEndSampleScan(SampleScanState *node)
203 {
204         /*
205          * Tell sampling function that we finished the scan.
206          */
207         if (node->tsmroutine->EndSampleScan)
208                 node->tsmroutine->EndSampleScan(node);
209
210         /*
211          * Free the exprcontext
212          */
213         ExecFreeExprContext(&node->ss.ps);
214
215         /*
216          * clean out the tuple table
217          */
218         ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
219         ExecClearTuple(node->ss.ss_ScanTupleSlot);
220
221         /*
222          * close heap scan
223          */
224         if (node->ss.ss_currentScanDesc)
225                 heap_endscan(node->ss.ss_currentScanDesc);
226
227         /*
228          * close the heap relation.
229          */
230         ExecCloseScanRelation(node->ss.ss_currentRelation);
231 }
232
233 /* ----------------------------------------------------------------
234  *              ExecReScanSampleScan
235  *
236  *              Rescans the relation.
237  *
238  * ----------------------------------------------------------------
239  */
240 void
241 ExecReScanSampleScan(SampleScanState *node)
242 {
243         /* Remember we need to do BeginSampleScan again (if we did it at all) */
244         node->begun = false;
245
246         ExecScanReScan(&node->ss);
247 }
248
249
250 /*
251  * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
252  */
253 static void
254 tablesample_init(SampleScanState *scanstate)
255 {
256         TsmRoutine *tsm = scanstate->tsmroutine;
257         ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
258         Datum      *params;
259         Datum           datum;
260         bool            isnull;
261         uint32          seed;
262         bool            allow_sync;
263         int                     i;
264         ListCell   *arg;
265
266         params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
267
268         i = 0;
269         foreach(arg, scanstate->args)
270         {
271                 ExprState  *argstate = (ExprState *) lfirst(arg);
272
273                 params[i] = ExecEvalExprSwitchContext(argstate,
274                                                                                           econtext,
275                                                                                           &isnull);
276                 if (isnull)
277                         ereport(ERROR,
278                                         (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
279                                          errmsg("TABLESAMPLE parameter cannot be null")));
280                 i++;
281         }
282
283         if (scanstate->repeatable)
284         {
285                 datum = ExecEvalExprSwitchContext(scanstate->repeatable,
286                                                                                   econtext,
287                                                                                   &isnull);
288                 if (isnull)
289                         ereport(ERROR,
290                                         (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
291                                          errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
292
293                 /*
294                  * The REPEATABLE parameter has been coerced to float8 by the parser.
295                  * The reason for using float8 at the SQL level is that it will
296                  * produce unsurprising results both for users used to databases that
297                  * accept only integers in the REPEATABLE clause and for those who
298                  * might expect that REPEATABLE works like setseed() (a float in the
299                  * range from -1 to 1).
300                  *
301                  * We use hashfloat8() to convert the supplied value into a suitable
302                  * seed.  For regression-testing purposes, that has the convenient
303                  * property that REPEATABLE(0) gives a machine-independent result.
304                  */
305                 seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
306         }
307         else
308         {
309                 /* Use the seed selected by ExecInitSampleScan */
310                 seed = scanstate->seed;
311         }
312
313         /* Set default values for params that BeginSampleScan can adjust */
314         scanstate->use_bulkread = true;
315         scanstate->use_pagemode = true;
316
317         /* Let tablesample method do its thing */
318         tsm->BeginSampleScan(scanstate,
319                                                  params,
320                                                  list_length(scanstate->args),
321                                                  seed);
322
323         /* We'll use syncscan if there's no NextSampleBlock function */
324         allow_sync = (tsm->NextSampleBlock == NULL);
325
326         /* Now we can create or reset the HeapScanDesc */
327         if (scanstate->ss.ss_currentScanDesc == NULL)
328         {
329                 scanstate->ss.ss_currentScanDesc =
330                         heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
331                                                                         scanstate->ss.ps.state->es_snapshot,
332                                                                         0, NULL,
333                                                                         scanstate->use_bulkread,
334                                                                         allow_sync,
335                                                                         scanstate->use_pagemode);
336         }
337         else
338         {
339                 heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
340                                                            scanstate->use_bulkread,
341                                                            allow_sync,
342                                                            scanstate->use_pagemode);
343         }
344
345         pfree(params);
346
347         /* And we're initialized. */
348         scanstate->begun = true;
349 }
350
351 /*
352  * Get next tuple from TABLESAMPLE method.
353  *
354  * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
355  * perhaps be better to refactor to share more code.
356  */
357 static HeapTuple
358 tablesample_getnext(SampleScanState *scanstate)
359 {
360         TsmRoutine *tsm = scanstate->tsmroutine;
361         HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
362         HeapTuple       tuple = &(scan->rs_ctup);
363         Snapshot        snapshot = scan->rs_snapshot;
364         bool            pagemode = scan->rs_pageatatime;
365         BlockNumber blockno;
366         Page            page;
367         bool            all_visible;
368         OffsetNumber maxoffset;
369
370         if (!scan->rs_inited)
371         {
372                 /*
373                  * return null immediately if relation is empty
374                  */
375                 if (scan->rs_nblocks == 0)
376                 {
377                         Assert(!BufferIsValid(scan->rs_cbuf));
378                         tuple->t_data = NULL;
379                         return NULL;
380                 }
381                 if (tsm->NextSampleBlock)
382                 {
383                         blockno = tsm->NextSampleBlock(scanstate);
384                         if (!BlockNumberIsValid(blockno))
385                         {
386                                 tuple->t_data = NULL;
387                                 return NULL;
388                         }
389                 }
390                 else
391                         blockno = scan->rs_startblock;
392                 Assert(blockno < scan->rs_nblocks);
393                 heapgetpage(scan, blockno);
394                 scan->rs_inited = true;
395         }
396         else
397         {
398                 /* continue from previously returned page/tuple */
399                 blockno = scan->rs_cblock;      /* current page */
400         }
401
402         /*
403          * When not using pagemode, we must lock the buffer during tuple
404          * visibility checks.
405          */
406         if (!pagemode)
407                 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
408
409         page = (Page) BufferGetPage(scan->rs_cbuf);
410         all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
411         maxoffset = PageGetMaxOffsetNumber(page);
412
413         for (;;)
414         {
415                 OffsetNumber tupoffset;
416                 bool            finished;
417
418                 CHECK_FOR_INTERRUPTS();
419
420                 /* Ask the tablesample method which tuples to check on this page. */
421                 tupoffset = tsm->NextSampleTuple(scanstate,
422                                                                                  blockno,
423                                                                                  maxoffset);
424
425                 if (OffsetNumberIsValid(tupoffset))
426                 {
427                         ItemId          itemid;
428                         bool            visible;
429
430                         /* Skip invalid tuple pointers. */
431                         itemid = PageGetItemId(page, tupoffset);
432                         if (!ItemIdIsNormal(itemid))
433                                 continue;
434
435                         tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
436                         tuple->t_len = ItemIdGetLength(itemid);
437                         ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
438
439                         if (all_visible)
440                                 visible = true;
441                         else
442                                 visible = SampleTupleVisible(tuple, tupoffset, scan);
443
444                         /* in pagemode, heapgetpage did this for us */
445                         if (!pagemode)
446                                 CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
447                                                                                                 scan->rs_cbuf, snapshot);
448
449                         if (visible)
450                         {
451                                 /* Found visible tuple, return it. */
452                                 if (!pagemode)
453                                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
454                                 break;
455                         }
456                         else
457                         {
458                                 /* Try next tuple from same page. */
459                                 continue;
460                         }
461                 }
462
463                 /*
464                  * if we get here, it means we've exhausted the items on this page and
465                  * it's time to move to the next.
466                  */
467                 if (!pagemode)
468                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
469
470                 if (tsm->NextSampleBlock)
471                 {
472                         blockno = tsm->NextSampleBlock(scanstate);
473                         Assert(!scan->rs_syncscan);
474                         finished = !BlockNumberIsValid(blockno);
475                 }
476                 else
477                 {
478                         /* Without NextSampleBlock, just do a plain forward seqscan. */
479                         blockno++;
480                         if (blockno >= scan->rs_nblocks)
481                                 blockno = 0;
482
483                         /*
484                          * Report our new scan position for synchronization purposes.
485                          *
486                          * Note: we do this before checking for end of scan so that the
487                          * final state of the position hint is back at the start of the
488                          * rel.  That's not strictly necessary, but otherwise when you run
489                          * the same query multiple times the starting position would shift
490                          * a little bit backwards on every invocation, which is confusing.
491                          * We don't guarantee any specific ordering in general, though.
492                          */
493                         if (scan->rs_syncscan)
494                                 ss_report_location(scan->rs_rd, blockno);
495
496                         finished = (blockno == scan->rs_startblock);
497                 }
498
499                 /*
500                  * Reached end of scan?
501                  */
502                 if (finished)
503                 {
504                         if (BufferIsValid(scan->rs_cbuf))
505                                 ReleaseBuffer(scan->rs_cbuf);
506                         scan->rs_cbuf = InvalidBuffer;
507                         scan->rs_cblock = InvalidBlockNumber;
508                         tuple->t_data = NULL;
509                         scan->rs_inited = false;
510                         return NULL;
511                 }
512
513                 Assert(blockno < scan->rs_nblocks);
514                 heapgetpage(scan, blockno);
515
516                 /* Re-establish state for new page */
517                 if (!pagemode)
518                         LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
519
520                 page = (Page) BufferGetPage(scan->rs_cbuf);
521                 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
522                 maxoffset = PageGetMaxOffsetNumber(page);
523         }
524
525         /* Count successfully-fetched tuples as heap fetches */
526         pgstat_count_heap_getnext(scan->rs_rd);
527
528         return &(scan->rs_ctup);
529 }
530
531 /*
532  * Check visibility of the tuple.
533  */
534 static bool
535 SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
536 {
537         if (scan->rs_pageatatime)
538         {
539                 /*
540                  * In pageatatime mode, heapgetpage() already did visibility checks,
541                  * so just look at the info it left in rs_vistuples[].
542                  *
543                  * We use a binary search over the known-sorted array.  Note: we could
544                  * save some effort if we insisted that NextSampleTuple select tuples
545                  * in increasing order, but it's not clear that there would be enough
546                  * gain to justify the restriction.
547                  */
548                 int                     start = 0,
549                                         end = scan->rs_ntuples - 1;
550
551                 while (start <= end)
552                 {
553                         int                     mid = (start + end) / 2;
554                         OffsetNumber curoffset = scan->rs_vistuples[mid];
555
556                         if (tupoffset == curoffset)
557                                 return true;
558                         else if (tupoffset < curoffset)
559                                 end = mid - 1;
560                         else
561                                 start = mid + 1;
562                 }
563
564                 return false;
565         }
566         else
567         {
568                 /* Otherwise, we have to check the tuple individually. */
569                 return HeapTupleSatisfiesVisibility(tuple,
570                                                                                         scan->rs_snapshot,
571                                                                                         scan->rs_cbuf);
572         }
573 }