]> granicus.if.org Git - postgresql/blobdiff - src/backend/executor/nodeSamplescan.c
Make some small planner API cleanups.
[postgresql] / src / backend / executor / nodeSamplescan.c
index 4c1c5237b7d203c5bd19f48375d87586980776d5..da4a65fd30ad99b65027ece482844f62f8b0ec91 100644 (file)
@@ -3,7 +3,7 @@
  * nodeSamplescan.c
  *       Support routines for sample scans of relations (table sampling).
  *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  */
 #include "postgres.h"
 
-#include "access/tablesample.h"
+#include "access/hash.h"
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "access/tsmapi.h"
 #include "executor/executor.h"
 #include "executor/nodeSamplescan.h"
 #include "miscadmin.h"
-#include "parser/parsetree.h"
 #include "pgstat.h"
-#include "storage/bufmgr.h"
 #include "storage/predicate.h"
+#include "utils/builtins.h"
 #include "utils/rel.h"
-#include "utils/syscache.h"
-#include "utils/tqual.h"
 
-static void InitScanRelation(SampleScanState *node, EState *estate,
-                                int eflags, TableSampleClause *tablesample);
 static TupleTableSlot *SampleNext(SampleScanState *node);
-
+static void tablesample_init(SampleScanState *scanstate);
+static HeapTuple tablesample_getnext(SampleScanState *scanstate);
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
+                                  HeapScanDesc scan);
 
 /* ----------------------------------------------------------------
  *                                             Scan Support
@@ -45,24 +46,26 @@ static TupleTableSlot *SampleNext(SampleScanState *node);
 static TupleTableSlot *
 SampleNext(SampleScanState *node)
 {
-       TupleTableSlot *slot;
-       TableSampleDesc *tsdesc;
        HeapTuple       tuple;
+       TupleTableSlot *slot;
 
        /*
-        * get information from the scan state
+        * if this is the first call within a scan, initialize
         */
-       slot = node->ss.ss_ScanTupleSlot;
-       tsdesc = node->tsdesc;
+       if (!node->begun)
+               tablesample_init(node);
+
+       /*
+        * get the next tuple, and store it in our result slot
+        */
+       tuple = tablesample_getnext(node);
 
-       tuple = tablesample_getnext(tsdesc);
+       slot = node->ss.ss_ScanTupleSlot;
 
        if (tuple)
-               ExecStoreTuple(tuple,   /* tuple to store */
-                                          slot,        /* slot to store in */
-                                          tsdesc->heapScan->rs_cbuf,           /* buffer associated
-                                                                                                                * with this tuple */
-                                          false);      /* don't pfree this pointer */
+               ExecStoreBufferHeapTuple(tuple, /* tuple to store */
+                                                                slot,  /* slot to store in */
+                                                                node->ss.ss_currentScanDesc->rs_cbuf); /* tuple's buffer */
        else
                ExecClearTuple(slot);
 
@@ -75,7 +78,10 @@ SampleNext(SampleScanState *node)
 static bool
 SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
 {
-       /* No need to recheck for SampleScan */
+       /*
+        * Nothing to recheck for SampleScan: like SeqScan, we don't pass any
+        * checkable keys to heap_beginscan, so every tuple is accepted.
+        */
        return true;
 }
 
@@ -88,50 +94,16 @@ SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
  *             access method functions.
  * ----------------------------------------------------------------
  */
-TupleTableSlot *
-ExecSampleScan(SampleScanState *node)
+static TupleTableSlot *
+ExecSampleScan(PlanState *pstate)
 {
-       return ExecScan((ScanState *) node,
+       SampleScanState *node = castNode(SampleScanState, pstate);
+
+       return ExecScan(&node->ss,
                                        (ExecScanAccessMtd) SampleNext,
                                        (ExecScanRecheckMtd) SampleRecheck);
 }
 
-/* ----------------------------------------------------------------
- *             InitScanRelation
- *
- *             Set up to access the scan relation.
- * ----------------------------------------------------------------
- */
-static void
-InitScanRelation(SampleScanState *node, EState *estate, int eflags,
-                                TableSampleClause *tablesample)
-{
-       Relation        currentRelation;
-
-       /*
-        * get the relation object id from the relid'th entry in the range table,
-        * open that relation and acquire appropriate lock on it.
-        */
-       currentRelation = ExecOpenScanRelation(estate,
-                                                               ((SampleScan *) node->ss.ps.plan)->scanrelid,
-                                                                                  eflags);
-
-       node->ss.ss_currentRelation = currentRelation;
-
-       /*
-        * Even though we aren't going to do a conventional seqscan, it is useful
-        * to create a HeapScanDesc --- many of the fields in it are usable.
-        */
-       node->ss.ss_currentScanDesc =
-               heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL,
-                                                               tablesample->tsmseqscan,
-                                                               tablesample->tsmpagemode);
-
-       /* and report the scan tuple slot's rowtype */
-       ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
-}
-
-
 /* ----------------------------------------------------------------
  *             ExecInitSampleScan
  * ----------------------------------------------------------------
@@ -140,12 +112,11 @@ SampleScanState *
 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
 {
        SampleScanState *scanstate;
-       RangeTblEntry *rte = rt_fetch(node->scanrelid,
-                                                                 estate->es_range_table);
+       TableSampleClause *tsc = node->tablesample;
+       TsmRoutine *tsm;
 
        Assert(outerPlan(node) == NULL);
        Assert(innerPlan(node) == NULL);
-       Assert(rte->tablesample != NULL);
 
        /*
         * create state structure
@@ -153,6 +124,7 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
        scanstate = makeNode(SampleScanState);
        scanstate->ss.ps.plan = (Plan *) node;
        scanstate->ss.ps.state = estate;
+       scanstate->ss.ps.ExecProcNode = ExecSampleScan;
 
        /*
         * Miscellaneous initialization
@@ -162,35 +134,56 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
        ExecAssignExprContext(estate, &scanstate->ss.ps);
 
        /*
-        * initialize child expressions
+        * open the scan relation
         */
-       scanstate->ss.ps.targetlist = (List *)
-               ExecInitExpr((Expr *) node->plan.targetlist,
-                                        (PlanState *) scanstate);
-       scanstate->ss.ps.qual = (List *)
-               ExecInitExpr((Expr *) node->plan.qual,
-                                        (PlanState *) scanstate);
+       scanstate->ss.ss_currentRelation =
+               ExecOpenScanRelation(estate,
+                                                        node->scan.scanrelid,
+                                                        eflags);
+
+       /* we won't set up the HeapScanDesc till later */
+       scanstate->ss.ss_currentScanDesc = NULL;
+
+       /* and create slot with appropriate rowtype */
+       ExecInitScanTupleSlot(estate, &scanstate->ss,
+                                                 RelationGetDescr(scanstate->ss.ss_currentRelation),
+                                                 &TTSOpsBufferHeapTuple);
 
        /*
-        * tuple table initialization
+        * Initialize result type and projection.
         */
-       ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
-       ExecInitScanTupleSlot(estate, &scanstate->ss);
+       ExecInitResultTypeTL(&scanstate->ss.ps);
+       ExecAssignScanProjectionInfo(&scanstate->ss);
 
        /*
-        * initialize scan relation
+        * initialize child expressions
         */
-       InitScanRelation(scanstate, estate, eflags, rte->tablesample);
+       scanstate->ss.ps.qual =
+               ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
 
-       scanstate->ss.ps.ps_TupFromTlist = false;
+       scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
+       scanstate->repeatable =
+               ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
 
        /*
-        * Initialize result tuple type and projection info.
+        * If we don't have a REPEATABLE clause, select a random seed.  We want to
+        * do this just once, since the seed shouldn't change over rescans.
         */
-       ExecAssignResultTypeFromTL(&scanstate->ss.ps);
-       ExecAssignScanProjectionInfo(&scanstate->ss);
+       if (tsc->repeatable == NULL)
+               scanstate->seed = random();
 
-       scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample);
+       /*
+        * Finally, initialize the TABLESAMPLE method handler.
+        */
+       tsm = GetTsmRoutine(tsc->tsmhandler);
+       scanstate->tsmroutine = tsm;
+       scanstate->tsm_state = NULL;
+
+       if (tsm->InitSampleScan)
+               tsm->InitSampleScan(scanstate, eflags);
+
+       /* We'll do BeginSampleScan later; we can't evaluate params yet */
+       scanstate->begun = false;
 
        return scanstate;
 }
@@ -207,7 +200,8 @@ ExecEndSampleScan(SampleScanState *node)
        /*
         * Tell sampling function that we finished the scan.
         */
-       tablesample_end(node->tsdesc);
+       if (node->tsmroutine->EndSampleScan)
+               node->tsmroutine->EndSampleScan(node);
 
        /*
         * Free the exprcontext
@@ -217,25 +211,17 @@ ExecEndSampleScan(SampleScanState *node)
        /*
         * clean out the tuple table
         */
-       ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
+       if (node->ss.ps.ps_ResultTupleSlot)
+               ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
        ExecClearTuple(node->ss.ss_ScanTupleSlot);
 
        /*
         * close heap scan
         */
-       heap_endscan(node->ss.ss_currentScanDesc);
-
-       /*
-        * close the heap relation.
-        */
-       ExecCloseScanRelation(node->ss.ss_currentRelation);
+       if (node->ss.ss_currentScanDesc)
+               heap_endscan(node->ss.ss_currentScanDesc);
 }
 
-/* ----------------------------------------------------------------
- *                                             Join Support
- * ----------------------------------------------------------------
- */
-
 /* ----------------------------------------------------------------
  *             ExecReScanSampleScan
  *
@@ -246,12 +232,334 @@ ExecEndSampleScan(SampleScanState *node)
 void
 ExecReScanSampleScan(SampleScanState *node)
 {
-       heap_rescan(node->ss.ss_currentScanDesc, NULL);
+       /* Clear begun so the next SampleNext call reruns tablesample_init, and with it BeginSampleScan */
+       node->begun = false;
+
+       ExecScanReScan(&node->ss);
+}
+
+
+/*
+ * Initialize the TABLESAMPLE method: evaluate params (rejecting NULLs), call BeginSampleScan, then create or reset the HeapScanDesc.
+ */
+static void
+tablesample_init(SampleScanState *scanstate)
+{
+       TsmRoutine *tsm = scanstate->tsmroutine;
+       ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+       Datum      *params;
+       Datum           datum;
+       bool            isnull;
+       uint32          seed;
+       bool            allow_sync;
+       int                     i;
+       ListCell   *arg;
+
+       params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
+
+       i = 0;
+       foreach(arg, scanstate->args)
+       {
+               ExprState  *argstate = (ExprState *) lfirst(arg);
+
+               params[i] = ExecEvalExprSwitchContext(argstate,
+                                                                                         econtext,
+                                                                                         &isnull);
+               if (isnull)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                                        errmsg("TABLESAMPLE parameter cannot be null")));
+               i++;
+       }
+
+       if (scanstate->repeatable)
+       {
+               datum = ExecEvalExprSwitchContext(scanstate->repeatable,
+                                                                                 econtext,
+                                                                                 &isnull);
+               if (isnull)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
+                                        errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
+
+               /*
+                * The REPEATABLE parameter has been coerced to float8 by the parser.
+                * The reason for using float8 at the SQL level is that it will
+                * produce unsurprising results both for users used to databases that
+                * accept only integers in the REPEATABLE clause and for those who
+                * might expect that REPEATABLE works like setseed() (a float in the
+                * range from -1 to 1).
+                *
+                * We use hashfloat8() to convert the supplied value into a suitable
+                * seed.  For regression-testing purposes, that has the convenient
+                * property that REPEATABLE(0) gives a machine-independent result.
+                */
+               seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
+       }
+       else
+       {
+               /* No REPEATABLE clause: use the seed selected by ExecInitSampleScan */
+               seed = scanstate->seed;
+       }
+
+       /* Set default values for params that BeginSampleScan can adjust */
+       scanstate->use_bulkread = true;
+       scanstate->use_pagemode = true;
+
+       /* Let tablesample method do its thing, given the evaluated params, their count, and the seed */
+       tsm->BeginSampleScan(scanstate,
+                                                params,
+                                                list_length(scanstate->args),
+                                                seed);
+
+       /* We'll use syncscan if there's no NextSampleBlock function */
+       allow_sync = (tsm->NextSampleBlock == NULL);
+
+       /* Now we can create the HeapScanDesc (none exists yet) or reset the existing one */
+       if (scanstate->ss.ss_currentScanDesc == NULL)
+       {
+               scanstate->ss.ss_currentScanDesc =
+                       heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
+                                                                       scanstate->ss.ps.state->es_snapshot,
+                                                                       0, NULL,
+                                                                       scanstate->use_bulkread,
+                                                                       allow_sync,
+                                                                       scanstate->use_pagemode);
+       }
+       else
+       {
+               heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
+                                                          scanstate->use_bulkread,
+                                                          allow_sync,
+                                                          scanstate->use_pagemode);
+       }
+
+       pfree(params);
+
+       /* And we're initialized; SampleNext skips this function while begun stays true. */
+       scanstate->begun = true;
+}
+
+/*
+ * Get next visible tuple selected by the TABLESAMPLE method, or NULL at end of scan.
+ * The result points at scan->rs_ctup, which the next call overwrites.
+ * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
+ * perhaps be better to refactor to share more code.
+ */
+static HeapTuple
+tablesample_getnext(SampleScanState *scanstate)
+{
+       TsmRoutine *tsm = scanstate->tsmroutine;
+       HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
+       HeapTuple       tuple = &(scan->rs_ctup);
+       Snapshot        snapshot = scan->rs_snapshot;
+       bool            pagemode = scan->rs_pageatatime;
+       BlockNumber blockno;
+       Page            page;
+       bool            all_visible;
+       OffsetNumber maxoffset;
+
+       if (!scan->rs_inited)
+       {
+               /*
+                * return null immediately if relation is empty
+                */
+               if (scan->rs_nblocks == 0)
+               {
+                       Assert(!BufferIsValid(scan->rs_cbuf));
+                       tuple->t_data = NULL;
+                       return NULL;
+               }
+               if (tsm->NextSampleBlock)
+               {
+                       blockno = tsm->NextSampleBlock(scanstate);
+                       if (!BlockNumberIsValid(blockno))
+                       {
+                               tuple->t_data = NULL;
+                               return NULL;
+                       }
+               }
+               else
+                       blockno = scan->rs_startblock;
+               Assert(blockno < scan->rs_nblocks);
+               heapgetpage(scan, blockno);
+               scan->rs_inited = true;
+       }
+       else
+       {
+               /* continue from previously returned page/tuple */
+               blockno = scan->rs_cblock;      /* current page */
+       }
 
        /*
-        * Tell sampling function to reset its state for rescan.
+        * When not using pagemode, we must lock the buffer during tuple
+        * visibility checks.
         */
-       tablesample_reset(node->tsdesc);
+       if (!pagemode)
+               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+       page = (Page) BufferGetPage(scan->rs_cbuf);
+       all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+       maxoffset = PageGetMaxOffsetNumber(page);
+
+       for (;;)
+       {
+               OffsetNumber tupoffset;
+               bool            finished;
+
+               CHECK_FOR_INTERRUPTS();
+
+               /* Ask the tablesample method which tuples to check on this page. */
+               tupoffset = tsm->NextSampleTuple(scanstate,
+                                                                                blockno,
+                                                                                maxoffset);
+
+               if (OffsetNumberIsValid(tupoffset))
+               {
+                       ItemId          itemid;
+                       bool            visible;
+
+                       /* Skip item pointers that aren't normal; ask the method for another offset. */
+                       itemid = PageGetItemId(page, tupoffset);
+                       if (!ItemIdIsNormal(itemid))
+                               continue;
+
+                       tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+                       tuple->t_len = ItemIdGetLength(itemid);
+                       ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+                       if (all_visible)
+                               visible = true;
+                       else
+                               visible = SampleTupleVisible(tuple, tupoffset, scan);
+
+                       /* in pagemode, heapgetpage did this for us */
+                       if (!pagemode)
+                               CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
+                                                                                               scan->rs_cbuf, snapshot);
+
+                       if (visible)
+                       {
+                               /* Found visible tuple, return it (dropping the buffer lock if we took one). */
+                               if (!pagemode)
+                                       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+                               break;
+                       }
+                       else
+                       {
+                               /* Try next tuple from same page. */
+                               continue;
+                       }
+               }
+
+               /*
+                * if we get here, NextSampleTuple returned an invalid offset: we've
+                * exhausted the items on this page and it's time to move to the next.
+                */
+               if (!pagemode)
+                       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+               if (tsm->NextSampleBlock)
+               {
+                       blockno = tsm->NextSampleBlock(scanstate);
+                       Assert(!scan->rs_syncscan);
+                       finished = !BlockNumberIsValid(blockno);
+               }
+               else
+               {
+                       /* Without NextSampleBlock, just do a plain forward seqscan. */
+                       blockno++;
+                       if (blockno >= scan->rs_nblocks)
+                               blockno = 0;
+
+                       /*
+                        * Report our new scan position for synchronization purposes.
+                        *
+                        * Note: we do this before checking for end of scan so that the
+                        * final state of the position hint is back at the start of the
+                        * rel.  That's not strictly necessary, but otherwise when you run
+                        * the same query multiple times the starting position would shift
+                        * a little bit backwards on every invocation, which is confusing.
+                        * We don't guarantee any specific ordering in general, though.
+                        */
+                       if (scan->rs_syncscan)
+                               ss_report_location(scan->rs_rd, blockno);
+
+                       finished = (blockno == scan->rs_startblock);
+               }
+
+               /*
+                * Reached end of scan?  If so, release the buffer and reset scan state.
+                */
+               if (finished)
+               {
+                       if (BufferIsValid(scan->rs_cbuf))
+                               ReleaseBuffer(scan->rs_cbuf);
+                       scan->rs_cbuf = InvalidBuffer;
+                       scan->rs_cblock = InvalidBlockNumber;
+                       tuple->t_data = NULL;
+                       scan->rs_inited = false;
+                       return NULL;
+               }
+
+               Assert(blockno < scan->rs_nblocks);
+               heapgetpage(scan, blockno);
+
+               /* Re-establish state for new page */
+               if (!pagemode)
+                       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+               page = (Page) BufferGetPage(scan->rs_cbuf);
+               all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+               maxoffset = PageGetMaxOffsetNumber(page);
+       }
+
+       /* Count successfully-fetched tuples as heap fetches */
+       pgstat_count_heap_getnext(scan->rs_rd);
+
+       return &(scan->rs_ctup);
+}
 
-       ExecScanReScan(&node->ss);
+/*
+ * Check visibility of the tuple at tupoffset on the scan's current page; returns true if it is visible.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+{
+       if (scan->rs_pageatatime)
+       {
+               /*
+                * In pageatatime mode, heapgetpage() already did visibility checks,
+                * so just look at the info it left in rs_vistuples[].
+                *
+                * We use a binary search over the known-sorted array.  Note: we could
+                * save some effort if we insisted that NextSampleTuple select tuples
+                * in increasing order, but it's not clear that there would be enough
+                * gain to justify the restriction.
+                */
+               int                     start = 0,
+                                       end = scan->rs_ntuples - 1;
+
+               while (start <= end)
+               {
+                       int                     mid = (start + end) / 2;
+                       OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+                       if (tupoffset == curoffset)
+                               return true;
+                       else if (tupoffset < curoffset)
+                               end = mid - 1;
+                       else
+                               start = mid + 1;
+               }
+
+               return false;
+       }
+       else
+       {
+               /* Otherwise, we have to check the tuple individually against the scan's snapshot. */
+               return HeapTupleSatisfiesVisibility(tuple,
+                                                                                       scan->rs_snapshot,
+                                                                                       scan->rs_cbuf);
+       }
 }