]> granicus.if.org Git - postgresql/commitdiff
tableam: Avoid relying on relation size to determine validity of tids.
authorAndres Freund <andres@anarazel.de>
Sat, 18 May 2019 01:52:01 +0000 (18:52 -0700)
committerAndres Freund <andres@anarazel.de>
Sat, 18 May 2019 01:56:55 +0000 (18:56 -0700)
Instead add a tableam callback to do so. To avoid adding per
validation overhead, pass a scan to tuple_tid_valid. In heap's case
we'd otherwise incurred a RelationGetNumberOfBlocks() call for each
tid - which'd have added noticable overhead to nodeTidscan.c.

Author: Andres Freund
Reviewed-By: Ashwin Agrawal
Discussion: https://postgr.es/m/20190515185447.gno2jtqxyktylyvs@alap3.anarazel.de

src/backend/access/heap/heapam.c
src/backend/access/heap/heapam_handler.c
src/backend/access/table/tableam.c
src/backend/executor/nodeTidscan.c
src/backend/utils/adt/tid.c
src/include/access/heapam.h
src/include/access/tableam.h

index ec9853603fde49736e16ea3bdef691efca4d24b0..d8d4f3b1f5a64c755b45289a8380c81e0ed78388 100644 (file)
@@ -1654,8 +1654,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
 /*
  *     heap_get_latest_tid -  get the latest tid of a specified tuple
  *
- * Actually, this gets the latest version that is visible according to
- * the passed snapshot.  You can pass SnapshotDirty to get the very latest,
+ * Actually, this gets the latest version that is visible according to the
+ * scan's snapshot.  Create a scan using SnapshotDirty to get the very latest,
  * possibly uncommitted version.
  *
  * *tid is both an input and an output parameter: it is updated to
@@ -1663,28 +1663,20 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
  * if no version of the row passes the snapshot test.
  */
 void
-heap_get_latest_tid(Relation relation,
-                                       Snapshot snapshot,
+heap_get_latest_tid(TableScanDesc sscan,
                                        ItemPointer tid)
 {
-       BlockNumber blk;
+       Relation relation = sscan->rs_rd;
+       Snapshot snapshot = sscan->rs_snapshot;
        ItemPointerData ctid;
        TransactionId priorXmax;
 
-       /* this is to avoid Assert failures on bad input */
-       if (!ItemPointerIsValid(tid))
-               return;
-
        /*
-        * Since this can be called with user-supplied TID, don't trust the input
-        * too much.  (RelationGetNumberOfBlocks is an expensive check, so we
-        * don't check t_ctid links again this way.  Note that it would not do to
-        * call it just once and save the result, either.)
+        * table_get_latest_tid verified that the passed in tid is valid.  Assume
+        * that t_ctid links are valid however - there shouldn't be invalid ones
+        * in the table.
         */
-       blk = ItemPointerGetBlockNumber(tid);
-       if (blk >= RelationGetNumberOfBlocks(relation))
-               elog(ERROR, "block number %u is out of range for relation \"%s\"",
-                        blk, RelationGetRelationName(relation));
+       Assert(ItemPointerIsValid(tid));
 
        /*
         * Loop to chase down t_ctid links.  At top of loop, ctid is the tuple we
index 9aa468295ae9246e2e0c43fb45314a67ff9b0833..35553c7c92d93dee63b93520cac90d12eff95e39 100644 (file)
@@ -204,6 +204,15 @@ heapam_fetch_row_version(Relation relation,
        return false;
 }
 
+static bool
+heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
+{
+       HeapScanDesc hscan = (HeapScanDesc) scan;
+
+       return ItemPointerIsValid(tid) &&
+               ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
+}
+
 static bool
 heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
                                                                Snapshot snapshot)
@@ -2568,6 +2577,7 @@ static const TableAmRoutine heapam_methods = {
 
        .tuple_fetch_row_version = heapam_fetch_row_version,
        .tuple_get_latest_tid = heap_get_latest_tid,
+       .tuple_tid_valid = heapam_tuple_tid_valid,
        .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
        .compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
 
index baba1ea699bfca830901763f5cf4dc851aef7b7f..6e46befdfd9a85ef40313d788a9f9ab302cc6ad5 100644 (file)
@@ -213,6 +213,33 @@ table_index_fetch_tuple_check(Relation rel,
 }
 
 
+/* ------------------------------------------------------------------------
+ * Functions for non-modifying operations on individual tuples
+ * ------------------------------------------------------------------------
+ */
+
+void
+table_get_latest_tid(TableScanDesc scan, ItemPointer tid)
+{
+       Relation rel = scan->rs_rd;
+       const TableAmRoutine *tableam = rel->rd_tableam;
+
+       /*
+        * Since this can be called with user-supplied TID, don't trust the input
+        * too much.
+        */
+       if (!tableam->tuple_tid_valid(scan, tid))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("tid (%u, %u) is not valid for relation for relation \"%s\"",
+                                               ItemPointerGetBlockNumberNoCheck(tid),
+                                               ItemPointerGetOffsetNumberNoCheck(tid),
+                                               RelationGetRelationName(rel))));
+
+       return tableam->tuple_get_latest_tid(scan, tid);
+}
+
+
 /* ----------------------------------------------------------------------------
  * Functions to make modifications a bit simpler.
  * ----------------------------------------------------------------------------
index 156be56a57d3637a3f3ef79a386a7a4d6ea27975..93335864a1ec18f0162452628ecbf6580a0f4979 100644 (file)
@@ -129,19 +129,23 @@ static void
 TidListEval(TidScanState *tidstate)
 {
        ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
-       BlockNumber nblocks;
+       TableScanDesc scan;
        ItemPointerData *tidList;
        int                     numAllocTids;
        int                     numTids;
        ListCell   *l;
 
        /*
-        * We silently discard any TIDs that are out of range at the time of scan
-        * start.  (Since we hold at least AccessShareLock on the table, it won't
-        * be possible for someone to truncate away the blocks we intend to
-        * visit.)
+        * Start scan on-demand - initializing a scan isn't free (e.g. heap stats
+        * the size of the table), so it makes sense to delay that until needed -
+        * the node might never get executed.
         */
-       nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation);
+       if (tidstate->ss.ss_currentScanDesc == NULL)
+               tidstate->ss.ss_currentScanDesc =
+                       table_beginscan(tidstate->ss.ss_currentRelation,
+                                                       tidstate->ss.ps.state->es_snapshot,
+                                                       0, NULL);
+       scan = tidstate->ss.ss_currentScanDesc;
 
        /*
         * We initialize the array with enough slots for the case that all quals
@@ -165,19 +169,27 @@ TidListEval(TidScanState *tidstate)
                                DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
                                                                                                                  econtext,
                                                                                                                  &isNull));
-                       if (!isNull &&
-                               ItemPointerIsValid(itemptr) &&
-                               ItemPointerGetBlockNumber(itemptr) < nblocks)
+                       if (isNull)
+                               continue;
+
+                       /*
+                        * We silently discard any TIDs that the AM considers invalid
+                        * (E.g. for heap, they could be out of range at the time of scan
+                        * start.  Since we hold at least AccessShareLock on the table, it
+                        * won't be possible for someone to truncate away the blocks we
+                        * intend to visit.).
+                        */
+                       if (!table_tuple_tid_valid(scan, itemptr))
+                               continue;
+
+                       if (numTids >= numAllocTids)
                        {
-                               if (numTids >= numAllocTids)
-                               {
-                                       numAllocTids *= 2;
-                                       tidList = (ItemPointerData *)
-                                               repalloc(tidList,
-                                                                numAllocTids * sizeof(ItemPointerData));
-                               }
-                               tidList[numTids++] = *itemptr;
+                               numAllocTids *= 2;
+                               tidList = (ItemPointerData *)
+                                       repalloc(tidList,
+                                                        numAllocTids * sizeof(ItemPointerData));
                        }
+                       tidList[numTids++] = *itemptr;
                }
                else if (tidexpr->exprstate && tidexpr->isarray)
                {
@@ -206,13 +218,15 @@ TidListEval(TidScanState *tidstate)
                        }
                        for (i = 0; i < ndatums; i++)
                        {
-                               if (!ipnulls[i])
-                               {
-                                       itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
-                                       if (ItemPointerIsValid(itemptr) &&
-                                               ItemPointerGetBlockNumber(itemptr) < nblocks)
-                                               tidList[numTids++] = *itemptr;
-                               }
+                               if (ipnulls[i])
+                                       continue;
+
+                               itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
+
+                               if (!table_tuple_tid_valid(scan, itemptr))
+                                       continue;
+
+                               tidList[numTids++] = *itemptr;
                        }
                        pfree(ipdatums);
                        pfree(ipnulls);
@@ -306,6 +320,7 @@ TidNext(TidScanState *node)
        EState     *estate;
        ScanDirection direction;
        Snapshot        snapshot;
+       TableScanDesc scan;
        Relation        heapRelation;
        TupleTableSlot *slot;
        ItemPointerData *tidList;
@@ -327,6 +342,7 @@ TidNext(TidScanState *node)
        if (node->tss_TidList == NULL)
                TidListEval(node);
 
+       scan = node->ss.ss_currentScanDesc;
        tidList = node->tss_TidList;
        numTids = node->tss_NumTids;
 
@@ -365,7 +381,7 @@ TidNext(TidScanState *node)
                 * current according to our snapshot.
                 */
                if (node->tss_isCurrentOf)
-                       table_get_latest_tid(heapRelation, snapshot, &tid);
+                       table_get_latest_tid(scan, &tid);
 
                if (table_fetch_row_version(heapRelation, &tid, snapshot, slot))
                        return slot;
@@ -442,6 +458,10 @@ ExecReScanTidScan(TidScanState *node)
        node->tss_NumTids = 0;
        node->tss_TidPtr = -1;
 
+       /* not really necessary, but seems good form */
+       if (node->ss.ss_currentScanDesc)
+               table_rescan(node->ss.ss_currentScanDesc, NULL);
+
        ExecScanReScan(&node->ss);
 }
 
@@ -455,6 +475,9 @@ ExecReScanTidScan(TidScanState *node)
 void
 ExecEndTidScan(TidScanState *node)
 {
+       if (node->ss.ss_currentScanDesc)
+               table_endscan(node->ss.ss_currentScanDesc);
+
        /*
         * Free the exprcontext
         */
index 6ab26d8ea8b4c4b0a570b11a59e924a45e949659..1aab30b6aab175919c6e9dfdd0a19a06209e8a77 100644 (file)
@@ -358,6 +358,7 @@ currtid_byreloid(PG_FUNCTION_ARGS)
        Relation        rel;
        AclResult       aclresult;
        Snapshot        snapshot;
+       TableScanDesc scan;
 
        result = (ItemPointer) palloc(sizeof(ItemPointerData));
        if (!reloid)
@@ -380,7 +381,9 @@ currtid_byreloid(PG_FUNCTION_ARGS)
        ItemPointerCopy(tid, result);
 
        snapshot = RegisterSnapshot(GetLatestSnapshot());
-       table_get_latest_tid(rel, snapshot, result);
+       scan = table_beginscan(rel, snapshot, 0, NULL);
+       table_get_latest_tid(scan, result);
+       table_endscan(scan);
        UnregisterSnapshot(snapshot);
 
        table_close(rel, AccessShareLock);
@@ -398,6 +401,7 @@ currtid_byrelname(PG_FUNCTION_ARGS)
        Relation        rel;
        AclResult       aclresult;
        Snapshot        snapshot;
+       TableScanDesc scan;
 
        relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
        rel = table_openrv(relrv, AccessShareLock);
@@ -415,7 +419,9 @@ currtid_byrelname(PG_FUNCTION_ARGS)
        ItemPointerCopy(tid, result);
 
        snapshot = RegisterSnapshot(GetLatestSnapshot());
-       table_get_latest_tid(rel, snapshot, result);
+       scan = table_beginscan(rel, snapshot, 0, NULL);
+       table_get_latest_tid(scan, result);
+       table_endscan(scan);
        UnregisterSnapshot(snapshot);
 
        table_close(rel, AccessShareLock);
index 77e5e603b032e4644fb3271f5202461288fe4ccb..6b8c7020c8c3abdff48c6eeda663fc65c67617a6 100644 (file)
@@ -134,8 +134,7 @@ extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
                                           Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
                                           bool *all_dead, bool first_call);
 
-extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
-                                       ItemPointer tid);
+extern void heap_get_latest_tid(TableScanDesc scan, ItemPointer tid);
 extern void setLastTid(const ItemPointer tid);
 
 extern BulkInsertState GetBulkInsertState(void);
index c5d64602036a31eb121f8ad9de61492c98a4c1fb..8fbeb020337f2b2248caa09693efe861a515886f 100644 (file)
@@ -308,12 +308,17 @@ typedef struct TableAmRoutine
                                                                                        Snapshot snapshot,
                                                                                        TupleTableSlot *slot);
 
+       /*
+        * Is tid valid for a scan of this relation.
+        */
+       bool            (*tuple_tid_valid) (TableScanDesc scan,
+                                                                       ItemPointer tid);
+
        /*
         * Return the latest version of the tuple at `tid`, by updating `tid` to
         * point at the newest version.
         */
-       void            (*tuple_get_latest_tid) (Relation rel,
-                                                                                Snapshot snapshot,
+       void            (*tuple_get_latest_tid) (TableScanDesc scan,
                                                                                 ItemPointer tid);
 
        /*
@@ -548,10 +553,10 @@ typedef struct TableAmRoutine
        /*
         * See table_relation_size().
         *
-        * Note that currently a few callers use the MAIN_FORKNUM size to vet the
-        * validity of tids (e.g. nodeTidscans.c), and others use it to figure out
-        * the range of potentially interesting blocks (brin, analyze). The
-        * abstraction around this will need to be improved in the near future.
+        * Note that currently a few callers use the MAIN_FORKNUM size to figure
+        * out the range of potentially interesting blocks (brin, analyze). It's
+        * probable that we'll need to revise the interface for those at some
+        * point.
         */
        uint64          (*relation_size) (Relation rel, ForkNumber forkNumber);
 
@@ -986,15 +991,25 @@ table_fetch_row_version(Relation rel,
 }
 
 /*
- * Return the latest version of the tuple at `tid`, by updating `tid` to
- * point at the newest version.
+ * Verify that `tid` is a potentially valid tuple identifier. That doesn't
+ * mean that the pointed to row needs to exist or be visible, but that
+ * attempting to fetch the row (e.g. with table_get_latest_tid() or
+ * table_fetch_row_version()) should not error out if called with that tid.
+ *
+ * `scan` needs to have been started via table_beginscan().
  */
-static inline void
-table_get_latest_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
+static inline bool
+table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
 {
-       rel->rd_tableam->tuple_get_latest_tid(rel, snapshot, tid);
+       return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
 }
 
+/*
+ * Return the latest version of the tuple at `tid`, by updating `tid` to
+ * point at the newest version.
+ */
+extern void table_get_latest_tid(TableScanDesc scan, ItemPointer tid);
+
 /*
  * Return true iff tuple in slot satisfies the snapshot.
  *