granicus.if.org Git - postgresql/blob - src/include/access/tableam.h

   1 /*-------------------------------------------------------------------------
   2  *
   3  * tableam.h
   4  *        POSTGRES table access method definitions.
   5  *
   6  *
   7  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
   8  * Portions Copyright (c) 1994, Regents of the University of California
   9  *
  10  * src/include/access/tableam.h
  11  *
  12  *-------------------------------------------------------------------------
  13  */
  14 #ifndef TABLEAM_H
  15 #define TABLEAM_H
  16
  17 #include "access/relscan.h"
  18 #include "access/sdir.h"
  19 #include "utils/guc.h"
  20 #include "utils/rel.h"
  21 #include "utils/snapshot.h"
  22
  23
  24 #define DEFAULT_TABLE_ACCESS_METHOD     "heap"
  25
  26 extern char *default_table_access_method;
  27 extern bool synchronize_seqscans;
  28
  29
  30 struct BulkInsertStateData;
  31
  32
  33 /*
  34  * Result codes for table_{update,delete,lock}_tuple, and for visibility
  35  * routines inside table AMs.
  36  */
  37 typedef enum TM_Result
  38 {
  39         /*
  40          * Signals that the action succeeded (i.e. update/delete performed, lock
  41          * was acquired)
  42          */
  43         TM_Ok,
  44
  45         /* The affected tuple wasn't visible to the relevant snapshot */
  46         TM_Invisible,
  47
  48         /* The affected tuple was already modified by the calling backend */
  49         TM_SelfModified,
  50
  51         /*
  52          * The affected tuple was updated by another transaction. This includes
  53          * the case where tuple was moved to another partition.
  54          */
  55         TM_Updated,
  56
  57         /* The affected tuple was deleted by another transaction */
  58         TM_Deleted,
  59
  60         /*
  61          * The affected tuple is currently being modified by another session. This
  62          * will only be returned if (update/delete/lock)_tuple are instructed not
  63          * to wait.
  64          */
  65         TM_BeingModified,
  66
  67         /* lock couldn't be acquired, action skipped. Only used by lock_tuple */
  68         TM_WouldBlock
  69 } TM_Result;
  70
  71
  72 /*
  73  * When table_update, table_delete, or table_lock_tuple fail because the target
  74  * tuple is already outdated, they fill in this struct to provide information
  75  * to the caller about what happened.
  76  * ctid is the target's ctid link: it is the same as the target's TID if the
  77  * target was deleted, or the location of the replacement tuple if the target
  78  * was updated.
  79  * xmax is the outdating transaction's XID.  If the caller wants to visit the
  80  * replacement tuple, it must check that this matches before believing the
  81  * replacement is really a match.
  82  * cmax is the outdating command's CID, but only when the failure code is
  83  * TM_SelfModified (i.e., something in the current transaction outdated the
  84  * tuple); otherwise cmax is zero.  (We make this restriction because
  85  * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
  86  * transactions.)
  87  */
  88 typedef struct TM_FailureData
  89 {
  90         ItemPointerData ctid;
  91         TransactionId xmax;
  92         CommandId       cmax;
  93         bool            traversed;
  94 } TM_FailureData;
  95
  96 /* "options" flag bits for table_insert */
  97 #define TABLE_INSERT_SKIP_WAL           0x0001
  98 #define TABLE_INSERT_SKIP_FSM           0x0002
  99 #define TABLE_INSERT_FROZEN                     0x0004
 100 #define TABLE_INSERT_NO_LOGICAL         0x0008
 101
 102 /* flag bits fortable_lock_tuple */
 103 /* Follow tuples whose update is in progress if lock modes don't conflict  */
 104 #define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0)
 105 /* Follow update chain and lock lastest version of tuple */
 106 #define TUPLE_LOCK_FLAG_FIND_LAST_VERSION               (1 << 1)
 107
 108
 109 /*
 110  * API struct for a table AM.  Note this must be allocated in a
 111  * server-lifetime manner, typically as a static const struct, which then gets
 112  * returned by FormData_pg_am.amhandler.
 113  *
 114  * I most cases it's not appropriate to directly call the callbacks directly,
 115  * instead use the table_* wrapper functions.
 116  *
 117  * GetTableAmRoutine() asserts that required callbacks are filled in, remember
 118  * to update when adding a callback.
 119  */
 120 typedef struct TableAmRoutine
 121 {
 122         /* this must be set to T_TableAmRoutine */
 123         NodeTag         type;
 124
 125
 126         /* ------------------------------------------------------------------------
 127          * Slot related callbacks.
 128          * ------------------------------------------------------------------------
 129          */
 130
 131         /*
 132          * Return slot implementation suitable for storing a tuple of this AM.
 133          */
 134         const TupleTableSlotOps *(*slot_callbacks) (Relation rel);
 135
 136
 137         /* ------------------------------------------------------------------------
 138          * Table scan callbacks.
 139          * ------------------------------------------------------------------------
 140          */
 141
 142         /*
 143          * Start a scan of `rel`.  The callback has to return a TableScanDesc,
 144          * which will typically be embedded in a larger, AM specific, struct.
 145          *
 146          * If nkeys != 0, the results need to be filtered by those scan keys.
 147          *
 148          * pscan, if not NULL, will have already been initialized with
 149          * parallelscan_initialize(), and has to be for the same relation. Will
 150          * only be set coming from table_beginscan_parallel().
 151          *
 152          * allow_{strat, sync, pagemode} specify whether a scan strategy,
 153          * synchronized scans, or page mode may be used (although not every AM
 154          * will support those).
 155          *
 156          * is_{bitmapscan, samplescan} specify whether the scan is inteded to
 157          * support those types of scans.
 158          *
 159          * if temp_snap is true, the snapshot will need to be deallocated at
 160          * scan_end.
 161          */
 162         TableScanDesc (*scan_begin) (Relation rel,
 163                                                                  Snapshot snapshot,
 164                                                                  int nkeys, struct ScanKeyData *key,
 165                                                                  ParallelTableScanDesc pscan,
 166                                                                  bool allow_strat,
 167                                                                  bool allow_sync,
 168                                                                  bool allow_pagemode,
 169                                                                  bool is_bitmapscan,
 170                                                                  bool is_samplescan,
 171                                                                  bool temp_snap);
 172
 173         /*
 174          * Release resources and deallocate scan. If TableScanDesc.temp_snap,
 175          * TableScanDesc.rs_snapshot needs to be unregistered.
 176          */
 177         void            (*scan_end) (TableScanDesc scan);
 178
 179         /*
 180          * Restart relation scan.  If set_params is set to true, allow{strat,
 181          * sync, pagemode} (see scan_begin) changes should be taken into account.
 182          */
 183         void            (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key, bool set_params,
 184                                                                 bool allow_strat, bool allow_sync, bool allow_pagemode);
 185
 186         /*
 187          * Return next tuple from `scan`, store in slot.
 188          */
 189         bool            (*scan_getnextslot) (TableScanDesc scan,
 190                                                                          ScanDirection direction, TupleTableSlot *slot);
 191
 192
 193         /* ------------------------------------------------------------------------
 194          * Parallel table scan related functions.
 195          * ------------------------------------------------------------------------
 196          */
 197
 198         /*
 199          * Estimate the size of shared memory needed for a parallel scan of this
 200          * relation. The snapshot does not need to be accounted for.
 201          */
 202         Size            (*parallelscan_estimate) (Relation rel);
 203
 204         /*
 205          * Initialize ParallelTableScanDesc for a parallel scan of this relation.
 206          * pscan will be sized according to parallelscan_estimate() for the same
 207          * relation.
 208          */
 209         Size            (*parallelscan_initialize) (Relation rel, ParallelTableScanDesc pscan);
 210
 211         /*
 212          * Reinitilize `pscan` for a new scan. `rel` will be the same relation as
 213          * when `pscan` was initialized by parallelscan_initialize.
 214          */
 215         void            (*parallelscan_reinitialize) (Relation rel, ParallelTableScanDesc pscan);
 216
 217
 218         /* ------------------------------------------------------------------------
 219          * Index Scan Callbacks
 220          * ------------------------------------------------------------------------
 221          */
 222
 223         /*
 224          * Prepare to fetch tuples from the relation, as needed when fetching
 225          * tuples for an index scan.  The callback has to return a
 226          * IndexFetchTableData, which the AM will typically embed in a larger
 227          * structure with additional information.
 228          *
 229          * Tuples for an index scan can then be fetched via index_fetch_tuple.
 230          */
 231         struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);
 232
 233         /*
 234          * Reset index fetch. Typically this will release cross index fetch
 235          * resources held in IndexFetchTableData.
 236          */
 237         void            (*index_fetch_reset) (struct IndexFetchTableData *data);
 238
 239         /*
 240          * Release resources and deallocate index fetch.
 241          */
 242         void            (*index_fetch_end) (struct IndexFetchTableData *data);
 243
 244         /*
 245          * Fetch tuple at `tid` into `slot`, after doing a visibility test
 246          * according to `snapshot`. If a tuple was found and passed the visibility
 247          * test, return true, false otherwise.
 248          *
 249          * Note that AMs that do not necessarily update indexes when indexed
 250          * columns do not change, need to return the current/correct version of a
 251          * tuple as appropriate, even if the tid points to an older version of the
 252          * tuple.
 253          *
 254          * *call_again is false on the first call to index_fetch_tuple for a tid.
 255          * If there potentially is another tuple matching the tid, *call_again
 256          * needs be set to true by index_fetch_tuple, signalling to the caller
 257          * that index_fetch_tuple should be called again for the same tid.
 258          *
 259          * *all_dead, if all_dead is not NULL, should be set to true if by
 260          * index_fetch_tuple iff it is guaranteed that no backend needs to see
 261          * that tuple. Index AMs can use that do avoid returning that tid in
 262          * future searches.
 263          */
 264         bool            (*index_fetch_tuple) (struct IndexFetchTableData *scan,
 265                                                                           ItemPointer tid,
 266                                                                           Snapshot snapshot,
 267                                                                           TupleTableSlot *slot,
 268                                                                           bool *call_again, bool *all_dead);
 269
 270         /* ------------------------------------------------------------------------
 271          * Callbacks for non-modifying operations on individual tuples
 272          * ------------------------------------------------------------------------
 273          */
 274
 275
 276         /*
 277          * Fetch tuple at `tid` into `slot, after doing a visibility test
 278          * according to `snapshot`. If a tuple was found and passed the visibility
 279          * test, returns true, false otherwise.
 280          */
 281         bool            (*tuple_fetch_row_version) (Relation rel,
 282                                                                                         ItemPointer tid,
 283                                                                                         Snapshot snapshot,
 284                                                                                         TupleTableSlot *slot);
 285
 286         /*
 287          * Return the latest version of the tuple at `tid`, by updating `tid` to
 288          * point at the newest version.
 289          */
 290         void            (*tuple_get_latest_tid) (Relation rel,
 291                                                                                  Snapshot snapshot,
 292                                                                                  ItemPointer tid);
 293
 294         /*
 295          * Does the tuple in `slot` satisfy `snapshot`?  The slot needs to be of
 296          * the appropriate type for the AM.
 297          */
 298         bool            (*tuple_satisfies_snapshot) (Relation rel,
 299                                                                                          TupleTableSlot *slot,
 300                                                                                          Snapshot snapshot);
 301
 302         /* see table_compute_xid_horizon_for_tuples() */
 303         TransactionId (*compute_xid_horizon_for_tuples) (Relation rel,
 304                                                                                                          ItemPointerData *items,
 305                                                                                                          int nitems);
 306
 307
 308         /* ------------------------------------------------------------------------
 309          * Manipulations of physical tuples.
 310          * ------------------------------------------------------------------------
 311          */
 312
 313         /* see table_insert() for reference about parameters */
 314         void            (*tuple_insert) (Relation rel, TupleTableSlot *slot, CommandId cid,
 315                                                                  int options, struct BulkInsertStateData *bistate);
 316
 317         /* see table_insert() for reference about parameters */
 318         void            (*tuple_insert_speculative) (Relation rel,
 319                                                                                          TupleTableSlot *slot,
 320                                                                                          CommandId cid,
 321                                                                                          int options,
 322                                                                                          struct BulkInsertStateData *bistate,
 323                                                                                          uint32 specToken);
 324
 325         /* see table_insert() for reference about parameters */
 326         void            (*tuple_complete_speculative) (Relation rel,
 327                                                                                            TupleTableSlot *slot,
 328                                                                                            uint32 specToken,
 329                                                                                            bool succeeded);
 330
 331         /* see table_insert() for reference about parameters */
 332         TM_Result       (*tuple_delete) (Relation rel,
 333                                                                  ItemPointer tid,
 334                                                                  CommandId cid,
 335                                                                  Snapshot snapshot,
 336                                                                  Snapshot crosscheck,
 337                                                                  bool wait,
 338                                                                  TM_FailureData *tmfd,
 339                                                                  bool changingPart);
 340
 341         /* see table_insert() for reference about parameters */
 342         TM_Result       (*tuple_update) (Relation rel,
 343                                                                  ItemPointer otid,
 344                                                                  TupleTableSlot *slot,
 345                                                                  CommandId cid,
 346                                                                  Snapshot snapshot,
 347                                                                  Snapshot crosscheck,
 348                                                                  bool wait,
 349                                                                  TM_FailureData *tmfd,
 350                                                                  LockTupleMode *lockmode,
 351                                                                  bool *update_indexes);
 352
 353         /* see table_insert() for reference about parameters */
 354         TM_Result       (*tuple_lock) (Relation rel,
 355                                                            ItemPointer tid,
 356                                                            Snapshot snapshot,
 357                                                            TupleTableSlot *slot,
 358                                                            CommandId cid,
 359                                                            LockTupleMode mode,
 360                                                            LockWaitPolicy wait_policy,
 361                                                            uint8 flags,
 362                                                            TM_FailureData *tmfd);
 363
 364 } TableAmRoutine;
 365
 366
 367 /* ----------------------------------------------------------------------------
 368  * Slot functions.
 369  * ----------------------------------------------------------------------------
 370  */
 371
 372 /*
 373  * Returns slot callbacks suitable for holding tuples of the appropriate type
 374  * for the relation.  Works for tables, views, foreign tables and partitioned
 375  * tables.
 376  */
 377 extern const TupleTableSlotOps *table_slot_callbacks(Relation rel);
 378
 379 /*
 380  * Returns slot using the callbacks returned by table_slot_callbacks(), and
 381  * registers it on *reglist.
 382  */
 383 extern TupleTableSlot *table_slot_create(Relation rel, List **reglist);
 384
 385
 386 /* ----------------------------------------------------------------------------
 387  * Table scan functions.
 388  * ----------------------------------------------------------------------------
 389  */
 390
 391 /*
 392  * Start a scan of `rel`. Returned tuples pass a visibility test of
 393  * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
 394  */
 395 static inline TableScanDesc
 396 table_beginscan(Relation rel, Snapshot snapshot,
 397                                 int nkeys, struct ScanKeyData *key)
 398 {
 399         return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL,
 400                                                                            true, true, true, false, false, false);
 401 }
 402
 403 /*
 404  * Like table_beginscan(), but for scanning catalog. It'll automatically use a
 405  * snapshot appropriate for scanning catalog relations.
 406  */
 407 extern TableScanDesc table_beginscan_catalog(Relation rel, int nkeys,
 408                                                 struct ScanKeyData *key);
 409
 410 /*
 411  * Like table_beginscan(), but table_beginscan_strat() offers an extended API
 412  * that lets the caller control whether a nondefault buffer access strategy
 413  * can be used, and whether syncscan can be chosen (possibly resulting in the
 414  * scan not starting from block zero).  Both of these default to true with
 415  * plain table_beginscan.
 416  */
 417 static inline TableScanDesc
 418 table_beginscan_strat(Relation rel, Snapshot snapshot,
 419                                           int nkeys, struct ScanKeyData *key,
 420                                           bool allow_strat, bool allow_sync)
 421 {
 422         return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL,
 423                                                                            allow_strat, allow_sync, true,
 424                                                                            false, false, false);
 425 }
 426
 427
 428 /*
 429  * table_beginscan_bm is an alternative entry point for setting up a
 430  * TableScanDesc for a bitmap heap scan.  Although that scan technology is
 431  * really quite unlike a standard seqscan, there is just enough commonality to
 432  * make it worth using the same data structure.
 433  */
 434 static inline TableScanDesc
 435 table_beginscan_bm(Relation rel, Snapshot snapshot,
 436                                    int nkeys, struct ScanKeyData *key)
 437 {
 438         return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL,
 439                                                                            false, false, true, true, false, false);
 440 }
 441
 442 /*
 443  * table_beginscan_sampling is an alternative entry point for setting up a
 444  * TableScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
 445  * using the same data structure although the behavior is rather different.
 446  * In addition to the options offered by table_beginscan_strat, this call
 447  * also allows control of whether page-mode visibility checking is used.
 448  */
 449 static inline TableScanDesc
 450 table_beginscan_sampling(Relation rel, Snapshot snapshot,
 451                                                  int nkeys, struct ScanKeyData *key,
 452                                                  bool allow_strat, bool allow_sync, bool allow_pagemode)
 453 {
 454         return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL,
 455                                                                            allow_strat, allow_sync, allow_pagemode,
 456                                                                            false, true, false);
 457 }
 458
 459 /*
 460  * table_beginscan_analyze is an alternative entry point for setting up a
 461  * TableScanDesc for an ANALYZE scan.  As with bitmap scans, it's worth using
 462  * the same data structure although the behavior is rather different.
 463  */
 464 static inline TableScanDesc
 465 table_beginscan_analyze(Relation rel)
 466 {
 467         return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL,
 468                                                                            true, false, true,
 469                                                                            false, true, false);
 470 }
 471
 472 /*
 473  * End relation scan.
 474  */
 475 static inline void
 476 table_endscan(TableScanDesc scan)
 477 {
 478         scan->rs_rd->rd_tableam->scan_end(scan);
 479 }
 480
 481
 482 /*
 483  * Restart a relation scan.
 484  */
 485 static inline void
 486 table_rescan(TableScanDesc scan,
 487                          struct ScanKeyData *key)
 488 {
 489         scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
 490 }
 491
 492 /*
 493  * Restart a relation scan after changing params.
 494  *
 495  * This call allows changing the buffer strategy, syncscan, and pagemode
 496  * options before starting a fresh scan.  Note that although the actual use of
 497  * syncscan might change (effectively, enabling or disabling reporting), the
 498  * previously selected startblock will be kept.
 499  */
 500 static inline void
 501 table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key,
 502                                                 bool allow_strat, bool allow_sync, bool allow_pagemode)
 503 {
 504         scan->rs_rd->rd_tableam->scan_rescan(scan, key, true,
 505                                                                                  allow_strat, allow_sync,
 506                                                                                  allow_pagemode);
 507 }
 508
 509 /*
 510  * Update snapshot used by the scan.
 511  */
 512 extern void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot);
 513
 514
 515 /*
 516  * Return next tuple from `scan`, store in slot.
 517  */
 518 static inline bool
 519 table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 520 {
 521         slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);
 522         return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
 523 }
 524
 525
 526 /* ----------------------------------------------------------------------------
 527  * Parallel table scan related functions.
 528  * ----------------------------------------------------------------------------
 529  */
 530
 531 /*
 532  * Estimate the size of shared memory needed for a parallel scan of this
 533  * relation.
 534  */
 535 extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot);
 536
 537 /*
 538  * Initialize ParallelTableScanDesc for a parallel scan of this
 539  * relation. `pscan` needs to be sized according to parallelscan_estimate()
 540  * for the same relation.  Call this just once in the leader process; then,
 541  * individual workers attach via table_beginscan_parallel.
 542  */
 543 extern void table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, Snapshot snapshot);
 544
 545 /*
 546  * Begin a parallel scan. `pscan` needs to have been initialized with
 547  * table_parallelscan_initialize(), for the same relation. The initialization
 548  * does not need to have happened in this backend.
 549  *
 550  * Caller must hold a suitable lock on the correct relation.
 551  */
 552 extern TableScanDesc table_beginscan_parallel(Relation rel, ParallelTableScanDesc pscan);
 553
 554 /*
 555  * Restart a parallel scan.  Call this in the leader process.  Caller is
 556  * responsible for making sure that all workers have finished the scan
 557  * beforehand.
 558  */
 559 static inline void
 560 table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
 561 {
 562         rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
 563 }
 564
 565
 566 /* ----------------------------------------------------------------------------
 567  *  Index scan related functions.
 568  * ----------------------------------------------------------------------------
 569  */
 570
 571 /*
 572  * Prepare to fetch tuples from the relation, as needed when fetching tuples
 573  * for an index scan.
 574  *
 575  * Tuples for an index scan can then be fetched via table_index_fetch_tuple().
 576  */
 577 static inline IndexFetchTableData *
 578 table_index_fetch_begin(Relation rel)
 579 {
 580         return rel->rd_tableam->index_fetch_begin(rel);
 581 }
 582
 583 /*
 584  * Reset index fetch. Typically this will release cross index fetch resources
 585  * held in IndexFetchTableData.
 586  */
 587 static inline void
 588 table_index_fetch_reset(struct IndexFetchTableData *scan)
 589 {
 590         scan->rel->rd_tableam->index_fetch_reset(scan);
 591 }
 592
 593 /*
 594  * Release resources and deallocate index fetch.
 595  */
 596 static inline void
 597 table_index_fetch_end(struct IndexFetchTableData *scan)
 598 {
 599         scan->rel->rd_tableam->index_fetch_end(scan);
 600 }
 601
 602 /*
 603  * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
 604  * a visibility test according to `snapshot`. If a tuple was found and passed
 605  * the visibility test, returns true, false otherwise.
 606  *
 607  * *call_again needs to be false on the first call to table_index_fetch_tuple() for
 608  * a tid. If there potentially is another tuple matching the tid, *call_again
 609  * will be set to true, signalling that table_index_fetch_tuple() should be called
 610  * again for the same tid.
 611  *
 612  * *all_dead, if all_dead is not NULL, will be set to true by
 613  * table_index_fetch_tuple() iff it is guaranteed that no backend needs to see
 614  * that tuple. Index AMs can use that do avoid returning that tid in future
 615  * searches.
 616  *
 617  * The difference between this function and table_fetch_row_version is that
 618  * this function returns the currently visible version of a row if the AM
 619  * supports storing multiple row versions reachable via a single index entry
 620  * (like heap's HOT). Whereas table_fetch_row_version only evaluates the the
 621  * tuple exactly at `tid`. Outside of index entry ->table tuple lookups,
 622  * table_fetch_row_version is what's usually needed.
 623  */
 624 static inline bool
 625 table_index_fetch_tuple(struct IndexFetchTableData *scan,
 626                                                 ItemPointer tid,
 627                                                 Snapshot snapshot,
 628                                                 TupleTableSlot *slot,
 629                                                 bool *call_again, bool *all_dead)
 630 {
 631
 632         return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
 633                                                                                                         slot, call_again,
 634                                                                                                         all_dead);
 635 }
 636
 637 /*
 638  * This is a convenience wrapper around table_index_fetch_tuple() which
 639  * returns whether there are table tuple items corresponding to an index
 640  * entry.  This likely is only useful to verify if there's a conflict in a
 641  * unique index.
 642  */
 643 extern bool table_index_fetch_tuple_check(Relation rel,
 644                                                           ItemPointer tid,
 645                                                           Snapshot snapshot,
 646                                                           bool *all_dead);
 647
 648
 649 /* ------------------------------------------------------------------------
 650  * Functions for non-modifying operations on individual tuples
 651  * ------------------------------------------------------------------------
 652  */
 653
 654
 655 /*
 656  * Fetch tuple at `tid` into `slot, after doing a visibility test according to
 657  * `snapshot`. If a tuple was found and passed the visibility test, returns
 658  * true, false otherwise.
 659  *
 660  * See table_index_fetch_tuple's comment about what the difference between
 661  * these functions is. This function is the correct to use outside of
 662  * index entry->table tuple lookups.
 663  */
 664 static inline bool
 665 table_fetch_row_version(Relation rel,
 666                                                 ItemPointer tid,
 667                                                 Snapshot snapshot,
 668                                                 TupleTableSlot *slot)
 669 {
 670         return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
 671 }
 672
 673 /*
 674  * Return the latest version of the tuple at `tid`, by updating `tid` to
 675  * point at the newest version.
 676  */
 677 static inline void
 678 table_get_latest_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
 679 {
 680         rel->rd_tableam->tuple_get_latest_tid(rel, snapshot, tid);
 681 }
 682
 683 /*
 684  * Return true iff tuple in slot satisfies the snapshot.
 685  *
 686  * This assumes the slot's tuple is valid, and of the appropriate type for the
 687  * AM.
 688  *
 689  * Some AMs might modify the data underlying the tuple as a side-effect. If so
 690  * they ought to mark the relevant buffer dirty.
 691  */
 692 static inline bool
 693 table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
 694 {
 695         return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
 696 }
 697
 698 /*
 699  * Compute the newest xid among the tuples pointed to by items. This is used
 700  * to compute what snapshots to conflict with when replaying WAL records for
 701  * page-level index vacuums.
 702  */
 703 static inline TransactionId
 704 table_compute_xid_horizon_for_tuples(Relation rel,
 705                                                                          ItemPointerData *items,
 706                                                                          int nitems)
 707 {
 708         return rel->rd_tableam->compute_xid_horizon_for_tuples(rel, items, nitems);
 709 }
 710
 711
 712 /* ----------------------------------------------------------------------------
 713  *  Functions for manipulations of physical tuples.
 714  * ----------------------------------------------------------------------------
 715  */
 716
 717 /*
 718  * Insert a tuple from a slot into table AM routine.
 719  *
 720  * The options bitmask allows to specify options that allow to change the
 721  * behaviour of the AM. Several options might be ignored by AMs not supporting
 722  * them.
 723  *
 724  * If the TABLE_INSERT_SKIP_WAL option is specified, the new tuple will not
 725  * necessarily logged to WAL, even for a non-temp relation. It is the AMs
 726  * choice whether this optimization is supported.
 727  *
 728  * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
 729  * free space in the relation. This can save some cycles when we know the
 730  * relation is new and doesn't contain useful amounts of free space.  It's
 731  * commonly passed directly to RelationGetBufferForTuple, see for more info.
 732  *
 733  * TABLE_INSERT_FROZEN should only be specified for inserts into
 734  * relfilenodes created during the current subtransaction and when
 735  * there are no prior snapshots or pre-existing portals open.
 736  * This causes rows to be frozen, which is an MVCC violation and
 737  * requires explicit options chosen by user.
 738  *
 739  * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
 740  * information for the tuple. This should solely be used during table rewrites
 741  * where RelationIsLogicallyLogged(relation) is not yet accurate for the new
 742  * relation.
 743  *
 744  * Note that most of these options will be applied when inserting into the
 745  * heap's TOAST table, too, if the tuple requires any out-of-line data
 746  *
 747  *
 748  * The BulkInsertState object (if any; bistate can be NULL for default
 749  * behavior) is also just passed through to RelationGetBufferForTuple.
 750  *
 751  * On return the slot's tts_tid and tts_tableOid are updated to reflect the
 752  * insertion. But note that any toasting of fields within the slot is NOT
 753  * reflected in the slots contents.
 754  */
 755 static inline void
 756 table_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
 757                          int options, struct BulkInsertStateData *bistate)
 758 {
 759         rel->rd_tableam->tuple_insert(rel, slot, cid, options,
 760                                                                   bistate);
 761 }
 762
 763 /*
 764  * Perform a "speculative insertion". These can be backed out afterwards
 765  * without aborting the whole transaction.  Other sessions can wait for the
 766  * speculative insertion to be confirmed, turning it into a regular tuple, or
 767  * aborted, as if it never existed.  Speculatively inserted tuples behave as
 768  * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
 769  *
 770  * A transaction having performed a speculative insertion has to either abort,
 771  * or finish the speculative insertion with
 772  * table_complete_speculative(succeeded = ...).
 773  */
 774 static inline void
 775 table_insert_speculative(Relation rel, TupleTableSlot *slot, CommandId cid,
 776                                                  int options, struct BulkInsertStateData *bistate, uint32 specToken)
 777 {
 778         rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
 779                                                                                           bistate, specToken);
 780 }
 781
 782 /*
 783  * Complete "speculative insertion" started in the same transaction. If
 784  * succeeded is true, the tuple is fully inserted, if false, it's removed.
 785  */
 786 static inline void
 787 table_complete_speculative(Relation rel, TupleTableSlot *slot, uint32 specToken,
 788                                                    bool succeeded)
 789 {
 790         rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
 791                                                                                                 succeeded);
 792 }
 793
 794 /*
 795  * Delete a tuple.
 796  *
 797  * NB: do not call this directly unless prepared to deal with
 798  * concurrent-update conditions.  Use simple_table_delete instead.
 799  *
 800  * Input parameters:
 801  *      relation - table to be modified (caller must hold suitable lock)
 802  *      tid - TID of tuple to be deleted
 803  *      cid - delete command ID (used for visibility test, and stored into
 804  *              cmax if successful)
 805  *      crosscheck - if not InvalidSnapshot, also check tuple against this
 806  *      wait - true if should wait for any conflicting update to commit/abort
 807  * Output parameters:
 808  *      tmfd - filled in failure cases (see below)
 809  *      changingPart - true iff the tuple is being moved to another partition
 810  *              table due to an update of the partition key. Otherwise, false.
 811  *
 812  * Normal, successful return value is TM_Ok, which
 813  * actually means we did delete it.  Failure return codes are
 814  * TM_SelfModified, TM_Updated, or TM_BeingModified
 815  * (the last only possible if wait == false).
 816  *
 817  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 818  * t_xmax, and, if possible, and, if possible, t_cmax.  See comments for
 819  * struct TM_FailureData for additional info.
 820  */
 821 static inline TM_Result
 822 table_delete(Relation rel, ItemPointer tid, CommandId cid,
 823                          Snapshot snapshot, Snapshot crosscheck, bool wait,
 824                          TM_FailureData *tmfd, bool changingPart)
 825 {
 826         return rel->rd_tableam->tuple_delete(rel, tid, cid,
 827                                                                                  snapshot, crosscheck,
 828                                                                                  wait, tmfd, changingPart);
 829 }
 830
 831 /*
 832  * Update a tuple.
 833  *
 834  * NB: do not call this directly unless you are prepared to deal with
 835  * concurrent-update conditions.  Use simple_table_update instead.
 836  *
 837  * Input parameters:
 838  *      relation - table to be modified (caller must hold suitable lock)
 839  *      otid - TID of old tuple to be replaced
 840  *      newtup - newly constructed tuple data to store
 841  *      cid - update command ID (used for visibility test, and stored into
 842  *              cmax/cmin if successful)
 843  *      crosscheck - if not InvalidSnapshot, also check old tuple against this
 844  *      wait - true if should wait for any conflicting update to commit/abort
 845  * Output parameters:
 846  *      tmfd - filled in failure cases (see below)
 847  *      lockmode - filled with lock mode acquired on tuple
 848  *  update_indexes - in success cases this is set to true if new index entries
 849  *              are required for this tuple
 850  *
 851  * Normal, successful return value is TM_Ok, which
 852  * actually means we *did* update it.  Failure return codes are
 853  * TM_SelfModified, TM_Updated, or TM_BeingModified
 854  * (the last only possible if wait == false).
 855  *
 856  * On success, the header fields of *newtup are updated to match the new
 857  * stored tuple; in particular, newtup->t_self is set to the TID where the
 858  * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
 859  * update was done.  However, any TOAST changes in the new tuple's
 860  * data are not reflected into *newtup.
 861  *
 862  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 863  * t_xmax, and, if possible, t_cmax.  See comments for struct TM_FailureData
 864  * for additional info.
 865  */
 866 static inline TM_Result
 867 table_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
 868                          CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait,
 869                          TM_FailureData *tmfd, LockTupleMode *lockmode,
 870                          bool *update_indexes)
 871 {
 872         return rel->rd_tableam->tuple_update(rel, otid, slot,
 873                                                                                  cid, snapshot, crosscheck,
 874                                                                                  wait, tmfd,
 875                                                                                  lockmode, update_indexes);
 876 }
 877
 878 /*
 879  * Lock a tuple in the specified mode.
 880  *
 881  * Input parameters:
 882  *      relation: relation containing tuple (caller must hold suitable lock)
 883  *      tid: TID of tuple to lock
 884  *      snapshot: snapshot to use for visibility determinations
 885  *      cid: current command ID (used for visibility test, and stored into
 886  *              tuple's cmax if lock is successful)
 887  *      mode: lock mode desired
 888  *      wait_policy: what to do if tuple lock is not available
 889  *      flags:
 890  *              If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
 891  *              also lock descendant tuples if lock modes don't conflict.
 892  *              If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, update chain and lock lastest
 893  *              version.
 894  *
 895  * Output parameters:
 896  *      *slot: contains the target tuple
 897  *      *tmfd: filled in failure cases (see below)
 898  *
 899  * Function result may be:
 900  *      TM_Ok: lock was successfully acquired
 901  *      TM_Invisible: lock failed because tuple was never visible to us
 902  *      TM_SelfModified: lock failed because tuple updated by self
 903  *      TM_Updated: lock failed because tuple updated by other xact
 904  *      TM_Deleted: lock failed because tuple deleted by other xact
 905  *      TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
 906  *
 907  * In the failure cases other than TM_Invisible, the routine fills *tmfd with
 908  * the tuple's t_ctid, t_xmax, and, if possible, t_cmax.  See comments for
 909  * struct TM_FailureData for additional info.
 910  */
 911 static inline TM_Result
 912 table_lock_tuple(Relation rel, ItemPointer tid, Snapshot snapshot,
 913                                  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
 914                                  LockWaitPolicy wait_policy, uint8 flags,
 915                                  TM_FailureData *tmfd)
 916 {
 917         return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
 918                                                                            cid, mode, wait_policy,
 919                                                                            flags, tmfd);
 920 }
 921
 922
/* ----------------------------------------------------------------------------
 * Functions to make modifications a bit simpler.
 *
 * Only the prototypes live in this header; the functions are defined out of
 * line.  The comments on table_delete() and table_update() direct callers
 * that are not prepared to deal with concurrent-update conditions to
 * simple_table_delete() and simple_table_update() respectively.
 *
 * NOTE(review): compared to the table_insert/delete/update wrappers, these
 * take no command ID, crosscheck snapshot, wait flag or TM_FailureData
 * argument and return void; how failures are reported is not visible from
 * here -- check the definitions.
 * ----------------------------------------------------------------------------
 */

extern void simple_table_insert(Relation rel, TupleTableSlot *slot);
extern void simple_table_delete(Relation rel, ItemPointer tid,
					Snapshot snapshot);
extern void simple_table_update(Relation rel, ItemPointer otid,
					TupleTableSlot *slot, Snapshot snapshot,
					bool *update_indexes);
 934
 935
/* ----------------------------------------------------------------------------
 * Helper functions to implement parallel scans for block oriented AMs.
 *
 * Only the prototypes live in this header; the functions are defined out of
 * line.
 *
 * NOTE(review): judging by names and signatures only, estimate/initialize/
 * reinitialize size and set up the shared ParallelTableScanDesc, while
 * startblock_init/nextpage hand out blocks from a
 * ParallelBlockTableScanDesc -- confirm against the definitions before
 * relying on this.
 * ----------------------------------------------------------------------------
 */

extern Size table_block_parallelscan_estimate(Relation rel);
extern Size table_block_parallelscan_initialize(Relation rel,
									ParallelTableScanDesc pscan);
extern void table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan);
extern BlockNumber table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanDesc pbscan);
extern void table_block_parallelscan_startblock_init(Relation rel, ParallelBlockTableScanDesc pbscan);
 947
 948
/* ----------------------------------------------------------------------------
 * Functions in tableamapi.c
 *
 * The three Get*TableAmRoutine functions each return a pointer to a const
 * TableAmRoutine, looked up from a handler function OID, from an access
 * method OID, or with no argument at all (the heap AM, going by the name).
 *
 * NOTE(review): check_default_table_access_method has the shape of a GUC
 * check hook (newval/extra/source), presumably for the
 * default_table_access_method setting declared near the top of this file --
 * confirm against the GUC table.
 * ----------------------------------------------------------------------------
 */

extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
extern const TableAmRoutine *GetTableAmRoutineByAmId(Oid amoid);
extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
extern bool check_default_table_access_method(char **newval, void **extra,
								  GucSource source);
 959
 960 #endif                                                  /* TABLEAM_H */