granicus.if.org Git - postgresql/blob - src/include/storage/predicate_internals.h

   1 /*-------------------------------------------------------------------------
   2  *
   3  * predicate_internals.h
   4  *        POSTGRES internal predicate locking definitions.
   5  *
   6  *
   7  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
   8  * Portions Copyright (c) 1994, Regents of the University of California
   9  *
  10  * src/include/storage/predicate_internals.h
  11  *
  12  *-------------------------------------------------------------------------
  13  */
  14 #ifndef PREDICATE_INTERNALS_H
  15 #define PREDICATE_INTERNALS_H
  16
  17 #include "storage/lock.h"
  18
  19 /*
  20  * Commit number.
  21  */
  22 typedef uint64 SerCommitSeqNo;
  23
  24 /*
  25  * Reserved commit sequence numbers:
  26  *      - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
  27  *        used as a SerCommitSeqNo, even an invalid one
  28  *      - InvalidSerCommitSeqNo is used to indicate a transaction that
  29  *        hasn't committed yet, so use a number greater than all valid
  30  *        ones to make comparison do the expected thing
  31  *      - RecoverySerCommitSeqNo is used to refer to transactions that
  32  *        happened before a crash/recovery, since we restart the sequence
  33  *        at that point.  It's earlier than all normal sequence numbers,
  34  *        and is only used by recovered prepared transactions
  35  */
  36 #define InvalidSerCommitSeqNo           ((SerCommitSeqNo) UINT64CONST(0xFFFFFFFFFFFFFFFF))
  37 #define RecoverySerCommitSeqNo          ((SerCommitSeqNo) 1)
  38 #define FirstNormalSerCommitSeqNo       ((SerCommitSeqNo) 2)
  39
  40 /*
  41  * The SERIALIZABLEXACT struct contains information needed for each
  42  * serializable database transaction to support SSI techniques.
  43  *
  44  * A home-grown list is maintained in shared memory to manage these.
  45  * An entry is used when the serializable transaction acquires a snapshot.
  46  * Unless the transaction is rolled back, this entry must generally remain
  47  * until all concurrent transactions have completed.  (There are special
  48  * optimizations for READ ONLY transactions which often allow them to be
  49  * cleaned up earlier.)  A transaction which is rolled back is cleaned up
  50  * as soon as possible.
  51  *
  52  * Eligibility for cleanup of committed transactions is generally determined
  53  * by comparing the transaction's finishedBefore field to
  54  * SerializableGlobalXmin.
  55  */
  56 typedef struct SERIALIZABLEXACT
  57 {
  58         VirtualTransactionId vxid;      /* The executing process always has one of
  59                                                                  * these. */
  60
  61         /*
  62          * We use two numbers to track the order that transactions commit. Before
  63          * commit, a transaction is marked as prepared, and prepareSeqNo is set.
  64          * Shortly after commit, it's marked as committed, and commitSeqNo is set.
  65          * This doesn't give a strict commit order, but these two values together
  66          * are good enough for us, as we can always err on the safe side and
  67          * assume that there's a conflict, if we can't be sure of the exact
  68          * ordering of two commits.
  69          *
  70          * Note that a transaction is marked as prepared for a short period during
  71          * commit processing, even if two-phase commit is not used. But with
  72          * two-phase commit, a transaction can stay in prepared state for some
  73          * time.
  74          */
  75         SerCommitSeqNo prepareSeqNo;
  76         SerCommitSeqNo commitSeqNo;
  77
  78         /* these values are not both interesting at the same time */
  79         union
  80         {
  81                 SerCommitSeqNo earliestOutConflictCommit;               /* when committed with
  82                                                                                                                  * conflict out */
  83                 SerCommitSeqNo lastCommitBeforeSnapshot;                /* when not committed or
  84                                                                                                                  * no conflict out */
  85         }                       SeqNo;
  86         SHM_QUEUE       outConflicts;   /* list of write transactions whose data we
  87                                                                  * couldn't read. */
  88         SHM_QUEUE       inConflicts;    /* list of read transactions which couldn't
  89                                                                  * see our write. */
  90         SHM_QUEUE       predicateLocks; /* list of associated PREDICATELOCK objects */
  91         SHM_QUEUE       finishedLink;   /* list link in
  92                                                                  * FinishedSerializableTransactions */
  93
  94         /*
  95          * for r/o transactions: list of concurrent r/w transactions that we could
  96          * potentially have conflicts with, and vice versa for r/w transactions
  97          */
  98         SHM_QUEUE       possibleUnsafeConflicts;
  99
 100         TransactionId topXid;           /* top level xid for the transaction, if one
 101                                                                  * exists; else invalid */
 102         TransactionId finishedBefore;           /* invalid means still running; else
 103                                                                                  * the struct expires when no
 104                                                                                  * serializable xids are before this. */
 105         TransactionId xmin;                     /* the transaction's snapshot xmin */
 106         uint32          flags;                  /* OR'd combination of values defined below */
 107         int                     pid;                    /* pid of associated process */
 108 } SERIALIZABLEXACT;
 109
 110 #define SXACT_FLAG_COMMITTED                    0x00000001              /* already committed */
 111 #define SXACT_FLAG_PREPARED                             0x00000002              /* about to commit */
 112 #define SXACT_FLAG_ROLLED_BACK                  0x00000004              /* already rolled back */
 113 #define SXACT_FLAG_DOOMED                               0x00000008              /* will roll back */
 114 /*
 115  * The following flag actually means that the flagged transaction has a
 116  * conflict out *to a transaction which committed ahead of it*.  It's hard
 117  * to get that into a name of a reasonable length.
 118  */
 119 #define SXACT_FLAG_CONFLICT_OUT                 0x00000010
 120 #define SXACT_FLAG_READ_ONLY                    0x00000020
 121 #define SXACT_FLAG_DEFERRABLE_WAITING   0x00000040
 122 #define SXACT_FLAG_RO_SAFE                              0x00000080
 123 #define SXACT_FLAG_RO_UNSAFE                    0x00000100
 124 #define SXACT_FLAG_SUMMARY_CONFLICT_IN  0x00000200
 125 #define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400
 126
 127 /*
 128  * The following types are used to provide an ad hoc list for holding
 129  * SERIALIZABLEXACT objects.  An HTAB is overkill, since there is no need to
 130  * access these by key -- there are direct pointers to these objects where
 131  * needed.      If a shared memory list is created, these types can probably be
 132  * eliminated in favor of using the general solution.
 133  */
 134 typedef struct PredXactListElementData
 135 {
 136         SHM_QUEUE       link;
 137         SERIALIZABLEXACT sxact;
 138 }       PredXactListElementData;
 139
 140 typedef struct PredXactListElementData *PredXactListElement;
 141
 142 #define PredXactListElementDataSize \
 143                 ((Size)MAXALIGN(sizeof(PredXactListElementData)))
 144
 145 typedef struct PredXactListData
 146 {
 147         SHM_QUEUE       availableList;
 148         SHM_QUEUE       activeList;
 149
 150         /*
 151          * These global variables are maintained when registering and cleaning up
 152          * serializable transactions.  They must be global across all backends,
 153          * but are not needed outside the predicate.c source file. Protected by
 154          * SerializableXactHashLock.
 155          */
 156         TransactionId SxactGlobalXmin;          /* global xmin for active serializable
 157                                                                                  * transactions */
 158         int                     SxactGlobalXminCount;   /* how many active serializable
 159                                                                                  * transactions have this xmin */
 160         int                     WritableSxactCount;             /* how many non-read-only serializable
 161                                                                                  * transactions are active */
 162         SerCommitSeqNo LastSxactCommitSeqNo;            /* a strictly monotonically
 163                                                                                                  * increasing number for
 164                                                                                                  * commits of serializable
 165                                                                                                  * transactions */
 166         /* Protected by SerializableXactHashLock. */
 167         SerCommitSeqNo CanPartialClearThrough;          /* can clear predicate locks
 168                                                                                                  * and inConflicts for
 169                                                                                                  * committed transactions
 170                                                                                                  * through this seq no */
 171         /* Protected by SerializableFinishedListLock. */
 172         SerCommitSeqNo HavePartialClearedThrough;       /* have cleared through this
 173                                                                                                  * seq no */
 174         SERIALIZABLEXACT *OldCommittedSxact;            /* shared copy of dummy sxact */
 175
 176         PredXactListElement element;
 177 }       PredXactListData;
 178
 179 typedef struct PredXactListData *PredXactList;
 180
 181 #define PredXactListDataSize \
 182                 ((Size)MAXALIGN(sizeof(PredXactListData)))
 183
 184
 185 /*
 186  * The following types are used to provide lists of rw-conflicts between
 187  * pairs of transactions.  Since exactly the same information is needed,
 188  * they are also used to record possible unsafe transaction relationships
 189  * for purposes of identifying safe snapshots for read-only transactions.
 190  *
 191  * When a RWConflictData is not in use to record either type of relationship
 192  * between a pair of transactions, it is kept on an "available" list.  The
 193  * outLink field is used for maintaining that list.
 194  */
 195 typedef struct RWConflictData
 196 {
 197         SHM_QUEUE       outLink;                /* link for list of conflicts out from a sxact */
 198         SHM_QUEUE       inLink;                 /* link for list of conflicts in to a sxact */
 199         SERIALIZABLEXACT *sxactOut;
 200         SERIALIZABLEXACT *sxactIn;
 201 }       RWConflictData;
 202
 203 typedef struct RWConflictData *RWConflict;
 204
 205 #define RWConflictDataSize \
 206                 ((Size)MAXALIGN(sizeof(RWConflictData)))
 207
 208 typedef struct RWConflictPoolHeaderData
 209 {
 210         SHM_QUEUE       availableList;
 211         RWConflict      element;
 212 }       RWConflictPoolHeaderData;
 213
 214 typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;
 215
 216 #define RWConflictPoolHeaderDataSize \
 217                 ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData)))
 218
 219
 220 /*
 221  * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
 222  * transaction or any of its subtransactions.
 223  */
 224 typedef struct SERIALIZABLEXIDTAG
 225 {
 226         TransactionId xid;
 227 } SERIALIZABLEXIDTAG;
 228
 229 /*
 230  * The SERIALIZABLEXID struct provides a link from a TransactionId for a
 231  * serializable transaction to the related SERIALIZABLEXACT record, even if
 232  * the transaction has completed and its connection has been closed.
 233  *
 234  * These are created as new top level transaction IDs are first assigned to
 235  * transactions which are participating in predicate locking.  This may
 236  * never happen for a particular transaction if it doesn't write anything.
 237  * They are removed with their related serializable transaction objects.
 238  *
 239  * The SubTransGetTopmostTransaction method is used where necessary to get
 240  * from an XID which might be from a subtransaction to the top level XID.
 241  */
 242 typedef struct SERIALIZABLEXID
 243 {
 244         /* hash key */
 245         SERIALIZABLEXIDTAG tag;
 246
 247         /* data */
 248         SERIALIZABLEXACT *myXact;       /* pointer to the top level transaction data */
 249 } SERIALIZABLEXID;
 250
 251
 252 /*
 253  * The PREDICATELOCKTARGETTAG struct identifies a database object which can
 254  * be the target of predicate locks.
 255  *
 256  * Note that the hash function being used doesn't properly respect tag
 257  * length -- it will go to a four byte boundary past the end of the tag.
 258  * If you change this struct, make sure any slack space is initialized,
 259  * so that any random bytes in the middle or at the end are not included
 260  * in the hash.
 261  *
 262  * TODO SSI: If we always use the same fields for the same type of value, we
 263  * should rename these.  Holding off until it's clear there are no exceptions.
 264  * Since indexes are relations with blocks and tuples, it's looking likely that
 265  * the rename will be possible.  If not, we may need to divide the last field
 266  * and use part of it for a target type, so that we know how to interpret the
 267  * data..
 268  */
 269 typedef struct PREDICATELOCKTARGETTAG
 270 {
 271         uint32          locktag_field1; /* a 32-bit ID field */
 272         uint32          locktag_field2; /* a 32-bit ID field */
 273         uint32          locktag_field3; /* a 32-bit ID field */
 274         uint32          locktag_field4; /* a 32-bit ID field */
 275         uint32          locktag_field5; /* a 32-bit ID field */
 276 } PREDICATELOCKTARGETTAG;
 277
 278 /*
 279  * The PREDICATELOCKTARGET struct represents a database object on which there
 280  * are predicate locks.
 281  *
 282  * A hash list of these objects is maintained in shared memory.  An entry is
 283  * added when a predicate lock is requested on an object which doesn't
 284  * already have one.  An entry is removed when the last lock is removed from
 285  * its list.
 286  *
 287  * Because a particular target might become obsolete, due to update to a new
 288  * version, before the reading transaction is obsolete, we need some way to
 289  * prevent errors from reuse of a tuple ID.  Rather than attempting to clean
 290  * up the targets as the related tuples are pruned or vacuumed, we check the
 291  * xmin on access.      This should be far less costly.
 292  */
 293 typedef struct PREDICATELOCKTARGET
 294 {
 295         /* hash key */
 296         PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
 297
 298         /* data */
 299         SHM_QUEUE       predicateLocks; /* list of PREDICATELOCK objects assoc. with
 300                                                                  * predicate lock target */
 301 } PREDICATELOCKTARGET;
 302
 303
 304 /*
 305  * The PREDICATELOCKTAG struct identifies an individual predicate lock.
 306  *
 307  * It is the combination of predicate lock target (which is a lockable
 308  * object) and a serializable transaction which has acquired a lock on that
 309  * target.
 310  */
 311 typedef struct PREDICATELOCKTAG
 312 {
 313         PREDICATELOCKTARGET *myTarget;
 314         SERIALIZABLEXACT *myXact;
 315 } PREDICATELOCKTAG;
 316
 317 /*
 318  * The PREDICATELOCK struct represents an individual lock.
 319  *
 320  * An entry can be created here when the related database object is read, or
 321  * by promotion of multiple finer-grained targets.      All entries related to a
 322  * serializable transaction are removed when that serializable transaction is
 323  * cleaned up.  Entries can also be removed when they are combined into a
 324  * single coarser-grained lock entry.
 325  */
 326 typedef struct PREDICATELOCK
 327 {
 328         /* hash key */
 329         PREDICATELOCKTAG tag;           /* unique identifier of lock */
 330
 331         /* data */
 332         SHM_QUEUE       targetLink;             /* list link in PREDICATELOCKTARGET's list of
 333                                                                  * predicate locks */
 334         SHM_QUEUE       xactLink;               /* list link in SERIALIZABLEXACT's list of
 335                                                                  * predicate locks */
 336         SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
 337 } PREDICATELOCK;
 338
 339
 340 /*
 341  * The LOCALPREDICATELOCK struct represents a local copy of data which is
 342  * also present in the PREDICATELOCK table, organized for fast access without
 343  * needing to acquire a LWLock.  It is strictly for optimization.
 344  *
 345  * Each serializable transaction creates its own local hash table to hold a
 346  * collection of these.  This information is used to determine when a number
 347  * of fine-grained locks should be promoted to a single coarser-grained lock.
 348  * The information is maintained more-or-less in parallel to the
 349  * PREDICATELOCK data, but because this data is not protected by locks and is
 350  * only used in an optimization heuristic, it is allowed to drift in a few
 351  * corner cases where maintaining exact data would be expensive.
 352  *
 353  * The hash table is created when the serializable transaction acquires its
 354  * snapshot, and its memory is released upon completion of the transaction.
 355  */
 356 typedef struct LOCALPREDICATELOCK
 357 {
 358         /* hash key */
 359         PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
 360
 361         /* data */
 362         bool            held;                   /* is lock held, or just its children?  */
 363         int                     childLocks;             /* number of child locks currently held */
 364 } LOCALPREDICATELOCK;
 365
 366
 367 /*
 368  * The types of predicate locks which can be acquired.
 369  */
 370 typedef enum PredicateLockTargetType
 371 {
 372         PREDLOCKTAG_RELATION,
 373         PREDLOCKTAG_PAGE,
 374         PREDLOCKTAG_TUPLE
 375         /* TODO SSI: Other types may be needed for index locking */
 376 } PredicateLockTargetType;
 377
 378
 379 /*
 380  * This structure is used to quickly capture a copy of all predicate
 381  * locks.  This is currently used only by the pg_lock_status function,
 382  * which in turn is used by the pg_locks view.
 383  */
 384 typedef struct PredicateLockData
 385 {
 386         int                     nelements;
 387         PREDICATELOCKTARGETTAG *locktags;
 388         SERIALIZABLEXACT *xacts;
 389 } PredicateLockData;
 390
 391
 392 /*
 393  * These macros define how we map logical IDs of lockable objects into the
 394  * physical fields of PREDICATELOCKTARGETTAG.   Use these to set up values,
 395  * rather than accessing the fields directly.  Note multiple eval of target!
 396  */
 397 #define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
 398         ((locktag).locktag_field1 = (dboid), \
 399          (locktag).locktag_field2 = (reloid), \
 400          (locktag).locktag_field3 = InvalidBlockNumber, \
 401          (locktag).locktag_field4 = InvalidOffsetNumber, \
 402          (locktag).locktag_field5 = InvalidTransactionId)
 403
 404 #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
 405         ((locktag).locktag_field1 = (dboid), \
 406          (locktag).locktag_field2 = (reloid), \
 407          (locktag).locktag_field3 = (blocknum), \
 408          (locktag).locktag_field4 = InvalidOffsetNumber, \
 409          (locktag).locktag_field5 = InvalidTransactionId)
 410
 411 #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum,xmin) \
 412         ((locktag).locktag_field1 = (dboid), \
 413          (locktag).locktag_field2 = (reloid), \
 414          (locktag).locktag_field3 = (blocknum), \
 415          (locktag).locktag_field4 = (offnum), \
 416          (locktag).locktag_field5 = (xmin))
 417
 418 #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
 419         ((Oid) (locktag).locktag_field1)
 420 #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
 421         ((Oid) (locktag).locktag_field2)
 422 #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
 423         ((BlockNumber) (locktag).locktag_field3)
 424 #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
 425         ((OffsetNumber) (locktag).locktag_field4)
 426 #define GET_PREDICATELOCKTARGETTAG_XMIN(locktag) \
 427         ((TransactionId) (locktag).locktag_field5)
 428 #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag)                                                         \
 429         (((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
 430          (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE :   \
 431           PREDLOCKTAG_RELATION))
 432
 433 /*
 434  * Two-phase commit statefile records. There are two types: for each
 435  * transaction, we generate one per-transaction record and a variable
 436  * number of per-predicate-lock records.
 437  */
 438 typedef enum TwoPhasePredicateRecordType
 439 {
 440         TWOPHASEPREDICATERECORD_XACT,
 441         TWOPHASEPREDICATERECORD_LOCK
 442 } TwoPhasePredicateRecordType;
 443
 444 /*
 445  * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
 446  * much is needed because most of it not meaningful for a recovered
 447  * prepared transaction.
 448  *
 449  * In particular, we do not record the in and out conflict lists for a
 450  * prepared transaction because the associated SERIALIZABLEXACTs will
 451  * not be available after recovery. Instead, we simply record the
 452  * existence of each type of conflict by setting the transaction's
 453  * summary conflict in/out flag.
 454  */
 455 typedef struct TwoPhasePredicateXactRecord
 456 {
 457         TransactionId xmin;
 458         uint32          flags;
 459 } TwoPhasePredicateXactRecord;
 460
 461 /* Per-lock state */
 462 typedef struct TwoPhasePredicateLockRecord
 463 {
 464         PREDICATELOCKTARGETTAG target;
 465 } TwoPhasePredicateLockRecord;
 466
 467 typedef struct TwoPhasePredicateRecord
 468 {
 469         TwoPhasePredicateRecordType type;
 470         union
 471         {
 472                 TwoPhasePredicateXactRecord xactRecord;
 473                 TwoPhasePredicateLockRecord lockRecord;
 474         }                       data;
 475 } TwoPhasePredicateRecord;
 476
 477 /*
 478  * Define a macro to use for an "empty" SERIALIZABLEXACT reference.
 479  */
 480 #define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
 481
 482
 483 /*
 484  * Function definitions for functions needing awareness of predicate
 485  * locking internals.
 486  */
 487 extern PredicateLockData *GetPredicateLockStatusData(void);
 488
 489
 490 #endif   /* PREDICATE_INTERNALS_H */