granicus.if.org Git - postgresql/blob - src/backend/utils/cache/inval.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * inval.c
   4  *        POSTGRES cache invalidation dispatcher code.
   5  *
   6  *      This is subtle stuff, so pay attention:
   7  *
   8  *      When a tuple is updated or deleted, our standard time qualification rules
   9  *      consider that it is *still valid* so long as we are in the same command,
  10  *      ie, until the next CommandCounterIncrement() or transaction commit.
  11  *      (See utils/time/tqual.c, and note that system catalogs are generally
  12  *      scanned under SnapshotNow rules by the system, or plain user snapshots
  13  *      for user queries.)      At the command boundary, the old tuple stops
  14  *      being valid and the new version, if any, becomes valid.  Therefore,
  15  *      we cannot simply flush a tuple from the system caches during heap_update()
  16  *      or heap_delete().  The tuple is still good at that point; what's more,
  17  *      even if we did flush it, it might be reloaded into the caches by a later
  18  *      request in the same command.  So the correct behavior is to keep a list
  19  *      of outdated (updated/deleted) tuples and then do the required cache
  20  *      flushes at the next command boundary.  We must also keep track of
  21  *      inserted tuples so that we can flush "negative" cache entries that match
  22  *      the new tuples; again, that mustn't happen until end of command.
  23  *
  24  *      Once we have finished the command, we still need to remember inserted
  25  *      tuples (including new versions of updated tuples), so that we can flush
  26  *      them from the caches if we abort the transaction.  Similarly, we'd better
  27  *      be able to flush "negative" cache entries that may have been loaded in
  28  *      place of deleted tuples, so we still need the deleted ones too.
  29  *
  30  *      If we successfully complete the transaction, we have to broadcast all
  31  *      these invalidation events to other backends (via the SI message queue)
  32  *      so that they can flush obsolete entries from their caches.      Note we have
  33  *      to record the transaction commit before sending SI messages, otherwise
  34  *      the other backends won't see our updated tuples as good.
  35  *
  36  *      When a subtransaction aborts, we can process and discard any events
  37  *      it has queued.  When a subtransaction commits, we just add its events
  38  *      to the pending lists of the parent transaction.
  39  *
  40  *      In short, we need to remember until xact end every insert or delete
  41  *      of a tuple that might be in the system caches.  Updates are treated as
  42  *      two events, delete + insert, for simplicity.  (There are cases where
  43  *      it'd be possible to record just one event, but we don't currently try.)
  44  *
  45  *      We do not need to register EVERY tuple operation in this way, just those
  46  *      on tuples in relations that have associated catcaches.  We do, however,
  47  *      have to register every operation on every tuple that *could* be in a
  48  *      catcache, whether or not it currently is in our cache.  Also, if the
  49  *      tuple is in a relation that has multiple catcaches, we need to register
  50  *      an invalidation message for each such catcache.  catcache.c's
  51  *      PrepareToInvalidateCacheTuple() routine provides the knowledge of which
  52  *      catcaches may need invalidation for a given tuple.
  53  *
  54  *      Also, whenever we see an operation on a pg_class or pg_attribute tuple,
  55  *      we register a relcache flush operation for the relation described by that
  56  *      tuple.  pg_class updates trigger an smgr flush operation as well.
  57  *
  58  *      We keep the relcache and smgr flush requests in lists separate from the
  59  *      catcache tuple flush requests.  This allows us to issue all the pending
  60  *      catcache flushes before we issue relcache flushes, which saves us from
  61  *      loading a catcache tuple during relcache load only to flush it again
  62  *      right away.  Also, we avoid queuing multiple relcache flush requests for
  63  *      the same relation, since a relcache flush is relatively expensive to do.
  64  *      (XXX is it worth testing likewise for duplicate catcache flush entries?
  65  *      Probably not.)
  66  *
  67  *      If a relcache flush is issued for a system relation that we preload
  68  *      from the relcache init file, we must also delete the init file so that
  69  *      it will be rebuilt during the next backend restart.  The actual work of
  70  *      manipulating the init file is in relcache.c, but we keep track of the
  71  *      need for it here.
  72  *
  73  *      The request lists proper are kept in CurTransactionContext of their
  74  *      creating (sub)transaction, since they can be forgotten on abort of that
  75  *      transaction but must be kept till top-level commit otherwise.  For
  76  *      simplicity we keep the controlling list-of-lists in TopTransactionContext.
  77  *
  78  *
  79  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  80  * Portions Copyright (c) 1994, Regents of the University of California
  81  *
  82  * IDENTIFICATION
  83  *        $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.86 2008/06/19 21:32:56 tgl Exp $
  84  *
  85  *-------------------------------------------------------------------------
  86  */
  87 #include "postgres.h"
  88
  89 #include "access/twophase_rmgr.h"
  90 #include "access/xact.h"
  91 #include "catalog/catalog.h"
  92 #include "miscadmin.h"
  93 #include "storage/sinval.h"
  94 #include "storage/smgr.h"
  95 #include "utils/inval.h"
  96 #include "utils/memutils.h"
  97 #include "utils/rel.h"
  98 #include "utils/syscache.h"
  99
 100
/*
 * To minimize palloc traffic, we keep pending requests in successively-
 * larger chunks (a slightly more sophisticated version of an expansible
 * array).  All request types can be stored as SharedInvalidationMessage
 * records.  The ordering of requests within a list is never significant.
 *
 * A chunk is allocated with room for "maxitems" messages even though the
 * msgs[] member is declared with a single element; AddInvalidationMessage
 * computes the allocation size as sizeof(InvalidationChunk) plus
 * (maxitems - 1) further messages.  Chunks are chained through "next",
 * and new chunks are pushed onto the front of the chain.
 */
typedef struct InvalidationChunk
{
	struct InvalidationChunk *next;		/* list link */
	int			nitems;			/* # items currently stored in chunk */
	int			maxitems;		/* size of allocated array in this chunk */
	SharedInvalidationMessage msgs[1];	/* VARIABLE LENGTH ARRAY */
} InvalidationChunk;			/* VARIABLE LENGTH STRUCTURE */
 114
/*
 * One logical set of invalidation messages, physically split into two chunk
 * lists so that all catcache messages can be processed before any
 * relcache/smgr message (see ProcessInvalidationMessages).  Either list
 * pointer is NULL while that list is empty.
 */
typedef struct InvalidationListHeader
{
	InvalidationChunk *cclist;	/* list of chunks holding catcache msgs */
	InvalidationChunk *rclist;	/* list of chunks holding relcache/smgr msgs */
} InvalidationListHeader;
 120
 121 /*----------------
 122  * Invalidation info is divided into two lists:
 123  *      1) events so far in current command, not yet reflected to caches.
 124  *      2) events in previous commands of current transaction; these have
 125  *         been reflected to local caches, and must be either broadcast to
 126  *         other backends or rolled back from local cache when we commit
 127  *         or abort the transaction.
 128  * Actually, we need two such lists for each level of nested transaction,
 129  * so that we can discard events from an aborted subtransaction.  When
 130  * a subtransaction commits, we append its lists to the parent's lists.
 131  *
 132  * The relcache-file-invalidated flag can just be a simple boolean,
 133  * since we only act on it at transaction commit; we don't care which
 134  * command of the transaction set it.
 135  *----------------
 136  */
 137
/*
 * Per-(sub)transaction invalidation state.  Entries form a stack linked
 * through "parent".
 */
typedef struct TransInvalidationInfo
{
	/* Back link to parent transaction's info */
	struct TransInvalidationInfo *parent;

	/* Subtransaction nesting depth */
	int			my_level;

	/* head of current-command event list */
	InvalidationListHeader CurrentCmdInvalidMsgs;

	/* head of previous-commands event list */
	InvalidationListHeader PriorCmdInvalidMsgs;

	/* init file must be invalidated? */
	bool		RelcacheInitFileInval;
} TransInvalidationInfo;

/*
 * Top of the stack of invalidation state.  It is NULL until AtStart_Inval
 * allocates the entry for a main transaction; the Register* routines
 * dereference it without checking, so they must only run after that.
 */
static TransInvalidationInfo *transInvalInfo = NULL;
 157
/*
 * Dynamically-registered callback functions.  Current implementation
 * assumes there won't be very many of these at once; could improve if needed.
 *
 * The registry is a fixed-size array; only the first cache_callback_count
 * slots are in use.  LocalExecuteInvalidationMessage calls an entry when its
 * "id" equals the cache id of a catcache message (passing InvalidOid) or
 * equals SHAREDINVALRELCACHE_ID for a relcache message (passing the
 * relation's OID).  InvalidateSystemCaches calls every entry with InvalidOid.
 */

#define MAX_CACHE_CALLBACKS 20

static struct CACHECALLBACK
{
	int16		id;				/* cache number or message type id */
	CacheCallbackFunction function;		/* function to invoke */
	Datum		arg;			/* passed through as the first argument */
}	cache_callback_list[MAX_CACHE_CALLBACKS];

/* number of slots of cache_callback_list currently in use */
static int	cache_callback_count = 0;
 173
/*
 * info values for 2PC callback
 *
 * AtPrepare_Inval tags each two-phase record it registers with one of these.
 * When the relcache init file must be invalidated, a FILE_BEFORE record is
 * registered ahead of the message records and a FILE_AFTER record behind
 * them; both carry no payload.
 */
#define TWOPHASE_INFO_MSG			0	/* SharedInvalidationMessage */
#define TWOPHASE_INFO_FILE_BEFORE	1	/* relcache file inval */
#define TWOPHASE_INFO_FILE_AFTER	2	/* relcache file inval */

/* Forward declaration; applied to each pending message by AtPrepare_Inval */
static void PersistInvalidationMessage(SharedInvalidationMessage *msg);
 180
 181
 182 /* ----------------------------------------------------------------
 183  *                              Invalidation list support functions
 184  *
 185  * These three routines encapsulate processing of the "chunked"
 186  * representation of what is logically just a list of messages.
 187  * ----------------------------------------------------------------
 188  */
 189
 190 /*
 191  * AddInvalidationMessage
 192  *              Add an invalidation message to a list (of chunks).
 193  *
 194  * Note that we do not pay any great attention to maintaining the original
 195  * ordering of the messages.
 196  */
 197 static void
 198 AddInvalidationMessage(InvalidationChunk **listHdr,
 199                                            SharedInvalidationMessage *msg)
 200 {
 201         InvalidationChunk *chunk = *listHdr;
 202
 203         if (chunk == NULL)
 204         {
 205                 /* First time through; create initial chunk */
 206 #define FIRSTCHUNKSIZE 32
 207                 chunk = (InvalidationChunk *)
 208                         MemoryContextAlloc(CurTransactionContext,
 209                                                            sizeof(InvalidationChunk) +
 210                                         (FIRSTCHUNKSIZE - 1) *sizeof(SharedInvalidationMessage));
 211                 chunk->nitems = 0;
 212                 chunk->maxitems = FIRSTCHUNKSIZE;
 213                 chunk->next = *listHdr;
 214                 *listHdr = chunk;
 215         }
 216         else if (chunk->nitems >= chunk->maxitems)
 217         {
 218                 /* Need another chunk; double size of last chunk */
 219                 int                     chunksize = 2 * chunk->maxitems;
 220
 221                 chunk = (InvalidationChunk *)
 222                         MemoryContextAlloc(CurTransactionContext,
 223                                                            sizeof(InvalidationChunk) +
 224                                                  (chunksize - 1) *sizeof(SharedInvalidationMessage));
 225                 chunk->nitems = 0;
 226                 chunk->maxitems = chunksize;
 227                 chunk->next = *listHdr;
 228                 *listHdr = chunk;
 229         }
 230         /* Okay, add message to current chunk */
 231         chunk->msgs[chunk->nitems] = *msg;
 232         chunk->nitems++;
 233 }
 234
 235 /*
 236  * Append one list of invalidation message chunks to another, resetting
 237  * the source chunk-list pointer to NULL.
 238  */
 239 static void
 240 AppendInvalidationMessageList(InvalidationChunk **destHdr,
 241                                                           InvalidationChunk **srcHdr)
 242 {
 243         InvalidationChunk *chunk = *srcHdr;
 244
 245         if (chunk == NULL)
 246                 return;                                 /* nothing to do */
 247
 248         while (chunk->next != NULL)
 249                 chunk = chunk->next;
 250
 251         chunk->next = *destHdr;
 252
 253         *destHdr = *srcHdr;
 254
 255         *srcHdr = NULL;
 256 }
 257
/*
 * ProcessMessageList
 *		Execute a code fragment once for each message in a chunk list.
 *
 * listHdr is the first chunk of the list; if it is NULL nothing happens.
 * Chunks are visited by following their "next" links, and within a chunk
 * entries 0 .. nitems-1 are visited in ascending order.
 *
 * The fragment should refer to the current message as *msg.  "msg" is a
 * local pointer declared by the macro, so it hides any variable of the same
 * name in the invoking function.  The fragment is expanded inline, so a
 * "return" inside it leaves the invoking function (the duplicate checks in
 * AddRelcacheInvalidationMessage and AddSmgrInvalidationMessage rely on
 * this).
 */
#define ProcessMessageList(listHdr, codeFragment) \
	do { \
		InvalidationChunk *_chunk; \
		for (_chunk = (listHdr); _chunk != NULL; _chunk = _chunk->next) \
		{ \
			int		_cindex; \
			for (_cindex = 0; _cindex < _chunk->nitems; _cindex++) \
			{ \
				SharedInvalidationMessage *msg = &_chunk->msgs[_cindex]; \
				codeFragment; \
			} \
		} \
	} while (0)
 277
/*
 * ProcessMessageListMulti
 *		Execute a code fragment once for each chunk in a chunk list.
 *
 * This is the group-wise counterpart of ProcessMessageList: instead of one
 * message at a time, the fragment is handed a whole chunk's worth.  It
 * should refer to the messages as msgs[], with n entries; both names are
 * locals declared by the macro and set from the chunk's msgs array and
 * nitems count.  The fragment runs for every chunk in the list, including
 * any chunk whose nitems is zero.
 */
#define ProcessMessageListMulti(listHdr, codeFragment) \
	do { \
		InvalidationChunk *_chunk; \
		for (_chunk = (listHdr); _chunk != NULL; _chunk = _chunk->next) \
		{ \
			SharedInvalidationMessage *msgs = _chunk->msgs; \
			int		n = _chunk->nitems; \
			codeFragment; \
		} \
	} while (0)
 294
 295
 296 /* ----------------------------------------------------------------
 297  *                              Invalidation set support functions
 298  *
 299  * These routines understand about the division of a logical invalidation
 300  * list into separate physical lists for catcache and relcache/smgr entries.
 301  * ----------------------------------------------------------------
 302  */
 303
 304 /*
 305  * Add a catcache inval entry
 306  */
 307 static void
 308 AddCatcacheInvalidationMessage(InvalidationListHeader *hdr,
 309                                                            int id, uint32 hashValue,
 310                                                            ItemPointer tuplePtr, Oid dbId)
 311 {
 312         SharedInvalidationMessage msg;
 313
 314         msg.cc.id = (int16) id;
 315         msg.cc.tuplePtr = *tuplePtr;
 316         msg.cc.dbId = dbId;
 317         msg.cc.hashValue = hashValue;
 318         AddInvalidationMessage(&hdr->cclist, &msg);
 319 }
 320
 321 /*
 322  * Add a relcache inval entry
 323  */
 324 static void
 325 AddRelcacheInvalidationMessage(InvalidationListHeader *hdr,
 326                                                            Oid dbId, Oid relId)
 327 {
 328         SharedInvalidationMessage msg;
 329
 330         /* Don't add a duplicate item */
 331         /* We assume dbId need not be checked because it will never change */
 332         ProcessMessageList(hdr->rclist,
 333                                            if (msg->rc.id == SHAREDINVALRELCACHE_ID &&
 334                                                    msg->rc.relId == relId)
 335                                            return);
 336
 337         /* OK, add the item */
 338         msg.rc.id = SHAREDINVALRELCACHE_ID;
 339         msg.rc.dbId = dbId;
 340         msg.rc.relId = relId;
 341         AddInvalidationMessage(&hdr->rclist, &msg);
 342 }
 343
 344 /*
 345  * Add an smgr inval entry
 346  */
 347 static void
 348 AddSmgrInvalidationMessage(InvalidationListHeader *hdr,
 349                                                    RelFileNode rnode)
 350 {
 351         SharedInvalidationMessage msg;
 352
 353         /* Don't add a duplicate item */
 354         ProcessMessageList(hdr->rclist,
 355                                            if (msg->sm.id == SHAREDINVALSMGR_ID &&
 356                                                    RelFileNodeEquals(msg->sm.rnode, rnode))
 357                                            return);
 358
 359         /* OK, add the item */
 360         msg.sm.id = SHAREDINVALSMGR_ID;
 361         msg.sm.rnode = rnode;
 362         AddInvalidationMessage(&hdr->rclist, &msg);
 363 }
 364
 365 /*
 366  * Append one list of invalidation messages to another, resetting
 367  * the source list to empty.
 368  */
 369 static void
 370 AppendInvalidationMessages(InvalidationListHeader *dest,
 371                                                    InvalidationListHeader *src)
 372 {
 373         AppendInvalidationMessageList(&dest->cclist, &src->cclist);
 374         AppendInvalidationMessageList(&dest->rclist, &src->rclist);
 375 }
 376
 377 /*
 378  * Execute the given function for all the messages in an invalidation list.
 379  * The list is not altered.
 380  *
 381  * catcache entries are processed first, for reasons mentioned above.
 382  */
 383 static void
 384 ProcessInvalidationMessages(InvalidationListHeader *hdr,
 385                                                         void (*func) (SharedInvalidationMessage *msg))
 386 {
 387         ProcessMessageList(hdr->cclist, func(msg));
 388         ProcessMessageList(hdr->rclist, func(msg));
 389 }
 390
 391 /*
 392  * As above, but the function is able to process an array of messages
 393  * rather than just one at a time.
 394  */
 395 static void
 396 ProcessInvalidationMessagesMulti(InvalidationListHeader *hdr,
 397                                                                  void (*func) (const SharedInvalidationMessage *msgs, int n))
 398 {
 399         ProcessMessageListMulti(hdr->cclist, func(msgs, n));
 400         ProcessMessageListMulti(hdr->rclist, func(msgs, n));
 401 }
 402
 403 /* ----------------------------------------------------------------
 404  *                                        private support functions
 405  * ----------------------------------------------------------------
 406  */
 407
 408 /*
 409  * RegisterCatcacheInvalidation
 410  *
 411  * Register an invalidation event for a catcache tuple entry.
 412  */
 413 static void
 414 RegisterCatcacheInvalidation(int cacheId,
 415                                                          uint32 hashValue,
 416                                                          ItemPointer tuplePtr,
 417                                                          Oid dbId)
 418 {
 419         AddCatcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
 420                                                                    cacheId, hashValue, tuplePtr, dbId);
 421 }
 422
 423 /*
 424  * RegisterRelcacheInvalidation
 425  *
 426  * As above, but register a relcache invalidation event.
 427  */
 428 static void
 429 RegisterRelcacheInvalidation(Oid dbId, Oid relId)
 430 {
 431         AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
 432                                                                    dbId, relId);
 433
 434         /*
 435          * Most of the time, relcache invalidation is associated with system
 436          * catalog updates, but there are a few cases where it isn't.  Quick
 437          * hack to ensure that the next CommandCounterIncrement() will think
 438          * that we need to do CommandEndInvalidationMessages().
 439          */
 440         (void) GetCurrentCommandId(true);
 441
 442         /*
 443          * If the relation being invalidated is one of those cached in the
 444          * relcache init file, mark that we need to zap that file at commit.
 445          */
 446         if (RelationIdIsInInitFile(relId))
 447                 transInvalInfo->RelcacheInitFileInval = true;
 448 }
 449
 450 /*
 451  * RegisterSmgrInvalidation
 452  *
 453  * As above, but register an smgr invalidation event.
 454  */
 455 static void
 456 RegisterSmgrInvalidation(RelFileNode rnode)
 457 {
 458         AddSmgrInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
 459                                                            rnode);
 460
 461         /*
 462          * As above, just in case there is not an associated catalog change.
 463          */
 464         (void) GetCurrentCommandId(true);
 465 }
 466
 467 /*
 468  * LocalExecuteInvalidationMessage
 469  *
 470  * Process a single invalidation message (which could be of any type).
 471  * Only the local caches are flushed; this does not transmit the message
 472  * to other backends.
 473  */
 474 static void
 475 LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
 476 {
 477         int                     i;
 478
 479         if (msg->id >= 0)
 480         {
 481                 if (msg->cc.dbId == MyDatabaseId || msg->cc.dbId == 0)
 482                 {
 483                         CatalogCacheIdInvalidate(msg->cc.id,
 484                                                                          msg->cc.hashValue,
 485                                                                          &msg->cc.tuplePtr);
 486
 487                         for (i = 0; i < cache_callback_count; i++)
 488                         {
 489                                 struct CACHECALLBACK *ccitem = cache_callback_list + i;
 490
 491                                 if (ccitem->id == msg->cc.id)
 492                                         (*ccitem->function) (ccitem->arg, InvalidOid);
 493                         }
 494                 }
 495         }
 496         else if (msg->id == SHAREDINVALRELCACHE_ID)
 497         {
 498                 if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid)
 499                 {
 500                         RelationCacheInvalidateEntry(msg->rc.relId);
 501
 502                         for (i = 0; i < cache_callback_count; i++)
 503                         {
 504                                 struct CACHECALLBACK *ccitem = cache_callback_list + i;
 505
 506                                 if (ccitem->id == SHAREDINVALRELCACHE_ID)
 507                                         (*ccitem->function) (ccitem->arg, msg->rc.relId);
 508                         }
 509                 }
 510         }
 511         else if (msg->id == SHAREDINVALSMGR_ID)
 512         {
 513                 /*
 514                  * We could have smgr entries for relations of other databases, so no
 515                  * short-circuit test is possible here.
 516                  */
 517                 smgrclosenode(msg->sm.rnode);
 518         }
 519         else
 520                 elog(FATAL, "unrecognized SI message id: %d", msg->id);
 521 }
 522
 523 /*
 524  *              InvalidateSystemCaches
 525  *
 526  *              This blows away all tuples in the system catalog caches and
 527  *              all the cached relation descriptors and smgr cache entries.
 528  *              Relation descriptors that have positive refcounts are then rebuilt.
 529  *
 530  *              We call this when we see a shared-inval-queue overflow signal,
 531  *              since that tells us we've lost some shared-inval messages and hence
 532  *              don't know what needs to be invalidated.
 533  */
 534 static void
 535 InvalidateSystemCaches(void)
 536 {
 537         int                     i;
 538
 539         ResetCatalogCaches();
 540         RelationCacheInvalidate();      /* gets smgr cache too */
 541
 542         for (i = 0; i < cache_callback_count; i++)
 543         {
 544                 struct CACHECALLBACK *ccitem = cache_callback_list + i;
 545
 546                 (*ccitem->function) (ccitem->arg, InvalidOid);
 547         }
 548 }
 549
 550 /*
 551  * PrepareForTupleInvalidation
 552  *              Detect whether invalidation of this tuple implies invalidation
 553  *              of catalog/relation cache entries; if so, register inval events.
 554  */
 555 static void
 556 PrepareForTupleInvalidation(Relation relation, HeapTuple tuple)
 557 {
 558         Oid                     tupleRelId;
 559         Oid                     databaseId;
 560         Oid                     relationId;
 561
 562         /* Do nothing during bootstrap */
 563         if (IsBootstrapProcessingMode())
 564                 return;
 565
 566         /*
 567          * We only need to worry about invalidation for tuples that are in system
 568          * relations; user-relation tuples are never in catcaches and can't affect
 569          * the relcache either.
 570          */
 571         if (!IsSystemRelation(relation))
 572                 return;
 573
 574         /*
 575          * TOAST tuples can likewise be ignored here. Note that TOAST tables are
 576          * considered system relations so they are not filtered by the above test.
 577          */
 578         if (IsToastRelation(relation))
 579                 return;
 580
 581         /*
 582          * First let the catcache do its thing
 583          */
 584         PrepareToInvalidateCacheTuple(relation, tuple,
 585                                                                   RegisterCatcacheInvalidation);
 586
 587         /*
 588          * Now, is this tuple one of the primary definers of a relcache entry?
 589          */
 590         tupleRelId = RelationGetRelid(relation);
 591
 592         if (tupleRelId == RelationRelationId)
 593         {
 594                 Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple);
 595                 RelFileNode rnode;
 596
 597                 relationId = HeapTupleGetOid(tuple);
 598                 if (classtup->relisshared)
 599                         databaseId = InvalidOid;
 600                 else
 601                         databaseId = MyDatabaseId;
 602
 603                 /*
 604                  * We need to send out an smgr inval as well as a relcache inval. This
 605                  * is needed because other backends might possibly possess smgr cache
 606                  * but not relcache entries for the target relation.
 607                  *
 608                  * Note: during a pg_class row update that assigns a new relfilenode
 609                  * or reltablespace value, we will be called on both the old and new
 610                  * tuples, and thus will broadcast invalidation messages showing both
 611                  * the old and new RelFileNode values.  This ensures that other
 612                  * backends will close smgr references to the old file.
 613                  *
 614                  * XXX possible future cleanup: it might be better to trigger smgr
 615                  * flushes explicitly, rather than indirectly from pg_class updates.
 616                  */
 617                 if (classtup->reltablespace)
 618                         rnode.spcNode = classtup->reltablespace;
 619                 else
 620                         rnode.spcNode = MyDatabaseTableSpace;
 621                 rnode.dbNode = databaseId;
 622                 rnode.relNode = classtup->relfilenode;
 623                 RegisterSmgrInvalidation(rnode);
 624         }
 625         else if (tupleRelId == AttributeRelationId)
 626         {
 627                 Form_pg_attribute atttup = (Form_pg_attribute) GETSTRUCT(tuple);
 628
 629                 relationId = atttup->attrelid;
 630
 631                 /*
 632                  * KLUGE ALERT: we always send the relcache event with MyDatabaseId,
 633                  * even if the rel in question is shared (which we can't easily tell).
 634                  * This essentially means that only backends in this same database
 635                  * will react to the relcache flush request.  This is in fact
 636                  * appropriate, since only those backends could see our pg_attribute
 637                  * change anyway.  It looks a bit ugly though.  (In practice, shared
 638                  * relations can't have schema changes after bootstrap, so we should
 639                  * never come here for a shared rel anyway.)
 640                  */
 641                 databaseId = MyDatabaseId;
 642         }
 643         else if (tupleRelId == IndexRelationId)
 644         {
 645                 Form_pg_index indextup = (Form_pg_index) GETSTRUCT(tuple);
 646
 647                 /*
 648                  * When a pg_index row is updated, we should send out a relcache inval
 649                  * for the index relation.      As above, we don't know the shared status
 650                  * of the index, but in practice it doesn't matter since indexes of
 651                  * shared catalogs can't have such updates.
 652                  */
 653                 relationId = indextup->indexrelid;
 654                 databaseId = MyDatabaseId;
 655         }
 656         else
 657                 return;
 658
 659         /*
 660          * Yes.  We need to register a relcache invalidation event.
 661          */
 662         RegisterRelcacheInvalidation(databaseId, relationId);
 663 }
 664
 665
 666 /* ----------------------------------------------------------------
 667  *                                        public functions
 668  * ----------------------------------------------------------------
 669  */
 670
 671 /*
 672  * AcceptInvalidationMessages
 673  *              Read and process invalidation messages from the shared invalidation
 674  *              message queue.
 675  *
 676  * Note:
 677  *              This should be called as the first step in processing a transaction.
 678  */
 679 void
 680 AcceptInvalidationMessages(void)
 681 {
 682         ReceiveSharedInvalidMessages(LocalExecuteInvalidationMessage,
 683                                                                  InvalidateSystemCaches);
 684
 685         /*
 686          * Test code to force cache flushes anytime a flush could happen.
 687          *
 688          * If used with CLOBBER_FREED_MEMORY, CLOBBER_CACHE_ALWAYS provides a
 689          * fairly thorough test that the system contains no cache-flush hazards.
 690          * However, it also makes the system unbelievably slow --- the regression
 691          * tests take about 100 times longer than normal.
 692          *
 693          * If you're a glutton for punishment, try CLOBBER_CACHE_RECURSIVELY. This
 694          * slows things by at least a factor of 10000, so I wouldn't suggest
 695          * trying to run the entire regression tests that way.  It's useful to try
 696          * a few simple tests, to make sure that cache reload isn't subject to
 697          * internal cache-flush hazards, but after you've done a few thousand
 698          * recursive reloads it's unlikely you'll learn more.
 699          */
 700 #if defined(CLOBBER_CACHE_ALWAYS)
 701         {
 702                 static bool in_recursion = false;
 703
 704                 if (!in_recursion)
 705                 {
 706                         in_recursion = true;
 707                         InvalidateSystemCaches();
 708                         in_recursion = false;
 709                 }
 710         }
 711 #elif defined(CLOBBER_CACHE_RECURSIVELY)
 712         InvalidateSystemCaches();
 713 #endif
 714 }
 715
 716 /*
 717  * AtStart_Inval
 718  *              Initialize inval lists at start of a main transaction.
 719  */
 720 void
 721 AtStart_Inval(void)
 722 {
 723         Assert(transInvalInfo == NULL);
 724         transInvalInfo = (TransInvalidationInfo *)
 725                 MemoryContextAllocZero(TopTransactionContext,
 726                                                            sizeof(TransInvalidationInfo));
 727         transInvalInfo->my_level = GetCurrentTransactionNestLevel();
 728 }
 729
 730 /*
 731  * AtPrepare_Inval
 732  *              Save the inval lists state at 2PC transaction prepare.
 733  *
 734  * In this phase we just generate 2PC records for all the pending invalidation
 735  * work.
 736  */
 737 void
 738 AtPrepare_Inval(void)
 739 {
 740         /* Must be at top of stack */
 741         Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
 742
 743         /*
 744          * Relcache init file invalidation requires processing both before and
 745          * after we send the SI messages.
 746          */
 747         if (transInvalInfo->RelcacheInitFileInval)
 748                 RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_BEFORE,
 749                                                            NULL, 0);
 750
 751         AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
 752                                                            &transInvalInfo->CurrentCmdInvalidMsgs);
 753
 754         ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
 755                                                                 PersistInvalidationMessage);
 756
 757         if (transInvalInfo->RelcacheInitFileInval)
 758                 RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_AFTER,
 759                                                            NULL, 0);
 760 }
 761
/*
 * PostPrepare_Inval
 *              Clean up after successful PREPARE.
 *
 * Here, we want to act as though the transaction aborted, so that we will
 * undo any syscache changes it made, thereby bringing us into sync with the
 * outside world, which doesn't believe the transaction committed yet.
 *
 * If the prepared transaction is later aborted, there is nothing more to
 * do; if it commits, we will receive the consequent inval messages just
 * like everyone else.
 *
 * Takes no arguments and returns nothing; all of the work is delegated to
 * AtEOXact_Inval.
 */
void
PostPrepare_Inval(void)
{
        /* isCommit = false selects AtEOXact_Inval's abort processing */
        AtEOXact_Inval(false);
}
 779
 780 /*
 781  * AtSubStart_Inval
 782  *              Initialize inval lists at start of a subtransaction.
 783  */
 784 void
 785 AtSubStart_Inval(void)
 786 {
 787         TransInvalidationInfo *myInfo;
 788
 789         Assert(transInvalInfo != NULL);
 790         myInfo = (TransInvalidationInfo *)
 791                 MemoryContextAllocZero(TopTransactionContext,
 792                                                            sizeof(TransInvalidationInfo));
 793         myInfo->parent = transInvalInfo;
 794         myInfo->my_level = GetCurrentTransactionNestLevel();
 795         transInvalInfo = myInfo;
 796 }
 797
/*
 * PersistInvalidationMessage
 *              Write an invalidation message to the 2PC state file.
 *
 * msg is registered as a TWOPHASE_INFO_MSG record under
 * TWOPHASE_RM_INVAL_ID.  AtPrepare_Inval passes this function to
 * ProcessInvalidationMessages as the per-message callback, and
 * inval_twophase_postcommit is the routine that handles TWOPHASE_INFO_MSG
 * records.
 */
static void
PersistInvalidationMessage(SharedInvalidationMessage *msg)
{
        /* The record payload is the message struct itself */
        RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_MSG,
                                                   msg, sizeof(SharedInvalidationMessage));
}
 808
 809 /*
 810  * inval_twophase_postcommit
 811  *              Process an invalidation message from the 2PC state file.
 812  */
 813 void
 814 inval_twophase_postcommit(TransactionId xid, uint16 info,
 815                                                   void *recdata, uint32 len)
 816 {
 817         SharedInvalidationMessage *msg;
 818
 819         switch (info)
 820         {
 821                 case TWOPHASE_INFO_MSG:
 822                         msg = (SharedInvalidationMessage *) recdata;
 823                         Assert(len == sizeof(SharedInvalidationMessage));
 824                         SendSharedInvalidMessages(msg, 1);
 825                         break;
 826                 case TWOPHASE_INFO_FILE_BEFORE:
 827                         RelationCacheInitFileInvalidate(true);
 828                         break;
 829                 case TWOPHASE_INFO_FILE_AFTER:
 830                         RelationCacheInitFileInvalidate(false);
 831                         break;
 832                 default:
 833                         Assert(false);
 834                         break;
 835         }
 836 }
 837
 838
 839 /*
 840  * AtEOXact_Inval
 841  *              Process queued-up invalidation messages at end of main transaction.
 842  *
 843  * If isCommit, we must send out the messages in our PriorCmdInvalidMsgs list
 844  * to the shared invalidation message queue.  Note that these will be read
 845  * not only by other backends, but also by our own backend at the next
 846  * transaction start (via AcceptInvalidationMessages).  This means that
 847  * we can skip immediate local processing of anything that's still in
 848  * CurrentCmdInvalidMsgs, and just send that list out too.
 849  *
 850  * If not isCommit, we are aborting, and must locally process the messages
 851  * in PriorCmdInvalidMsgs.      No messages need be sent to other backends,
 852  * since they'll not have seen our changed tuples anyway.  We can forget
 853  * about CurrentCmdInvalidMsgs too, since those changes haven't touched
 854  * the caches yet.
 855  *
 856  * In any case, reset the various lists to empty.  We need not physically
 857  * free memory here, since TopTransactionContext is about to be emptied
 858  * anyway.
 859  *
 860  * Note:
 861  *              This should be called as the last step in processing a transaction.
 862  */
 863 void
 864 AtEOXact_Inval(bool isCommit)
 865 {
 866         if (isCommit)
 867         {
 868                 /* Must be at top of stack */
 869                 Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
 870
 871                 /*
 872                  * Relcache init file invalidation requires processing both before and
 873                  * after we send the SI messages.  However, we need not do anything
 874                  * unless we committed.
 875                  */
 876                 if (transInvalInfo->RelcacheInitFileInval)
 877                         RelationCacheInitFileInvalidate(true);
 878
 879                 AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
 880                                                                    &transInvalInfo->CurrentCmdInvalidMsgs);
 881
 882                 ProcessInvalidationMessagesMulti(&transInvalInfo->PriorCmdInvalidMsgs,
 883                                                                                  SendSharedInvalidMessages);
 884
 885                 if (transInvalInfo->RelcacheInitFileInval)
 886                         RelationCacheInitFileInvalidate(false);
 887         }
 888         else if (transInvalInfo != NULL)
 889         {
 890                 /* Must be at top of stack */
 891                 Assert(transInvalInfo->parent == NULL);
 892
 893                 ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
 894                                                                         LocalExecuteInvalidationMessage);
 895         }
 896
 897         /* Need not free anything explicitly */
 898         transInvalInfo = NULL;
 899 }
 900
 901 /*
 902  * AtEOSubXact_Inval
 903  *              Process queued-up invalidation messages at end of subtransaction.
 904  *
 905  * If isCommit, process CurrentCmdInvalidMsgs if any (there probably aren't),
 906  * and then attach both CurrentCmdInvalidMsgs and PriorCmdInvalidMsgs to the
 907  * parent's PriorCmdInvalidMsgs list.
 908  *
 909  * If not isCommit, we are aborting, and must locally process the messages
 910  * in PriorCmdInvalidMsgs.      No messages need be sent to other backends.
 911  * We can forget about CurrentCmdInvalidMsgs too, since those changes haven't
 912  * touched the caches yet.
 913  *
 914  * In any case, pop the transaction stack.      We need not physically free memory
 915  * here, since CurTransactionContext is about to be emptied anyway
 916  * (if aborting).  Beware of the possibility of aborting the same nesting
 917  * level twice, though.
 918  */
 919 void
 920 AtEOSubXact_Inval(bool isCommit)
 921 {
 922         int                     my_level = GetCurrentTransactionNestLevel();
 923         TransInvalidationInfo *myInfo = transInvalInfo;
 924
 925         if (isCommit)
 926         {
 927                 /* Must be at non-top of stack */
 928                 Assert(myInfo != NULL && myInfo->parent != NULL);
 929                 Assert(myInfo->my_level == my_level);
 930
 931                 /* If CurrentCmdInvalidMsgs still has anything, fix it */
 932                 CommandEndInvalidationMessages();
 933
 934                 /* Pass up my inval messages to parent */
 935                 AppendInvalidationMessages(&myInfo->parent->PriorCmdInvalidMsgs,
 936                                                                    &myInfo->PriorCmdInvalidMsgs);
 937
 938                 /* Pending relcache inval becomes parent's problem too */
 939                 if (myInfo->RelcacheInitFileInval)
 940                         myInfo->parent->RelcacheInitFileInval = true;
 941
 942                 /* Pop the transaction state stack */
 943                 transInvalInfo = myInfo->parent;
 944
 945                 /* Need not free anything else explicitly */
 946                 pfree(myInfo);
 947         }
 948         else if (myInfo != NULL && myInfo->my_level == my_level)
 949         {
 950                 /* Must be at non-top of stack */
 951                 Assert(myInfo->parent != NULL);
 952
 953                 ProcessInvalidationMessages(&myInfo->PriorCmdInvalidMsgs,
 954                                                                         LocalExecuteInvalidationMessage);
 955
 956                 /* Pop the transaction state stack */
 957                 transInvalInfo = myInfo->parent;
 958
 959                 /* Need not free anything else explicitly */
 960                 pfree(myInfo);
 961         }
 962 }
 963
 964 /*
 965  * CommandEndInvalidationMessages
 966  *              Process queued-up invalidation messages at end of one command
 967  *              in a transaction.
 968  *
 969  * Here, we send no messages to the shared queue, since we don't know yet if
 970  * we will commit.      We do need to locally process the CurrentCmdInvalidMsgs
 971  * list, so as to flush our caches of any entries we have outdated in the
 972  * current command.  We then move the current-cmd list over to become part
 973  * of the prior-cmds list.
 974  *
 975  * Note:
 976  *              This should be called during CommandCounterIncrement(),
 977  *              after we have advanced the command ID.
 978  */
 979 void
 980 CommandEndInvalidationMessages(void)
 981 {
 982         /*
 983          * You might think this shouldn't be called outside any transaction, but
 984          * bootstrap does it, and also ABORT issued when not in a transaction. So
 985          * just quietly return if no state to work on.
 986          */
 987         if (transInvalInfo == NULL)
 988                 return;
 989
 990         ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
 991                                                                 LocalExecuteInvalidationMessage);
 992         AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
 993                                                            &transInvalInfo->CurrentCmdInvalidMsgs);
 994 }
 995
 996
/*
 * BeginNonTransactionalInvalidation
 *              Prepare for invalidation messages for nontransactional updates.
 *
 * A nontransactional invalidation is one that must be sent whether or not
 * the current transaction eventually commits.  We arrange for all invals
 * queued between this call and EndNonTransactionalInvalidation() to be sent
 * immediately when the latter is called.
 *
 * Currently, this is only used by heap_page_prune(), and only when it is
 * invoked during VACUUM FULL's first pass over a table.  We expect therefore
 * that we are not inside a subtransaction and there are no already-pending
 * invalidations.  This could be relaxed by setting up a new nesting level of
 * invalidation data, but for now there's no need.  Note that heap_page_prune
 * knows that this function does not change any state, and therefore there's
 * no need to worry about cleaning up if there's an elog(ERROR) before
 * reaching EndNonTransactionalInvalidation (the invals will just be thrown
 * away if that happens).
 *
 * The body consists solely of Assert checks of the preconditions described
 * above.
 */
void
BeginNonTransactionalInvalidation(void)
{
        /* Must be at top of stack */
        Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);

        /* Must not have any previously-queued activity */
        Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL);
        Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL);
        Assert(transInvalInfo->CurrentCmdInvalidMsgs.cclist == NULL);
        Assert(transInvalInfo->CurrentCmdInvalidMsgs.rclist == NULL);
        /* ... and no relcache init file invalidation may be pending either */
        Assert(transInvalInfo->RelcacheInitFileInval == false);
}
1029
1030 /*
1031  * EndNonTransactionalInvalidation
1032  *              Process queued-up invalidation messages for nontransactional updates.
1033  *
1034  * We expect to find messages in CurrentCmdInvalidMsgs only (else there
1035  * was a CommandCounterIncrement within the "nontransactional" update).
1036  * We must process them locally and send them out to the shared invalidation
1037  * message queue.
1038  *
1039  * We must also reset the lists to empty and explicitly free memory (we can't
1040  * rely on end-of-transaction cleanup for that).
1041  */
1042 void
1043 EndNonTransactionalInvalidation(void)
1044 {
1045         InvalidationChunk *chunk;
1046         InvalidationChunk *next;
1047
1048         /* Must be at top of stack */
1049         Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
1050
1051         /* Must not have any prior-command messages */
1052         Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL);
1053         Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL);
1054
1055         /*
1056          * At present, this function is only used for CTID-changing updates;
1057          * since the relcache init file doesn't store any tuple CTIDs, we
1058          * don't have to invalidate it.  That might not be true forever
1059          * though, in which case we'd need code similar to AtEOXact_Inval.
1060          */
1061
1062         /* Send out the invals */
1063         ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
1064                                                                 LocalExecuteInvalidationMessage);
1065         ProcessInvalidationMessagesMulti(&transInvalInfo->CurrentCmdInvalidMsgs,
1066                                                                          SendSharedInvalidMessages);
1067
1068         /* Clean up and release memory */
1069         for (chunk = transInvalInfo->CurrentCmdInvalidMsgs.cclist;
1070                  chunk != NULL;
1071                  chunk = next)
1072         {
1073                 next = chunk->next;
1074                 pfree(chunk);
1075         }
1076         for (chunk = transInvalInfo->CurrentCmdInvalidMsgs.rclist;
1077                  chunk != NULL;
1078                  chunk = next)
1079         {
1080                 next = chunk->next;
1081                 pfree(chunk);
1082         }
1083         transInvalInfo->CurrentCmdInvalidMsgs.cclist = NULL;
1084         transInvalInfo->CurrentCmdInvalidMsgs.rclist = NULL;
1085         transInvalInfo->RelcacheInitFileInval = false;
1086 }
1087
1088
/*
 * CacheInvalidateHeapTuple
 *              Register the given tuple for invalidation at end of command
 *              (ie, current command is creating or outdating this tuple).
 *
 * relation and tuple are passed through unchanged; all of the work is
 * delegated to PrepareForTupleInvalidation.
 */
void
CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
{
        PrepareForTupleInvalidation(relation, tuple);
}
1099
1100 /*
1101  * CacheInvalidateRelcache
1102  *              Register invalidation of the specified relation's relcache entry
1103  *              at end of command.
1104  *
1105  * This is used in places that need to force relcache rebuild but aren't
1106  * changing any of the tuples recognized as contributors to the relcache
1107  * entry by PrepareForTupleInvalidation.  (An example is dropping an index.)
1108  * We assume in particular that relfilenode/reltablespace aren't changing
1109  * (so the rd_node value is still good).
1110  *
1111  * XXX most callers of this probably don't need to force an smgr flush.
1112  */
1113 void
1114 CacheInvalidateRelcache(Relation relation)
1115 {
1116         Oid                     databaseId;
1117         Oid                     relationId;
1118
1119         relationId = RelationGetRelid(relation);
1120         if (relation->rd_rel->relisshared)
1121                 databaseId = InvalidOid;
1122         else
1123                 databaseId = MyDatabaseId;
1124
1125         RegisterRelcacheInvalidation(databaseId, relationId);
1126         RegisterSmgrInvalidation(relation->rd_node);
1127 }
1128
1129 /*
1130  * CacheInvalidateRelcacheByTuple
1131  *              As above, but relation is identified by passing its pg_class tuple.
1132  */
1133 void
1134 CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
1135 {
1136         Form_pg_class classtup = (Form_pg_class) GETSTRUCT(classTuple);
1137         Oid                     databaseId;
1138         Oid                     relationId;
1139         RelFileNode rnode;
1140
1141         relationId = HeapTupleGetOid(classTuple);
1142         if (classtup->relisshared)
1143                 databaseId = InvalidOid;
1144         else
1145                 databaseId = MyDatabaseId;
1146         if (classtup->reltablespace)
1147                 rnode.spcNode = classtup->reltablespace;
1148         else
1149                 rnode.spcNode = MyDatabaseTableSpace;
1150         rnode.dbNode = databaseId;
1151         rnode.relNode = classtup->relfilenode;
1152
1153         RegisterRelcacheInvalidation(databaseId, relationId);
1154         RegisterSmgrInvalidation(rnode);
1155 }
1156
1157 /*
1158  * CacheInvalidateRelcacheByRelid
1159  *              As above, but relation is identified by passing its OID.
1160  *              This is the least efficient of the three options; use one of
1161  *              the above routines if you have a Relation or pg_class tuple.
1162  */
1163 void
1164 CacheInvalidateRelcacheByRelid(Oid relid)
1165 {
1166         HeapTuple       tup;
1167
1168         tup = SearchSysCache(RELOID,
1169                                                  ObjectIdGetDatum(relid),
1170                                                  0, 0, 0);
1171         if (!HeapTupleIsValid(tup))
1172                 elog(ERROR, "cache lookup failed for relation %u", relid);
1173         CacheInvalidateRelcacheByTuple(tup);
1174         ReleaseSysCache(tup);
1175 }
1176
1177 /*
1178  * CacheRegisterSyscacheCallback
1179  *              Register the specified function to be called for all future
1180  *              invalidation events in the specified cache.
1181  *
1182  * NOTE: currently, the OID argument to the callback routine is not
1183  * provided for syscache callbacks; the routine doesn't really get any
1184  * useful info as to exactly what changed.      It should treat every call
1185  * as a "cache flush" request.
1186  */
1187 void
1188 CacheRegisterSyscacheCallback(int cacheid,
1189                                                           CacheCallbackFunction func,
1190                                                           Datum arg)
1191 {
1192         if (cache_callback_count >= MAX_CACHE_CALLBACKS)
1193                 elog(FATAL, "out of cache_callback_list slots");
1194
1195         cache_callback_list[cache_callback_count].id = cacheid;
1196         cache_callback_list[cache_callback_count].function = func;
1197         cache_callback_list[cache_callback_count].arg = arg;
1198
1199         ++cache_callback_count;
1200 }
1201
1202 /*
1203  * CacheRegisterRelcacheCallback
1204  *              Register the specified function to be called for all future
1205  *              relcache invalidation events.  The OID of the relation being
1206  *              invalidated will be passed to the function.
1207  *
1208  * NOTE: InvalidOid will be passed if a cache reset request is received.
1209  * In this case the called routines should flush all cached state.
1210  */
1211 void
1212 CacheRegisterRelcacheCallback(CacheCallbackFunction func,
1213                                                           Datum arg)
1214 {
1215         if (cache_callback_count >= MAX_CACHE_CALLBACKS)
1216                 elog(FATAL, "out of cache_callback_list slots");
1217
1218         cache_callback_list[cache_callback_count].id = SHAREDINVALRELCACHE_ID;
1219         cache_callback_list[cache_callback_count].function = func;
1220         cache_callback_list[cache_callback_count].arg = arg;
1221
1222         ++cache_callback_count;
1223 }