granicus.if.org Git - postgresql/blob - src/backend/catalog/storage.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * storage.c
   4  *        code to create and destroy physical storage for relations
   5  *
   6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        src/backend/catalog/storage.c
  12  *
  13  * NOTES
  14  *        Some of this code used to be in storage/smgr/smgr.c, and the
  15  *        function names still reflect that.
  16  *
  17  *-------------------------------------------------------------------------
  18  */
  19
  20 #include "postgres.h"
  21
  22 #include "miscadmin.h"
  23
  24 #include "access/visibilitymap.h"
  25 #include "access/xact.h"
  26 #include "access/xlog.h"
  27 #include "access/xloginsert.h"
  28 #include "access/xlogutils.h"
  29 #include "catalog/storage.h"
  30 #include "catalog/storage_xlog.h"
  31 #include "storage/freespace.h"
  32 #include "storage/smgr.h"
  33 #include "utils/memutils.h"
  34 #include "utils/rel.h"
  35
  36 /*
  37  * We keep a list of all relations (represented as RelFileNode values)
  38  * that have been created or deleted in the current transaction.  When
  39  * a relation is created, we create the physical file immediately, but
  40  * remember it so that we can delete the file again if the current
  41  * transaction is aborted.  Conversely, a deletion request is NOT
  42  * executed immediately, but is just entered in the list.  When and if
  43  * the transaction commits, we can delete the physical file.
  44  *
  45  * To handle subtransactions, every entry is marked with its transaction
  46  * nesting level.  At subtransaction commit, we reassign the subtransaction's
  47  * entries to the parent nesting level.  At subtransaction abort, we can
  48  * immediately execute the abort-time actions for all entries of the current
  49  * nesting level.
  50  *
  51  * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
  52  * unbetimes.  It'd probably be OK to keep it in TopTransactionContext,
  53  * but I'm being paranoid.
  54  */
  55
/*
 * One request in the pending-deletes list.
 *
 * Entries are allocated in TopMemoryContext and pushed onto the head of the
 * list by RelationCreateStorage (atCommit = false) and RelationDropStorage
 * (atCommit = true).  They must be pfree'd explicitly by whichever routine
 * unlinks them from the list.
 */
typedef struct PendingRelDelete
{
        RelFileNode relnode;            /* relation that may need to be deleted */
        BackendId       backend;                /* InvalidBackendId if not a temp rel */
        bool            atCommit;               /* T=delete at commit; F=delete at abort */
        int                     nestLevel;              /* xact nesting level of request */
        struct PendingRelDelete *next;  /* linked-list link */
} PendingRelDelete;

/* Newest request first; NULL when nothing is pending. */
static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
  66
  67 /*
  68  * RelationCreateStorage
  69  *              Create physical storage for a relation.
  70  *
  71  * Create the underlying disk file storage for the relation. This only
  72  * creates the main fork; additional forks are created lazily by the
  73  * modules that need them.
  74  *
  75  * This function is transactional. The creation is WAL-logged, and if the
  76  * transaction aborts later on, the storage will be destroyed.
  77  */
  78 SMgrRelation
  79 RelationCreateStorage(RelFileNode rnode, char relpersistence)
  80 {
  81         PendingRelDelete *pending;
  82         SMgrRelation srel;
  83         BackendId       backend;
  84         bool            needs_wal;
  85
  86         switch (relpersistence)
  87         {
  88                 case RELPERSISTENCE_TEMP:
  89                         backend = BackendIdForTempRelations();
  90                         needs_wal = false;
  91                         break;
  92                 case RELPERSISTENCE_UNLOGGED:
  93                         backend = InvalidBackendId;
  94                         needs_wal = false;
  95                         break;
  96                 case RELPERSISTENCE_PERMANENT:
  97                         backend = InvalidBackendId;
  98                         needs_wal = true;
  99                         break;
 100                 default:
 101                         elog(ERROR, "invalid relpersistence: %c", relpersistence);
 102                         return NULL;            /* placate compiler */
 103         }
 104
 105         srel = smgropen(rnode, backend);
 106         smgrcreate(srel, MAIN_FORKNUM, false);
 107
 108         if (needs_wal)
 109                 log_smgrcreate(&srel->smgr_rnode.node, MAIN_FORKNUM);
 110
 111         /* Add the relation to the list of stuff to delete at abort */
 112         pending = (PendingRelDelete *)
 113                 MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
 114         pending->relnode = rnode;
 115         pending->backend = backend;
 116         pending->atCommit = false;      /* delete if abort */
 117         pending->nestLevel = GetCurrentTransactionNestLevel();
 118         pending->next = pendingDeletes;
 119         pendingDeletes = pending;
 120
 121         return srel;
 122 }
 123
 124 /*
 125  * Perform XLogInsert of an XLOG_SMGR_CREATE record to WAL.
 126  */
 127 void
 128 log_smgrcreate(const RelFileNode *rnode, ForkNumber forkNum)
 129 {
 130         xl_smgr_create xlrec;
 131
 132         /*
 133          * Make an XLOG entry reporting the file creation.
 134          */
 135         xlrec.rnode = *rnode;
 136         xlrec.forkNum = forkNum;
 137
 138         XLogBeginInsert();
 139         XLogRegisterData((char *) &xlrec, sizeof(xlrec));
 140         XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
 141 }
 142
 143 /*
 144  * RelationDropStorage
 145  *              Schedule unlinking of physical storage at transaction commit.
 146  */
 147 void
 148 RelationDropStorage(Relation rel)
 149 {
 150         PendingRelDelete *pending;
 151
 152         /* Add the relation to the list of stuff to delete at commit */
 153         pending = (PendingRelDelete *)
 154                 MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
 155         pending->relnode = rel->rd_node;
 156         pending->backend = rel->rd_backend;
 157         pending->atCommit = true;       /* delete if commit */
 158         pending->nestLevel = GetCurrentTransactionNestLevel();
 159         pending->next = pendingDeletes;
 160         pendingDeletes = pending;
 161
 162         /*
 163          * NOTE: if the relation was created in this transaction, it will now be
 164          * present in the pending-delete list twice, once with atCommit true and
 165          * once with atCommit false.  Hence, it will be physically deleted at end
 166          * of xact in either case (and the other entry will be ignored by
 167          * smgrDoPendingDeletes, so no error will occur).  We could instead remove
 168          * the existing list entry and delete the physical file immediately, but
 169          * for now I'll keep the logic simple.
 170          */
 171
 172         RelationCloseSmgr(rel);
 173 }
 174
 175 /*
 176  * RelationPreserveStorage
 177  *              Mark a relation as not to be deleted after all.
 178  *
 179  * We need this function because relation mapping changes are committed
 180  * separately from commit of the whole transaction, so it's still possible
 181  * for the transaction to abort after the mapping update is done.
 182  * When a new physical relation is installed in the map, it would be
 183  * scheduled for delete-on-abort, so we'd delete it, and be in trouble.
 184  * The relation mapper fixes this by telling us to not delete such relations
 185  * after all as part of its commit.
 186  *
 187  * We also use this to reuse an old build of an index during ALTER TABLE, this
 188  * time removing the delete-at-commit entry.
 189  *
 190  * No-op if the relation is not among those scheduled for deletion.
 191  */
 192 void
 193 RelationPreserveStorage(RelFileNode rnode, bool atCommit)
 194 {
 195         PendingRelDelete *pending;
 196         PendingRelDelete *prev;
 197         PendingRelDelete *next;
 198
 199         prev = NULL;
 200         for (pending = pendingDeletes; pending != NULL; pending = next)
 201         {
 202                 next = pending->next;
 203                 if (RelFileNodeEquals(rnode, pending->relnode)
 204                         && pending->atCommit == atCommit)
 205                 {
 206                         /* unlink and delete list entry */
 207                         if (prev)
 208                                 prev->next = next;
 209                         else
 210                                 pendingDeletes = next;
 211                         pfree(pending);
 212                         /* prev does not change */
 213                 }
 214                 else
 215                 {
 216                         /* unrelated entry, don't touch it */
 217                         prev = pending;
 218                 }
 219         }
 220 }
 221
/*
 * RelationTruncate
 *              Physically truncate a relation to the specified number of blocks.
 *
 * This includes getting rid of any buffers for the blocks that are to be
 * dropped.
 *
 * rel     - relation to truncate; it is opened at the smgr level here
 * nblocks - new length of the main fork, in blocks
 *
 * The main fork is always truncated.  The free space map and visibility map
 * forks are truncated along with it when they exist and their "prepare"
 * routine reports a valid new length.  If the relation needs WAL, an
 * XLOG_SMGR_TRUNCATE record is inserted before any fork is truncated.
 */
void
RelationTruncate(Relation rel, BlockNumber nblocks)
{
        bool            fsm;                    /* does an FSM fork exist? */
        bool            vm;                     /* does a visibility map fork exist? */
        bool            need_fsm_vacuum = false;        /* set once the FSM fork is queued */
        ForkNumber      forks[MAX_FORKNUM];     /* forks to hand to smgrtruncate */
        BlockNumber     blocks[MAX_FORKNUM];    /* new length for forks[i] */
        int             nforks = 0;             /* entries used in both arrays */

        /* Open it at the smgr level if not already done */
        RelationOpenSmgr(rel);

        /*
         * Make sure smgr_targblock etc aren't pointing somewhere past new end
         */
        rel->rd_smgr->smgr_targblock = InvalidBlockNumber;
        rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber;
        rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;

        /* Prepare for truncation of MAIN fork of the relation */
        forks[nforks] = MAIN_FORKNUM;
        blocks[nforks] = nblocks;
        nforks++;

        /*  Prepare for truncation of the FSM if it exists */
        fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM);
        if (fsm)
        {
                /*
                 * blocks[nforks] is only scratch space at this point: the slot is
                 * claimed (nforks++) only when the prepare routine hands back a
                 * valid block number.
                 */
                blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, nblocks);
                if (BlockNumberIsValid(blocks[nforks]))
                {
                        forks[nforks] = FSM_FORKNUM;
                        nforks++;
                        need_fsm_vacuum = true;
                }
        }

        /* Prepare for truncation of the visibility map too if it exists */
        vm = smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM);
        if (vm)
        {
                /* Same scratch-slot convention as for the FSM above */
                blocks[nforks] = visibilitymap_prepare_truncate(rel, nblocks);
                if (BlockNumberIsValid(blocks[nforks]))
                {
                        forks[nforks] = VISIBILITYMAP_FORKNUM;
                        nforks++;
                }
        }

        /*
         * We WAL-log the truncation before actually truncating, which means
         * trouble if the truncation fails. If we then crash, the WAL replay
         * likely isn't going to succeed in the truncation either, and cause a
         * PANIC. It's tempting to put a critical section here, but that cure
         * would be worse than the disease. It would turn a usually harmless
         * failure to truncate, that might spell trouble at WAL replay, into a
         * certain PANIC.
         */
        if (RelationNeedsWAL(rel))
        {
                /*
                 * Make an XLOG entry reporting the file truncation.
                 */
                XLogRecPtr      lsn;
                xl_smgr_truncate xlrec;

                /*
                 * The record always carries SMGR_TRUNCATE_ALL, independently of
                 * which forks were queued above.
                 */
                xlrec.blkno = nblocks;
                xlrec.rnode = rel->rd_node;
                xlrec.flags = SMGR_TRUNCATE_ALL;

                XLogBeginInsert();
                XLogRegisterData((char *) &xlrec, sizeof(xlrec));

                lsn = XLogInsert(RM_SMGR_ID,
                                                 XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);

                /*
                 * Flush, because otherwise the truncation of the main relation might
                 * hit the disk before the WAL record, and the truncation of the FSM
                 * or visibility map. If we crashed during that window, we'd be left
                 * with a truncated heap, but the FSM or visibility map would still
                 * contain entries for the non-existent heap pages.
                 *
                 * Note that the test is on whether the forks exist, not on whether
                 * they were actually queued for truncation.
                 */
                if (fsm || vm)
                        XLogFlush(lsn);
        }

        /* Do the real work to truncate relation forks */
        smgrtruncate(rel->rd_smgr, forks, nforks, blocks);

        /*
         * Update upper-level FSM pages to account for the truncation.
         * This is important because the just-truncated pages were likely
         * marked as all-free, and would be preferentially selected.
         */
        if (need_fsm_vacuum)
                FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
}
 328
 329 /*
 330  * Copy a fork's data, block by block.
 331  *
 332  * Note that this requires that there is no dirty data in shared buffers. If
 333  * it's possible that there are, callers need to flush those using
 334  * e.g. FlushRelationBuffers(rel).
 335  */
 336 void
 337 RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
 338                                         ForkNumber forkNum, char relpersistence)
 339 {
 340         PGAlignedBlock buf;
 341         Page            page;
 342         bool            use_wal;
 343         bool            copying_initfork;
 344         BlockNumber nblocks;
 345         BlockNumber blkno;
 346
 347         page = (Page) buf.data;
 348
 349         /*
 350          * The init fork for an unlogged relation in many respects has to be
 351          * treated the same as normal relation, changes need to be WAL logged and
 352          * it needs to be synced to disk.
 353          */
 354         copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
 355                 forkNum == INIT_FORKNUM;
 356
 357         /*
 358          * We need to log the copied data in WAL iff WAL archiving/streaming is
 359          * enabled AND it's a permanent relation.
 360          */
 361         use_wal = XLogIsNeeded() &&
 362                 (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
 363
 364         nblocks = smgrnblocks(src, forkNum);
 365
 366         for (blkno = 0; blkno < nblocks; blkno++)
 367         {
 368                 /* If we got a cancel signal during the copy of the data, quit */
 369                 CHECK_FOR_INTERRUPTS();
 370
 371                 smgrread(src, forkNum, blkno, buf.data);
 372
 373                 if (!PageIsVerified(page, blkno))
 374                         ereport(ERROR,
 375                                         (errcode(ERRCODE_DATA_CORRUPTED),
 376                                          errmsg("invalid page in block %u of relation %s",
 377                                                         blkno,
 378                                                         relpathbackend(src->smgr_rnode.node,
 379                                                                                    src->smgr_rnode.backend,
 380                                                                                    forkNum))));
 381
 382                 /*
 383                  * WAL-log the copied page. Unfortunately we don't know what kind of a
 384                  * page this is, so we have to log the full page including any unused
 385                  * space.
 386                  */
 387                 if (use_wal)
 388                         log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
 389
 390                 PageSetChecksumInplace(page, blkno);
 391
 392                 /*
 393                  * Now write the page.  We say skipFsync = true because there's no
 394                  * need for smgr to schedule an fsync for this write; we'll do it
 395                  * ourselves below.
 396                  */
 397                 smgrextend(dst, forkNum, blkno, buf.data, true);
 398         }
 399
 400         /*
 401          * If the rel is WAL-logged, must fsync before commit.  We use heap_sync
 402          * to ensure that the toast table gets fsync'd too.  (For a temp or
 403          * unlogged rel we don't care since the data will be gone after a crash
 404          * anyway.)
 405          *
 406          * It's obvious that we must do this when not WAL-logging the copy. It's
 407          * less obvious that we have to do it even if we did WAL-log the copied
 408          * pages. The reason is that since we're copying outside shared buffers, a
 409          * CHECKPOINT occurring during the copy has no way to flush the previously
 410          * written data to disk (indeed it won't know the new rel even exists).  A
 411          * crash later on would replay WAL from the checkpoint, therefore it
 412          * wouldn't replay our earlier WAL entries. If we do not fsync those pages
 413          * here, they might still not be on disk when the crash occurs.
 414          */
 415         if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork)
 416                 smgrimmedsync(dst, forkNum);
 417 }
 418
 419 /*
 420  *      smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
 421  *
 422  * This also runs when aborting a subxact; we want to clean up a failed
 423  * subxact immediately.
 424  *
 425  * Note: It's possible that we're being asked to remove a relation that has
 426  * no physical storage in any fork. In particular, it's possible that we're
 427  * cleaning up an old temporary relation for which RemovePgTempFiles has
 428  * already recovered the physical storage.
 429  */
 430 void
 431 smgrDoPendingDeletes(bool isCommit)
 432 {
 433         int                     nestLevel = GetCurrentTransactionNestLevel();
 434         PendingRelDelete *pending;
 435         PendingRelDelete *prev;
 436         PendingRelDelete *next;
 437         int                     nrels = 0,
 438                                 i = 0,
 439                                 maxrels = 0;
 440         SMgrRelation *srels = NULL;
 441
 442         prev = NULL;
 443         for (pending = pendingDeletes; pending != NULL; pending = next)
 444         {
 445                 next = pending->next;
 446                 if (pending->nestLevel < nestLevel)
 447                 {
 448                         /* outer-level entries should not be processed yet */
 449                         prev = pending;
 450                 }
 451                 else
 452                 {
 453                         /* unlink list entry first, so we don't retry on failure */
 454                         if (prev)
 455                                 prev->next = next;
 456                         else
 457                                 pendingDeletes = next;
 458                         /* do deletion if called for */
 459                         if (pending->atCommit == isCommit)
 460                         {
 461                                 SMgrRelation srel;
 462
 463                                 srel = smgropen(pending->relnode, pending->backend);
 464
 465                                 /* allocate the initial array, or extend it, if needed */
 466                                 if (maxrels == 0)
 467                                 {
 468                                         maxrels = 8;
 469                                         srels = palloc(sizeof(SMgrRelation) * maxrels);
 470                                 }
 471                                 else if (maxrels <= nrels)
 472                                 {
 473                                         maxrels *= 2;
 474                                         srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
 475                                 }
 476
 477                                 srels[nrels++] = srel;
 478                         }
 479                         /* must explicitly free the list entry */
 480                         pfree(pending);
 481                         /* prev does not change */
 482                 }
 483         }
 484
 485         if (nrels > 0)
 486         {
 487                 smgrdounlinkall(srels, nrels, false);
 488
 489                 for (i = 0; i < nrels; i++)
 490                         smgrclose(srels[i]);
 491
 492                 pfree(srels);
 493         }
 494 }
 495
 496 /*
 497  * smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted.
 498  *
 499  * The return value is the number of relations scheduled for termination.
 500  * *ptr is set to point to a freshly-palloc'd array of RelFileNodes.
 501  * If there are no relations to be deleted, *ptr is set to NULL.
 502  *
 503  * Only non-temporary relations are included in the returned list.  This is OK
 504  * because the list is used only in contexts where temporary relations don't
 505  * matter: we're either writing to the two-phase state file (and transactions
 506  * that have touched temp tables can't be prepared) or we're writing to xlog
 507  * (and all temporary files will be zapped if we restart anyway, so no need
 508  * for redo to do it also).
 509  *
 510  * Note that the list does not include anything scheduled for termination
 511  * by upper-level transactions.
 512  */
 513 int
 514 smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
 515 {
 516         int                     nestLevel = GetCurrentTransactionNestLevel();
 517         int                     nrels;
 518         RelFileNode *rptr;
 519         PendingRelDelete *pending;
 520
 521         nrels = 0;
 522         for (pending = pendingDeletes; pending != NULL; pending = pending->next)
 523         {
 524                 if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
 525                         && pending->backend == InvalidBackendId)
 526                         nrels++;
 527         }
 528         if (nrels == 0)
 529         {
 530                 *ptr = NULL;
 531                 return 0;
 532         }
 533         rptr = (RelFileNode *) palloc(nrels * sizeof(RelFileNode));
 534         *ptr = rptr;
 535         for (pending = pendingDeletes; pending != NULL; pending = pending->next)
 536         {
 537                 if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
 538                         && pending->backend == InvalidBackendId)
 539                 {
 540                         *rptr = pending->relnode;
 541                         rptr++;
 542                 }
 543         }
 544         return nrels;
 545 }
 546
 547 /*
 548  *      PostPrepare_smgr -- Clean up after a successful PREPARE
 549  *
 550  * What we have to do here is throw away the in-memory state about pending
 551  * relation deletes.  It's all been recorded in the 2PC state file and
 552  * it's no longer smgr's job to worry about it.
 553  */
 554 void
 555 PostPrepare_smgr(void)
 556 {
 557         PendingRelDelete *pending;
 558         PendingRelDelete *next;
 559
 560         for (pending = pendingDeletes; pending != NULL; pending = next)
 561         {
 562                 next = pending->next;
 563                 pendingDeletes = next;
 564                 /* must explicitly free the list entry */
 565                 pfree(pending);
 566         }
 567 }
 568
 569
 570 /*
 571  * AtSubCommit_smgr() --- Take care of subtransaction commit.
 572  *
 573  * Reassign all items in the pending-deletes list to the parent transaction.
 574  */
 575 void
 576 AtSubCommit_smgr(void)
 577 {
 578         int                     nestLevel = GetCurrentTransactionNestLevel();
 579         PendingRelDelete *pending;
 580
 581         for (pending = pendingDeletes; pending != NULL; pending = pending->next)
 582         {
 583                 if (pending->nestLevel >= nestLevel)
 584                         pending->nestLevel = nestLevel - 1;
 585         }
 586 }
 587
/*
 * AtSubAbort_smgr() --- Take care of subtransaction abort.
 *
 * Delete created relations and forget about deleted relations.
 * We can execute these operations immediately because we know this
 * subtransaction will not commit.
 *
 * This is simply smgrDoPendingDeletes in its abort mode, which acts only on
 * entries at the current transaction nesting level or deeper.
 */
void
AtSubAbort_smgr(void)
{
        /*
         * isCommit = false: entries with atCommit == false have their storage
         * unlinked, while entries with atCommit == true are just dropped from
         * the list.
         */
        smgrDoPendingDeletes(false);
}
 600
/*
 * smgr_redo
 *              Replay an smgr WAL record.
 *
 * Two record types are handled: XLOG_SMGR_CREATE, which creates the recorded
 * fork, and XLOG_SMGR_TRUNCATE, which truncates whichever of the main, FSM
 * and visibility map forks the record's flags select.  Any other op code
 * raises PANIC.
 */
void
smgr_redo(XLogReaderState *record)
{
        /* End of this record; used below as the flush target before truncating */
        XLogRecPtr      lsn = record->EndRecPtr;
        uint8           info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

        /* Backup blocks are not used in smgr records */
        Assert(!XLogRecHasAnyBlockRefs(record));

        if (info == XLOG_SMGR_CREATE)
        {
                xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
                SMgrRelation reln;

                /* Replay always opens the relation with InvalidBackendId */
                reln = smgropen(xlrec->rnode, InvalidBackendId);
                smgrcreate(reln, xlrec->forkNum, true);
        }
        else if (info == XLOG_SMGR_TRUNCATE)
        {
                xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
                SMgrRelation reln;
                Relation        rel;
                ForkNumber      forks[MAX_FORKNUM];     /* forks to hand to smgrtruncate */
                BlockNumber     blocks[MAX_FORKNUM];    /* new length for forks[i] */
                int             nforks = 0;             /* entries used in both arrays */
                bool            need_fsm_vacuum = false;        /* set once FSM is queued */

                reln = smgropen(xlrec->rnode, InvalidBackendId);

                /*
                 * Forcibly create relation if it doesn't exist (which suggests that
                 * it was dropped somewhere later in the WAL sequence).  As in
                 * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
                 * log as best we can until the drop is seen.
                 */
                smgrcreate(reln, MAIN_FORKNUM, true);

                /*
                 * Before we perform the truncation, update minimum recovery point to
                 * cover this WAL record. Once the relation is truncated, there's no
                 * going back. The buffer manager enforces the WAL-first rule for
                 * normal updates to relation files, so that the minimum recovery
                 * point is always updated before the corresponding change in the data
                 * file is flushed to disk. We have to do the same manually here.
                 *
                 * Doing this before the truncation means that if the truncation fails
                 * for some reason, you cannot start up the system even after restart,
                 * until you fix the underlying situation so that the truncation will
                 * succeed. Alternatively, we could update the minimum recovery point
                 * after truncation, but that would leave a small window where the
                 * WAL-first rule could be violated.
                 */
                XLogFlush(lsn);

                /*
                 * Unlike RelationTruncate, each fork (the main fork included) is
                 * queued only when the matching bit is set in the record's flags.
                 */

                /* Prepare for truncation of MAIN fork */
                if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
                {
                        forks[nforks] = MAIN_FORKNUM;
                        blocks[nforks] = xlrec->blkno;
                        nforks++;

                        /* Also tell xlogutils.c about it */
                        XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno);
                }

                /* Prepare for truncation of FSM and VM too */
                rel = CreateFakeRelcacheEntry(xlrec->rnode);

                if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
                        smgrexists(reln, FSM_FORKNUM))
                {
                        /*
                         * blocks[nforks] is only scratch space until the prepare
                         * routine's result is known to be a valid block number.
                         */
                        blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, xlrec->blkno);
                        if (BlockNumberIsValid(blocks[nforks]))
                        {
                                forks[nforks] = FSM_FORKNUM;
                                nforks++;
                                need_fsm_vacuum = true;
                        }
                }
                if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
                        smgrexists(reln, VISIBILITYMAP_FORKNUM))
                {
                        /* Same scratch-slot convention as for the FSM above */
                        blocks[nforks] = visibilitymap_prepare_truncate(rel, xlrec->blkno);
                        if (BlockNumberIsValid(blocks[nforks]))
                        {
                                forks[nforks] = VISIBILITYMAP_FORKNUM;
                                nforks++;
                        }
                }

                /*
                 * Do the real work to truncate relation forks.  Nothing may have
                 * been queued at all, hence the guard.
                 */
                if (nforks > 0)
                        smgrtruncate(reln, forks, nforks, blocks);

                /*
                 * Update upper-level FSM pages to account for the truncation.
                 * This is important because the just-truncated pages were likely
                 * marked as all-free, and would be preferentially selected.
                 */
                if (need_fsm_vacuum)
                        FreeSpaceMapVacuumRange(rel, xlrec->blkno,
                                                                        InvalidBlockNumber);

                FreeFakeRelcacheEntry(rel);
        }
        else
                elog(PANIC, "smgr_redo: unknown op code %u", info);
}
 707                 elog(PANIC, "smgr_redo: unknown op code %u", info);
 708 }