From 5f9a86b353fa463534746809890756677270dad2 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sat, 12 Dec 2015 14:19:29 +0100 Subject: [PATCH] Fix ALTER TABLE ... SET TABLESPACE for unlogged relations. Changing the tablespace of an unlogged relation did not WAL log the creation and content of the init fork. Thus, after a standby is promoted, unlogged relation cannot be accessed anymore, with errors like: ERROR: 58P01: could not open file "pg_tblspc/...": No such file or directory Additionally the init fork was not synced to disk, independent of the configured wal_level, a relatively small durability risk. Investigation of that problem also brought to light that, even for permanent relations, the creation of !main forks was not WAL logged, i.e. no XLOG_SMGR_CREATE record were emitted. That mostly turns out not to be a problem, because these files were created when the actual relation data is copied; nonexistent files are not treated as an error condition during replay. But that doesn't work for empty files, and generally feels a bit haphazard. Luckily, outside init and main forks, empty forks don't occur often or are not a problem. Add the required WAL logging and syncing to disk. Reported-By: Michael Paquier Author: Michael Paquier and Andres Freund Discussion: 20151210163230.GA11331@alap3.anarazel.de Backpatch: 9.1, where unlogged relations were introduced --- src/backend/commands/tablecmds.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 4aea25cd53..eac7e5d71b 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8049,6 +8049,15 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) if (smgrexists(rel->rd_smgr, forkNum)) { smgrcreate(dstrel, forkNum, false); + + /* + * WAL log creation if the relation is persistent, or this is the + * init fork of an unlogged relation. + */ + if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT || + (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED && + forkNum == INIT_FORKNUM)) + log_smgrcreate(&newrnode, forkNum); copy_relation_data(rel->rd_smgr, dstrel, forkNum, rel->rd_rel->relpersistence); } @@ -8090,6 +8099,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, char *buf; Page page; bool use_wal; + bool copying_initfork; BlockNumber nblocks; BlockNumber blkno; @@ -8102,11 +8112,20 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, buf = (char *) palloc(BLCKSZ); page = (Page) buf; + /* + * The init fork for an unlogged relation in many respects has to be + * treated the same as normal relation, changes need to be WAL logged and + * it needs to be synced to disk. + */ + copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED && + forkNum == INIT_FORKNUM; + /* * We need to log the copied data in WAL iff WAL archiving/streaming is * enabled AND it's a permanent relation. */ - use_wal = XLogIsNeeded() && relpersistence == RELPERSISTENCE_PERMANENT; + use_wal = XLogIsNeeded() && + (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork); nblocks = smgrnblocks(src, forkNum); @@ -8145,7 +8164,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, * wouldn't replay our earlier WAL entries. If we do not fsync those pages * here, they might still not be on disk when the crash occurs. */ - if (relpersistence == RELPERSISTENCE_PERMANENT) + if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork) smgrimmedsync(dst, forkNum); } -- 2.40.0