automatically truncated after a crash or unclean shutdown. The contents
of an unlogged table are also not replicated to standby servers.
Any indexes created on an unlogged table are automatically unlogged as
- well; however, unlogged <link linkend="GiST">GiST indexes</link> are
- currently not supported and cannot be created on an unlogged table.
+ well.
</para>
</listitem>
</varlistentry>
#include "access/genam.h"
#include "access/gist_private.h"
+#include "access/heapam_xlog.h"
#include "catalog/index.h"
#include "catalog/pg_collation.h"
#include "miscadmin.h"
Datum
gistbuildempty(PG_FUNCTION_ARGS)
{
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("unlogged GiST indexes are not supported")));
+ Relation index = (Relation) PG_GETARG_POINTER(0);
+ Buffer buffer;
+
+ /* Allocate a new page (P_NEW) in the init fork for the root, and lock it exclusively */
+ buffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /* Initialize the page as a leaf, mark it dirty and xlog it, all inside one critical section */
+ START_CRIT_SECTION();
+ GISTInitBuffer(buffer, F_LEAF);
+ MarkBufferDirty(buffer);
+ log_newpage_buffer(buffer);
+ END_CRIT_SECTION();
+
+ /* Drop the exclusive lock and release our pin on the buffer */
+ UnlockReleaseBuffer(buffer);
PG_RETURN_VOID();
}
dist, oldrlink, oldnsn, leftchildbuf,
markfollowright);
else
- recptr = GetXLogRecPtrForTemp();
+ recptr = gistGetFakeLSN(rel);
for (ptr = dist; ptr; ptr = ptr->next)
{
}
else
{
- recptr = GetXLogRecPtrForTemp();
+ recptr = gistGetFakeLSN(rel);
PageSetLSN(page, recptr);
}
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
- /*
- * We can't yet handle unlogged GiST indexes, because we depend on LSNs.
- * This is duplicative of an error in gistbuildempty, but we want to check
- * here so as to throw error before doing all the index-build work.
- */
- if (heap->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("unlogged GiST indexes are not supported")));
-
/* no locking is needed */
buildstate.giststate = initGISTstate(index);
PageSetTLI(page, ThisTimeLineID);
}
else
- PageSetLSN(page, GetXLogRecPtrForTemp());
+ PageSetLSN(page, gistGetFakeLSN(heap));
UnlockReleaseBuffer(buffer);
}
/*
- * Temporary GiST indexes are not WAL-logged, but we need LSNs to detect
- * concurrent page splits anyway. GetXLogRecPtrForTemp() provides a fake
- * sequence of LSNs for that purpose. Each call generates an LSN that is
- * greater than any previous value returned by this function in the same
- * session.
+ * Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
+ * to detect concurrent page splits anyway. This function provides a fake
+ * sequence of LSNs for that purpose.
+ *
+ * For a temporary relation the value comes from a backend-local static
+ * counter; for any other relation it comes from GetFakeLSNForUnloggedRel().
+ * The relation's persistence must be either temporary or unlogged; calling
+ * this for any other persistence trips the Assert below.
*/
XLogRecPtr
-GetXLogRecPtrForTemp(void)
+gistGetFakeLSN(Relation rel)
{
static XLogRecPtr counter = 1;
- counter++;
- return counter;
+
+ if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+ {
+ /*
+ * Temporary relations are only accessible in our session, so a
+ * simple backend-local counter will do.
+ */
+ return counter++;
+ }
+ else
+ {
+ /*
+ * Unlogged relations are accessible from other backends, and survive
+ * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
+ */
+ Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
+ return GetFakeLSNForUnloggedRel();
+ }
}
PageSetTLI(page, ThisTimeLineID);
}
else
- PageSetLSN(page, GetXLogRecPtrForTemp());
+ PageSetLSN(page, gistGetFakeLSN(rel));
END_CRIT_SECTION();
}
XLogRecPtr asyncXactLSN; /* LSN of newest async commit/abort */
XLogSegNo lastRemovedSegNo; /* latest removed/recycled XLOG segment */
+ /* Fake LSN counter, for unlogged relations. Protected by ulsn_lck */
+ XLogRecPtr unloggedLSN;
+ slock_t ulsn_lck;
+
/* Protected by WALWriteLock: */
XLogCtlWrite Write;
return ControlFile->system_identifier;
}
+/*
+ * Returns a fake LSN for unlogged relations.
+ *
+ * Each call generates an LSN that is greater than any previous value
+ * returned. The current counter value is saved and restored across clean
+ * shutdowns, but like unlogged relations, does not survive a crash. This can
+ * be used in lieu of real LSN values returned by XLogInsert, if you need an
+ * LSN-like increasing sequence of numbers without writing any WAL.
+ *
+ * The counter is XLogCtl->unloggedLSN and is advanced while holding the
+ * ulsn_lck spinlock. The value returned is the counter's value before the
+ * increment.
+ */
+XLogRecPtr
+GetFakeLSNForUnloggedRel(void)
+{
+ XLogRecPtr nextUnloggedLSN;
+
+ /* use volatile pointer to prevent code rearrangement */
+ volatile XLogCtlData *xlogctl = XLogCtl;
+
+ /* fetch the current counter value and post-increment it under the spinlock */
+ SpinLockAcquire(&xlogctl->ulsn_lck);
+ nextUnloggedLSN = xlogctl->unloggedLSN++;
+ SpinLockRelease(&xlogctl->ulsn_lck);
+
+ return nextUnloggedLSN;
+}
+
/*
* Auto-tune the number of XLOG buffers.
*
XLogCtl->WalWriterSleeping = false;
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
SpinLockInit(&XLogCtl->info_lck);
+ SpinLockInit(&XLogCtl->ulsn_lck);
InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
/*
ControlFile->time = checkPoint.time;
ControlFile->checkPoint = checkPoint.redo;
ControlFile->checkPointCopy = checkPoint;
+ ControlFile->unloggedLSN = 1;
/* Set important parameter values for use when replaying WAL */
ControlFile->MaxConnections = MaxConnections;
XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
XLogCtl->ckptXid = checkPoint.nextXid;
+ /*
+ * Initialize unlogged LSN. After a clean shutdown, it's restored from the
+ * control file. On recovery, all unlogged relations are blown away, so
+ * the unlogged LSN counter can be reset too.
+ */
+ if (ControlFile->state == DB_SHUTDOWNED)
+ XLogCtl->unloggedLSN = ControlFile->unloggedLSN;
+ else
+ XLogCtl->unloggedLSN = 1;
+
/*
* We must replay WAL entries using the same TimeLineID they were created
* under, so temporarily adopt the TLI indicated by the checkpoint (see
/* crash recovery should always recover to the end of WAL */
ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
ControlFile->minRecoveryPointTLI = 0;
+
+ /*
+ * Persist unloggedLSN value. It's reset on crash recovery, so this goes
+ * unused on non-shutdown checkpoints, but it seems useful to always store
+ * it for debugging purposes.
+ */
+ SpinLockAcquire(&XLogCtl->ulsn_lck);
+ ControlFile->unloggedLSN = XLogCtl->unloggedLSN;
+ SpinLockRelease(&XLogCtl->ulsn_lck);
+
UpdateControlFile();
LWLockRelease(ControlFileLock);
* Force XLOG flush up to buffer's LSN. This implements the basic WAL
* rule that log updates must hit disk before any of the data-file changes
* they describe do.
+ *
+ * However, this rule does not apply to unlogged relations, which will be
+ * lost after a crash anyway. Most unlogged relation pages do not bear
+ * LSNs since we never emit WAL records for them, and therefore flushing
+ * up through the buffer LSN would be useless, but harmless. However, GiST
+ * indexes use LSNs internally to track page-splits, and therefore unlogged
+ * GiST pages bear "fake" LSNs generated by GetFakeLSNForUnloggedRel. It
+ * is unlikely but possible that the fake LSN counter could advance past
+ * the WAL insertion point; and if it did happen, attempting to flush WAL
+ * through that location would fail, with disastrous system-wide
+ * consequences. To make sure that can't happen, skip the flush if the
+ * buffer isn't permanent.
*/
- recptr = BufferGetLSN(buf);
- XLogFlush(recptr);
+ if (buf->flags & BM_PERMANENT)
+ {
+ recptr = BufferGetLSN(buf);
+ XLogFlush(recptr);
+ }
/*
* Now it's safe to write buffer to disk. Note that no one else should
ControlFile.checkPointCopy.oldestMultiDB);
printf(_("Time of latest checkpoint: %s\n"),
ckpttime_str);
+ printf(_("Fake LSN counter for unlogged rels: %X/%X\n"),
+ (uint32) (ControlFile.unloggedLSN >> 32),
+ (uint32) ControlFile.unloggedLSN);
printf(_("Min recovery ending location: %X/%X\n"),
(uint32) (ControlFile.minRecoveryPoint >> 32),
(uint32) ControlFile.minRecoveryPoint);
ControlFile.state = DB_SHUTDOWNED;
ControlFile.time = (pg_time_t) time(NULL);
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
+ ControlFile.unloggedLSN = 1;
/* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */
GISTENTRY *entry2, bool isnull2,
Datum *dst, bool *dstisnull);
-extern XLogRecPtr GetXLogRecPtrForTemp(void);
+extern XLogRecPtr gistGetFakeLSN(Relation rel);
/* gistvacuum.c */
extern Datum gistbulkdelete(PG_FUNCTION_ARGS);
extern void UpdateControlFile(void);
extern uint64 GetSystemIdentifier(void);
+extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
extern Size XLOGShmemSize(void);
extern void XLOGShmemInit(void);
extern void BootStrapXLOG(void);
/* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION 934
+#define PG_CONTROL_VERSION 935
/*
* Body of CheckPoint XLOG records. This is declared here because we keep
CheckPoint checkPointCopy; /* copy of last check point record */
+ XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */
+
/*
* These two values determine the minimum point we must recover up to
* before starting up: