1 /*-------------------------------------------------------------------------
3 * PostgreSQL snapshot manager
5 * We keep track of snapshots in two ways: those "registered" by resowner.c,
6 * and the "active snapshot" stack. All snapshots in either of them live in
7 * persistent memory. When a snapshot is no longer in any of these lists
8 * (tracked by separate refcounts on each snapshot), its memory can be freed.
10 * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
11 * regd_count and count it in RegisteredSnapshots, but this reference is not
12 * tracked by a resource owner. We used to use the TopTransactionResourceOwner
13 * to track this snapshot reference, but that introduces logical circularity
14 * and thus makes it impossible to clean up in a sane fashion. It's better to
15 * handle this reference as an internally-tracked registration, so that this
16 * module is entirely lower-level than ResourceOwners.
18 * Likewise, any snapshots that have been exported by pg_export_snapshot
19 * have regd_count = 1 and are counted in RegisteredSnapshots, but are not
20 * tracked by any resource owner.
22 * These arrangements let us reset MyPgXact->xmin when there are no snapshots
23 * referenced by this transaction. (One possible improvement would be to be
24 * able to advance Xmin when the snapshot with the earliest Xmin is no longer
25 * referenced. That's a bit harder though, it requires more locking, and
26 * anyway it should be rather uncommon to keep temporary snapshots referenced
30 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
31 * Portions Copyright (c) 1994, Regents of the University of California
34 * src/backend/utils/time/snapmgr.c
36 *-------------------------------------------------------------------------
43 #include "access/transam.h"
44 #include "access/xact.h"
45 #include "miscadmin.h"
46 #include "storage/predicate.h"
47 #include "storage/proc.h"
48 #include "storage/procarray.h"
49 #include "storage/sinval.h"
50 #include "utils/builtins.h"
51 #include "utils/memutils.h"
52 #include "utils/resowner_private.h"
53 #include "utils/snapmgr.h"
54 #include "utils/syscache.h"
55 #include "utils/tqual.h"
59 * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
60 * mode, and to the latest one taken in a read-committed transaction.
61 * SecondarySnapshot is a snapshot that's always up-to-date as of the current
62 * instant, even in transaction-snapshot mode. It should only be used for
63 * special-purpose code (say, RI checking.) CatalogSnapshot points to an
64 * MVCC snapshot intended to be used for catalog scans; we must refresh it
65 * whenever a system catalog change occurs.
67 * These SnapshotData structs are static to simplify memory allocation
68 * (see the hack in GetSnapshotData to avoid repeated malloc/free).
70 static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC};
71 static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC};
72 static SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC};
74 /* Pointers to valid snapshots */
75 static Snapshot CurrentSnapshot = NULL;
76 static Snapshot SecondarySnapshot = NULL;
77 static Snapshot CatalogSnapshot = NULL;
80 * Staleness detection for CatalogSnapshot.
82 static bool CatalogSnapshotStale = true;
85 * These are updated by GetSnapshotData. We initialize them this way
86 * for the convenience of TransactionIdIsInProgress: even in bootstrap
87 * mode, we don't want it to say that BootstrapTransactionId is in progress.
89 * RecentGlobalXmin is initialized to InvalidTransactionId, to ensure that no
90 * one tries to use a stale value. Readers should ensure that it has been set
91 * to something else before using it.
93 TransactionId TransactionXmin = FirstNormalTransactionId;
94 TransactionId RecentXmin = FirstNormalTransactionId;
95 TransactionId RecentGlobalXmin = InvalidTransactionId;
98 * Elements of the active snapshot stack.
100 * Each element here accounts for exactly one active_count on SnapshotData.
102 * NB: the code assumes that elements in this list are in non-increasing
103 * order of as_level; also, the list must be NULL-terminated.
105 typedef struct ActiveSnapshotElt
109 struct ActiveSnapshotElt *as_next;
112 /* Top of the stack of active snapshots */
113 static ActiveSnapshotElt *ActiveSnapshot = NULL;
116 * How many snapshots is resowner.c tracking for us?
118 * Note: for now, a simple counter is enough. However, if we ever want to be
119 * smarter about advancing our MyPgXact->xmin we will need to be more
120 * sophisticated about this, perhaps keeping our own list of snapshots.
122 static int RegisteredSnapshots = 0;
124 /* first GetTransactionSnapshot call in a transaction? */
125 bool FirstSnapshotSet = false;
128 * Remember the serializable transaction snapshot, if any. We cannot trust
129 * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
130 * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
132 static Snapshot FirstXactSnapshot = NULL;
134 /* Define pathname of exported-snapshot files */
135 #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
136 #define XactExportFilePath(path, xid, num, suffix) \
137 snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%d%s", \
140 /* Current xact's exported snapshots (a list of Snapshot structs) */
141 static List *exportedSnapshots = NIL;
144 static Snapshot CopySnapshot(Snapshot snapshot);
145 static void FreeSnapshot(Snapshot snapshot);
146 static void SnapshotResetXmin(void);
150 * GetTransactionSnapshot
151 * Get the appropriate snapshot for a new query in a transaction.
153 * Note that the return value may point at static storage that will be modified
154 * by future calls and by CommandCounterIncrement(). Callers should call
155 * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
159 GetTransactionSnapshot(void)
161 /* First call in transaction? */
162 if (!FirstSnapshotSet)
164 Assert(RegisteredSnapshots == 0);
165 Assert(FirstXactSnapshot == NULL);
168 * In transaction-snapshot mode, the first snapshot must live until
169 * end of xact regardless of what the caller does with it, so we must
170 * make a copy of it rather than returning CurrentSnapshotData
171 * directly. Furthermore, if we're running in serializable mode,
172 * predicate.c needs to wrap the snapshot fetch in its own processing.
174 if (IsolationUsesXactSnapshot())
176 /* First, create the snapshot in CurrentSnapshotData */
177 if (IsolationIsSerializable())
178 CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
180 CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
181 /* Make a saved copy */
182 CurrentSnapshot = CopySnapshot(CurrentSnapshot);
183 FirstXactSnapshot = CurrentSnapshot;
184 /* Mark it as "registered" in FirstXactSnapshot */
185 FirstXactSnapshot->regd_count++;
186 RegisteredSnapshots++;
189 CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
191 /* Don't allow catalog snapshot to be older than xact snapshot. */
192 CatalogSnapshotStale = true;
194 FirstSnapshotSet = true;
195 return CurrentSnapshot;
198 if (IsolationUsesXactSnapshot())
199 return CurrentSnapshot;
201 /* Don't allow catalog snapshot to be older than xact snapshot. */
202 CatalogSnapshotStale = true;
204 CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
206 return CurrentSnapshot;
211 * Get a snapshot that is up-to-date as of the current instant,
212 * even if we are executing in transaction-snapshot mode.
215 GetLatestSnapshot(void)
217 /* If first call in transaction, go ahead and set the xact snapshot */
218 if (!FirstSnapshotSet)
219 return GetTransactionSnapshot();
221 SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
223 return SecondarySnapshot;
228 * Get a snapshot that is sufficiently up-to-date for scan of the
229 * system catalog with the specified OID.
232 GetCatalogSnapshot(Oid relid)
235 * If the caller is trying to scan a relation that has no syscache,
236 * no catcache invalidations will be sent when it is updated. For a
237 * a few key relations, snapshot invalidations are sent instead. If
238 * we're trying to scan a relation for which neither catcache nor
239 * snapshot invalidations are sent, we must refresh the snapshot every
242 if (!CatalogSnapshotStale && !RelationInvalidatesSnapshotsOnly(relid) &&
243 !RelationHasSysCache(relid))
244 CatalogSnapshotStale = true;
246 if (CatalogSnapshotStale)
248 /* Get new snapshot. */
249 CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
252 * Mark new snapshost as valid. We must do this last, in case an
253 * ERROR occurs inside GetSnapshotData().
255 CatalogSnapshotStale = false;
258 return CatalogSnapshot;
262 * Mark the current catalog snapshot as invalid. We could change this API
263 * to allow the caller to provide more fine-grained invalidation details, so
264 * that a change to relation A wouldn't prevent us from using our cached
265 * snapshot to scan relation B, but so far there's no evidence that the CPU
266 * cycles we spent tracking such fine details would be well-spent.
269 InvalidateCatalogSnapshot()
271 CatalogSnapshotStale = true;
275 * SnapshotSetCommandId
276 * Propagate CommandCounterIncrement into the static snapshots, if set
279 SnapshotSetCommandId(CommandId curcid)
281 if (!FirstSnapshotSet)
285 CurrentSnapshot->curcid = curcid;
286 if (SecondarySnapshot)
287 SecondarySnapshot->curcid = curcid;
291 * SetTransactionSnapshot
292 * Set the transaction's snapshot from an imported MVCC snapshot.
294 * Note that this is very closely tied to GetTransactionSnapshot --- it
295 * must take care of all the same considerations as the first-snapshot case
296 * in GetTransactionSnapshot.
299 SetTransactionSnapshot(Snapshot sourcesnap, TransactionId sourcexid)
301 /* Caller should have checked this already */
302 Assert(!FirstSnapshotSet);
304 Assert(RegisteredSnapshots == 0);
305 Assert(FirstXactSnapshot == NULL);
308 * Even though we are not going to use the snapshot it computes, we must
309 * call GetSnapshotData, for two reasons: (1) to be sure that
310 * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
311 * RecentXmin and RecentGlobalXmin. (We could alternatively include those
312 * two variables in exported snapshot files, but it seems better to have
313 * snapshot importers compute reasonably up-to-date values for them.)
315 CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
318 * Now copy appropriate fields from the source snapshot.
320 CurrentSnapshot->xmin = sourcesnap->xmin;
321 CurrentSnapshot->xmax = sourcesnap->xmax;
322 CurrentSnapshot->xcnt = sourcesnap->xcnt;
323 Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
324 memcpy(CurrentSnapshot->xip, sourcesnap->xip,
325 sourcesnap->xcnt * sizeof(TransactionId));
326 CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
327 Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
328 memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
329 sourcesnap->subxcnt * sizeof(TransactionId));
330 CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
331 CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
332 /* NB: curcid should NOT be copied, it's a local matter */
335 * Now we have to fix what GetSnapshotData did with MyPgXact->xmin and
336 * TransactionXmin. There is a race condition: to make sure we are not
337 * causing the global xmin to go backwards, we have to test that the
338 * source transaction is still running, and that has to be done
339 * atomically. So let procarray.c do it.
341 * Note: in serializable mode, predicate.c will do this a second time. It
342 * doesn't seem worth contorting the logic here to avoid two calls,
343 * especially since it's not clear that predicate.c *must* do this.
345 if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcexid))
347 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
348 errmsg("could not import the requested snapshot"),
349 errdetail("The source transaction %u is not running anymore.",
353 * In transaction-snapshot mode, the first snapshot must live until end of
354 * xact, so we must make a copy of it. Furthermore, if we're running in
355 * serializable mode, predicate.c needs to do its own processing.
357 if (IsolationUsesXactSnapshot())
359 if (IsolationIsSerializable())
360 SetSerializableTransactionSnapshot(CurrentSnapshot, sourcexid);
361 /* Make a saved copy */
362 CurrentSnapshot = CopySnapshot(CurrentSnapshot);
363 FirstXactSnapshot = CurrentSnapshot;
364 /* Mark it as "registered" in FirstXactSnapshot */
365 FirstXactSnapshot->regd_count++;
366 RegisteredSnapshots++;
369 FirstSnapshotSet = true;
374 * Copy the given snapshot.
376 * The copy is palloc'd in TopTransactionContext and has initial refcounts set
377 * to 0. The returned snapshot has the copied flag set.
380 CopySnapshot(Snapshot snapshot)
386 Assert(snapshot != InvalidSnapshot);
388 /* We allocate any XID arrays needed in the same palloc block. */
389 size = subxipoff = sizeof(SnapshotData) +
390 snapshot->xcnt * sizeof(TransactionId);
391 if (snapshot->subxcnt > 0)
392 size += snapshot->subxcnt * sizeof(TransactionId);
394 newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
395 memcpy(newsnap, snapshot, sizeof(SnapshotData));
397 newsnap->regd_count = 0;
398 newsnap->active_count = 0;
399 newsnap->copied = true;
401 /* setup XID array */
402 if (snapshot->xcnt > 0)
404 newsnap->xip = (TransactionId *) (newsnap + 1);
405 memcpy(newsnap->xip, snapshot->xip,
406 snapshot->xcnt * sizeof(TransactionId));
412 * Setup subXID array. Don't bother to copy it if it had overflowed,
413 * though, because it's not used anywhere in that case. Except if it's a
414 * snapshot taken during recovery; all the top-level XIDs are in subxip as
415 * well in that case, so we mustn't lose them.
417 if (snapshot->subxcnt > 0 &&
418 (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
420 newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
421 memcpy(newsnap->subxip, snapshot->subxip,
422 snapshot->subxcnt * sizeof(TransactionId));
425 newsnap->subxip = NULL;
432 * Free the memory associated with a snapshot.
435 FreeSnapshot(Snapshot snapshot)
437 Assert(snapshot->regd_count == 0);
438 Assert(snapshot->active_count == 0);
439 Assert(snapshot->copied);
446 * Set the given snapshot as the current active snapshot
448 * If the passed snapshot is a statically-allocated one, or it is possibly
449 * subject to a future command counter update, create a new long-lived copy
450 * with active refcount=1. Otherwise, only increment the refcount.
453 PushActiveSnapshot(Snapshot snap)
455 ActiveSnapshotElt *newactive;
457 Assert(snap != InvalidSnapshot);
459 newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
462 * Checking SecondarySnapshot is probably useless here, but it seems
465 if (snap == CurrentSnapshot || snap == SecondarySnapshot || !snap->copied)
466 newactive->as_snap = CopySnapshot(snap);
468 newactive->as_snap = snap;
470 newactive->as_next = ActiveSnapshot;
471 newactive->as_level = GetCurrentTransactionNestLevel();
473 newactive->as_snap->active_count++;
475 ActiveSnapshot = newactive;
480 * As above, except forcibly copy the presented snapshot.
482 * This should be used when the ActiveSnapshot has to be modifiable, for
483 * example if the caller intends to call UpdateActiveSnapshotCommandId.
484 * The new snapshot will be released when popped from the stack.
487 PushCopiedSnapshot(Snapshot snapshot)
489 PushActiveSnapshot(CopySnapshot(snapshot));
493 * UpdateActiveSnapshotCommandId
495 * Update the current CID of the active snapshot. This can only be applied
496 * to a snapshot that is not referenced elsewhere.
499 UpdateActiveSnapshotCommandId(void)
501 Assert(ActiveSnapshot != NULL);
502 Assert(ActiveSnapshot->as_snap->active_count == 1);
503 Assert(ActiveSnapshot->as_snap->regd_count == 0);
505 ActiveSnapshot->as_snap->curcid = GetCurrentCommandId(false);
511 * Remove the topmost snapshot from the active snapshot stack, decrementing the
512 * reference count, and free it if this was the last reference.
515 PopActiveSnapshot(void)
517 ActiveSnapshotElt *newstack;
519 newstack = ActiveSnapshot->as_next;
521 Assert(ActiveSnapshot->as_snap->active_count > 0);
523 ActiveSnapshot->as_snap->active_count--;
525 if (ActiveSnapshot->as_snap->active_count == 0 &&
526 ActiveSnapshot->as_snap->regd_count == 0)
527 FreeSnapshot(ActiveSnapshot->as_snap);
529 pfree(ActiveSnapshot);
530 ActiveSnapshot = newstack;
537 * Return the topmost snapshot in the Active stack.
540 GetActiveSnapshot(void)
542 Assert(ActiveSnapshot != NULL);
544 return ActiveSnapshot->as_snap;
549 * Return whether there is at least one snapshot in the Active stack
552 ActiveSnapshotSet(void)
554 return ActiveSnapshot != NULL;
559 * Register a snapshot as being in use by the current resource owner
561 * If InvalidSnapshot is passed, it is not registered.
564 RegisterSnapshot(Snapshot snapshot)
566 if (snapshot == InvalidSnapshot)
567 return InvalidSnapshot;
569 return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
573 * RegisterSnapshotOnOwner
574 * As above, but use the specified resource owner
577 RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
581 if (snapshot == InvalidSnapshot)
582 return InvalidSnapshot;
584 /* Static snapshot? Create a persistent copy */
585 snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
587 /* and tell resowner.c about it */
588 ResourceOwnerEnlargeSnapshots(owner);
590 ResourceOwnerRememberSnapshot(owner, snap);
592 RegisteredSnapshots++;
600 * Decrement the reference count of a snapshot, remove the corresponding
601 * reference from CurrentResourceOwner, and free the snapshot if no more
605 UnregisterSnapshot(Snapshot snapshot)
607 if (snapshot == NULL)
610 UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
614 * UnregisterSnapshotFromOwner
615 * As above, but use the specified resource owner
618 UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
620 if (snapshot == NULL)
623 Assert(snapshot->regd_count > 0);
624 Assert(RegisteredSnapshots > 0);
626 ResourceOwnerForgetSnapshot(owner, snapshot);
627 RegisteredSnapshots--;
628 if (--snapshot->regd_count == 0 && snapshot->active_count == 0)
630 FreeSnapshot(snapshot);
638 * If there are no more snapshots, we can reset our PGXACT->xmin to InvalidXid.
639 * Note we can do this without locking because we assume that storing an Xid
643 SnapshotResetXmin(void)
645 if (RegisteredSnapshots == 0 && ActiveSnapshot == NULL)
646 MyPgXact->xmin = InvalidTransactionId;
650 * AtSubCommit_Snapshot
653 AtSubCommit_Snapshot(int level)
655 ActiveSnapshotElt *active;
658 * Relabel the active snapshots set in this subtransaction as though they
659 * are owned by the parent subxact.
661 for (active = ActiveSnapshot; active != NULL; active = active->as_next)
663 if (active->as_level < level)
665 active->as_level = level - 1;
670 * AtSubAbort_Snapshot
671 * Clean up snapshots after a subtransaction abort
674 AtSubAbort_Snapshot(int level)
676 /* Forget the active snapshots set by this subtransaction */
677 while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
679 ActiveSnapshotElt *next;
681 next = ActiveSnapshot->as_next;
684 * Decrement the snapshot's active count. If it's still registered or
685 * marked as active by an outer subtransaction, we can't free it yet.
687 Assert(ActiveSnapshot->as_snap->active_count >= 1);
688 ActiveSnapshot->as_snap->active_count -= 1;
690 if (ActiveSnapshot->as_snap->active_count == 0 &&
691 ActiveSnapshot->as_snap->regd_count == 0)
692 FreeSnapshot(ActiveSnapshot->as_snap);
694 /* and free the stack element */
695 pfree(ActiveSnapshot);
697 ActiveSnapshot = next;
705 * Snapshot manager's cleanup function for end of transaction
708 AtEOXact_Snapshot(bool isCommit)
711 * In transaction-snapshot mode we must release our privately-managed
712 * reference to the transaction snapshot. We must decrement
713 * RegisteredSnapshots to keep the check below happy. But we don't bother
714 * to do FreeSnapshot, for two reasons: the memory will go away with
715 * TopTransactionContext anyway, and if someone has left the snapshot
716 * stacked as active, we don't want the code below to be chasing through a
719 if (FirstXactSnapshot != NULL)
721 Assert(FirstXactSnapshot->regd_count > 0);
722 Assert(RegisteredSnapshots > 0);
723 RegisteredSnapshots--;
725 FirstXactSnapshot = NULL;
728 * If we exported any snapshots, clean them up.
730 if (exportedSnapshots != NIL)
732 TransactionId myxid = GetTopTransactionId();
737 * Get rid of the files. Unlink failure is only a WARNING because (1)
738 * it's too late to abort the transaction, and (2) leaving a leaked
739 * file around has little real consequence anyway.
741 for (i = 1; i <= list_length(exportedSnapshots); i++)
743 XactExportFilePath(buf, myxid, i, "");
745 elog(WARNING, "could not unlink file \"%s\": %m", buf);
749 * As with the FirstXactSnapshot, we needn't spend any effort on
750 * cleaning up the per-snapshot data structures, but we do need to
751 * adjust the RegisteredSnapshots count to prevent a warning below.
753 * Note: you might be thinking "why do we have the exportedSnapshots
754 * list at all? All we need is a counter!". You're right, but we do
755 * it this way in case we ever feel like improving xmin management.
757 Assert(RegisteredSnapshots >= list_length(exportedSnapshots));
758 RegisteredSnapshots -= list_length(exportedSnapshots);
760 exportedSnapshots = NIL;
763 /* On commit, complain about leftover snapshots */
766 ActiveSnapshotElt *active;
768 if (RegisteredSnapshots != 0)
769 elog(WARNING, "%d registered snapshots seem to remain after cleanup",
770 RegisteredSnapshots);
772 /* complain about unpopped active snapshots */
773 for (active = ActiveSnapshot; active != NULL; active = active->as_next)
774 elog(WARNING, "snapshot %p still active", active);
778 * And reset our state. We don't need to free the memory explicitly --
779 * it'll go away with TopTransactionContext.
781 ActiveSnapshot = NULL;
782 RegisteredSnapshots = 0;
784 CurrentSnapshot = NULL;
785 SecondarySnapshot = NULL;
787 FirstSnapshotSet = false;
795 * Export the snapshot to a file so that other backends can import it.
796 * Returns the token (the file name) that can be used to import this
800 ExportSnapshot(Snapshot snapshot)
802 TransactionId topXid;
803 TransactionId *children;
809 MemoryContext oldcxt;
810 char path[MAXPGPATH];
811 char pathtmp[MAXPGPATH];
814 * It's tempting to call RequireTransactionChain here, since it's not very
815 * useful to export a snapshot that will disappear immediately afterwards.
816 * However, we haven't got enough information to do that, since we don't
817 * know if we're at top level or not. For example, we could be inside a
818 * plpgsql function that is going to fire off other transactions via
819 * dblink. Rather than disallow perfectly legitimate usages, don't make a
822 * Also note that we don't make any restriction on the transaction's
823 * isolation level; however, importers must check the level if they are
828 * This will assign a transaction ID if we do not yet have one.
830 topXid = GetTopTransactionId();
833 * We cannot export a snapshot from a subtransaction because there's no
834 * easy way for importers to verify that the same subtransaction is still
837 if (IsSubTransaction())
839 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
840 errmsg("cannot export a snapshot from a subtransaction")));
843 * We do however allow previous committed subtransactions to exist.
844 * Importers of the snapshot must see them as still running, so get their
845 * XIDs to add them to the snapshot.
847 nchildren = xactGetCommittedChildren(&children);
850 * Copy the snapshot into TopTransactionContext, add it to the
851 * exportedSnapshots list, and mark it pseudo-registered. We do this to
852 * ensure that the snapshot's xmin is honored for the rest of the
853 * transaction. (Right now, because SnapshotResetXmin is so stupid, this
854 * is overkill; but later we might make that routine smarter.)
856 snapshot = CopySnapshot(snapshot);
858 oldcxt = MemoryContextSwitchTo(TopTransactionContext);
859 exportedSnapshots = lappend(exportedSnapshots, snapshot);
860 MemoryContextSwitchTo(oldcxt);
862 snapshot->regd_count++;
863 RegisteredSnapshots++;
866 * Fill buf with a text serialization of the snapshot, plus identification
867 * data about this transaction. The format expected by ImportSnapshot is
868 * pretty rigid: each line must be fieldname:value.
870 initStringInfo(&buf);
872 appendStringInfo(&buf, "xid:%u\n", topXid);
873 appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
874 appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
875 appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
877 appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
878 appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
881 * We must include our own top transaction ID in the top-xid data, since
882 * by definition we will still be running when the importing transaction
883 * adopts the snapshot, but GetSnapshotData never includes our own XID in
884 * the snapshot. (There must, therefore, be enough room to add it.)
886 * However, it could be that our topXid is after the xmax, in which case
887 * we shouldn't include it because xip[] members are expected to be before
888 * xmax. (We need not make the same check for subxip[] members, see
891 addTopXid = TransactionIdPrecedes(topXid, snapshot->xmax) ? 1 : 0;
892 appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
893 for (i = 0; i < snapshot->xcnt; i++)
894 appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
896 appendStringInfo(&buf, "xip:%u\n", topXid);
899 * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
900 * we have to cope with possible overflow.
902 if (snapshot->suboverflowed ||
903 snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
904 appendStringInfoString(&buf, "sof:1\n");
907 appendStringInfoString(&buf, "sof:0\n");
908 appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
909 for (i = 0; i < snapshot->subxcnt; i++)
910 appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
911 for (i = 0; i < nchildren; i++)
912 appendStringInfo(&buf, "sxp:%u\n", children[i]);
914 appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
917 * Now write the text representation into a file. We first write to a
918 * ".tmp" filename, and rename to final filename if no error. This
919 * ensures that no other backend can read an incomplete file
920 * (ImportSnapshot won't allow it because of its valid-characters check).
922 XactExportFilePath(pathtmp, topXid, list_length(exportedSnapshots), ".tmp");
923 if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
925 (errcode_for_file_access(),
926 errmsg("could not create file \"%s\": %m", pathtmp)));
928 if (fwrite(buf.data, buf.len, 1, f) != 1)
930 (errcode_for_file_access(),
931 errmsg("could not write to file \"%s\": %m", pathtmp)));
933 /* no fsync() since file need not survive a system crash */
937 (errcode_for_file_access(),
938 errmsg("could not write to file \"%s\": %m", pathtmp)));
941 * Now that we have written everything into a .tmp file, rename the file
942 * to remove the .tmp suffix.
944 XactExportFilePath(path, topXid, list_length(exportedSnapshots), "");
946 if (rename(pathtmp, path) < 0)
948 (errcode_for_file_access(),
949 errmsg("could not rename file \"%s\" to \"%s\": %m",
953 * The basename of the file is what we return from pg_export_snapshot().
954 * It's already in path in a textual format and we know that the path
955 * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
956 * and pstrdup it so as not to return the address of a local variable.
958 return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
963 * SQL-callable wrapper for ExportSnapshot.
966 pg_export_snapshot(PG_FUNCTION_ARGS)
970 snapshotName = ExportSnapshot(GetActiveSnapshot());
971 PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
976 * Parsing subroutines for ImportSnapshot: parse a line with the given
977 * prefix followed by a value, and advance *s to the next line. The
978 * filename is provided for use in error messages.
981 parseIntFromText(const char *prefix, char **s, const char *filename)
984 int prefixlen = strlen(prefix);
987 if (strncmp(ptr, prefix, prefixlen) != 0)
989 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
990 errmsg("invalid snapshot data in file \"%s\"", filename)));
992 if (sscanf(ptr, "%d", &val) != 1)
994 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
995 errmsg("invalid snapshot data in file \"%s\"", filename)));
996 ptr = strchr(ptr, '\n');
999 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1000 errmsg("invalid snapshot data in file \"%s\"", filename)));
1005 static TransactionId
1006 parseXidFromText(const char *prefix, char **s, const char *filename)
1009 int prefixlen = strlen(prefix);
1012 if (strncmp(ptr, prefix, prefixlen) != 0)
1014 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1015 errmsg("invalid snapshot data in file \"%s\"", filename)));
1017 if (sscanf(ptr, "%u", &val) != 1)
1019 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1020 errmsg("invalid snapshot data in file \"%s\"", filename)));
1021 ptr = strchr(ptr, '\n');
1024 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1025 errmsg("invalid snapshot data in file \"%s\"", filename)));
1032 * Import a previously exported snapshot. The argument should be a
1033 * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1034 * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1037 ImportSnapshot(const char *idstr)
1039 char path[MAXPGPATH];
1041 struct stat stat_buf;
1045 TransactionId src_xid;
1049 SnapshotData snapshot;
1052 * Must be at top level of a fresh transaction. Note in particular that
1053 * we check we haven't acquired an XID --- if we have, it's conceivable
1054 * that the snapshot would show it as not running, making for very screwy
1057 if (FirstSnapshotSet ||
1058 GetTopTransactionIdIfAny() != InvalidTransactionId ||
1061 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1062 errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1065 * If we are in read committed mode then the next query would execute with
1066 * a new snapshot thus making this function call quite useless.
1068 if (!IsolationUsesXactSnapshot())
1070 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1071 errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1074 * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1075 * this mainly to prevent reading arbitrary files.
1077 if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1079 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1080 errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1082 /* OK, read the file */
1083 snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1085 f = AllocateFile(path, PG_BINARY_R);
1088 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1089 errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1091 /* get the size of the file so that we know how much memory we need */
1092 if (fstat(fileno(f), &stat_buf))
1093 elog(ERROR, "could not stat file \"%s\": %m", path);
1095 /* and read the file into a palloc'd string */
1096 filebuf = (char *) palloc(stat_buf.st_size + 1);
1097 if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1098 elog(ERROR, "could not read file \"%s\": %m", path);
1100 filebuf[stat_buf.st_size] = '\0';
1105 * Construct a snapshot struct by parsing the file content.
1107 memset(&snapshot, 0, sizeof(snapshot));
1109 src_xid = parseXidFromText("xid:", &filebuf, path);
1110 /* we abuse parseXidFromText a bit here ... */
1111 src_dbid = parseXidFromText("dbid:", &filebuf, path);
1112 src_isolevel = parseIntFromText("iso:", &filebuf, path);
1113 src_readonly = parseIntFromText("ro:", &filebuf, path);
1115 snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1116 snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1118 snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1120 /* sanity-check the xid count before palloc */
1121 if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
1123 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1124 errmsg("invalid snapshot data in file \"%s\"", path)));
1126 snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1127 for (i = 0; i < xcnt; i++)
1128 snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1130 snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1132 if (!snapshot.suboverflowed)
1134 snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1136 /* sanity-check the xid count before palloc */
1137 if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
1139 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1140 errmsg("invalid snapshot data in file \"%s\"", path)));
1142 snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1143 for (i = 0; i < xcnt; i++)
1144 snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1148 snapshot.subxcnt = 0;
1149 snapshot.subxip = NULL;
1152 snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1155 * Do some additional sanity checking, just to protect ourselves. We
1156 * don't trouble to check the array elements, just the most critical
1159 if (!TransactionIdIsNormal(src_xid) ||
1160 !OidIsValid(src_dbid) ||
1161 !TransactionIdIsNormal(snapshot.xmin) ||
1162 !TransactionIdIsNormal(snapshot.xmax))
1164 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1165 errmsg("invalid snapshot data in file \"%s\"", path)));
1168 * If we're serializable, the source transaction must be too, otherwise
1169 * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1170 * non-read-only transaction can't adopt a snapshot from a read-only
1171 * transaction, as predicate.c handles the cases very differently.
1173 if (IsolationIsSerializable())
1175 if (src_isolevel != XACT_SERIALIZABLE)
1177 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1178 errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1179 if (src_readonly && !XactReadOnly)
1181 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1182 errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1186 * We cannot import a snapshot that was taken in a different database,
1187 * because vacuum calculates OldestXmin on a per-database basis; so the
1188 * source transaction's xmin doesn't protect us from data loss. This
1189 * restriction could be removed if the source transaction were to mark its
1190 * xmin as being globally applicable. But that would require some
1191 * additional syntax, since that has to be known when the snapshot is
1192 * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1194 if (src_dbid != MyDatabaseId)
1196 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1197 errmsg("cannot import a snapshot from a different database")));
1199 /* OK, install the snapshot */
1200 SetTransactionSnapshot(&snapshot, src_xid);
1204 * XactHasExportedSnapshots
1205 * Test whether current transaction has exported any snapshots.
1208 XactHasExportedSnapshots(void)
1210 return (exportedSnapshots != NIL);
1214 * DeleteAllExportedSnapshotFiles
1215 * Clean up any files that have been left behind by a crashed backend
1216 * that had exported snapshots before it died.
1218 * This should be called during database startup or crash recovery.
1221 DeleteAllExportedSnapshotFiles(void)
1223 char buf[MAXPGPATH];
1225 struct dirent *s_de;
1227 if (!(s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR)))
1230 * We really should have that directory in a sane cluster setup. But
1231 * then again if we don't, it's not fatal enough to make it FATAL.
1232 * Since we're running in the postmaster, LOG is our best bet.
1234 elog(LOG, "could not open directory \"%s\": %m", SNAPSHOT_EXPORT_DIR);
1238 while ((s_de = ReadDir(s_dir, SNAPSHOT_EXPORT_DIR)) != NULL)
1240 if (strcmp(s_de->d_name, ".") == 0 ||
1241 strcmp(s_de->d_name, "..") == 0)
1244 snprintf(buf, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1245 /* Again, unlink failure is not worthy of FATAL */
1247 elog(LOG, "could not unlink file \"%s\": %m", buf);
1254 ThereAreNoPriorRegisteredSnapshots(void)
1256 if (RegisteredSnapshots <= 1)