1 /*-------------------------------------------------------------------------
4 * Export internal transaction IDs to user level.
6 * Note that only top-level transaction IDs are ever converted to TXID.
7 * This is important because TXIDs frequently persist beyond the global
8 * xmin horizon, or may even be shipped to other machines, so we cannot
9 * rely on being able to correlate subtransaction IDs with their parents
10 * via functions such as SubTransGetTopmostTransaction().
13 * Copyright (c) 2003-2009, PostgreSQL Global Development Group
14 * Author: Jan Wieck, Afilias USA INC.
15 * 64-bit txids: Marko Kreen, Skype Technologies
17 * $PostgreSQL: pgsql/src/backend/utils/adt/txid.c,v 1.9 2009/12/19 01:32:36 sriggs Exp $
19 *-------------------------------------------------------------------------
24 #include "access/transam.h"
25 #include "access/xact.h"
27 #include "miscadmin.h"
28 #include "libpq/pqformat.h"
29 #include "utils/builtins.h"
30 #include "utils/snapmgr.h"
/*
 * MAX_TXID is the upper bound this file applies to txid values: str2txid()
 * derives its overflow limits from it and txid_snapshot_recv() rejects an
 * xmax above it.  Two alternative definitions are visible, a 63-bit limit
 * and a 31-bit limit, selected on INT64_IS_BUSTED.
 * NOTE(review): the preprocessor lines separating and closing the two
 * alternatives are not visible in this listing -- confirm in the full file.
 */
33 #ifndef INT64_IS_BUSTED
34 /* txid will be signed int8 in database, so must limit to 63 bits */
35 #define MAX_TXID UINT64CONST(0x7FFFFFFFFFFFFFFF)
37 /* we only really have 32 bits to work with :-( */
38 #define MAX_TXID UINT64CONST(0x7FFFFFFF)
41 /* Use unsigned variant internally */
/*
 * TXID_FMT is the format conversion txid_snapshot_out() passes to
 * appendStringInfo() when printing xmin, xmax and each xip entry.
 */
44 /* sprintf format code for uint64 */
45 #define TXID_FMT UINT64_FORMAT
48 * If defined, use bsearch() function for searching for txids in snapshots
49 * that have more than the specified number of values.
/*
 * Threshold consulted by is_visible_txid(): when a snapshot's nxip is
 * greater than this value, the xip array is searched with bsearch();
 * the other visible search path is a linear scan over xip.
 */
51 #define USE_BSEARCH_IF_NXIP_GREATER 30
55 * Snapshot containing 8-byte txids.
60 * 4-byte length hdr, should not be touched directly.
62 * Explicit embedding is OK, as we always want correct alignment anyway.
/*
 * Visible members of TxidSnapshot.  Other members used elsewhere in this
 * file (xmin, xmax) are not shown in this listing.
 */
66 uint32 nxip; /* number of txids in xip array */
/*
 * Declared with a single element but used as a trailing variable-length
 * array: TXID_SNAPSHOT_SIZE() sizes the object as the offset of xip plus
 * nxip entries.
 */
69 txid xip[1]; /* in-progress txids, xmin <= xip[i] < xmax */
/*
 * Size in bytes of a TxidSnapshot holding nxip in-progress txids: the
 * fixed part up to the xip member plus nxip entries of sizeof(txid).
 * This file uses it to size palloc() requests and as the SET_VARSIZE()
 * length.
 */
72 #define TXID_SNAPSHOT_SIZE(nxip) \
73 (offsetof(TxidSnapshot, xip) + sizeof(txid) * (nxip))
76 * Epoch values from xact.c
/*
 * Visible member of TxidEpoch.  load_xid_epoch() fills last_xid (together
 * with an epoch member that is not shown in this listing) through
 * GetNextXidAndEpoch(); convert_xid() compares incoming XIDs against it.
 */
80 TransactionId last_xid;
86 * Fetch epoch data from xact.c.
/*
 * load_xid_epoch
 *
 * Fill *state by handing the addresses of its last_xid and epoch members
 * to GetNextXidAndEpoch().  Callers in this file pass the filled struct to
 * convert_xid().
 *
 * state: output; must point to a writable TxidEpoch.
 */
89 load_xid_epoch(TxidEpoch *state)
91 GetNextXidAndEpoch(&state->last_xid, &state->epoch);
95 * do a TransactionId -> txid conversion for an XID near the given epoch
/*
 * convert_xid
 *
 * Widen a TransactionId to a txid using the epoch data in *state.
 *
 * xid:   transaction ID to convert.
 * state: epoch/last_xid pair as filled by load_xid_epoch(); read only.
 *
 * When INT64_IS_BUSTED is not defined the result is (epoch << 32) | xid.
 * Otherwise the epoch is ignored and xid is masked with MAX_TXID.
 */
98 convert_xid(TransactionId xid, const TxidEpoch *state)
100 #ifndef INT64_IS_BUSTED
103 /* return special xid's as-is */
/*
 * NOTE(review): the statement executed when this test succeeds is not
 * visible in this listing.
 */
104 if (!TransactionIdIsNormal(xid))
107 /* xid can be on either side when near wrap-around */
108 epoch = (uint64) state->epoch;
/*
 * The two tests below detect xid and last_xid lying on opposite sides of a
 * wraparound: xid numerically larger yet logically preceding last_xid, or
 * numerically smaller yet logically following it.
 * NOTE(review): the epoch adjustment made in each branch is not visible in
 * this listing -- presumably a decrement and an increment; confirm.
 */
109 if (xid > state->last_xid &&
110 TransactionIdPrecedes(xid, state->last_xid))
112 else if (xid < state->last_xid &&
113 TransactionIdFollows(xid, state->last_xid))
/* Epoch occupies the high 32 bits, the original xid the low 32 bits. */
116 return (epoch << 32) | xid;
117 #else /* INT64_IS_BUSTED */
118 /* we can't do anything with the epoch, so ignore it */
119 return (txid) xid & MAX_TXID;
120 #endif /* INT64_IS_BUSTED */
124 * txid comparator for qsort/bsearch
/*
 * cmp_txid
 *
 * Comparison callback handed to qsort() and bsearch() in this file.  Each
 * argument points to a txid, which is read through a const pointer.
 * NOTE(review): the comparison and return statements are not visible in
 * this listing.
 */
127 cmp_txid(const void *aa, const void *bb)
129 txid a = *(const txid *) aa;
130 txid b = *(const txid *) bb;
140 * sort a snapshot's txids, so we can use bsearch() later.
142 * For consistency of on-disk representation, we always sort even if bsearch
/*
 * sort_snapshot
 *
 * Sort the nxip entries of snap->xip in place with qsort(), ordered by
 * cmp_txid().
 * NOTE(review): any guard in front of the qsort() call is not visible in
 * this listing.
 */
146 sort_snapshot(TxidSnapshot *snap)
149 qsort(snap->xip, snap->nxip, sizeof(txid), cmp_txid);
153 * check txid visibility.
/*
 * is_visible_txid
 *
 * Decide whether txid 'value' is visible in snapshot 'snap'.
 *
 * The visible structure is a chain of tests: value below snap->xmin, value
 * at or above snap->xmax, and otherwise a lookup of value in snap->xip --
 * via bsearch() when nxip exceeds USE_BSEARCH_IF_NXIP_GREATER, else via a
 * linear scan.
 *
 * NOTE(review): the results returned by the xmin, xmax and linear-scan
 * branches are not visible in this listing; only the bsearch branch shows
 * its outcome (found means not visible).
 */
156 is_visible_txid(txid value, const TxidSnapshot *snap)
158 if (value < snap->xmin)
160 else if (value >= snap->xmax)
162 #ifdef USE_BSEARCH_IF_NXIP_GREATER
163 else if (snap->nxip > USE_BSEARCH_IF_NXIP_GREATER)
/* bsearch() needs xip ordered consistently with cmp_txid(). */
167 res = bsearch(&value, snap->xip, snap->nxip, sizeof(txid), cmp_txid);
168 /* if found, transaction is still in progress */
169 return (res) ? false : true;
/* Fallback: compare value against every xip entry in turn. */
176 for (i = 0; i < snap->nxip; i++)
178 if (value == snap->xip[i])
186 * helper functions to use StringInfo for TxidSnapshot creation.
/*
 * buf_init
 *
 * Start building a snapshot in a StringInfo: create the buffer with
 * makeStringInfo() and append the first TXID_SNAPSHOT_SIZE(0) bytes of a
 * local TxidSnapshot, i.e. the fixed part with no xip entries.
 * NOTE(review): the statements that store xmin and xmax into the local
 * struct and that return the buffer are not visible in this listing.
 */
190 buf_init(txid xmin, txid xmax)
199 buf = makeStringInfo();
200 appendBinaryStringInfo(buf, (char *) &snap, TXID_SNAPSHOT_SIZE(0));
/*
 * buf_add_txid
 *
 * Append the raw bytes of xid to buf, whose data is treated as beginning
 * with a TxidSnapshot.
 */
205 buf_add_txid(StringInfo buf, txid xid)
207 TxidSnapshot *snap = (TxidSnapshot *) buf->data;
/*
 * snap points into buf->data, so it has to be used before the append
 * below, which may reallocate the buffer.
 * NOTE(review): the statement that uses snap is not visible in this
 * listing.
 */
209 /* do this before possible realloc */
212 appendBinaryStringInfo(buf, (char *) &xid, sizeof(xid));
/*
 * buf_finalize
 *
 * Turn the accumulated buffer into a TxidSnapshot: buf->data is
 * reinterpreted as the snapshot and buf->len is recorded as its varlena
 * size.
 * NOTE(review): the cleanup of buf and the return statement are not
 * visible in this listing.
 */
215 static TxidSnapshot *
216 buf_finalize(StringInfo buf)
218 TxidSnapshot *snap = (TxidSnapshot *) buf->data;
220 SET_VARSIZE(snap, buf->len);
222 /* buf is not needed anymore */
230 * simple number parser.
232 * We return 0 on error, which is an invalid value for a txid.
/*
 * str2txid
 *
 * Parse a run of decimal digits from s into a txid, bounded by MAX_TXID.
 *
 * s:    text to parse.
 * endp: out-parameter; callers in this file read *endp after the call.
 *       NOTE(review): the statement that sets it is not visible in this
 *       listing.
 */
235 str2txid(const char *s, const char **endp)
/*
 * cutoff and cutlim split MAX_TXID into "all but the last decimal digit"
 * and "the last decimal digit" for the overflow test below.
 */
238 txid cutoff = MAX_TXID / 10;
239 txid cutlim = MAX_TXID % 10;
/* A character outside '0'..'9' is tested for here. */
245 if (*s < '0' || *s > '9')
/*
 * True exactly when val * 10 + d would exceed MAX_TXID.
 * NOTE(review): the computation of d and the accumulation into val are
 * not visible in this listing.
 */
252 if (val > cutoff || (val == cutoff && d > cutlim))
266 * parse snapshot from cstring
/*
 * parse_snapshot
 *
 * Build a TxidSnapshot from its text form.
 *
 * Visible steps: read xmin and xmax with str2txid(); check that neither is
 * 0 and that xmin <= xmax; start a buffer with buf_init(); then read
 * further values one at a time, testing each against the range
 * [xmin, xmax) and against last_val before appending it with
 * buf_add_txid().  The result is whatever buf_finalize() returns.  The
 * error report quotes the original input string.
 *
 * NOTE(review): separator handling, the updates of str and last_val, the
 * loop construct itself and the jumps to the error report are not visible
 * in this listing.
 */
268 static TxidSnapshot *
269 parse_snapshot(const char *str)
/* Keep the start of the input for the error message. */
275 const char *str_start = str;
279 xmin = str2txid(str, &endp);
284 xmax = str2txid(str, &endp);
289 /* it should look sane */
290 if (xmin == 0 || xmax == 0 || xmin > xmax)
293 /* allocate buffer */
294 buf = buf_init(xmin, xmax);
296 /* loop over values */
299 /* read next value */
300 val = str2txid(str, &endp);
303 /* require the input to be in order */
304 if (val < xmin || val >= xmax || val <= last_val)
307 buf_add_txid(buf, val);
/* Any character other than end-of-string is tested for here. */
312 else if (*str != '\0')
316 return buf_finalize(buf);
319 elog(ERROR, "invalid input for txid_snapshot: \"%s\"", str_start);
326 * txid_current() and txid_current_snapshot() are the only ones that
327 * communicate with core xid machinery. All the others work on data
332 * txid_current() returns int8
334 * Return the current toplevel transaction ID as TXID
/*
 * txid_current
 *
 * SQL-callable function returning the top-level transaction ID as an
 * int8 txid.  It first calls PreventCommandDuringRecovery(), then loads
 * the epoch state and converts the value of GetTopTransactionId() with
 * convert_xid().
 */
337 txid_current(PG_FUNCTION_ARGS)
343 * Must prevent during recovery because if an xid is
344 * not assigned we try to assign one, which would fail.
345 * Programs already rely on this function to always
346 * return a valid current xid, so we should not change
347 * this to return NULL or similar invalid xid.
349 PreventCommandDuringRecovery();
351 load_xid_epoch(&state);
353 val = convert_xid(GetTopTransactionId(), &state);
355 PG_RETURN_INT64(val);
359 * txid_current_snapshot() returns txid_snapshot
361 * Return current snapshot in TXID format
363 * Note that only top-transaction XIDs are included in the snapshot.
/*
 * txid_current_snapshot
 *
 * SQL-callable function returning the active snapshot as a TxidSnapshot.
 *
 * Visible steps: fetch the snapshot with GetActiveSnapshot(); load the
 * epoch state; size the result for nxip entries; convert xmin, xmax and
 * every xip entry with convert_xid(); return the result as a pointer
 * datum.
 *
 * NOTE(review): the test guarding the "no active snapshot set" error, the
 * assignment of nxip, the allocation of snap, the store of snap->nxip and
 * the sort call announced by the last comment are not visible in this
 * listing.
 */
366 txid_current_snapshot(PG_FUNCTION_ARGS)
375 cur = GetActiveSnapshot();
377 elog(ERROR, "no active snapshot set");
379 load_xid_epoch(&state);
383 size = TXID_SNAPSHOT_SIZE(nxip);
385 SET_VARSIZE(snap, size);
/* All conversions use the same epoch state loaded above. */
388 snap->xmin = convert_xid(cur->xmin, &state);
389 snap->xmax = convert_xid(cur->xmax, &state);
391 for (i = 0; i < nxip; i++)
392 snap->xip[i] = convert_xid(cur->xip[i], &state);
394 /* we want them guaranteed to be in ascending order */
397 PG_RETURN_POINTER(snap);
401 * txid_snapshot_in(cstring) returns txid_snapshot
403 * input function for type txid_snapshot
/*
 * txid_snapshot_in
 *
 * Text input function: takes the cstring argument, hands it to
 * parse_snapshot() and returns the resulting snapshot as a pointer datum.
 * Input validation is whatever parse_snapshot() performs.
 */
406 txid_snapshot_in(PG_FUNCTION_ARGS)
408 char *str = PG_GETARG_CSTRING(0);
411 snap = parse_snapshot(str);
413 PG_RETURN_POINTER(snap);
417 * txid_snapshot_out(txid_snapshot) returns cstring
419 * output function for type txid_snapshot
/*
 * txid_snapshot_out
 *
 * Text output function.  Writes xmin and xmax, each followed by ':', then
 * the nxip entries of xip, using TXID_FMT for every number, and returns
 * the accumulated string data as a cstring.
 * NOTE(review): the condition controlling when the ',' separator is
 * emitted is not visible in this listing.
 */
422 txid_snapshot_out(PG_FUNCTION_ARGS)
424 TxidSnapshot *snap = (TxidSnapshot *) PG_GETARG_VARLENA_P(0);
428 initStringInfo(&str);
430 appendStringInfo(&str, TXID_FMT ":", snap->xmin);
431 appendStringInfo(&str, TXID_FMT ":", snap->xmax);
433 for (i = 0; i < snap->nxip; i++)
436 appendStringInfoChar(&str, ',');
437 appendStringInfo(&str, TXID_FMT, snap->xip[i]);
440 PG_RETURN_CSTRING(str.data);
444 * txid_snapshot_recv(internal) returns txid_snapshot
446 * binary input function for type txid_snapshot
448 * format: int4 nxip, int8 xmin, int8 xmax, then nxip int8 xip values
/*
 * txid_snapshot_recv
 *
 * Binary input function.  Reads nxip (4 bytes), xmin and xmax (8 bytes
 * each) and then nxip 8-byte entries from the message buffer, validating
 * as it goes, and returns a freshly palloc'd TxidSnapshot.
 *
 * Visible checks: nxip must be non-negative and the expected payload must
 * fit in the bytes remaining; xmin and xmax must be non-zero with
 * xmin <= xmax <= MAX_TXID; each entry is tested against 'last' and the
 * range [xmin, xmax).  The error report is "invalid snapshot data".
 *
 * NOTE(review): the local declarations, the stores into snap (xmin, xmax,
 * nxip, xip[i]), the update of 'last' and the jumps to the error report
 * are not visible in this listing.
 */
451 txid_snapshot_recv(PG_FUNCTION_ARGS)
453 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
464 * load nxip and check for nonsense.
466 * (nxip > avail) check is against int overflows in 'expect'.
468 nxip = pq_getmsgint(buf, 4);
/* Bytes left in the message after the nxip field. */
469 avail = buf->len - buf->cursor;
/*
 * NOTE(review): this product is evaluated before any bound on nxip has
 * been applied.  If nxip and expect are plain int (declarations not
 * visible), a large nxip can overflow here, and the nxip > avail test
 * below only bounds nxip by avail, not by avail / 8.  Consider bounding
 * nxip by division before multiplying -- confirm against the full file.
 */
470 expect = 8 + 8 + nxip * 8;
471 if (nxip < 0 || nxip > avail || expect > avail)
474 xmin = pq_getmsgint64(buf);
475 xmax = pq_getmsgint64(buf);
476 if (xmin == 0 || xmax == 0 || xmin > xmax || xmax > MAX_TXID)
479 snap = palloc(TXID_SNAPSHOT_SIZE(nxip));
483 SET_VARSIZE(snap, TXID_SNAPSHOT_SIZE(nxip));
485 for (i = 0; i < nxip; i++)
487 txid cur = pq_getmsgint64(buf);
/* Reject entries not above 'last' or outside [xmin, xmax). */
489 if (cur <= last || cur < xmin || cur >= xmax)
494 PG_RETURN_POINTER(snap);
497 elog(ERROR, "invalid snapshot data");
502 * txid_snapshot_send(txid_snapshot) returns bytea
504 * binary output function for type txid_snapshot
506 * format: int4 nxip, int8 xmin, int8 xmax, then nxip int8 xip values
/*
 * txid_snapshot_send
 *
 * Binary output function.  Emits nxip as a 4-byte integer, then xmin and
 * xmax as 8-byte integers, then each of the nxip xip entries as an 8-byte
 * integer, and returns the finished message as a bytea.  This is the same
 * field order txid_snapshot_recv() reads.
 */
509 txid_snapshot_send(PG_FUNCTION_ARGS)
511 TxidSnapshot *snap = (TxidSnapshot *) PG_GETARG_VARLENA_P(0);
515 pq_begintypsend(&buf);
516 pq_sendint(&buf, snap->nxip, 4);
517 pq_sendint64(&buf, snap->xmin);
518 pq_sendint64(&buf, snap->xmax);
519 for (i = 0; i < snap->nxip; i++)
520 pq_sendint64(&buf, snap->xip[i]);
521 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
525 * txid_visible_in_snapshot(int8, txid_snapshot) returns bool
527 * Is the txid visible in the snapshot?
/*
 * txid_visible_in_snapshot
 *
 * SQL-callable wrapper around is_visible_txid(): argument 0 is the int8
 * txid, argument 1 the snapshot; the boolean result is returned as is.
 */
530 txid_visible_in_snapshot(PG_FUNCTION_ARGS)
532 txid value = PG_GETARG_INT64(0);
533 TxidSnapshot *snap = (TxidSnapshot *) PG_GETARG_VARLENA_P(1);
535 PG_RETURN_BOOL(is_visible_txid(value, snap));
539 * txid_snapshot_xmin(txid_snapshot) returns int8
541 * return snapshot's xmin
/*
 * txid_snapshot_xmin
 *
 * SQL-callable accessor: returns the xmin member of the snapshot argument
 * as int8.
 */
544 txid_snapshot_xmin(PG_FUNCTION_ARGS)
546 TxidSnapshot *snap = (TxidSnapshot *) PG_GETARG_VARLENA_P(0);
548 PG_RETURN_INT64(snap->xmin);
552 * txid_snapshot_xmax(txid_snapshot) returns int8
554 * return snapshot's xmax
/*
 * txid_snapshot_xmax
 *
 * SQL-callable accessor: returns the xmax member of the snapshot argument
 * as int8.
 */
557 txid_snapshot_xmax(PG_FUNCTION_ARGS)
559 TxidSnapshot *snap = (TxidSnapshot *) PG_GETARG_VARLENA_P(0);
561 PG_RETURN_INT64(snap->xmax);
565 * txid_snapshot_xip(txid_snapshot) returns setof int8
567 * return in-progress TXIDs in snapshot.
/*
 * txid_snapshot_xip
 *
 * Set-returning function that yields the xip entries of a snapshot one
 * per call.
 *
 * On the first call the snapshot argument is copied into memory allocated
 * from fctx->multi_call_memory_ctx and the copy is stored in
 * fctx->user_fctx.  Every call then returns xip[call_cntr] as an int8
 * datum while call_cntr is below nxip, and signals completion otherwise.
 *
 * NOTE(review): the declarations of snap and value, and the block
 * structure around the first-call section, are not visible in this
 * listing.
 */
570 txid_snapshot_xip(PG_FUNCTION_ARGS)
572 FuncCallContext *fctx;
576 /* on first call initialize snap_state and get copy of snapshot */
577 if (SRF_IS_FIRSTCALL())
579 TxidSnapshot *arg = (TxidSnapshot *) PG_GETARG_VARLENA_P(0);
581 fctx = SRF_FIRSTCALL_INIT();
583 /* make a copy of user snapshot */
/* VARSIZE(arg) bytes are allocated and copied, i.e. the whole varlena. */
584 snap = MemoryContextAlloc(fctx->multi_call_memory_ctx, VARSIZE(arg));
585 memcpy(snap, arg, VARSIZE(arg));
587 fctx->user_fctx = snap;
590 /* return values one-by-one */
591 fctx = SRF_PERCALL_SETUP();
592 snap = fctx->user_fctx;
/* call_cntr serves as the index of the next xip entry to emit. */
593 if (fctx->call_cntr < snap->nxip)
595 value = snap->xip[fctx->call_cntr];
596 SRF_RETURN_NEXT(fctx, Int64GetDatum(value));
600 SRF_RETURN_DONE(fctx);