DELETE FROM agg_csv WHERE a = 100;
ERROR: cannot change foreign table "agg_csv"
SELECT * FROM agg_csv FOR UPDATE OF agg_csv;
-ERROR: SELECT FOR UPDATE/SHARE cannot be used with foreign table "agg_csv"
+ERROR: SELECT FOR UPDATE/SHARE/KEY UPDATE/KEY SHARE cannot be used with foreign table "agg_csv"
LINE 1: SELECT * FROM agg_csv FOR UPDATE OF agg_csv;
^
-- but this should be ignored
tuphdr = (HeapTupleHeader) PageGetItem(page, id);
values[4] = UInt32GetDatum(HeapTupleHeaderGetXmin(tuphdr));
- values[5] = UInt32GetDatum(HeapTupleHeaderGetXmax(tuphdr));
+ values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); /* shared with xvac */
values[7] = PointerGetDatum(&tuphdr->t_ctid);
values[8] = UInt32GetDatum(tuphdr->t_infomask2);
bool got_xid = false;
bool got_oid = false;
bool got_nextxlogfile = false;
+ bool got_multi = false;
+ bool got_mxoff = false;
+ bool got_oldestmulti = false;
bool got_log_id = false;
bool got_log_seg = false;
bool got_tli = false;
cluster->controldata.chkpnt_nxtoid = str2uint(p);
got_oid = true;
}
+ else if ((p = strstr(bufin, "Latest checkpoint's NextMultiXactId:")) != NULL)
+ {
+ p = strchr(p, ':');
+
+ if (p == NULL || strlen(p) <= 1)
+ pg_log(PG_FATAL, "%d: controldata retrieval problem\n", __LINE__);
+
+ p++; /* removing ':' char */
+ cluster->controldata.chkpnt_nxtmulti = str2uint(p);
+ got_multi = true;
+ }
+ else if ((p = strstr(bufin, "Latest checkpoint's oldestMultiXid:")) != NULL)
+ {
+ p = strchr(p, ':');
+
+ if (p == NULL || strlen(p) <= 1)
+ pg_log(PG_FATAL, "%d: controldata retrieval problem\n", __LINE__);
+
+ p++; /* removing ':' char */
+ cluster->controldata.chkpnt_oldstMulti = str2uint(p);
+ got_oldestmulti = true;
+ }
+ else if ((p = strstr(bufin, "Latest checkpoint's NextMultiOffset:")) != NULL)
+ {
+ p = strchr(p, ':');
+
+ if (p == NULL || strlen(p) <= 1)
+ pg_log(PG_FATAL, "%d: controldata retrieval problem\n", __LINE__);
+
+ p++; /* removing ':' char */
+ cluster->controldata.chkpnt_nxtmxoff = str2uint(p);
+ got_mxoff = true;
+ }
else if ((p = strstr(bufin, "Maximum data alignment:")) != NULL)
{
p = strchr(p, ':');
/* verify that we got all the mandatory pg_control data */
if (!got_xid || !got_oid ||
+ !got_multi || !got_mxoff || !got_oldestmulti ||
(!live_check && !got_nextxlogfile) ||
!got_tli ||
!got_align || !got_blocksz || !got_largesz || !got_walsz ||
if (!got_oid)
pg_log(PG_REPORT, " latest checkpoint next OID\n");
+ if (!got_multi)
+ pg_log(PG_REPORT, " latest checkpoint next MultiXactId\n");
+
+ if (!got_mxoff)
+ pg_log(PG_REPORT, " latest checkpoint next MultiXactOffset\n");
+
+ if (!got_oldestmulti)
+ pg_log(PG_REPORT, " latest checkpoint oldest MultiXactId\n");
+
if (!live_check && !got_nextxlogfile)
pg_log(PG_REPORT, " first WAL segment after reset\n");
new_cluster.pgdata);
check_ok();
+ /*
+ * If both new and old are after the pg_multixact change commit, copy those
+ * files too. If the old server is before that change and the new server
+ * is after, then we don't copy anything but we need to reset pg_control so
+ * that the new server doesn't attempt to read multis older than the cutoff
+ * value.
+ */
+ if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER &&
+ new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
+ {
+ copy_subdir_files("pg_multixact/offsets");
+ copy_subdir_files("pg_multixact/members");
+ prep_status("Setting next multixact ID and offset for new cluster");
+ /*
+ * we preserve all files and contents, so we must preserve both "next"
+ * counters here and the oldest multi present on system.
+ */
+ exec_prog(UTILITY_LOG_FILE, NULL, true,
+ "\"%s/pg_resetxlog\" -O %u -m %u,%u \"%s\"",
+ new_cluster.bindir,
+ old_cluster.controldata.chkpnt_nxtmxoff,
+ old_cluster.controldata.chkpnt_nxtmulti,
+ old_cluster.controldata.chkpnt_oldstMulti,
+ new_cluster.pgdata);
+ check_ok();
+ }
+ else if (new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
+ {
+ prep_status("Setting oldest multixact ID on new cluster");
+ /*
+ * We don't preserve files in this case, but it's important that the
+ * oldest multi is set to the latest value used by the old system, so
+ * that multixact.c returns the empty set for multis that might be
+ * present on disk. We set next multi to the value following that; it
+ * might end up wrapped around (i.e. 0) if the old cluster had
+ * next=MaxMultiXactId, but multixact.c can cope with that just fine.
+ */
+ exec_prog(UTILITY_LOG_FILE, NULL, true,
+ "\"%s/pg_resetxlog\" -m %u,%u \"%s\"",
+ new_cluster.bindir,
+ old_cluster.controldata.chkpnt_nxtmulti + 1,
+ old_cluster.controldata.chkpnt_nxtmulti,
+ new_cluster.pgdata);
+ check_ok();
+ }
+
/* now reset the wal archives in the new cluster */
prep_status("Resetting WAL archives");
exec_prog(UTILITY_LOG_FILE, NULL, true,
*/
#define VISIBILITY_MAP_CRASHSAFE_CAT_VER 201107031
+/*
+ * pg_multixact format changed in this catversion:
+ */
+#define MULTIXACT_FORMATCHANGE_CAT_VER 201301231
/*
* Each relation is represented by a relinfo structure.
uint32 chkpnt_tli;
uint32 chkpnt_nxtxid;
uint32 chkpnt_nxtoid;
+ uint32 chkpnt_nxtmulti;
+ uint32 chkpnt_nxtmxoff;
+ uint32 chkpnt_oldstMulti;
uint32 align;
uint32 blocksz;
uint32 largesz;
OBJS = pgrowlocks.o
EXTENSION = pgrowlocks
-DATA = pgrowlocks--1.0.sql pgrowlocks--unpackaged--1.0.sql
+DATA = pgrowlocks--1.1.sql pgrowlocks--1.0--1.1.sql pgrowlocks--unpackaged--1.0.sql
ifdef USE_PGXS
PG_CONFIG = pg_config
--- /dev/null
+/* contrib/pgrowlocks/pgrowlocks--1.0--1.1.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pgrowlocks" to load this file. \quit
+
+ALTER EXTENSION pgrowlocks DROP FUNCTION pgrowlocks(text); -- detach the old function from the extension's member list
+DROP FUNCTION pgrowlocks(text); -- then drop it so it can be recreated below with the new OUT column list
+CREATE FUNCTION pgrowlocks(IN relname text,
+ OUT locked_row TID, -- TID of the locked row
+ OUT locker XID, -- locking XID (may be a multixact ID, see "multi")
+ OUT multi bool, -- true if locker is a multixact ID
+ OUT xids xid[], -- XIDs of the lockers
+ OUT modes text[], -- lock mode for each entry in xids
+ OUT pids INTEGER[]) -- process ID for each entry in xids
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pgrowlocks'
+LANGUAGE C STRICT;
-/* contrib/pgrowlocks/pgrowlocks--1.0.sql */
+/* contrib/pgrowlocks/pgrowlocks--1.1.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION pgrowlocks" to load this file. \quit
CREATE FUNCTION pgrowlocks(IN relname text,
OUT locked_row TID, -- row TID
- OUT lock_type TEXT, -- lock type
OUT locker XID, -- locking XID
OUT multi bool, -- multi XID?
OUT xids xid[], -- multi XIDs
+ OUT modes text[], -- multi XID statuses
OUT pids INTEGER[]) -- locker's process id
RETURNS SETOF record
AS 'MODULE_PATHNAME', 'pgrowlocks'
int ncolumns;
} MyData;
+#define Atnum_tid 0
+#define Atnum_xmax 1
+#define Atnum_ismulti 2
+#define Atnum_xids 3
+#define Atnum_modes 4
+#define Atnum_pids 5
+
Datum
pgrowlocks(PG_FUNCTION_ARGS)
{
/* scan the relation */
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
+ HTSU_Result htsu;
+ TransactionId xmax;
+ uint16 infomask;
+
/* must hold a buffer lock to call HeapTupleSatisfiesUpdate */
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
- if (HeapTupleSatisfiesUpdate(tuple->t_data,
- GetCurrentCommandId(false),
- scan->rs_cbuf) == HeapTupleBeingUpdated)
+ htsu = HeapTupleSatisfiesUpdate(tuple->t_data,
+ GetCurrentCommandId(false),
+ scan->rs_cbuf);
+ xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
+ infomask = tuple->t_data->t_infomask;
+
+ /*
+ * a tuple is locked if HTSU returns BeingUpdated, and if it returns
+ * MayBeUpdated but the Xmax is valid and pointing at us.
+ */
+ if (htsu == HeapTupleBeingUpdated ||
+ (htsu == HeapTupleMayBeUpdated &&
+ !(infomask & HEAP_XMAX_INVALID) &&
+ !(infomask & HEAP_XMAX_IS_MULTI) &&
+ (xmax == GetCurrentTransactionIdIfAny())))
{
-
char **values;
- int i;
values = (char **) palloc(mydata->ncolumns * sizeof(char *));
- i = 0;
- values[i++] = (char *) DirectFunctionCall1(tidout, PointerGetDatum(&tuple->t_self));
+ values[Atnum_tid] = (char *) DirectFunctionCall1(tidout,
+ PointerGetDatum(&tuple->t_self));
- if (tuple->t_data->t_infomask & HEAP_XMAX_SHARED_LOCK)
- values[i++] = pstrdup("Shared");
- else
- values[i++] = pstrdup("Exclusive");
- values[i] = palloc(NCHARS * sizeof(char));
- snprintf(values[i++], NCHARS, "%d", HeapTupleHeaderGetXmax(tuple->t_data));
- if (tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)
+ values[Atnum_xmax] = palloc(NCHARS * sizeof(char));
+ snprintf(values[Atnum_xmax], NCHARS, "%d", xmax);
+ if (infomask & HEAP_XMAX_IS_MULTI)
{
- TransactionId *xids;
- int nxids;
- int j;
- int isValidXid = 0; /* any valid xid ever exists? */
-
- values[i++] = pstrdup("true");
- nxids = GetMultiXactIdMembers(HeapTupleHeaderGetXmax(tuple->t_data), &xids);
- if (nxids == -1)
+ MultiXactMember *members;
+ int nmembers;
+ bool first = true;
+ bool allow_old;
+
+ values[Atnum_ismulti] = pstrdup("true");
+
+ allow_old = !(infomask & HEAP_LOCK_MASK) &&
+ (infomask & HEAP_XMAX_LOCK_ONLY);
+ nmembers = GetMultiXactIdMembers(xmax, &members, allow_old);
+ if (nmembers == -1)
{
- elog(ERROR, "GetMultiXactIdMembers returns error");
+ values[Atnum_xids] = "{0}";
+ values[Atnum_modes] = "{transient upgrade status}";
+ values[Atnum_pids] = "{0}";
}
+ else
+ {
+ int j;
- values[i] = palloc(NCHARS * nxids);
- values[i + 1] = palloc(NCHARS * nxids);
- strcpy(values[i], "{");
- strcpy(values[i + 1], "{");
+ values[Atnum_xids] = palloc(NCHARS * nmembers);
+ values[Atnum_modes] = palloc(NCHARS * nmembers);
+ values[Atnum_pids] = palloc(NCHARS * nmembers);
- for (j = 0; j < nxids; j++)
- {
- char buf[NCHARS];
+ strcpy(values[Atnum_xids], "{");
+ strcpy(values[Atnum_modes], "{");
+ strcpy(values[Atnum_pids], "{");
- if (TransactionIdIsInProgress(xids[j]))
+ for (j = 0; j < nmembers; j++)
{
- if (isValidXid)
+ char buf[NCHARS];
+
+ if (!first)
{
- strcat(values[i], ",");
- strcat(values[i + 1], ",");
+ strcat(values[Atnum_xids], ",");
+ strcat(values[Atnum_modes], ",");
+ strcat(values[Atnum_pids], ",");
}
- snprintf(buf, NCHARS, "%d", xids[j]);
- strcat(values[i], buf);
- snprintf(buf, NCHARS, "%d", BackendXidGetPid(xids[j]));
- strcat(values[i + 1], buf);
+ snprintf(buf, NCHARS, "%d", members[j].xid);
+ strcat(values[Atnum_xids], buf);
+ switch (members[j].status)
+ {
+ case MultiXactStatusUpdate:
+ snprintf(buf, NCHARS, "Update");
+ break;
+ case MultiXactStatusNoKeyUpdate:
+ snprintf(buf, NCHARS, "No Key Update");
+ break;
+ case MultiXactStatusForUpdate:
+ snprintf(buf, NCHARS, "For Update");
+ break;
+ case MultiXactStatusForNoKeyUpdate:
+ snprintf(buf, NCHARS, "For No Key Update");
+ break;
+ case MultiXactStatusForShare:
+ snprintf(buf, NCHARS, "Share");
+ break;
+ case MultiXactStatusForKeyShare:
+ snprintf(buf, NCHARS, "Key Share");
+ break;
+ }
+ strcat(values[Atnum_modes], buf);
+ snprintf(buf, NCHARS, "%d",
+ BackendXidGetPid(members[j].xid));
+ strcat(values[Atnum_pids], buf);
- isValidXid = 1;
+ first = false;
}
- }
- strcat(values[i], "}");
- strcat(values[i + 1], "}");
- i++;
+ strcat(values[Atnum_xids], "}");
+ strcat(values[Atnum_modes], "}");
+ strcat(values[Atnum_pids], "}");
+ }
}
else
{
- values[i++] = pstrdup("false");
- values[i] = palloc(NCHARS * sizeof(char));
- snprintf(values[i++], NCHARS, "{%d}", HeapTupleHeaderGetXmax(tuple->t_data));
+ values[Atnum_ismulti] = pstrdup("false");
+
+ values[Atnum_xids] = palloc(NCHARS * sizeof(char));
+ snprintf(values[Atnum_xids], NCHARS, "{%d}", xmax);
+
+ values[Atnum_modes] = palloc(NCHARS);
+ if (infomask & HEAP_XMAX_LOCK_ONLY)
+ {
+ if (HEAP_XMAX_IS_SHR_LOCKED(infomask))
+ snprintf(values[Atnum_modes], NCHARS, "{For Share}");
+ else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
+ snprintf(values[Atnum_modes], NCHARS, "{For Key Share}");
+ else if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
+ snprintf(values[Atnum_modes], NCHARS, "{For Update}");
+ else
+ /* neither keyshare nor exclusive bit is set */
+ snprintf(values[Atnum_modes], NCHARS,
+ "{transient upgrade status}");
+ }
+ else
+ {
+ if (tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED)
+ snprintf(values[Atnum_modes], NCHARS, "{Key Update}");
+ else
+ snprintf(values[Atnum_modes], NCHARS, "{Update}");
+ }
- values[i] = palloc(NCHARS * sizeof(char));
- snprintf(values[i++], NCHARS, "{%d}", BackendXidGetPid(HeapTupleHeaderGetXmax(tuple->t_data)));
+ values[Atnum_pids] = palloc(NCHARS * sizeof(char));
+ snprintf(values[Atnum_pids], NCHARS, "{%d}",
+ BackendXidGetPid(xmax));
}
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
/* make the tuple into a datum */
result = HeapTupleGetDatum(tuple);
- /* Clean up */
- for (i = 0; i < mydata->ncolumns; i++)
- pfree(values[i]);
- pfree(values);
+ /*
+ * no need to pfree what we allocated; it's on a short-lived memory
+ * context anyway
+ */
SRF_RETURN_NEXT(funcctx, result);
}
# pgrowlocks extension
comment = 'show row-level locking information'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/pgrowlocks'
relocatable = true
<entry><type>tid</type></entry>
<entry>Tuple ID (TID) of locked row</entry>
</row>
- <row>
- <entry><structfield>lock_type</structfield></entry>
- <entry><type>text</type></entry>
- <entry><literal>Shared</> for shared lock, or
- <literal>Exclusive</> for exclusive lock</entry>
- </row>
<row>
<entry><structfield>locker</structfield></entry>
<entry><type>xid</type></entry>
<entry><type>xid[]</type></entry>
<entry>Transaction IDs of lockers (more than one if multitransaction)</entry>
</row>
+ <row>
+ <entry><structfield>modes</structfield></entry>
+ <entry><type>text[]</type></entry>
+ <entry>Lock mode of lockers (more than one if multitransaction),
+ an array of <literal>Key Share</>, <literal>Share</>,
+ <literal>For No Key Update</>, <literal>No Key Update</>,
+ <literal>For Update</>, <literal>Update</>.</entry>
+ </row>
+
<row>
<entry><structfield>pids</structfield></entry>
<entry><type>integer[]</type></entry>
[ LIMIT { <replaceable class="parameter">count</replaceable> | ALL } ]
[ OFFSET <replaceable class="parameter">start</replaceable> [ ROW | ROWS ] ]
[ FETCH { FIRST | NEXT } [ <replaceable class="parameter">count</replaceable> ] { ROW | ROWS } ONLY ]
- [ FOR { UPDATE | SHARE } [ OF <replaceable class="parameter">table_name</replaceable> [, ...] ] [ NOWAIT ] [...] ]
+ [ FOR { UPDATE | NO KEY UPDATE | SHARE | KEY SHARE } [ OF <replaceable class="parameter">table_name</replaceable> [, ...] ] [ NOWAIT ] [...] ]
<phrase>where <replaceable class="parameter">from_item</replaceable> can be one of:</phrase>
<listitem>
<para>
- If <literal>FOR UPDATE</literal> or <literal>FOR SHARE</literal>
+ If <literal>FOR UPDATE</>, <literal>FOR NO KEY UPDATE</literal>, <literal>FOR SHARE</literal>
+ or <literal>FOR KEY SHARE</literal>
is specified, the
<command>SELECT</command> statement locks the selected rows
against concurrent updates. (See <xref linkend="sql-for-update-share"
<para>
You must have <literal>SELECT</literal> privilege on each column used
- in a <command>SELECT</> command. The use of <literal>FOR UPDATE</literal>
- or <literal>FOR SHARE</literal> requires
+ in a <command>SELECT</> command. The use of <literal>FOR NO KEY UPDATE</>,
+ <literal>FOR UPDATE</literal>,
+ <literal>FOR SHARE</literal> or <literal>FOR KEY SHARE</literal> requires
<literal>UPDATE</literal> privilege as well (for at least one column
of each table so selected).
</para>
<replaceable class="parameter">select_statement</replaceable> UNION [ ALL | DISTINCT ] <replaceable class="parameter">select_statement</replaceable>
</synopsis><replaceable class="parameter">select_statement</replaceable> is
any <command>SELECT</command> statement without an <literal>ORDER
- BY</>, <literal>LIMIT</>, <literal>FOR UPDATE</literal>, or
- <literal>FOR SHARE</literal> clause.
+ BY</>, <literal>LIMIT</>, <literal>FOR NO KEY UPDATE</>, <literal>FOR UPDATE</literal>,
+ <literal>FOR SHARE</literal>, or <literal>FOR KEY SHARE</literal> clause.
(<literal>ORDER BY</> and <literal>LIMIT</> can be attached to a
subexpression if it is enclosed in parentheses. Without
parentheses, these clauses will be taken to apply to the result of
</para>
<para>
- Currently, <literal>FOR UPDATE</> and <literal>FOR SHARE</> cannot be
+ Currently, <literal>FOR NO KEY UPDATE</>, <literal>FOR UPDATE</>, <literal>FOR SHARE</> and
+ <literal>FOR KEY SHARE</> cannot be
specified either for a <literal>UNION</> result or for any input of a
<literal>UNION</>.
</para>
<replaceable class="parameter">select_statement</replaceable> INTERSECT [ ALL | DISTINCT ] <replaceable class="parameter">select_statement</replaceable>
</synopsis><replaceable class="parameter">select_statement</replaceable> is
any <command>SELECT</command> statement without an <literal>ORDER
- BY</>, <literal>LIMIT</>, <literal>FOR UPDATE</literal>, or
- <literal>FOR SHARE</literal> clause.
+ BY</>, <literal>LIMIT</>, <literal>FOR NO KEY UPDATE</>, <literal>FOR UPDATE</literal>,
+ <literal>FOR SHARE</literal>, or <literal>FOR KEY SHARE</> clause.
</para>
<para>
</para>
<para>
- Currently, <literal>FOR UPDATE</> and <literal>FOR SHARE</> cannot be
+ Currently, <literal>FOR NO KEY UPDATE</>, <literal>FOR UPDATE</>, <literal>FOR SHARE</> and
+ <literal>FOR KEY SHARE</> cannot be
specified either for an <literal>INTERSECT</> result or for any input of
an <literal>INTERSECT</>.
</para>
<replaceable class="parameter">select_statement</replaceable> EXCEPT [ ALL | DISTINCT ] <replaceable class="parameter">select_statement</replaceable>
</synopsis><replaceable class="parameter">select_statement</replaceable> is
any <command>SELECT</command> statement without an <literal>ORDER
- BY</>, <literal>LIMIT</>, <literal>FOR UPDATE</literal>, or
- <literal>FOR SHARE</literal> clause.
+ BY</>, <literal>LIMIT</>, <literal>FOR NO KEY UPDATE</>, <literal>FOR UPDATE</literal>,
+ <literal>FOR SHARE</literal>, or <literal>FOR KEY SHARE</> clause.
</para>
<para>
</para>
<para>
- Currently, <literal>FOR UPDATE</> and <literal>FOR SHARE</> cannot be
+ Currently, <literal>FOR NO KEY UPDATE</>, <literal>FOR UPDATE</>, <literal>FOR SHARE</> and
+ <literal>FOR KEY SHARE</> cannot be
specified either for an <literal>EXCEPT</> result or for any input of
an <literal>EXCEPT</>.
</para>
</refsect2>
<refsect2 id="SQL-FOR-UPDATE-SHARE">
- <title id="sql-for-update-share-title"><literal>FOR UPDATE</literal>/<literal>FOR SHARE</literal> Clause</title>
+ <title id="sql-for-update-share-title"><literal>FOR UPDATE</>/<literal>FOR NO KEY UPDATE</>/<literal>FOR SHARE</>/<literal>FOR KEY SHARE</> Clauses</title>
+
+ <para>
+ <literal>FOR UPDATE</>, <literal>FOR NO KEY UPDATE</>, <literal>FOR SHARE</>
+ and <literal>FOR KEY SHARE</>
+ are <firstterm>locking clauses</>; they affect how <literal>SELECT</>
+ locks rows as they are obtained from the table.
+ </para>
<para>
The <literal>FOR UPDATE</literal> clause has this form:
</synopsis>
</para>
+ <para>
+ The <literal>FOR NO KEY UPDATE</literal> clause has this form:
+<synopsis>
+FOR NO KEY UPDATE [ OF <replaceable class="parameter">table_name</replaceable> [, ...] ] [ NOWAIT ]
+</synopsis>
+ </para>
+
<para>
The closely related <literal>FOR SHARE</literal> clause has this form:
<synopsis>
</synopsis>
</para>
+ <para>
+ Similarly, the <literal>FOR KEY SHARE</> clause has this form:
+<synopsis>
+FOR KEY SHARE [ OF <replaceable class="parameter">table_name</replaceable> [, ...] ] [ NOWAIT ]
+</synopsis>
+ </para>
+
<para>
<literal>FOR UPDATE</literal> causes the rows retrieved by the
<command>SELECT</command> statement to be locked as though for
update. This prevents them from being modified or deleted by
other transactions until the current transaction ends. That is,
other transactions that attempt <command>UPDATE</command>,
- <command>DELETE</command>, or <command>SELECT FOR UPDATE</command>
+ <command>DELETE</command>,
+ <command>SELECT FOR UPDATE</command>,
+ <command>SELECT FOR SHARE</command> or
+ <command>SELECT FOR KEY SHARE</command>
of these rows will be blocked until the current transaction ends.
+ The <literal>FOR UPDATE</> lock mode
+ is also acquired by any <command>DELETE</> on a row, and also by an
+ <command>UPDATE</> that modifies the values of certain columns. Currently,
+ the columns considered for the <command>UPDATE</> case are those that
+ have a unique index on them that can be used in a foreign key (so partial
+ indexes and expressional indexes are not considered), but this may change
+ in the future.
Also, if an <command>UPDATE</command>, <command>DELETE</command>,
or <command>SELECT FOR UPDATE</command> from another transaction
has already locked a selected row or rows, <command>SELECT FOR
linkend="mvcc">.
</para>
+ <para>
+ <literal>FOR NO KEY UPDATE</> behaves similarly, except that the lock
+ acquired is weaker: this lock will not block
+ <literal>SELECT FOR KEY SHARE</> commands that attempt to acquire
+ a lock on the same rows.
+ </para>
+
<para>
<literal>FOR SHARE</literal> behaves similarly, except that it
acquires a shared rather than exclusive lock on each retrieved
row. A shared lock blocks other transactions from performing
<command>UPDATE</command>, <command>DELETE</command>, or <command>SELECT
FOR UPDATE</command> on these rows, but it does not prevent them
- from performing <command>SELECT FOR SHARE</command>.
+ from performing <command>SELECT FOR SHARE</command> or
+ <command>SELECT FOR KEY SHARE</command>.
+ </para>
+
+ <para>
+ <literal>FOR KEY SHARE</> behaves similarly to <literal>FOR SHARE</literal>,
+ except that the lock
+ is weaker: <literal>SELECT FOR UPDATE</> is blocked, but
+ not <literal>SELECT FOR NO KEY UPDATE</>. A key-shared
+ lock blocks other transactions from performing <command>DELETE</command>
+ or any <command>UPDATE</command> that changes the key values, but not
+ other <command>UPDATE</>, and neither does it prevent
+ <command>SELECT FOR NO KEY UPDATE</>, <command>SELECT FOR SHARE</>, or
+ <command>SELECT FOR KEY SHARE</>.
</para>
<para>
</para>
<para>
- If specific tables are named in <literal>FOR UPDATE</literal>
- or <literal>FOR SHARE</literal>,
+ If specific tables are named in a locking clause,
then only rows coming from those tables are locked; any other
tables used in the <command>SELECT</command> are simply read as
- usual. A <literal>FOR UPDATE</literal> or <literal>FOR SHARE</literal>
+ usual. A locking
clause without a table list affects all tables used in the statement.
- If <literal>FOR UPDATE</literal> or <literal>FOR SHARE</literal> is
+ If a locking clause is
applied to a view or sub-query, it affects all tables used in
the view or sub-query.
- However, <literal>FOR UPDATE</literal>/<literal>FOR SHARE</literal>
+ However, these clauses
do not apply to <literal>WITH</> queries referenced by the primary query.
If you want row locking to occur within a <literal>WITH</> query, specify
- <literal>FOR UPDATE</literal> or <literal>FOR SHARE</literal> within the
- <literal>WITH</> query.
+ a locking clause within the <literal>WITH</> query.
</para>
<para>
- Multiple <literal>FOR UPDATE</literal> and <literal>FOR SHARE</literal>
+ Multiple locking
clauses can be written if it is necessary to specify different locking
behavior for different tables. If the same table is mentioned (or
- implicitly affected) by both <literal>FOR UPDATE</literal> and
- <literal>FOR SHARE</literal> clauses, then it is processed as
- <literal>FOR UPDATE</literal>. Similarly, a table is processed
+ implicitly affected) by more than one locking clause,
+ then it is processed as if it was only specified by the strongest one.
+ Similarly, a table is processed
as <literal>NOWAIT</> if that is specified in any of the clauses
affecting it.
</para>
<para>
- <literal>FOR UPDATE</literal> and <literal>FOR SHARE</literal> cannot be
+ The locking clauses cannot be
used in contexts where returned rows cannot be clearly identified with
individual table rows; for example they cannot be used with aggregation.
</para>
<para>
- When <literal>FOR UPDATE</literal> or <literal>FOR SHARE</literal>
+ When a locking clause
appears at the top level of a <command>SELECT</> query, the rows that
are locked are exactly those that are returned by the query; in the
case of a join query, the rows locked are those that contribute to
<literal>LIMIT</> is used, locking stops
once enough rows have been returned to satisfy the limit (but note that
rows skipped over by <literal>OFFSET</> will get locked). Similarly,
- if <literal>FOR UPDATE</literal> or <literal>FOR SHARE</literal>
+ if a locking clause
is used in a cursor's query, only rows actually fetched or stepped past
by the cursor will be locked.
</para>
<para>
- When <literal>FOR UPDATE</literal> or <literal>FOR SHARE</literal>
+ When a locking clause
appears in a sub-<command>SELECT</>, the rows locked are those
returned to the outer query by the sub-query. This might involve
fewer rows than inspection of the sub-query alone would suggest,
condition is not textually within the sub-query.
</para>
- <caution>
- <para>
- Avoid locking a row and then modifying it within a later savepoint or
- <application>PL/pgSQL</application> exception block. A subsequent
- rollback would cause the lock to be lost. For example:
+ <para>
+ Previous releases failed to preserve a lock which is upgraded by a later
+ savepoint. For example, this code:
<programlisting>
BEGIN;
SELECT * FROM mytable WHERE key = 1 FOR UPDATE;
UPDATE mytable SET ... WHERE key = 1;
ROLLBACK TO s;
</programlisting>
- After the <command>ROLLBACK</>, the row is effectively unlocked, rather
- than returned to its pre-savepoint state of being locked but not modified.
- This hazard occurs if a row locked in the current transaction is updated
- or deleted, or if a shared lock is upgraded to exclusive: in all these
- cases, the former lock state is forgotten. If the transaction is then
- rolled back to a state between the original locking command and the
- subsequent change, the row will appear not to be locked at all. This is
- an implementation deficiency which will be addressed in a future release
- of <productname>PostgreSQL</productname>.
- </para>
- </caution>
+ would fail to preserve the <literal>FOR UPDATE</> lock after the
+ <command>ROLLBACK</>. This has been fixed in release 9.3.
+ </para>
<caution>
<para>
It is possible for a <command>SELECT</> command running at the <literal>READ
COMMITTED</literal> transaction isolation level and using <literal>ORDER
- BY</literal> and <literal>FOR UPDATE/SHARE</literal> to return rows out of
+ BY</literal> and a locking clause to return rows out of
order. This is because <literal>ORDER BY</> is applied first.
The command sorts the result, but might then block trying to obtain a lock
on one or more of the rows. Once the <literal>SELECT</> unblocks, some
</refsect2>
<refsect2>
- <title><literal>FOR UPDATE</> and <literal>FOR SHARE</></title>
+ <title><literal>FOR NO KEY UPDATE</>, <literal>FOR UPDATE</>, <literal>FOR SHARE</>, <literal>FOR KEY SHARE</></title>
<para>
Although <literal>FOR UPDATE</> appears in the SQL standard, the
standard allows it only as an option of <command>DECLARE CURSOR</>.
<productname>PostgreSQL</productname> allows it in any <command>SELECT</>
query as well as in sub-<command>SELECT</>s, but this is an extension.
- The <literal>FOR SHARE</> variant, and the <literal>NOWAIT</> option,
+ The <literal>FOR NO KEY UPDATE</>, <literal>FOR SHARE</> and
+ <literal>FOR KEY SHARE</> variants,
+ as well as the <literal>NOWAIT</> option,
do not appear in the standard.
</para>
</refsect2>
result = TransactionIdGetDatum(HeapTupleHeaderGetXmin(tup->t_data));
break;
case MaxTransactionIdAttributeNumber:
- result = TransactionIdGetDatum(HeapTupleHeaderGetXmax(tup->t_data));
+ result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmax(tup->t_data));
break;
case MinCommandIdAttributeNumber:
case MaxCommandIdAttributeNumber:
--- /dev/null
+Locking tuples
+--------------
+
+Locking tuples is not as easy as locking tables or other database objects.
+The problem is that transactions might want to lock large numbers of tuples at
+any one time, so it's not possible to keep the lock objects in shared memory.
+To work around this limitation, we use a two-level mechanism. The first level
+is implemented by storing locking information in the tuple header: a tuple is
+marked as locked by setting the current transaction's XID as its XMAX, and
+setting additional infomask bits to distinguish this case from the more normal
+case of having deleted the tuple. When multiple transactions concurrently
+lock a tuple, a MultiXact is used; see below. This mechanism can accommodate
+arbitrarily large numbers of tuples being locked simultaneously.
+
+When it is necessary to wait for a tuple-level lock to be released, the basic
+delay is provided by XactLockTableWait or MultiXactIdWait on the contents of
+the tuple's XMAX. However, that mechanism will release all waiters
+concurrently, so there would be a race condition as to which waiter gets the
+tuple, potentially leading to indefinite starvation of some waiters. The
+possibility of share-locking makes the problem much worse --- a steady stream
+of share-lockers can easily block an exclusive locker forever. To provide
+more reliable semantics about who gets a tuple-level lock first, we use the
+standard lock manager, which implements the second level mentioned above. The
+protocol for waiting for a tuple-level lock is really
+
+ LockTuple()
+ XactLockTableWait()
+ mark tuple as locked by me
+ UnlockTuple()
+
+When there are multiple waiters, arbitration of who is to get the lock next
+is provided by LockTuple(). However, at most one tuple-level lock will
+be held or awaited per backend at any time, so we don't risk overflow
+of the lock table. Note that incoming share-lockers are required to
+do LockTuple as well, if there is any conflict, to ensure that they don't
+starve out waiting exclusive-lockers. However, if there is not any active
+conflict for a tuple, we don't incur any extra overhead.
+
+We provide four levels of tuple locking strength: SELECT FOR KEY UPDATE is
+super-exclusive locking (used to delete tuples and more generally to update
+tuples modifying the values of the columns that make up the key of the tuple);
+SELECT FOR UPDATE is a standards-compliant exclusive lock; SELECT FOR SHARE
+implements shared locks; and finally SELECT FOR KEY SHARE is a super-weak mode
+that does not conflict with exclusive mode, but conflicts with SELECT FOR KEY
+UPDATE. This last mode implements a mode just strong enough to implement RI
+checks, i.e. it ensures that tuples do not go away from under a check, without
+blocking other transactions that want to update the tuple without
+changing its key.
+
+The conflict table is:
+
+              KEY UPDATE    UPDATE      SHARE       KEY SHARE
+KEY UPDATE    conflict      conflict    conflict    conflict
+UPDATE        conflict      conflict    conflict
+SHARE         conflict      conflict
+KEY SHARE     conflict
+
+When there is a single locker in a tuple, we can just store the locking info
+in the tuple itself. We do this by storing the locker's Xid in XMAX, and
+setting infomask bits specifying the locking strength. There is one exception
+here: since infomask space is limited, we do not provide a separate bit
+for SELECT FOR SHARE, so we have to use the extended info in a MultiXact in
+that case. (The other cases, SELECT FOR UPDATE and SELECT FOR KEY SHARE, are
+presumably more commonly used due to being the standards-mandated locking
+mechanism, or heavily used by the RI code, so we want to provide fast paths
+for those.)
+
+MultiXacts
+----------
+
+A tuple header provides very limited space for storing information about tuple
+locking and updates: there is room only for a single Xid and a small number of
+infomask bits. Whenever we need to store more than one lock, we replace the
+first locker's Xid with a new MultiXactId. Each MultiXact provides extended
+locking data; it comprises an array of Xids plus some flags bits for each one.
+The flags are currently used to store the locking strength of each member
+transaction. (The flags also distinguish a pure locker from an updater.)
+
+In earlier PostgreSQL releases, a MultiXact always meant that the tuple was
+locked in shared mode by multiple transactions. This is no longer the case; a
+MultiXact may contain an update or delete Xid. (Keep in mind that tuple locks
+in a transaction do not conflict with other tuple locks in the same
+transaction, so it's possible to have otherwise conflicting locks in a
+MultiXact if they belong to the same transaction).
+
+Note that each lock is attributed to the subtransaction that acquires it.
+This means that a subtransaction that aborts is seen as though it releases the
+locks it acquired; concurrent transactions can then proceed without having to
+wait for the main transaction to finish. It also means that a subtransaction
+can upgrade to a stronger lock level than an earlier transaction had, and if
+the subxact aborts, the earlier, weaker lock is kept.
+
+The possibility of having an update within a MultiXact means that they must
+persist across crashes and restarts: a future reader of the tuple needs to
+figure out whether the update committed or aborted. So we have a requirement
+that pg_multixact needs to retain pages of its data until we're certain that
+the MultiXacts in them are no longer of interest.
+
+VACUUM is in charge of removing old MultiXacts at the time of tuple freezing.
+This works in the same way that pg_clog segments are removed: we have a
+pg_class column that stores the earliest multixact that could possibly be
+stored in the table; the minimum of all such values is stored in a pg_database
+column. VACUUM computes the minimum across all pg_database values, and
+removes pg_multixact segments older than the minimum.
+
+Infomask Bits
+-------------
+
+The following infomask bits are applicable:
+
+- HEAP_XMAX_INVALID
+ Any tuple with this bit set does not have a valid value stored in XMAX.
+
+- HEAP_XMAX_IS_MULTI
+ This bit is set if the tuple's Xmax is a MultiXactId (as opposed to a
+ regular TransactionId).
+
+- HEAP_XMAX_LOCK_ONLY
+ This bit is set when the XMAX is a locker only; that is, if it's a
+ multixact, it does not contain an update among its members. It's set when
+ the XMAX is a plain Xid that locked the tuple, as well.
+
+- HEAP_XMAX_KEYSHR_LOCK
+- HEAP_XMAX_EXCL_LOCK
+ These bits indicate the strength of the lock acquired; they are useful when
+ the XMAX is not a MultiXactId. If it's a multi, the info is to be found in
+ the member flags. If HEAP_XMAX_IS_MULTI is not set and HEAP_XMAX_LOCK_ONLY
+ is set, then one of these *must* be set as well.
+ Note there is no infomask bit for a SELECT FOR SHARE lock. Also there is no
+ separate bit for a SELECT FOR KEY UPDATE lock; this is implemented by the
+ HEAP_KEYS_UPDATED bit.
+
+- HEAP_KEYS_UPDATED
+ This bit lives in t_infomask2. If set, indicates that the XMAX updated
+ this tuple and changed the key values, or it deleted the tuple.
+ It's set regardless of whether the XMAX is a TransactionId or a MultiXactId.
+
+We currently never set the HEAP_XMAX_COMMITTED bit when the HEAP_XMAX_IS_MULTI
+bit is set.
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
TransactionId xid, CommandId cid, int options);
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
- ItemPointerData from, Buffer newbuf, HeapTuple newtup,
- bool all_visible_cleared, bool new_all_visible_cleared);
-static bool HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs,
- HeapTuple oldtup, HeapTuple newtup);
+ Buffer newbuf, HeapTuple oldtup,
+ HeapTuple newtup, bool all_visible_cleared,
+ bool new_all_visible_cleared);
+static void HeapSatisfiesHOTandKeyUpdate(Relation relation,
+ Bitmapset *hot_attrs, Bitmapset *key_attrs,
+ bool *satisfies_hot, bool *satisfies_key,
+ HeapTuple oldtup, HeapTuple newtup);
+static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask,
+ uint16 old_infomask2, TransactionId add_to_xmax,
+ LockTupleMode mode, bool is_update,
+ TransactionId *result_xmax, uint16 *result_infomask,
+ uint16 *result_infomask2);
+static HTSU_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple,
+ ItemPointer ctid, TransactionId xid,
+ LockTupleMode mode);
+static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask,
+ uint16 *new_infomask2);
+static TransactionId MultiXactIdGetUpdateXid(TransactionId xmax,
+ uint16 t_infomask);
+static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status,
+ int *remaining, uint16 infomask);
+static bool ConditionalMultiXactIdWait(MultiXactId multi,
+ MultiXactStatus status, int *remaining,
+ uint16 infomask);
+/*
+ * Each tuple lock mode has a corresponding heavyweight lock, and one or two
+ * corresponding MultiXactStatuses (one to merely lock tuples, another one to
+ * update them). This table (and the macros below) helps us determine the
+ * heavyweight lock mode and MultiXactStatus values to use for any particular
+ * tuple lock strength.
+ *
+ * The table is indexed by a LockTupleMode value and has MaxLockTupleMode + 1
+ * entries. The entries are listed as KeyShare, Share, NoKeyExclusive,
+ * Exclusive; presumably that mirrors the declaration order of the
+ * LockTupleMode enum -- confirm against its definition before reordering.
+ *
+ * An updstatus of -1 marks a lock mode that cannot be used to update a tuple;
+ * get_mxact_status_for_lock raises an error when asked for such a status.
+ */
+static const struct
+{
+ LOCKMODE hwlock; /* heavyweight lock used by the *TupleTuplock macros */
+ MultiXactStatus lockstatus; /* status to use when merely locking the tuple */
+ MultiXactStatus updstatus; /* status to use when updating it, or -1 if none */
+}
+tupleLockExtraInfo[MaxLockTupleMode + 1] =
+{
+ { /* LockTupleKeyShare */
+ AccessShareLock,
+ MultiXactStatusForKeyShare,
+ -1 /* KeyShare does not allow updating tuples */
+ },
+ { /* LockTupleShare */
+ RowShareLock,
+ MultiXactStatusForShare,
+ -1 /* Share does not allow updating tuples */
+ },
+ { /* LockTupleNoKeyExclusive */
+ ExclusiveLock,
+ MultiXactStatusForNoKeyUpdate,
+ MultiXactStatusNoKeyUpdate
+ },
+ { /* LockTupleExclusive */
+ AccessExclusiveLock,
+ MultiXactStatusForUpdate,
+ MultiXactStatusUpdate
+ }
+};
+/*
+ * Get the LOCKMODE for a given MultiXactStatus.
+ *
+ * This goes through TUPLOCK_from_mxstatus, which is defined further down;
+ * that is fine because the macro is only expanded at its point of use.
+ */
+#define LOCKMODE_from_mxstatus(status) \
+ (tupleLockExtraInfo[TUPLOCK_from_mxstatus((status))].hwlock)
+
+/*
+ * Acquire heavyweight locks on tuples, using a LockTupleMode strength value.
+ * This is more readable than having every caller translate it to lock.h's
+ * LOCKMODE.
+ *
+ * All three macros look up the hwlock field of tupleLockExtraInfo[mode] and
+ * pass it to the corresponding lock-manager call.
+ */
+#define LockTupleTuplock(rel, tup, mode) \
+ LockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock)
+#define UnlockTupleTuplock(rel, tup, mode) \
+ UnlockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock)
+#define ConditionalLockTupleTuplock(rel, tup, mode) \
+ ConditionalLockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock)
+
+/*
+ * This table maps tuple lock strength values for each particular
+ * MultiXactStatus value.
+ *
+ * It is indexed by MultiXactStatus and has MaxMultiXactStatus + 1 entries;
+ * each entry is the LockTupleMode that the status corresponds to. The
+ * per-entry comments name the status each slot is meant for; presumably they
+ * follow the MultiXactStatus enum's declaration order -- confirm against its
+ * definition before reordering.
+ */
+static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
+{
+ LockTupleKeyShare, /* ForKeyShare */
+ LockTupleShare, /* ForShare */
+ LockTupleNoKeyExclusive, /* ForNoKeyUpdate */
+ LockTupleExclusive, /* ForUpdate */
+ LockTupleNoKeyExclusive, /* NoKeyUpdate */
+ LockTupleExclusive /* Update */
+};
+
+/* Get the LockTupleMode for a given MultiXactStatus */
+#define TUPLOCK_from_mxstatus(status) \
+ (MultiXactStatusLock[(status)])
+/*
+ * Get the is_update bit for a given MultiXactStatus.
+ *
+ * True for any status that compares greater than MultiXactStatusForUpdate.
+ * Going by the slot comments in MultiXactStatusLock, those would be
+ * NoKeyUpdate and Update (assumes the enum is declared in that order --
+ * confirm).
+ */
+#define ISUPDATE_from_mxstatus(status) \
+ ((status) > MultiXactStatusForUpdate)
+
/* ----------------------------------------------------------------
* heap support routines
* ----------------------------------------------------------------
ItemPointerGetBlockNumber(tid));
offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
at_chain_start = false;
- prev_xmax = HeapTupleHeaderGetXmax(heapTuple->t_data);
+ prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
}
else
break; /* end of chain */
* tuple. Check for XMIN match.
*/
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
+ !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
{
UnlockReleaseBuffer(buffer);
break;
/*
* If there's a valid t_ctid link, follow it, else we're done.
*/
- if ((tp.t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED)) ||
+ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
{
UnlockReleaseBuffer(buffer);
}
ctid = tp.t_data->t_ctid;
- priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
+ priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
UnlockReleaseBuffer(buffer);
} /* end of loop */
}
* If the transaction aborted, we guarantee the XMAX_INVALID hint bit will
* be set on exit. If the transaction committed, we set the XMAX_COMMITTED
* hint bit if possible --- but beware that that may not yet be possible,
- * if the transaction committed asynchronously. Hence callers should look
- * only at XMAX_INVALID.
+ * if the transaction committed asynchronously.
+ *
+ * Note that if the transaction was a locker only, we set HEAP_XMAX_INVALID
+ * even if it commits.
+ *
+ * Hence callers should look only at XMAX_INVALID.
+ *
+ * Note this is not allowed for tuples whose xmax is a multixact.
*/
static void
UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
{
- Assert(TransactionIdEquals(HeapTupleHeaderGetXmax(tuple), xid));
+ Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple), xid));
+ Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));
if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID)))
{
- if (TransactionIdDidCommit(xid))
+ if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) &&
+ TransactionIdDidCommit(xid))
HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
xid);
else
return heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
}
+/*
+ * Translate a tuple's infomask/infomask2 into the compact "infobits" value
+ * that is stored in xl_heap_delete, xl_heap_update, xl_heap_lock and
+ * xl_heap_lock_updated WAL records.
+ *
+ * Each heap flag that is set contributes its XLHL_* counterpart to the
+ * result; flags that are clear contribute nothing.
+ *
+ * See fix_infomask_from_infobits.
+ */
+static uint8
+compute_infobits(uint16 infomask, uint16 infomask2)
+{
+    uint8 infobits = 0;
+
+    if (infomask & HEAP_XMAX_IS_MULTI)
+        infobits |= XLHL_XMAX_IS_MULTI;
+    if (infomask & HEAP_XMAX_LOCK_ONLY)
+        infobits |= XLHL_XMAX_LOCK_ONLY;
+    if (infomask & HEAP_XMAX_EXCL_LOCK)
+        infobits |= XLHL_XMAX_EXCL_LOCK;
+    /* note we ignore HEAP_XMAX_SHR_LOCK here */
+    if (infomask & HEAP_XMAX_KEYSHR_LOCK)
+        infobits |= XLHL_XMAX_KEYSHR_LOCK;
+
+    /* the "keys updated" flag is kept in infomask2, not infomask */
+    if (infomask2 & HEAP_KEYS_UPDATED)
+        infobits |= XLHL_KEYS_UPDATED;
+
+    return infobits;
+}
+
/*
* heap_delete - delete a tuple
*
* (the last only possible if wait == false).
*
* In the failure cases, the routine fills *hufd with the tuple's t_ctid,
- * t_xmax, and t_cmax (the last only for HeapTupleSelfUpdated, since we
+ * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax
+ * (the last only for HeapTupleSelfUpdated, since we
* cannot obtain cmax from a combocid generated by another transaction).
* See comments for struct HeapUpdateFailureData for additional info.
*/
BlockNumber block;
Buffer buffer;
Buffer vmbuffer = InvalidBuffer;
+ TransactionId new_xmax;
+ uint16 new_infomask,
+ new_infomask2;
bool have_tuple_lock = false;
bool iscombo;
bool all_visible_cleared = false;
uint16 infomask;
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetXmax(tp.t_data);
+ xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
infomask = tp.t_data->t_infomask;
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
*/
if (!have_tuple_lock)
{
- LockTuple(relation, &(tp.t_self), ExclusiveLock);
+ LockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
have_tuple_lock = true;
}
/*
* Sleep until concurrent transaction ends. Note that we don't care
- * if the locker has an exclusive or shared lock, because we need
- * exclusive.
+ * which lock mode the locker has, because we need the strongest one.
*/
if (infomask & HEAP_XMAX_IS_MULTI)
{
/* wait for multixact */
- MultiXactIdWait((MultiXactId) xwait);
+ MultiXactIdWait((MultiXactId) xwait, MultiXactStatusUpdate,
+ NULL, infomask);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* change, and start over if so.
*/
if (!(tp.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(tp.t_data),
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
xwait))
goto l1;
* Check for xmax change, and start over if so.
*/
if ((tp.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(tp.t_data),
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
xwait))
goto l1;
* We may overwrite if previous xmax aborted, or if it committed but
* only locked the tuple without updating it.
*/
- if (tp.t_data->t_infomask & (HEAP_XMAX_INVALID |
- HEAP_IS_LOCKED))
+ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) ||
+ HeapTupleHeaderIsOnlyLocked(tp.t_data))
result = HeapTupleMayBeUpdated;
else
result = HeapTupleUpdated;
result == HeapTupleBeingUpdated);
Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
hufd->ctid = tp.t_data->t_ctid;
- hufd->xmax = HeapTupleHeaderGetXmax(tp.t_data);
+ hufd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
if (result == HeapTupleSelfUpdated)
hufd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
else
hufd->cmax = 0; /* for lack of an InvalidCommandId value */
UnlockReleaseBuffer(buffer);
if (have_tuple_lock)
- UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
+ UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
if (vmbuffer != InvalidBuffer)
ReleaseBuffer(vmbuffer);
return result;
vmbuffer);
}
+ /*
+ * If this is the first possibly-multixact-able operation in the
+ * current transaction, set my per-backend OldestMemberMXactId setting.
+ * We can be certain that the transaction will never become a member of
+ * any older MultiXactIds than that. (We have to do this even if we
+ * end up just using our own TransactionId below, since some other
+ * backend could incorporate our XID into a MultiXact immediately
+ * afterwards.)
+ */
+ MultiXactIdSetOldestMember();
+
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data),
+ tp.t_data->t_infomask, tp.t_data->t_infomask2,
+ xid, LockTupleExclusive, true,
+ &new_xmax, &new_infomask, &new_infomask2);
+
/* store transaction information of xact deleting the tuple */
- tp.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ tp.t_data->t_infomask |= new_infomask;
+ tp.t_data->t_infomask2 |= new_infomask2;
HeapTupleHeaderClearHotUpdated(tp.t_data);
- HeapTupleHeaderSetXmax(tp.t_data, xid);
+ HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
/* Make sure there is no forward chain link in t_ctid */
tp.t_data->t_ctid = tp.t_self;
XLogRecData rdata[2];
xlrec.all_visible_cleared = all_visible_cleared;
+ xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
+ tp.t_data->t_infomask2);
xlrec.target.node = relation->rd_node;
xlrec.target.tid = tp.t_self;
+ xlrec.xmax = new_xmax;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapDelete;
rdata[0].buffer = InvalidBuffer;
* Release the lmgr tuple lock, if we had it.
*/
if (have_tuple_lock)
- UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
+ UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
pgstat_count_heap_delete(relation);
* crosscheck - if not InvalidSnapshot, also check old tuple against this
* wait - true if should wait for any conflicting update to commit/abort
* hufd - output parameter, filled in failure cases (see below)
+ * lockmode - output parameter, filled with lock mode acquired on tuple
*
* Normal, successful return value is HeapTupleMayBeUpdated, which
* actually means we *did* update it. Failure return codes are
* data are not reflected into *newtup.
*
* In the failure cases, the routine fills *hufd with the tuple's t_ctid,
- * t_xmax, and t_cmax (the last only for HeapTupleSelfUpdated, since we
+ * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax
+ * (the last only for HeapTupleSelfUpdated, since we
* cannot obtain cmax from a combocid generated by another transaction).
* See comments for struct HeapUpdateFailureData for additional info.
*/
HTSU_Result
heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait,
- HeapUpdateFailureData *hufd)
+ HeapUpdateFailureData *hufd, LockTupleMode *lockmode)
{
HTSU_Result result;
TransactionId xid = GetCurrentTransactionId();
Bitmapset *hot_attrs;
+ Bitmapset *key_attrs;
ItemId lp;
HeapTupleData oldtup;
HeapTuple heaptup;
Page page;
BlockNumber block;
+ MultiXactStatus mxact_status;
Buffer buffer,
newbuf,
vmbuffer = InvalidBuffer,
pagefree;
bool have_tuple_lock = false;
bool iscombo;
+ bool satisfies_hot;
+ bool satisfies_key;
bool use_hot_update = false;
+ bool key_intact;
bool all_visible_cleared = false;
bool all_visible_cleared_new = false;
+ bool checked_lockers;
+ bool locker_remains;
+ TransactionId xmax_new_tuple,
+ xmax_old_tuple;
+ uint16 infomask_old_tuple,
+ infomask2_old_tuple,
+ infomask_new_tuple,
+ infomask2_new_tuple;
Assert(ItemPointerIsValid(otid));
* Note that we get a copy here, so we need not worry about relcache flush
* happening midway through.
*/
- hot_attrs = RelationGetIndexAttrBitmap(relation);
+ hot_attrs = RelationGetIndexAttrBitmap(relation, false);
+ key_attrs = RelationGetIndexAttrBitmap(relation, true);
block = ItemPointerGetBlockNumber(otid);
buffer = ReadBuffer(relation, block);
oldtup.t_len = ItemIdGetLength(lp);
oldtup.t_self = *otid;
+ /*
+ * If we're not updating any "key" column, we can grab a weaker lock type.
+ * This allows for more concurrency when we are running simultaneously with
+ * foreign key checks.
+ *
+ * Note that if a column gets detoasted while executing the update, but the
+ * value ends up being the same, this test will fail and we will use the
+ * stronger lock. This is acceptable; the important case to optimize is
+ * updates that don't manipulate key columns, not those that
+ * serendipitously arrive at the same key values.
+ */
+ HeapSatisfiesHOTandKeyUpdate(relation, hot_attrs, key_attrs,
+ &satisfies_hot, &satisfies_key,
+ &oldtup, newtup);
+ if (satisfies_key)
+ {
+ *lockmode = LockTupleNoKeyExclusive;
+ mxact_status = MultiXactStatusNoKeyUpdate;
+ key_intact = true;
+
+ /*
+ * If this is the first possibly-multixact-able operation in the
+ * current transaction, set my per-backend OldestMemberMXactId setting.
+ * We can be certain that the transaction will never become a member of
+ * any older MultiXactIds than that. (We have to do this even if we
+ * end up just using our own TransactionId below, since some other
+ * backend could incorporate our XID into a MultiXact immediately
+ * afterwards.)
+ */
+ MultiXactIdSetOldestMember();
+ }
+ else
+ {
+ *lockmode = LockTupleExclusive;
+ mxact_status = MultiXactStatusUpdate;
+ key_intact = false;
+ }
+
/*
* Note: beyond this point, use oldtup not otid to refer to old tuple.
* otid may very well point at newtup->t_self, which we will overwrite
*/
l2:
+ checked_lockers = false;
+ locker_remains = false;
result = HeapTupleSatisfiesUpdate(oldtup.t_data, cid, buffer);
+ /* see below about the "no wait" case */
+ Assert(result != HeapTupleBeingUpdated || wait);
+
if (result == HeapTupleInvisible)
{
UnlockReleaseBuffer(buffer);
}
else if (result == HeapTupleBeingUpdated && wait)
{
- TransactionId xwait;
+ TransactionId xwait;
uint16 infomask;
+ bool can_continue = false;
+
+ checked_lockers = true;
+
+ /*
+ * XXX note that we don't consider the "no wait" case here. This
+ * isn't a problem currently because no caller uses that case, but it
+ * should be fixed if such a caller is introduced. It wasn't a problem
+ * previously because this code would always wait, but now that some
+ * tuple locks do not conflict with one of the lock modes we use, it is
+ * possible that this case is interesting to handle specially.
+ *
+ * This may cause failures with third-party code that calls heap_update
+ * directly.
+ */
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetXmax(oldtup.t_data);
+ xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
infomask = oldtup.t_data->t_infomask;
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
*/
if (!have_tuple_lock)
{
- LockTuple(relation, &(oldtup.t_self), ExclusiveLock);
+ LockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
have_tuple_lock = true;
}
/*
- * Sleep until concurrent transaction ends. Note that we don't care
- * if the locker has an exclusive or shared lock, because we need
- * exclusive.
+ * Now we have to do something about the existing locker. If it's a
+ * multi, sleep on it; we might be awakened before it is completely
+ * gone (or even not sleep at all in some cases); we need to preserve
+ * it as locker, unless it is gone completely.
+ *
+ * If it's not a multi, we need to check for sleeping conditions before
+ * actually going to sleep. If the update doesn't conflict with the
+ * locks, we just continue without sleeping (but making sure it is
+ * preserved).
*/
-
if (infomask & HEAP_XMAX_IS_MULTI)
{
+ TransactionId update_xact;
+ int remain;
+
/* wait for multixact */
- MultiXactIdWait((MultiXactId) xwait);
+ MultiXactIdWait((MultiXactId) xwait, mxact_status, &remain,
+ infomask);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/*
* change, and start over if so.
*/
if (!(oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
xwait))
goto l2;
/*
- * You might think the multixact is necessarily done here, but not
- * so: it could have surviving members, namely our own xact or
- * other subxacts of this backend. It is legal for us to update
- * the tuple in either case, however (the latter case is
- * essentially a situation of upgrading our former shared lock to
- * exclusive). We don't bother changing the on-disk hint bits
- * since we are about to overwrite the xmax altogether.
+ * Note that the multixact may not be done by now. It could have
+ * surviving members; our own xact or other subxacts of this
+ * backend, and also any other concurrent transaction that locked
+ * the tuple with KeyShare if we only got TupleLockUpdate. If this
+ * is the case, we have to be careful to mark the updated tuple
+ * with the surviving members in Xmax.
+ *
+ * Note that there could have been another update in the MultiXact.
+ * In that case, we need to check whether it committed or aborted.
+ * If it aborted we are safe to update it again; otherwise there is
+ * an update conflict, and we have to return HeapTupleUpdated
+ * below.
+ *
+ * In the LockTupleExclusive case, we still need to preserve the
+ * surviving members: those would include the tuple locks we had
+ * before this one, which are important to keep in case this
+ * subxact aborts.
*/
+ update_xact = InvalidTransactionId;
+ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
+ update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
+
+ /* there was no UPDATE in the MultiXact; or it aborted. */
+ if (!TransactionIdIsValid(update_xact) ||
+ TransactionIdDidAbort(update_xact))
+ can_continue = true;
+
+ locker_remains = remain != 0;
}
else
{
- /* wait for regular transaction to end */
- XactLockTableWait(xwait);
- LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-
/*
- * xwait is done, but if xwait had just locked the tuple then some
- * other xact could update this tuple before we get to this point.
- * Check for xmax change, and start over if so.
+ * If it's just a key-share locker, and we're not changing the
+ * key columns, we don't need to wait for it to end; but we
+ * need to preserve it as locker.
*/
- if ((oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
- xwait))
- goto l2;
+ if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) && key_intact)
+ {
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
- /* Otherwise check if it committed or aborted */
- UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
+ /*
+ * recheck the locker; if someone else changed the tuple while we
+ * weren't looking, start over.
+ */
+ if ((oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ xwait))
+ goto l2;
+
+ can_continue = true;
+ locker_remains = true;
+ }
+ else
+ {
+ /* wait for regular transaction to end */
+ XactLockTableWait(xwait);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /*
+ * xwait is done, but if xwait had just locked the tuple then some
+ * other xact could update this tuple before we get to this point.
+ * Check for xmax change, and start over if so.
+ */
+ if ((oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ xwait))
+ goto l2;
+
+ /* Otherwise check if it committed or aborted */
+ UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
+ if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
+ can_continue = true;
+ }
}
- /*
- * We may overwrite if previous xmax aborted, or if it committed but
- * only locked the tuple without updating it.
- */
- if (oldtup.t_data->t_infomask & (HEAP_XMAX_INVALID |
- HEAP_IS_LOCKED))
- result = HeapTupleMayBeUpdated;
- else
- result = HeapTupleUpdated;
+ result = can_continue ? HeapTupleMayBeUpdated : HeapTupleUpdated;
}
if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
result == HeapTupleBeingUpdated);
Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
hufd->ctid = oldtup.t_data->t_ctid;
- hufd->xmax = HeapTupleHeaderGetXmax(oldtup.t_data);
+ hufd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
if (result == HeapTupleSelfUpdated)
hufd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
else
hufd->cmax = 0; /* for lack of an InvalidCommandId value */
UnlockReleaseBuffer(buffer);
if (have_tuple_lock)
- UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
+ UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
if (vmbuffer != InvalidBuffer)
ReleaseBuffer(vmbuffer);
bms_free(hot_attrs);
+ bms_free(key_attrs);
return result;
}
* visible while we were busy locking the buffer, or during some
* subsequent window during which we had it unlocked, we'll have to unlock
* and re-lock, to avoid holding the buffer lock across an I/O. That's a
- * bit unfortunate, esepecially since we'll now have to recheck whether
+ * bit unfortunate, especially since we'll now have to recheck whether
* the tuple has been locked or updated under us, but hopefully it won't
* happen very often.
*/
Assert(!(newtup->t_data->t_infomask & HEAP_HASOID));
}
+ /*
+ * If the tuple we're updating is locked, we need to preserve the locking
+ * info in the old tuple's Xmax. Prepare a new Xmax value for this.
+ */
+ compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ oldtup.t_data->t_infomask,
+ oldtup.t_data->t_infomask2,
+ xid, *lockmode, true,
+ &xmax_old_tuple, &infomask_old_tuple,
+ &infomask2_old_tuple);
+
+ /* And also prepare an Xmax value for the new copy of the tuple */
+ if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ (checked_lockers && !locker_remains))
+ xmax_new_tuple = InvalidTransactionId;
+ else
+ xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+
+ if (!TransactionIdIsValid(xmax_new_tuple))
+ {
+ infomask_new_tuple = HEAP_XMAX_INVALID;
+ infomask2_new_tuple = 0;
+ }
+ else
+ {
+ if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
+ &infomask2_new_tuple);
+ }
+ else
+ {
+ infomask_new_tuple = HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY;
+ infomask2_new_tuple = 0;
+ }
+ }
+
+ /*
+ * Prepare the new tuple with the appropriate initial values of Xmin and
+ * Xmax, as well as initial infomask bits as computed above.
+ */
newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
- newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
HeapTupleHeaderSetXmin(newtup->t_data, xid);
HeapTupleHeaderSetCmin(newtup->t_data, cid);
- HeapTupleHeaderSetXmax(newtup->t_data, 0); /* for cleanliness */
+ newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
+ newtup->t_data->t_infomask2 |= infomask2_new_tuple;
+ HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
newtup->t_tableOid = RelationGetRelid(relation);
/*
if (need_toast || newtupsize > pagefree)
{
/* Clear obsolete visibility flags ... */
- oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
HeapTupleClearHotUpdated(&oldtup);
/* ... and store info about transaction updating this tuple */
- HeapTupleHeaderSetXmax(oldtup.t_data, xid);
+ Assert(TransactionIdIsValid(xmax_old_tuple));
+ HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
+ oldtup.t_data->t_infomask |= infomask_old_tuple;
+ oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
/* temporarily make it look not-updated */
oldtup.t_data->t_ctid = oldtup.t_self;
* to do a HOT update. Check if any of the index columns have been
* changed. If not, then HOT update is possible.
*/
- if (HeapSatisfiesHOTUpdate(relation, hot_attrs, &oldtup, heaptup))
+ if (satisfies_hot)
use_hot_update = true;
}
else
if (!already_marked)
{
/* Clear obsolete visibility flags ... */
- oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
/* ... and store info about transaction updating this tuple */
- HeapTupleHeaderSetXmax(oldtup.t_data, xid);
+ Assert(TransactionIdIsValid(xmax_old_tuple));
+ HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
+ oldtup.t_data->t_infomask |= infomask_old_tuple;
+ oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
}
/* XLOG stuff */
if (RelationNeedsWAL(relation))
{
- XLogRecPtr recptr = log_heap_update(relation, buffer, oldtup.t_self,
- newbuf, heaptup,
+ XLogRecPtr recptr = log_heap_update(relation, buffer,
+ newbuf, &oldtup, heaptup,
all_visible_cleared,
all_visible_cleared_new);
* Release the lmgr tuple lock, if we had it.
*/
if (have_tuple_lock)
- UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
+ UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
pgstat_count_heap_update(relation, use_hot_update);
}
bms_free(hot_attrs);
+ bms_free(key_attrs);
return HeapTupleMayBeUpdated;
}
/*
* Check if the specified attribute's value is same in both given tuples.
- * Subroutine for HeapSatisfiesHOTUpdate.
+ * Subroutine for HeapSatisfiesHOTandKeyUpdate.
*/
static bool
heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
/*
* Extract the corresponding values. XXX this is pretty inefficient if
- * there are many indexed columns. Should HeapSatisfiesHOTUpdate do a
+ * there are many indexed columns. Should HeapSatisfiesHOTandKeyUpdate do a
* single heap_deform_tuple call on each tuple, instead? But that doesn't
* work for system columns ...
*/
}
/*
- * Check if the old and new tuples represent a HOT-safe update. To be able
- * to do a HOT update, we must not have changed any columns used in index
- * definitions.
+ * Check which columns are being updated.
+ *
+ * This simultaneously checks conditions for HOT updates and for FOR KEY
+ * SHARE updates. Since much of the time they will be checking very similar
+ * sets of columns, and doing the same tests on them, it makes sense to
+ * optimize and do them together.
*
- * The set of attributes to be checked is passed in (we dare not try to
- * compute it while holding exclusive buffer lock...) NOTE that hot_attrs
- * is destructively modified! That is OK since this is invoked at most once
- * by heap_update().
+ * We receive two bitmapsets comprising the two sets of columns we're
+ * interested in. Note these are destructively modified; that is OK since
+ * this is invoked at most once in heap_update.
+ *
+ * The two sets are consumed together, always examining the lower of the two
+ * pending attribute numbers next, so a column that appears in both sets is
+ * compared only once and updates both results.
*
- * Returns true if safe to do HOT update.
+ * *satisfies_hot is set to TRUE if it's okay to do a HOT update (i.e. the
+ * update does not modify indexed columns); *satisfies_key is set to TRUE if
+ * the update does not modify columns used in the key.
*/
-static bool
-HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs,
- HeapTuple oldtup, HeapTuple newtup)
+static void
+HeapSatisfiesHOTandKeyUpdate(Relation relation,
+ Bitmapset *hot_attrs, Bitmapset *key_attrs,
+ bool *satisfies_hot, bool *satisfies_key,
+ HeapTuple oldtup, HeapTuple newtup)
{
- int attrnum;
+ int next_hot_attnum;
+ int next_key_attnum;
+ bool hot_result = true;
+ bool key_result = true;
+ bool key_done = false;
+ bool hot_done = false;
+
+ next_hot_attnum = bms_first_member(hot_attrs);
+ if (next_hot_attnum == -1)
+ hot_done = true;
+ else
+ /* Adjust for system attributes */
+ next_hot_attnum += FirstLowInvalidHeapAttributeNumber;
- while ((attrnum = bms_first_member(hot_attrs)) >= 0)
- {
+ next_key_attnum = bms_first_member(key_attrs);
+ if (next_key_attnum == -1)
+ key_done = true;
+ else
/* Adjust for system attributes */
- attrnum += FirstLowInvalidHeapAttributeNumber;
+ next_key_attnum += FirstLowInvalidHeapAttributeNumber;
- /* If the attribute value has changed, we can't do HOT update */
- if (!heap_tuple_attr_equals(RelationGetDescr(relation), attrnum,
- oldtup, newtup))
- return false;
+ for (;;)
+ {
+ int check_now;
+ bool changed;
+
+ /* both bitmapsets are now empty */
+ if (key_done && hot_done)
+ break;
+
+ /*
+ * Pick the attribute to examine in this iteration: if one set is
+ * exhausted, take the pending member of the other; otherwise take the
+ * lower of the two pending members. An exhausted set leaves -1 in its
+ * next_*_attnum variable, so it must be kept out of the Min(); hence
+ * the three branches have to be mutually exclusive.
+ *
+ * XXX there's probably an easier way ...
+ */
+ if (hot_done)
+ check_now = next_key_attnum;
+ else if (key_done)
+ check_now = next_hot_attnum;
+ else
+ check_now = Min(next_hot_attnum, next_key_attnum);
+
+ changed = !heap_tuple_attr_equals(RelationGetDescr(relation),
+ check_now, oldtup, newtup);
+ if (changed)
+ {
+ if (check_now == next_hot_attnum)
+ hot_result = false;
+ if (check_now == next_key_attnum)
+ key_result = false;
+ }
+
+ /* if both are false now, we can stop checking */
+ if (!hot_result && !key_result)
+ break;
+
+ /* advance whichever set(s) supplied the attribute just examined */
+ if (check_now == next_hot_attnum)
+ {
+ next_hot_attnum = bms_first_member(hot_attrs);
+ if (next_hot_attnum == -1)
+ hot_done = true;
+ else
+ /* Adjust for system attributes */
+ next_hot_attnum += FirstLowInvalidHeapAttributeNumber;
+ }
+ if (check_now == next_key_attnum)
+ {
+ next_key_attnum = bms_first_member(key_attrs);
+ if (next_key_attnum == -1)
+ key_done = true;
+ else
+ /* Adjust for system attributes */
+ next_key_attnum += FirstLowInvalidHeapAttributeNumber;
+ }
}
- return true;
+ *satisfies_hot = hot_result;
+ *satisfies_key = key_result;
}
/*
{
HTSU_Result result;
HeapUpdateFailureData hufd;
+ LockTupleMode lockmode;
result = heap_update(relation, otid, tup,
GetCurrentCommandId(true), InvalidSnapshot,
true /* wait for commit */,
- &hufd);
+ &hufd, &lockmode);
switch (result)
{
case HeapTupleSelfUpdated:
}
}
+
+/*
+ * Map a tuple lock mode to the MultiXactStatus that represents it.
+ *
+ * When is_update is true the "update" status recorded for the mode in
+ * tupleLockExtraInfo is used, otherwise its "lock" status. If the table
+ * holds -1 for the requested combination, an error is raised.
+ */
+static MultiXactStatus
+get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
+{
+    MultiXactStatus status = is_update ?
+        tupleLockExtraInfo[mode].updstatus :
+        tupleLockExtraInfo[mode].lockstatus;
+
+    /* -1 in the table means this mode has no status of the requested kind */
+    if (status == -1)
+        elog(ERROR, "invalid lock tuple mode %d/%s", mode,
+             is_update ? "true" : "false");
+
+    return status;
+}
+
+
/*
* heap_lock_tuple - lock a tuple in shared or exclusive mode
*
* tuple's cmax if lock is successful)
* mode: indicates if shared or exclusive tuple lock is desired
* nowait: if true, ereport rather than blocking if lock not available
+ * follow_updates: if true, follow the update chain to also lock descendant
+ * tuples.
*
* Output parameters:
* *tuple: all fields filled in
* HeapTupleUpdated: lock failed because tuple updated by other xact
*
* In the failure cases, the routine fills *hufd with the tuple's t_ctid,
- * t_xmax, and t_cmax (the last only for HeapTupleSelfUpdated, since we
+ * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax
+ * (the last only for HeapTupleSelfUpdated, since we
* cannot obtain cmax from a combocid generated by another transaction).
* See comments for struct HeapUpdateFailureData for additional info.
*
- *
- * NOTES: because the shared-memory lock table is of finite size, but users
- * could reasonably want to lock large numbers of tuples, we do not rely on
- * the standard lock manager to store tuple-level locks over the long term.
- * Instead, a tuple is marked as locked by setting the current transaction's
- * XID as its XMAX, and setting additional infomask bits to distinguish this
- * usage from the more normal case of having deleted the tuple. When
- * multiple transactions concurrently share-lock a tuple, the first locker's
- * XID is replaced in XMAX with a MultiTransactionId representing the set of
- * XIDs currently holding share-locks.
- *
- * When it is necessary to wait for a tuple-level lock to be released, the
- * basic delay is provided by XactLockTableWait or MultiXactIdWait on the
- * contents of the tuple's XMAX. However, that mechanism will release all
- * waiters concurrently, so there would be a race condition as to which
- * waiter gets the tuple, potentially leading to indefinite starvation of
- * some waiters. The possibility of share-locking makes the problem much
- * worse --- a steady stream of share-lockers can easily block an exclusive
- * locker forever. To provide more reliable semantics about who gets a
- * tuple-level lock first, we use the standard lock manager. The protocol
- * for waiting for a tuple-level lock is really
- * LockTuple()
- * XactLockTableWait()
- * mark tuple as locked by me
- * UnlockTuple()
- * When there are multiple waiters, arbitration of who is to get the lock next
- * is provided by LockTuple(). However, at most one tuple-level lock will
- * be held or awaited per backend at any time, so we don't risk overflow
- * of the lock table. Note that incoming share-lockers are required to
- * do LockTuple as well, if there is any conflict, to ensure that they don't
- * starve out waiting exclusive-lockers. However, if there is not any active
- * conflict for a tuple, we don't incur any extra overhead.
+ * See README.tuplock for a thorough explanation of this mechanism.
*/
HTSU_Result
heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, bool nowait,
+ bool follow_updates,
Buffer *buffer, HeapUpdateFailureData *hufd)
{
HTSU_Result result;
ItemPointer tid = &(tuple->t_self);
ItemId lp;
Page page;
- TransactionId xid;
- TransactionId xmax;
- uint16 old_infomask;
- uint16 new_infomask;
- LOCKMODE tuple_lock_type;
+ TransactionId xid,
+ xmax;
+ uint16 old_infomask,
+ new_infomask,
+ new_infomask2;
bool have_tuple_lock = false;
- tuple_lock_type = (mode == LockTupleShared) ? ShareLock : ExclusiveLock;
-
*buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
{
TransactionId xwait;
uint16 infomask;
+ uint16 infomask2;
+ bool require_sleep;
+ ItemPointerData t_ctid;
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetXmax(tuple->t_data);
+ xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
infomask = tuple->t_data->t_infomask;
+ infomask2 = tuple->t_data->t_infomask2;
+ ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
/*
- * If we wish to acquire share lock, and the tuple is already
- * share-locked by a multixact that includes any subtransaction of the
- * current top transaction, then we effectively hold the desired lock
- * already. We *must* succeed without trying to take the tuple lock,
- * else we will deadlock against anyone waiting to acquire exclusive
- * lock. We don't need to make any state changes in this case.
+ * If any subtransaction of the current top transaction already holds a
+ * lock as strong or stronger than what we're requesting, we
+ * effectively hold the desired lock already. We *must* succeed
+ * without trying to take the tuple lock, else we will deadlock against
+ * anyone wanting to acquire a stronger lock.
*/
- if (mode == LockTupleShared &&
- (infomask & HEAP_XMAX_IS_MULTI) &&
- MultiXactIdIsCurrent((MultiXactId) xwait))
+ if (infomask & HEAP_XMAX_IS_MULTI)
{
- Assert(infomask & HEAP_XMAX_SHARED_LOCK);
- /* Probably can't hold tuple lock here, but may as well check */
- if (have_tuple_lock)
- UnlockTuple(relation, tid, tuple_lock_type);
- return HeapTupleMayBeUpdated;
+ int i;
+ int nmembers;
+ MultiXactMember *members;
+
+ /*
+ * We don't need to allow old multixacts here; if that had been the
+ * case, HeapTupleSatisfiesUpdate would have returned MayBeUpdated
+ * and we wouldn't be here.
+ */
+ nmembers = GetMultiXactIdMembers(xwait, &members, false);
+
+ for (i = 0; i < nmembers; i++)
+ {
+ if (TransactionIdIsCurrentTransactionId(members[i].xid))
+ {
+ LockTupleMode membermode;
+
+ membermode = TUPLOCK_from_mxstatus(members[i].status);
+
+ if (membermode >= mode)
+ {
+ if (have_tuple_lock)
+ UnlockTupleTuplock(relation, tid, mode);
+
+ pfree(members);
+ return HeapTupleMayBeUpdated;
+ }
+ }
+ }
+
+ pfree(members);
}
/*
{
if (nowait)
{
- if (!ConditionalLockTuple(relation, tid, tuple_lock_type))
+ if (!ConditionalLockTupleTuplock(relation, tid, mode))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
- errmsg("could not obtain lock on row in relation \"%s\"",
- RelationGetRelationName(relation))));
+ errmsg("could not obtain lock on row in relation \"%s\"",
+ RelationGetRelationName(relation))));
}
else
- LockTuple(relation, tid, tuple_lock_type);
+ LockTupleTuplock(relation, tid, mode);
have_tuple_lock = true;
}
- if (mode == LockTupleShared && (infomask & HEAP_XMAX_SHARED_LOCK))
+ /*
+ * Initially assume that we will have to wait for the locking
+ * transaction(s) to finish. We check various cases below in which
+ * this can be turned off.
+ */
+ require_sleep = true;
+ if (mode == LockTupleKeyShare)
{
/*
- * Acquiring sharelock when there's at least one sharelocker
- * already. We need not wait for him/them to complete.
- */
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
-
- /*
- * Make sure it's still a shared lock, else start over. (It's OK
- * if the ownership of the shared lock has changed, though.)
+ * If we're requesting KeyShare, and there's no update present, we
+ * don't need to wait. Even if there is an update, we can still
+ * continue if the key hasn't been modified.
+ *
+ * However, if there are updates, we need to walk the update chain
+ * to mark future versions of the row as locked, too. That way, if
+ * somebody deletes that future version, we're protected against
+ * the key going away. This locking of future versions could block
+ * momentarily, if a concurrent transaction is deleting a key; or
+ * it could return a value to the effect that the transaction
+ * deleting the key has already committed. So we do this before
+ * re-locking the buffer; otherwise this would be prone to
+ * deadlocks.
+ *
+ * Note that the TID we're locking was grabbed before we unlocked
+ * the buffer. For it to change while we're not looking, the other
+ * properties we're testing for below after re-locking the buffer
+ * would also change, in which case we would restart this loop
+ * above.
*/
- if (!(tuple->t_data->t_infomask & HEAP_XMAX_SHARED_LOCK))
- goto l3;
- }
- else if (infomask & HEAP_XMAX_IS_MULTI)
- {
- /* wait for multixact to end */
- if (nowait)
+ if (!(infomask2 & HEAP_KEYS_UPDATED))
{
- if (!ConditionalMultiXactIdWait((MultiXactId) xwait))
- ereport(ERROR,
- (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
- errmsg("could not obtain lock on row in relation \"%s\"",
- RelationGetRelationName(relation))));
- }
- else
- MultiXactIdWait((MultiXactId) xwait);
+ bool updated;
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
- /*
- * If xwait had just locked the tuple then some other xact could
- * update this tuple before we get to this point. Check for xmax
- * change, and start over if so.
- */
- if (!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(tuple->t_data),
- xwait))
- goto l3;
+ /*
+ * If there are updates, follow the update chain; bail out
+ * if that cannot be done.
+ */
+ if (follow_updates && updated)
+ {
+ HTSU_Result res;
+
+ res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+ GetCurrentTransactionId(),
+ mode);
+ if (res != HeapTupleMayBeUpdated)
+ {
+ result = res;
+ /* recovery code expects to have buffer lock held */
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ goto failed;
+ }
+ }
- /*
- * You might think the multixact is necessarily done here, but not
- * so: it could have surviving members, namely our own xact or
- * other subxacts of this backend. It is legal for us to lock the
- * tuple in either case, however. We don't bother changing the
- * on-disk hint bits since we are about to overwrite the xmax
- * altogether.
- */
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /*
+ * Make sure it's still an appropriate lock, else start over.
+ * Also, if it wasn't updated before we released the lock, but
+ * is updated now, we start over too; the reason is that we now
+ * need to follow the update chain to lock the new versions.
+ */
+ if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
+ ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
+ !updated))
+ goto l3;
+
+ /* Things look okay, so we can skip sleeping */
+ require_sleep = false;
+
+ /*
+ * Note we allow Xmax to change here; other updaters/lockers
+ * could have modified it before we grabbed the buffer lock.
+ * However, this is not a problem, because with the recheck we
+ * just did we ensure that they still don't conflict with the
+ * lock we want.
+ */
+ }
}
- else
+ else if (mode == LockTupleShare)
{
- /* wait for regular transaction to end */
- if (nowait)
+ /*
+ * If we're requesting Share, we can similarly avoid sleeping if
+ * there's no update and no exclusive lock present.
+ */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
+ !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
{
- if (!ConditionalXactLockTableWait(xwait))
- ereport(ERROR,
- (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
- errmsg("could not obtain lock on row in relation \"%s\"",
- RelationGetRelationName(relation))));
- }
- else
- XactLockTableWait(xwait);
-
- LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ /*
+ * Make sure it's still an appropriate lock, else start over.
+ * See above about allowing xmax to change.
+ */
+ if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
+ HEAP_XMAX_IS_EXCL_LOCKED(tuple->t_data->t_infomask))
+ goto l3;
+ require_sleep = false;
+ }
+ }
+ else if (mode == LockTupleNoKeyExclusive)
+ {
/*
- * xwait is done, but if xwait had just locked the tuple then some
- * other xact could update this tuple before we get to this point.
- * Check for xmax change, and start over if so.
+ * If we're requesting NoKeyExclusive, we might also be able to
+ * avoid sleeping; just ensure that there's no other lock type than
+ * KeyShare. Note that this is a bit more involved than just
+ * checking hint bits -- we need to expand the multixact to figure
+ * out lock modes for each one (unless there was only one such
+ * locker).
*/
- if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
- !TransactionIdEquals(HeapTupleHeaderGetXmax(tuple->t_data),
- xwait))
- goto l3;
+ if (infomask & HEAP_XMAX_IS_MULTI)
+ {
+ int nmembers;
+ MultiXactMember *members;
- /* Otherwise check if it committed or aborted */
- UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
+ /*
+ * We don't need to allow old multixacts here; if that had been
+ * the case, HeapTupleSatisfiesUpdate would have returned
+ * MayBeUpdated and we wouldn't be here.
+ */
+ nmembers = GetMultiXactIdMembers(xwait, &members, false);
+
+ if (nmembers <= 0)
+ {
+ /*
+ * No need to keep the previous xmax here. This is unlikely
+ * to happen.
+ */
+ require_sleep = false;
+ }
+ else
+ {
+ int i;
+ bool allowed = true;
+
+ for (i = 0; i < nmembers; i++)
+ {
+ if (members[i].status != MultiXactStatusForKeyShare)
+ {
+ allowed = false;
+ break;
+ }
+ }
+ if (allowed)
+ {
+ /*
+ * if the xmax changed under us in the meantime, start
+ * over.
+ */
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ if (!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ xwait))
+ {
+ pfree(members);
+ goto l3;
+ }
+ /* otherwise, we're good */
+ require_sleep = false;
+ }
+
+ pfree(members);
+ }
+ }
+ else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
+ {
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /* if the xmax changed in the meantime, start over */
+ if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ xwait))
+ goto l3;
+ /* otherwise, we're good */
+ require_sleep = false;
+ }
}
/*
- * We may lock if previous xmax aborted, or if it committed but only
- * locked the tuple without updating it. The case where we didn't
- * wait because we are joining an existing shared lock is correctly
- * handled, too.
+ * By here, we either have already acquired the buffer exclusive lock,
+ * or we must wait for the locking transaction or multixact; so below
+ * we ensure that we grab buffer lock after the sleep.
*/
- if (tuple->t_data->t_infomask & (HEAP_XMAX_INVALID |
- HEAP_IS_LOCKED))
- result = HeapTupleMayBeUpdated;
- else
- result = HeapTupleUpdated;
- }
- if (result != HeapTupleMayBeUpdated)
- {
- Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
- Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
- hufd->ctid = tuple->t_data->t_ctid;
- hufd->xmax = HeapTupleHeaderGetXmax(tuple->t_data);
- if (result == HeapTupleSelfUpdated)
- hufd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
- else
- hufd->cmax = 0; /* for lack of an InvalidCommandId value */
- LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
- if (have_tuple_lock)
- UnlockTuple(relation, tid, tuple_lock_type);
- return result;
- }
+ if (require_sleep)
+ {
+ if (infomask & HEAP_XMAX_IS_MULTI)
+ {
+ MultiXactStatus status = get_mxact_status_for_lock(mode, false);
- /*
- * We might already hold the desired lock (or stronger), possibly under a
- * different subtransaction of the current top transaction. If so, there
- * is no need to change state or issue a WAL record. We already handled
- * the case where this is true for xmax being a MultiXactId, so now check
- * for cases where it is a plain TransactionId.
- *
- * Note in particular that this covers the case where we already hold
- * exclusive lock on the tuple and the caller only wants shared lock. It
- * would certainly not do to give up the exclusive lock.
- */
- xmax = HeapTupleHeaderGetXmax(tuple->t_data);
- old_infomask = tuple->t_data->t_infomask;
-
- if (!(old_infomask & (HEAP_XMAX_INVALID |
- HEAP_XMAX_COMMITTED |
- HEAP_XMAX_IS_MULTI)) &&
- (mode == LockTupleShared ?
- (old_infomask & HEAP_IS_LOCKED) :
- (old_infomask & HEAP_XMAX_EXCL_LOCK)) &&
- TransactionIdIsCurrentTransactionId(xmax))
- {
- LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
- /* Probably can't hold tuple lock here, but may as well check */
- if (have_tuple_lock)
- UnlockTuple(relation, tid, tuple_lock_type);
- return HeapTupleMayBeUpdated;
- }
+ /* We only ever lock tuples, never update them */
+ if (status >= MultiXactStatusNoKeyUpdate)
+ elog(ERROR, "invalid lock mode in heap_lock_tuple");
- /*
- * Compute the new xmax and infomask to store into the tuple. Note we do
- * not modify the tuple just yet, because that would leave it in the wrong
- * state if multixact.c elogs.
- */
- xid = GetCurrentTransactionId();
-
- new_infomask = old_infomask & ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ /* wait for multixact to end */
+ if (nowait)
+ {
+ if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
+ status, NULL, infomask))
+ ereport(ERROR,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("could not obtain lock on row in relation \"%s\"",
+ RelationGetRelationName(relation))));
+ }
+ else
+ MultiXactIdWait((MultiXactId) xwait, status, NULL, infomask);
- if (mode == LockTupleShared)
- {
- /*
- * If this is the first acquisition of a shared lock in the current
- * transaction, set my per-backend OldestMemberMXactId setting. We can
- * be certain that the transaction will never become a member of any
- * older MultiXactIds than that. (We have to do this even if we end
- * up just using our own TransactionId below, since some other backend
- * could incorporate our XID into a MultiXact immediately afterwards.)
- */
- MultiXactIdSetOldestMember();
+ /* if there are updates, follow the update chain */
+ if (follow_updates &&
+ !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
+ {
+ HTSU_Result res;
+
+ res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+ GetCurrentTransactionId(),
+ mode);
+ if (res != HeapTupleMayBeUpdated)
+ {
+ result = res;
+ /* recovery code expects to have buffer lock held */
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ goto failed;
+ }
+ }
- new_infomask |= HEAP_XMAX_SHARED_LOCK;
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
- /*
- * Check to see if we need a MultiXactId because there are multiple
- * lockers.
- *
- * HeapTupleSatisfiesUpdate will have set the HEAP_XMAX_INVALID bit if
- * the xmax was a MultiXactId but it was not running anymore. There is
- * a race condition, which is that the MultiXactId may have finished
- * since then, but that uncommon case is handled within
- * MultiXactIdExpand.
- *
- * There is a similar race condition possible when the old xmax was a
- * regular TransactionId. We test TransactionIdIsInProgress again
- * just to narrow the window, but it's still possible to end up
- * creating an unnecessary MultiXactId. Fortunately this is harmless.
- */
- if (!(old_infomask & (HEAP_XMAX_INVALID | HEAP_XMAX_COMMITTED)))
- {
- if (old_infomask & HEAP_XMAX_IS_MULTI)
- {
/*
- * If the XMAX is already a MultiXactId, then we need to
- * expand it to include our own TransactionId.
+ * If xwait had just locked the tuple then some other xact
+ * could update this tuple before we get to this point. Check
+ * for xmax change, and start over if so.
*/
- xid = MultiXactIdExpand((MultiXactId) xmax, xid);
- new_infomask |= HEAP_XMAX_IS_MULTI;
- }
- else if (TransactionIdIsInProgress(xmax))
- {
+ if (!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ xwait))
+ goto l3;
+
/*
- * If the XMAX is a valid TransactionId, then we need to
- * create a new MultiXactId that includes both the old locker
- * and our own TransactionId.
+ * Of course, the multixact might not be done here: if we're
+ * requesting a light lock mode, other transactions with light
+ * locks could still be alive, as well as locks owned by our
+ * own xact or other subxacts of this backend. We need to
+ * preserve the surviving MultiXact members. Note that it
+ * isn't absolutely necessary in the latter case, but doing so
+ * is simpler.
*/
- xid = MultiXactIdCreate(xmax, xid);
- new_infomask |= HEAP_XMAX_IS_MULTI;
}
else
{
+ /* wait for regular transaction to end */
+ if (nowait)
+ {
+ if (!ConditionalXactLockTableWait(xwait))
+ ereport(ERROR,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("could not obtain lock on row in relation \"%s\"",
+ RelationGetRelationName(relation))));
+ }
+ else
+ XactLockTableWait(xwait);
+
+ /* if there are updates, follow the update chain */
+ if (follow_updates &&
+ !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
+ {
+ HTSU_Result res;
+
+ res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
+ GetCurrentTransactionId(),
+ mode);
+ if (res != HeapTupleMayBeUpdated)
+ {
+ result = res;
+ /* recovery code expects to have buffer lock held */
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ goto failed;
+ }
+ }
+
+ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+
/*
- * Can get here iff HeapTupleSatisfiesUpdate saw the old xmax
- * as running, but it finished before
- * TransactionIdIsInProgress() got to run. Treat it like
- * there's no locker in the tuple.
+ * xwait is done, but if xwait had just locked the tuple then
+ * some other xact could update this tuple before we get to
+ * this point. Check for xmax change, and start over if so.
*/
+ if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ xwait))
+ goto l3;
+
+ /*
+ * Otherwise check if it committed or aborted. Note we cannot
+ * be here if the tuple was only locked by somebody who didn't
+ * conflict with us; that should have been handled above. So
+ * that transaction must necessarily be gone by now.
+ */
+ UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
}
}
+
+ /* By here, we're certain that we hold buffer exclusive lock again */
+
+ /*
+ * We may lock if previous xmax aborted, or if it committed but only
+ * locked the tuple without updating it; or if we didn't have to wait
+ * at all for whatever reason.
+ */
+ if (!require_sleep ||
+ (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
+ HeapTupleHeaderIsOnlyLocked(tuple->t_data))
+ result = HeapTupleMayBeUpdated;
else
- {
- /*
- * There was no previous locker, so just insert our own
- * TransactionId.
- */
- }
+ result = HeapTupleUpdated;
}
- else
+
+failed:
+ if (result != HeapTupleMayBeUpdated)
+ {
+ Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
+ Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
+ hufd->ctid = tuple->t_data->t_ctid;
+ hufd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
+ if (result == HeapTupleSelfUpdated)
+ hufd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
+ else
+ hufd->cmax = 0; /* for lack of an InvalidCommandId value */
+ LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ if (have_tuple_lock)
+ UnlockTupleTuplock(relation, tid, mode);
+ return result;
+ }
+
+ xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
+ old_infomask = tuple->t_data->t_infomask;
+
+ /*
+ * We might already hold the desired lock (or stronger), possibly under a
+ * different subtransaction of the current top transaction. If so, there
+ * is no need to change state or issue a WAL record. We already handled
+ * the case where this is true for xmax being a MultiXactId, so now check
+ * for cases where it is a plain TransactionId.
+ *
+ * Note in particular that this covers the case where we already hold
+ * exclusive lock on the tuple and the caller only wants key share or share
+ * lock. It would certainly not do to give up the exclusive lock.
+ */
+ if (!(old_infomask & (HEAP_XMAX_INVALID |
+ HEAP_XMAX_COMMITTED |
+ HEAP_XMAX_IS_MULTI)) &&
+ (mode == LockTupleKeyShare ?
+ (HEAP_XMAX_IS_KEYSHR_LOCKED(old_infomask) ||
+ HEAP_XMAX_IS_SHR_LOCKED(old_infomask) ||
+ HEAP_XMAX_IS_EXCL_LOCKED(old_infomask)) :
+ mode == LockTupleShare ?
+ (HEAP_XMAX_IS_SHR_LOCKED(old_infomask) ||
+ HEAP_XMAX_IS_EXCL_LOCKED(old_infomask)) :
+ (HEAP_XMAX_IS_EXCL_LOCKED(old_infomask))) &&
+ TransactionIdIsCurrentTransactionId(xmax))
{
- /* We want an exclusive lock on the tuple */
- new_infomask |= HEAP_XMAX_EXCL_LOCK;
+ LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ /* Probably can't hold tuple lock here, but may as well check */
+ if (have_tuple_lock)
+ UnlockTupleTuplock(relation, tid, mode);
+ return HeapTupleMayBeUpdated;
}
+ /*
+ * If this is the first possibly-multixact-able operation in the
+ * current transaction, set my per-backend OldestMemberMXactId setting.
+ * We can be certain that the transaction will never become a member of
+ * any older MultiXactIds than that. (We have to do this even if we
+ * end up just using our own TransactionId below, since some other
+ * backend could incorporate our XID into a MultiXact immediately
+ * afterwards.)
+ */
+ MultiXactIdSetOldestMember();
+
+ /*
+ * Compute the new xmax and infomask to store into the tuple. Note we do
+ * not modify the tuple just yet, because that would leave it in the wrong
+ * state if multixact.c elogs.
+ */
+ compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
+ GetCurrentTransactionId(), mode, false,
+ &xid, &new_infomask, &new_infomask2);
+
START_CRIT_SECTION();
/*
* Store transaction information of xact locking the tuple.
*
* Note: Cmax is meaningless in this context, so don't set it; this avoids
- * possibly generating a useless combo CID.
+ * possibly generating a useless combo CID. Moreover, if we're locking a
+ * previously updated tuple, it's important to preserve the Cmax.
+ *
+ * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
+ * we would break the HOT chain.
*/
- tuple->t_data->t_infomask = new_infomask;
- HeapTupleHeaderClearHotUpdated(tuple->t_data);
+ tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
+ tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ tuple->t_data->t_infomask |= new_infomask;
+ tuple->t_data->t_infomask2 |= new_infomask2;
+ if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
+ HeapTupleHeaderClearHotUpdated(tuple->t_data);
HeapTupleHeaderSetXmax(tuple->t_data, xid);
- /* Make sure there is no forward chain link in t_ctid */
- tuple->t_data->t_ctid = *tid;
+
+ /*
+ * Make sure there is no forward chain link in t_ctid. Note that in the
+ * cases where the tuple has been updated, we must not overwrite t_ctid,
+ * because it was set by the updater. Moreover, if the tuple has been
+ * updated, we need to follow the update chain to lock the new versions
+ * of the tuple as well.
+ */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
+ tuple->t_data->t_ctid = *tid;
MarkBufferDirty(*buffer);
xlrec.target.node = relation->rd_node;
xlrec.target.tid = tuple->t_self;
xlrec.locking_xid = xid;
- xlrec.xid_is_mxact = ((new_infomask & HEAP_XMAX_IS_MULTI) != 0);
- xlrec.shared_lock = (mode == LockTupleShared);
+ xlrec.infobits_set = compute_infobits(new_infomask,
+ tuple->t_data->t_infomask2);
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapLock;
rdata[0].buffer = InvalidBuffer;
* release the lmgr tuple lock, if we had it.
*/
if (have_tuple_lock)
- UnlockTuple(relation, tid, tuple_lock_type);
+ UnlockTupleTuplock(relation, tid, mode);
+
+ return HeapTupleMayBeUpdated;
+}
+
+
+/*
+ * Given an original set of Xmax and infomask, and a transaction (identified by
+ * add_to_xmax) acquiring a new lock of some mode, compute the new Xmax and
+ * corresponding infomasks to use on the tuple.
+ *
+ * Inputs:
+ * xmax, old_infomask, old_infomask2 -- the tuple's current raw Xmax and
+ * flag bits; xmax is interpreted as a MultiXactId when
+ * HEAP_XMAX_IS_MULTI is set in old_infomask.
+ * add_to_xmax -- the transaction acquiring the lock or doing the update.
+ * mode -- the tuple lock strength being acquired.
+ * is_update -- true if add_to_xmax is updating the tuple, false if it
+ * is merely locking it.
+ *
+ * Outputs (stored only on normal return):
+ * *result_xmax -- the Xmax to store: either add_to_xmax itself or a
+ * MultiXactId that includes it.
+ * *result_infomask, *result_infomask2 -- only the Xmax-related bits to
+ * set; both start from zero, so the caller is expected to clear the
+ * old Xmax bits before OR-ing these in.
+ *
+ * The tuple itself is never touched here; only the local copies of the old
+ * infomask are adjusted while deciding which case applies.
+ *
+ * Note that this might have side effects such as creating a new MultiXactId.
+ * An invalid lock mode is reported with elog(ERROR).
+ *
+ * Most callers will have called HeapTupleSatisfiesUpdate before this function;
+ * that will have set the HEAP_XMAX_INVALID bit if the xmax was a MultiXactId
+ * but it was not running anymore. There is a race condition, which is that the
+ * MultiXactId may have finished since then, but that uncommon case is handled
+ * either here, or within MultiXactIdExpand.
+ *
+ * There is a similar race condition possible when the old xmax was a regular
+ * TransactionId. We test TransactionIdIsInProgress again just to narrow the
+ * window, but it's still possible to end up creating an unnecessary
+ * MultiXactId. Fortunately this is harmless.
+ */
+static void
+compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask,
+ uint16 old_infomask2, TransactionId add_to_xmax,
+ LockTupleMode mode, bool is_update,
+ TransactionId *result_xmax, uint16 *result_infomask,
+ uint16 *result_infomask2)
+{
+ TransactionId new_xmax;
+ uint16 new_infomask,
+ new_infomask2;
+
+ /*
+ * Several cases below conclude that the old Xmax can be ignored; they
+ * adjust the local old_infomask accordingly and jump back here so that
+ * the "no previous locker" case does the actual work.
+ */
+l5:
+ new_infomask = 0;
+ new_infomask2 = 0;
+ if (old_infomask & HEAP_XMAX_INVALID)
+ {
+ /*
+ * No previous locker; we just insert our own TransactionId.
+ */
+ if (is_update)
+ {
+ new_xmax = add_to_xmax;
+ if (mode == LockTupleExclusive)
+ new_infomask2 |= HEAP_KEYS_UPDATED;
+ }
+ else
+ {
+ new_infomask |= HEAP_XMAX_LOCK_ONLY;
+ switch (mode)
+ {
+ case LockTupleKeyShare:
+ new_xmax = add_to_xmax;
+ new_infomask |= HEAP_XMAX_KEYSHR_LOCK;
+ break;
+ case LockTupleShare:
+ new_xmax = add_to_xmax;
+ new_infomask |= HEAP_XMAX_SHR_LOCK;
+ break;
+ case LockTupleNoKeyExclusive:
+ new_xmax = add_to_xmax;
+ new_infomask |= HEAP_XMAX_EXCL_LOCK;
+ break;
+ case LockTupleExclusive:
+ new_xmax = add_to_xmax;
+ new_infomask |= HEAP_XMAX_EXCL_LOCK;
+ new_infomask2 |= HEAP_KEYS_UPDATED;
+ break;
+ default:
+ new_xmax = InvalidTransactionId; /* silence compiler */
+ elog(ERROR, "invalid lock mode");
+ }
+ }
+ }
+ else if (old_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ MultiXactStatus new_status;
+
+ /*
+ * Currently we don't allow XMAX_COMMITTED to be set for multis,
+ * so cross-check.
+ */
+ Assert(!(old_infomask & HEAP_XMAX_COMMITTED));
+
+ /*
+ * A multixact together with LOCK_ONLY set but neither lock bit set
+ * (i.e. a pg_upgraded share locked tuple) cannot possibly be running
+ * anymore. This check is critical for databases upgraded by
+ * pg_upgrade; both MultiXactIdIsRunning and MultiXactIdExpand assume
+ * that such multis are never passed.
+ */
+ if (!(old_infomask & HEAP_LOCK_MASK) &&
+ HEAP_XMAX_IS_LOCKED_ONLY(old_infomask))
+ {
+ old_infomask &= ~HEAP_XMAX_IS_MULTI;
+ old_infomask |= HEAP_XMAX_INVALID;
+ goto l5;
+ }
+
+ /*
+ * If the XMAX is already a MultiXactId, then we need to expand it to
+ * include add_to_xmax; but if all the members were lockers and are all
+ * gone, we can do away with the IS_MULTI bit and just set add_to_xmax
+ * as the only locker/updater. If all lockers are gone and we have an
+ * updater that did not commit (it aborted, or died in a crash), we can
+ * also do without a multi.
+ *
+ * The cost of doing GetMultiXactIdMembers would be paid by
+ * MultiXactIdExpand if we weren't to do this, so this check is not
+ * incurring extra work anyhow.
+ */
+ if (!MultiXactIdIsRunning(xmax))
+ {
+ /*
+ * Test !DidCommit rather than DidAbort: an updater that died in a
+ * crash is never marked aborted in the commit log, yet, being
+ * known not to be running, it must be treated exactly like an
+ * aborted one. This matches how the plain-Xid cases below treat
+ * an Xmax that is neither in progress nor committed.
+ */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(old_infomask) ||
+ !TransactionIdDidCommit(MultiXactIdGetUpdateXid(xmax,
+ old_infomask)))
+ {
+ /*
+ * Reset these bits and restart; otherwise fall through to
+ * create a new multi below.
+ */
+ old_infomask &= ~HEAP_XMAX_IS_MULTI;
+ old_infomask |= HEAP_XMAX_INVALID;
+ goto l5;
+ }
+ }
+
+ new_status = get_mxact_status_for_lock(mode, is_update);
+
+ new_xmax = MultiXactIdExpand((MultiXactId) xmax, add_to_xmax,
+ new_status);
+ GetMultiXactIdHintBits(new_xmax, &new_infomask, &new_infomask2);
+ }
+ else if (old_infomask & HEAP_XMAX_COMMITTED)
+ {
+ /*
+ * It's a committed update, so we need to preserve him as updater of
+ * the tuple.
+ */
+ MultiXactStatus status;
+ MultiXactStatus new_status;
+
+ if (old_infomask2 & HEAP_KEYS_UPDATED)
+ status = MultiXactStatusUpdate;
+ else
+ status = MultiXactStatusNoKeyUpdate;
+
+ new_status = get_mxact_status_for_lock(mode, is_update);
+ /*
+ * since it's not running, it's obviously impossible for the old
+ * updater to be identical to the current one, so we need not check
+ * for that case as we do in the in-progress-Xid block below.
+ */
+ new_xmax = MultiXactIdCreate(xmax, status, add_to_xmax, new_status);
+ GetMultiXactIdHintBits(new_xmax, &new_infomask, &new_infomask2);
+ }
+ else if (TransactionIdIsInProgress(xmax))
+ {
+ /*
+ * If the XMAX is a valid, in-progress TransactionId, then we need to
+ * create a new MultiXactId that includes both the old locker or
+ * updater and our own TransactionId.
+ */
+ MultiXactStatus status;
+ MultiXactStatus new_status;
+
+ /* Work out what the existing Xmax holder did, from the flag bits. */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(old_infomask))
+ {
+ if (HEAP_XMAX_IS_KEYSHR_LOCKED(old_infomask))
+ status = MultiXactStatusForKeyShare;
+ else if (HEAP_XMAX_IS_SHR_LOCKED(old_infomask))
+ status = MultiXactStatusForShare;
+ else if (HEAP_XMAX_IS_EXCL_LOCKED(old_infomask))
+ {
+ if (old_infomask2 & HEAP_KEYS_UPDATED)
+ status = MultiXactStatusForUpdate;
+ else
+ status = MultiXactStatusForNoKeyUpdate;
+ }
+ else
+ {
+ /*
+ * LOCK_ONLY can be present alone only when a page has been
+ * upgraded by pg_upgrade. But in that case,
+ * TransactionIdIsInProgress() should have returned false. We
+ * assume it's no longer locked in this case.
+ */
+ elog(WARNING, "LOCK_ONLY found for Xid in progress %u", xmax);
+ old_infomask |= HEAP_XMAX_INVALID;
+ old_infomask &= ~HEAP_XMAX_LOCK_ONLY;
+ goto l5;
+ }
+ }
+ else
+ {
+ /* it's an update, but which kind? */
+ if (old_infomask2 & HEAP_KEYS_UPDATED)
+ status = MultiXactStatusUpdate;
+ else
+ status = MultiXactStatusNoKeyUpdate;
+ }
+
+ new_status = get_mxact_status_for_lock(mode, is_update);
+
+ /*
+ * If the existing Xmax belongs to the very transaction that is being
+ * added, and its lock mode is identical to or weaker than the new
+ * one, we can act as though there is no existing lock, so set
+ * XMAX_INVALID and restart.
+ */
+ if (xmax == add_to_xmax)
+ {
+ LockTupleMode old_mode = TUPLOCK_from_mxstatus(status);
+ bool old_isupd = ISUPDATE_from_mxstatus(status);
+
+ /*
+ * We can do this if the new LockTupleMode is higher than or equal
+ * to the old one; and if there was previously an update, we need
+ * an update, but if there wasn't, then we can accept there not
+ * being one.
+ */
+ if ((mode >= old_mode) && (is_update || !old_isupd))
+ {
+ /*
+ * Note that the infomask might contain some other dirty bits.
+ * However, since the new infomask is reset to zero, we only
+ * set what's minimally necessary, and that the case that
+ * checks HEAP_XMAX_INVALID is the very first above, there is
+ * no need for extra cleanup of the infomask here.
+ */
+ old_infomask |= HEAP_XMAX_INVALID;
+ goto l5;
+ }
+ }
+ new_xmax = MultiXactIdCreate(xmax, status, add_to_xmax, new_status);
+ GetMultiXactIdHintBits(new_xmax, &new_infomask, &new_infomask2);
+ }
+ else if (!HEAP_XMAX_IS_LOCKED_ONLY(old_infomask) &&
+ TransactionIdDidCommit(xmax))
+ {
+ /*
+ * It's a committed update, so we gotta preserve him as updater of the
+ * tuple.
+ */
+ MultiXactStatus status;
+ MultiXactStatus new_status;
+
+ if (old_infomask2 & HEAP_KEYS_UPDATED)
+ status = MultiXactStatusUpdate;
+ else
+ status = MultiXactStatusNoKeyUpdate;
+
+ new_status = get_mxact_status_for_lock(mode, is_update);
+ /*
+ * since it's not running, it's obviously impossible for the old
+ * updater to be identical to the current one, so we need not check
+ * for that case as we do in the block above.
+ */
+ new_xmax = MultiXactIdCreate(xmax, status, add_to_xmax, new_status);
+ GetMultiXactIdHintBits(new_xmax, &new_infomask, &new_infomask2);
+ }
+ else
+ {
+ /*
+ * Can get here iff the locking/updating transaction was running when
+ * the infomask was extracted from the tuple, but finished before
+ * TransactionIdIsInProgress got to run. Deal with it as if there was
+ * no locker at all in the first place.
+ */
+ old_infomask |= HEAP_XMAX_INVALID;
+ goto l5;
+ }
+
+ /* Hand the results back only now that nothing further can fail. */
+ *result_infomask = new_infomask;
+ *result_infomask2 = new_infomask2;
+ *result_xmax = new_xmax;
+}
+
+
+/*
+ * Recursive part of heap_lock_updated_tuple
+ *
+ * Fetch the tuple pointed to by tid in rel, and mark it as locked by the given
+ * xid with the given mode; if this tuple is updated, recurse to lock the new
+ * version as well.
+ *
+ * Despite the name, the "recursion" is written as a loop: each iteration
+ * handles one tuple version and then follows its t_ctid to the next one.
+ *
+ * rel: relation the tuple versions are fetched from
+ * tid: TID of the first tuple version to lock
+ * xid: transaction ID passed on as the locker when computing the new xmax
+ * mode: tuple lock mode passed on when computing the new xmax
+ *
+ * Returns HeapTupleMayBeUpdated once the end of the update chain is reached.
+ * Returns HeapTupleSelfUpdated if a version whose key was modified (or which
+ * was deleted) turns out to have been updated by the current transaction,
+ * and HeapTupleUpdated if such an update was made by a transaction that has
+ * committed. Raises ERROR if a tuple version cannot be fetched.
+ */
+static HTSU_Result
+heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid,
+ LockTupleMode mode)
+{
+ ItemPointerData tupid;
+ HeapTupleData mytup;
+ Buffer buf;
+ uint16 new_infomask,
+ new_infomask2,
+ old_infomask;
+ TransactionId xmax,
+ new_xmax;
+
+ ItemPointerCopy(tid, &tupid);
+
+ for (;;)
+ {
+ new_infomask = 0;
+ new_xmax = InvalidTransactionId;
+ ItemPointerCopy(&tupid, &(mytup.t_self));
+
+ if (!heap_fetch(rel, SnapshotAny, &mytup, &buf, false, NULL))
+ elog(ERROR, "unable to fetch updated version of tuple");
+
+l4: /* we come back here after waiting for an in-progress updater */
+ CHECK_FOR_INTERRUPTS();
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+ old_infomask = mytup.t_data->t_infomask; /* (re)read after locking the buffer */
+ xmax = HeapTupleHeaderGetRawXmax(mytup.t_data);
+
+ /*
+ * If this tuple is updated and the key has been modified (or deleted),
+ * what we do depends on the status of the updating transaction: if
+ * it's live, we sleep until it finishes; if it has committed, we have
+ * to fail (i.e. return HeapTupleUpdated); if it aborted, we ignore it.
+ * For updates that didn't touch the key, we can just plough ahead.
+ */
+ if (!(old_infomask & HEAP_XMAX_INVALID) &&
+ (mytup.t_data->t_infomask2 & HEAP_KEYS_UPDATED))
+ {
+ TransactionId update_xid;
+
+ /*
+ * Note: we *must* check TransactionIdIsInProgress before
+ * TransactionIdDidAbort/Commit; see comment at top of tqual.c for
+ * an explanation.
+ *
+ * If the updater is reported as neither in progress, aborted nor
+ * committed, none of the branches below fires and we carry on
+ * exactly as in the aborted case.
+ */
+ update_xid = HeapTupleHeaderGetUpdateXid(mytup.t_data);
+ if (TransactionIdIsCurrentTransactionId(update_xid))
+ {
+ UnlockReleaseBuffer(buf);
+ return HeapTupleSelfUpdated;
+ }
+ else if (TransactionIdIsInProgress(update_xid))
+ {
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* unlock only; buf is locked again at l4 */
+ /* No LockTupleTuplock here -- see heap_lock_updated_tuple */
+ XactLockTableWait(update_xid);
+ goto l4;
+ }
+ else if (TransactionIdDidAbort(update_xid))
+ ; /* okay to proceed */
+ else if (TransactionIdDidCommit(update_xid))
+ {
+ UnlockReleaseBuffer(buf);
+ return HeapTupleUpdated;
+ }
+ }
+
+ /*
+ * compute the new Xmax and infomask values for the tuple ...
+ * (NOTE(review): the literal "false" is presumably the is_update
+ * argument, since we are only locking here -- confirm against the
+ * declaration of compute_new_xmax_infomask.)
+ */
+ compute_new_xmax_infomask(xmax, old_infomask, mytup.t_data->t_infomask2,
+ xid, mode, false,
+ &new_xmax, &new_infomask, &new_infomask2);
+
+ START_CRIT_SECTION();
+
+ /*
+ * ... and set them: the old xmax-related bits and HEAP_KEYS_UPDATED are
+ * cleared first, then the freshly computed bits are OR'ed in.
+ */
+ HeapTupleHeaderSetXmax(mytup.t_data, new_xmax);
+ mytup.t_data->t_infomask &= ~HEAP_XMAX_BITS;
+ mytup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ mytup.t_data->t_infomask |= new_infomask;
+ mytup.t_data->t_infomask2 |= new_infomask2;
+
+ MarkBufferDirty(buf);
+
+ /*
+ * XLOG stuff: emit an XLOG_HEAP2_LOCK_UPDATED record. rdata[0] carries
+ * the xl_heap_lock_updated struct (target, new xmax, infobits);
+ * rdata[1] carries no data and only references the buffer.
+ */
+ if (RelationNeedsWAL(rel))
+ {
+ xl_heap_lock_updated xlrec;
+ XLogRecPtr recptr;
+ XLogRecData rdata[2];
+ Page page = BufferGetPage(buf);
+
+ xlrec.target.node = rel->rd_node;
+ xlrec.target.tid = mytup.t_self;
+ xlrec.xmax = new_xmax;
+ xlrec.infobits_set = compute_infobits(new_infomask, new_infomask2);
+
+ rdata[0].data = (char *) &xlrec;
+ rdata[0].len = SizeOfHeapLockUpdated;
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].next = &(rdata[1]);
+
+ rdata[1].data = NULL;
+ rdata[1].len = 0;
+ rdata[1].buffer = buf;
+ rdata[1].buffer_std = true;
+ rdata[1].next = NULL;
+
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED, rdata);
+
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+
+ END_CRIT_SECTION();
+
+ /*
+ * if we find the end of update chain, we're done. (This is tested
+ * against the tuple header as we have just modified it.)
+ */
+ if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID ||
+ ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) ||
+ HeapTupleHeaderIsOnlyLocked(mytup.t_data))
+ {
+ UnlockReleaseBuffer(buf);
+ return HeapTupleMayBeUpdated;
+ }
+ /* tail recursion: go around again with the version t_ctid points to */
+ ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid);
+ UnlockReleaseBuffer(buf);
+ }
+}
+
+/*
+ * heap_lock_updated_tuple
+ * Follow update chain when locking an updated tuple, acquiring locks (row
+ * marks) on the updated versions.
+ *
+ * The initial tuple is assumed to be already locked.
+ *
+ * This function doesn't check visibility, it just unconditionally marks the
+ * tuple(s) as locked. If any tuple in the updated chain is being deleted
+ * concurrently (or updated with the key being modified), sleep until the
+ * transaction doing it is finished.
+ *
+ * Note that we don't acquire heavyweight tuple locks on the tuples we walk
+ * when we have to wait for other transactions to release them, as opposed to
+ * what heap_lock_tuple does. The reason is that having more than one
+ * transaction walking the chain is probably uncommon enough that risk of
+ * starvation is not likely: one of the preconditions for being here is that
+ * the snapshot in use predates the update that created this tuple (because we
+ * started at an earlier version of the tuple), but at the same time such a
+ * transaction cannot be using repeatable read or serializable isolation
+ * levels, because that would lead to a serializability failure.
+ *
+ * rel: relation containing the tuple
+ * tuple: the initial tuple; only its t_self is examined here
+ * ctid: TID of the next version in the update chain
+ * xid, mode: locker and lock mode, handed to heap_lock_updated_tuple_rec
+ *
+ * Returns HeapTupleMayBeUpdated right away when ctid equals the tuple's own
+ * TID; otherwise returns whatever heap_lock_updated_tuple_rec returns.
+ */
+static HTSU_Result
+heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid,
+ TransactionId xid, LockTupleMode mode)
+{
+ if (!ItemPointerEquals(&tuple->t_self, ctid))
+ {
+ /*
+ * If this is the first possibly-multixact-able operation in the
+ * current transaction, set my per-backend OldestMemberMXactId setting.
+ * We can be certain that the transaction will never become a member of
+ * any older MultiXactIds than that. (We have to do this even if we
+ * end up just using our own TransactionId below, since some other
+ * backend could incorporate our XID into a MultiXact immediately
+ * afterwards.)
+ */
+ MultiXactIdSetOldestMember();
+
+ return heap_lock_updated_tuple_rec(rel, ctid, xid, mode);
+ }
+
+ /* ctid points back at the tuple itself: nothing to lock */
return HeapTupleMayBeUpdated;
}
* because this function is applied during WAL recovery, when we don't have
* access to any such state, and can't depend on the hint bits to be set.)
*
+ * Similarly, cutoff_multi must be less than or equal to the smallest
+ * MultiXactId used by any transaction currently open.
+ *
* If the tuple is in a shared buffer, caller must hold an exclusive lock on
* that buffer.
*
* infomask bits.
*/
bool
-heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid)
+heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
+ MultiXactId cutoff_multi)
{
bool changed = false;
TransactionId xid;
changed = true;
}
- if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
+ /*
+ * Note that this code handles IS_MULTI Xmax values, too, but only to mark
+ * the tuple frozen if the updating Xid in the mxact is below the freeze
+ * cutoff; it doesn't remove dead members of a very old multixact.
+ */
+ xid = HeapTupleHeaderGetRawXmax(tuple);
+ if (TransactionIdIsNormal(xid) &&
+ (((!(tuple->t_infomask & HEAP_XMAX_IS_MULTI) &&
+ TransactionIdPrecedes(xid, cutoff_xid))) ||
+ MultiXactIdPrecedes(xid, cutoff_multi)))
{
- xid = HeapTupleHeaderGetXmax(tuple);
- if (TransactionIdIsNormal(xid) &&
- TransactionIdPrecedes(xid, cutoff_xid))
- {
- HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
+ HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
- /*
- * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED
- * + LOCKED. Normalize to INVALID just to be sure no one gets
- * confused.
- */
- tuple->t_infomask &= ~HEAP_XMAX_COMMITTED;
- tuple->t_infomask |= HEAP_XMAX_INVALID;
- HeapTupleHeaderClearHotUpdated(tuple);
- changed = true;
- }
- }
- else
- {
- /*----------
- * XXX perhaps someday we should zero out very old MultiXactIds here?
- *
- * The only way a stale MultiXactId could pose a problem is if a
- * tuple, having once been multiply-share-locked, is not touched by
- * any vacuum or attempted lock or deletion for just over 4G MultiXact
- * creations, and then in the probably-narrow window where its xmax
- * is again a live MultiXactId, someone tries to lock or delete it.
- * Even then, another share-lock attempt would work fine. An
- * exclusive-lock or delete attempt would face unexpected delay, or
- * in the very worst case get a deadlock error. This seems an
- * extremely low-probability scenario with minimal downside even if
- * it does happen, so for now we don't do the extra bookkeeping that
- * would be needed to clean out MultiXactIds.
- *----------
+ /*
+ * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED
+ * + LOCKED. Normalize to INVALID just to be sure no one gets
+ * confused. Also get rid of the HEAP_KEYS_UPDATED bit.
*/
+ tuple->t_infomask &= ~HEAP_XMAX_BITS;
+ tuple->t_infomask |= HEAP_XMAX_INVALID;
+ HeapTupleHeaderClearHotUpdated(tuple);
+ tuple->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ changed = true;
}
/*
return changed;
}
+/*
+ * GetMultiXactIdHintBits
+ *		Derive the infomask/infomask2 hint bits that describe a MultiXactId.
+ *
+ * The members of "multi" are fetched and their statuses are folded into the
+ * values handed back through *new_infomask and *new_infomask2.
+ * HEAP_XMAX_IS_MULTI is always set; HEAP_XMAX_LOCK_ONLY is added when none
+ * of the members is an updater.
+ *
+ * This is meant to be used on a multixact we have just created: such a multi
+ * is in our local cache, so fetching the members is cheap, and it cannot be
+ * a pre-pg_upgrade value, which is why "false" is passed to
+ * GetMultiXactIdMembers.
+ */
+static void
+GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask,
+                       uint16 *new_infomask2)
+{
+    MultiXactMember *members;
+    int         nmembers;
+    int         idx;
+    uint16      mask = HEAP_XMAX_IS_MULTI;
+    uint16      mask2 = 0;
+    bool        only_lockers = true;
+
+    nmembers = GetMultiXactIdMembers(multi, &members, false);
+
+    for (idx = 0; idx < nmembers; idx++)
+    {
+        MultiXactStatus st = members[idx].status;
+
+        if (st == MultiXactStatusForKeyShare)
+            mask |= HEAP_XMAX_KEYSHR_LOCK;
+        else if (st == MultiXactStatusForShare)
+            mask |= HEAP_XMAX_SHR_LOCK;
+        else if (st == MultiXactStatusForNoKeyUpdate ||
+                 st == MultiXactStatusForUpdate ||
+                 st == MultiXactStatusNoKeyUpdate ||
+                 st == MultiXactStatusUpdate)
+        {
+            /* every exclusive-type lock or update shows up as EXCL_LOCK */
+            mask |= HEAP_XMAX_EXCL_LOCK;
+
+            /* the strongest lock and the key-changing update touch keys */
+            if (st == MultiXactStatusForUpdate ||
+                st == MultiXactStatusUpdate)
+                mask2 |= HEAP_KEYS_UPDATED;
+
+            /* real updates (as opposed to locks) rule out LOCK_ONLY */
+            if (st == MultiXactStatusNoKeyUpdate ||
+                st == MultiXactStatusUpdate)
+                only_lockers = false;
+        }
+    }
+
+    if (nmembers > 0)
+        pfree(members);
+
+    if (only_lockers)
+        mask |= HEAP_XMAX_LOCK_ONLY;
+
+    *new_infomask = mask;
+    *new_infomask2 = mask2;
+}
+
+/*
+ * MultiXactIdGetUpdateXid
+ *		Find the updating transaction recorded in a multixact Xmax.
+ *
+ * "xmax" is a MultiXactId and "t_infomask" the infomask that goes with it;
+ * HEAP_XMAX_IS_MULTI must be set and HEAP_XMAX_LOCK_ONLY must be clear (both
+ * conditions are asserted).  Members that are mere lockers, as well as
+ * updaters that have aborted, are skipped.  The Xid of the remaining updater
+ * is returned, or InvalidTransactionId when there is none.
+ */
+static TransactionId
+MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask)
+{
+    MultiXactMember *members;
+    TransactionId updater = InvalidTransactionId;
+    int         nmembers;
+    int         idx;
+
+    Assert(!(t_infomask & HEAP_XMAX_LOCK_ONLY));
+    Assert(t_infomask & HEAP_XMAX_IS_MULTI);
+
+    /*
+     * The LOCK_ONLY bit is known to be clear, so this cannot be a multi
+     * left over from before a pg_upgrade.
+     */
+    nmembers = GetMultiXactIdMembers(xmax, &members, false);
+    if (nmembers <= 0)
+        return updater;
+
+    for (idx = 0; idx < nmembers; idx++)
+    {
+        MultiXactMember *cur = &members[idx];
+        bool        is_locker;
+
+        is_locker = (cur->status == MultiXactStatusForKeyShare ||
+                     cur->status == MultiXactStatusForShare ||
+                     cur->status == MultiXactStatusForNoKeyUpdate ||
+                     cur->status == MultiXactStatusForUpdate);
+
+        /* lockers are of no interest, and neither are aborted updaters */
+        if (is_locker || TransactionIdDidAbort(cur->xid))
+            continue;
+
+        /* at most one non-aborted updater may exist */
+        Assert(updater == InvalidTransactionId);
+        Assert(cur->status == MultiXactStatusNoKeyUpdate ||
+               cur->status == MultiXactStatusUpdate);
+        updater = cur->xid;
+#ifndef USE_ASSERT_CHECKING
+        /*
+         * Without assertions we can stop at the first updater found; an
+         * assert-enabled build keeps scanning so that the Assert above can
+         * catch a second one.
+         */
+        break;
+#endif
+    }
+
+    pfree(members);
+
+    return updater;
+}
+
+/*
+ * HeapTupleGetUpdateXid
+ *		Same lookup as MultiXactIdGetUpdateXid, but starting from a tuple
+ *		header: its raw Xmax and infomask are passed along unchanged.
+ *
+ * Callers that have not already inspected the hint bits should use
+ * HeapTupleHeaderGetUpdateXid instead.
+ */
+TransactionId
+HeapTupleGetUpdateXid(HeapTupleHeader tuple)
+{
+    TransactionId raw_xmax = HeapTupleHeaderGetRawXmax(tuple);
+    uint16      infomask = tuple->t_infomask;
+
+    return MultiXactIdGetUpdateXid(raw_xmax, infomask);
+}
+
+/*
+ * Do_MultiXactIdWait
+ *		Workhorse shared by MultiXactIdWait and ConditionalMultiXactIdWait.
+ *
+ * Each member of "multi" whose lock mode conflicts with "status" is waited
+ * for with XactLockTableWait, or, when "nowait" is true, probed with
+ * ConditionalXactLockTableWait.  Members belonging to the current backend
+ * are never waited for: doing so would be useless and would trip an Assert
+ * inside XactLockTableWait.  Once we return successfully, all conflicting
+ * members *of other backends* are dead, and no new ones can have appeared
+ * since members cannot be added to an existing MultiXactId.
+ *
+ * Someone else may have changed the Xmax of the containing tuple while we
+ * slept, so the caller has to iterate on us somehow.
+ *
+ * If "remaining" is not NULL, it receives the count of members that were
+ * skipped because they belong to the current transaction, plus the
+ * non-conflicting members that are still in progress.
+ *
+ * Returns false only in nowait mode, when a conflicting member could not be
+ * waited for without blocking; the count in *remaining is not to be trusted
+ * in that case.  Returns true otherwise.
+ */
+static bool
+Do_MultiXactIdWait(MultiXactId multi, MultiXactStatus status,
+                   int *remaining, uint16 infomask, bool nowait)
+{
+    MultiXactMember *members;
+    int         nmembers;
+    int         still_there = 0;
+    bool        all_gone = true;
+    bool        allow_old;
+    int         idx;
+
+    allow_old = !(infomask & HEAP_LOCK_MASK) && HEAP_XMAX_IS_LOCKED_ONLY(infomask);
+    nmembers = GetMultiXactIdMembers(multi, &members, allow_old);
+
+    /* the loop ends early as soon as a conditional wait has failed */
+    for (idx = 0; all_gone && idx < nmembers; idx++)
+    {
+        TransactionId memxid = members[idx].xid;
+        MultiXactStatus memstatus = members[idx].status;
+
+        /* our own transaction: count it, but never wait on it */
+        if (TransactionIdIsCurrentTransactionId(memxid))
+        {
+            still_there++;
+            continue;
+        }
+
+        /* no conflict: nothing to wait for, just report it if running */
+        if (!DoLockModesConflict(LOCKMODE_from_mxstatus(memstatus),
+                                 LOCKMODE_from_mxstatus(status)))
+        {
+            if (remaining && TransactionIdIsInProgress(memxid))
+                still_there++;
+            continue;
+        }
+
+        /*
+         * A conflicting member: sleep on it, or only probe it when the
+         * caller asked us not to block.
+         */
+        if (!nowait)
+            XactLockTableWait(memxid);
+        else
+            all_gone = ConditionalXactLockTableWait(memxid);
+    }
+
+    if (nmembers >= 0)
+        pfree(members);
+
+    if (remaining)
+        *remaining = still_there;
+
+    return all_gone;
+}
+
+/*
+ * MultiXactIdWait
+ *		Sleep on a MultiXactId.
+ *
+ * Blocking front end for Do_MultiXactIdWait (nowait = false); the result of
+ * that call is discarded.
+ *
+ * The Xmax of the containing tuple may have been changed by someone else by
+ * the time we wake up, so the caller needs to iterate on us somehow.
+ *
+ * If "remaining" is not NULL, Do_MultiXactIdWait stores its count of
+ * remaining members there.
+ */
+static void
+MultiXactIdWait(MultiXactId multi, MultiXactStatus status,
+                int *remaining, uint16 infomask)
+{
+    const bool  nowait = false;
+
+    (void) Do_MultiXactIdWait(multi, status, remaining, infomask, nowait);
+}
+
+/*
+ * ConditionalMultiXactIdWait
+ *		Like MultiXactIdWait, but never blocks.
+ *
+ * Non-blocking front end for Do_MultiXactIdWait (nowait = true).  Returns
+ * true if no conflicting member had to be waited for, false if some
+ * transactions might still be running.
+ *
+ * The Xmax of the containing tuple may have been changed by someone else in
+ * the meantime, so the caller needs to iterate on us somehow.
+ *
+ * If "remaining" is not NULL, Do_MultiXactIdWait stores its count of
+ * remaining members there.
+ */
+static bool
+ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status,
+                           int *remaining, uint16 infomask)
+{
+    bool        all_gone;
+
+    all_gone = Do_MultiXactIdWait(multi, status, remaining, infomask, true);
+
+    return all_gone;
+}
+
/*
* heap_tuple_needs_freeze
*
* Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
- * are older than the specified cutoff XID. If so, return TRUE.
+ * are older than the specified cutoff XID or MultiXactId. If so, return TRUE.
*
* It doesn't matter whether the tuple is alive or dead, we are checking
* to see if a tuple needs to be removed or frozen to avoid wraparound.
*/
bool
heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
- Buffer buf)
+ MultiXactId cutoff_multi, Buffer buf)
{
TransactionId xid;
TransactionIdPrecedes(xid, cutoff_xid))
return true;
- if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
+ if (!(tuple->t_infomask & HEAP_XMAX_INVALID))
{
- xid = HeapTupleHeaderGetXmax(tuple);
- if (TransactionIdIsNormal(xid) &&
- TransactionIdPrecedes(xid, cutoff_xid))
- return true;
+ if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
+ {
+ xid = HeapTupleHeaderGetRawXmax(tuple);
+ if (TransactionIdIsNormal(xid) &&
+ TransactionIdPrecedes(xid, cutoff_xid))
+ return true;
+ }
+ else
+ {
+ MultiXactId multi;
+
+ multi = HeapTupleHeaderGetRawXmax(tuple);
+ if (MultiXactIdPrecedes(multi, cutoff_multi))
+ return true;
+ }
}
if (tuple->t_infomask & HEAP_MOVED)
TransactionId *latestRemovedXid)
{
TransactionId xmin = HeapTupleHeaderGetXmin(tuple);
- TransactionId xmax = HeapTupleHeaderGetXmax(tuple);
+ TransactionId xmax = HeapTupleHeaderGetUpdateXid(tuple);
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
if (tuple->t_infomask & HEAP_MOVED)
*/
XLogRecPtr
log_heap_freeze(Relation reln, Buffer buffer,
- TransactionId cutoff_xid,
+ TransactionId cutoff_xid, MultiXactId cutoff_multi,
OffsetNumber *offsets, int offcnt)
{
xl_heap_freeze xlrec;
xlrec.node = reln->rd_node;
xlrec.block = BufferGetBlockNumber(buffer);
xlrec.cutoff_xid = cutoff_xid;
+ xlrec.cutoff_multi = cutoff_multi;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapFreeze;
* have modified the buffer(s) and marked them dirty.
*/
static XLogRecPtr
-log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
- Buffer newbuf, HeapTuple newtup,
+log_heap_update(Relation reln, Buffer oldbuf,
+ Buffer newbuf, HeapTuple oldtup, HeapTuple newtup,
bool all_visible_cleared, bool new_all_visible_cleared)
{
xl_heap_update xlrec;
info = XLOG_HEAP_UPDATE;
xlrec.target.node = reln->rd_node;
- xlrec.target.tid = from;
+ xlrec.target.tid = oldtup->t_self;
+ xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+ xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2);
+ xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
xlrec.all_visible_cleared = all_visible_cleared;
xlrec.newtid = newtup->t_self;
xlrec.new_all_visible_cleared = new_all_visible_cleared;
{
xl_heap_freeze *xlrec = (xl_heap_freeze *) XLogRecGetData(record);
TransactionId cutoff_xid = xlrec->cutoff_xid;
+ MultiXactId cutoff_multi = xlrec->cutoff_multi;
Buffer buffer;
Page page;
ItemId lp = PageGetItemId(page, *offsets);
HeapTupleHeader tuple = (HeapTupleHeader) PageGetItem(page, lp);
- (void) heap_freeze_tuple(tuple, cutoff_xid);
+ (void) heap_freeze_tuple(tuple, cutoff_xid, cutoff_multi);
offsets++;
}
}
UnlockReleaseBuffer(buffer);
}
+/*
+ * fix_infomask_from_infobits
+ *		Translate the "infobits" field of an XLog record back into tuple
+ *		header bits (the reverse of compute_infobits).
+ *
+ * HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HEAP_XMAX_KEYSHR_LOCK and
+ * HEAP_XMAX_EXCL_LOCK in *infomask, and HEAP_KEYS_UPDATED in *infomask2, are
+ * replaced by whatever the record says; all other bits are left alone.
+ */
+static void
+fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
+{
+    uint16      set_mask = 0;
+    uint16      set_mask2 = 0;
+
+    /* work out which bits the record wants set ... */
+    if (infobits & XLHL_XMAX_IS_MULTI)
+        set_mask |= HEAP_XMAX_IS_MULTI;
+    if (infobits & XLHL_XMAX_LOCK_ONLY)
+        set_mask |= HEAP_XMAX_LOCK_ONLY;
+    if (infobits & XLHL_XMAX_EXCL_LOCK)
+        set_mask |= HEAP_XMAX_EXCL_LOCK;
+    /* note HEAP_XMAX_SHR_LOCK isn't considered here */
+    if (infobits & XLHL_XMAX_KEYSHR_LOCK)
+        set_mask |= HEAP_XMAX_KEYSHR_LOCK;
+
+    if (infobits & XLHL_KEYS_UPDATED)
+        set_mask2 |= HEAP_KEYS_UPDATED;
+
+    /* ... then swap them in for the old ones */
+    *infomask = (*infomask & ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
+                               HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK))
+        | set_mask;
+    *infomask2 = (*infomask2 & ~HEAP_KEYS_UPDATED) | set_mask2;
+}
+
static void
heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
{
htup = (HeapTupleHeader) PageGetItem(page, lp);
- htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
HeapTupleHeaderClearHotUpdated(htup);
- HeapTupleHeaderSetXmax(htup, record->xl_xid);
+ fix_infomask_from_infobits(xlrec->infobits_set,
+ &htup->t_infomask, &htup->t_infomask2);
+ HeapTupleHeaderSetXmax(htup, xlrec->xmax);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Mark the page as a candidate for pruning */
htup = (HeapTupleHeader) PageGetItem(page, lp);
- htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
+ htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
if (hot_update)
HeapTupleHeaderSetHotUpdated(htup);
else
HeapTupleHeaderClearHotUpdated(htup);
- HeapTupleHeaderSetXmax(htup, record->xl_xid);
+ fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
+ &htup->t_infomask2);
+ HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Set forward chain link in t_ctid */
htup->t_ctid = xlrec->newtid;
HeapTupleHeaderSetXmin(htup, record->xl_xid);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
+ HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = xlrec->newtid;
htup = (HeapTupleHeader) PageGetItem(page, lp);
- htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
- HEAP_XMAX_INVALID |
- HEAP_XMAX_IS_MULTI |
- HEAP_IS_LOCKED |
- HEAP_MOVED);
- if (xlrec->xid_is_mxact)
- htup->t_infomask |= HEAP_XMAX_IS_MULTI;
- if (xlrec->shared_lock)
- htup->t_infomask |= HEAP_XMAX_SHARED_LOCK;
- else
- htup->t_infomask |= HEAP_XMAX_EXCL_LOCK;
+ fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
+ &htup->t_infomask2);
HeapTupleHeaderClearHotUpdated(htup);
HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
UnlockReleaseBuffer(buffer);
}
+/*
+ * heap_xlog_lock_updated
+ *		Redo routine for XLOG_HEAP2_LOCK_UPDATED records.
+ *
+ * Re-applies, to the tuple named by the record's target, the xmax and the
+ * xmax-related infomask bits carried in the record.  Nothing is done beyond
+ * restoring the page when the record has a full-page image, nor when the
+ * buffer cannot be read or the page LSN shows the change is already there.
+ * PANICs if the target line pointer is out of range or not a normal item.
+ */
+static void
+heap_xlog_lock_updated(XLogRecPtr lsn, XLogRecord *record)
+{
+    xl_heap_lock_updated *xlrec;
+    HeapTupleHeader htup;
+    OffsetNumber offnum;
+    ItemId      lp;
+    Buffer      buffer;
+    Page        page;
+
+    xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
+
+    /* A full-page image, if present, is all we need to restore */
+    if (record->xl_info & XLR_BKP_BLOCK(0))
+    {
+        (void) RestoreBackupBlock(lsn, record, 0, false, false);
+        return;
+    }
+
+    buffer = XLogReadBuffer(xlrec->target.node,
+                            ItemPointerGetBlockNumber(&(xlrec->target.tid)),
+                            false);
+    if (!BufferIsValid(buffer))
+        return;
+    page = (Page) BufferGetPage(buffer);
+
+    /* changes are applied already? */
+    if (lsn <= PageGetLSN(page))
+    {
+        UnlockReleaseBuffer(buffer);
+        return;
+    }
+
+    /* locate the target item, complaining loudly if it isn't usable */
+    offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+    if (offnum > PageGetMaxOffsetNumber(page))
+        elog(PANIC, "heap_xlog_lock_updated: invalid lp");
+
+    lp = PageGetItemId(page, offnum);
+    if (!ItemIdIsNormal(lp))
+        elog(PANIC, "heap_xlog_lock_updated: invalid lp");
+
+    htup = (HeapTupleHeader) PageGetItem(page, lp);
+
+    /* stamp the logged infomask bits and xmax onto the tuple */
+    fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
+                               &htup->t_infomask2);
+    HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+
+    PageSetLSN(page, lsn);
+    PageSetTLI(page, ThisTimeLineID);
+    MarkBufferDirty(buffer);
+    UnlockReleaseBuffer(buffer);
+}
+
static void
heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
{
case XLOG_HEAP2_MULTI_INSERT:
heap_xlog_multi_insert(lsn, record);
break;
+ case XLOG_HEAP2_LOCK_UPDATED:
+ heap_xlog_lock_updated(lsn, record);
+ break;
default:
elog(PANIC, "heap2_redo: unknown op code %u", info);
}
* that the page is reconsidered for pruning in future.
*/
heap_prune_record_prunable(prstate,
- HeapTupleHeaderGetXmax(htup));
+ HeapTupleHeaderGetUpdateXid(htup));
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
* that the page is reconsidered for pruning in future.
*/
heap_prune_record_prunable(prstate,
- HeapTupleHeaderGetXmax(htup));
+ HeapTupleHeaderGetUpdateXid(htup));
break;
case HEAPTUPLE_LIVE:
Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
BufferGetBlockNumber(buffer));
offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetXmax(htup);
+ priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
/*
/* Set up to scan the HOT-chain */
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetXmax(htup);
+ priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
else
{
break;
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetXmax(htup);
+ priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
}
}
#include "storage/smgr.h"
#include "utils/memutils.h"
#include "utils/rel.h"
+#include "utils/tqual.h"
/*
* determine tuple visibility */
TransactionId rs_freeze_xid;/* Xid that will be used as freeze cutoff
* point */
+ MultiXactId rs_freeze_multi;/* MultiXactId that will be used as freeze
+ * cutoff point for multixacts */
MemoryContext rs_cxt; /* for hash tables and entries and tuples in
* them */
HTAB *rs_unresolved_tups; /* unmatched A tuples */
* new_heap new, locked heap relation to insert tuples to
* oldest_xmin xid used by the caller to determine which tuples are dead
* freeze_xid xid before which tuples will be frozen
+ * freeze_multi multixact before which multis will be frozen
* use_wal should the inserts to the new heap be WAL-logged?
*
* Returns an opaque RewriteState, allocated in current memory context,
*/
RewriteState
begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin,
- TransactionId freeze_xid, bool use_wal)
+ TransactionId freeze_xid, MultiXactId freeze_multi,
+ bool use_wal)
{
RewriteState state;
MemoryContext rw_cxt;
state->rs_use_wal = use_wal;
state->rs_oldest_xmin = oldest_xmin;
state->rs_freeze_xid = freeze_xid;
+ state->rs_freeze_multi = freeze_multi;
state->rs_cxt = rw_cxt;
/* Initialize hash tables used to track update chains */
* While we have our hands on the tuple, we may as well freeze any
* very-old xmin or xmax, so that future VACUUM effort can be saved.
*/
- heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid);
+ heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid,
+ state->rs_freeze_multi);
/*
* Invalid ctid means that ctid should point to the tuple itself. We'll
/*
* If the tuple has been updated, check the old-to-new mapping hash table.
*/
- if (!(old_tuple->t_data->t_infomask & (HEAP_XMAX_INVALID |
- HEAP_IS_LOCKED)) &&
+ if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
+ HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
!(ItemPointerEquals(&(old_tuple->t_self),
&(old_tuple->t_data->t_ctid))))
{
OldToNewMapping mapping;
memset(&hashkey, 0, sizeof(hashkey));
- hashkey.xmin = HeapTupleHeaderGetXmax(old_tuple->t_data);
+ hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data);
hashkey.tid = old_tuple->t_data->t_ctid;
mapping = (OldToNewMapping)
ItemPointerGetOffsetNumber(&(target->tid)));
}
+/*
+ * out_infobits
+ *		Append a textual rendering of an XLog "infobits" field to buf: one
+ *		space-terminated keyword per bit that is set, in a fixed order.
+ */
+static void
+out_infobits(StringInfo buf, uint8 infobits)
+{
+    static const struct
+    {
+        uint8       flag;
+        const char *label;
+    }           flag_names[] =
+    {
+        {XLHL_XMAX_IS_MULTI, "IS_MULTI "},
+        {XLHL_XMAX_LOCK_ONLY, "LOCK_ONLY "},
+        {XLHL_XMAX_EXCL_LOCK, "EXCL_LOCK "},
+        {XLHL_XMAX_KEYSHR_LOCK, "KEYSHR_LOCK "},
+        {XLHL_KEYS_UPDATED, "KEYS_UPDATED "}
+    };
+    int         idx;
+
+    for (idx = 0; idx < (int) (sizeof(flag_names) / sizeof(flag_names[0])); idx++)
+    {
+        if (infobits & flag_names[idx].flag)
+            appendStringInfoString(buf, flag_names[idx].label);
+    }
+}
+
+
void
heap_desc(StringInfo buf, uint8 xl_info, char *rec)
{
appendStringInfo(buf, "delete: ");
out_target(buf, &(xlrec->target));
+ appendStringInfoChar(buf, ' ');
+ out_infobits(buf, xlrec->infobits_set);
}
else if (info == XLOG_HEAP_UPDATE)
{
else
appendStringInfo(buf, "update: ");
out_target(buf, &(xlrec->target));
- appendStringInfo(buf, "; new %u/%u",
+ appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
+ out_infobits(buf, xlrec->old_infobits_set);
+ appendStringInfo(buf, "; new tid %u/%u xmax %u",
ItemPointerGetBlockNumber(&(xlrec->newtid)),
- ItemPointerGetOffsetNumber(&(xlrec->newtid)));
+ ItemPointerGetOffsetNumber(&(xlrec->newtid)),
+ xlrec->new_xmax);
}
else if (info == XLOG_HEAP_HOT_UPDATE)
{
else
appendStringInfo(buf, "hot_update: ");
out_target(buf, &(xlrec->target));
- appendStringInfo(buf, "; new %u/%u",
+ appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
+ out_infobits(buf, xlrec->old_infobits_set);
+ appendStringInfo(buf, "; new tid %u/%u xmax %u",
ItemPointerGetBlockNumber(&(xlrec->newtid)),
- ItemPointerGetOffsetNumber(&(xlrec->newtid)));
+ ItemPointerGetOffsetNumber(&(xlrec->newtid)),
+ xlrec->new_xmax);
}
else if (info == XLOG_HEAP_NEWPAGE)
{
{
xl_heap_lock *xlrec = (xl_heap_lock *) rec;
- if (xlrec->shared_lock)
- appendStringInfo(buf, "shared_lock: ");
- else
- appendStringInfo(buf, "exclusive_lock: ");
- if (xlrec->xid_is_mxact)
- appendStringInfo(buf, "mxid ");
- else
- appendStringInfo(buf, "xid ");
- appendStringInfo(buf, "%u ", xlrec->locking_xid);
+ appendStringInfo(buf, "lock %u: ", xlrec->locking_xid);
out_target(buf, &(xlrec->target));
+ appendStringInfoChar(buf, ' ');
+ out_infobits(buf, xlrec->infobits_set);
}
else if (info == XLOG_HEAP_INPLACE)
{
else
appendStringInfo(buf, "UNKNOWN");
}
-
void
heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
{
{
xl_heap_freeze *xlrec = (xl_heap_freeze *) rec;
- appendStringInfo(buf, "freeze: rel %u/%u/%u; blk %u; cutoff %u",
+ appendStringInfo(buf, "freeze: rel %u/%u/%u; blk %u; cutoff xid %u multi %u",
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block,
- xlrec->cutoff_xid);
+ xlrec->cutoff_xid, xlrec->cutoff_multi);
}
else if (info == XLOG_HEAP2_CLEAN)
{
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
xlrec->blkno, xlrec->ntuples);
}
+ else if (info == XLOG_HEAP2_LOCK_UPDATED)
+ {
+ xl_heap_lock_updated *xlrec = (xl_heap_lock_updated *) rec;
+
+ appendStringInfo(buf, "lock updated: xmax %u msk %04x; ", xlrec->xmax,
+ xlrec->infobits_set);
+ out_target(buf, &(xlrec->target));
+ }
else
appendStringInfo(buf, "UNKNOWN");
}
#include "access/multixact.h"
+/*
+ * out_member
+ *		Append one multixact member to buf: its xid followed by a short tag
+ *		for its status, "(unk) " when the status is not a recognized one.
+ */
+static void
+out_member(StringInfo buf, MultiXactMember *member)
+{
+    const char *tag;
+
+    switch (member->status)
+    {
+        case MultiXactStatusForKeyShare:
+            tag = "(keysh) ";
+            break;
+        case MultiXactStatusForShare:
+            tag = "(sh) ";
+            break;
+        case MultiXactStatusForNoKeyUpdate:
+            tag = "(fornokeyupd) ";
+            break;
+        case MultiXactStatusForUpdate:
+            tag = "(forupd) ";
+            break;
+        case MultiXactStatusNoKeyUpdate:
+            tag = "(nokeyupd) ";
+            break;
+        case MultiXactStatusUpdate:
+            tag = "(upd) ";
+            break;
+        default:
+            tag = "(unk) ";
+            break;
+    }
+
+    appendStringInfo(buf, "%u ", member->xid);
+    appendStringInfoString(buf, tag);
+}
void
multixact_desc(StringInfo buf, uint8 xl_info, char *rec)
xl_multixact_create *xlrec = (xl_multixact_create *) rec;
int i;
- appendStringInfo(buf, "create multixact %u offset %u:",
- xlrec->mid, xlrec->moff);
- for (i = 0; i < xlrec->nxids; i++)
- appendStringInfo(buf, " %u", xlrec->xids[i]);
+ appendStringInfo(buf, "create mxid %u offset %u nmembers %d: ", xlrec->mid,
+ xlrec->moff, xlrec->nmembers);
+ for (i = 0; i < xlrec->nmembers; i++)
+ out_member(buf, &xlrec->members[i]);
}
else
appendStringInfo(buf, "UNKNOWN");
appendStringInfo(buf, "checkpoint: redo %X/%X; "
"tli %u; fpw %s; xid %u/%u; oid %u; multi %u; offset %u; "
- "oldest xid %u in DB %u; oldest running xid %u; %s",
+ "oldest xid %u in DB %u; oldest multi %u in DB %u; "
+ "oldest running xid %u; %s",
(uint32) (checkpoint->redo >> 32), (uint32) checkpoint->redo,
checkpoint->ThisTimeLineID,
checkpoint->fullPageWrites ? "true" : "false",
checkpoint->nextMultiOffset,
checkpoint->oldestXid,
checkpoint->oldestXidDB,
+ checkpoint->oldestMulti,
+ checkpoint->oldestMultiDB,
checkpoint->oldestActiveXid,
(info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
}
Not all transactional behaviour is emulated, for example we do not insert
a transaction entry into the lock table, nor do we maintain the transaction
-stack in memory. Clog entries are made normally. Multitrans is not maintained
+stack in memory. Clog entries are made normally. Multixact is not maintained
because its purpose is to record tuple level locks that an application has
-requested to prevent write locks. Since write locks cannot be obtained at all,
-there is never any conflict and so there is no reason to update multitrans.
+requested to prevent other tuple locks. Since tuple locks cannot be obtained at
+all, there is never any conflict and so there is no reason to update multixact.
Subtrans is maintained during recovery but the details of the transaction
tree are ignored and all subtransactions reference the top-level TransactionId
directly. Since commit is atomic this provides correct lock wait behaviour
* multixact.c
* PostgreSQL multi-transaction-log manager
*
- * The pg_multixact manager is a pg_clog-like manager that stores an array
- * of TransactionIds for each MultiXactId. It is a fundamental part of the
- * shared-row-lock implementation. A share-locked tuple stores a
- * MultiXactId in its Xmax, and a transaction that needs to wait for the
- * tuple to be unlocked can sleep on the potentially-several TransactionIds
- * that compose the MultiXactId.
+ * The pg_multixact manager is a pg_clog-like manager that stores an array of
+ * MultiXactMember for each MultiXactId. It is a fundamental part of the
+ * shared-row-lock implementation. Each MultiXactMember is comprised of a
+ * TransactionId and a set of flag bits. The name is a bit historical:
+ * originally, a MultiXactId consisted of more than one TransactionId (except
+ * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
+ * legitimate to have MultiXactIds that only include a single Xid.
+ *
+ * The meaning of the flag bits is opaque to this module, but they are mostly
+ * used in heapam.c to identify lock modes that each of the member transactions
+ * is holding on any given tuple. This module just contains support to store
+ * and retrieve the arrays.
*
* We use two SLRU areas, one for storing the offsets at which the data
* starts for each MultiXactId in the other one. This trick allows us to
* replay, the next-MXID and next-offset counters are at least as large as
* anything we saw during replay.
*
+ * We are able to remove segments no longer necessary by carefully tracking
+ * each table's used values: during vacuum, any multixact older than a
+ * certain value is removed; the cutoff value is stored in pg_class.
+ * The minimum value in each database is stored in pg_database, and the
+ * global minimum is part of pg_control. Any vacuum that is able to
+ * advance its database's minimum value also computes a new global minimum,
+ * and uses this value to truncate older segments. When new multixactid
+ * values are to be created, care is taken that the counter does not
+ * fall within the wraparound horizon considering the global minimum value.
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
#include "access/twophase.h"
#include "access/twophase_rmgr.h"
#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "commands/dbcommands.h"
+#include "funcapi.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "storage/lmgr.h"
+#include "storage/pmsignal.h"
#include "storage/procarray.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
+#include "utils/snapmgr.h"
/*
* Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
* used everywhere else in Postgres.
*
- * Note: because both MultiXactOffsets and TransactionIds are 32 bits and
- * wrap around at 0xFFFFFFFF, MultiXact page numbering also wraps around at
- * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need take no
- * explicit notice of that fact in this module, except when comparing segment
- * and page numbers in TruncateMultiXact
- * (see MultiXact{Offset,Member}PagePrecedes).
+ * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
+ * MultiXact page numbering also wraps around at
+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need
+ * take no explicit notice of that fact in this module, except when comparing
+ * segment and page numbers in TruncateMultiXact (see
+ * MultiXactOffsetPagePrecedes).
*/
-/* We need four bytes per offset and also four bytes per member */
+/* We need four bytes per offset */
#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
-#define MULTIXACT_MEMBERS_PER_PAGE (BLCKSZ / sizeof(TransactionId))
#define MultiXactIdToOffsetPage(xid) \
((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
#define MultiXactIdToOffsetEntry(xid) \
((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
-#define MXOffsetToMemberPage(xid) \
- ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
-#define MXOffsetToMemberEntry(xid) \
- ((xid) % (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
+/*
+ * The situation for members is a bit more complex: we store one byte of
+ * additional flag bits for each TransactionId. To do this without getting
+ * into alignment issues, we store four bytes of flags, and then the
+ * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
+ * groups are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409
+ * groups per page. This wastes 12 bytes per page, but that's OK -- simplicity
+ * (and performance) trumps space efficiency here.
+ *
+ * Note that the "offset" macros work with byte offset, not array indexes, so
+ * arithmetic must be done using "char *" pointers.
+ */
+/* We need eight bits per xact, so one xact fits in a byte */
+#define MXACT_MEMBER_BITS_PER_XACT 8
+#define MXACT_MEMBER_FLAGS_PER_BYTE 1
+#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
+
+/* how many full bytes of flags are there in a group? */
+#define MULTIXACT_FLAGBYTES_PER_GROUP 4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
+ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+ (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+#define MULTIXACT_MEMBERS_PER_PAGE \
+ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+
+/* page in which a member is to be found */
+#define MXOffsetToMemberPage(xid) ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
+
+/* Location (byte offset within page) of flag word for a given member */
+#define MXOffsetToFlagsOffset(xid) \
+ ((((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_MEMBERGROUP) % \
+ (TransactionId) MULTIXACT_MEMBERGROUPS_PER_PAGE) * \
+ (TransactionId) MULTIXACT_MEMBERGROUP_SIZE)
+#define MXOffsetToFlagsBitShift(xid) \
+ (((xid) % (TransactionId) MULTIXACT_MEMBERS_PER_MEMBERGROUP) * \
+ MXACT_MEMBER_BITS_PER_XACT)
+
+/* Location (byte offset within page) of TransactionId of given member */
+#define MXOffsetToMemberOffset(xid) \
+ (MXOffsetToFlagsOffset(xid) + MULTIXACT_FLAGBYTES_PER_GROUP + \
+ ((xid) % MULTIXACT_MEMBERS_PER_MEMBERGROUP) * sizeof(TransactionId))
/*
/* the Offset SLRU area was last truncated at this MultiXactId */
MultiXactId lastTruncationPoint;
+ /*
+ * oldest multixact that is still on disk. Anything older than this should
+ * not be consulted.
+ */
+ MultiXactId oldestMultiXactId;
+ Oid oldestMultiXactDB;
+
+ /* support for anti-wraparound measures */
+ MultiXactId multiVacLimit;
+ MultiXactId multiWarnLimit;
+ MultiXactId multiStopLimit;
+ MultiXactId multiWrapLimit;
+
/*
* Per-backend data starts here. We have two arrays stored in the area
* immediately following the MultiXactStateData struct. Each is indexed by
* so they will be uninteresting by the time our next transaction starts.
* (XXX not clear that this is correct --- other members of the MultiXact
* could hang around longer than we did. However, it's not clear what a
- * better policy for flushing old cache entries would be.)
+ * better policy for flushing old cache entries would be.) FIXME actually
+ * this is plain wrong now that multixacts may contain update Xids.
*
* We allocate the cache entries in a memory context that is deleted at
* transaction end, so we don't need to do retail freeing of entries.
{
struct mXactCacheEnt *next;
MultiXactId multi;
- int nxids;
- TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */
+ int nmembers;
+ MultiXactMember members[FLEXIBLE_ARRAY_MEMBER];
} mXactCacheEnt;
static mXactCacheEnt *MXactCache = NULL;
static MemoryContext MXactContext = NULL;
-
#ifdef MULTIXACT_DEBUG
#define debug_elog2(a,b) elog(a,b)
#define debug_elog3(a,b,c) elog(a,b,c)
#define debug_elog4(a,b,c,d) elog(a,b,c,d)
#define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
+#define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
#else
#define debug_elog2(a,b)
#define debug_elog3(a,b,c)
#define debug_elog4(a,b,c,d)
#define debug_elog5(a,b,c,d,e)
+#define debug_elog6(a,b,c,d,e,f)
#endif
/* internal MultiXactId management */
static void MultiXactIdSetOldestVisible(void);
-static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids);
+static MultiXactId CreateMultiXactId(int nmembers, MultiXactMember *members);
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
- int nxids, TransactionId *xids);
-static MultiXactId GetNewMultiXactId(int nxids, MultiXactOffset *offset);
+ int nmembers, MultiXactMember *members);
+static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
/* MultiXact cache management */
-static MultiXactId mXactCacheGetBySet(int nxids, TransactionId *xids);
-static int mXactCacheGetById(MultiXactId multi, TransactionId **xids);
-static void mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids);
+static int mxactMemberComparator(const void *arg1, const void *arg2);
+static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
+static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
+static void mXactCachePut(MultiXactId multi, int nmembers,
+ MultiXactMember *members);
-#ifdef MULTIXACT_DEBUG
-static char *mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids);
-#endif
+static char *mxstatus_to_string(MultiXactStatus status);
/* management of SLRU infrastructure */
static int ZeroMultiXactOffsetPage(int pageno, bool writeXlog);
static int ZeroMultiXactMemberPage(int pageno, bool writeXlog);
static bool MultiXactOffsetPagePrecedes(int page1, int page2);
static bool MultiXactMemberPagePrecedes(int page1, int page2);
-static bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
MultiXactOffset offset2);
static void ExtendMultiXactOffset(MultiXactId multi);
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static void TruncateMultiXact(void);
static void WriteMZeroPageXlogRec(int pageno, uint8 info);
* MultiXactIdCreate
* Construct a MultiXactId representing two TransactionIds.
*
- * The two XIDs must be different.
+ * The two XIDs must be different, or be requesting different statuses.
*
* NB - we don't worry about our local MultiXactId cache here, because that
* is handled by the lower-level routines.
*/
MultiXactId
-MultiXactIdCreate(TransactionId xid1, TransactionId xid2)
+MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1,
+ TransactionId xid2, MultiXactStatus status2)
{
MultiXactId newMulti;
- TransactionId xids[2];
+ MultiXactMember members[2];
AssertArg(TransactionIdIsValid(xid1));
AssertArg(TransactionIdIsValid(xid2));
- Assert(!TransactionIdEquals(xid1, xid2));
+ Assert(!TransactionIdEquals(xid1, xid2) || (status1 != status2));
/*
* Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
* caller just did a check on xid1, so it'd be wasted effort.
*/
- xids[0] = xid1;
- xids[1] = xid2;
+ members[0].xid = xid1;
+ members[0].status = status1;
+ members[1].xid = xid2;
+ members[1].status = status2;
- newMulti = CreateMultiXactId(2, xids);
+ newMulti = CreateMultiXactId(2, members);
- debug_elog5(DEBUG2, "Create: returning %u for %u, %u",
- newMulti, xid1, xid2);
+ debug_elog3(DEBUG2, "Create: %s",
+ mxid_to_string(newMulti, 2, members));
return newMulti;
}
* MultiXactIdExpand
* Add a TransactionId to a pre-existing MultiXactId.
*
- * If the TransactionId is already a member of the passed MultiXactId,
- * just return it as-is.
+ * If the TransactionId is already a member of the passed MultiXactId with the
+ * same status, just return it as-is.
*
* Note that we do NOT actually modify the membership of a pre-existing
* MultiXactId; instead we create a new one. This is necessary to avoid
- * a race condition against MultiXactIdWait (see notes there).
+ * a race condition against code trying to wait for one MultiXactId to finish;
+ * see notes in heapam.c.
*
* NB - we don't worry about our local MultiXactId cache here, because that
* is handled by the lower-level routines.
+ *
+ * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
+ * one upgraded by pg_upgrade from a cluster older than this feature) are not
+ * passed in.
*/
MultiXactId
-MultiXactIdExpand(MultiXactId multi, TransactionId xid)
+MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
{
MultiXactId newMulti;
- TransactionId *members;
- TransactionId *newMembers;
+ MultiXactMember *members;
+ MultiXactMember *newMembers;
int nmembers;
int i;
int j;
AssertArg(MultiXactIdIsValid(multi));
AssertArg(TransactionIdIsValid(xid));
- debug_elog4(DEBUG2, "Expand: received multi %u, xid %u",
- multi, xid);
+ debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
+ multi, xid, mxstatus_to_string(status));
- nmembers = GetMultiXactIdMembers(multi, &members);
+ /*
+ * Note: we don't allow for old multis here. The reason is that the
+ * only caller of this function does a check that the multixact is
+ * no longer running.
+ */
+ nmembers = GetMultiXactIdMembers(multi, &members, false);
if (nmembers < 0)
{
+ MultiXactMember member;
+
/*
* The MultiXactId is obsolete. This can only happen if all the
* MultiXactId members stop running between the caller checking and
* caller, but it would complicate the API and it's unlikely to happen
* too often, so just deal with it by creating a singleton MultiXact.
*/
- newMulti = CreateMultiXactId(1, &xid);
+ member.xid = xid;
+ member.status = status;
+ newMulti = CreateMultiXactId(1, &member);
debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
multi, newMulti);
}
/*
- * If the TransactionId is already a member of the MultiXactId, just
- * return the existing MultiXactId.
+ * If the TransactionId is already a member of the MultiXactId with the
+ * same status, just return the existing MultiXactId.
*/
for (i = 0; i < nmembers; i++)
{
- if (TransactionIdEquals(members[i], xid))
+ if (TransactionIdEquals(members[i].xid, xid) &&
+ (members[i].status == status))
{
debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
xid, multi);
}
/*
- * Determine which of the members of the MultiXactId are still running,
- * and use them to create a new one. (Removing dead members is just an
- * optimization, but a useful one. Note we have the same race condition
- * here as above: j could be 0 at the end of the loop.)
+ * Determine which of the members of the MultiXactId are still of interest.
+ * This is any running transaction, and also any transaction that grabbed
+ * something stronger than just a lock and was committed. (An update that
+ * aborted is of no interest here.)
+ *
+ * (Removing dead members is just an optimization, but a useful one.
+ * Note we have the same race condition here as above: j could be 0 at the
+ * end of the loop.)
*/
- newMembers = (TransactionId *)
- palloc(sizeof(TransactionId) * (nmembers + 1));
+ newMembers = (MultiXactMember *)
+ palloc(sizeof(MultiXactMember) * (nmembers + 1));
for (i = 0, j = 0; i < nmembers; i++)
{
- if (TransactionIdIsInProgress(members[i]))
- newMembers[j++] = members[i];
+ if (TransactionIdIsInProgress(members[i].xid) ||
+ ((members[i].status > MultiXactStatusForUpdate) &&
+ TransactionIdDidCommit(members[i].xid)))
+ {
+ newMembers[j].xid = members[i].xid;
+ newMembers[j++].status = members[i].status;
+ }
}
- newMembers[j++] = xid;
+ newMembers[j].xid = xid;
+ newMembers[j++].status = status;
newMulti = CreateMultiXactId(j, newMembers);
pfree(members);
* We return true if at least one member of the given MultiXactId is still
* running. Note that a "false" result is certain not to change,
* because it is not legal to add members to an existing MultiXactId.
+ *
+ * Caller is expected to have verified that the multixact does not come from
+ * a pg_upgraded share-locked tuple.
*/
bool
MultiXactIdIsRunning(MultiXactId multi)
{
- TransactionId *members;
+ MultiXactMember *members;
int nmembers;
int i;
debug_elog3(DEBUG2, "IsRunning %u?", multi);
- nmembers = GetMultiXactIdMembers(multi, &members);
+ /*
+ * "false" here means we assume our callers have checked that the given
+ * multi cannot possibly come from a pg_upgraded database.
+ */
+ nmembers = GetMultiXactIdMembers(multi, &members, false);
if (nmembers < 0)
{
}
/*
- * Checking for myself is cheap compared to looking in shared memory, so
- * first do the equivalent of MultiXactIdIsCurrent(). This is not needed
- * for correctness, it's just a fast path.
+ * Checking for myself is cheap compared to looking in shared memory;
+ * return true if any live subtransaction of the current top-level
+ * transaction is a member.
+ *
+ * This is not needed for correctness, it's just a fast path.
*/
for (i = 0; i < nmembers; i++)
{
- if (TransactionIdIsCurrentTransactionId(members[i]))
+ if (TransactionIdIsCurrentTransactionId(members[i].xid))
{
debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
pfree(members);
*/
for (i = 0; i < nmembers; i++)
{
- if (TransactionIdIsInProgress(members[i]))
+ if (TransactionIdIsInProgress(members[i].xid))
{
debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
- i, members[i]);
+ i, members[i].xid);
pfree(members);
return true;
}
return false;
}
-/*
- * MultiXactIdIsCurrent
- * Returns true if the current transaction is a member of the MultiXactId.
- *
- * We return true if any live subtransaction of the current top-level
- * transaction is a member. This is appropriate for the same reason that a
- * lock held by any such subtransaction is globally equivalent to a lock
- * held by the current subtransaction: no such lock could be released without
- * aborting this subtransaction, and hence releasing its locks. So it's not
- * necessary to add the current subxact to the MultiXact separately.
- */
-bool
-MultiXactIdIsCurrent(MultiXactId multi)
-{
- bool result = false;
- TransactionId *members;
- int nmembers;
- int i;
-
- nmembers = GetMultiXactIdMembers(multi, &members);
-
- if (nmembers < 0)
- return false;
-
- for (i = 0; i < nmembers; i++)
- {
- if (TransactionIdIsCurrentTransactionId(members[i]))
- {
- result = true;
- break;
- }
- }
-
- pfree(members);
-
- return result;
-}
-
/*
* MultiXactIdSetOldestMember
* Save the oldest MultiXactId this transaction could be a member of.
*
- * We set the OldestMemberMXactId for a given transaction the first time
- * it's going to acquire a shared lock. We need to do this even if we end
- * up using a TransactionId instead of a MultiXactId, because there is a
- * chance that another transaction would add our XID to a MultiXactId.
+ * We set the OldestMemberMXactId for a given transaction the first time it's
+ * going to do some operation that might require a MultiXactId (tuple lock,
+ * update or delete). We need to do this even if we end up using a
+ * TransactionId instead of a MultiXactId, because there is a chance that
+ * another transaction would add our XID to a MultiXactId.
*
- * The value to set is the next-to-be-assigned MultiXactId, so this is meant
- * to be called just before acquiring a shared lock.
+ * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
+ * be called just before doing any such possibly-MultiXactId-able operation.
*/
void
MultiXactIdSetOldestMember(void)
}
/*
- * MultiXactIdWait
- * Sleep on a MultiXactId.
- *
- * We do this by sleeping on each member using XactLockTableWait. Any
- * members that belong to the current backend are *not* waited for, however;
- * this would not merely be useless but would lead to Assert failure inside
- * XactLockTableWait. By the time this returns, it is certain that all
- * transactions *of other backends* that were members of the MultiXactId
- * are dead (and no new ones can have been added, since it is not legal
- * to add members to an existing MultiXactId).
- *
- * But by the time we finish sleeping, someone else may have changed the Xmax
- * of the containing tuple, so the caller needs to iterate on us somehow.
+ * ReadNextMultiXactId
+ * Return the next MultiXactId to be assigned, but don't allocate it
*/
-void
-MultiXactIdWait(MultiXactId multi)
-{
- TransactionId *members;
- int nmembers;
-
- nmembers = GetMultiXactIdMembers(multi, &members);
-
- if (nmembers >= 0)
- {
- int i;
-
- for (i = 0; i < nmembers; i++)
- {
- TransactionId member = members[i];
-
- debug_elog4(DEBUG2, "MultiXactIdWait: waiting for %d (%u)",
- i, member);
- if (!TransactionIdIsCurrentTransactionId(member))
- XactLockTableWait(member);
- }
-
- pfree(members);
- }
-}
-
-/*
- * ConditionalMultiXactIdWait
- * As above, but only lock if we can get the lock without blocking.
- */
-bool
-ConditionalMultiXactIdWait(MultiXactId multi)
+MultiXactId
+ReadNextMultiXactId(void)
{
- bool result = true;
- TransactionId *members;
- int nmembers;
-
- nmembers = GetMultiXactIdMembers(multi, &members);
-
- if (nmembers >= 0)
- {
- int i;
+ MultiXactId mxid;
- for (i = 0; i < nmembers; i++)
- {
- TransactionId member = members[i];
-
- debug_elog4(DEBUG2, "ConditionalMultiXactIdWait: trying %d (%u)",
- i, member);
- if (!TransactionIdIsCurrentTransactionId(member))
- {
- result = ConditionalXactLockTableWait(member);
- if (!result)
- break;
- }
- }
+ /* XXX we could presumably do this without a lock. */
+ LWLockAcquire(MultiXactGenLock, LW_SHARED);
+ mxid = MultiXactState->nextMXact;
+ LWLockRelease(MultiXactGenLock);
- pfree(members);
- }
+ if (mxid < FirstMultiXactId)
+ mxid = FirstMultiXactId;
- return result;
+ return mxid;
}
/*
* Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
* given TransactionIds as members. Returns the newly created MultiXactId.
*
- * NB: the passed xids[] array will be sorted in-place.
+ * NB: the passed members[] array will be sorted in-place.
*/
static MultiXactId
-CreateMultiXactId(int nxids, TransactionId *xids)
+CreateMultiXactId(int nmembers, MultiXactMember *members)
{
MultiXactId multi;
MultiXactOffset offset;
xl_multixact_create xlrec;
debug_elog3(DEBUG2, "Create: %s",
- mxid_to_string(InvalidMultiXactId, nxids, xids));
+ mxid_to_string(InvalidMultiXactId, nmembers, members));
/*
- * See if the same set of XIDs already exists in our cache; if so, just
+ * See if the same set of members already exists in our cache; if so, just
* re-use that MultiXactId. (Note: it might seem that looking in our
* cache is insufficient, and we ought to search disk to see if a
* duplicate definition already exists. But since we only ever create
* corner cases where someone else added us to a MultiXact without our
* knowledge, but it's not worth checking for.)
*/
- multi = mXactCacheGetBySet(nxids, xids);
+ multi = mXactCacheGetBySet(nmembers, members);
if (MultiXactIdIsValid(multi))
{
debug_elog2(DEBUG2, "Create: in cache!");
* in the OFFSETs and MEMBERs files. NB: this routine does
* START_CRIT_SECTION().
*/
- multi = GetNewMultiXactId(nxids, &offset);
+ multi = GetNewMultiXactId(nmembers, &offset);
/*
* Make an XLOG entry describing the new MXID.
*/
xlrec.mid = multi;
xlrec.moff = offset;
- xlrec.nxids = nxids;
+ xlrec.nmembers = nmembers;
+ /*
+ * XXX Note: there's a lot of padding space in MultiXactMember. We could
+ * find a more compact representation of this Xlog record -- perhaps all the
+ * status flags in one XLogRecData, then all the xids in another one? Not
+ * clear that it's worth the trouble though.
+ */
rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfMultiXactCreate;
+ rdata[0].len = SizeOfMultiXactCreate;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) xids;
- rdata[1].len = nxids * sizeof(TransactionId);
+
+ rdata[1].data = (char *) members;
+ rdata[1].len = nmembers * sizeof(MultiXactMember);
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
(void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
/* Now enter the information into the OFFSETs and MEMBERs logs */
- RecordNewMultiXact(multi, offset, nxids, xids);
+ RecordNewMultiXact(multi, offset, nmembers, members);
/* Done with critical section */
END_CRIT_SECTION();
/* Store the new MultiXactId in the local cache, too */
- mXactCachePut(multi, nxids, xids);
+ mXactCachePut(multi, nmembers, members);
debug_elog2(DEBUG2, "Create: all done");
*/
static void
RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
- int nxids, TransactionId *xids)
+ int nmembers, MultiXactMember *members)
{
int pageno;
int prev_pageno;
prev_pageno = -1;
- for (i = 0; i < nxids; i++, offset++)
+ for (i = 0; i < nmembers; i++, offset++)
{
TransactionId *memberptr;
+ uint32 *flagsptr;
+ uint32 flagsval;
+ int bshift;
+ int flagsoff;
+ int memberoff;
+
+ Assert(members[i].status <= MultiXactStatusUpdate);
pageno = MXOffsetToMemberPage(offset);
- entryno = MXOffsetToMemberEntry(offset);
+ memberoff = MXOffsetToMemberOffset(offset);
+ flagsoff = MXOffsetToFlagsOffset(offset);
+ bshift = MXOffsetToFlagsBitShift(offset);
if (pageno != prev_pageno)
{
}
memberptr = (TransactionId *)
- MultiXactMemberCtl->shared->page_buffer[slotno];
- memberptr += entryno;
+ (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
+
+ *memberptr = members[i].xid;
+
+ flagsptr = (uint32 *)
+ (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
- *memberptr = xids[i];
+ flagsval = *flagsptr;
+ flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
+ flagsval |= (members[i].status << bshift);
+ *flagsptr = flagsval;
MultiXactMemberCtl->shared->page_dirty[slotno] = true;
}
* caller must end the critical section after writing SLRU data.
*/
static MultiXactId
-GetNewMultiXactId(int nxids, MultiXactOffset *offset)
+GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
{
MultiXactId result;
MultiXactOffset nextOffset;
- debug_elog3(DEBUG2, "GetNew: for %d xids", nxids);
+ debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
/* MultiXactIdSetOldestMember() must have been called already */
Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]));
+ /* safety check, we should never get this far in a HS slave */
+ if (RecoveryInProgress())
+ elog(ERROR, "cannot assign MultiXactIds during recovery");
+
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
/* Handle wraparound of the nextMXact counter */
if (MultiXactState->nextMXact < FirstMultiXactId)
MultiXactState->nextMXact = FirstMultiXactId;
- /*
- * Assign the MXID, and make sure there is room for it in the file.
- */
+ /* Assign the MXID */
result = MultiXactState->nextMXact;
+ /*----------
+ * Check to see if it's safe to assign another MultiXactId. This protects
+ * against catastrophic data loss due to multixact wraparound. The basic
+ * rules are:
+ *
+ * If we're past multiVacLimit, start trying to force autovacuum cycles.
+ * If we're past multiWarnLimit, start issuing warnings.
+ * If we're past multiStopLimit, refuse to create new MultiXactIds.
+ *
+ * Note these are pretty much the same protections as in GetNewTransactionId.
+ *----------
+ */
+ if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit))
+ {
+ /*
+ * For safety's sake, we release MultiXactGenLock while sending
+ * signals, warnings, etc. This is not so much because we care about
+ * preserving concurrency in this situation, as to avoid any
+ * possibility of deadlock while doing get_database_name(). First,
+ * copy all the shared values we'll need in this path.
+ */
+ MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
+ MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
+ MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
+ Oid oldest_datoid = MultiXactState->oldestMultiXactDB;
+
+ LWLockRelease(MultiXactGenLock);
+
+ /*
+ * To avoid swamping the postmaster with signals, we issue the autovac
+ * request only once per 64K MultiXactIds assigned. This still gives
+ * plenty of chances before we get into real trouble.
+ */
+ if (IsUnderPostmaster && (result % 65536) == 0)
+ SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
+
+ if (IsUnderPostmaster &&
+ !MultiXactIdPrecedes(result, multiStopLimit))
+ {
+ char *oldest_datname = get_database_name(oldest_datoid);
+
+ /* complain even if that DB has disappeared */
+ if (oldest_datname)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database \"%s\"",
+ oldest_datname),
+ errhint("Execute a database-wide VACUUM in that database.\n"
+ "You might also need to commit or roll back old prepared transactions.")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database with OID %u",
+ oldest_datoid),
+ errhint("Execute a database-wide VACUUM in that database.\n"
+ "You might also need to commit or roll back old prepared transactions.")));
+ }
+ else if (!MultiXactIdPrecedes(result, multiWarnLimit))
+ {
+ char *oldest_datname = get_database_name(oldest_datoid);
+
+ /* complain even if that DB has disappeared */
+ if (oldest_datname)
+ ereport(WARNING,
+ (errmsg("database \"%s\" must be vacuumed before %u more MultiXactIds are used",
+ oldest_datname,
+ multiWrapLimit - result),
+ errhint("Execute a database-wide VACUUM in that database.\n"
+ "You might also need to commit or roll back old prepared transactions.")));
+ else
+ ereport(WARNING,
+ (errmsg("database with OID %u must be vacuumed before %u more MultiXactIds are used",
+ oldest_datoid,
+ multiWrapLimit - result),
+ errhint("Execute a database-wide VACUUM in that database.\n"
+ "You might also need to commit or roll back old prepared transactions.")));
+ }
+
+ /* Re-acquire lock and start over */
+ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+ result = MultiXactState->nextMXact;
+ if (result < FirstMultiXactId)
+ result = FirstMultiXactId;
+ }
+
+ /* Make sure there is room for the MXID in the file. */
ExtendMultiXactOffset(result);
/*
if (nextOffset == 0)
{
*offset = 1;
- nxids++; /* allocate member slot 0 too */
+ nmembers++; /* allocate member slot 0 too */
}
else
*offset = nextOffset;
- ExtendMultiXactMember(nextOffset, nxids);
+ ExtendMultiXactMember(nextOffset, nmembers);
/*
* Critical section from here until caller has written the data into the
*
* We don't care about MultiXactId wraparound here; it will be handled by
* the next iteration. But note that nextMXact may be InvalidMultiXactId
- * after this routine exits, so anyone else looking at the variable must
- * be prepared to deal with that. Similarly, nextOffset may be zero, but
- * we won't use that as the actual start offset of the next multixact.
+ * or the first value on a segment-beginning page after this routine exits,
+ * so anyone else looking at the variable must be prepared to deal with
+ * either case. Similarly, nextOffset may be zero, but we won't use that
+ * as the actual start offset of the next multixact.
*/
(MultiXactState->nextMXact)++;
- MultiXactState->nextOffset += nxids;
+ MultiXactState->nextOffset += nmembers;
LWLockRelease(MultiXactGenLock);
/*
* GetMultiXactIdMembers
- * Returns the set of TransactionIds that make up a MultiXactId
+ * Returns the set of MultiXactMembers that make up a MultiXactId
+ *
+ * If the given MultiXactId is older than the value we know to be oldest, we
+ * return -1. The caller is expected to allow that only in permissible cases,
+ * i.e. when the infomask lets it presuppose that the tuple had been
+ * share-locked before a pg_upgrade; this means that HEAP_XMAX_LOCK_ONLY
+ * must be set, while HEAP_XMAX_KEYSHR_LOCK and HEAP_XMAX_EXCL_LOCK must not
+ * be set.
*
- * We return -1 if the MultiXactId is too old to possibly have any members
- * still running; in that case we have not actually looked them up, and
- * *xids is not set.
+ * Other border conditions, such as trying to read a value that's larger than
+ * the value currently known as the next to assign, raise an error. Previously
+ * these also returned -1, but since this can lead to the wrong visibility
+ * results, it is dangerous to do that.
*/
int
-GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
+GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
+ bool allow_old)
{
int pageno;
int prev_pageno;
int length;
int truelength;
int i;
+ MultiXactId oldestMXact;
MultiXactId nextMXact;
MultiXactId tmpMXact;
MultiXactOffset nextOffset;
- TransactionId *ptr;
+ MultiXactMember *ptr;
debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
Assert(MultiXactIdIsValid(multi));
/* See if the MultiXactId is in the local cache */
- length = mXactCacheGetById(multi, xids);
+ length = mXactCacheGetById(multi, members);
if (length >= 0)
{
debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
- mxid_to_string(multi, length, *xids));
+ mxid_to_string(multi, length, *members));
return length;
}
/*
* We check known limits on MultiXact before resorting to the SLRU area.
*
- * An ID older than our OldestVisibleMXactId[] entry can't possibly still
- * be running, and we'd run the risk of trying to read already-truncated
- * SLRU data if we did try to examine it.
+ * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
+ * useful; it should have already been frozen by vacuum. We've truncated
+ * the on-disk structures anyway. Returning the wrong values could lead to
+ * an incorrect visibility result. However, to support pg_upgrade we need
+ * to allow an empty set to be returned regardless, if the caller is
+ * willing to accept it; the caller is expected to check that it's an
+ * allowed condition (such as ensuring that the infomask bits set on the
+ * tuple are consistent with the pg_upgrade scenario). If the caller is
+ * expecting this to be called only on recently created multis, then we
+ * raise an error.
*
* Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is
- * seen, it implies undetected ID wraparound has occurred. We just
- * silently assume that such an ID is no longer running.
+ * seen, it implies undetected ID wraparound has occurred. This raises
+ * a hard error.
*
* Shared lock is enough here since we aren't modifying any global state.
- * Also, we can examine our own OldestVisibleMXactId without the lock,
- * since no one else is allowed to change it.
- */
- if (MultiXactIdPrecedes(multi, OldestVisibleMXactId[MyBackendId]))
- {
- debug_elog2(DEBUG2, "GetMembers: it's too old");
- *xids = NULL;
- return -1;
- }
-
- /*
- * Acquire the shared lock just long enough to grab the current counter
- * values. We may need both nextMXact and nextOffset; see below.
+ * Acquire it just long enough to grab the current counter values. We may
+ * need both nextMXact and nextOffset; see below.
*/
LWLockAcquire(MultiXactGenLock, LW_SHARED);
+ oldestMXact = MultiXactState->oldestMultiXactId;
nextMXact = MultiXactState->nextMXact;
nextOffset = MultiXactState->nextOffset;
LWLockRelease(MultiXactGenLock);
- if (!MultiXactIdPrecedes(multi, nextMXact))
+ if (MultiXactIdPrecedes(multi, oldestMXact))
{
- debug_elog2(DEBUG2, "GetMembers: it's too new!");
- *xids = NULL;
+ ereport(allow_old ? DEBUG1 : ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
+ multi)));
return -1;
}
+ if (!MultiXactIdPrecedes(multi, nextMXact))
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
+ multi)));
+
/*
* Find out the offset at which we need to start reading MultiXactMembers
* and the number of members in the multixact. We determine the latter as
LWLockRelease(MultiXactOffsetControlLock);
- ptr = (TransactionId *) palloc(length * sizeof(TransactionId));
- *xids = ptr;
+ ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
+ *members = ptr;
/* Now get the members themselves. */
LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
for (i = 0; i < length; i++, offset++)
{
TransactionId *xactptr;
+ uint32 *flagsptr;
+ int flagsoff;
+ int bshift;
+ int memberoff;
pageno = MXOffsetToMemberPage(offset);
- entryno = MXOffsetToMemberEntry(offset);
+ memberoff = MXOffsetToMemberOffset(offset);
if (pageno != prev_pageno)
{
}
xactptr = (TransactionId *)
- MultiXactMemberCtl->shared->page_buffer[slotno];
- xactptr += entryno;
+ (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
if (!TransactionIdIsValid(*xactptr))
{
continue;
}
- ptr[truelength++] = *xactptr;
+ flagsoff = MXOffsetToFlagsOffset(offset);
+ bshift = MXOffsetToFlagsBitShift(offset);
+ flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
+
+ ptr[truelength].xid = *xactptr;
+ ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
+ truelength++;
}
LWLockRelease(MultiXactMemberControlLock);
return truelength;
}
+/*
+ * mxactMemberComparator
+ *		qsort comparator that orders MultiXactMember entries
+ *
+ * Entries are ordered by plain numeric comparison of xid, with ties broken
+ * by status.  The wraparound-aware XID comparison is deliberately not used:
+ * it does not respect the triangle inequality, so it is unusable as a sort
+ * order.  Any consistent ordering is good enough for our purposes.
+ */
+static int
+mxactMemberComparator(const void *arg1, const void *arg2)
+{
+	const MultiXactMember *lhs = (const MultiXactMember *) arg1;
+	const MultiXactMember *rhs = (const MultiXactMember *) arg2;
+
+	/* primary key: the member's xid */
+	if (lhs->xid != rhs->xid)
+		return (lhs->xid > rhs->xid) ? 1 : -1;
+
+	/* secondary key: the member's status */
+	if (lhs->status != rhs->status)
+		return (lhs->status > rhs->status) ? 1 : -1;
+
+	return 0;
+}
+
/*
* mXactCacheGetBySet
* returns a MultiXactId from the cache based on the set of
* for the majority of tuples, thus keeping MultiXactId usage low (saving
* both I/O and wraparound issues).
*
- * NB: the passed xids[] array will be sorted in-place.
+ * NB: the passed members array will be sorted in-place.
*/
static MultiXactId
-mXactCacheGetBySet(int nxids, TransactionId *xids)
+mXactCacheGetBySet(int nmembers, MultiXactMember *members)
{
mXactCacheEnt *entry;
debug_elog3(DEBUG2, "CacheGet: looking for %s",
- mxid_to_string(InvalidMultiXactId, nxids, xids));
+ mxid_to_string(InvalidMultiXactId, nmembers, members));
/* sort the array so comparison is easy */
- qsort(xids, nxids, sizeof(TransactionId), xidComparator);
+ qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
for (entry = MXactCache; entry != NULL; entry = entry->next)
{
- if (entry->nxids != nxids)
+ if (entry->nmembers != nmembers)
continue;
- /* We assume the cache entries are sorted */
- if (memcmp(xids, entry->xids, nxids * sizeof(TransactionId)) == 0)
+ /*
+ * We assume the cache entries are sorted, and that the unused bits in
+ * "status" are zeroed.
+ */
+ if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
{
debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
return entry->multi;
/*
* mXactCacheGetById
- * returns the composing TransactionId set from the cache for a
+ * returns the composing MultiXactMember set from the cache for a
* given MultiXactId, if present.
*
- * If successful, *xids is set to the address of a palloc'd copy of the
+ * If successful, *members is set to the address of a palloc'd copy of the
- * TransactionId set. Return value is number of members, or -1 on failure.
+ * MultiXactMember set. Return value is number of members, or -1 on failure.
*/
static int
-mXactCacheGetById(MultiXactId multi, TransactionId **xids)
+mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
{
mXactCacheEnt *entry;
{
if (entry->multi == multi)
{
- TransactionId *ptr;
+ MultiXactMember *ptr;
Size size;
- size = sizeof(TransactionId) * entry->nxids;
- ptr = (TransactionId *) palloc(size);
- *xids = ptr;
+ size = sizeof(MultiXactMember) * entry->nmembers;
+ ptr = (MultiXactMember *) palloc(size);
+ *members = ptr;
- memcpy(ptr, entry->xids, size);
+ memcpy(ptr, entry->members, size);
debug_elog3(DEBUG2, "CacheGet: found %s",
- mxid_to_string(multi, entry->nxids, entry->xids));
- return entry->nxids;
+ mxid_to_string(multi, entry->nmembers, entry->members));
+ return entry->nmembers;
}
}
* Add a new MultiXactId and its composing set into the local cache.
*/
static void
-mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids)
+mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
{
mXactCacheEnt *entry;
debug_elog3(DEBUG2, "CachePut: storing %s",
- mxid_to_string(multi, nxids, xids));
+ mxid_to_string(multi, nmembers, members));
if (MXactContext == NULL)
{
entry = (mXactCacheEnt *)
MemoryContextAlloc(MXactContext,
- offsetof(mXactCacheEnt, xids) +
- nxids * sizeof(TransactionId));
+ offsetof(mXactCacheEnt, members) +
+ nmembers * sizeof(MultiXactMember));
entry->multi = multi;
- entry->nxids = nxids;
- memcpy(entry->xids, xids, nxids * sizeof(TransactionId));
+ entry->nmembers = nmembers;
+ memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
/* mXactCacheGetBySet assumes the entries are sorted, so sort them */
- qsort(entry->xids, nxids, sizeof(TransactionId), xidComparator);
+ qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
entry->next = MXactCache;
MXactCache = entry;
}
-#ifdef MULTIXACT_DEBUG
+/*
+ * mxstatus_to_string
+ * Map a MultiXactStatus value to a short constant label.
+ *
+ * The returned pointer refers to a string literal, so callers must neither
+ * modify nor free it. An unrecognized status is reported with elog(ERROR).
+ */
static char *
-mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids)
+mxstatus_to_string(MultiXactStatus status)
+{
+ switch (status)
+ {
+ case MultiXactStatusForKeyShare:
+ return "keysh";
+ case MultiXactStatusForShare:
+ return "sh";
+ case MultiXactStatusForNoKeyUpdate:
+ return "fornokeyupd";
+ case MultiXactStatusForUpdate:
+ return "forupd";
+ case MultiXactStatusNoKeyUpdate:
+ return "nokeyupd";
+ case MultiXactStatusUpdate:
+ return "upd";
+ default:
+ elog(ERROR, "unrecognized multixact status %d", status);
+ /* presumably only here to keep the compiler quiet after elog(ERROR) */
+ return "";
+ }
+}
+
+/*
+ * mxid_to_string
+ * Build a printable description of a multixact: its id, the member
+ * count, and a bracketed list of "xid (status)" entries.
+ *
+ * The result is copied into TopMemoryContext and remembered in a
+ * function-local static pointer; the copy made by the previous call is
+ * freed at the start of each call. The returned string is therefore only
+ * valid until the next call, and callers must not pfree it themselves.
+ *
+ * NOTE(review): members[0] is read unconditionally, so this assumes
+ * nmembers >= 1 -- confirm against callers.
+ */
+char *
+mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
{
- char *str = palloc(15 * (nxids + 1) + 4);
+ static char *str = NULL;
+ StringInfoData buf;
int i;
- snprintf(str, 47, "%u %d[%u", multi, nxids, xids[0]);
+ /* release the string handed out by the previous call, if any */
+ if (str != NULL)
+ pfree(str);
- for (i = 1; i < nxids; i++)
- snprintf(str + strlen(str), 17, ", %u", xids[i]);
+ initStringInfo(&buf);
- strcat(str, "]");
+ /* first member is printed without a leading separator */
+ appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
+ mxstatus_to_string(members[0].status));
+
+ for (i = 1; i < nmembers; i++)
+ appendStringInfo(&buf, ", %u (%s)", members[i].xid,
+ mxstatus_to_string(members[i].status));
+
+ appendStringInfoChar(&buf, ']');
+ /* keep the result in TopMemoryContext; the scratch buffer is released */
+ str = MemoryContextStrdup(TopMemoryContext, buf.data);
+ pfree(buf.data);
return str;
}
-#endif
/*
* AtEOXact_MultiXact
* This must be called ONCE during postmaster or standalone-backend startup.
*
* StartupXLOG has already established nextMXact/nextOffset by calling
- * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact. Note that we
- * may already have replayed WAL data into the SLRU files.
+ * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
+ * info from pg_control and/or MultiXactAdvanceOldest. Note that we may
+ * already have replayed WAL data into the SLRU files.
*
* We don't need any locks here, really; the SLRU locks are taken
* only because slru.c expects to be called with locks held.
MultiXactOffset offset = MultiXactState->nextOffset;
int pageno;
int entryno;
+ int flagsoff;
/* Clean up offsets state */
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
* Zero out the remainder of the current members page. See notes in
* TrimCLOG() for motivation.
*/
- entryno = MXOffsetToMemberEntry(offset);
- if (entryno != 0)
+ flagsoff = MXOffsetToFlagsOffset(offset);
+ if (flagsoff != 0)
{
int slotno;
TransactionId *xidptr;
+ int memberoff;
+ memberoff = MXOffsetToMemberOffset(offset);
slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
- xidptr = (TransactionId *) MultiXactMemberCtl->shared->page_buffer[slotno];
- xidptr += entryno;
+ xidptr = (TransactionId *)
+ (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
- MemSet(xidptr, 0, BLCKSZ - (entryno * sizeof(TransactionId)));
+ MemSet(xidptr, 0, BLCKSZ - memberoff);
+
+ /*
+ * Note: we don't need to zero out the flag bits in the remaining
+ * members of the current group, because they are always reset before
+ * writing.
+ */
MultiXactMemberCtl->shared->page_dirty[slotno] = true;
}
LWLockRelease(MultiXactMemberControlLock);
-
- /*
- * Initialize lastTruncationPoint to invalid, ensuring that the first
- * checkpoint will try to do truncation.
- */
- MultiXactState->lastTruncationPoint = InvalidMultiXactId;
}
/*
}
/*
- * Get the next MultiXactId and offset to save in a checkpoint record
+ * Get the MultiXact data to save in a checkpoint record
+ *
+ * All four output values are read from MultiXactState under a single
+ * shared-mode acquisition of MultiXactGenLock:
+ * *nextMulti and *nextMultiOffset receive nextMXact and nextOffset;
+ * *oldestMulti and *oldestMultiDB receive oldestMultiXactId and
+ * oldestMultiXactDB.
+ * The is_shutdown argument is not consulted in this function body.
*/
void
MultiXactGetCheckptMulti(bool is_shutdown,
MultiXactId *nextMulti,
- MultiXactOffset *nextMultiOffset)
+ MultiXactOffset *nextMultiOffset,
+ MultiXactId *oldestMulti,
+ Oid *oldestMultiDB)
{
LWLockAcquire(MultiXactGenLock, LW_SHARED);
-
*nextMulti = MultiXactState->nextMXact;
*nextMultiOffset = MultiXactState->nextOffset;
-
+ *oldestMulti = MultiXactState->oldestMultiXactId;
+ *oldestMultiDB = MultiXactState->oldestMultiXactDB;
LWLockRelease(MultiXactGenLock);
- debug_elog4(DEBUG2, "MultiXact: checkpoint is nextMulti %u, nextOffset %u",
- *nextMulti, *nextMultiOffset);
+ debug_elog6(DEBUG2,
+ "MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u",
+ *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
}
/*
SimpleLruFlush(MultiXactOffsetCtl, true);
SimpleLruFlush(MultiXactMemberCtl, true);
- /*
- * Truncate the SLRU files. This could be done at any time, but
- * checkpoint seems a reasonable place for it. There is one exception: if
- * we are called during xlog recovery, then shared->latest_page_number
- * isn't valid (because StartupMultiXact hasn't been called yet) and so
- * SimpleLruTruncate would get confused. It seems best not to risk
- * removing any data during recovery anyway, so don't truncate.
- */
- if (!RecoveryInProgress())
- TruncateMultiXact();
-
TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_DONE(true);
}
LWLockRelease(MultiXactGenLock);
}
+/*
+ * SetMultiXactIdLimit
+ *		Recompute and publish the MultiXactId allocation limits.
+ *
+ * oldest_datminmxid is the oldest datminmxid in the cluster (ie, the oldest
+ * MultiXactId that might exist in any database), and oldest_datoid is the
+ * OID of the (or a) database having that value.  From these we derive the
+ * wrap, stop, warn and forced-vacuum limits, store them in MultiXactState,
+ * and then signal autovacuum and/or emit a warning if the next MultiXactId
+ * to be assigned is already past the corresponding limit.
+ */
+void
+SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
+{
+	MultiXactId wrapLimit;
+	MultiXactId stopLimit;
+	MultiXactId warnLimit;
+	MultiXactId vacLimit;
+	MultiXactId nextMulti;
+
+	Assert(MultiXactIdIsValid(oldest_datminmxid));
+
+	/*
+	 * Autovacuum gets forced once oldest_datminmxid is more than 200 million
+	 * multixacts in the past.
+	 */
+	vacLimit = oldest_datminmxid + 200000000;
+	if (vacLimit < FirstMultiXactId)
+		vacLimit += FirstMultiXactId;
+
+	/*
+	 * Real trouble starts halfway around the counter from the oldest multi
+	 * that may still exist.  The limits below leave plenty of slop, so
+	 * being off by a count or two here is harmless.
+	 */
+	wrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
+	if (wrapLimit < FirstMultiXactId)
+		wrapLimit += FirstMultiXactId;
+
+	/*
+	 * Assignment of new MultiXactIds is refused once we are within 100
+	 * multis of the wrap point.
+	 */
+	stopLimit = wrapLimit - 100;
+	if (stopLimit < FirstMultiXactId)
+		stopLimit -= FirstMultiXactId;
+
+	/*
+	 * Loud complaints begin 10M multis ahead of the stop point.  The number
+	 * is arbitrary but intentionally generous, since many workloads are
+	 * driven by automatic clients that never look at warnings.  It is
+	 * deliberately not configurable.
+	 */
+	warnLimit = stopLimit - 10000000;
+	if (warnLimit < FirstMultiXactId)
+		warnLimit -= FirstMultiXactId;
+
+	/* Hold the lock only while publishing the new values */
+	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+	MultiXactState->oldestMultiXactId = oldest_datminmxid;
+	MultiXactState->oldestMultiXactDB = oldest_datoid;
+	MultiXactState->multiVacLimit = vacLimit;
+	MultiXactState->multiWarnLimit = warnLimit;
+	MultiXactState->multiStopLimit = stopLimit;
+	MultiXactState->multiWrapLimit = wrapLimit;
+	nextMulti = MultiXactState->nextMXact;
+	LWLockRelease(MultiXactGenLock);
+
+	/* Report what we decided */
+	ereport(DEBUG1,
+			(errmsg("MultiXactId wrap limit is %u, limited by database with OID %u",
+					wrapLimit, oldest_datoid)));
+
+	/*
+	 * Already past the forced-autovacuum point?  Then ask for an autovac
+	 * cycle right away.  Autovacuum handles one database per invocation and
+	 * calls back here when done, so this keeps it iterating while old
+	 * databases remain.
+	 */
+	if (MultiXactIdPrecedes(vacLimit, nextMulti) &&
+		IsUnderPostmaster && !InRecovery)
+		SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
+
+	/* Already past the warning point?  Then complain immediately. */
+	if (MultiXactIdPrecedes(warnLimit, nextMulti) && !InRecovery)
+	{
+		char	   *oldest_datname = NULL;
+
+		/*
+		 * The database name can only be looked up inside a transaction; we
+		 * may be called outside one (e.g. during StartupXLOG()).  Even
+		 * inside one, get_database_name can return NULL, for instance if
+		 * the database was just dropped.  In both cases fall back to
+		 * reporting the OID; the warning is still issued even though it
+		 * might by then be unnecessary.
+		 */
+		if (IsTransactionState())
+			oldest_datname = get_database_name(oldest_datoid);
+
+		if (oldest_datname == NULL)
+			ereport(WARNING,
+					(errmsg("database with OID %u must be vacuumed before %u more MultiXactId are used",
+							oldest_datoid,
+							wrapLimit - nextMulti),
+					 errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
+							 "You might also need to commit or roll back old prepared transactions.")));
+		else
+			ereport(WARNING,
+					(errmsg("database \"%s\" must be vacuumed before %u more MultiXactId are used",
+							oldest_datname,
+							wrapLimit - nextMulti),
+					 errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
+							 "You might also need to commit or roll back old prepared transactions.")));
+	}
+}
+
/*
* Ensure the next-to-be-assigned MultiXactId is at least minMulti,
- * and similarly nextOffset is at least minMultiOffset
+ * and similarly nextOffset is at least minMultiOffset.
*
* This is used when we can determine minimum safe values from an XLog
* record (either an on-line checkpoint or an mxact creation log entry).
LWLockRelease(MultiXactGenLock);
}
+/*
+ * MultiXactAdvanceOldest
+ *		Move oldestMultiXactId forward, never backward.
+ *
+ * When the stored oldestMultiXactId does not precede oldestMulti, nothing
+ * is changed; otherwise the limits are recomputed by way of
+ * SetMultiXactIdLimit().
+ */
+void
+MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
+{
+	MultiXactId current = MultiXactState->oldestMultiXactId;
+
+	if (!MultiXactIdPrecedes(current, oldestMulti))
+		return;
+
+	SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
+}
+
/*
* Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
*
*/
while (nmembers > 0)
{
- int entryno;
+ int flagsoff;
+ int flagsbit;
+ int difference;
/*
* Only zero when at first entry of a page.
*/
- entryno = MXOffsetToMemberEntry(offset);
- if (entryno == 0)
+ flagsoff = MXOffsetToFlagsOffset(offset);
+ flagsbit = MXOffsetToFlagsBitShift(offset);
+ if (flagsoff == 0 && flagsbit == 0)
{
int pageno;
}
/* Advance to next page (OK if nmembers goes negative) */
- offset += (MULTIXACT_MEMBERS_PER_PAGE - entryno);
- nmembers -= (MULTIXACT_MEMBERS_PER_PAGE - entryno);
+ difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
+ offset += difference;
+ nmembers -= difference;
}
}
/*
- * Remove all MultiXactOffset and MultiXactMember segments before the oldest
- * ones still of interest.
+ * GetOldestMultiXactId
*
- * This is called only during checkpoints. We assume no more than one
- * backend does this at a time.
+ * Return the oldest MultiXactId that's possibly still seen as live by
+ * any running transaction. Older ones might still exist on disk, but they no
+ * longer have any running member transaction.
*
- * XXX do we have any issues with needing to checkpoint here?
+ * It's not safe to truncate MultiXact SLRU segments on the value returned by
+ * this function; however, it can be used by a full-table vacuum to set the
+ * point at which it will be possible to truncate SLRU for that table.
*/
-static void
-TruncateMultiXact(void)
+MultiXactId
+GetOldestMultiXactId(void)
{
- MultiXactId nextMXact;
- MultiXactOffset nextOffset;
- MultiXactId oldestMXact;
- MultiXactOffset oldestOffset;
- int cutoffPage;
- int i;
+ MultiXactId oldestMXact;
+ MultiXactId nextMXact;
+ int i;
/*
- * First, compute where we can safely truncate. Per notes above, this is
- * the oldest valid value among all the OldestMemberMXactId[] and
+ * This is the oldest valid value among all the OldestMemberMXactId[] and
* OldestVisibleMXactId[] entries, or nextMXact if none are valid.
*/
LWLockAcquire(MultiXactGenLock, LW_SHARED);
oldestMXact = thisoldest;
}
- /* Save the current nextOffset too */
- nextOffset = MultiXactState->nextOffset;
-
LWLockRelease(MultiXactGenLock);
- debug_elog3(DEBUG2, "MultiXact: truncation point = %u", oldestMXact);
+ return oldestMXact;
+}
+
+/* Scan state for SlruScanDirCbFindEarliest; -1 means "no page seen yet" */
+typedef struct mxtruncinfo
+{
+	int			earliestExistingPage;
+} mxtruncinfo;
+
+/*
+ * SlruScanDirCbFindEarliest
+ *		SlruScanDirectory callback that tracks the earliest existing page.
+ *
+ * "data" points to a mxtruncinfo.  Its earliestExistingPage is replaced by
+ * segpage when it is still unset (-1) or when segpage precedes it according
+ * to the SLRU's own PagePrecedes ordering.
+ */
+static bool
+SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data)
+{
+	mxtruncinfo *info = (mxtruncinfo *) data;
+	bool		unset = (info->earliestExistingPage == -1);
+
+	if (unset || ctl->PagePrecedes(segpage, info->earliestExistingPage))
+		info->earliestExistingPage = segpage;
+
+	/* false means: keep going */
+	return false;
+}
+
+/*
+ * Remove all MultiXactOffset and MultiXactMember segments before the oldest
+ * ones still of interest.
+ *
+ * This is called by vacuum after it has successfully advanced a database's
+ * datminmxid value; the cutoff value we're passed is the minimum of all
+ * databases' datminmxid values.
+ */
+void
+TruncateMultiXact(MultiXactId oldestMXact)
+{
+ MultiXactOffset oldestOffset;
+ mxtruncinfo trunc;
+ MultiXactId earliest;
/*
- * If we already truncated at this point, do nothing. This saves time
- * when no MultiXacts are getting used, which is probably not uncommon.
+ * Note we can't just plow ahead with the truncation; it's possible that
+ * there are no segments to truncate, which is a problem because we are
+ * going to attempt to read the offsets page to determine where to truncate
+ * the members SLRU. So we first scan the directory to determine the
+ * earliest offsets page number that we can read without error.
*/
- if (MultiXactState->lastTruncationPoint == oldestMXact)
+ trunc.earliestExistingPage = -1;
+ SlruScanDirectory(MultiXactOffsetCtl, SlruScanDirCbFindEarliest, &trunc);
+ earliest = trunc.earliestExistingPage * MULTIXACT_OFFSETS_PER_PAGE;
+
+ /* nothing to do */
+ if (MultiXactIdPrecedes(oldestMXact, earliest))
return;
/*
- * We need to determine where to truncate MultiXactMember. If we found a
- * valid oldest MultiXactId, read its starting offset; otherwise we use
- * the nextOffset value we saved above.
+ * First, compute the safe truncation point for MultiXactMember.
+ * This is the starting offset of the multixact we were passed
+ * as MultiXactOffset cutoff.
*/
- if (oldestMXact == nextMXact)
- oldestOffset = nextOffset;
- else
{
int pageno;
int slotno;
pageno = MultiXactIdToOffsetPage(oldestMXact);
entryno = MultiXactIdToOffsetEntry(oldestMXact);
- slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, oldestMXact);
- offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+ slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno,
+ oldestMXact);
+ offptr = (MultiXactOffset *)
+ MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr += entryno;
oldestOffset = *offptr;
LWLockRelease(MultiXactOffsetControlLock);
}
- /*
- * The cutoff point is the start of the segment containing oldestMXact. We
- * pass the *page* containing oldestMXact to SimpleLruTruncate.
- */
- cutoffPage = MultiXactIdToOffsetPage(oldestMXact);
-
- SimpleLruTruncate(MultiXactOffsetCtl, cutoffPage);
-
- /*
- * Also truncate MultiXactMember at the previously determined offset.
- */
- cutoffPage = MXOffsetToMemberPage(oldestOffset);
+ /* truncate MultiXactOffset */
+ SimpleLruTruncate(MultiXactOffsetCtl,
+ MultiXactIdToOffsetPage(oldestMXact));
- SimpleLruTruncate(MultiXactMemberCtl, cutoffPage);
-
- /*
- * Set the last known truncation point. We don't need a lock for this
- * since only one backend does checkpoints at a time.
- */
- MultiXactState->lastTruncationPoint = oldestMXact;
+ /* truncate MultiXactMembers and we're done */
+ SimpleLruTruncate(MultiXactMemberCtl,
+ MXOffsetToMemberPage(oldestOffset));
}
/*
* XXX do we need to do something special for InvalidMultiXactId?
* (Doesn't look like it.)
*/
-static bool
+bool
MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
{
+ /*
+ * Compare via the signed 32-bit difference: multi1 is reported as
+ * preceding multi2 exactly when (int32) (multi1 - multi2) is negative.
+ */
int32 diff = (int32) (multi1 - multi2);
return (diff < 0);
}
-
/*
* Write an xlog record reflecting the zeroing of either a MEMBERs or
* OFFSETs page (info shows which)
}
else if (info == XLOG_MULTIXACT_CREATE_ID)
{
- xl_multixact_create *xlrec = (xl_multixact_create *) XLogRecGetData(record);
- TransactionId *xids = xlrec->xids;
+ xl_multixact_create *xlrec =
+ (xl_multixact_create *) XLogRecGetData(record);
TransactionId max_xid;
int i;
/* Store the data back into the SLRU files */
- RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nxids, xids);
+ RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
+ xlrec->members);
/* Make sure nextMXact/nextOffset are beyond what this record has */
- MultiXactAdvanceNextMXact(xlrec->mid + 1, xlrec->moff + xlrec->nxids);
+ MultiXactAdvanceNextMXact(xlrec->mid + 1,
+ xlrec->moff + xlrec->nmembers);
/*
* Make sure nextXid is beyond any XID mentioned in the record. This
* evidence in the XLOG, but let's be safe.
*/
max_xid = record->xl_xid;
- for (i = 0; i < xlrec->nxids; i++)
+ for (i = 0; i < xlrec->nmembers; i++)
{
- if (TransactionIdPrecedes(max_xid, xids[i]))
- max_xid = xids[i];
+ if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
+ max_xid = xlrec->members[i].xid;
}
/*
else
elog(PANIC, "multixact_redo: unknown op code %u", info);
}
+
+/*
+ * pg_get_multixact_members
+ *		Set-returning function: one (xid, mode) row per multixact member.
+ *
+ * The argument is a MultiXactId; values below FirstMultiXactId are rejected
+ * with ERRCODE_INVALID_PARAMETER_VALUE.  On the first call the member list
+ * is fetched with GetMultiXactIdMembers (allow_old = false) and stashed in
+ * the multi-call memory context; each subsequent call emits one member
+ * until the list is exhausted.
+ */
+Datum
+pg_get_multixact_members(PG_FUNCTION_ARGS)
+{
+	typedef struct
+	{
+		MultiXactMember *members;
+		int			nmembers;
+		int			iter;
+	} mxact;
+	MultiXactId mxid = PG_GETARG_UINT32(0);
+	mxact	   *state;
+	FuncCallContext *fctx;
+
+	if (mxid < FirstMultiXactId)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid MultiXactId: %u", mxid)));
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext saved_cxt;
+		TupleDesc	desc;
+
+		fctx = SRF_FIRSTCALL_INIT();
+		saved_cxt = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+		/* fetch the members once; old values need not be tolerated here */
+		state = palloc(sizeof(mxact));
+		state->iter = 0;
+		state->nmembers = GetMultiXactIdMembers(mxid, &state->members, false);
+
+		/* result rows have two columns: xid and mode */
+		desc = CreateTemplateTupleDesc(2, false);
+		TupleDescInitEntry(desc, (AttrNumber) 1, "xid",
+						   XIDOID, -1, 0);
+		TupleDescInitEntry(desc, (AttrNumber) 2, "mode",
+						   TEXTOID, -1, 0);
+
+		fctx->attinmeta = TupleDescGetAttInMetadata(desc);
+		fctx->user_fctx = state;
+
+		MemoryContextSwitchTo(saved_cxt);
+	}
+
+	fctx = SRF_PERCALL_SETUP();
+	state = (mxact *) fctx->user_fctx;
+
+	/* emit the next member, if any remain */
+	if (state->iter < state->nmembers)
+	{
+		MultiXactMember *member = &state->members[state->iter];
+		char	   *values[2];
+		HeapTuple	tuple;
+
+		values[0] = palloc(32);
+		sprintf(values[0], "%u", member->xid);
+		values[1] = mxstatus_to_string(member->status);
+
+		tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
+
+		state->iter++;
+		pfree(values[0]);
+		SRF_RETURN_NEXT(fctx, HeapTupleGetDatum(tuple));
+	}
+
+	/* every member has been returned: release the per-query state */
+	if (state->nmembers > 0)
+		pfree(state->members);
+	pfree(state);
+
+	SRF_RETURN_DONE(fctx);
+}
* If we're past xidStopLimit, refuse to execute transactions, unless
* we are running in a standalone backend (which gives an escape hatch
* to the DBA who somehow got past the earlier defenses).
+ *
+ * Note that this coding also appears in GetNewMultiXactId.
*----------
*/
if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidVacLimit))
checkPoint.nextMultiOffset = 0;
checkPoint.oldestXid = FirstNormalTransactionId;
checkPoint.oldestXidDB = TemplateDbOid;
+ checkPoint.oldestMulti = FirstMultiXactId;
+ checkPoint.oldestMultiDB = TemplateDbOid;
checkPoint.time = (pg_time_t) time(NULL);
checkPoint.oldestActiveXid = InvalidTransactionId;
ShmemVariableCache->oidCount = 0;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
+ SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
/* Set up the XLOG page header */
page->xlp_magic = XLOG_PAGE_MAGIC;
ereport(DEBUG1,
(errmsg("oldest unfrozen transaction ID: %u, in database %u",
checkPoint.oldestXid, checkPoint.oldestXidDB)));
+ ereport(DEBUG1,
+ (errmsg("oldest MultiXactId: %u, in database %u",
+ checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
if (!TransactionIdIsNormal(checkPoint.nextXid))
ereport(PANIC,
(errmsg("invalid next transaction ID")));
ShmemVariableCache->oidCount = 0;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
+ SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
XLogCtl->ckptXid = checkPoint.nextXid;
MultiXactGetCheckptMulti(shutdown,
&checkPoint.nextMulti,
- &checkPoint.nextMultiOffset);
+ &checkPoint.nextMultiOffset,
+ &checkPoint.oldestMulti,
+ &checkPoint.oldestMultiDB);
/*
* Having constructed the checkpoint record, ensure all shmem disk buffers
MultiXactSetNextMXact(checkPoint.nextMulti,
checkPoint.nextMultiOffset);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
+ SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
/*
* If we see a shutdown checkpoint while waiting for an end-of-backup
checkPoint.oldestXid))
SetTransactionIdLimit(checkPoint.oldestXid,
checkPoint.oldestXidDB);
+ MultiXactAdvanceOldest(checkPoint.oldestMulti,
+ checkPoint.oldestMultiDB);
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch;
#include "postgres.h"
#include "access/htup_details.h"
+#include "access/multixact.h"
#include "access/sysattr.h"
#include "access/transam.h"
#include "access/xact.h"
values[Anum_pg_class_relhastriggers - 1] = BoolGetDatum(rd_rel->relhastriggers);
values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass);
values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid);
+ values[Anum_pg_class_relminmxid - 1] = MultiXactIdGetDatum(rd_rel->relminmxid);
if (relacl != (Datum) 0)
values[Anum_pg_class_relacl - 1] = relacl;
else
break;
}
- /* Initialize relfrozenxid */
+ /* Initialize relfrozenxid and relminmxid */
if (relkind == RELKIND_RELATION ||
relkind == RELKIND_TOASTVALUE)
{
* that will do.
*/
new_rel_reltup->relfrozenxid = RecentXmin;
+ /*
+ * Similarly, initialize the minimum Multixact to the first value that
+ * could possibly be stored in tuples in the table. Running
+ * transactions could reuse values from their local cache, so we are
+ * careful to consider all currently running multis.
+ *
+ * XXX this could be refined further, but is it worth the hassle?
+ */
+ new_rel_reltup->relminmxid = GetOldestMultiXactId();
}
else
{
* commands/sequence.c.)
*/
new_rel_reltup->relfrozenxid = InvalidTransactionId;
+ /* relminmxid is likewise left invalid here, mirroring relfrozenxid above */
+ new_rel_reltup->relminmxid = InvalidMultiXactId;
}
new_rel_reltup->relowner = relowner;
#include <unistd.h>
+#include "access/multixact.h"
#include "access/relscan.h"
#include "access/sysattr.h"
#include "access/transam.h"
* As with INSERT_IN_PROGRESS case, this is unexpected
* unless it's our own deletion or a system catalog.
*/
- Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
- xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
+ xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
if (!TransactionIdIsCurrentTransactionId(xwait))
{
if (!is_system_catalog)
}
/* We'll build a new physical relation for the index */
- RelationSetNewRelfilenode(iRel, InvalidTransactionId);
+ RelationSetNewRelfilenode(iRel, InvalidTransactionId,
+ InvalidMultiXactId);
/* Initialize the index and rebuild */
/* Note: we do not need to re-establish pkey setting */
/* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
if (is_pg_class)
- (void) RelationGetIndexAttrBitmap(rel);
+ (void) RelationGetIndexAttrBitmap(rel, false);
PG_TRY();
{
#include <math.h>
+#include "access/multixact.h"
#include "access/transam.h"
#include "access/tupconvert.h"
#include "access/tuptoaster.h"
totalrows,
visibilitymap_count(onerel),
hasindex,
- InvalidTransactionId);
+ InvalidTransactionId,
+ InvalidMultiXactId);
/*
* Same for indexes. Vacuum always scans all indexes, so if we're part of
totalindexrows,
0,
false,
- InvalidTransactionId);
+ InvalidTransactionId,
+ InvalidMultiXactId);
}
}
* right. (Note: this works out properly when the row was
* both inserted and deleted in our xact.)
*/
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(targtuple.t_data)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data)))
deadrows += 1;
else
liverows += 1;
*/
#include "postgres.h"
+#include "access/multixact.h"
#include "access/relscan.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
int freeze_min_age, int freeze_table_age, bool verbose);
static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
int freeze_min_age, int freeze_table_age, bool verbose,
- bool *pSwapToastByContent, TransactionId *pFreezeXid);
+ bool *pSwapToastByContent, TransactionId *pFreezeXid,
+ MultiXactId *pFreezeMulti);
static List *get_tables_to_cluster(MemoryContext cluster_context);
static void reform_and_rewrite_tuple(HeapTuple tuple,
TupleDesc oldTupDesc, TupleDesc newTupDesc,
bool is_system_catalog;
bool swap_toast_by_content;
TransactionId frozenXid;
+ MultiXactId frozenMulti;
/* Mark the correct index as clustered */
if (OidIsValid(indexOid))
/* Copy the heap data into the new table in the desired order */
copy_heap_data(OIDNewHeap, tableOid, indexOid,
freeze_min_age, freeze_table_age, verbose,
- &swap_toast_by_content, &frozenXid);
+ &swap_toast_by_content, &frozenXid, &frozenMulti);
/*
* Swap the physical files of the target and transient tables, then
* rebuild the target's indexes and throw away the transient table.
*/
finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
- swap_toast_by_content, false, frozenXid);
+ swap_toast_by_content, false, frozenXid, frozenMulti);
}
static void
copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
int freeze_min_age, int freeze_table_age, bool verbose,
- bool *pSwapToastByContent, TransactionId *pFreezeXid)
+ bool *pSwapToastByContent, TransactionId *pFreezeXid,
+ MultiXactId *pFreezeMulti)
{
Relation NewHeap,
OldHeap,
bool is_system_catalog;
TransactionId OldestXmin;
TransactionId FreezeXid;
+ MultiXactId MultiXactFrzLimit;
RewriteState rwstate;
bool use_sort;
Tuplesortstate *tuplesort;
*/
vacuum_set_xid_limits(freeze_min_age, freeze_table_age,
OldHeap->rd_rel->relisshared,
- &OldestXmin, &FreezeXid, NULL);
+ &OldestXmin, &FreezeXid, NULL, &MultiXactFrzLimit);
/*
* FreezeXid will become the table's new relfrozenxid, and that mustn't go
if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
FreezeXid = OldHeap->rd_rel->relfrozenxid;
- /* return selected value to caller */
+ /* return selected values to caller */
*pFreezeXid = FreezeXid;
+ *pFreezeMulti = MultiXactFrzLimit;
/* Remember if it's a system catalog */
is_system_catalog = IsSystemRelation(OldHeap);
/* Initialize the rewrite operation */
- rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid, use_wal);
+ rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid,
+ MultiXactFrzLimit, use_wal);
/*
* Decide whether to use an indexscan or seqscan-and-optional-sort to scan
/*
* Similar situation to INSERT_IN_PROGRESS case.
*/
- Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
if (!is_system_catalog &&
- !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple->t_data)))
+ !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
elog(WARNING, "concurrent delete in progress within table \"%s\"",
RelationGetRelationName(OldHeap));
/* treat as recently dead */
swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
bool swap_toast_by_content,
TransactionId frozenXid,
+ MultiXactId frozenMulti,
Oid *mapped_tables)
{
Relation relRelation;
* and then fail to commit the pg_class update.
*/
- /* set rel1's frozen Xid */
+ /* set rel1's frozen Xid and minimum MultiXid */
if (relform1->relkind != RELKIND_INDEX)
{
Assert(TransactionIdIsNormal(frozenXid));
relform1->relfrozenxid = frozenXid;
+ Assert(MultiXactIdIsValid(frozenMulti));
+ relform1->relminmxid = frozenMulti;
}
/* swap size statistics too, since new rel has freshly-updated stats */
target_is_pg_class,
swap_toast_by_content,
frozenXid,
+ frozenMulti,
mapped_tables);
}
else
target_is_pg_class,
swap_toast_by_content,
InvalidTransactionId,
+ InvalidMultiXactId,
mapped_tables);
/* Clean up. */
bool is_system_catalog,
bool swap_toast_by_content,
bool check_constraints,
- TransactionId frozenXid)
+ TransactionId frozenXid,
+ MultiXactId frozenMulti)
{
ObjectAddress object;
Oid mapped_tables[4];
*/
swap_relation_files(OIDOldHeap, OIDNewHeap,
(OIDOldHeap == RelationRelationId),
- swap_toast_by_content, frozenXid, mapped_tables);
+ swap_toast_by_content, frozenXid, frozenMulti,
+ mapped_tables);
/*
* If it's a system catalog, queue an sinval message to flush all
Oid *dbIdP, Oid *ownerIdP,
int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
Oid *dbLastSysOidP, TransactionId *dbFrozenXidP,
+ MultiXactId *dbMinMultiP,
Oid *dbTablespace, char **dbCollate, char **dbCtype);
static bool have_createdb_privilege(void);
static void remove_dbtablespaces(Oid db_id);
bool src_allowconn;
Oid src_lastsysoid;
TransactionId src_frozenxid;
+ MultiXactId src_minmxid;
Oid src_deftablespace;
volatile Oid dst_deftablespace;
Relation pg_database_rel;
if (!get_db_info(dbtemplate, ShareLock,
&src_dboid, &src_owner, &src_encoding,
&src_istemplate, &src_allowconn, &src_lastsysoid,
- &src_frozenxid, &src_deftablespace,
+ &src_frozenxid, &src_minmxid, &src_deftablespace,
&src_collate, &src_ctype))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
new_record[Anum_pg_database_datlastsysoid - 1] = ObjectIdGetDatum(src_lastsysoid);
new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid);
+ new_record[Anum_pg_database_datminmxid - 1] = MultiXactIdGetDatum(src_minmxid);
new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace);
/*
pgdbrel = heap_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
- &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL))
+ &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
{
if (!missing_ok)
{
rel = heap_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL))
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist", oldname)));
pgdbrel = heap_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
- NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL))
+ NULL, NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist", dbname)));
Oid *dbIdP, Oid *ownerIdP,
int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
Oid *dbLastSysOidP, TransactionId *dbFrozenXidP,
+ MultiXactId *dbMinMultiP,
Oid *dbTablespace, char **dbCollate, char **dbCtype)
{
bool result = false;
/* limit of frozen XIDs */
if (dbFrozenXidP)
*dbFrozenXidP = dbform->datfrozenxid;
+ /* limit of frozen Multixacts */
+ if (dbMinMultiP)
+ *dbMinMultiP = dbform->datminmxid;
/* default tablespace for this database */
if (dbTablespace)
*dbTablespace = dbform->dattablespace;
*/
#include "postgres.h"
-#include "access/transam.h"
#include "access/htup_details.h"
+#include "access/multixact.h"
+#include "access/transam.h"
#include "access/xlogutils.h"
#include "catalog/dependency.h"
#include "catalog/namespace.h"
/*
* Create a new storage file for the sequence. We want to keep the
* sequence's relfrozenxid at 0, since it won't contain any unfrozen XIDs.
+ * Same with relminmxid, since a sequence will never contain multixacts.
*/
- RelationSetNewRelfilenode(seq_rel, InvalidTransactionId);
+ RelationSetNewRelfilenode(seq_rel, InvalidTransactionId,
+ InvalidMultiXactId);
/*
* Insert the modified tuple into the new storage file.
* bit update, ie, don't bother to WAL-log it, since we can certainly do
* this again if the update gets lost.
*/
- if (HeapTupleHeaderGetXmax(seqtuple->t_data) != InvalidTransactionId)
+ Assert(!(seqtuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
+ if (HeapTupleHeaderGetRawXmax(seqtuple->t_data) != InvalidTransactionId)
{
HeapTupleHeaderSetXmax(seqtuple->t_data, InvalidTransactionId);
seqtuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
#include "postgres.h"
#include "access/genam.h"
+#include "access/heapam.h"
#include "access/heapam_xlog.h"
+#include "access/multixact.h"
#include "access/reloptions.h"
#include "access/relscan.h"
#include "access/sysattr.h"
{
Oid heap_relid;
Oid toast_relid;
+ MultiXactId minmulti;
/*
* This effectively deletes all rows in the table, and may be done
*/
CheckTableForSerializableConflictIn(rel);
+ minmulti = GetOldestMultiXactId();
+
/*
* Need the full transaction-safe pushups.
*
* as the relfilenode value. The old storage file is scheduled for
* deletion at commit.
*/
- RelationSetNewRelfilenode(rel, RecentXmin);
+ RelationSetNewRelfilenode(rel, RecentXmin, minmulti);
if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
heap_create_init_fork(rel);
if (OidIsValid(toast_relid))
{
rel = relation_open(toast_relid, AccessExclusiveLock);
- RelationSetNewRelfilenode(rel, RecentXmin);
+ RelationSetNewRelfilenode(rel, RecentXmin, minmulti);
if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
heap_create_init_fork(rel);
heap_close(rel, NoLock);
* interest in letting this code work on system catalogs.
*/
finish_heap_swap(tab->relid, OIDNewHeap,
- false, false, true, RecentXmin);
+ false, false, true, RecentXmin,
+ ReadNextMultiXactId());
}
else
{
EPQState *epqstate,
ResultRelInfo *relinfo,
ItemPointer tid,
+ LockTupleMode lockmode,
TupleTableSlot **newSlot);
static bool TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
Trigger *trigger, TriggerEvent event,
int i;
trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
- &newSlot);
+ LockTupleExclusive, &newSlot);
if (trigtuple == NULL)
return false;
if (trigdesc && trigdesc->trig_delete_after_row)
{
HeapTuple trigtuple = GetTupleForTrigger(estate, NULL, relinfo,
- tupleid, NULL);
+ tupleid, LockTupleExclusive,
+ NULL);
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE,
true, trigtuple, NULL, NIL, NULL);
TupleTableSlot *newSlot;
int i;
Bitmapset *modifiedCols;
+ Bitmapset *keyCols;
+ LockTupleMode lockmode;
+
+ /*
+ * Compute lock mode to use. If columns that are part of the key have not
+ * been modified, then we can use a weaker lock, allowing for better
+ * concurrency.
+ */
+ modifiedCols = GetModifiedColumns(relinfo, estate);
+ keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc, true);
+ if (bms_overlap(keyCols, modifiedCols))
+ lockmode = LockTupleExclusive;
+ else
+ lockmode = LockTupleNoKeyExclusive;
/* get a copy of the on-disk tuple we are planning to update */
trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
- &newSlot);
+ lockmode, &newSlot);
if (trigtuple == NULL)
return NULL; /* cancel the update action */
newtuple = slottuple;
}
- modifiedCols = GetModifiedColumns(relinfo, estate);
LocTriggerData.type = T_TriggerData;
LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE |
if (trigdesc && trigdesc->trig_update_after_row)
{
HeapTuple trigtuple = GetTupleForTrigger(estate, NULL, relinfo,
- tupleid, NULL);
+ tupleid, LockTupleExclusive,
+ NULL);
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE,
true, trigtuple, newtuple, recheckIndexes,
EPQState *epqstate,
ResultRelInfo *relinfo,
ItemPointer tid,
+ LockTupleMode lockmode,
TupleTableSlot **newSlot)
{
Relation relation = relinfo->ri_RelationDesc;
tuple.t_self = *tid;
test = heap_lock_tuple(relation, &tuple,
estate->es_output_cid,
- LockTupleExclusive, false /* wait */,
- &buffer, &hufd);
+ lockmode, false /* wait */,
+ false, &buffer, &hufd);
switch (test)
{
case HeapTupleSelfUpdated:
epqstate,
relation,
relinfo->ri_RangeTableIndex,
+ lockmode,
&hufd.ctid,
hufd.xmax);
if (!TupIsNull(epqslot))
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
+#include "access/multixact.h"
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/namespace.h"
/* non-export function prototypes */
static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
-static void vac_truncate_clog(TransactionId frozenXID);
+static void vac_truncate_clog(TransactionId frozenXID, MultiXactId frozenMulti);
static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
bool for_wraparound);
bool sharedRel,
TransactionId *oldestXmin,
TransactionId *freezeLimit,
- TransactionId *freezeTableLimit)
+ TransactionId *freezeTableLimit,
+ MultiXactId *multiXactFrzLimit)
{
int freezemin;
TransactionId limit;
*freezeTableLimit = limit;
}
-}
+ if (multiXactFrzLimit != NULL)
+ {
+ MultiXactId mxLimit;
+
+ /*
+ * simplistic multixactid freezing: use the same freezing policy as
+ * for Xids
+ */
+ mxLimit = GetOldestMultiXactId() - freezemin;
+ if (mxLimit < FirstMultiXactId)
+ mxLimit = FirstMultiXactId;
+
+ *multiXactFrzLimit = mxLimit;
+ }
+}
/*
* vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
vac_update_relstats(Relation relation,
BlockNumber num_pages, double num_tuples,
BlockNumber num_all_visible_pages,
- bool hasindex, TransactionId frozenxid)
+ bool hasindex, TransactionId frozenxid,
+ MultiXactId minmulti)
{
Oid relid = RelationGetRelid(relation);
Relation rd;
dirty = true;
}
+ /* relminmxid must never go backward, either */
+ if (MultiXactIdIsValid(minmulti) &&
+ MultiXactIdPrecedes(pgcform->relminmxid, minmulti))
+ {
+ pgcform->relminmxid = minmulti;
+ dirty = true;
+ }
+
/* If anything changed, write out the tuple. */
if (dirty)
heap_inplace_update(rd, ctup);
* vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
*
* Update pg_database's datfrozenxid entry for our database to be the
- * minimum of the pg_class.relfrozenxid values. If we are able to
- * advance pg_database.datfrozenxid, also try to truncate pg_clog.
+ * minimum of the pg_class.relfrozenxid values.
+ *
+ * Similarly, update our datminmxid to be the minimum of the
+ * pg_class.relminmxid values.
+ *
+ * If we are able to advance either pg_database value, also try to
+ * truncate pg_clog and pg_multixact.
*
* We violate transaction semantics here by overwriting the database's
* existing pg_database tuple with the new value. This is reasonably
SysScanDesc scan;
HeapTuple classTup;
TransactionId newFrozenXid;
+ MultiXactId newFrozenMulti;
bool dirty = false;
/*
* Initialize the "min" calculation with GetOldestXmin, which is a
* reasonable approximation to the minimum relfrozenxid for not-yet-
* committed pg_class entries for new tables; see AddNewRelationTuple().
- * Se we cannot produce a wrong minimum by starting with this.
+ * So we cannot produce a wrong minimum by starting with this.
*/
newFrozenXid = GetOldestXmin(true, true);
+ /*
+ * Similarly, initialize the MultiXact "min" with the value that would
+ * be used on pg_class for new tables. See AddNewRelationTuple().
+ */
+ newFrozenMulti = GetOldestMultiXactId();
+
/*
* We must seqscan pg_class to find the minimum Xid, because there is no
* index that can help us here.
continue;
Assert(TransactionIdIsNormal(classForm->relfrozenxid));
+ Assert(MultiXactIdIsValid(classForm->relminmxid));
if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
newFrozenXid = classForm->relfrozenxid;
+
+ if (MultiXactIdPrecedes(classForm->relminmxid, newFrozenMulti))
+ newFrozenMulti = classForm->relminmxid;
}
/* we're done with pg_class */
heap_close(relation, AccessShareLock);
Assert(TransactionIdIsNormal(newFrozenXid));
+ Assert(MultiXactIdIsValid(newFrozenMulti));
/* Now fetch the pg_database tuple we need to update. */
relation = heap_open(DatabaseRelationId, RowExclusiveLock);
dirty = true;
}
+ /* ditto */
+ if (MultiXactIdPrecedes(dbform->datminmxid, newFrozenMulti))
+ {
+ dbform->datminmxid = newFrozenMulti;
+ dirty = true;
+ }
+
if (dirty)
heap_inplace_update(relation, tuple);
* this action will update that too.
*/
if (dirty || ForceTransactionIdLimitUpdate())
- vac_truncate_clog(newFrozenXid);
+ vac_truncate_clog(newFrozenXid, newFrozenMulti);
}
* info is stale.
*/
static void
-vac_truncate_clog(TransactionId frozenXID)
+vac_truncate_clog(TransactionId frozenXID, MultiXactId frozenMulti)
{
TransactionId myXID = GetCurrentTransactionId();
Relation relation;
HeapScanDesc scan;
HeapTuple tuple;
- Oid oldest_datoid;
+ Oid oldestxid_datoid;
+ Oid oldestmulti_datoid;
bool frozenAlreadyWrapped = false;
- /* init oldest_datoid to sync with my frozenXID */
- oldest_datoid = MyDatabaseId;
+ /* init oldest datoids to sync with my frozen values */
+ oldestxid_datoid = MyDatabaseId;
+ oldestmulti_datoid = MyDatabaseId;
/*
* Scan pg_database to compute the minimum datfrozenxid
Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
Assert(TransactionIdIsNormal(dbform->datfrozenxid));
+ Assert(MultiXactIdIsValid(dbform->datminmxid));
if (TransactionIdPrecedes(myXID, dbform->datfrozenxid))
frozenAlreadyWrapped = true;
else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
{
frozenXID = dbform->datfrozenxid;
- oldest_datoid = HeapTupleGetOid(tuple);
+ oldestxid_datoid = HeapTupleGetOid(tuple);
+ }
+
+ if (MultiXactIdPrecedes(dbform->datminmxid, frozenMulti))
+ {
+ frozenMulti = dbform->datminmxid;
+ oldestmulti_datoid = HeapTupleGetOid(tuple);
}
}
return;
}
- /* Truncate CLOG to the oldest frozenxid */
+ /* Truncate CLOG and Multi to the oldest computed value */
TruncateCLOG(frozenXID);
+ TruncateMultiXact(frozenMulti);
/*
- * Update the wrap limit for GetNewTransactionId. Note: this function
- * will also signal the postmaster for an(other) autovac cycle if needed.
+ * Update the wrap limit for GetNewTransactionId and creation of new
+ * MultiXactIds. Note: these functions will also signal the postmaster for
+ * an(other) autovac cycle if needed. XXX should we avoid possibly
+ * signalling twice?
*/
- SetTransactionIdLimit(frozenXID, oldest_datoid);
+ SetTransactionIdLimit(frozenXID, oldestxid_datoid);
+ MultiXactAdvanceOldest(frozenMulti, oldestmulti_datoid);
}
#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/htup_details.h"
+#include "access/multixact.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "catalog/storage.h"
static TransactionId OldestXmin;
static TransactionId FreezeLimit;
+static MultiXactId MultiXactFrzLimit;
static BufferAccessStrategy vac_strategy;
double new_rel_tuples;
BlockNumber new_rel_allvisible;
TransactionId new_frozen_xid;
+ MultiXactId new_min_multi;
/* measure elapsed time iff autovacuum logging requires it */
if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
onerel->rd_rel->relisshared,
- &OldestXmin, &FreezeLimit, &freezeTableLimit);
+ &OldestXmin, &FreezeLimit, &freezeTableLimit,
+ &MultiXactFrzLimit);
scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
freezeTableLimit);
if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
new_frozen_xid = InvalidTransactionId;
+ new_min_multi = MultiXactFrzLimit;
+ if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
+ new_min_multi = InvalidMultiXactId;
+
vac_update_relstats(onerel,
new_rel_pages,
new_rel_tuples,
new_rel_allvisible,
vacrelstats->hasindex,
- new_frozen_xid);
+ new_frozen_xid,
+ new_min_multi);
/*
* Report results to the stats collector, too. An early terminated
* Each non-removable tuple must be checked to see if it needs
* freezing. Note we already have exclusive buffer lock.
*/
- if (heap_freeze_tuple(tuple.t_data, FreezeLimit))
+ if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
+ MultiXactFrzLimit))
frozen[nfrozen++] = offnum;
}
} /* scan along page */
XLogRecPtr recptr;
recptr = log_heap_freeze(onerel, buf, FreezeLimit,
- frozen, nfrozen);
+ MultiXactFrzLimit, frozen, nfrozen);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
- if (heap_tuple_needs_freeze(tupleheader, FreezeLimit, buf))
+ if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
+ MultiXactFrzLimit, buf))
return true;
} /* scan along page */
stats->num_index_tuples,
0,
false,
- InvalidTransactionId);
+ InvalidTransactionId,
+ InvalidMultiXactId);
ereport(elevel,
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
case CMD_SELECT:
/*
- * SELECT FOR UPDATE/SHARE and modifying CTEs need to mark tuples
+ * SELECT FOR [KEY] UPDATE/SHARE and modifying CTEs need to mark
+ * tuples
*/
if (queryDesc->plannedstmt->rowMarks != NIL ||
queryDesc->plannedstmt->hasModifyingCTE)
}
/*
- * Similarly, we have to lock relations selected FOR UPDATE/FOR SHARE
+ * Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE
* before we initialize the plan tree, else we'd be risking lock upgrades.
* While we are at it, build the ExecRowMark list.
*/
switch (rc->markType)
{
case ROW_MARK_EXCLUSIVE:
+ case ROW_MARK_NOKEYEXCLUSIVE:
case ROW_MARK_SHARE:
+ case ROW_MARK_KEYSHARE:
relid = getrelid(rc->rti, rangeTable);
relation = heap_open(relid, RowShareLock);
break;
}
/*
- * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
+ * close any relations selected FOR [KEY] UPDATE/SHARE, again keeping locks
*/
foreach(l, estate->es_rowMarks)
{
* epqstate - state for EvalPlanQual rechecking
* relation - table containing tuple
* rti - rangetable index of table containing tuple
+ * lockmode - requested tuple lock mode
* *tid - t_ctid from the outdated tuple (ie, next updated version)
* priorXmax - t_xmax from the outdated tuple
*
*
* Returns a slot containing the new candidate update/delete tuple, or
* NULL if we determine we shouldn't process the row.
+ *
+ * Note: properly, lockmode should be declared as enum LockTupleMode,
+ * but we use "int" to avoid having to include heapam.h in executor.h.
*/
TupleTableSlot *
EvalPlanQual(EState *estate, EPQState *epqstate,
- Relation relation, Index rti,
+ Relation relation, Index rti, int lockmode,
ItemPointer tid, TransactionId priorXmax)
{
TupleTableSlot *slot;
/*
* Get and lock the updated version of the row; if fail, return NULL.
*/
- copyTuple = EvalPlanQualFetch(estate, relation, LockTupleExclusive,
+ copyTuple = EvalPlanQualFetch(estate, relation, lockmode,
tid, priorXmax);
if (copyTuple == NULL)
test = heap_lock_tuple(relation, &tuple,
estate->es_output_cid,
lockmode, false /* wait */,
- &buffer, &hufd);
+ false, &buffer, &hufd);
/* We now have two pins on the buffer, get rid of one */
ReleaseBuffer(buffer);
/* updated, so look at the updated row */
tuple.t_self = tuple.t_data->t_ctid;
/* updated row should have xmin matching this xmax */
- priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
+ priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data);
ReleaseBuffer(buffer);
/* loop back to fetch next in chain */
}
tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
/* okay, try to lock the tuple */
- if (erm->markType == ROW_MARK_EXCLUSIVE)
- lockmode = LockTupleExclusive;
- else
- lockmode = LockTupleShared;
+ switch (erm->markType)
+ {
+ case ROW_MARK_EXCLUSIVE:
+ lockmode = LockTupleExclusive;
+ break;
+ case ROW_MARK_NOKEYEXCLUSIVE:
+ lockmode = LockTupleNoKeyExclusive;
+ break;
+ case ROW_MARK_SHARE:
+ lockmode = LockTupleShare;
+ break;
+ case ROW_MARK_KEYSHARE:
+ lockmode = LockTupleKeyShare;
+ break;
+ default:
+ elog(ERROR, "unsupported rowmark type");
+ lockmode = LockTupleNoKeyExclusive; /* keep compiler quiet */
+ break;
+ }
test = heap_lock_tuple(erm->relation, &tuple,
estate->es_output_cid,
- lockmode, erm->noWait,
+ lockmode, erm->noWait, true,
&buffer, &hufd);
ReleaseBuffer(buffer);
switch (test)
epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
+ LockTupleExclusive,
&hufd.ctid,
hufd.xmax);
if (!TupIsNull(epqslot))
}
else
{
+ LockTupleMode lockmode;
+
/*
* Check the constraints of the tuple
*
estate->es_output_cid,
estate->es_crosscheck_snapshot,
true /* wait for commit */,
- &hufd);
+ &hufd, &lockmode);
switch (result)
{
case HeapTupleSelfUpdated:
epqstate,
resultRelationDesc,
resultRelInfo->ri_RangeTableIndex,
+ lockmode,
&hufd.ctid,
hufd.xmax);
if (!TupIsNull(epqslot))
RowMarkClause *newnode = makeNode(RowMarkClause);
COPY_SCALAR_FIELD(rti);
- COPY_SCALAR_FIELD(forUpdate);
+ COPY_SCALAR_FIELD(strength);
COPY_SCALAR_FIELD(noWait);
COPY_SCALAR_FIELD(pushedDown);
LockingClause *newnode = makeNode(LockingClause);
COPY_NODE_FIELD(lockedRels);
- COPY_SCALAR_FIELD(forUpdate);
+ COPY_SCALAR_FIELD(strength);
COPY_SCALAR_FIELD(noWait);
return newnode;
_equalLockingClause(const LockingClause *a, const LockingClause *b)
{
COMPARE_NODE_FIELD(lockedRels);
- COMPARE_SCALAR_FIELD(forUpdate);
+ COMPARE_SCALAR_FIELD(strength);
COMPARE_SCALAR_FIELD(noWait);
return true;
_equalRowMarkClause(const RowMarkClause *a, const RowMarkClause *b)
{
COMPARE_SCALAR_FIELD(rti);
- COMPARE_SCALAR_FIELD(forUpdate);
+ COMPARE_SCALAR_FIELD(strength);
COMPARE_SCALAR_FIELD(noWait);
COMPARE_SCALAR_FIELD(pushedDown);
WRITE_NODE_TYPE("LOCKINGCLAUSE");
WRITE_NODE_FIELD(lockedRels);
- WRITE_BOOL_FIELD(forUpdate);
+ WRITE_ENUM_FIELD(strength, LockClauseStrength);
WRITE_BOOL_FIELD(noWait);
}
WRITE_NODE_TYPE("ROWMARKCLAUSE");
WRITE_UINT_FIELD(rti);
- WRITE_BOOL_FIELD(forUpdate);
+ WRITE_ENUM_FIELD(strength, LockClauseStrength);
WRITE_BOOL_FIELD(noWait);
WRITE_BOOL_FIELD(pushedDown);
}
READ_LOCALS(RowMarkClause);
READ_UINT_FIELD(rti);
- READ_BOOL_FIELD(forUpdate);
+ READ_ENUM_FIELD(strength, LockClauseStrength);
READ_BOOL_FIELD(noWait);
READ_BOOL_FIELD(pushedDown);
Assert(jointype != JOIN_RIGHT);
/*
- * Presently the executor cannot support FOR UPDATE/SHARE marking of rels
+ * Presently the executor cannot support FOR [KEY] UPDATE/SHARE marking of rels
* appearing on the nullable side of an outer join. (It's somewhat unclear
* what that would mean, anyway: what should we mark when a result row is
* generated from no element of the nullable relation?) So, complain if
- * any nullable rel is FOR UPDATE/SHARE.
+ * any nullable rel is FOR [KEY] UPDATE/SHARE.
*
* You might be wondering why this test isn't made far upstream in the
* parser. It's because the parser hasn't got enough info --- consider
(jointype == JOIN_FULL && bms_is_member(rc->rti, left_rels)))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE cannot be applied to the nullable side of an outer join")));
+ errmsg("SELECT FOR UPDATE/SHARE/KEY UPDATE/KEY SHARE cannot be applied to the nullable side of an outer join")));
}
sjinfo->syn_lefthand = left_rels;
returningLists = NIL;
/*
- * If there was a FOR UPDATE/SHARE clause, the LockRows node will
+ * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will
* have dealt with fetching non-locked marked rows, else we need
* to have ModifyTable do that.
*/
root->simple_rel_array = save_rel_array;
/*
- * If there was a FOR UPDATE/SHARE clause, the LockRows node will have
+ * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will have
* dealt with fetching non-locked marked rows, else we need to have
* ModifyTable do that.
*/
tlist);
/*
- * Can't handle FOR UPDATE/SHARE here (parser should have checked
+ * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have checked
* already, but let's make sure).
*/
if (parse->rowMarks)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT")));
+ errmsg("SELECT FOR UPDATE/SHARE/KEY UPDATE/KEY SHARE is not allowed with UNION/INTERSECT/EXCEPT")));
/*
* Calculate pathkeys that represent result ordering requirements
}
/*
- * If there is a FOR UPDATE/SHARE clause, add the LockRows node. (Note: we
+ * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node. (Note: we
* intentionally test parse->rowMarks not root->rowMarks here. If there
* are only non-locking rowmarks, they should be handled by the
* ModifyTable node instead.)
if (parse->rowMarks)
{
/*
- * We've got trouble if FOR UPDATE/SHARE appears inside grouping,
+ * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside grouping,
* since grouping renders a reference to individual tuple CTIDs
* invalid. This is also checked at parse time, but that's
* insufficient because of rule substitution, query pullup, etc.
else
{
/*
- * We only need rowmarks for UPDATE, DELETE, or FOR UPDATE/SHARE.
+ * We only need rowmarks for UPDATE, DELETE, or FOR [KEY] UPDATE/SHARE.
*/
if (parse->commandType != CMD_UPDATE &&
parse->commandType != CMD_DELETE)
/*
* We need to have rowmarks for all base relations except the target. We
* make a bitmapset of all base rels and then remove the items we don't
- * need or have FOR UPDATE/SHARE marks for.
+ * need or have FOR [KEY] UPDATE/SHARE marks for.
*/
rels = get_base_rel_indexes((Node *) parse->jointree);
if (parse->resultRelation)
PlanRowMark *newrc;
/*
- * Currently, it is syntactically impossible to have FOR UPDATE
+ * Currently, it is syntactically impossible to have FOR UPDATE et al
* applied to an update/delete target rel. If that ever becomes
* possible, we should drop the target from the PlanRowMark list.
*/
newrc = makeNode(PlanRowMark);
newrc->rti = newrc->prti = rc->rti;
newrc->rowmarkId = ++(root->glob->lastRowMarkId);
- if (rc->forUpdate)
- newrc->markType = ROW_MARK_EXCLUSIVE;
- else
- newrc->markType = ROW_MARK_SHARE;
+ switch (rc->strength)
+ {
+ case LCS_FORUPDATE:
+ newrc->markType = ROW_MARK_EXCLUSIVE;
+ break;
+ case LCS_FORNOKEYUPDATE:
+ newrc->markType = ROW_MARK_NOKEYEXCLUSIVE;
+ break;
+ case LCS_FORSHARE:
+ newrc->markType = ROW_MARK_SHARE;
+ break;
+ case LCS_FORKEYSHARE:
+ newrc->markType = ROW_MARK_KEYSHARE;
+ break;
+ }
newrc->noWait = rc->noWait;
newrc->isParent = false;
/*
- * Check for features that are not supported together with FOR UPDATE/SHARE.
+ * Check for features that are not supported together with FOR [KEY] UPDATE/SHARE.
*
* exported so planner can check again after rewriting, query pullup, etc
*/
if (qry->setOperations)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT")));
+ errmsg("SELECT FOR UPDATE/SHARE/FOR KEY UPDATE/FOR KEY SHARE is not allowed with UNION/INTERSECT/EXCEPT")));
if (qry->distinctClause != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE is not allowed with DISTINCT clause")));
+ errmsg("SELECT FOR UPDATE/SHARE/FOR KEY UPDATE/FOR KEY SHARE is not allowed with DISTINCT clause")));
if (qry->groupClause != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE is not allowed with GROUP BY clause")));
+ errmsg("SELECT FOR UPDATE/SHARE/FOR KEY UPDATE/FOR KEY SHARE is not allowed with GROUP BY clause")));
if (qry->havingQual != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE is not allowed with HAVING clause")));
+ errmsg("SELECT FOR UPDATE/SHARE/FOR KEY UPDATE/FOR KEY SHARE is not allowed with HAVING clause")));
if (qry->hasAggs)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE is not allowed with aggregate functions")));
+ errmsg("SELECT FOR UPDATE/SHARE/FOR KEY UPDATE/FOR KEY SHARE is not allowed with aggregate functions")));
if (qry->hasWindowFuncs)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE is not allowed with window functions")));
+ errmsg("SELECT FOR UPDATE/SHARE/FOR KEY UPDATE/FOR KEY SHARE is not allowed with window functions")));
if (expression_returns_set((Node *) qry->targetList))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE is not allowed with set-returning functions in the target list")));
+ errmsg("SELECT FOR UPDATE/SHARE/FOR KEY UPDATE/FOR KEY SHARE is not allowed with set-returning functions in the target list")));
}
/*
- * Transform a FOR UPDATE/SHARE clause
+ * Transform a FOR [KEY] UPDATE/SHARE clause
*
* This basically involves replacing names by integer relids.
*
/* make a clause we can pass down to subqueries to select all rels */
allrels = makeNode(LockingClause);
allrels->lockedRels = NIL; /* indicates all rels */
- allrels->forUpdate = lc->forUpdate;
+ allrels->strength = lc->strength;
allrels->noWait = lc->noWait;
if (lockedRels == NIL)
if (rte->relkind == RELKIND_FOREIGN_TABLE)
break;
applyLockingClause(qry, i,
- lc->forUpdate, lc->noWait, pushedDown);
+ lc->strength, lc->noWait, pushedDown);
rte->requiredPerms |= ACL_SELECT_FOR_UPDATE;
break;
case RTE_SUBQUERY:
applyLockingClause(qry, i,
- lc->forUpdate, lc->noWait, pushedDown);
+ lc->strength, lc->noWait, pushedDown);
/*
- * FOR UPDATE/SHARE of subquery is propagated to all of
+ * FOR [KEY] UPDATE/SHARE of subquery is propagated to all of
* subquery's rels, too. We could do this later (based on
* the marking of the subquery RTE) but it is convenient
* to have local knowledge in each query level about which
if (thisrel->catalogname || thisrel->schemaname)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("SELECT FOR UPDATE/SHARE must specify unqualified relation names"),
+ errmsg("SELECT FOR UPDATE/SHARE/KEY UPDATE/KEY SHARE must specify unqualified relation names"),
parser_errposition(pstate, thisrel->location)));
i = 0;
if (rte->relkind == RELKIND_FOREIGN_TABLE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE cannot be used with foreign table \"%s\"",
+ errmsg("SELECT FOR UPDATE/SHARE/KEY UPDATE/KEY SHARE cannot be used with foreign table \"%s\"",
rte->eref->aliasname),
parser_errposition(pstate, thisrel->location)));
applyLockingClause(qry, i,
- lc->forUpdate, lc->noWait,
+ lc->strength, lc->noWait,
pushedDown);
rte->requiredPerms |= ACL_SELECT_FOR_UPDATE;
break;
case RTE_SUBQUERY:
applyLockingClause(qry, i,
- lc->forUpdate, lc->noWait,
+ lc->strength, lc->noWait,
pushedDown);
/* see comment above */
transformLockingClause(pstate, rte->subquery,
case RTE_JOIN:
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE cannot be applied to a join"),
+ errmsg("SELECT FOR UPDATE/SHARE/KEY UPDATE/KEY SHARE cannot be applied to a join"),
parser_errposition(pstate, thisrel->location)));
break;
case RTE_FUNCTION:
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE cannot be applied to a function"),
+ errmsg("SELECT FOR UPDATE/SHARE/KEY UPDATE/KEY SHARE cannot be applied to a function"),
parser_errposition(pstate, thisrel->location)));
break;
case RTE_VALUES:
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE cannot be applied to VALUES"),
+ errmsg("SELECT FOR UPDATE/SHARE/KEY UPDATE/KEY SHARE cannot be applied to VALUES"),
parser_errposition(pstate, thisrel->location)));
break;
case RTE_CTE:
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("SELECT FOR UPDATE/SHARE cannot be applied to a WITH query"),
+ errmsg("SELECT FOR UPDATE/SHARE/KEY UPDATE/KEY SHARE cannot be applied to a WITH query"),
parser_errposition(pstate, thisrel->location)));
break;
default:
if (rt == NULL)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_TABLE),
- errmsg("relation \"%s\" in FOR UPDATE/SHARE clause not found in FROM clause",
+ errmsg("relation \"%s\" in FOR UPDATE/SHARE/KEY UPDATE/KEY SHARE clause not found in FROM clause",
thisrel->relname),
parser_errposition(pstate, thisrel->location)));
}
*/
void
applyLockingClause(Query *qry, Index rtindex,
- bool forUpdate, bool noWait, bool pushedDown)
+ LockClauseStrength strength, bool noWait, bool pushedDown)
{
RowMarkClause *rc;
if ((rc = get_parse_rowmark(qry, rtindex)) != NULL)
{
/*
- * If the same RTE is specified both FOR UPDATE and FOR SHARE, treat
- * it as FOR UPDATE. (Reasonable, since you can't take both a shared
- * and exclusive lock at the same time; it'll end up being exclusive
- * anyway.)
+ * If the same RTE is specified for more than one locking strength,
+ * treat it as the strongest. (Reasonable, since you can't take both a
+ * shared and exclusive lock at the same time; it'll end up being
+ * exclusive anyway.)
*
* We also consider that NOWAIT wins if it's specified both ways. This
* is a bit more debatable but raising an error doesn't seem helpful.
*
* And of course pushedDown becomes false if any clause is explicit.
*/
- rc->forUpdate |= forUpdate;
+ rc->strength = Max(rc->strength, strength);
rc->noWait |= noWait;
rc->pushedDown &= pushedDown;
return;
/* Make a new RowMarkClause */
rc = makeNode(RowMarkClause);
rc->rti = rtindex;
- rc->forUpdate = forUpdate;
+ rc->strength = strength;
rc->noWait = noWait;
rc->pushedDown = pushedDown;
qry->rowMarks = lappend(qry->rowMarks, rc);
%type <ival> OptTemp
%type <oncommit> OnCommitOption
+%type <ival> for_locking_strength
%type <node> for_locking_item
%type <list> for_locking_clause opt_for_locking_clause for_locking_items
%type <list> locked_rels_list
* The duplicative productions are annoying, but hard to get rid of without
* creating shift/reduce conflicts.
*
- * FOR UPDATE/SHARE may be before or after LIMIT/OFFSET.
+ * The locking clause (FOR UPDATE etc) may be before or after LIMIT/OFFSET.
* In <=7.2.X, LIMIT/OFFSET had to be after FOR UPDATE
- * We now support both orderings, but prefer LIMIT/OFFSET before FOR UPDATE/SHARE
+ * We now support both orderings, but prefer LIMIT/OFFSET before the locking
+ * clause.
* 2002-08-28 bjm
*/
select_no_parens:
;
for_locking_item:
- FOR UPDATE locked_rels_list opt_nowait
+ for_locking_strength locked_rels_list opt_nowait
{
LockingClause *n = makeNode(LockingClause);
- n->lockedRels = $3;
- n->forUpdate = TRUE;
- n->noWait = $4;
- $$ = (Node *) n;
- }
- | FOR SHARE locked_rels_list opt_nowait
- {
- LockingClause *n = makeNode(LockingClause);
- n->lockedRels = $3;
- n->forUpdate = FALSE;
- n->noWait = $4;
+ n->lockedRels = $2;
+ n->strength = $1;
+ n->noWait = $3;
$$ = (Node *) n;
}
;
+for_locking_strength: /* yields the LCS_* strength code matching the keywords that follow FOR */
+ FOR UPDATE { $$ = LCS_FORUPDATE; }
+ | FOR NO KEY UPDATE { $$ = LCS_FORNOKEYUPDATE; }
+ | FOR SHARE { $$ = LCS_FORSHARE; }
+ | FOR KEY SHARE { $$ = LCS_FORKEYSHARE; }
+ ;
+
locked_rels_list:
OF qualified_name_list { $$ = $2; }
| /* EMPTY */ { $$ = NIL; }
#include "access/heapam.h"
#include "access/htup_details.h"
+#include "access/multixact.h"
#include "access/reloptions.h"
#include "access/transam.h"
#include "access/xact.h"
static volatile sig_atomic_t got_SIGUSR2 = false;
static volatile sig_atomic_t got_SIGTERM = false;
-/* Comparison point for determining whether freeze_max_age is exceeded */
+/* Comparison points for determining whether freeze_max_age is exceeded */
static TransactionId recentXid;
+static MultiXactId recentMulti;
/* Default freeze ages to use for autovacuum (varies by database) */
static int default_freeze_min_age;
Oid adw_datid;
char *adw_name;
TransactionId adw_frozenxid;
+ MultiXactId adw_frozenmulti;
PgStat_StatDBEntry *adw_entry;
} avw_dbase;
List *dblist;
ListCell *cell;
TransactionId xidForceLimit;
+ MultiXactId multiForceLimit;
bool for_xid_wrap;
+ bool for_multi_wrap;
avw_dbase *avdb;
TimestampTz current_time;
bool skipit = false;
if (xidForceLimit < FirstNormalTransactionId)
xidForceLimit -= FirstNormalTransactionId;
+ /* Also determine the oldest datminmxid we will consider. */
+ recentMulti = ReadNextMultiXactId();
+ multiForceLimit = recentMulti - autovacuum_freeze_max_age;
+ if (multiForceLimit < FirstMultiXactId)
+ multiForceLimit -= FirstMultiXactId;
+
/*
* Choose a database to connect to. We pick the database that was least
* recently auto-vacuumed, or one that needs vacuuming to prevent Xid
- * wraparound-related data loss. If any db at risk of wraparound is
+ * wraparound-related data loss. If any db at risk of Xid wraparound is
* found, we pick the one with oldest datfrozenxid, independently of
- * autovacuum times.
+ * autovacuum times; similarly we pick the one with the oldest datminmxid
+ * if any is at risk of MultiXactId wraparound. Databases in Xid wraparound
+ * danger are given priority over those in MultiXactId wraparound danger.
*
* Note that a database with no stats entry is not considered, except for
* Xid wraparound purposes. The theory is that if no one has ever
*/
avdb = NULL;
for_xid_wrap = false;
+ for_multi_wrap = false;
current_time = GetCurrentTimestamp();
foreach(cell, dblist)
{
if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
{
if (avdb == NULL ||
- TransactionIdPrecedes(tmp->adw_frozenxid, avdb->adw_frozenxid))
+ TransactionIdPrecedes(tmp->adw_frozenxid,
+ avdb->adw_frozenxid))
avdb = tmp;
for_xid_wrap = true;
continue;
}
else if (for_xid_wrap)
continue; /* ignore not-at-risk DBs */
+ else if (MultiXactIdPrecedes(tmp->adw_frozenmulti, multiForceLimit))
+ {
+ if (avdb == NULL ||
+ MultiXactIdPrecedes(tmp->adw_frozenmulti,
+ avdb->adw_frozenmulti))
+ avdb = tmp;
+ for_multi_wrap = true;
+ continue;
+ }
+ else if (for_multi_wrap)
+ continue; /* ignore not-at-risk DBs */
/* Find pgstat entry if any */
tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
/* And do an appropriate amount of work */
recentXid = ReadNewTransactionId();
+ recentMulti = ReadNextMultiXactId();
do_autovacuum();
}
avdb->adw_datid = HeapTupleGetOid(tup);
avdb->adw_name = pstrdup(NameStr(pgdatabase->datname));
avdb->adw_frozenxid = pgdatabase->datfrozenxid;
+ avdb->adw_frozenmulti = pgdatabase->datminmxid;
/* this gets set later: */
avdb->adw_entry = NULL;
/* freeze parameters */
int freeze_max_age;
TransactionId xidForceLimit;
+ MultiXactId multiForceLimit;
AssertArg(classForm != NULL);
AssertArg(OidIsValid(relid));
force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) &&
TransactionIdPrecedes(classForm->relfrozenxid,
xidForceLimit));
+ if (!force_vacuum)
+ {
+ multiForceLimit = recentMulti - autovacuum_freeze_max_age;
+ if (multiForceLimit < FirstMultiXactId)
+ multiForceLimit -= FirstMultiXactId;
+ force_vacuum = MultiXactIdPrecedes(classForm->relminmxid,
+ multiForceLimit);
+ }
*wraparound = force_vacuum;
/* User disabled it in pg_class.reloptions? (But ignore if at risk) */
static void rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte,
Relation target_relation);
static void markQueryForLocking(Query *qry, Node *jtnode,
- bool forUpdate, bool noWait, bool pushedDown);
+ LockClauseStrength strength, bool noWait, bool pushedDown);
static List *matchLocks(CmdType event, RuleLock *rulelocks,
int varno, Query *parsetree);
static Query *fireRIRrules(Query *parsetree, List *activeRIRs,
* These locks will ensure that the relation schemas don't change under us
* while we are rewriting and planning the query.
*
- * forUpdatePushedDown indicates that a pushed-down FOR UPDATE/SHARE applies
+ * forUpdatePushedDown indicates that a pushed-down FOR [KEY] UPDATE/SHARE applies
* to the current subquery, requiring all rels to be opened with RowShareLock.
* This should always be false at the start of the recursion.
*
*
* If the relation is the query's result relation, then we
* need RowExclusiveLock. Otherwise, check to see if the
- * relation is accessed FOR UPDATE/SHARE or not. We can't
+ * relation is accessed FOR [KEY] UPDATE/SHARE or not. We can't
* just grab AccessShareLock because then the executor would
* be trying to upgrade the lock, leading to possible
* deadlocks.
}
/*
- * If FOR UPDATE/SHARE of view, be sure we get right initial lock on the
+ * If FOR [KEY] UPDATE/SHARE of view, be sure we get right initial lock on the
* relations it references.
*/
rc = get_parse_rowmark(parsetree, rt_index);
rte->modifiedCols = NULL;
/*
- * If FOR UPDATE/SHARE of view, mark all the contained tables as implicit
- * FOR UPDATE/SHARE, the same as the parser would have done if the view's
+ * If FOR [KEY] UPDATE/SHARE of view, mark all the contained tables as implicit
+ * FOR [KEY] UPDATE/SHARE, the same as the parser would have done if the view's
* subquery had been written out explicitly.
*
* Note: we don't consider forUpdatePushedDown here; such marks will be
*/
if (rc != NULL)
markQueryForLocking(rule_action, (Node *) rule_action->jointree,
- rc->forUpdate, rc->noWait, true);
+ rc->strength, rc->noWait, true);
return parsetree;
}
/*
- * Recursively mark all relations used by a view as FOR UPDATE/SHARE.
+ * Recursively mark all relations used by a view as FOR [KEY] UPDATE/SHARE.
*
* This may generate an invalid query, eg if some sub-query uses an
* aggregate. We leave it to the planner to detect that.
*/
static void
markQueryForLocking(Query *qry, Node *jtnode,
- bool forUpdate, bool noWait, bool pushedDown)
+ LockClauseStrength strength, bool noWait, bool pushedDown)
{
if (jtnode == NULL)
return;
/* ignore foreign tables */
if (rte->relkind != RELKIND_FOREIGN_TABLE)
{
- applyLockingClause(qry, rti, forUpdate, noWait, pushedDown);
+ applyLockingClause(qry, rti, strength, noWait, pushedDown);
rte->requiredPerms |= ACL_SELECT_FOR_UPDATE;
}
}
else if (rte->rtekind == RTE_SUBQUERY)
{
- applyLockingClause(qry, rti, forUpdate, noWait, pushedDown);
- /* FOR UPDATE/SHARE of subquery is propagated to subquery's rels */
+ applyLockingClause(qry, rti, strength, noWait, pushedDown);
+ /* FOR [KEY] UPDATE/SHARE of subquery is propagated to subquery's rels */
markQueryForLocking(rte->subquery, (Node *) rte->subquery->jointree,
- forUpdate, noWait, true);
+ strength, noWait, true);
}
/* other RTE types are unaffected by FOR UPDATE */
}
ListCell *l;
foreach(l, f->fromlist)
- markQueryForLocking(qry, lfirst(l), forUpdate, noWait, pushedDown);
+ markQueryForLocking(qry, lfirst(l), strength, noWait, pushedDown);
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
- markQueryForLocking(qry, j->larg, forUpdate, noWait, pushedDown);
- markQueryForLocking(qry, j->rarg, forUpdate, noWait, pushedDown);
+ markQueryForLocking(qry, j->larg, strength, noWait, pushedDown);
+ markQueryForLocking(qry, j->rarg, strength, noWait, pushedDown);
}
else
elog(ERROR, "unrecognized node type: %d",
return lockhash;
}
+/*
+ * DoLockModesConflict -- given two lock modes, return whether they would
+ * conflict.
+ *
+ * The answer is read from the conflict table of the default lock method
+ * (LockMethods[DEFAULT_LOCKMETHOD]): the result is true when the bit for
+ * mode2 is set in the conflict mask stored for mode1, and false otherwise.
+ *
+ * Nothing is looked up or acquired here; only the lock method's conflict
+ * table is consulted.
+ *
+ * NOTE(review): mode1 is used as an array index without a range check, so
+ * callers presumably must pass lock modes that are valid for the default
+ * lock method -- confirm.
+ */
+bool
+DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2)
+{
+ LockMethod lockMethodTable = LockMethods[DEFAULT_LOCKMETHOD];
+
+ if (lockMethodTable->conflictTab[mode1] & LOCKBIT_ON(mode2))
+ return true;
+
+ return false;
+}
+
/*
* LockHasWaiters -- look up 'locktag' and check if releasing this
* lock would wake up other processes waiting for it.
return hasWaiters;
}
-
/*
* LockAcquire -- Check for lock conflicts, sleep if conflict found,
* set lock if/when no conflicts.
case HEAPTUPLE_RECENTLY_DEAD:
if (!visible)
return;
- xid = HeapTupleHeaderGetXmax(tuple->t_data);
+ xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
- xid = HeapTupleHeaderGetXmax(tuple->t_data);
+ xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
break;
case HEAPTUPLE_INSERT_IN_PROGRESS:
xid = HeapTupleHeaderGetXmin(tuple->t_data);
{
case CMD_SELECT:
if (stmt->rowMarks != NIL)
- return false; /* SELECT FOR UPDATE/SHARE */
+ return false; /* SELECT FOR [KEY] UPDATE/SHARE */
else if (stmt->hasModifyingCTE)
return false; /* data-modifying CTE */
else
else if (stmt->rowMarks != NIL)
{
/* not 100% but probably close enough */
- if (((PlanRowMark *) linitial(stmt->rowMarks))->markType == ROW_MARK_EXCLUSIVE)
- tag = "SELECT FOR UPDATE";
- else
- tag = "SELECT FOR SHARE";
+ switch (((PlanRowMark *) linitial(stmt->rowMarks))->markType)
+ {
+ case ROW_MARK_EXCLUSIVE:
+ tag = "SELECT FOR UPDATE";
+ break;
+ case ROW_MARK_NOKEYEXCLUSIVE:
+ tag = "SELECT FOR NO KEY UPDATE";
+ break;
+ case ROW_MARK_SHARE:
+ tag = "SELECT FOR SHARE";
+ break;
+ case ROW_MARK_KEYSHARE:
+ tag = "SELECT FOR KEY SHARE";
+ break;
+ case ROW_MARK_REFERENCE:
+ case ROW_MARK_COPY:
+ tag = "SELECT";
+ break;
+ default:
+ tag = "???";
+ break;
+ }
}
else
tag = "SELECT";
else if (stmt->rowMarks != NIL)
{
/* not 100% but probably close enough */
- if (((RowMarkClause *) linitial(stmt->rowMarks))->forUpdate)
- tag = "SELECT FOR UPDATE";
- else
- tag = "SELECT FOR SHARE";
+ switch (((RowMarkClause *) linitial(stmt->rowMarks))->strength)
+ {
+ case LCS_FORKEYSHARE:
+ tag = "SELECT FOR KEY SHARE";
+ break;
+ case LCS_FORSHARE:
+ tag = "SELECT FOR SHARE";
+ break;
+ case LCS_FORNOKEYUPDATE:
+ tag = "SELECT FOR NO KEY UPDATE";
+ break;
+ case LCS_FORUPDATE:
+ tag = "SELECT FOR UPDATE";
+ break;
+ default:
+ tag = "???";
+ break;
+ }
}
else
tag = "SELECT";
* Get the relation descriptors of the FK and PK tables.
*
* pk_rel is opened in RowShareLock mode since that's what our eventual
- * SELECT FOR SHARE will get on it.
+ * SELECT FOR KEY SHARE will get on it.
*/
fk_rel = trigdata->tg_relation;
pk_rel = heap_open(riinfo->pk_relid, RowShareLock);
/* ----------
* The query string built is
- * SELECT 1 FROM ONLY <pktable> WHERE pkatt1 = $1 [AND ...] FOR SHARE
+ * SELECT 1 FROM ONLY <pktable> x WHERE pkatt1 = $1 [AND ...]
+ * FOR KEY SHARE OF x
* The type id's for the $ parameters are those of the
* corresponding FK attributes.
* ----------
querysep = "AND";
queryoids[i] = fk_type;
}
- appendStringInfo(&querybuf, " FOR SHARE OF x");
+ appendStringInfo(&querybuf, " FOR KEY SHARE OF x");
/* Prepare and save the plan */
qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
/* ----------
* The query string built is
- * SELECT 1 FROM ONLY <pktable> WHERE pkatt1 = $1 [AND ...] FOR SHARE
+ * SELECT 1 FROM ONLY <pktable> x WHERE pkatt1 = $1 [AND ...]
+ * FOR KEY SHARE OF x
* The type id's for the $ parameters are those of the
* PK attributes themselves.
* ----------
querysep = "AND";
queryoids[i] = pk_type;
}
- appendStringInfo(&querybuf, " FOR SHARE OF x");
+ appendStringInfo(&querybuf, " FOR KEY SHARE OF x");
/* Prepare and save the plan */
qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
* Get the relation descriptors of the FK and PK tables and the old tuple.
*
* fk_rel is opened in RowShareLock mode since that's what our eventual
- * SELECT FOR SHARE will get on it.
+ * SELECT FOR KEY SHARE will get on it.
*/
fk_rel = heap_open(riinfo->fk_relid, RowShareLock);
pk_rel = trigdata->tg_relation;
/* ----------
* The query string built is
- * SELECT 1 FROM ONLY <fktable> WHERE $1 = fkatt1 [AND ...]
+ * SELECT 1 FROM ONLY <fktable> x WHERE $1 = fkatt1 [AND ...]
+ * FOR KEY SHARE OF x
* The type id's for the $ parameters are those of the
* corresponding PK attributes.
* ----------
querysep = "AND";
queryoids[i] = pk_type;
}
- appendStringInfo(&querybuf, " FOR SHARE OF x");
+ appendStringInfo(&querybuf, " FOR KEY SHARE OF x");
/* Prepare and save the plan */
qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
* old tuple.
*
* fk_rel is opened in RowShareLock mode since that's what our eventual
- * SELECT FOR SHARE will get on it.
+ * SELECT FOR KEY SHARE will get on it.
*/
fk_rel = heap_open(riinfo->fk_relid, RowShareLock);
pk_rel = trigdata->tg_relation;
querysep = "AND";
queryoids[i] = pk_type;
}
- appendStringInfo(&querybuf, " FOR SHARE OF x");
+ appendStringInfo(&querybuf, " FOR KEY SHARE OF x");
/* Prepare and save the plan */
qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
get_rule_expr(query->limitCount, context, false);
}
- /* Add FOR UPDATE/SHARE clauses if present */
+ /* Add FOR [KEY] UPDATE/SHARE clauses if present */
if (query->hasForUpdate)
{
foreach(l, query->rowMarks)
if (rc->pushedDown)
continue;
- if (rc->forUpdate)
- appendContextKeyword(context, " FOR UPDATE",
- -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
- else
- appendContextKeyword(context, " FOR SHARE",
- -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ switch (rc->strength)
+ {
+ case LCS_FORKEYSHARE:
+ appendContextKeyword(context, " FOR KEY SHARE",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ break;
+ case LCS_FORSHARE:
+ appendContextKeyword(context, " FOR SHARE",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ break;
+ case LCS_FORNOKEYUPDATE:
+ appendContextKeyword(context, " FOR NO KEY UPDATE",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ break;
+ case LCS_FORUPDATE:
+ appendContextKeyword(context, " FOR UPDATE",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ break;
+ }
+
appendStringInfo(buf, " OF %s",
quote_identifier(get_rtable_name(rc->rti,
context)));
#include <fcntl.h>
#include <unistd.h>
+#include "access/htup_details.h"
+#include "access/multixact.h"
#include "access/reloptions.h"
#include "access/sysattr.h"
#include "access/transam.h"
-#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
* the XIDs that will be put into the new relation contents.
*/
void
-RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
+RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid,
+ MultiXactId minmulti)
{
Oid newrelfilenode;
RelFileNodeBackend newrnode;
relation->rd_rel->relkind == RELKIND_SEQUENCE) ?
freezeXid == InvalidTransactionId :
TransactionIdIsNormal(freezeXid));
+ Assert(TransactionIdIsNormal(freezeXid) == MultiXactIdIsValid(minmulti));
/* Allocate a new relfilenode */
newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL,
classform->relallvisible = 0;
}
classform->relfrozenxid = freezeXid;
+ classform->relminmxid = minmulti;
simple_heap_update(pg_class, &tuple->t_self, tuple);
CatalogUpdateIndexes(pg_class, tuple);
* simple index keys, but attributes used in expressions and partial-index
* predicates.)
*
+ * If "keyAttrs" is true, only attributes that can be referenced by foreign
+ * keys are considered.
+ *
* Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
* we can include system attributes (e.g., OID) in the bitmap representation.
*
* be bms_free'd when not needed anymore.
*/
Bitmapset *
-RelationGetIndexAttrBitmap(Relation relation)
+RelationGetIndexAttrBitmap(Relation relation, bool keyAttrs)
{
Bitmapset *indexattrs;
+ Bitmapset *uindexattrs;
List *indexoidlist;
ListCell *l;
MemoryContext oldcxt;
/* Quick exit if we already computed the result. */
if (relation->rd_indexattr != NULL)
- return bms_copy(relation->rd_indexattr);
+ return bms_copy(keyAttrs ? relation->rd_keyattr : relation->rd_indexattr);
/* Fast path if definitely no indexes */
if (!RelationGetForm(relation)->relhasindex)
* won't be returned at all by RelationGetIndexList.
*/
indexattrs = NULL;
+ uindexattrs = NULL;
foreach(l, indexoidlist)
{
Oid indexOid = lfirst_oid(l);
Relation indexDesc;
IndexInfo *indexInfo;
int i;
+ bool isKey;
indexDesc = index_open(indexOid, AccessShareLock);
/* Extract index key information from the index's pg_index row */
indexInfo = BuildIndexInfo(indexDesc);
+ /* Can this index be referenced by a foreign key? */
+ isKey = indexInfo->ii_Unique &&
+ indexInfo->ii_Expressions == NIL &&
+ indexInfo->ii_Predicate == NIL;
+
/* Collect simple attribute references */
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
{
int attrnum = indexInfo->ii_KeyAttrNumbers[i];
if (attrnum != 0)
+ {
indexattrs = bms_add_member(indexattrs,
attrnum - FirstLowInvalidHeapAttributeNumber);
+ if (isKey)
+ uindexattrs = bms_add_member(uindexattrs,
+ attrnum - FirstLowInvalidHeapAttributeNumber);
+ }
}
/* Collect all attributes used in expressions, too */
/* Now save a copy of the bitmap in the relcache entry. */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
relation->rd_indexattr = bms_copy(indexattrs);
+ relation->rd_keyattr = bms_copy(uindexattrs);
MemoryContextSwitchTo(oldcxt);
/* We return our original working copy for caller to play with */
- return indexattrs;
+ return keyAttrs ? uindexattrs : indexattrs;
}
/*
{
CommandId cid = HeapTupleHeaderGetRawCommandId(tup);
- /* We do not store cmax when locking a tuple */
- Assert(!(tup->t_infomask & (HEAP_MOVED | HEAP_IS_LOCKED)));
- Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tup)));
+ Assert(!(tup->t_infomask & HEAP_MOVED));
+ Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tup)));
if (tup->t_infomask & HEAP_COMBOCID)
return GetRealCmax(cid);
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true;
- if (tuple->t_infomask & HEAP_IS_LOCKED) /* not deleter */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) /* not deleter */
return true;
- Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));
+ if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ TransactionId xmax;
+
+ xmax = HeapTupleGetUpdateXid(tuple);
+ if (!TransactionIdIsValid(xmax))
+ return true;
+
+ /* updating subtransaction must have aborted */
+ if (!TransactionIdIsCurrentTransactionId(xmax))
+ return true;
+ else
+ return false;
+ }
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
{
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
return false; /* updated by other */
}
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- /* MultiXacts are currently only allowed to lock tuples */
- Assert(tuple->t_infomask & HEAP_IS_LOCKED);
+ TransactionId xmax;
+
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
+ return true;
+
+ xmax = HeapTupleGetUpdateXid(tuple);
+ if (!TransactionIdIsValid(xmax))
+ return true;
+ if (TransactionIdIsCurrentTransactionId(xmax))
+ return false;
+ if (TransactionIdIsInProgress(xmax))
+ return true;
+ if (TransactionIdDidCommit(xmax))
+ return false;
return true;
}
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
{
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
return false;
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
return true;
- if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
/* xmax transaction committed */
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
InvalidTransactionId);
}
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetXmax(tuple));
+ HeapTupleHeaderGetRawXmax(tuple));
return false;
}
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true;
- if (tuple->t_infomask & HEAP_IS_LOCKED) /* not deleter */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) /* not deleter */
return true;
- Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));
+ if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ TransactionId xmax;
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
+ xmax = HeapTupleGetUpdateXid(tuple);
+ if (!TransactionIdIsValid(xmax))
+ return true;
+
+ /* updating subtransaction must have aborted */
+ if (!TransactionIdIsCurrentTransactionId(xmax))
+ return true;
+ else
+ return false;
+ }
+
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
{
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
return false;
}
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- /* MultiXacts are currently only allowed to lock tuples */
- Assert(tuple->t_infomask & HEAP_IS_LOCKED);
+ TransactionId xmax;
+
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
+ return true;
+
+ xmax = HeapTupleGetUpdateXid(tuple);
+ if (!TransactionIdIsValid(xmax))
+ return true;
+ if (TransactionIdIsCurrentTransactionId(xmax))
+ {
+ if (HeapTupleHeaderGetCmax(tuple) >= GetCurrentCommandId(false))
+ return true; /* deleted after scan started */
+ else
+ return false; /* deleted before scan started */
+ }
+ if (TransactionIdIsInProgress(xmax))
+ return true;
+ if (TransactionIdDidCommit(xmax))
+ return false;
return true;
}
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
{
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
if (HeapTupleHeaderGetCmax(tuple) >= GetCurrentCommandId(false))
return true; /* deleted after scan started */
return false; /* deleted before scan started */
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
return true;
- if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
/* xmax transaction committed */
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
InvalidTransactionId);
}
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetXmax(tuple));
+ HeapTupleHeaderGetRawXmax(tuple));
return false;
}
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return HeapTupleMayBeUpdated;
- if (tuple->t_infomask & HEAP_IS_LOCKED) /* not deleter */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) /* not deleter */
return HeapTupleMayBeUpdated;
- Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));
+ if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ TransactionId xmax;
+
+ xmax = HeapTupleGetUpdateXid(tuple);
+ if (!TransactionIdIsValid(xmax))
+ return HeapTupleMayBeUpdated;
+
+ /* updating subtransaction must have aborted */
+ if (!TransactionIdIsCurrentTransactionId(xmax))
+ return HeapTupleMayBeUpdated;
+ else
+ {
+ if (HeapTupleHeaderGetCmax(tuple) >= curcid)
+ return HeapTupleSelfUpdated; /* updated after scan started */
+ else
+ return HeapTupleInvisible; /* updated before scan started */
+ }
+ }
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
{
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return HeapTupleMayBeUpdated;
return HeapTupleUpdated; /* updated by other */
}
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- /* MultiXacts are currently only allowed to lock tuples */
- Assert(tuple->t_infomask & HEAP_IS_LOCKED);
+ TransactionId xmax;
- if (MultiXactIdIsRunning(HeapTupleHeaderGetXmax(tuple)))
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
+ {
+ /*
+ * If it's only locked but neither EXCL_LOCK nor KEYSHR_LOCK
+ * is set, it cannot possibly be running. Otherwise need to
+ * check.
+ */
+ if ((tuple->t_infomask & (HEAP_XMAX_EXCL_LOCK |
+ HEAP_XMAX_KEYSHR_LOCK)) &&
+ MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple)))
+ return HeapTupleBeingUpdated;
+
+ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
+ return HeapTupleMayBeUpdated;
+ }
+
+ xmax = HeapTupleGetUpdateXid(tuple);
+ if (!TransactionIdIsValid(xmax))
+ {
+ if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple)))
+ return HeapTupleBeingUpdated;
+
+ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
+ return HeapTupleMayBeUpdated;
+ }
+
+ if (TransactionIdIsCurrentTransactionId(xmax))
+ {
+ if (HeapTupleHeaderGetCmax(tuple) >= curcid)
+ return HeapTupleSelfUpdated; /* updated after scan started */
+ else
+ return HeapTupleInvisible; /* updated before scan started */
+ }
+
+ if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple)))
return HeapTupleBeingUpdated;
- SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
- InvalidTransactionId);
+
+ if (TransactionIdDidCommit(xmax))
+ return HeapTupleUpdated;
+ /* it must have aborted or crashed */
+ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
return HeapTupleMayBeUpdated;
}
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
{
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return HeapTupleMayBeUpdated;
if (HeapTupleHeaderGetCmax(tuple) >= curcid)
return HeapTupleSelfUpdated; /* updated after scan started */
return HeapTupleInvisible; /* updated before scan started */
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
return HeapTupleBeingUpdated;
- if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
/* xmax transaction committed */
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
InvalidTransactionId);
}
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetXmax(tuple));
+ HeapTupleHeaderGetRawXmax(tuple));
return HeapTupleUpdated; /* updated by other */
}
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true;
- if (tuple->t_infomask & HEAP_IS_LOCKED) /* not deleter */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) /* not deleter */
return true;
- Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));
+ if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ TransactionId xmax;
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
+ xmax = HeapTupleGetUpdateXid(tuple);
+ if (!TransactionIdIsValid(xmax))
+ return true;
+
+ /* updating subtransaction must have aborted */
+ if (!TransactionIdIsCurrentTransactionId(xmax))
+ return true;
+ else
+ return false;
+ }
+
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
{
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
return false; /* updated by other */
}
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- /* MultiXacts are currently only allowed to lock tuples */
- Assert(tuple->t_infomask & HEAP_IS_LOCKED);
+ TransactionId xmax;
+
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
+ return true;
+
+ xmax = HeapTupleGetUpdateXid(tuple);
+ if (!TransactionIdIsValid(xmax))
+ return true;
+ if (TransactionIdIsCurrentTransactionId(xmax))
+ return false;
+ if (TransactionIdIsInProgress(xmax))
+ {
+ snapshot->xmax = xmax;
+ return true;
+ }
+ if (TransactionIdDidCommit(xmax))
+ return false;
return true;
}
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
{
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
return false;
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
{
- snapshot->xmax = HeapTupleHeaderGetXmax(tuple);
+ snapshot->xmax = HeapTupleHeaderGetRawXmax(tuple);
return true;
}
- if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
/* xmax transaction committed */
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
{
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
InvalidTransactionId);
}
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetXmax(tuple));
+ HeapTupleHeaderGetRawXmax(tuple));
return false; /* updated by other */
}
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true;
- if (tuple->t_infomask & HEAP_IS_LOCKED) /* not deleter */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) /* not deleter */
return true;
- Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));
+ if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ TransactionId xmax;
+
+ xmax = HeapTupleGetUpdateXid(tuple);
+ if (!TransactionIdIsValid(xmax))
+ return true;
+
+ /* updating subtransaction must have aborted */
+ if (!TransactionIdIsCurrentTransactionId(xmax))
+ return true;
+ else if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
+ return true; /* updated after scan started */
+ else
+ return false; /* updated before scan started */
+ }
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
return true;
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- /* MultiXacts are currently only allowed to lock tuples */
- Assert(tuple->t_infomask & HEAP_IS_LOCKED);
+ TransactionId xmax;
+
+ /* already checked above */
+ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
+
+ xmax = HeapTupleGetUpdateXid(tuple);
+ if (!TransactionIdIsValid(xmax))
+ return true;
+ if (TransactionIdIsCurrentTransactionId(xmax))
+ {
+ if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
+ return true; /* deleted after scan started */
+ else
+ return false; /* deleted before scan started */
+ }
+ if (TransactionIdIsInProgress(xmax))
+ return true;
+ if (TransactionIdDidCommit(xmax))
+ {
+ /* updating transaction committed, but when? */
+ if (XidInMVCCSnapshot(xmax, snapshot))
+ return true; /* treat as still in progress */
+ return false;
+ }
return true;
}
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
{
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
{
if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
return true; /* deleted after scan started */
return false; /* deleted before scan started */
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
return true;
- if (!TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
/* xmax transaction committed */
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetXmax(tuple));
+ HeapTupleHeaderGetRawXmax(tuple));
}
/*
* OK, the deleting transaction committed too ... but when?
*/
- if (XidInMVCCSnapshot(HeapTupleHeaderGetXmax(tuple), snapshot))
+ if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot))
return true; /* treat as still in progress */
return false;
{
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return HEAPTUPLE_INSERT_IN_PROGRESS;
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return HEAPTUPLE_INSERT_IN_PROGRESS;
/* inserted and then deleted by same xact */
return HEAPTUPLE_DELETE_IN_PROGRESS;
if (tuple->t_infomask & HEAP_XMAX_INVALID)
return HEAPTUPLE_LIVE;
- if (tuple->t_infomask & HEAP_IS_LOCKED)
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
{
/*
* "Deleting" xact really only locked it, so the tuple is live in any
{
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- if (MultiXactIdIsRunning(HeapTupleHeaderGetXmax(tuple)))
+ /*
+ * If it's only locked but neither EXCL_LOCK nor KEYSHR_LOCK
+ * are set, it cannot possibly be running; otherwise have to
+ * check.
+ */
+ if ((tuple->t_infomask & (HEAP_XMAX_EXCL_LOCK |
+ HEAP_XMAX_KEYSHR_LOCK)) &&
+ MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple)))
return HEAPTUPLE_LIVE;
+ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
+
}
else
{
- if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
return HEAPTUPLE_LIVE;
+ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
+ InvalidTransactionId);
}
-
- /*
- * We don't really care whether xmax did commit, abort or crash.
- * We know that xmax did lock the tuple, but it did not and will
- * never actually update it.
- */
- SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
- InvalidTransactionId);
}
+
+ /*
+ * We don't really care whether xmax did commit, abort or crash.
+ * We know that xmax did lock the tuple, but it did not and will
+ * never actually update it.
+ */
+
return HEAPTUPLE_LIVE;
}
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- /* MultiXacts are currently only allowed to lock tuples */
- Assert(tuple->t_infomask & HEAP_IS_LOCKED);
- return HEAPTUPLE_LIVE;
+	/*
+	 * Xmax is a MultiXactId that may carry an updater in addition to
+	 * lockers.  Resolve the updating Xid (if any) and classify the tuple
+	 * by that transaction's fate.
+	 */
+	TransactionId xmax;
+
+	if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple)))
+	{
+		/* already checked above */
+		Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
+
+		xmax = HeapTupleGetUpdateXid(tuple);
+		if (!TransactionIdIsValid(xmax))
+			return HEAPTUPLE_LIVE;
+		if (TransactionIdIsInProgress(xmax))
+			return HEAPTUPLE_DELETE_IN_PROGRESS;
+		else if (TransactionIdDidCommit(xmax))
+			/* there are still lockers around -- can't return DEAD here */
+			return HEAPTUPLE_RECENTLY_DEAD;
+		/* updating transaction aborted */
+		return HEAPTUPLE_LIVE;
+	}
+
+	Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED));
+
+	xmax = HeapTupleGetUpdateXid(tuple);
+	if (!TransactionIdIsValid(xmax))
+		return HEAPTUPLE_LIVE;
+	/* multi is not running -- updating xact cannot be */
+	Assert(!TransactionIdIsInProgress(xmax));
+	if (TransactionIdDidCommit(xmax))
+	{
+		/*
+		 * Deleter committed, but perhaps it was recent enough that some
+		 * open transactions could still see the tuple.
+		 */
+		if (!TransactionIdPrecedes(xmax, OldestXmin))
+			return HEAPTUPLE_RECENTLY_DEAD;
+
+		/* Otherwise, it's dead and removable */
+		return HEAPTUPLE_DEAD;
+	}
+
+	/*
+	 * Not in Progress, Not Committed, so either Aborted or crashed.
+	 */
+	SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
+	return HEAPTUPLE_LIVE;
}
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
{
- if (TransactionIdIsInProgress(HeapTupleHeaderGetXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
return HEAPTUPLE_DELETE_IN_PROGRESS;
- else if (TransactionIdDidCommit(HeapTupleHeaderGetXmax(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetXmax(tuple));
+ HeapTupleHeaderGetRawXmax(tuple));
else
{
/*
* Deleter committed, but perhaps it was recent enough that some open
* transactions could still see the tuple.
*/
- if (!TransactionIdPrecedes(HeapTupleHeaderGetXmax(tuple), OldestXmin))
+ if (!TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin))
return HEAPTUPLE_RECENTLY_DEAD;
/* Otherwise, it's dead and removable */
/*
* If the inserting transaction committed, but any deleting transaction
- * aborted, the tuple is still alive. Likewise, if XMAX is a lock rather
- * than a delete, the tuple is still alive.
+ * aborted, the tuple is still alive.
*/
- if (tuple->t_infomask &
- (HEAP_XMAX_INVALID | HEAP_IS_LOCKED | HEAP_XMAX_IS_MULTI))
+ if (tuple->t_infomask & HEAP_XMAX_INVALID)
+ return false;
+
+ /*
+ * If the XMAX is just a lock, the tuple is still alive.
+ */
+ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
+ return false;
+
+ /*
+ * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
+ * know without checking pg_multixact.
+ */
+ if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
return false;
/* If deleter isn't known to have committed, assume it's still running. */
return false;
/* Deleter committed, so tuple is dead if the XID is old enough. */
- return TransactionIdPrecedes(HeapTupleHeaderGetXmax(tuple), OldestXmin);
+ return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin);
}
/*
return false;
}
+
+/*
+ * HeapTupleHeaderIsOnlyLocked
+ *		Does the tuple's Xmax amount to a lock only, with no effective update?
+ *
+ * When Xmax is a plain Xid the infomask bits answer the question directly.
+ * When it is a MultiXactId we must additionally look up the updating member
+ * and make sure that transaction did not abort.
+ *
+ * This lives here because it obeys the same time qualification rules that
+ * are laid out at the top of this file.
+ */
+bool
+HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
+{
+	TransactionId update_xid;
+
+	/*
+	 * The infomask alone settles it when Xmax is marked invalid (no update
+	 * can exist) or is flagged as being a locker only.
+	 */
+	if (tuple->t_infomask & (HEAP_XMAX_INVALID | HEAP_XMAX_LOCK_ONLY))
+		return true;
+
+	/* an invalid raw Xmax cannot represent an update either */
+	if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
+		return true;
+
+	/*
+	 * A plain Xid without HEAP_XMAX_LOCK_ONLY set necessarily means the
+	 * tuple was updated.
+	 */
+	if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
+		return false;
+
+	/* For a multi, find the updating member; it might have aborted. */
+	update_xid = HeapTupleGetUpdateXid(tuple);
+	if (!TransactionIdIsValid(update_xid))	/* shouldn't happen .. */
+		return true;
+
+	/*
+	 * The update counts if its transaction is ours, still running, or
+	 * committed.  Anything else means it aborted or crashed, which leaves
+	 * the tuple merely locked.
+	 */
+	return !(TransactionIdIsCurrentTransactionId(update_xid) ||
+			 TransactionIdIsInProgress(update_xid) ||
+			 TransactionIdDidCommit(update_xid));
+}
ControlFile.checkPointCopy.oldestXidDB);
printf(_("Latest checkpoint's oldestActiveXID: %u\n"),
ControlFile.checkPointCopy.oldestActiveXid);
+	/*
+	 * The label must read "oldestMultiXid": that is the exact string the
+	 * control-data parser earlier in this patch searches for
+	 * ("Latest checkpoint's oldestMultiXid:"), and it matches the label used
+	 * by the other control-values printout in this patch.
+	 */
+	printf(_("Latest checkpoint's oldestMultiXid:   %u\n"),
+		   ControlFile.checkPointCopy.oldestMulti);
+	printf(_("Latest checkpoint's oldestMulti's DB: %u\n"),
+		   ControlFile.checkPointCopy.oldestMultiDB);
printf(_("Time of latest checkpoint: %s\n"),
ckpttime_str);
printf(_("Min recovery ending location: %X/%X\n"),
TransactionId set_xid = 0;
Oid set_oid = 0;
MultiXactId set_mxid = 0;
+ MultiXactId set_oldestmxid = 0;
MultiXactOffset set_mxoff = (MultiXactOffset) -1;
uint32 minXlogTli = 0;
XLogSegNo minXlogSegNo = 0;
char *endptr;
+ char *endptr2;
char *DataDir;
int fd;
case 'm':
set_mxid = strtoul(optarg, &endptr, 0);
- if (endptr == optarg || *endptr != '\0')
+ if (endptr == optarg || *endptr != ',')
+ {
+ fprintf(stderr, _("%s: invalid argument for option -m\n"), progname);
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+
+ set_oldestmxid = strtoul(endptr + 1, &endptr2, 0);
+ if (endptr2 == endptr + 1 || *endptr2 != '\0')
{
fprintf(stderr, _("%s: invalid argument for option -m\n"), progname);
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
fprintf(stderr, _("%s: multitransaction ID (-m) must not be 0\n"), progname);
exit(1);
}
+ /*
+ * XXX It'd be nice to have more sanity checks here, e.g. so
+ * that oldest is not wrapped around w.r.t. nextMulti.
+ */
+ if (set_oldestmxid == 0)
+ {
+ fprintf(stderr, _("%s: oldest multitransaction ID (-m) must not be 0\n"),
+ progname);
+ exit(1);
+ }
break;
case 'O':
ControlFile.checkPointCopy.nextOid = set_oid;
if (set_mxid != 0)
+ {
ControlFile.checkPointCopy.nextMulti = set_mxid;
+ ControlFile.checkPointCopy.oldestMulti = set_oldestmxid;
+ if (ControlFile.checkPointCopy.oldestMulti < FirstMultiXactId)
+ ControlFile.checkPointCopy.oldestMulti += FirstMultiXactId;
+ ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
+ }
+
if (set_mxoff != -1)
ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
ControlFile.checkPointCopy.nextMultiOffset = 0;
ControlFile.checkPointCopy.oldestXid = FirstNormalTransactionId;
ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
+ ControlFile.checkPointCopy.oldestMulti = FirstMultiXactId;
+ ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
ControlFile.checkPointCopy.oldestActiveXid = InvalidTransactionId;
ControlFile.checkPointCopy.oldestXidDB);
printf(_("Latest checkpoint's oldestActiveXID: %u\n"),
ControlFile.checkPointCopy.oldestActiveXid);
+ printf(_("Latest checkpoint's oldestMultiXid: %u\n"),
+ ControlFile.checkPointCopy.oldestMulti);
+ printf(_("Latest checkpoint's oldestMulti's DB: %u\n"),
+ ControlFile.checkPointCopy.oldestMultiDB);
printf(_("Maximum data alignment: %u\n"),
ControlFile.maxAlign);
/* we don't print floatFormat since can't say much useful about it */
printf(_(" -e XIDEPOCH set next transaction ID epoch\n"));
printf(_(" -f force update to be done\n"));
printf(_(" -l xlogfile force minimum WAL starting location for new transaction log\n"));
- printf(_(" -m XID set next multitransaction ID\n"));
+ printf(_(" -m XID,OLDEST set next multitransaction ID and oldest value\n"));
printf(_(" -n no update, just show extracted control values (for testing)\n"));
printf(_(" -o OID set next OID\n"));
printf(_(" -O OFFSET set next multitransaction offset\n"));
typedef struct BulkInsertStateData *BulkInsertState;
-typedef enum
+/*
+ * Possible lock modes for a tuple.
+ */
+typedef enum LockTupleMode
{
- LockTupleShared,
+ /* SELECT FOR KEY SHARE */
+ LockTupleKeyShare,
+ /* SELECT FOR SHARE */
+ LockTupleShare,
+ /* SELECT FOR NO KEY UPDATE, and UPDATEs that don't modify key columns */
+ LockTupleNoKeyExclusive,
+ /* SELECT FOR UPDATE, UPDATEs that modify key columns, and DELETE */
LockTupleExclusive
} LockTupleMode;
+#define MaxLockTupleMode LockTupleExclusive
+
/*
* When heap_update, heap_delete, or heap_lock_tuple fail because the target
* tuple is already outdated, they fill in this struct to provide information
extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait,
- HeapUpdateFailureData *hufd);
+ HeapUpdateFailureData *hufd, LockTupleMode *lockmode);
extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, bool nowait,
+ bool follow_update,
Buffer *buffer, HeapUpdateFailureData *hufd);
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
-extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid);
+extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
+ TransactionId cutoff_multi);
extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
- Buffer buf);
+ MultiXactId cutoff_multi, Buffer buf);
extern Oid simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, ItemPointer tid);
#define XLOG_HEAP2_CLEANUP_INFO 0x30
#define XLOG_HEAP2_VISIBLE 0x40
#define XLOG_HEAP2_MULTI_INSERT 0x50
+#define XLOG_HEAP2_LOCK_UPDATED 0x60
/*
* All what we need to find changed tuple
typedef struct xl_heap_delete
{
xl_heaptid target; /* deleted tuple id */
+ TransactionId xmax; /* xmax of the deleted tuple */
+ uint8 infobits_set; /* infomask bits */
bool all_visible_cleared; /* PD_ALL_VISIBLE was cleared */
} xl_heap_delete;
typedef struct xl_heap_update
{
xl_heaptid target; /* deleted tuple id */
+ TransactionId old_xmax; /* xmax of the old tuple */
+ TransactionId new_xmax; /* xmax of the new tuple */
ItemPointerData newtid; /* new inserted tuple id */
+ uint8 old_infobits_set; /* infomask bits to set on old tuple */
bool all_visible_cleared; /* PD_ALL_VISIBLE was cleared */
bool new_all_visible_cleared; /* same for the page of newtid */
/* NEW TUPLE xl_heap_header AND TUPLE DATA FOLLOWS AT END OF STRUCT */
#define SizeOfHeapNewpage (offsetof(xl_heap_newpage, blkno) + sizeof(BlockNumber))
+/* flags for infobits_set */
+#define XLHL_XMAX_IS_MULTI 0x01
+#define XLHL_XMAX_LOCK_ONLY 0x02
+#define XLHL_XMAX_EXCL_LOCK 0x04
+#define XLHL_XMAX_KEYSHR_LOCK 0x08
+#define XLHL_KEYS_UPDATED 0x10
+
/* This is what we need to know about lock */
typedef struct xl_heap_lock
{
xl_heaptid target; /* locked tuple id */
TransactionId locking_xid; /* might be a MultiXactId not xid */
- bool xid_is_mxact; /* is it? */
- bool shared_lock; /* shared or exclusive row lock? */
+	/*
+	 * XLHL_* flag bits describing the infomask and infomask2 bits to set.
+	 * Unsigned, like the infobits fields of the other heap WAL records, since
+	 * this is a bitmask rather than a number.
+	 */
+	uint8		infobits_set;
} xl_heap_lock;
-#define SizeOfHeapLock (offsetof(xl_heap_lock, shared_lock) + sizeof(bool))
+#define SizeOfHeapLock (offsetof(xl_heap_lock, infobits_set) + sizeof(uint8))
+
+/*
+ * This is what we need to know about locking an updated version of a row.
+ *
+ * SizeOfHeapLockUpdated measures the record up to and including
+ * infobits_set, so any trailing struct padding is not counted.
+ */
+typedef struct xl_heap_lock_updated
+{
+ xl_heaptid target; /* tuple id -- presumably the row version being locked; confirm against the redo routine */
+ TransactionId xmax; /* presumably the xmax to store in that tuple (may be a MultiXactId) -- TODO confirm */
+ uint8 infobits_set; /* infomask bits, expressed with the XLHL_* flags */
+} xl_heap_lock_updated;
+
+#define SizeOfHeapLockUpdated (offsetof(xl_heap_lock_updated, infobits_set) + sizeof(uint8))
/* This is what we need to know about in-place update */
typedef struct xl_heap_inplace
RelFileNode node;
BlockNumber block;
TransactionId cutoff_xid;
+ MultiXactId cutoff_multi;
/* TUPLE OFFSET NUMBERS FOLLOW AT THE END */
} xl_heap_freeze;
-#define SizeOfHeapFreeze (offsetof(xl_heap_freeze, cutoff_xid) + sizeof(TransactionId))
+#define SizeOfHeapFreeze (offsetof(xl_heap_freeze, cutoff_multi) + sizeof(MultiXactId))
/* This is what we need to know about setting a visibility map bit */
typedef struct xl_heap_visible
OffsetNumber *nowunused, int nunused,
TransactionId latestRemovedXid);
extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
- TransactionId cutoff_xid,
+ TransactionId cutoff_xid, MultiXactId cutoff_multi,
OffsetNumber *offsets, int offcnt);
extern XLogRecPtr log_heap_visible(RelFileNode rnode, BlockNumber block,
Buffer vm_buffer, TransactionId cutoff_xid);
extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup);
extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup);
extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
- CommandId *cmax,
- bool *iscombo);
+ CommandId *cmax, bool *iscombo);
+
+/* Prototype for HeapTupleHeader accessors in heapam.c */
+extern TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple);
#endif /* HTUP_H */
#define HEAP_HASVARWIDTH 0x0002 /* has variable-width attribute(s) */
#define HEAP_HASEXTERNAL 0x0004 /* has external stored attribute(s) */
#define HEAP_HASOID 0x0008 /* has an object-id field */
-/* bit 0x0010 is available */
+#define HEAP_XMAX_KEYSHR_LOCK 0x0010 /* xmax is a key-shared locker */
#define HEAP_COMBOCID 0x0020 /* t_cid is a combo cid */
#define HEAP_XMAX_EXCL_LOCK 0x0040 /* xmax is exclusive locker */
-#define HEAP_XMAX_SHARED_LOCK 0x0080 /* xmax is shared locker */
-/* if either LOCK bit is set, xmax hasn't deleted the tuple, only locked it */
-#define HEAP_IS_LOCKED (HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_SHARED_LOCK)
+#define HEAP_XMAX_LOCK_ONLY 0x0080 /* xmax, if valid, is only a locker */
+
+ /* xmax is a shared locker */
+#define HEAP_XMAX_SHR_LOCK (HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_KEYSHR_LOCK)
+
+#define HEAP_LOCK_MASK (HEAP_XMAX_SHR_LOCK | HEAP_XMAX_EXCL_LOCK | \
+ HEAP_XMAX_KEYSHR_LOCK)
#define HEAP_XMIN_COMMITTED 0x0100 /* t_xmin committed */
#define HEAP_XMIN_INVALID 0x0200 /* t_xmin invalid/aborted */
#define HEAP_XMAX_COMMITTED 0x0400 /* t_xmax committed */
* upgrade support */
#define HEAP_MOVED (HEAP_MOVED_OFF | HEAP_MOVED_IN)
-#define HEAP_XACT_MASK 0xFFE0 /* visibility-related bits */
+#define HEAP_XACT_MASK 0xFFF0 /* visibility-related bits */
+
+/*
+ * A tuple is only locked (i.e. not updated by its Xmax) if the
+ * HEAP_XMAX_LOCK_ONLY bit is set; or, for the sake of tuples written under
+ * the previous bit layout, if the Xmax is not a multi and the EXCL_LOCK bit
+ * is the only lock bit set.  In that layout an exclusive row lock was marked
+ * by HEAP_XMAX_EXCL_LOCK alone, without the 0x0080 bit that now means
+ * HEAP_XMAX_LOCK_ONLY; without this second test such a tuple would be
+ * mistaken for an updated one.
+ *
+ * See also HeapTupleHeaderIsOnlyLocked, which also checks for a possible
+ * aborted updater transaction.
+ *
+ * Beware of multiple evaluations of the argument.
+ */
+#define HEAP_XMAX_IS_LOCKED_ONLY(infomask) \
+	(((infomask) & HEAP_XMAX_LOCK_ONLY) || \
+	 (((infomask) & (HEAP_XMAX_IS_MULTI | HEAP_LOCK_MASK)) == HEAP_XMAX_EXCL_LOCK))
+/*
+ * Use these to test whether a particular lock is applied to a tuple
+ */
+#define HEAP_XMAX_IS_SHR_LOCKED(infomask) \
+ (((infomask) & HEAP_LOCK_MASK) == HEAP_XMAX_SHR_LOCK)
+#define HEAP_XMAX_IS_EXCL_LOCKED(infomask) \
+ (((infomask) & HEAP_LOCK_MASK) == HEAP_XMAX_EXCL_LOCK)
+#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) \
+ (((infomask) & HEAP_LOCK_MASK) == HEAP_XMAX_KEYSHR_LOCK)
+
+/* turn these all off when Xmax is to change */
+#define HEAP_XMAX_BITS (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID | \
+ HEAP_XMAX_IS_MULTI | HEAP_LOCK_MASK | HEAP_XMAX_LOCK_ONLY)
/*
* information stored in t_infomask2:
*/
#define HEAP_NATTS_MASK 0x07FF /* 11 bits for number of attributes */
-/* bits 0x3800 are available */
+/* bits 0x1800 are available */
+#define HEAP_KEYS_UPDATED 0x2000 /* tuple was updated and key cols
+ * modified, or tuple deleted */
#define HEAP_HOT_UPDATED 0x4000 /* tuple was HOT-updated */
#define HEAP_ONLY_TUPLE 0x8000 /* this is heap-only tuple */
-#define HEAP2_XACT_MASK 0xC000 /* visibility-related bits */
+#define HEAP2_XACT_MASK 0xE000 /* visibility-related bits */
/*
* HEAP_TUPLE_HAS_MATCH is a temporary flag used during hash joins. It is
(tup)->t_choice.t_heap.t_xmin = (xid) \
)
-#define HeapTupleHeaderGetXmax(tup) \
+/*
+ * HeapTupleHeaderGetRawXmax gets you the raw Xmax field. To find out the Xid
+ * that updated a tuple, you might need to resolve the MultiXactId if certain
+ * bits are set. HeapTupleHeaderGetUpdateXid checks those bits and takes care
+ * to resolve the MultiXactId if necessary. This might involve multixact I/O,
+ * so it should only be used if absolutely necessary.
+ */
+#define HeapTupleHeaderGetUpdateXid(tup) \
+( \
+ (!((tup)->t_infomask & HEAP_XMAX_INVALID) && \
+ ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && \
+ !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \
+ HeapTupleGetUpdateXid(tup) \
+ : \
+ HeapTupleHeaderGetRawXmax(tup) \
+)
+
+#define HeapTupleHeaderGetRawXmax(tup) \
( \
(tup)->t_choice.t_heap.t_xmax \
)
#include "access/xlog.h"
+
+/*
+ * MultiXactId 0 is reserved as the invalid value, so multixact values are
+ * assigned starting from FirstMultiXactId (1).
+ */
#define InvalidMultiXactId ((MultiXactId) 0)
#define FirstMultiXactId ((MultiXactId) 1)
+#define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF)
#define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
#define NUM_MXACTOFFSET_BUFFERS 8
#define NUM_MXACTMEMBER_BUFFERS 16
+/*
+ * Possible multixact lock modes ("status"). The first four modes are for
+ * tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
+ * next two are used for update and delete modes.
+ */
+typedef enum
+{
+ MultiXactStatusForKeyShare = 0x00,
+ MultiXactStatusForShare = 0x01,
+ MultiXactStatusForNoKeyUpdate = 0x02,
+ MultiXactStatusForUpdate = 0x03,
+ /* an update that doesn't touch "key" columns */
+ MultiXactStatusNoKeyUpdate = 0x04,
+ /* other updates, and delete */
+ MultiXactStatusUpdate = 0x05
+} MultiXactStatus;
+
+#define MaxMultiXactStatus MultiXactStatusUpdate
+
+
+/*
+ * One member of a MultiXact: a transaction id paired with the
+ * MultiXactStatus recorded for it.
+ */
+typedef struct MultiXactMember
+{
+ TransactionId xid; /* the member transaction */
+ MultiXactStatus status; /* lock or update mode recorded for xid */
+} MultiXactMember;
+
+
/* ----------------
* multixact-related XLOG entries
* ----------------
{
MultiXactId mid; /* new MultiXact's ID */
MultiXactOffset moff; /* its starting offset in members file */
- int32 nxids; /* number of member XIDs */
- TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */
+ int32 nmembers; /* number of member XIDs */
+ MultiXactMember members[FLEXIBLE_ARRAY_MEMBER];
} xl_multixact_create;
-#define MinSizeOfMultiXactCreate offsetof(xl_multixact_create, xids)
+#define SizeOfMultiXactCreate (offsetof(xl_multixact_create, members))
-extern MultiXactId MultiXactIdCreate(TransactionId xid1, TransactionId xid2);
-extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid);
+extern MultiXactId MultiXactIdCreate(TransactionId xid1,
+ MultiXactStatus status1, TransactionId xid2,
+ MultiXactStatus status2);
+extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid,
+ MultiXactStatus status);
+extern MultiXactId ReadNextMultiXactId(void);
extern bool MultiXactIdIsRunning(MultiXactId multi);
-extern bool MultiXactIdIsCurrent(MultiXactId multi);
-extern void MultiXactIdWait(MultiXactId multi);
-extern bool ConditionalMultiXactIdWait(MultiXactId multi);
extern void MultiXactIdSetOldestMember(void);
-extern int GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids);
+extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **xids,
+ bool allow_old);
+extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
extern void AtEOXact_MultiXact(void);
extern void AtPrepare_MultiXact(void);
extern void BootStrapMultiXact(void);
extern void StartupMultiXact(void);
extern void ShutdownMultiXact(void);
+extern void SetMultiXactIdLimit(MultiXactId oldest_datminmxid,
+ Oid oldest_datoid);
extern void MultiXactGetCheckptMulti(bool is_shutdown,
MultiXactId *nextMulti,
- MultiXactOffset *nextMultiOffset);
+ MultiXactOffset *nextMultiOffset,
+ MultiXactId *oldestMulti,
+ Oid *oldestMultiDB);
extern void CheckPointMultiXact(void);
+extern MultiXactId GetOldestMultiXactId(void);
+extern void TruncateMultiXact(MultiXactId cutoff_multi);
extern void MultiXactSetNextMXact(MultiXactId nextMulti,
MultiXactOffset nextMultiOffset);
extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset);
+extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
extern void multixact_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
extern void multixact_redo(XLogRecPtr lsn, XLogRecord *record);
extern void multixact_desc(StringInfo buf, uint8 xl_info, char *rec);
+extern char *mxid_to_string(MultiXactId multi, int nmembers,
+ MultiXactMember *members);
#endif /* MULTIXACT_H */
extern RewriteState begin_heap_rewrite(Relation NewHeap,
TransactionId OldestXmin, TransactionId FreezeXid,
- bool use_wal);
+ MultiXactId MultiXactFrzLimit, bool use_wal);
extern void end_heap_rewrite(RewriteState state);
extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
HeapTuple newTuple);
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201301211
+#define CATALOG_VERSION_NO 201301231
#endif
bool relhastriggers; /* has (or has had) any TRIGGERs */
bool relhassubclass; /* has (or has had) derived classes */
TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */
+ TransactionId relminmxid; /* all multixacts in this rel are >= this.
+ * this is really a MultiXactId */
#ifdef CATALOG_VARLEN /* variable-length fields start here */
/* NOTE: These fields are not present in a relcache entry's rd_rel field. */
/* Size of fixed part of pg_class tuples, not counting var-length fields */
#define CLASS_TUPLE_SIZE \
- (offsetof(FormData_pg_class,relfrozenxid) + sizeof(TransactionId))
+ (offsetof(FormData_pg_class,relminmxid) + sizeof(TransactionId))
/* ----------------
* Form_pg_class corresponds to a pointer to a tuple with
* ----------------
*/
-#define Natts_pg_class 27
+#define Natts_pg_class 28
#define Anum_pg_class_relname 1
#define Anum_pg_class_relnamespace 2
#define Anum_pg_class_reltype 3
#define Anum_pg_class_relhastriggers 23
#define Anum_pg_class_relhassubclass 24
#define Anum_pg_class_relfrozenxid 25
-#define Anum_pg_class_relacl 26
-#define Anum_pg_class_reloptions 27
+#define Anum_pg_class_relminmxid 26
+#define Anum_pg_class_relacl 27
+#define Anum_pg_class_reloptions 28
/* ----------------
* initial contents of pg_class
* ----------------
*/
-/* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
-DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 _null_ _null_ ));
+/*
+ * Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId;
+ * similarly, "1" in relminmxid stands for FirstMultiXactId
+ */
+DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f 3 1 _null_ _null_ ));
DESCR("");
/* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION 932
+#define PG_CONTROL_VERSION 933
/*
* Body of CheckPoint XLOG records. This is declared here because we keep
MultiXactOffset nextMultiOffset; /* next free MultiXact offset */
TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */
Oid oldestXidDB; /* database with minimum datfrozenxid */
+ MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */
+ Oid oldestMultiDB; /* database with minimum datminmxid */
pg_time_t time; /* time stamp of checkpoint */
/*
int32 datconnlimit; /* max connections allowed (-1=no limit) */
Oid datlastsysoid; /* highest OID to consider a system OID */
TransactionId datfrozenxid; /* all Xids < this are frozen in this DB */
+ TransactionId datminmxid; /* all multixacts in the DB are >= this; really a MultiXactId */
Oid dattablespace; /* default table space for this DB */
#ifdef CATALOG_VARLEN /* variable-length fields start here */
* compiler constants for pg_database
* ----------------
*/
-#define Natts_pg_database 12
+#define Natts_pg_database 13
#define Anum_pg_database_datname 1
#define Anum_pg_database_datdba 2
#define Anum_pg_database_encoding 3
#define Anum_pg_database_datconnlimit 8
#define Anum_pg_database_datlastsysoid 9
#define Anum_pg_database_datfrozenxid 10
-#define Anum_pg_database_dattablespace 11
-#define Anum_pg_database_datacl 12
+#define Anum_pg_database_datminmxid 11
+#define Anum_pg_database_dattablespace 12
+#define Anum_pg_database_datacl 13
-DATA(insert OID = 1 ( template1 PGUID ENCODING "LC_COLLATE" "LC_CTYPE" t t -1 0 0 1663 _null_));
+DATA(insert OID = 1 ( template1 PGUID ENCODING "LC_COLLATE" "LC_CTYPE" t t -1 0 0 1 1663 _null_));
SHDESCR("default template for new databases");
#define TemplateDbOid 1
DESCR("view system lock information");
DATA(insert OID = 1065 ( pg_prepared_xact PGNSP PGUID 12 1 1000 0 0 f f f f t t v 0 0 2249 "" "{28,25,1184,26,26}" "{o,o,o,o,o}" "{transaction,gid,prepared,ownerid,dbid}" _null_ pg_prepared_xact _null_ _null_ _null_ ));
DESCR("view two-phase transactions");
+DATA(insert OID = 3819 ( pg_get_multixact_members PGNSP PGUID 12 1 1000 0 0 f f f f t t v 1 0 2249 "28" "{28,28,25}" "{i,o,o}" "{multixid,xid,mode}" _null_ pg_get_multixact_members _null_ _null_ _null_ ));
+DESCR("view members of a multixactid");
DATA(insert OID = 3537 ( pg_describe_object PGNSP PGUID 12 1 0 0 0 f f f f t f s 3 0 25 "26 26 23" _null_ _null_ _null_ _null_ pg_describe_object _null_ _null_ _null_ ));
DESCR("get identification of SQL object");
bool is_system_catalog,
bool swap_toast_by_content,
bool check_constraints,
- TransactionId frozenXid);
+ TransactionId frozenXid,
+ MultiXactId frozenMulti);
#endif /* CLUSTER_H */
double num_tuples,
BlockNumber num_all_visible_pages,
bool hasindex,
- TransactionId frozenxid);
+ TransactionId frozenxid,
+ MultiXactId minmulti);
extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
bool sharedRel,
TransactionId *oldestXmin,
TransactionId *freezeLimit,
- TransactionId *freezeTableLimit);
+ TransactionId *freezeTableLimit,
+ MultiXactId *multiXactFrzLimit);
extern void vac_update_datfrozenxid(void);
extern void vacuum_delay_point(void);
extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti);
extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
- Relation relation, Index rti,
+ Relation relation, Index rti, int lockmode,
ItemPointer tid, TransactionId priorXmax);
extern HeapTuple EvalPlanQualFetch(EState *estate, Relation relation,
int lockmode, ItemPointer tid, TransactionId priorXmax);
/*
* ExecRowMark -
- * runtime representation of FOR UPDATE/SHARE clauses
+ * runtime representation of FOR [KEY] UPDATE/SHARE clauses
*
- * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we should have an
+ * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we should have an
* ExecRowMark for each non-target relation in the query (except inheritance
* parent RTEs, which can be ignored at runtime). See PlanRowMark for details
* about most of the fields. In addition to fields directly derived from
/*
* ExecAuxRowMark -
- * additional runtime representation of FOR UPDATE/SHARE clauses
+ * additional runtime representation of FOR [KEY] UPDATE/SHARE clauses
*
* Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to
* deal with. In addition to a pointer to the related entry in es_rowMarks,
/* ----------------
* LockRowsState information
*
- * LockRows nodes are used to enforce FOR UPDATE/FOR SHARE locking.
+ * LockRows nodes are used to enforce FOR [KEY] UPDATE/SHARE locking.
* ----------------
*/
typedef struct LockRowsState
#define ACL_CONNECT (1<<11) /* for databases */
#define N_ACL_RIGHTS 12 /* 1 plus the last 1<<x */
#define ACL_NO_RIGHTS 0
-/* Currently, SELECT ... FOR UPDATE/FOR SHARE requires UPDATE privileges */
+/* Currently, SELECT ... FOR [KEY] UPDATE/SHARE requires UPDATE privileges */
#define ACL_SELECT_FOR_UPDATE ACL_UPDATE
bool hasDistinctOn; /* distinctClause is from DISTINCT ON */
bool hasRecursive; /* WITH RECURSIVE was specified */
bool hasModifyingCTE; /* has INSERT/UPDATE/DELETE in WITH */
- bool hasForUpdate; /* FOR UPDATE or FOR SHARE was specified */
+ bool hasForUpdate; /* FOR [KEY] UPDATE/SHARE was specified */
List *cteList; /* WITH list (of CommonTableExpr's) */
} DefElem;
/*
- * LockingClause - raw representation of FOR UPDATE/SHARE options
+ * LockingClause - raw representation of FOR [NO KEY] UPDATE/[KEY] SHARE
+ * options
*
* Note: lockedRels == NIL means "all relations in query". Otherwise it
* is a list of RangeVar nodes. (We use RangeVar mainly because it carries
* a location field --- currently, parse analysis insists on unqualified
* names in LockingClause.)
*/
+typedef enum LockClauseStrength
+{
+ /* order is important -- see applyLockingClause */
+ LCS_FORKEYSHARE, /* FOR KEY SHARE */
+ LCS_FORSHARE, /* FOR SHARE */
+ LCS_FORNOKEYUPDATE, /* FOR NO KEY UPDATE */
+ LCS_FORUPDATE /* FOR UPDATE */
+} LockClauseStrength;
+
typedef struct LockingClause
{
NodeTag type;
- List *lockedRels; /* FOR UPDATE or FOR SHARE relations */
- bool forUpdate; /* true = FOR UPDATE, false = FOR SHARE */
+ List *lockedRels; /* FOR [KEY] UPDATE/SHARE relations */
+ LockClauseStrength strength; /* lock strength requested by this clause */
bool noWait; /* NOWAIT option */
} LockingClause;
/*
* RowMarkClause -
- * parser output representation of FOR UPDATE/SHARE clauses
+ * parser output representation of FOR [KEY] UPDATE/SHARE clauses
*
* Query.rowMarks contains a separate RowMarkClause node for each relation
- * identified as a FOR UPDATE/SHARE target. If FOR UPDATE/SHARE is applied
- * to a subquery, we generate RowMarkClauses for all normal and subquery rels
- * in the subquery, but they are marked pushedDown = true to distinguish them
- * from clauses that were explicitly written at this query level. Also,
- * Query.hasForUpdate tells whether there were explicit FOR UPDATE/SHARE
- * clauses in the current query level.
+ * identified as a FOR [KEY] UPDATE/SHARE target. If one of these clauses
+ * is applied to a subquery, we generate RowMarkClauses for all normal and
+ * subquery rels in the subquery, but they are marked pushedDown = true to
+ * distinguish them from clauses that were explicitly written at this query
+ * level. Also, Query.hasForUpdate tells whether there were explicit FOR
+ * UPDATE/NO KEY UPDATE/SHARE/KEY SHARE clauses in the current query level.
*/
typedef struct RowMarkClause
{
NodeTag type;
Index rti; /* range table index of target relation */
- bool forUpdate; /* true = FOR UPDATE, false = FOR SHARE */
+ LockClauseStrength strength; /* lock strength applied to this relation */
bool noWait; /* NOWAIT option */
bool pushedDown; /* pushed down from higher query level? */
} RowMarkClause;
* RowMarkType -
* enums for types of row-marking operations
*
- * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we have to uniquely
+ * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we have to uniquely
* identify all the source rows, not only those from the target relations, so
* that we can perform EvalPlanQual rechecking at need. For plain tables we
* can just fetch the TID, the same as for a target relation. Otherwise (for
typedef enum RowMarkType
{
ROW_MARK_EXCLUSIVE, /* obtain exclusive tuple lock */
+ ROW_MARK_NOKEYEXCLUSIVE, /* obtain no-key exclusive tuple lock */
ROW_MARK_SHARE, /* obtain shared tuple lock */
+ ROW_MARK_KEYSHARE, /* obtain keyshare tuple lock */
ROW_MARK_REFERENCE, /* just fetch the TID */
ROW_MARK_COPY /* physically copy the row value */
} RowMarkType;
-#define RowMarkRequiresRowShareLock(marktype) ((marktype) <= ROW_MARK_SHARE)
+#define RowMarkRequiresRowShareLock(marktype) ((marktype) <= ROW_MARK_KEYSHARE)
/*
* PlanRowMark -
- * plan-time representation of FOR UPDATE/SHARE clauses
+ * plan-time representation of FOR [KEY] UPDATE/SHARE clauses
*
- * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we create a separate
+ * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we create a separate
* PlanRowMark node for each non-target relation in the query. Relations that
- * are not specified as FOR UPDATE/SHARE are marked ROW_MARK_REFERENCE (if
+ * are not specified as FOR [KEY] UPDATE/SHARE are marked ROW_MARK_REFERENCE (if
* real tables) or ROW_MARK_COPY (if not).
*
* Initially all PlanRowMarks have rti == prti and isParent == false.
extern void CheckSelectLocking(Query *qry);
extern void applyLockingClause(Query *qry, Index rtindex,
- bool forUpdate, bool noWait, bool pushedDown);
+ LockClauseStrength strength, bool noWait, bool pushedDown);
#endif /* ANALYZE_H */
#define TransactionIdGetDatum(X) ((Datum) SET_4_BYTES((X)))
+/*
+ * MultiXactIdGetDatum
+ * Returns datum representation for a multixact identifier.
+ */
+
+#define MultiXactIdGetDatum(X) ((Datum) SET_4_BYTES((X)))
+
/*
* DatumGetCommandId
* Returns command identifier value of a datum.
extern void InitLocks(void);
extern LockMethod GetLocksMethodTable(const LOCK *lock);
extern uint32 LockTagHashCode(const LOCKTAG *locktag);
+extern bool DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2);
extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
LOCKMODE lockmode,
bool sessionLock,
/* access/transam/twophase.c */
extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
+/* access/transam/multixact.c */
+extern Datum pg_get_multixact_members(PG_FUNCTION_ARGS);
+
/* catalogs/dependency.c */
extern Datum pg_describe_object(PG_FUNCTION_ARGS);
Oid rd_id; /* relation's object id */
List *rd_indexlist; /* list of OIDs of indexes on relation */
Bitmapset *rd_indexattr; /* identifies columns used in indexes */
+ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */
Oid rd_oidindex; /* OID of unique index on OID, if any */
LockInfoData rd_lockInfo; /* lock mgr's info for locking relation */
RuleLock *rd_rules; /* rewrite rules */
extern Oid RelationGetOidIndex(Relation relation);
extern List *RelationGetIndexExpressions(Relation relation);
extern List *RelationGetIndexPredicate(Relation relation);
-extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation);
+extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation, bool keyAttrs);
extern void RelationGetExclusionInfo(Relation indexRelation,
Oid **operators,
Oid **procs,
* Routine to manage assignment of new relfilenode to a relation
*/
extern void RelationSetNewRelfilenode(Relation relation,
- TransactionId freezeXid);
+ TransactionId freezeXid, MultiXactId minmulti);
/*
* Routines for flushing/rebuilding relcache entries in various scenarios
extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
uint16 infomask, TransactionId xid);
+extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
#endif /* TQUAL_H */
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1s s1u s1r s1l s1c s2l s2c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+
+starting permutation: s1s s1u s1r s1l s2l s1c s2c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+step s2c: COMMIT;
+
+starting permutation: s1s s1u s1r s1l s2l s2c s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1c: COMMIT;
+
+starting permutation: s1s s1u s1r s2l s1l s1c s2c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+step s2c: COMMIT;
+
+starting permutation: s1s s1u s1r s2l s1l s2c s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1c: COMMIT;
+
+starting permutation: s1s s1u s1r s2l s2c s1l s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s1s s1u s2l s1r s1l s1c s2c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s2l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s1r: ROLLBACK TO f;
+step s2l: <... completed>
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+step s2c: COMMIT;
+
+starting permutation: s1s s1u s2l s1r s1l s2c s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s2l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s1r: ROLLBACK TO f;
+step s2l: <... completed>
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1c: COMMIT;
+
+starting permutation: s1s s1u s2l s1r s2c s1l s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s2l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s1r: ROLLBACK TO f;
+step s2l: <... completed>
+key value
+
+1 1
+step s2c: COMMIT;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s1s s1u s2l s2c s1r s1l s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s2l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1s s2l s1u s1r s1l s1c s2c
+step s1s: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1s s2l s1u s1r s1l s2c s1c
+step s1s: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1s s2l s1u s1r s2c s1l s1c
+step s1s: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1s s2l s1u s2c s1r s1l s1c
+step s1s: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+step s2c: COMMIT;
+step s1u: <... completed>
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s1s s2l s2c s1u s1r s1l s1c
+step s1s: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s2l s1s s1u s1r s1l s1c s2c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1s s1u s1r s1l s2c s1c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1s s1u s1r s2c s1l s1c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1s s1u s2c s1r s1l s1c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+step s2c: COMMIT;
+step s1u: <... completed>
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s2l s1s s2c s1u s1r s1l s1c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1s: SAVEPOINT f;
+step s2c: COMMIT;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s2l s2c s1s s1u s1r s1l s1c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1s s1u s1r s1l s1c s2l s2c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+
+starting permutation: s1s s1u s1r s1l s2l s1c s2c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+step s2c: COMMIT;
+
+starting permutation: s1s s1u s1r s1l s2l s2c s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1c: COMMIT;
+
+starting permutation: s1s s1u s1r s2l s1l s1c s2c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+step s2c: COMMIT;
+
+starting permutation: s1s s1u s1r s2l s1l s2c s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1c: COMMIT;
+
+starting permutation: s1s s1u s1r s2l s2c s1l s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s1s s1u s2l s1r s1l s1c s2c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s2l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s1r: ROLLBACK TO f;
+step s2l: <... completed>
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+step s2c: COMMIT;
+
+starting permutation: s1s s1u s2l s1r s1l s2c s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s2l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s1r: ROLLBACK TO f;
+step s2l: <... completed>
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1c: COMMIT;
+
+starting permutation: s1s s1u s2l s1r s2c s1l s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s2l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s1r: ROLLBACK TO f;
+step s2l: <... completed>
+key value
+
+1 1
+step s2c: COMMIT;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s1s s1u s2l s2c s1r s1l s1c
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s2l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1s s2l s1u s1r s1l s1c s2c
+step s1s: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1s s2l s1u s1r s1l s2c s1c
+step s1s: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1s s2l s1u s1r s2c s1l s1c
+step s1s: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1s s2l s1u s2c s1r s1l s1c
+step s1s: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+step s2c: COMMIT;
+step s1u: <... completed>
+error in steps s2c s1u: ERROR: could not serialize access due to concurrent update
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s1s s2l s2c s1u s1r s1l s1c
+step s1s: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s2l s1s s1u s1r s1l s1c s2c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1s s1u s1r s1l s2c s1c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1s s1u s1r s2c s1l s1c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1s s1u s2c s1r s1l s1c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2; <waiting ...>
+step s2c: COMMIT;
+step s1u: <... completed>
+error in steps s2c s1u: ERROR: could not serialize access due to concurrent update
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s2l s1s s2c s1u s1r s1l s1c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1s: SAVEPOINT f;
+step s2c: COMMIT;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s2l s2c s1s s1u s1r s1l s1c
+step s2l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1s: SAVEPOINT f;
+step s1u: UPDATE foo SET key = 2;
+step s1r: ROLLBACK TO f;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1c: COMMIT;
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1l s1svp s1d s1r s2l s1c s2c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: SELECT * FROM foo FOR NO KEY UPDATE;
+key value
+
+1 1
+step s1r: ROLLBACK TO f;
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1c: COMMIT;
+step s2l: <... completed>
+key value
+
+1 1
+step s2c: COMMIT;
+
+starting permutation: s1l s1svp s1d s2l s1r s1c s2c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: SELECT * FROM foo FOR NO KEY UPDATE;
+key value
+
+1 1
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1r: ROLLBACK TO f;
+step s1c: COMMIT;
+step s2l: <... completed>
+key value
+
+1 1
+step s2c: COMMIT;
+
+starting permutation: s1l s1svp s1d s1r s2l2 s1c s2c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: SELECT * FROM foo FOR NO KEY UPDATE;
+key value
+
+1 1
+step s1r: ROLLBACK TO f;
+step s2l2: SELECT * FROM foo FOR NO KEY UPDATE;
+key value
+
+1 1
+step s1c: COMMIT;
+step s2c: COMMIT;
+
+starting permutation: s1l s1svp s1d s2l2 s1r s1c s2c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: SELECT * FROM foo FOR NO KEY UPDATE;
+key value
+
+1 1
+step s2l2: SELECT * FROM foo FOR NO KEY UPDATE; <waiting ...>
+step s1r: ROLLBACK TO f;
+step s2l2: <... completed>
+key value
+
+1 1
+step s1c: COMMIT;
+step s2c: COMMIT;
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1l s1svp s1d s1r s1c s2l s2c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+step s1r: ROLLBACK TO f;
+step s1c: COMMIT;
+step s2l: SELECT * FROM foo FOR UPDATE;
+key value
+
+1 1
+step s2c: COMMIT;
+
+starting permutation: s1l s1svp s1d s1r s2l s1c s2c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+step s1r: ROLLBACK TO f;
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1c: COMMIT;
+step s2l: <... completed>
+key value
+
+1 1
+step s2c: COMMIT;
+
+starting permutation: s1l s1svp s1d s1r s2l s2c s1c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+step s1r: ROLLBACK TO f;
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1l s1svp s1d s2l s1r s1c s2c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1r: ROLLBACK TO f;
+step s1c: COMMIT;
+step s2l: <... completed>
+key value
+
+1 1
+step s2c: COMMIT;
+
+starting permutation: s1l s1svp s1d s2l s1r s2c s1c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1r: ROLLBACK TO f;
+invalid permutation detected
+
+starting permutation: s1l s1svp s1d s2l s2c s1r s1c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1l s1svp s2l s1d s1r s1c s2c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1d: DELETE FROM foo;
+step s1r: ROLLBACK TO f;
+step s1c: COMMIT;
+step s2l: <... completed>
+key value
+
+1 1
+step s2c: COMMIT;
+
+starting permutation: s1l s1svp s2l s1d s1r s2c s1c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1d: DELETE FROM foo;
+step s1r: ROLLBACK TO f;
+invalid permutation detected
+
+starting permutation: s1l s1svp s2l s1d s2c s1r s1c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1d: DELETE FROM foo;
+invalid permutation detected
+
+starting permutation: s1l s1svp s2l s2c s1d s1r s1c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1l s2l s1svp s1d s1r s1c s2c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+step s1r: ROLLBACK TO f;
+step s1c: COMMIT;
+step s2l: <... completed>
+key value
+
+1 1
+step s2c: COMMIT;
+
+starting permutation: s1l s2l s1svp s1d s1r s2c s1c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+step s1r: ROLLBACK TO f;
+invalid permutation detected
+
+starting permutation: s1l s2l s1svp s1d s2c s1r s1c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+invalid permutation detected
+
+starting permutation: s1l s2l s1svp s2c s1d s1r s1c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+step s1svp: SAVEPOINT f;
+invalid permutation detected
+
+starting permutation: s1l s2l s2c s1svp s1d s1r s1c
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2l: SELECT * FROM foo FOR UPDATE; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1l s1svp s1d s1r s1c s2c
+step s2l: SELECT * FROM foo FOR UPDATE;
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1l s1svp s1d s1r s2c s1c
+step s2l: SELECT * FROM foo FOR UPDATE;
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1l s1svp s1d s2c s1r s1c
+step s2l: SELECT * FROM foo FOR UPDATE;
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1l s1svp s2c s1d s1r s1c
+step s2l: SELECT * FROM foo FOR UPDATE;
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2l s1l s2c s1svp s1d s1r s1c
+step s2l: SELECT * FROM foo FOR UPDATE;
+key value
+
+1 1
+step s1l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s2c: COMMIT;
+step s1l: <... completed>
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+step s1r: ROLLBACK TO f;
+step s1c: COMMIT;
+
+starting permutation: s2l s2c s1l s1svp s1d s1r s1c
+step s2l: SELECT * FROM foo FOR UPDATE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s1svp: SAVEPOINT f;
+step s1d: DELETE FROM foo;
+step s1r: ROLLBACK TO f;
+step s1c: COMMIT;
starting permutation: ins upd com
step ins: INSERT INTO bar VALUES (42);
-step upd: UPDATE foo SET b = 'Hello World'; <waiting ...>
+step upd: UPDATE foo SET b = 'Hello World';
step com: COMMIT;
-step upd: <... completed>
starting permutation: upd ins com
step upd: UPDATE foo SET b = 'Hello World';
starting permutation: s1i s1u s2i s1c s2u s2c
step s1i: INSERT INTO child VALUES (1, 1);
step s1u: UPDATE parent SET aux = 'bar';
-step s2i: INSERT INTO child VALUES (2, 1); <waiting ...>
+step s2i: INSERT INTO child VALUES (2, 1);
+step s1c: COMMIT;
+step s2u: UPDATE parent SET aux = 'baz';
+step s2c: COMMIT;
+
+starting permutation: s1i s1u s2i s2u s1c s2c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s1u: UPDATE parent SET aux = 'bar';
+step s2i: INSERT INTO child VALUES (2, 1);
+step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
+step s1c: COMMIT;
+step s2u: <... completed>
+step s2c: COMMIT;
+
+starting permutation: s1i s1u s2i s2u s2c s1c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s1u: UPDATE parent SET aux = 'bar';
+step s2i: INSERT INTO child VALUES (2, 1);
+step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1i s2i s1u s1c s2u s2c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2i: INSERT INTO child VALUES (2, 1);
+step s1u: UPDATE parent SET aux = 'bar';
step s1c: COMMIT;
-step s2i: <... completed>
step s2u: UPDATE parent SET aux = 'baz';
step s2c: COMMIT;
starting permutation: s1i s2i s1u s2u s1c s2c
step s1i: INSERT INTO child VALUES (1, 1);
step s2i: INSERT INTO child VALUES (2, 1);
+step s1u: UPDATE parent SET aux = 'bar';
+step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
+step s1c: COMMIT;
+step s2u: <... completed>
+step s2c: COMMIT;
+
+starting permutation: s1i s2i s1u s2u s2c s1c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2i: INSERT INTO child VALUES (2, 1);
+step s1u: UPDATE parent SET aux = 'bar';
+step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1i s2i s2u s1u s1c s2c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2i: INSERT INTO child VALUES (2, 1);
+step s2u: UPDATE parent SET aux = 'baz';
step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1i s2i s2u s1u s2c s1c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2i: INSERT INTO child VALUES (2, 1);
step s2u: UPDATE parent SET aux = 'baz';
+step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
+step s2c: COMMIT;
step s1u: <... completed>
-error in steps s2u s1u: ERROR: deadlock detected
step s1c: COMMIT;
+
+starting permutation: s1i s2i s2u s2c s1u s1c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2i: INSERT INTO child VALUES (2, 1);
+step s2u: UPDATE parent SET aux = 'baz';
step s2c: COMMIT;
+step s1u: UPDATE parent SET aux = 'bar';
+step s1c: COMMIT;
-starting permutation: s1i s2i s2u s1u s2c s1c
+starting permutation: s2i s1i s1u s1c s2u s2c
+step s2i: INSERT INTO child VALUES (2, 1);
step s1i: INSERT INTO child VALUES (1, 1);
+step s1u: UPDATE parent SET aux = 'bar';
+step s1c: COMMIT;
+step s2u: UPDATE parent SET aux = 'baz';
+step s2c: COMMIT;
+
+starting permutation: s2i s1i s1u s2u s1c s2c
step s2i: INSERT INTO child VALUES (2, 1);
-step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
+step s1i: INSERT INTO child VALUES (1, 1);
step s1u: UPDATE parent SET aux = 'bar';
+step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
+step s1c: COMMIT;
step s2u: <... completed>
-error in steps s1u s2u: ERROR: deadlock detected
step s2c: COMMIT;
-step s1c: COMMIT;
-starting permutation: s2i s1i s1u s2u s1c s2c
+starting permutation: s2i s1i s1u s2u s2c s1c
step s2i: INSERT INTO child VALUES (2, 1);
step s1i: INSERT INTO child VALUES (1, 1);
+step s1u: UPDATE parent SET aux = 'bar';
+step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2i s1i s2u s1u s1c s2c
+step s2i: INSERT INTO child VALUES (2, 1);
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2u: UPDATE parent SET aux = 'baz';
step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2i s1i s2u s1u s2c s1c
+step s2i: INSERT INTO child VALUES (2, 1);
+step s1i: INSERT INTO child VALUES (1, 1);
step s2u: UPDATE parent SET aux = 'baz';
+step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
+step s2c: COMMIT;
step s1u: <... completed>
-error in steps s2u s1u: ERROR: deadlock detected
step s1c: COMMIT;
-step s2c: COMMIT;
-starting permutation: s2i s1i s2u s1u s2c s1c
+starting permutation: s2i s1i s2u s2c s1u s1c
step s2i: INSERT INTO child VALUES (2, 1);
step s1i: INSERT INTO child VALUES (1, 1);
-step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
+step s2u: UPDATE parent SET aux = 'baz';
+step s2c: COMMIT;
step s1u: UPDATE parent SET aux = 'bar';
-step s2u: <... completed>
-error in steps s1u s2u: ERROR: deadlock detected
+step s1c: COMMIT;
+
+starting permutation: s2i s2u s1i s1u s1c s2c
+step s2i: INSERT INTO child VALUES (2, 1);
+step s2u: UPDATE parent SET aux = 'baz';
+step s1i: INSERT INTO child VALUES (1, 1);
+step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2i s2u s1i s1u s2c s1c
+step s2i: INSERT INTO child VALUES (2, 1);
+step s2u: UPDATE parent SET aux = 'baz';
+step s1i: INSERT INTO child VALUES (1, 1);
+step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
step s2c: COMMIT;
+step s1u: <... completed>
step s1c: COMMIT;
starting permutation: s2i s2u s1i s2c s1u s1c
step s2i: INSERT INTO child VALUES (2, 1);
step s2u: UPDATE parent SET aux = 'baz';
-step s1i: INSERT INTO child VALUES (1, 1); <waiting ...>
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2c: COMMIT;
+step s1u: UPDATE parent SET aux = 'bar';
+step s1c: COMMIT;
+
+starting permutation: s2i s2u s2c s1i s1u s1c
+step s2i: INSERT INTO child VALUES (2, 1);
+step s2u: UPDATE parent SET aux = 'baz';
step s2c: COMMIT;
-step s1i: <... completed>
+step s1i: INSERT INTO child VALUES (1, 1);
step s1u: UPDATE parent SET aux = 'bar';
step s1c: COMMIT;
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s2c: COMMIT;
+starting permutation: s1u1 s1u2 s2u1 s2u2 s1c s2c
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1u1 s1u2 s2u1 s2u2 s2c s1c
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s1u1 s2u1 s1u2 s1c s2u2 s2c
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+invalid permutation detected
+
starting permutation: s1u1 s2u1 s1u2 s2u2 s1c s2c
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s1u2: <... completed>
-error in steps s2u2 s1u2: ERROR: deadlock detected
-step s1c: COMMIT;
-step s2c: COMMIT;
+invalid permutation detected
starting permutation: s1u1 s2u1 s1u2 s2u2 s2c s1c
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s1u2: <... completed>
-error in steps s2u2 s1u2: ERROR: deadlock detected
step s2c: COMMIT;
+step s1u2: <... completed>
step s1c: COMMIT;
starting permutation: s1u1 s2u1 s2u2 s1u2 s1c s2c
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
-step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: <... completed>
-error in steps s1u2 s2u2: ERROR: deadlock detected
-step s1c: COMMIT;
-step s2c: COMMIT;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+invalid permutation detected
starting permutation: s1u1 s2u1 s2u2 s1u2 s2c s1c
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
-step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: <... completed>
-error in steps s1u2 s2u2: ERROR: deadlock detected
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2c: COMMIT;
+step s1u2: <... completed>
step s1c: COMMIT;
+starting permutation: s1u1 s2u1 s2u2 s2c s1u2 s1c
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1c: COMMIT;
+
+starting permutation: s2u1 s1u1 s1u2 s1c s2u2 s2c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+invalid permutation detected
+
starting permutation: s2u1 s1u1 s1u2 s2u2 s1c s2c
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s1u2: <... completed>
-error in steps s2u2 s1u2: ERROR: deadlock detected
-step s1c: COMMIT;
-step s2c: COMMIT;
+invalid permutation detected
starting permutation: s2u1 s1u1 s1u2 s2u2 s2c s1c
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s1u2: <... completed>
-error in steps s2u2 s1u2: ERROR: deadlock detected
step s2c: COMMIT;
+step s1u2: <... completed>
step s1c: COMMIT;
starting permutation: s2u1 s1u1 s2u2 s1u2 s1c s2c
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
-step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
-step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: <... completed>
-error in steps s1u2 s2u2: ERROR: deadlock detected
-step s1c: COMMIT;
-step s2c: COMMIT;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+invalid permutation detected
starting permutation: s2u1 s1u1 s2u2 s1u2 s2c s1c
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
-step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+step s2c: COMMIT;
+step s1u2: <... completed>
+step s1c: COMMIT;
+
+starting permutation: s2u1 s1u1 s2u2 s2c s1u2 s1c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: <... completed>
-error in steps s1u2 s2u2: ERROR: deadlock detected
+step s1c: COMMIT;
+
+starting permutation: s2u1 s2u2 s1u1 s1u2 s1c s2c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2u1 s2u2 s1u1 s1u2 s2c s1c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2c: COMMIT;
+step s1u2: <... completed>
step s1c: COMMIT;
starting permutation: s2u1 s2u2 s1u1 s2c s1u2 s1c
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1; <waiting ...>
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s2c: COMMIT;
-step s1u1: <... completed>
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1c: COMMIT;
+
+starting permutation: s2u1 s2u2 s2c s1u1 s1u2 s1c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1c: COMMIT;
ERROR: current transaction is aborted, commands ignored until end of transaction block
step s2c: COMMIT;
-starting permutation: s1u1 s2u1 s1u2 s2u2 s1c s2c
+starting permutation: s1u1 s2u1 s1u2 s2u2 s2c s1c
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
step s1u2: <... completed>
-error in steps s2u2 s1u2: ERROR: deadlock detected
+error in steps s2c s1u2: ERROR: could not serialize access due to concurrent update
step s1c: COMMIT;
-step s2c: COMMIT;
-starting permutation: s1u1 s2u1 s1u2 s2u2 s2c s1c
+starting permutation: s1u1 s2u1 s2u2 s1u2 s2c s1c
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s1u2: <... completed>
-error in steps s2u2 s1u2: ERROR: deadlock detected
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2c: COMMIT;
+step s1u2: <... completed>
+error in steps s2c s1u2: ERROR: could not serialize access due to concurrent update
step s1c: COMMIT;
-starting permutation: s1u1 s2u1 s2u2 s1u2 s1c s2c
+starting permutation: s1u1 s2u1 s2u2 s2c s1u2 s1c
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
-step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: <... completed>
-error in steps s1u2 s2u2: ERROR: deadlock detected
-step s1c: COMMIT;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s2c: COMMIT;
-
-starting permutation: s1u1 s2u1 s2u2 s1u2 s2c s1c
-step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
-step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: <... completed>
-error in steps s1u2 s2u2: ERROR: deadlock detected
-step s2c: COMMIT;
+ERROR: could not serialize access due to read/write dependencies among transactions
step s1c: COMMIT;
-starting permutation: s2u1 s1u1 s1u2 s2u2 s1c s2c
+starting permutation: s2u1 s1u1 s1u2 s2u2 s2c s1c
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
step s1u2: <... completed>
-error in steps s2u2 s1u2: ERROR: deadlock detected
+error in steps s2c s1u2: ERROR: could not serialize access due to concurrent update
step s1c: COMMIT;
-step s2c: COMMIT;
-starting permutation: s2u1 s1u1 s1u2 s2u2 s2c s1c
+starting permutation: s2u1 s1u1 s2u2 s1u2 s2c s1c
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
-step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s1u2: <... completed>
-error in steps s2u2 s1u2: ERROR: deadlock detected
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2c: COMMIT;
+step s1u2: <... completed>
+error in steps s2c s1u2: ERROR: could not serialize access due to concurrent update
step s1c: COMMIT;
-starting permutation: s2u1 s1u1 s2u2 s1u2 s1c s2c
+starting permutation: s2u1 s1u1 s2u2 s2c s1u2 s1c
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
-step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: <... completed>
-error in steps s1u2 s2u2: ERROR: deadlock detected
+ERROR: could not serialize access due to read/write dependencies among transactions
step s1c: COMMIT;
-step s2c: COMMIT;
-starting permutation: s2u1 s1u1 s2u2 s1u2 s2c s1c
+starting permutation: s2u1 s2u2 s1u1 s1u2 s2c s1c
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
-step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
-step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s2u2: <... completed>
-error in steps s1u2 s2u2: ERROR: deadlock detected
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
step s2c: COMMIT;
+step s1u2: <... completed>
+error in steps s2c s1u2: ERROR: could not serialize access due to concurrent update
step s1c: COMMIT;
starting permutation: s2u1 s2u2 s1u1 s2c s1u2 s1c
step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
-step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1; <waiting ...>
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
step s2c: COMMIT;
-step s1u1: <... completed>
step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
ERROR: could not serialize access due to read/write dependencies among transactions
step s1c: COMMIT;
+
+starting permutation: s2u1 s2u2 s2c s1u1 s1u2 s1c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1c: COMMIT;
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1u1 s1u2 s1c s2u1 s2u2 s2c
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1c: COMMIT;
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
+
+starting permutation: s1u1 s1u2 s2u1 s1c s2u2 s2c
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+step s1c: COMMIT;
+step s2u1: <... completed>
+error in steps s1c s2u1: ERROR: could not serialize access due to concurrent update
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+ERROR: current transaction is aborted, commands ignored until end of transaction block
+step s2c: COMMIT;
+
+starting permutation: s1u1 s2u1 s1u2 s2u2 s2c s1c
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
+step s1u2: <... completed>
+error in steps s2c s1u2: ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s1u1 s2u1 s2u2 s1u2 s2c s1c
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+step s2c: COMMIT;
+step s1u2: <... completed>
+error in steps s2c s1u2: ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s1u1 s2u1 s2u2 s2c s1u2 s1c
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s2u1 s1u1 s1u2 s2u2 s2c s1c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
+step s1u2: <... completed>
+error in steps s2c s1u2: ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s2u1 s1u1 s2u2 s1u2 s2c s1c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+step s2c: COMMIT;
+step s1u2: <... completed>
+error in steps s2c s1u2: ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s2u1 s1u1 s2u2 s2c s1u2 s1c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s2u1 s2u2 s1u1 s1u2 s2c s1c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2; <waiting ...>
+step s2c: COMMIT;
+step s1u2: <... completed>
+error in steps s2c s1u2: ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s2u1 s2u2 s1u1 s2c s1u2 s1c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s2c: COMMIT;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s2u1 s2u2 s2c s1u1 s1u2 s1c
+step s2u1: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s2c: COMMIT;
+step s1u1: UPDATE A SET Col1 = 1 WHERE AID = 1;
+step s1u2: UPDATE B SET Col2 = 1 WHERE BID = 2;
+step s1c: COMMIT;
starting permutation: s1i s1u s2i s1c s2u s2c
step s1i: INSERT INTO child VALUES (1, 1);
step s1u: UPDATE parent SET aux = 'bar';
-step s2i: INSERT INTO child VALUES (2, 1); <waiting ...>
+step s2i: INSERT INTO child VALUES (2, 1);
step s1c: COMMIT;
-step s2i: <... completed>
-error in steps s1c s2i: ERROR: could not serialize access due to concurrent update
step s2u: UPDATE parent SET aux = 'baz';
-ERROR: current transaction is aborted, commands ignored until end of transaction block
+ERROR: could not serialize access due to read/write dependencies among transactions
step s2c: COMMIT;
starting permutation: s1i s2i s1u s2u s1c s2c
step s1i: INSERT INTO child VALUES (1, 1);
step s2i: INSERT INTO child VALUES (2, 1);
-step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
-step s2u: UPDATE parent SET aux = 'baz';
-step s1u: <... completed>
-error in steps s2u s1u: ERROR: deadlock detected
+step s1u: UPDATE parent SET aux = 'bar';
+step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
step s1c: COMMIT;
+step s2u: <... completed>
+error in steps s1c s2u: ERROR: could not serialize access due to concurrent update
step s2c: COMMIT;
starting permutation: s1i s2i s2u s1u s2c s1c
step s1i: INSERT INTO child VALUES (1, 1);
step s2i: INSERT INTO child VALUES (2, 1);
-step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
-step s1u: UPDATE parent SET aux = 'bar';
-step s2u: <... completed>
-error in steps s1u s2u: ERROR: deadlock detected
+step s2u: UPDATE parent SET aux = 'baz';
+step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
step s2c: COMMIT;
+step s1u: <... completed>
+error in steps s2c s1u: ERROR: could not serialize access due to concurrent update
step s1c: COMMIT;
starting permutation: s2i s1i s1u s2u s1c s2c
step s2i: INSERT INTO child VALUES (2, 1);
step s1i: INSERT INTO child VALUES (1, 1);
-step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
-step s2u: UPDATE parent SET aux = 'baz';
-step s1u: <... completed>
-error in steps s2u s1u: ERROR: deadlock detected
+step s1u: UPDATE parent SET aux = 'bar';
+step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
step s1c: COMMIT;
+step s2u: <... completed>
+error in steps s1c s2u: ERROR: could not serialize access due to concurrent update
step s2c: COMMIT;
starting permutation: s2i s1i s2u s1u s2c s1c
step s2i: INSERT INTO child VALUES (2, 1);
step s1i: INSERT INTO child VALUES (1, 1);
-step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
-step s1u: UPDATE parent SET aux = 'bar';
-step s2u: <... completed>
-error in steps s1u s2u: ERROR: deadlock detected
+step s2u: UPDATE parent SET aux = 'baz';
+step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
step s2c: COMMIT;
+step s1u: <... completed>
+error in steps s2c s1u: ERROR: could not serialize access due to concurrent update
step s1c: COMMIT;
starting permutation: s2i s2u s1i s2c s1u s1c
step s2i: INSERT INTO child VALUES (2, 1);
step s2u: UPDATE parent SET aux = 'baz';
-step s1i: INSERT INTO child VALUES (1, 1); <waiting ...>
+step s1i: INSERT INTO child VALUES (1, 1);
step s2c: COMMIT;
-step s1i: <... completed>
-error in steps s2c s1i: ERROR: could not serialize access due to concurrent update
step s1u: UPDATE parent SET aux = 'bar';
-ERROR: current transaction is aborted, commands ignored until end of transaction block
+ERROR: could not serialize access due to read/write dependencies among transactions
step s1c: COMMIT;
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1i s1u s1c s2i s2u s2c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s1u: UPDATE parent SET aux = 'bar';
+step s1c: COMMIT;
+step s2i: INSERT INTO child VALUES (2, 1);
+step s2u: UPDATE parent SET aux = 'baz';
+step s2c: COMMIT;
+
+starting permutation: s1i s1u s2i s1c s2u s2c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s1u: UPDATE parent SET aux = 'bar';
+step s2i: INSERT INTO child VALUES (2, 1);
+step s1c: COMMIT;
+step s2u: UPDATE parent SET aux = 'baz';
+ERROR: could not serialize access due to concurrent update
+step s2c: COMMIT;
+
+starting permutation: s1i s2i s1u s2u s1c s2c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2i: INSERT INTO child VALUES (2, 1);
+step s1u: UPDATE parent SET aux = 'bar';
+step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
+step s1c: COMMIT;
+step s2u: <... completed>
+error in steps s1c s2u: ERROR: could not serialize access due to concurrent update
+step s2c: COMMIT;
+
+starting permutation: s1i s2i s2u s1u s2c s1c
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2i: INSERT INTO child VALUES (2, 1);
+step s2u: UPDATE parent SET aux = 'baz';
+step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
+step s2c: COMMIT;
+step s1u: <... completed>
+error in steps s2c s1u: ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s2i s1i s1u s2u s1c s2c
+step s2i: INSERT INTO child VALUES (2, 1);
+step s1i: INSERT INTO child VALUES (1, 1);
+step s1u: UPDATE parent SET aux = 'bar';
+step s2u: UPDATE parent SET aux = 'baz'; <waiting ...>
+step s1c: COMMIT;
+step s2u: <... completed>
+error in steps s1c s2u: ERROR: could not serialize access due to concurrent update
+step s2c: COMMIT;
+
+starting permutation: s2i s1i s2u s1u s2c s1c
+step s2i: INSERT INTO child VALUES (2, 1);
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2u: UPDATE parent SET aux = 'baz';
+step s1u: UPDATE parent SET aux = 'bar'; <waiting ...>
+step s2c: COMMIT;
+step s1u: <... completed>
+error in steps s2c s1u: ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s2i s2u s1i s2c s1u s1c
+step s2i: INSERT INTO child VALUES (2, 1);
+step s2u: UPDATE parent SET aux = 'baz';
+step s1i: INSERT INTO child VALUES (1, 1);
+step s2c: COMMIT;
+step s1u: UPDATE parent SET aux = 'bar';
+ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1d s1c s2i s2c
+step s1d: DELETE FROM A WHERE AID = 1;
+step s1c: COMMIT;
+step s2i: INSERT INTO B (BID,AID,Col2) VALUES (2,1,0);
+ERROR: insert or update on table "b" violates foreign key constraint "b_aid_fkey"
+step s2c: COMMIT;
+
+starting permutation: s1d s2i s1c s2c
+step s1d: DELETE FROM A WHERE AID = 1;
+step s2i: INSERT INTO B (BID,AID,Col2) VALUES (2,1,0); <waiting ...>
+step s1c: COMMIT;
+step s2i: <... completed>
+error in steps s1c s2i: ERROR: insert or update on table "b" violates foreign key constraint "b_aid_fkey"
+step s2c: COMMIT;
+
+starting permutation: s1d s2i s2c s1c
+step s1d: DELETE FROM A WHERE AID = 1;
+step s2i: INSERT INTO B (BID,AID,Col2) VALUES (2,1,0); <waiting ...>
+invalid permutation detected
+
+starting permutation: s2i s1d s1c s2c
+step s2i: INSERT INTO B (BID,AID,Col2) VALUES (2,1,0);
+step s1d: DELETE FROM A WHERE AID = 1; <waiting ...>
+invalid permutation detected
+
+starting permutation: s2i s1d s2c s1c
+step s2i: INSERT INTO B (BID,AID,Col2) VALUES (2,1,0);
+step s1d: DELETE FROM A WHERE AID = 1; <waiting ...>
+step s2c: COMMIT;
+step s1d: <... completed>
+error in steps s2c s1d: ERROR: update or delete on table "a" violates foreign key constraint "b_aid_fkey" on table "b"
+step s1c: COMMIT;
+
+starting permutation: s2i s2c s1d s1c
+step s2i: INSERT INTO B (BID,AID,Col2) VALUES (2,1,0);
+step s2c: COMMIT;
+step s1d: DELETE FROM A WHERE AID = 1;
+ERROR: update or delete on table "a" violates foreign key constraint "b_aid_fkey" on table "b"
+step s1c: COMMIT;
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1b s2b s1s s2u s2d s1l s2c s1c
+step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s2b: BEGIN;
+step s1s: SELECT * FROM foo;
+key value
+
+1 1
+step s2u: UPDATE foo SET value = 2 WHERE key = 1;
+step s2d: DELETE FROM foo;
+step s1l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s2c: COMMIT;
+step s1l: <... completed>
+error in steps s2c s1l: ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s1b s2b s1s s2u s2d s1l s2r s1c
+step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s2b: BEGIN;
+step s1s: SELECT * FROM foo;
+key value
+
+1 1
+step s2u: UPDATE foo SET value = 2 WHERE key = 1;
+step s2d: DELETE FROM foo;
+step s1l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s2r: ROLLBACK;
+step s1l: <... completed>
+key value
+
+1 1
+step s1c: COMMIT;
+
+starting permutation: s1b s2b s1s s2u s2u2 s1l s2c s1c
+step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s2b: BEGIN;
+step s1s: SELECT * FROM foo;
+key value
+
+1 1
+step s2u: UPDATE foo SET value = 2 WHERE key = 1;
+step s2u2: UPDATE foo SET key = 2 WHERE key = 1;
+step s1l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s2c: COMMIT;
+step s1l: <... completed>
+error in steps s2c s1l: ERROR: could not serialize access due to concurrent update
+step s1c: COMMIT;
+
+starting permutation: s1b s2b s1s s2u s2u2 s1l s2r s1c
+step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s2b: BEGIN;
+step s1s: SELECT * FROM foo;
+key value
+
+1 1
+step s2u: UPDATE foo SET value = 2 WHERE key = 1;
+step s2u2: UPDATE foo SET key = 2 WHERE key = 1;
+step s1l: SELECT * FROM foo FOR KEY SHARE; <waiting ...>
+step s2r: ROLLBACK;
+step s1l: <... completed>
+key value
+
+1 1
+step s1c: COMMIT;
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1b s2b s1s s2u s1l s2c s2d s1c
+step s1b: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s2b: BEGIN;
+step s1s: SELECT * FROM foo;
+key value
+
+1 1
+step s2u: UPDATE foo SET value = 2 WHERE key = 1;
+step s1l: SELECT * FROM foo FOR KEY SHARE;
+key value
+
+1 1
+step s2c: COMMIT;
+step s2d: DELETE FROM foo WHERE key = 1; <waiting ...>
+step s1c: COMMIT;
+step s2d: <... completed>
--- /dev/null
+Parsed test spec with 3 sessions
+
+starting permutation: s1lock s2lock s1svpt s3lock s1lock2 s2c s1c s3c
+step s1lock: SELECT * FROM justthis FOR SHARE;
+value
+
+1
+step s2lock: SELECT * FROM justthis FOR SHARE;
+value
+
+1
+step s1svpt: SAVEPOINT foo;
+step s3lock: SELECT * FROM justthis FOR UPDATE; <waiting ...>
+step s1lock2: SELECT * FROM justthis FOR SHARE;
+value
+
+1
+step s2c: COMMIT;
+step s1c: COMMIT;
+step s3lock: <... completed>
+value
+
+1
+step s3c: COMMIT;
test: fk-deadlock
test: fk-deadlock2
test: eval-plan-qual
+test: lock-update-delete
+test: lock-update-traversal
+test: delete-abort-savept
+test: delete-abort-savept-2
+test: aborted-keyrevoke
test: drop-index-concurrently-1
* but it can only be unblocked by running steps from other
* sessions.
*/
+ fflush(stdout);
fprintf(stderr, "invalid permutation detected\n");
/* Cancel the waiting statement from this session. */
--- /dev/null
+# Exercises a key-changing UPDATE made inside a subtransaction that is then
+# rolled back: s1 updates the key after a savepoint ("s1u") and rolls back
+# to that savepoint ("s1r"), while both sessions take FOR KEY SHARE locks on
+# the same row.
+# NOTE(review): presumably FOR KEY SHARE should only have to wait while the
+# key update is still in effect -- confirm against the expected output.
+#
+# No permutation lines are given in this spec.
+
+setup
+{
+ CREATE TABLE foo (
+ key int PRIMARY KEY,
+ value int
+ );
+
+ INSERT INTO foo VALUES (1, 1);
+}
+
+teardown
+{
+ DROP TABLE foo;
+}
+
+# s1: the session whose subtransaction changes the key and is rolled back.
+session "s1"
+setup { BEGIN; }
+step "s1s" { SAVEPOINT f; }
+step "s1u" { UPDATE foo SET key = 2; } # obtain KEY REVOKE
+step "s1r" { ROLLBACK TO f; } # lose KEY REVOKE
+step "s1l" { SELECT * FROM foo FOR KEY SHARE; }
+step "s1c" { COMMIT; }
+
+# s2: a concurrent transaction that only takes a FOR KEY SHARE lock.
+session "s2"
+setup { BEGIN; }
+step "s2l" { SELECT * FROM foo FOR KEY SHARE; }
+step "s2c" { COMMIT; }
--- /dev/null
+# A funkier version of delete-abort-savept: instead of deleting the row
+# inside the subtransaction, s1 takes a FOR NO KEY UPDATE lock on top of its
+# FOR KEY SHARE lock and then rolls back to the savepoint.
+setup
+{
+ CREATE TABLE foo (
+ key INT PRIMARY KEY,
+ value INT
+ );
+
+ INSERT INTO foo VALUES (1, 1);
+}
+
+teardown
+{
+ DROP TABLE foo;
+}
+
+# s1: FOR KEY SHARE in the main transaction, FOR NO KEY UPDATE after the
+# savepoint.  Step "s1d" takes a row lock rather than deleting; the name
+# presumably mirrors the DELETE step of delete-abort-savept.
+session "s1"
+setup { BEGIN; }
+step "s1l" { SELECT * FROM foo FOR KEY SHARE; }
+step "s1svp" { SAVEPOINT f; }
+step "s1d" { SELECT * FROM foo FOR NO KEY UPDATE; }
+step "s1r" { ROLLBACK TO f; }
+step "s1c" { COMMIT; }
+
+# s2: tries to lock the same row, either FOR UPDATE ("s2l") or
+# FOR NO KEY UPDATE ("s2l2").
+session "s2"
+setup { BEGIN; }
+step "s2l" { SELECT * FROM foo FOR UPDATE; }
+step "s2l2" { SELECT * FROM foo FOR NO KEY UPDATE; }
+step "s2c" { COMMIT; }
+
+# For each of s2's two lock strengths, run the lock attempt once after s1
+# has rolled back to the savepoint and once before it.
+permutation "s1l" "s1svp" "s1d" "s1r" "s2l" "s1c" "s2c"
+permutation "s1l" "s1svp" "s1d" "s2l" "s1r" "s1c" "s2c"
+permutation "s1l" "s1svp" "s1d" "s1r" "s2l2" "s1c" "s2c"
+permutation "s1l" "s1svp" "s1d" "s2l2" "s1r" "s1c" "s2c"
--- /dev/null
+# After rolling back a subtransaction that upgraded a lock, the previously
+# held lock should still be held.
+#
+# No permutation lines are given in this spec.
+setup
+{
+ CREATE TABLE foo (
+ key INT PRIMARY KEY,
+ value INT
+ );
+
+ INSERT INTO foo VALUES (1, 1);
+}
+
+teardown
+{
+ DROP TABLE foo;
+}
+
+# s1: takes FOR KEY SHARE in the main transaction, then deletes the row
+# after a savepoint and rolls back to that savepoint.
+session "s1"
+setup { BEGIN; }
+step "s1l" { SELECT * FROM foo FOR KEY SHARE; }
+step "s1svp" { SAVEPOINT f; }
+step "s1d" { DELETE FROM foo; }
+step "s1r" { ROLLBACK TO f; }
+step "s1c" { COMMIT; }
+
+# s2: tries to take a FOR UPDATE lock on the same row.
+session "s2"
+setup { BEGIN; }
+step "s2l" { SELECT * FROM foo FOR UPDATE; }
+step "s2c" { COMMIT; }
step "s2i" { INSERT INTO child VALUES (2, 1); }
step "s2u" { UPDATE parent SET aux = 'baz'; }
step "s2c" { COMMIT; }
-
-## Most theoretical permutations require that a blocked session execute a
-## command, making them impossible in practice.
-permutation "s1i" "s1u" "s1c" "s2i" "s2u" "s2c"
-permutation "s1i" "s1u" "s2i" "s1c" "s2u" "s2c"
-#permutation "s1i" "s1u" "s2i" "s2u" "s1c" "s2c"
-#permutation "s1i" "s1u" "s2i" "s2u" "s2c" "s1c"
-#permutation "s1i" "s2i" "s1u" "s1c" "s2u" "s2c"
-permutation "s1i" "s2i" "s1u" "s2u" "s1c" "s2c"
-#permutation "s1i" "s2i" "s1u" "s2u" "s2c" "s1c"
-#permutation "s1i" "s2i" "s2u" "s1u" "s1c" "s2c"
-permutation "s1i" "s2i" "s2u" "s1u" "s2c" "s1c"
-#permutation "s1i" "s2i" "s2u" "s2c" "s1u" "s1c"
-#permutation "s2i" "s1i" "s1u" "s1c" "s2u" "s2c"
-permutation "s2i" "s1i" "s1u" "s2u" "s1c" "s2c"
-#permutation "s2i" "s1i" "s1u" "s2u" "s2c" "s1c"
-#permutation "s2i" "s1i" "s2u" "s1u" "s1c" "s2c"
-permutation "s2i" "s1i" "s2u" "s1u" "s2c" "s1c"
-#permutation "s2i" "s1i" "s2u" "s2c" "s1u" "s1c"
-#permutation "s2i" "s2u" "s1i" "s1u" "s1c" "s2c"
-#permutation "s2i" "s2u" "s1i" "s1u" "s2c" "s1c"
-permutation "s2i" "s2u" "s1i" "s2c" "s1u" "s1c"
-#permutation "s2i" "s2u" "s2c" "s1i" "s1u" "s1c"
step "s2u1" { UPDATE B SET Col2 = 1 WHERE BID = 2; }
step "s2u2" { UPDATE B SET Col2 = 1 WHERE BID = 2; }
step "s2c" { COMMIT; }
-
-## Many theoretical permutations require that a blocked session execute a
-## command, making them impossible in practice.
-permutation "s1u1" "s1u2" "s1c" "s2u1" "s2u2" "s2c"
-permutation "s1u1" "s1u2" "s2u1" "s1c" "s2u2" "s2c"
-#permutation "s1u1" "s1u2" "s2u1" "s2u2" "s1c" "s2c"
-#permutation "s1u1" "s1u2" "s2u1" "s2u2" "s2c" "s1c"
-#permutation "s1u1" "s2u1" "s1u2" "s1c" "s2u2" "s2c"
-permutation "s1u1" "s2u1" "s1u2" "s2u2" "s1c" "s2c"
-permutation "s1u1" "s2u1" "s1u2" "s2u2" "s2c" "s1c"
-permutation "s1u1" "s2u1" "s2u2" "s1u2" "s1c" "s2c"
-permutation "s1u1" "s2u1" "s2u2" "s1u2" "s2c" "s1c"
-#permutation "s1u1" "s2u1" "s2u2" "s2c" "s1u2" "s1c"
-#permutation "s2u1" "s1u1" "s1u2" "s1c" "s2u2" "s2c"
-permutation "s2u1" "s1u1" "s1u2" "s2u2" "s1c" "s2c"
-permutation "s2u1" "s1u1" "s1u2" "s2u2" "s2c" "s1c"
-permutation "s2u1" "s1u1" "s2u2" "s1u2" "s1c" "s2c"
-permutation "s2u1" "s1u1" "s2u2" "s1u2" "s2c" "s1c"
-#permutation "s2u1" "s1u1" "s2u2" "s2c" "s1u2" "s1c"
-#permutation "s2u1" "s2u2" "s1u1" "s1u2" "s1c" "s2c"
-#permutation "s2u1" "s2u2" "s1u1" "s1u2" "s2c" "s1c"
-permutation "s2u1" "s2u2" "s1u1" "s2c" "s1u2" "s1c"
-#permutation "s2u1" "s2u2" "s2c" "s1u1" "s1u2" "s1c"
--- /dev/null
+# If a tuple is updated and then deleted (or updated again in a way that
+# changes its key), a later transaction that traverses the update chain to
+# lock the row should get an error when it reaches the latest version if
+# the deleting transaction committed, or succeed if that transaction
+# rolled back.
+
+setup
+{
+ CREATE TABLE foo (
+ key int PRIMARY KEY,
+ value int
+ );
+
+ INSERT INTO foo VALUES (1, 1);
+}
+
+teardown
+{
+ DROP TABLE foo;
+}
+
+# s1: REPEATABLE READ transaction that reads the row first and only later
+# tries to lock it FOR KEY SHARE.
+session "s1"
+step "s1b" { BEGIN ISOLATION LEVEL REPEATABLE READ; }
+step "s1s" { SELECT * FROM foo; } # obtain snapshot
+step "s1l" { SELECT * FROM foo FOR KEY SHARE; } # obtain lock
+step "s1c" { COMMIT; }
+
+# s2: updates a non-key column, then either deletes the row ("s2d") or
+# changes its key ("s2u2"), and finally commits or rolls back.
+session "s2"
+step "s2b" { BEGIN; }
+step "s2u" { UPDATE foo SET value = 2 WHERE key = 1; }
+step "s2d" { DELETE FROM foo; }
+step "s2u2" { UPDATE foo SET key = 2 WHERE key = 1; }
+step "s2c" { COMMIT; }
+step "s2r" { ROLLBACK; }
+
+# In every permutation "s1l" has to wait for s2 to finish.  Per the expected
+# output, it then fails with a serialization error when s2 commits and
+# returns the original row (1, 1) when s2 rolls back.
+# DELETE followed by commit, then by rollback:
+permutation "s1b" "s2b" "s1s" "s2u" "s2d" "s1l" "s2c" "s1c"
+permutation "s1b" "s2b" "s1s" "s2u" "s2d" "s1l" "s2r" "s1c"
+# Key-changing UPDATE followed by commit, then by rollback:
+permutation "s1b" "s2b" "s1s" "s2u" "s2u2" "s1l" "s2c" "s1c"
+permutation "s1b" "s2b" "s1s" "s2u" "s2u2" "s1l" "s2r" "s1c"
--- /dev/null
+# When a tuple that has been updated is locked, the locking command
+# should traverse the update chain; thus, a DELETE should not be able
+# to proceed until the lock has been released.
+
+setup
+{
+ CREATE TABLE foo (
+ key int PRIMARY KEY,
+ value int
+ );
+
+ INSERT INTO foo VALUES (1, 1);
+}
+
+teardown
+{
+ DROP TABLE foo;
+}
+
+# s1: REPEATABLE READ transaction that reads the row and later locks it
+# FOR KEY SHARE.
+session "s1"
+step "s1b" { BEGIN ISOLATION LEVEL REPEATABLE READ; }
+step "s1s" { SELECT * FROM foo; } # obtain snapshot
+step "s1l" { SELECT * FROM foo FOR KEY SHARE; } # obtain lock
+step "s1c" { COMMIT; }
+
+# s2: updates a non-key column inside a transaction; "s2d" is defined after
+# "s2c" and is run after it in the permutation below, i.e. once the
+# s2b/s2c transaction has ended.
+session "s2"
+step "s2b" { BEGIN; }
+step "s2u" { UPDATE foo SET value = 2 WHERE key = 1; }
+step "s2c" { COMMIT; }
+step "s2d" { DELETE FROM foo WHERE key = 1; }
+
+# Per the expected output, "s1l" returns the row without waiting even though
+# s2's update is still uncommitted, and "s2d" then waits until "s1c".
+permutation "s1b" "s2b" "s1s" "s2u" "s1l" "s2c" "s2d" "s1c"
--- /dev/null
+# If we already hold a lock of a given strength, do not deadlock when
+# some other transaction is waiting for a conflicting lock and we try
+# to acquire the same lock we already held.
+setup
+{
+ CREATE TABLE justthis (
+ value int
+ );
+
+ INSERT INTO justthis VALUES (1);
+}
+
+teardown
+{
+ DROP TABLE justthis;
+}
+
+# s1: takes FOR SHARE, opens a savepoint, then asks for the same FOR SHARE
+# lock again ("s1lock2").
+session "s1"
+setup { BEGIN; }
+step "s1lock" { SELECT * FROM justthis FOR SHARE; }
+step "s1svpt" { SAVEPOINT foo; }
+step "s1lock2" { SELECT * FROM justthis FOR SHARE; }
+step "s1c" { COMMIT; }
+
+# s2: a second FOR SHARE locker on the same row.
+session "s2"
+setup { BEGIN; }
+step "s2lock" { SELECT * FROM justthis FOR SHARE; } # ensure it's a multi
+step "s2c" { COMMIT; }
+
+# s3: requests the conflicting FOR UPDATE lock.
+session "s3"
+setup { BEGIN; }
+step "s3lock" { SELECT * FROM justthis FOR UPDATE; }
+step "s3c" { COMMIT; }
+
+# Per the expected output, "s3lock" blocks behind the two share lockers,
+# "s1lock2" still returns the row immediately, and "s3lock" completes only
+# after both "s2c" and "s1c".
+permutation "s1lock" "s2lock" "s1svpt" "s3lock" "s1lock2" "s2c" "s1c" "s3c"