spi \
tablefunc \
tcn \
+ test_decoding \
test_parser \
test_shm_mq \
tsearch2 \
--- /dev/null
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
--- /dev/null
+# contrib/test_decoding/Makefile
+
+MODULES = test_decoding
+OBJS = test_decoding.o
+
+# Note: because we don't tell the Makefile there are any regression tests,
+# we have to clean those result files explicitly
+EXTRA_CLEAN = -r $(pg_regress_clean_files)
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/test_decoding
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+# Disabled because these tests require "wal_level=logical", which
+# typical installcheck users do not have (e.g. buildfarm clients).
+installcheck:;
+
+# But it can nonetheless be very helpful to run tests on preexisting
+# installation, allow to do so, but only if requested explicitly.
+installcheck-force: regresscheck-install-force isolationcheck-install-force
+
+check: regresscheck isolationcheck
+
+submake-regress:
+ $(MAKE) -C $(top_builddir)/src/test/regress all
+
+submake-isolation:
+ $(MAKE) -C $(top_builddir)/src/test/isolation all
+
+submake-test_decoding:
+ $(MAKE) -C $(top_builddir)/contrib/test_decoding
+
+REGRESSCHECKS=ddl rewrite toast permissions decoding_in_xact binary
+
+regresscheck: all | submake-regress submake-test_decoding
+ $(pg_regress_check) \
+ --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf \
+ --temp-install=./tmp_check \
+ --extra-install=contrib/test_decoding \
+ $(REGRESSCHECKS)
+
+regresscheck-install-force: | submake-regress submake-test_decoding
+ $(pg_regress_installcheck) \
+ --extra-install=contrib/test_decoding \
+ $(REGRESSCHECKS)
+
+ISOLATIONCHECKS=mxact delayed_startup concurrent_ddl_dml
+
+isolationcheck: all | submake-isolation submake-test_decoding
+ $(pg_isolation_regress_check) \
+ --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf \
+ --extra-install=contrib/test_decoding \
+ $(ISOLATIONCHECKS)
+
+isolationcheck-install-force: all | submake-isolation submake-test_decoding
+ $(pg_isolation_regress_installcheck) \
+ --extra-install=contrib/test_decoding \
+ $(ISOLATIONCHECKS)
+
+PHONY: submake-test_decoding submake-regress check \
+ regresscheck regresscheck-install-force \
+ isolationcheck isolationcheck-install-force
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+-- succeeds, textual plugin, textual consumer
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '0');
+ data
+------
+(0 rows)
+
+-- fails, binary plugin, textual consumer
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '1');
+ERROR: output plugin cannot produce text output
+-- succeeds, textual plugin, binary consumer
+SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '0');
+ data
+------
+(0 rows)
+
+-- succeeds, binary plugin, binary consumer
+SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '1');
+ data
+------
+(0 rows)
+
+SELECT 'init' FROM pg_drop_replication_slot('regression_slot');
+ ?column?
+----------
+ init
+(1 row)
+
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_float s1_insert_tbl2 s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_float: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE float;
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[double precision]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl1_float s1_insert_tbl2 s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl1_float: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; <waiting ...>
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s2_alter_tbl1_float: <... completed>
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+BEGIN
+table public.pg_temp: INSERT: val1[integer]:1 val2[double precision]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_char s1_insert_tbl2 s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying;
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[character varying]:'1'
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl1_char s1_insert_tbl2 s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; <waiting ...>
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s2_alter_tbl1_char: <... completed>
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+BEGIN
+table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1'
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s2_alter_tbl1_float s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl1_float: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; <waiting ...>
+step s1_commit: COMMIT;
+step s2_alter_tbl1_float: <... completed>
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+BEGIN
+table public.pg_temp: INSERT: val1[integer]:1 val2[double precision]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s2_alter_tbl1_char s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; <waiting ...>
+step s1_commit: COMMIT;
+step s2_alter_tbl1_char: <... completed>
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+BEGIN
+table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1'
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_float s1_insert_tbl2 s2_alter_tbl1_float s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_float: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE float;
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl1_float: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; <waiting ...>
+step s1_commit: COMMIT;
+step s2_alter_tbl1_float: <... completed>
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[double precision]:1
+COMMIT
+BEGIN
+table public.pg_temp: INSERT: val1[integer]:1 val2[double precision]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_char s1_insert_tbl2 s2_alter_tbl1_char s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying;
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; <waiting ...>
+step s1_commit: COMMIT;
+step s2_alter_tbl1_char: <... completed>
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[character varying]:'1'
+COMMIT
+BEGIN
+table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1'
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s2_alter_tbl2_char s1_begin s1_insert_tbl1 s2_alter_tbl2_text s1_insert_tbl2 s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying;
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_text: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE text;
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[text]:'1'
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s2_alter_tbl2_char s1_begin s1_insert_tbl1 s2_alter_tbl2_text s1_insert_tbl2 s2_alter_tbl1_char s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying;
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_text: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE text;
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; <waiting ...>
+step s1_commit: COMMIT;
+step s2_alter_tbl1_char: <... completed>
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[text]:'1'
+COMMIT
+BEGIN
+table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1'
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_boolean s1_insert_tbl2 s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_boolean: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean;
+ERROR: column "val2" cannot be cast automatically to type boolean
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_boolean s1_insert_tbl2 s2_alter_tbl1_boolean s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_boolean: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean;
+ERROR: column "val2" cannot be cast automatically to type boolean
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl1_boolean: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE boolean; <waiting ...>
+step s1_commit: COMMIT;
+step s2_alter_tbl1_boolean: <... completed>
+error in steps s1_commit s2_alter_tbl1_boolean: ERROR: column "val2" cannot be cast automatically to type boolean
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_add_int s1_insert_tbl2_3col s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s1_commit s1_begin s2_alter_tbl2_add_int s1_insert_tbl2_3col s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s1_begin: BEGIN;
+step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_add_float s1_insert_tbl2_3col s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_add_float: ALTER TABLE tbl2 ADD COLUMN val3 FLOAT;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[double precision]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s1_commit s1_begin s2_alter_tbl2_add_float s1_insert_tbl2_3col s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s1_begin: BEGIN;
+step s2_alter_tbl2_add_float: ALTER TABLE tbl2 ADD COLUMN val3 FLOAT;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[double precision]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_add_char s1_insert_tbl2_3col s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s1_commit s1_begin s2_alter_tbl2_add_char s1_insert_tbl2_3col s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s1_begin: BEGIN;
+step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s2_alter_tbl2_add_int s1_begin s1_insert_tbl2_3col s2_alter_tbl2_drop_3rd_col s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER;
+step s1_begin: BEGIN;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; <waiting ...>
+step s1_commit: COMMIT;
+step s2_alter_tbl2_drop_3rd_col: <... completed>
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
+COMMIT
+BEGIN
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s2_alter_tbl2_add_int s1_begin s1_insert_tbl2_3col s2_alter_tbl2_drop_3rd_col s1_insert_tbl2 s1_commit s1_insert_tbl2 s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER;
+step s1_begin: BEGIN;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; <waiting ...>
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s2_alter_tbl2_drop_3rd_col: <... completed>
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:null
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s2_alter_tbl2_add_int s1_begin s1_insert_tbl2_3col s2_alter_tbl2_drop_3rd_col s1_commit s2_get_changes s2_alter_tbl2_add_text s1_begin s1_insert_tbl2_3col s2_alter_tbl2_3rd_char s1_insert_tbl2_3col s1_commit s2_get_changes s2_alter_tbl2_3rd_int s1_insert_tbl2_3col s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER;
+step s1_begin: BEGIN;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; <waiting ...>
+step s1_commit: COMMIT;
+step s2_alter_tbl2_drop_3rd_col: <... completed>
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
+COMMIT
+BEGIN
+COMMIT
+step s2_alter_tbl2_add_text: ALTER TABLE tbl2 ADD COLUMN val3 TEXT;
+step s1_begin: BEGIN;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s2_alter_tbl2_3rd_char: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying; <waiting ...>
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_alter_tbl2_3rd_char: <... completed>
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
+COMMIT
+BEGIN
+COMMIT
+step s2_alter_tbl2_3rd_int: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE int USING val3::integer;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+table public.pg_temp: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:null
+table public.pg_temp: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
+table public.pg_temp: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s2_alter_tbl2_add_char s1_begin s1_insert_tbl1 s1_insert_tbl2_3col s2_alter_tbl2_3rd_text s1_insert_tbl2_3col s1_commit s1_insert_tbl2_3col s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying;
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s2_alter_tbl2_3rd_text: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE text; <waiting ...>
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_alter_tbl2_3rd_text: <... completed>
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s2_alter_tbl2_add_text s1_begin s1_insert_tbl1 s1_insert_tbl2_3col s2_alter_tbl2_3rd_char s1_insert_tbl2_3col s1_commit s1_insert_tbl2_3col s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s2_alter_tbl2_add_text: ALTER TABLE tbl2 ADD COLUMN val3 TEXT;
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s2_alter_tbl2_3rd_char: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying; <waiting ...>
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_alter_tbl2_3rd_char: <... completed>
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s2_alter_tbl2_add_char s1_begin s1_insert_tbl1 s2_alter_tbl2_3rd_text s1_insert_tbl2_3col s1_commit s2_alter_tbl2_drop_3rd_col s1_insert_tbl2 s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying;
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_3rd_text: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE text;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3;
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1'
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s2_alter_tbl2_add_text s1_begin s1_insert_tbl1 s2_alter_tbl2_3rd_char s1_insert_tbl2_3col s1_commit s2_alter_tbl2_drop_3rd_col s1_insert_tbl2 s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s2_alter_tbl2_add_text: ALTER TABLE tbl2 ADD COLUMN val3 TEXT;
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_3rd_char: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying;
+step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1);
+step s1_commit: COMMIT;
+step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3;
+step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1);
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1'
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+?column?
+
+stop
+
+starting permutation: s1_init s2_alter_tbl2_add_char s1_begin s1_insert_tbl1 s2_alter_tbl2_drop_3rd_col s1_insert_tbl1 s1_commit s2_get_changes
+step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying;
+step s1_begin: BEGIN;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3;
+step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1);
+step s1_commit: COMMIT;
+step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0');
+data
+
+BEGIN
+COMMIT
+BEGIN
+COMMIT
+BEGIN
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1
+COMMIT
+?column?
+
+stop
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+-- fail because of an already existing slot
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ERROR: replication slot "regression_slot" already exists
+-- fail because of an invalid name
+SELECT 'init' FROM pg_create_logical_replication_slot('Invalid Name', 'test_decoding');
+ERROR: replication slot name "Invalid Name" contains invalid character
+HINT: Replication slot names may only contain letters, numbers and the underscore character.
+-- fail twice because of an invalid parameter values
+SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', 'frakbar');
+ERROR: could not parse value "frakbar" for parameter "include-xids"
+CONTEXT: slot "regression_slot", output plugin "test_decoding", in the startup callback
+SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'nonexistant-option', 'frakbar');
+ERROR: option "nonexistant-option" = "frakbar" is unknown
+CONTEXT: slot "regression_slot", output plugin "test_decoding", in the startup callback
+SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', 'frakbar');
+ERROR: could not parse value "frakbar" for parameter "include-xids"
+CONTEXT: slot "regression_slot", output plugin "test_decoding", in the startup callback
+-- succeed once
+SELECT pg_drop_replication_slot('regression_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+-- fail
+SELECT pg_drop_replication_slot('regression_slot');
+ERROR: replication slot "regression_slot" does not exist
+-- check that we're detecting a streaming rep slot used for logical decoding
+SELECT 'init' FROM pg_create_physical_replication_slot('repl');
+ ?column?
+----------
+ init
+(1 row)
+
+SELECT data FROM pg_logical_slot_get_changes('repl', NULL, NULL, 'include-xids', '0');
+ERROR: cannot use physical replication slot for logical decoding
+SELECT pg_drop_replication_slot('repl');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+/* check whether status function reports us, only reproduceable columns */
+SELECT slot_name, plugin, slot_type, active,
+ NOT catalog_xmin IS NULL AS catalog_xmin_set,
+ xmin IS NULl AS data_xmin_not_set,
+ pg_xlog_location_diff(restart_lsn, '0/01000000') > 0 AS some_wal
+FROM pg_replication_slots;
+ slot_name | plugin | slot_type | active | catalog_xmin_set | data_xmin_not_set | some_wal
+-----------------+---------------+-----------+--------+------------------+-------------------+----------
+ regression_slot | test_decoding | logical | f | t | t | t
+(1 row)
+
+/*
+ * Check that changes are handled correctly when interleaved with ddl
+ */
+CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
+BEGIN;
+INSERT INTO replication_example(somedata, text) VALUES (1, 1);
+INSERT INTO replication_example(somedata, text) VALUES (1, 2);
+COMMIT;
+ALTER TABLE replication_example ADD COLUMN bar int;
+INSERT INTO replication_example(somedata, text, bar) VALUES (2, 1, 4);
+BEGIN;
+INSERT INTO replication_example(somedata, text, bar) VALUES (2, 2, 4);
+INSERT INTO replication_example(somedata, text, bar) VALUES (2, 3, 4);
+INSERT INTO replication_example(somedata, text, bar) VALUES (2, 4, NULL);
+COMMIT;
+ALTER TABLE replication_example DROP COLUMN bar;
+INSERT INTO replication_example(somedata, text) VALUES (3, 1);
+BEGIN;
+INSERT INTO replication_example(somedata, text) VALUES (3, 2);
+INSERT INTO replication_example(somedata, text) VALUES (3, 3);
+COMMIT;
+ALTER TABLE replication_example RENAME COLUMN text TO somenum;
+INSERT INTO replication_example(somedata, somenum) VALUES (4, 1);
+-- collect all changes
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+---------------------------------------------------------------------------------------------------------------------------
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:1 somedata[integer]:1 text[character varying]:'1'
+ table public.replication_example: INSERT: id[integer]:2 somedata[integer]:1 text[character varying]:'2'
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:3 somedata[integer]:2 text[character varying]:'1' bar[integer]:4
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:4 somedata[integer]:2 text[character varying]:'2' bar[integer]:4
+ table public.replication_example: INSERT: id[integer]:5 somedata[integer]:2 text[character varying]:'3' bar[integer]:4
+ table public.replication_example: INSERT: id[integer]:6 somedata[integer]:2 text[character varying]:'4' bar[integer]:null
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:7 somedata[integer]:3 text[character varying]:'1'
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:8 somedata[integer]:3 text[character varying]:'2'
+ table public.replication_example: INSERT: id[integer]:9 somedata[integer]:3 text[character varying]:'3'
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:10 somedata[integer]:4 somenum[character varying]:'1'
+ COMMIT
+(30 rows)
+
+ALTER TABLE replication_example ALTER COLUMN somenum TYPE int4 USING (somenum::int4);
+-- throw away changes, they contain oids
+SELECT count(data) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ count
+-------
+ 12
+(1 row)
+
+INSERT INTO replication_example(somedata, somenum) VALUES (5, 1);
+BEGIN;
+INSERT INTO replication_example(somedata, somenum) VALUES (6, 1);
+ALTER TABLE replication_example ADD COLUMN zaphod1 int;
+INSERT INTO replication_example(somedata, somenum, zaphod1) VALUES (6, 2, 1);
+ALTER TABLE replication_example ADD COLUMN zaphod2 int;
+INSERT INTO replication_example(somedata, somenum, zaphod2) VALUES (6, 3, 1);
+INSERT INTO replication_example(somedata, somenum, zaphod1) VALUES (6, 4, 2);
+COMMIT;
+-- show changes
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+------------------------------------------------------------------------------------------------------------------------------------------
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:11 somedata[integer]:5 somenum[integer]:1
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:12 somedata[integer]:6 somenum[integer]:1
+ table public.replication_example: INSERT: id[integer]:13 somedata[integer]:6 somenum[integer]:2 zaphod1[integer]:1
+ table public.replication_example: INSERT: id[integer]:14 somedata[integer]:6 somenum[integer]:3 zaphod1[integer]:null zaphod2[integer]:1
+ table public.replication_example: INSERT: id[integer]:15 somedata[integer]:6 somenum[integer]:4 zaphod1[integer]:2 zaphod2[integer]:null
+ COMMIT
+(9 rows)
+
+-- hide changes bc of oid visible in full table rewrites
+CREATE TABLE tr_unique(id2 serial unique NOT NULL, data int);
+INSERT INTO tr_unique(data) VALUES(10);
+ALTER TABLE tr_unique RENAME TO tr_pkey;
+ALTER TABLE tr_pkey ADD COLUMN id serial primary key;
+SELECT count(data) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ count
+-------
+ 10
+(1 row)
+
+INSERT INTO tr_pkey(data) VALUES(1);
+--show deletion with primary key
+DELETE FROM tr_pkey;
+/* display results */
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+----------------------------------------------------------------------------
+ BEGIN
+ table public.tr_pkey: INSERT: id2[integer]:2 data[integer]:1 id[integer]:2
+ COMMIT
+ BEGIN
+ table public.tr_pkey: DELETE: id[integer]:1
+ table public.tr_pkey: DELETE: id[integer]:2
+ COMMIT
+(7 rows)
+
+/*
+ * check that disk spooling works
+ */
+BEGIN;
+CREATE TABLE tr_etoomuch (id serial primary key, data int);
+INSERT INTO tr_etoomuch(data) SELECT g.i FROM generate_series(1, 10234) g(i);
+DELETE FROM tr_etoomuch WHERE id < 5000;
+UPDATE tr_etoomuch SET data = - data WHERE id > 5000;
+COMMIT;
+/* display results, but hide most of the output */
+SELECT count(*), min(data), max(data)
+FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0')
+GROUP BY substring(data, 1, 24)
+ORDER BY 1;
+ count | min | max
+-------+-------------------------------------------------+------------------------------------------------------------------------
+ 1 | COMMIT | COMMIT
+ 1 | BEGIN | BEGIN
+ 20467 | table public.tr_etoomuch: DELETE: id[integer]:1 | table public.tr_etoomuch: UPDATE: id[integer]:9999 data[integer]:-9999
+(3 rows)
+
+/*
+ * check whether we decode subtransactions correctly in relation with each
+ * other
+ */
+CREATE TABLE tr_sub (id serial primary key, path text);
+-- toplevel, subtxn, toplevel, subtxn, subtxn
+BEGIN;
+INSERT INTO tr_sub(path) VALUES ('1-top-#1');
+SAVEPOINT a;
+INSERT INTO tr_sub(path) VALUES ('1-top-1-#1');
+INSERT INTO tr_sub(path) VALUES ('1-top-1-#2');
+RELEASE SAVEPOINT a;
+SAVEPOINT b;
+SAVEPOINT c;
+INSERT INTO tr_sub(path) VALUES ('1-top-2-1-#1');
+INSERT INTO tr_sub(path) VALUES ('1-top-2-1-#2');
+RELEASE SAVEPOINT c;
+INSERT INTO tr_sub(path) VALUES ('1-top-2-#1');
+RELEASE SAVEPOINT b;
+COMMIT;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+----------------------------------------------------------------------
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.tr_sub: INSERT: id[integer]:1 path[text]:'1-top-#1'
+ table public.tr_sub: INSERT: id[integer]:2 path[text]:'1-top-1-#1'
+ table public.tr_sub: INSERT: id[integer]:3 path[text]:'1-top-1-#2'
+ table public.tr_sub: INSERT: id[integer]:4 path[text]:'1-top-2-1-#1'
+ table public.tr_sub: INSERT: id[integer]:5 path[text]:'1-top-2-1-#2'
+ table public.tr_sub: INSERT: id[integer]:6 path[text]:'1-top-2-#1'
+ COMMIT
+(10 rows)
+
+-- check that we handle xlog assignments correctly
+BEGIN;
+-- nest 80 subtxns
+SAVEPOINT subtop;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+-- assign xid by inserting
+INSERT INTO tr_sub(path) VALUES ('2-top-1...--#1');
+INSERT INTO tr_sub(path) VALUES ('2-top-1...--#2');
+INSERT INTO tr_sub(path) VALUES ('2-top-1...--#3');
+RELEASE SAVEPOINT subtop;
+INSERT INTO tr_sub(path) VALUES ('2-top-#1');
+COMMIT;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+------------------------------------------------------------------------
+ BEGIN
+ table public.tr_sub: INSERT: id[integer]:7 path[text]:'2-top-1...--#1'
+ table public.tr_sub: INSERT: id[integer]:8 path[text]:'2-top-1...--#2'
+ table public.tr_sub: INSERT: id[integer]:9 path[text]:'2-top-1...--#3'
+ table public.tr_sub: INSERT: id[integer]:10 path[text]:'2-top-#1'
+ COMMIT
+(6 rows)
+
+-- make sure rollbacked subtransactions aren't decoded
+BEGIN;
+INSERT INTO tr_sub(path) VALUES ('3-top-2-#1');
+SAVEPOINT a;
+INSERT INTO tr_sub(path) VALUES ('3-top-2-1-#1');
+SAVEPOINT b;
+INSERT INTO tr_sub(path) VALUES ('3-top-2-2-#1');
+ROLLBACK TO SAVEPOINT b;
+INSERT INTO tr_sub(path) VALUES ('3-top-2-#2');
+COMMIT;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+-----------------------------------------------------------------------
+ BEGIN
+ table public.tr_sub: INSERT: id[integer]:11 path[text]:'3-top-2-#1'
+ table public.tr_sub: INSERT: id[integer]:12 path[text]:'3-top-2-1-#1'
+ table public.tr_sub: INSERT: id[integer]:14 path[text]:'3-top-2-#2'
+ COMMIT
+(5 rows)
+
+-- test whether a known, but not yet logged toplevel xact, followed by a
+-- subxact commit is handled correctly
+BEGIN;
+SELECT txid_current() != 0; -- so no fixed xid apears in the outfile
+ ?column?
+----------
+ t
+(1 row)
+
+SAVEPOINT a;
+INSERT INTO tr_sub(path) VALUES ('4-top-1-#1');
+RELEASE SAVEPOINT a;
+COMMIT;
+-- test whether a change in a subtransaction, in an unknown toplevel
+-- xact is handled correctly.
+BEGIN;
+SAVEPOINT a;
+INSERT INTO tr_sub(path) VALUES ('5-top-1-#1');
+COMMIT;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+---------------------------------------------------------------------
+ BEGIN
+ table public.tr_sub: INSERT: id[integer]:15 path[text]:'4-top-1-#1'
+ COMMIT
+ BEGIN
+ table public.tr_sub: INSERT: id[integer]:16 path[text]:'5-top-1-#1'
+ COMMIT
+(6 rows)
+
+/*
+ * Check whether treating a table as a catalog table works somewhat
+ */
+CREATE TABLE replication_metadata (
+ id serial primary key,
+ relation name NOT NULL,
+ options text[]
+)
+WITH (user_catalog_table = true)
+;
+\d+ replication_metadata
+ Table "public.replication_metadata"
+ Column | Type | Modifiers | Storage | Stats target | Description
+----------+---------+-------------------------------------------------------------------+----------+--------------+-------------
+ id | integer | not null default nextval('replication_metadata_id_seq'::regclass) | plain | |
+ relation | name | not null | plain | |
+ options | text[] | | extended | |
+Indexes:
+ "replication_metadata_pkey" PRIMARY KEY, btree (id)
+Has OIDs: no
+Options: user_catalog_table=true
+
+INSERT INTO replication_metadata(relation, options)
+VALUES ('foo', ARRAY['a', 'b']);
+ALTER TABLE replication_metadata RESET (user_catalog_table);
+\d+ replication_metadata
+ Table "public.replication_metadata"
+ Column | Type | Modifiers | Storage | Stats target | Description
+----------+---------+-------------------------------------------------------------------+----------+--------------+-------------
+ id | integer | not null default nextval('replication_metadata_id_seq'::regclass) | plain | |
+ relation | name | not null | plain | |
+ options | text[] | | extended | |
+Indexes:
+ "replication_metadata_pkey" PRIMARY KEY, btree (id)
+Has OIDs: no
+
+INSERT INTO replication_metadata(relation, options)
+VALUES ('bar', ARRAY['a', 'b']);
+ALTER TABLE replication_metadata SET (user_catalog_table = true);
+\d+ replication_metadata
+ Table "public.replication_metadata"
+ Column | Type | Modifiers | Storage | Stats target | Description
+----------+---------+-------------------------------------------------------------------+----------+--------------+-------------
+ id | integer | not null default nextval('replication_metadata_id_seq'::regclass) | plain | |
+ relation | name | not null | plain | |
+ options | text[] | | extended | |
+Indexes:
+ "replication_metadata_pkey" PRIMARY KEY, btree (id)
+Has OIDs: no
+Options: user_catalog_table=true
+
+INSERT INTO replication_metadata(relation, options)
+VALUES ('blub', NULL);
+-- make sure rewrites don't work
+ALTER TABLE replication_metadata ADD COLUMN rewritemeornot int;
+ALTER TABLE replication_metadata ALTER COLUMN rewritemeornot TYPE text;
+ERROR: cannot rewrite table "replication_metadata" used as a catalog table
+ALTER TABLE replication_metadata SET (user_catalog_table = false);
+\d+ replication_metadata
+ Table "public.replication_metadata"
+ Column | Type | Modifiers | Storage | Stats target | Description
+----------------+---------+-------------------------------------------------------------------+----------+--------------+-------------
+ id | integer | not null default nextval('replication_metadata_id_seq'::regclass) | plain | |
+ relation | name | not null | plain | |
+ options | text[] | | extended | |
+ rewritemeornot | integer | | plain | |
+Indexes:
+ "replication_metadata_pkey" PRIMARY KEY, btree (id)
+Has OIDs: no
+Options: user_catalog_table=false
+
+INSERT INTO replication_metadata(relation, options)
+VALUES ('zaphod', NULL);
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+------------------------------------------------------------------------------------------------------------------------------------
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.replication_metadata: INSERT: id[integer]:1 relation[name]:'foo' options[text[]]:'{a,b}'
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.replication_metadata: INSERT: id[integer]:2 relation[name]:'bar' options[text[]]:'{a,b}'
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.replication_metadata: INSERT: id[integer]:3 relation[name]:'blub' options[text[]]:null
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.replication_metadata: INSERT: id[integer]:4 relation[name]:'zaphod' options[text[]]:null rewritemeornot[integer]:null
+ COMMIT
+(22 rows)
+
+/*
+ * check whether we handle updates/deletes correct with & without a pkey
+ */
+/* we should handle the case without a key at all more gracefully */
+CREATE TABLE table_without_key(id serial, data int);
+INSERT INTO table_without_key(data) VALUES(1),(2);
+DELETE FROM table_without_key WHERE data = 1;
+-- won't log old keys
+UPDATE table_without_key SET data = 3 WHERE data = 2;
+UPDATE table_without_key SET id = -id;
+UPDATE table_without_key SET id = -id;
+-- should log the full old row now
+ALTER TABLE table_without_key REPLICA IDENTITY FULL;
+UPDATE table_without_key SET data = 3 WHERE data = 2;
+UPDATE table_without_key SET id = -id;
+UPDATE table_without_key SET id = -id;
+DELETE FROM table_without_key WHERE data = 3;
+CREATE TABLE table_with_pkey(id serial primary key, data int);
+INSERT INTO table_with_pkey(data) VALUES(1), (2);
+DELETE FROM table_with_pkey WHERE data = 1;
+-- should log the old pkey
+UPDATE table_with_pkey SET data = 3 WHERE data = 2;
+UPDATE table_with_pkey SET id = -id;
+UPDATE table_with_pkey SET id = -id;
+-- check that we log nothing despite having a pkey
+ALTER TABLE table_without_key REPLICA IDENTITY NOTHING;
+UPDATE table_with_pkey SET id = -id;
+-- check that we log everything despite having a pkey
+ALTER TABLE table_without_key REPLICA IDENTITY FULL;
+UPDATE table_with_pkey SET id = -id;
+DELETE FROM table_with_pkey WHERE data = 3;
+CREATE TABLE table_with_unique_not_null(id serial unique, data int);
+ALTER TABLE table_with_unique_not_null ALTER COLUMN id SET NOT NULL; --already set
+-- won't log anything, replica identity not setup
+INSERT INTO table_with_unique_not_null(data) VALUES(1), (2);
+DELETE FROM table_with_unique_not_null WHERE data = 1;
+UPDATE table_with_unique_not_null SET data = 3 WHERE data = 2;
+UPDATE table_with_unique_not_null SET id = -id;
+UPDATE table_with_unique_not_null SET id = -id;
+DELETE FROM table_with_unique_not_null WHERE data = 3;
+-- should log old key
+ALTER TABLE table_with_unique_not_null REPLICA IDENTITY USING INDEX table_with_unique_not_null_id_key;
+INSERT INTO table_with_unique_not_null(data) VALUES(1), (2);
+DELETE FROM table_with_unique_not_null WHERE data = 1;
+UPDATE table_with_unique_not_null SET data = 3 WHERE data = 2;
+UPDATE table_with_unique_not_null SET id = -id;
+UPDATE table_with_unique_not_null SET id = -id;
+DELETE FROM table_with_unique_not_null WHERE data = 3;
+-- check toast support
+BEGIN;
+CREATE SEQUENCE toasttable_rand_seq START 79 INCREMENT 1499; -- portable "random"
+CREATE TABLE toasttable(
+ id serial primary key,
+ toasted_col1 text,
+ rand1 float8 DEFAULT nextval('toasttable_rand_seq'),
+ toasted_col2 text,
+ rand2 float8 DEFAULT nextval('toasttable_rand_seq')
+ );
+COMMIT;
+-- uncompressed external toast data
+INSERT INTO toasttable(toasted_col1) SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i);
+-- compressed external toast data
+INSERT INTO toasttable(toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i);
+-- update of existing column
+UPDATE toasttable
+ SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i))
+WHERE id = 1;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.table_without_key: INSERT: id[integer]:1 data[integer]:1
+ table public.table_without_key: INSERT: id[integer]:2 data[integer]:2
+ COMMIT
+ BEGIN
+ table public.table_without_key: DELETE: (no-tuple-data)
+ COMMIT
+ BEGIN
+ table public.table_without_key: UPDATE: id[integer]:2 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_without_key: UPDATE: id[integer]:-2 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_without_key: UPDATE: id[integer]:2 data[integer]:3
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.table_without_key: UPDATE: old-key: id[integer]:2 data[integer]:3 new-tuple: id[integer]:-2 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_without_key: UPDATE: old-key: id[integer]:-2 data[integer]:3 new-tuple: id[integer]:2 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_without_key: DELETE: id[integer]:2 data[integer]:3
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.table_with_pkey: INSERT: id[integer]:1 data[integer]:1
+ table public.table_with_pkey: INSERT: id[integer]:2 data[integer]:2
+ COMMIT
+ BEGIN
+ table public.table_with_pkey: DELETE: id[integer]:1
+ COMMIT
+ BEGIN
+ table public.table_with_pkey: UPDATE: id[integer]:2 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_with_pkey: UPDATE: old-key: id[integer]:2 new-tuple: id[integer]:-2 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_with_pkey: UPDATE: old-key: id[integer]:-2 new-tuple: id[integer]:2 data[integer]:3
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.table_with_pkey: UPDATE: old-key: id[integer]:2 new-tuple: id[integer]:-2 data[integer]:3
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.table_with_pkey: UPDATE: old-key: id[integer]:-2 new-tuple: id[integer]:2 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_with_pkey: DELETE: id[integer]:2
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: INSERT: id[integer]:1 data[integer]:1
+ table public.table_with_unique_not_null: INSERT: id[integer]:2 data[integer]:2
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: DELETE: (no-tuple-data)
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: UPDATE: id[integer]:2 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: UPDATE: id[integer]:-2 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: UPDATE: id[integer]:2 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: DELETE: (no-tuple-data)
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: INSERT: id[integer]:3 data[integer]:1
+ table public.table_with_unique_not_null: INSERT: id[integer]:4 data[integer]:2
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: DELETE: id[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: UPDATE: id[integer]:4 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: UPDATE: old-key: id[integer]:4 new-tuple: id[integer]:-4 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: UPDATE: old-key: id[integer]:-4 new-tuple: id[integer]:4 data[integer]:3
+ COMMIT
+ BEGIN
+ table public.table_with_unique_not_null: DELETE: id[integer]:4
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.toasttable: INSERT: id[integer]:1 toasted_col1[text]:'12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000' rand1[double precision]:79 toasted_col2[text]:null rand2[double precision]:1578
+ COMMIT
+ BEGIN
+ table public.toasttable: INSERT: id[integer]:2 toasted_col1[text]:null rand1[double precision]:3077 toasted_col2[text]:'0001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500' rand2[double precision]:4576
+ COMMIT
+ BEGIN
+ table public.toasttable: UPDATE: id[integer]:1 toasted_col1[text]:'12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000' rand1[double precision]:79 toasted_col2[text]:null rand2[double precision]:1578
+ COMMIT
+(113 rows)
+
+INSERT INTO toasttable(toasted_col1) SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i);
+-- update of second column, first column unchanged
+UPDATE toasttable
+ SET toasted_col2 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i))
+WHERE id = 1;
+-- make sure we decode correctly even if the toast table is gone
+DROP TABLE toasttable;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ BEGIN
+ table public.toasttable: INSERT: id[integer]:3 toasted_col1[text]:'12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000' rand1[double precision]:6075 toasted_col2[text]:null rand2[double precision]:7574
+ COMMIT
+ BEGIN
+ table public.toasttable: UPDATE: id[integer]:1 toasted_col1[text]:unchanged-toast-datum rand1[double precision]:79 toasted_col2[text]:'12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000' rand2[double precision]:1578
+ COMMIT
+ BEGIN
+ COMMIT
+(8 rows)
+
+-- done, free logical replication slot
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+------
+(0 rows)
+
+SELECT pg_drop_replication_slot('regression_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+/* check that we aren't visible anymore now */
+SELECT * FROM pg_stat_replication;
+ pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start | backend_xmin | state | sent_location | write_location | flush_location | replay_location | sync_priority | sync_state
+-----+----------+---------+------------------+-------------+-----------------+-------------+---------------+--------------+-------+---------------+----------------+----------------+-----------------+---------------+------------
+(0 rows)
+
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+-- fail because we're creating a slot while in an xact with xid
+BEGIN;
+SELECT txid_current() = 0;
+ ?column?
+----------
+ f
+(1 row)
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ERROR: cannot create logical replication slot in transaction that has performed writes
+ROLLBACK;
+-- fail because we're creating a slot while in an subxact whose topxact has a xid
+BEGIN;
+SELECT txid_current() = 0;
+ ?column?
+----------
+ f
+(1 row)
+
+SAVEPOINT barf;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ERROR: cannot create logical replication slot in transaction that has performed writes
+ROLLBACK TO SAVEPOINT barf;
+ROLLBACK;
+-- succeed, outside tx.
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');
+ ?column?
+----------
+ stop
+(1 row)
+
+-- succeed, in tx without xid.
+BEGIN;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+COMMIT;
+CREATE TABLE nobarf(id serial primary key, data text);
+INSERT INTO nobarf(data) VALUES('1');
+-- decoding works in transaction with xid
+BEGIN;
+SELECT txid_current() = 0;
+ ?column?
+----------
+ f
+(1 row)
+
+-- don't show yet, haven't committed
+INSERT INTO nobarf(data) VALUES('2');
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+-----------------------------------------------------------
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.nobarf: INSERT: id[integer]:1 data[text]:'1'
+ COMMIT
+(5 rows)
+
+COMMIT;
+INSERT INTO nobarf(data) VALUES('3');
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+-----------------------------------------------------------
+ BEGIN
+ table public.nobarf: INSERT: id[integer]:2 data[text]:'2'
+ COMMIT
+ BEGIN
+ table public.nobarf: INSERT: id[integer]:3 data[text]:'3'
+ COMMIT
+(6 rows)
+
+SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');
+ ?column?
+----------
+ stop
+(1 row)
+
--- /dev/null
+Parsed test spec with 2 sessions
+
+starting permutation: s1b s1w s2init s1c s2start s1b s1w s1c s2start s1b s1w s2start s1c s2start
+step s1b: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s1w: INSERT INTO do_write DEFAULT VALUES;
+step s2init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); <waiting ...>
+step s1c: COMMIT;
+step s2init: <... completed>
+?column?
+
+init
+step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
+data
+
+step s1b: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s1w: INSERT INTO do_write DEFAULT VALUES;
+step s1c: COMMIT;
+step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
+data
+
+BEGIN
+table public.do_write: INSERT: id[integer]:2
+COMMIT
+step s1b: BEGIN ISOLATION LEVEL SERIALIZABLE;
+step s1w: INSERT INTO do_write DEFAULT VALUES;
+step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
+data
+
+step s1c: COMMIT;
+step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
+data
+
+BEGIN
+table public.do_write: INSERT: id[integer]:3
+COMMIT
+?column?
+
+stop
--- /dev/null
+Parsed test spec with 3 sessions
+
+starting permutation: s0init s0start s1begin s1sharepgclass s2begin s2sharepgclass s0w s0start s2commit s1commit
+step s0init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
+data
+
+step s1begin: BEGIN;
+step s1sharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s;
+?column?
+
+t
+step s2begin: BEGIN;
+step s2sharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s;
+?column?
+
+t
+step s0w: INSERT INTO do_write DEFAULT VALUES;
+step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
+data
+
+BEGIN
+table public.do_write: INSERT: id[integer]:1
+COMMIT
+step s2commit: COMMIT;
+step s1commit: COMMIT;
+?column?
+
+stop
+
+starting permutation: s0init s0start s1begin s1keysharepgclass s2begin s2keysharepgclass s0alter s0w s0start s2commit s1commit
+step s0init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+
+init
+step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
+data
+
+step s1begin: BEGIN;
+step s1keysharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s;
+?column?
+
+t
+step s2begin: BEGIN;
+step s2keysharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s;
+?column?
+
+t
+step s0alter: ALTER TABLE do_write ADD column ts timestamptz;
+step s0w: INSERT INTO do_write DEFAULT VALUES;
+step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');
+data
+
+BEGIN
+COMMIT
+BEGIN
+table public.do_write: INSERT: id[integer]:1 ts[timestamp with time zone]:null
+COMMIT
+step s2commit: COMMIT;
+step s1commit: COMMIT;
+?column?
+
+stop
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+-- setup
+CREATE ROLE lr_normal;
+CREATE ROLE lr_superuser SUPERUSER;
+CREATE ROLE lr_replication REPLICATION;
+CREATE TABLE lr_test(data text);
+-- superuser can control replication
+SET ROLE lr_superuser;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+INSERT INTO lr_test VALUES('lr_superuser_init');
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+--------------------------------------------------------------
+ BEGIN
+ table public.lr_test: INSERT: data[text]:'lr_superuser_init'
+ COMMIT
+(3 rows)
+
+SELECT pg_drop_replication_slot('regression_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+RESET ROLE;
+-- replication user can control replication
+SET ROLE lr_replication;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+INSERT INTO lr_test VALUES('lr_superuser_init');
+ERROR: permission denied for relation lr_test
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+------
+(0 rows)
+
+SELECT pg_drop_replication_slot('regression_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+RESET ROLE;
+-- plain user *can't* can control replication
+SET ROLE lr_normal;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ERROR: must be superuser or replication role to use replication slots
+INSERT INTO lr_test VALUES('lr_superuser_init');
+ERROR: permission denied for relation lr_test
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ERROR: must be superuser or replication role to use replication slots
+SELECT pg_drop_replication_slot('regression_slot');
+ERROR: must be superuser or replication role to use replication slots
+RESET ROLE;
+-- replication users can drop superuser created slots
+SET ROLE lr_superuser;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+RESET ROLE;
+SET ROLE lr_replication;
+SELECT pg_drop_replication_slot('regression_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+RESET ROLE;
+-- normal users can't drop existing slots
+SET ROLE lr_superuser;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+RESET ROLE;
+SET ROLE lr_normal;
+SELECT pg_drop_replication_slot('regression_slot');
+ERROR: must be superuser or replication role to use replication slots
+RESET ROLE;
+-- all users can see existing slots
+SET ROLE lr_superuser;
+SELECT slot_name, plugin FROM pg_replication_slots;
+ slot_name | plugin
+-----------------+---------------
+ regression_slot | test_decoding
+(1 row)
+
+RESET ROLE;
+SET ROLE lr_replication;
+SELECT slot_name, plugin FROM pg_replication_slots;
+ slot_name | plugin
+-----------------+---------------
+ regression_slot | test_decoding
+(1 row)
+
+RESET ROLE;
+SET ROLE lr_normal;
+SELECT slot_name, plugin FROM pg_replication_slots;
+ slot_name | plugin
+-----------------+---------------
+ regression_slot | test_decoding
+(1 row)
+
+RESET ROLE;
+-- cleanup
+SELECT pg_drop_replication_slot('regression_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+DROP ROLE lr_normal;
+DROP ROLE lr_superuser;
+DROP ROLE lr_replication;
+DROP TABLE lr_test;
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+DROP TABLE IF EXISTS replication_example;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
+INSERT INTO replication_example(somedata) VALUES (1);
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+----------------------------------------------------------------------------------------------------------
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:1 somedata[integer]:1 text[character varying]:null
+ COMMIT
+(5 rows)
+
+BEGIN;
+INSERT INTO replication_example(somedata) VALUES (2);
+ALTER TABLE replication_example ADD COLUMN testcolumn1 int;
+INSERT INTO replication_example(somedata, testcolumn1) VALUES (3, 1);
+COMMIT;
+BEGIN;
+INSERT INTO replication_example(somedata) VALUES (3);
+ALTER TABLE replication_example ADD COLUMN testcolumn2 int;
+INSERT INTO replication_example(somedata, testcolumn1, testcolumn2) VALUES (4, 2, 1);
+COMMIT;
+VACUUM FULL pg_am;
+VACUUM FULL pg_amop;
+VACUUM FULL pg_proc;
+VACUUM FULL pg_opclass;
+VACUUM FULL pg_type;
+VACUUM FULL pg_index;
+VACUUM FULL pg_database;
+-- repeated rewrites that fail
+BEGIN;
+CLUSTER pg_class USING pg_class_oid_index;
+CLUSTER pg_class USING pg_class_oid_index;
+ROLLBACK;
+-- repeated rewrites that succeed
+BEGIN;
+CLUSTER pg_class USING pg_class_oid_index;
+CLUSTER pg_class USING pg_class_oid_index;
+CLUSTER pg_class USING pg_class_oid_index;
+COMMIT;
+ -- repeated rewrites in different transactions
+VACUUM FULL pg_class;
+VACUUM FULL pg_class;
+INSERT INTO replication_example(somedata, testcolumn1) VALUES (5, 3);
+BEGIN;
+INSERT INTO replication_example(somedata, testcolumn1) VALUES (6, 4);
+ALTER TABLE replication_example ADD COLUMN testcolumn3 int;
+INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (7, 5, 1);
+COMMIT;
+-- make old files go away
+CHECKPOINT;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ data
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:2 somedata[integer]:2 text[character varying]:null
+ table public.replication_example: INSERT: id[integer]:3 somedata[integer]:3 text[character varying]:null testcolumn1[integer]:1
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:4 somedata[integer]:3 text[character varying]:null testcolumn1[integer]:null
+ table public.replication_example: INSERT: id[integer]:5 somedata[integer]:4 text[character varying]:null testcolumn1[integer]:2 testcolumn2[integer]:1
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:6 somedata[integer]:5 text[character varying]:null testcolumn1[integer]:3 testcolumn2[integer]:null
+ COMMIT
+ BEGIN
+ table public.replication_example: INSERT: id[integer]:7 somedata[integer]:6 text[character varying]:null testcolumn1[integer]:4 testcolumn2[integer]:null
+ table public.replication_example: INSERT: id[integer]:8 somedata[integer]:7 text[character varying]:null testcolumn1[integer]:5 testcolumn2[integer]:null testcolumn3[integer]:1
+ COMMIT
+(35 rows)
+
+SELECT pg_drop_replication_slot('regression_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+DROP TABLE IF EXISTS replication_example;
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+DROP TABLE IF EXISTS xpto;
+NOTICE: table "xpto" does not exist, skipping
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+CREATE SEQUENCE xpto_rand_seq START 79 INCREMENT 1499; -- portable "random"
+CREATE TABLE xpto (
+ id serial primary key,
+ toasted_col1 text,
+ rand1 float8 DEFAULT nextval('xpto_rand_seq'),
+ toasted_col2 text,
+ rand2 float8 DEFAULT nextval('xpto_rand_seq')
+);
+-- uncompressed external toast data
+INSERT INTO xpto (toasted_col1, toasted_col2) SELECT string_agg(g.i::text, ''), string_agg((g.i*2)::text, '') FROM generate_series(1, 2000) g(i);
+-- compressed external toast data
+INSERT INTO xpto (toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i);
+-- update of existing column
+UPDATE xpto SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) WHERE id = 1;
+UPDATE xpto SET rand1 = 123.456 WHERE id = 1;
+DELETE FROM xpto WHERE id = 1;
+DROP TABLE IF EXISTS toasted_key;
+NOTICE: table "toasted_key" does not exist, skipping
+CREATE TABLE toasted_key (
+ id serial,
+ toasted_key text PRIMARY KEY,
+ toasted_col1 text,
+ toasted_col2 text
+);
+ALTER TABLE toasted_key ALTER COLUMN toasted_key SET STORAGE EXTERNAL;
+ALTER TABLE toasted_key ALTER COLUMN toasted_col1 SET STORAGE EXTERNAL;
+INSERT INTO toasted_key(toasted_key, toasted_col1) VALUES(repeat('1234567890', 200), repeat('9876543210', 200));
+-- test update of a toasted key without changing it
+UPDATE toasted_key SET toasted_col2 = toasted_col1;
+-- test update of a toasted key, changing it
+UPDATE toasted_key SET toasted_key = toasted_key || '1';
+DELETE FROM toasted_key;
+SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+ substr
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.xpto: INSERT: id[integer]:1 toasted_col1[text]:'1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
+ COMMIT
+ BEGIN
+ table public.xpto: INSERT: id[integer]:2 toasted_col1[text]:null rand1[double precision]:3077 toasted_col2[text]:'00010002000300040005000600070008000900100011001200130014001500160017001800190020002100
+ COMMIT
+ BEGIN
+ table public.xpto: UPDATE: id[integer]:1 toasted_col1[text]:'1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
+ COMMIT
+ BEGIN
+ table public.xpto: UPDATE: id[integer]:1 toasted_col1[text]:unchanged-toast-datum rand1[double precision]:123.456 toasted_col2[text]:unchanged-toast-datum rand2[double precision]:1578
+ COMMIT
+ BEGIN
+ table public.xpto: DELETE: id[integer]:1
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ COMMIT
+ BEGIN
+ table public.toasted_key: INSERT: id[integer]:1 toasted_key[text]:'1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123
+ COMMIT
+ BEGIN
+ table public.toasted_key: UPDATE: id[integer]:1 toasted_key[text]:unchanged-toast-datum toasted_col1[text]:unchanged-toast-datum toasted_col2[text]:'987654321098765432109876543210987654321098765432109
+ COMMIT
+ BEGIN
+ table public.toasted_key: UPDATE: old-key: toasted_key[text]:'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
+ COMMIT
+ BEGIN
+ table public.toasted_key: DELETE: toasted_key[text]:'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
+ COMMIT
+(37 rows)
+
+SELECT pg_drop_replication_slot('regression_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
--- /dev/null
+wal_level = logical
+max_replication_slots = 4
--- /dev/null
+setup
+{
+ DROP TABLE IF EXISTS tbl1;
+ DROP TABLE IF EXISTS tbl2;
+ CREATE TABLE tbl1(val1 integer, val2 integer);
+ CREATE TABLE tbl2(val1 integer, val2 integer);
+}
+
+teardown
+{
+ DROP TABLE tbl1;
+ DROP TABLE tbl2;
+ SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
+}
+
+session "s1"
+step "s1_init" { SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); }
+step "s1_begin" { BEGIN; }
+step "s1_insert_tbl1" { INSERT INTO tbl1 (val1, val2) VALUES (1, 1); }
+step "s1_insert_tbl1_3col" { INSERT INTO tbl1 (val1, val2, val3) VALUES (1, 1, 1); }
+step "s1_insert_tbl2" { INSERT INTO tbl2 (val1, val2) VALUES (1, 1); }
+step "s1_insert_tbl2_3col" { INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); }
+step "s1_commit" { COMMIT; }
+
+session "s2"
+step "s2_alter_tbl1_float" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; }
+step "s2_alter_tbl1_char" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; }
+step "s2_alter_tbl1_text" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE text; }
+step "s2_alter_tbl1_boolean" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE boolean; }
+
+step "s2_alter_tbl1_add_int" { ALTER TABLE tbl1 ADD COLUMN val3 INTEGER; }
+step "s2_alter_tbl1_add_float" { ALTER TABLE tbl1 ADD COLUMN val3 FLOAT; }
+step "s2_alter_tbl1_add_char" { ALTER TABLE tbl1 ADD COLUMN val3 character varying; }
+step "s2_alter_tbl1_add_boolean" { ALTER TABLE tbl1 ADD COLUMN val3 BOOLEAN; }
+step "s2_alter_tbl1_add_text" { ALTER TABLE tbl1 ADD COLUMN val3 TEXT; }
+
+step "s2_alter_tbl2_float" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE float; }
+step "s2_alter_tbl2_char" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying; }
+step "s2_alter_tbl2_text" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE text; }
+step "s2_alter_tbl2_boolean" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean; }
+step "s2_alter_tbl2_text" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean; }
+
+step "s2_alter_tbl2_add_int" { ALTER TABLE tbl2 ADD COLUMN val3 INTEGER; }
+step "s2_alter_tbl2_add_float" { ALTER TABLE tbl2 ADD COLUMN val3 FLOAT; }
+step "s2_alter_tbl2_add_char" { ALTER TABLE tbl2 ADD COLUMN val3 character varying; }
+step "s2_alter_tbl2_add_boolean" { ALTER TABLE tbl2 ADD COLUMN val3 BOOLEAN; }
+step "s2_alter_tbl2_add_text" { ALTER TABLE tbl2 ADD COLUMN val3 TEXT; }
+step "s2_alter_tbl2_drop_3rd_col" { ALTER TABLE tbl2 DROP COLUMN val3; }
+step "s2_alter_tbl2_3rd_char" { ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying; }
+step "s2_alter_tbl2_3rd_text" { ALTER TABLE tbl2 ALTER COLUMN val3 TYPE text; }
+step "s2_alter_tbl2_3rd_int" { ALTER TABLE tbl2 ALTER COLUMN val3 TYPE int USING val3::integer; }
+
+step "s2_get_changes" { SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); }
+
+
+
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_float" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl1_float" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_char" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl1_char" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
+
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s2_alter_tbl1_float" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s2_alter_tbl1_char" "s1_commit" "s2_get_changes"
+
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_float" "s1_insert_tbl2" "s2_alter_tbl1_float" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_char" "s1_insert_tbl2" "s2_alter_tbl1_char" "s1_commit" "s2_get_changes"
+
+permutation "s1_init" "s2_alter_tbl2_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_text" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s2_alter_tbl2_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_text" "s1_insert_tbl2" "s2_alter_tbl1_char" "s1_commit" "s2_get_changes"
+
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_boolean" "s1_insert_tbl2" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_boolean" "s1_insert_tbl2" "s2_alter_tbl1_boolean" "s1_commit" "s2_get_changes"
+
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_add_int" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s1_commit" "s1_begin" "s2_alter_tbl2_add_int" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
+
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_add_float" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s1_commit" "s1_begin" "s2_alter_tbl2_add_float" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
+
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_add_char" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s1_commit" "s1_begin" "s2_alter_tbl2_add_char" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes"
+
+permutation "s1_init" "s2_alter_tbl2_add_int" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_drop_3rd_col" "s1_commit" "s2_get_changes"
+permutation "s1_init" "s2_alter_tbl2_add_int" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl2" "s1_commit" "s1_insert_tbl2" "s2_get_changes"
+
+permutation "s1_init" "s2_alter_tbl2_add_int" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_drop_3rd_col" "s1_commit" "s2_get_changes" "s2_alter_tbl2_add_text" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_3rd_char" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes" "s2_alter_tbl2_3rd_int" "s1_insert_tbl2_3col" "s2_get_changes"
+
+permutation "s1_init" "s2_alter_tbl2_add_char" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2_3col" "s2_alter_tbl2_3rd_text" "s1_insert_tbl2_3col" "s1_commit" "s1_insert_tbl2_3col" "s2_get_changes"
+permutation "s1_init" "s2_alter_tbl2_add_text" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2_3col" "s2_alter_tbl2_3rd_char" "s1_insert_tbl2_3col" "s1_commit" "s1_insert_tbl2_3col" "s2_get_changes"
+
+permutation "s1_init" "s2_alter_tbl2_add_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_3rd_text" "s1_insert_tbl2_3col" "s1_commit" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl2" "s2_get_changes"
+permutation "s1_init" "s2_alter_tbl2_add_text" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_3rd_char" "s1_insert_tbl2_3col" "s1_commit" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl2" "s2_get_changes"
+
+permutation "s1_init" "s2_alter_tbl2_add_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl1" "s1_commit" "s2_get_changes"
--- /dev/null
+setup
+{
+ DROP TABLE IF EXISTS do_write;
+ CREATE TABLE do_write(id serial primary key);
+}
+
+teardown
+{
+ DROP TABLE do_write;
+ SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
+}
+
+session "s1"
+setup { SET synchronous_commit=on; }
+step "s1b" { BEGIN ISOLATION LEVEL SERIALIZABLE; }
+step "s1w" { INSERT INTO do_write DEFAULT VALUES; }
+step "s1c" { COMMIT; }
+session "s2"
+setup { SET synchronous_commit=on; }
+step "s2init" {SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');}
+step "s2start" {SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');}
+
+
+permutation "s1b" "s1w" "s2init" "s1c" "s2start" "s1b" "s1w" "s1c" "s2start" "s1b" "s1w" "s2start" "s1c" "s2start"
--- /dev/null
+setup
+{
+ DROP TABLE IF EXISTS do_write;
+ CREATE TABLE do_write(id serial primary key);
+}
+
+teardown
+{
+ DROP TABLE IF EXISTS do_write;
+ SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
+}
+
+session "s0"
+setup { SET synchronous_commit=on; }
+step "s0init" {SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');}
+step "s0start" {SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');}
+step "s0alter" {ALTER TABLE do_write ADD column ts timestamptz; }
+step "s0w" { INSERT INTO do_write DEFAULT VALUES; }
+
+session "s1"
+setup { SET synchronous_commit=on; }
+step "s1begin" {BEGIN;}
+step "s1sharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s; }
+step "s1keysharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s; }
+step "s1commit" {COMMIT;}
+
+session "s2"
+setup { SET synchronous_commit=on; }
+step "s2begin" {BEGIN;}
+step "s2sharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s; }
+step "s2keysharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s; }
+step "s2commit" {COMMIT;}
+
+# test that we're handling an update-only mxact xmax correctly
+permutation "s0init" "s0start" "s1begin" "s1sharepgclass" "s2begin" "s2sharepgclass" "s0w" "s0start" "s2commit" "s1commit"
+
+# test that we're handling an update-only mxact xmax correctly
+permutation "s0init" "s0start" "s1begin" "s1keysharepgclass" "s2begin" "s2keysharepgclass" "s0alter" "s0w" "s0start" "s2commit" "s1commit"
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+-- succeeds, textual plugin, textual consumer
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '0');
+-- fails, binary plugin, textual consumer
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '1');
+-- succeeds, textual plugin, binary consumer
+SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '0');
+-- succeeds, binary plugin, binary consumer
+SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '1');
+
+SELECT 'init' FROM pg_drop_replication_slot('regression_slot');
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+-- fail because of an already existing slot
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+-- fail because of an invalid name
+SELECT 'init' FROM pg_create_logical_replication_slot('Invalid Name', 'test_decoding');
+
+-- fail twice because of an invalid parameter values
+SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', 'frakbar');
+SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'nonexistant-option', 'frakbar');
+SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', 'frakbar');
+
+-- succeed once
+SELECT pg_drop_replication_slot('regression_slot');
+-- fail
+SELECT pg_drop_replication_slot('regression_slot');
+
+-- check that we're detecting a streaming rep slot used for logical decoding
+SELECT 'init' FROM pg_create_physical_replication_slot('repl');
+SELECT data FROM pg_logical_slot_get_changes('repl', NULL, NULL, 'include-xids', '0');
+SELECT pg_drop_replication_slot('repl');
+
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+
+/* check whether status function reports us, only reproduceable columns */
+SELECT slot_name, plugin, slot_type, active,
+ NOT catalog_xmin IS NULL AS catalog_xmin_set,
+ xmin IS NULl AS data_xmin_not_set,
+ pg_xlog_location_diff(restart_lsn, '0/01000000') > 0 AS some_wal
+FROM pg_replication_slots;
+
+/*
+ * Check that changes are handled correctly when interleaved with ddl
+ */
+CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
+BEGIN;
+INSERT INTO replication_example(somedata, text) VALUES (1, 1);
+INSERT INTO replication_example(somedata, text) VALUES (1, 2);
+COMMIT;
+
+ALTER TABLE replication_example ADD COLUMN bar int;
+
+INSERT INTO replication_example(somedata, text, bar) VALUES (2, 1, 4);
+
+BEGIN;
+INSERT INTO replication_example(somedata, text, bar) VALUES (2, 2, 4);
+INSERT INTO replication_example(somedata, text, bar) VALUES (2, 3, 4);
+INSERT INTO replication_example(somedata, text, bar) VALUES (2, 4, NULL);
+COMMIT;
+
+ALTER TABLE replication_example DROP COLUMN bar;
+INSERT INTO replication_example(somedata, text) VALUES (3, 1);
+
+BEGIN;
+INSERT INTO replication_example(somedata, text) VALUES (3, 2);
+INSERT INTO replication_example(somedata, text) VALUES (3, 3);
+COMMIT;
+
+ALTER TABLE replication_example RENAME COLUMN text TO somenum;
+
+INSERT INTO replication_example(somedata, somenum) VALUES (4, 1);
+
+-- collect all changes
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+ALTER TABLE replication_example ALTER COLUMN somenum TYPE int4 USING (somenum::int4);
+-- throw away changes, they contain oids
+SELECT count(data) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+INSERT INTO replication_example(somedata, somenum) VALUES (5, 1);
+
+BEGIN;
+INSERT INTO replication_example(somedata, somenum) VALUES (6, 1);
+ALTER TABLE replication_example ADD COLUMN zaphod1 int;
+INSERT INTO replication_example(somedata, somenum, zaphod1) VALUES (6, 2, 1);
+ALTER TABLE replication_example ADD COLUMN zaphod2 int;
+INSERT INTO replication_example(somedata, somenum, zaphod2) VALUES (6, 3, 1);
+INSERT INTO replication_example(somedata, somenum, zaphod1) VALUES (6, 4, 2);
+COMMIT;
+
+-- show changes
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+-- hide changes bc of oid visible in full table rewrites
+CREATE TABLE tr_unique(id2 serial unique NOT NULL, data int);
+INSERT INTO tr_unique(data) VALUES(10);
+ALTER TABLE tr_unique RENAME TO tr_pkey;
+ALTER TABLE tr_pkey ADD COLUMN id serial primary key;
+SELECT count(data) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+INSERT INTO tr_pkey(data) VALUES(1);
+--show deletion with primary key
+DELETE FROM tr_pkey;
+
+/* display results */
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+/*
+ * check that disk spooling works
+ */
+BEGIN;
+CREATE TABLE tr_etoomuch (id serial primary key, data int);
+INSERT INTO tr_etoomuch(data) SELECT g.i FROM generate_series(1, 10234) g(i);
+DELETE FROM tr_etoomuch WHERE id < 5000;
+UPDATE tr_etoomuch SET data = - data WHERE id > 5000;
+COMMIT;
+
+/* display results, but hide most of the output */
+SELECT count(*), min(data), max(data)
+FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0')
+GROUP BY substring(data, 1, 24)
+ORDER BY 1;
+
+/*
+ * check whether we decode subtransactions correctly in relation with each
+ * other
+ */
+CREATE TABLE tr_sub (id serial primary key, path text);
+
+-- toplevel, subtxn, toplevel, subtxn, subtxn
+BEGIN;
+INSERT INTO tr_sub(path) VALUES ('1-top-#1');
+
+SAVEPOINT a;
+INSERT INTO tr_sub(path) VALUES ('1-top-1-#1');
+INSERT INTO tr_sub(path) VALUES ('1-top-1-#2');
+RELEASE SAVEPOINT a;
+
+SAVEPOINT b;
+SAVEPOINT c;
+INSERT INTO tr_sub(path) VALUES ('1-top-2-1-#1');
+INSERT INTO tr_sub(path) VALUES ('1-top-2-1-#2');
+RELEASE SAVEPOINT c;
+INSERT INTO tr_sub(path) VALUES ('1-top-2-#1');
+RELEASE SAVEPOINT b;
+COMMIT;
+
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+-- check that we handle xlog assignments correctly
+BEGIN;
+-- nest 80 subtxns
+SAVEPOINT subtop;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;
+-- assign xid by inserting
+INSERT INTO tr_sub(path) VALUES ('2-top-1...--#1');
+INSERT INTO tr_sub(path) VALUES ('2-top-1...--#2');
+INSERT INTO tr_sub(path) VALUES ('2-top-1...--#3');
+RELEASE SAVEPOINT subtop;
+INSERT INTO tr_sub(path) VALUES ('2-top-#1');
+COMMIT;
+
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+-- make sure rollbacked subtransactions aren't decoded
+BEGIN;
+INSERT INTO tr_sub(path) VALUES ('3-top-2-#1');
+SAVEPOINT a;
+INSERT INTO tr_sub(path) VALUES ('3-top-2-1-#1');
+SAVEPOINT b;
+INSERT INTO tr_sub(path) VALUES ('3-top-2-2-#1');
+ROLLBACK TO SAVEPOINT b;
+INSERT INTO tr_sub(path) VALUES ('3-top-2-#2');
+COMMIT;
+
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+-- test whether a known, but not yet logged toplevel xact, followed by a
+-- subxact commit is handled correctly
+BEGIN;
+SELECT txid_current() != 0; -- so no fixed xid apears in the outfile
+SAVEPOINT a;
+INSERT INTO tr_sub(path) VALUES ('4-top-1-#1');
+RELEASE SAVEPOINT a;
+COMMIT;
+
+-- test whether a change in a subtransaction, in an unknown toplevel
+-- xact is handled correctly.
+BEGIN;
+SAVEPOINT a;
+INSERT INTO tr_sub(path) VALUES ('5-top-1-#1');
+COMMIT;
+
+
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+
+/*
+ * Check whether treating a table as a catalog table works somewhat
+ */
+CREATE TABLE replication_metadata (
+ id serial primary key,
+ relation name NOT NULL,
+ options text[]
+)
+WITH (user_catalog_table = true)
+;
+\d+ replication_metadata
+
+INSERT INTO replication_metadata(relation, options)
+VALUES ('foo', ARRAY['a', 'b']);
+
+ALTER TABLE replication_metadata RESET (user_catalog_table);
+\d+ replication_metadata
+
+INSERT INTO replication_metadata(relation, options)
+VALUES ('bar', ARRAY['a', 'b']);
+
+ALTER TABLE replication_metadata SET (user_catalog_table = true);
+\d+ replication_metadata
+
+INSERT INTO replication_metadata(relation, options)
+VALUES ('blub', NULL);
+
+-- make sure rewrites don't work
+ALTER TABLE replication_metadata ADD COLUMN rewritemeornot int;
+ALTER TABLE replication_metadata ALTER COLUMN rewritemeornot TYPE text;
+
+ALTER TABLE replication_metadata SET (user_catalog_table = false);
+\d+ replication_metadata
+
+INSERT INTO replication_metadata(relation, options)
+VALUES ('zaphod', NULL);
+
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+/*
+ * check whether we handle updates/deletes correct with & without a pkey
+ */
+
+/* we should handle the case without a key at all more gracefully */
+CREATE TABLE table_without_key(id serial, data int);
+INSERT INTO table_without_key(data) VALUES(1),(2);
+DELETE FROM table_without_key WHERE data = 1;
+-- won't log old keys
+UPDATE table_without_key SET data = 3 WHERE data = 2;
+UPDATE table_without_key SET id = -id;
+UPDATE table_without_key SET id = -id;
+-- should log the full old row now
+ALTER TABLE table_without_key REPLICA IDENTITY FULL;
+UPDATE table_without_key SET data = 3 WHERE data = 2;
+UPDATE table_without_key SET id = -id;
+UPDATE table_without_key SET id = -id;
+DELETE FROM table_without_key WHERE data = 3;
+
+CREATE TABLE table_with_pkey(id serial primary key, data int);
+INSERT INTO table_with_pkey(data) VALUES(1), (2);
+DELETE FROM table_with_pkey WHERE data = 1;
+-- should log the old pkey
+UPDATE table_with_pkey SET data = 3 WHERE data = 2;
+UPDATE table_with_pkey SET id = -id;
+UPDATE table_with_pkey SET id = -id;
+-- check that we log nothing despite having a pkey
+ALTER TABLE table_without_key REPLICA IDENTITY NOTHING;
+UPDATE table_with_pkey SET id = -id;
+-- check that we log everything despite having a pkey
+ALTER TABLE table_without_key REPLICA IDENTITY FULL;
+UPDATE table_with_pkey SET id = -id;
+DELETE FROM table_with_pkey WHERE data = 3;
+
+CREATE TABLE table_with_unique_not_null(id serial unique, data int);
+ALTER TABLE table_with_unique_not_null ALTER COLUMN id SET NOT NULL; --already set
+-- won't log anything, replica identity not setup
+INSERT INTO table_with_unique_not_null(data) VALUES(1), (2);
+DELETE FROM table_with_unique_not_null WHERE data = 1;
+UPDATE table_with_unique_not_null SET data = 3 WHERE data = 2;
+UPDATE table_with_unique_not_null SET id = -id;
+UPDATE table_with_unique_not_null SET id = -id;
+DELETE FROM table_with_unique_not_null WHERE data = 3;
+-- should log old key
+ALTER TABLE table_with_unique_not_null REPLICA IDENTITY USING INDEX table_with_unique_not_null_id_key;
+INSERT INTO table_with_unique_not_null(data) VALUES(1), (2);
+DELETE FROM table_with_unique_not_null WHERE data = 1;
+UPDATE table_with_unique_not_null SET data = 3 WHERE data = 2;
+UPDATE table_with_unique_not_null SET id = -id;
+UPDATE table_with_unique_not_null SET id = -id;
+DELETE FROM table_with_unique_not_null WHERE data = 3;
+
+-- check toast support
+BEGIN;
+CREATE SEQUENCE toasttable_rand_seq START 79 INCREMENT 1499; -- portable "random"
+CREATE TABLE toasttable(
+ id serial primary key,
+ toasted_col1 text,
+ rand1 float8 DEFAULT nextval('toasttable_rand_seq'),
+ toasted_col2 text,
+ rand2 float8 DEFAULT nextval('toasttable_rand_seq')
+ );
+COMMIT;
+-- uncompressed external toast data
+INSERT INTO toasttable(toasted_col1) SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i);
+
+-- compressed external toast data
+INSERT INTO toasttable(toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i);
+
+-- update of existing column
+UPDATE toasttable
+ SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i))
+WHERE id = 1;
+
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+INSERT INTO toasttable(toasted_col1) SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i);
+
+-- update of second column, first column unchanged
+UPDATE toasttable
+ SET toasted_col2 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i))
+WHERE id = 1;
+
+-- make sure we decode correctly even if the toast table is gone
+DROP TABLE toasttable;
+
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+-- done, free logical replication slot
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+SELECT pg_drop_replication_slot('regression_slot');
+
+/* check that we aren't visible anymore now */
+SELECT * FROM pg_stat_replication;
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+
+-- fail because we're creating a slot while in an xact with xid
+BEGIN;
+SELECT txid_current() = 0;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ROLLBACK;
+
+-- fail because we're creating a slot while in an subxact whose topxact has a xid
+BEGIN;
+SELECT txid_current() = 0;
+SAVEPOINT barf;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+ROLLBACK TO SAVEPOINT barf;
+ROLLBACK;
+
+-- succeed, outside tx.
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');
+
+-- succeed, in tx without xid.
+BEGIN;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+COMMIT;
+
+CREATE TABLE nobarf(id serial primary key, data text);
+INSERT INTO nobarf(data) VALUES('1');
+
+-- decoding works in transaction with xid
+BEGIN;
+SELECT txid_current() = 0;
+-- don't show yet, haven't committed
+INSERT INTO nobarf(data) VALUES('2');
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+COMMIT;
+
+INSERT INTO nobarf(data) VALUES('3');
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+
+-- setup
+CREATE ROLE lr_normal;
+CREATE ROLE lr_superuser SUPERUSER;
+CREATE ROLE lr_replication REPLICATION;
+CREATE TABLE lr_test(data text);
+
+-- superuser can control replication
+SET ROLE lr_superuser;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+INSERT INTO lr_test VALUES('lr_superuser_init');
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+SELECT pg_drop_replication_slot('regression_slot');
+RESET ROLE;
+
+-- replication user can control replication
+SET ROLE lr_replication;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+INSERT INTO lr_test VALUES('lr_superuser_init');
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+SELECT pg_drop_replication_slot('regression_slot');
+RESET ROLE;
+
+-- plain user *can't* can control replication
+SET ROLE lr_normal;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+INSERT INTO lr_test VALUES('lr_superuser_init');
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+SELECT pg_drop_replication_slot('regression_slot');
+RESET ROLE;
+
+-- replication users can drop superuser created slots
+SET ROLE lr_superuser;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+RESET ROLE;
+SET ROLE lr_replication;
+SELECT pg_drop_replication_slot('regression_slot');
+RESET ROLE;
+
+-- normal users can't drop existing slots
+SET ROLE lr_superuser;
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+RESET ROLE;
+SET ROLE lr_normal;
+SELECT pg_drop_replication_slot('regression_slot');
+RESET ROLE;
+
+-- all users can see existing slots
+SET ROLE lr_superuser;
+SELECT slot_name, plugin FROM pg_replication_slots;
+RESET ROLE;
+
+SET ROLE lr_replication;
+SELECT slot_name, plugin FROM pg_replication_slots;
+RESET ROLE;
+
+SET ROLE lr_normal;
+SELECT slot_name, plugin FROM pg_replication_slots;
+RESET ROLE;
+
+-- cleanup
+SELECT pg_drop_replication_slot('regression_slot');
+
+DROP ROLE lr_normal;
+DROP ROLE lr_superuser;
+DROP ROLE lr_replication;
+DROP TABLE lr_test;
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+
+DROP TABLE IF EXISTS replication_example;
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
+INSERT INTO replication_example(somedata) VALUES (1);
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+
+BEGIN;
+INSERT INTO replication_example(somedata) VALUES (2);
+ALTER TABLE replication_example ADD COLUMN testcolumn1 int;
+INSERT INTO replication_example(somedata, testcolumn1) VALUES (3, 1);
+COMMIT;
+
+BEGIN;
+INSERT INTO replication_example(somedata) VALUES (3);
+ALTER TABLE replication_example ADD COLUMN testcolumn2 int;
+INSERT INTO replication_example(somedata, testcolumn1, testcolumn2) VALUES (4, 2, 1);
+COMMIT;
+
+VACUUM FULL pg_am;
+VACUUM FULL pg_amop;
+VACUUM FULL pg_proc;
+VACUUM FULL pg_opclass;
+VACUUM FULL pg_type;
+VACUUM FULL pg_index;
+VACUUM FULL pg_database;
+
+-- repeated rewrites that fail
+BEGIN;
+CLUSTER pg_class USING pg_class_oid_index;
+CLUSTER pg_class USING pg_class_oid_index;
+ROLLBACK;
+
+-- repeated rewrites that succeed
+BEGIN;
+CLUSTER pg_class USING pg_class_oid_index;
+CLUSTER pg_class USING pg_class_oid_index;
+CLUSTER pg_class USING pg_class_oid_index;
+COMMIT;
+
+ -- repeated rewrites in different transactions
+VACUUM FULL pg_class;
+VACUUM FULL pg_class;
+
+INSERT INTO replication_example(somedata, testcolumn1) VALUES (5, 3);
+
+BEGIN;
+INSERT INTO replication_example(somedata, testcolumn1) VALUES (6, 4);
+ALTER TABLE replication_example ADD COLUMN testcolumn3 int;
+INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (7, 5, 1);
+COMMIT;
+
+-- make old files go away
+CHECKPOINT;
+
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+SELECT pg_drop_replication_slot('regression_slot');
+
+DROP TABLE IF EXISTS replication_example;
--- /dev/null
+-- predictability
+SET synchronous_commit = on;
+
+DROP TABLE IF EXISTS xpto;
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding');
+
+CREATE SEQUENCE xpto_rand_seq START 79 INCREMENT 1499; -- portable "random"
+CREATE TABLE xpto (
+ id serial primary key,
+ toasted_col1 text,
+ rand1 float8 DEFAULT nextval('xpto_rand_seq'),
+ toasted_col2 text,
+ rand2 float8 DEFAULT nextval('xpto_rand_seq')
+);
+
+-- uncompressed external toast data
+INSERT INTO xpto (toasted_col1, toasted_col2) SELECT string_agg(g.i::text, ''), string_agg((g.i*2)::text, '') FROM generate_series(1, 2000) g(i);
+
+-- compressed external toast data
+INSERT INTO xpto (toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i);
+
+-- update of existing column
+UPDATE xpto SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) WHERE id = 1;
+
+UPDATE xpto SET rand1 = 123.456 WHERE id = 1;
+
+DELETE FROM xpto WHERE id = 1;
+
+DROP TABLE IF EXISTS toasted_key;
+CREATE TABLE toasted_key (
+ id serial,
+ toasted_key text PRIMARY KEY,
+ toasted_col1 text,
+ toasted_col2 text
+);
+
+ALTER TABLE toasted_key ALTER COLUMN toasted_key SET STORAGE EXTERNAL;
+ALTER TABLE toasted_key ALTER COLUMN toasted_col1 SET STORAGE EXTERNAL;
+
+INSERT INTO toasted_key(toasted_key, toasted_col1) VALUES(repeat('1234567890', 200), repeat('9876543210', 200));
+
+-- test update of a toasted key without changing it
+UPDATE toasted_key SET toasted_col2 = toasted_col1;
+-- test update of a toasted key, changing it
+UPDATE toasted_key SET toasted_key = toasted_key || '1';
+
+DELETE FROM toasted_key;
+
+SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0');
+SELECT pg_drop_replication_slot('regression_slot');
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * test_decoding.c
+ * example logical decoding output plugin
+ *
+ * Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/test_decoding/test_decoding.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/sysattr.h"
+
+#include "catalog/pg_class.h"
+#include "catalog/pg_type.h"
+
+#include "nodes/parsenodes.h"
+
+#include "replication/output_plugin.h"
+#include "replication/logical.h"
+
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+
+PG_MODULE_MAGIC;
+
+extern void _PG_init(void);
+extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
+
+typedef struct
+{
+ MemoryContext context;
+ bool include_xids;
+ bool include_timestamp;
+} TestDecodingData;
+
+/* These must be available to pg_dlsym() */
+static void pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
+ bool is_init);
+static void pg_decode_shutdown(LogicalDecodingContext *ctx);
+static void pg_decode_begin_txn(LogicalDecodingContext *ctx,
+ ReorderBufferTXN *txn);
+static void pg_decode_commit_txn(LogicalDecodingContext *ctx,
+ ReorderBufferTXN *txn, XLogRecPtr commit_lsn);
+static void pg_decode_change(LogicalDecodingContext *ctx,
+ ReorderBufferTXN *txn, Relation rel,
+ ReorderBufferChange *change);
+
+void
+_PG_init(void)
+{
+ /* other plugins can perform things here */
+}
+
+/* specify output plugin callbacks */
+void
+_PG_output_plugin_init(OutputPluginCallbacks *cb)
+{
+ AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit);
+
+ cb->startup_cb = pg_decode_startup;
+ cb->begin_cb = pg_decode_begin_txn;
+ cb->change_cb = pg_decode_change;
+ cb->commit_cb = pg_decode_commit_txn;
+ cb->shutdown_cb = pg_decode_shutdown;
+}
+
+
+/* initialize this plugin */
+static void
+pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
+ bool is_init)
+{
+ ListCell *option;
+ TestDecodingData *data;
+
+ data = palloc(sizeof(TestDecodingData));
+ data->context = AllocSetContextCreate(ctx->context,
+ "text conversion context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ data->include_xids = true;
+ data->include_timestamp = false;
+
+ ctx->output_plugin_private = data;
+
+ opt->output_type = OUTPUT_PLUGIN_TEXTUAL_OUTPUT;
+
+ foreach(option, ctx->output_plugin_options)
+ {
+ DefElem *elem = lfirst(option);
+
+ Assert(elem->arg == NULL || IsA(elem->arg, String));
+
+ if (strcmp(elem->defname, "include-xids") == 0)
+ {
+ /* if option does not provide a value, it means its value is true */
+ if (elem->arg == NULL)
+ data->include_xids = true;
+ else if (!parse_bool(strVal(elem->arg), &data->include_xids))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not parse value \"%s\" for parameter \"%s\"",
+ strVal(elem->arg), elem->defname)));
+ }
+ else if (strcmp(elem->defname, "include-timestamp") == 0)
+ {
+ if (elem->arg == NULL)
+ data->include_timestamp = true;
+ else if (!parse_bool(strVal(elem->arg), &data->include_timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not parse value \"%s\" for parameter \"%s\"",
+ strVal(elem->arg), elem->defname)));
+ }
+ else if (strcmp(elem->defname, "force-binary") == 0)
+ {
+ bool force_binary;
+
+ if (elem->arg == NULL)
+ continue;
+ else if (!parse_bool(strVal(elem->arg), &force_binary))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not parse value \"%s\" for parameter \"%s\"",
+ strVal(elem->arg), elem->defname)));
+
+ if (force_binary)
+ opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT;
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("option \"%s\" = \"%s\" is unknown",
+ elem->defname,
+ elem->arg ? strVal(elem->arg) : "(null)")));
+ }
+ }
+}
+
+/* cleanup this plugin's resources */
+static void
+pg_decode_shutdown(LogicalDecodingContext *ctx)
+{
+ TestDecodingData *data = ctx->output_plugin_private;
+
+ /* cleanup our own resources via memory context reset */
+ MemoryContextDelete(data->context);
+}
+
+/* BEGIN callback */
+static void
+pg_decode_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
+{
+ TestDecodingData *data = ctx->output_plugin_private;
+
+ OutputPluginPrepareWrite(ctx, true);
+ if (data->include_xids)
+ appendStringInfo(ctx->out, "BEGIN %u", txn->xid);
+ else
+ appendStringInfoString(ctx->out, "BEGIN");
+ OutputPluginWrite(ctx, true);
+}
+
+/* COMMIT callback */
+static void
+pg_decode_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+ XLogRecPtr commit_lsn)
+{
+ TestDecodingData *data = ctx->output_plugin_private;
+
+ OutputPluginPrepareWrite(ctx, true);
+ if (data->include_xids)
+ appendStringInfo(ctx->out, "COMMIT %u", txn->xid);
+ else
+ appendStringInfoString(ctx->out, "COMMIT");
+
+ if (data->include_timestamp)
+ appendStringInfo(ctx->out, " (at %s)",
+ timestamptz_to_str(txn->commit_time));
+
+ OutputPluginWrite(ctx, true);
+}
+
+/*
+ * Print literal `outputstr' already represented as string of type `typid'
+ * into stringbuf `s'.
+ *
+ * Some builtin types aren't quoted, the rest is quoted. Escaping is done as
+ * if standard_conforming_strings were enabled.
+ */
+static void
+print_literal(StringInfo s, Oid typid, char *outputstr)
+{
+ const char *valptr;
+
+ switch (typid)
+ {
+ case INT2OID:
+ case INT4OID:
+ case INT8OID:
+ case OIDOID:
+ case FLOAT4OID:
+ case FLOAT8OID:
+ case NUMERICOID:
+ /* NB: We don't care about Inf, NaN et al. */
+ appendStringInfoString(s, outputstr);
+ break;
+
+ case BITOID:
+ case VARBITOID:
+ appendStringInfo(s, "B'%s'", outputstr);
+ break;
+
+ case BOOLOID:
+ if (strcmp(outputstr, "t") == 0)
+ appendStringInfoString(s, "true");
+ else
+ appendStringInfoString(s, "false");
+ break;
+
+ default:
+ appendStringInfoChar(s, '\'');
+ for (valptr = outputstr; *valptr; valptr++)
+ {
+ char ch = *valptr;
+
+ if (SQL_STR_DOUBLE(ch, false))
+ appendStringInfoChar(s, ch);
+ appendStringInfoChar(s, ch);
+ }
+ appendStringInfoChar(s, '\'');
+ break;
+ }
+}
+
+/* print the tuple 'tuple' into the StringInfo s */
+static void
+tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple, bool skip_nulls)
+{
+ int natt;
+ Oid oid;
+
+ /* print oid of tuple, it's not included in the TupleDesc */
+ if ((oid = HeapTupleHeaderGetOid(tuple->t_data)) != InvalidOid)
+ {
+ appendStringInfo(s, " oid[oid]:%u", oid);
+ }
+
+ /* print all columns individually */
+ for (natt = 0; natt < tupdesc->natts; natt++)
+ {
+ Form_pg_attribute attr; /* the attribute itself */
+ Oid typid; /* type of current attribute */
+ Oid typoutput; /* output function */
+ bool typisvarlena;
+ Datum origval; /* possibly toasted Datum */
+ bool isnull; /* column is null? */
+
+ attr = tupdesc->attrs[natt];
+
+ /*
+ * don't print dropped columns, we can't be sure everything is
+ * available for them
+ */
+ if (attr->attisdropped)
+ continue;
+
+ /*
+ * Don't print system columns, oid will already have been printed if
+ * present.
+ */
+ if (attr->attnum < 0)
+ continue;
+
+ typid = attr->atttypid;
+
+ /* get Datum from tuple */
+ origval = fastgetattr(tuple, natt + 1, tupdesc, &isnull);
+
+ if (isnull && skip_nulls)
+ continue;
+
+ /* print attribute name */
+ appendStringInfoChar(s, ' ');
+ appendStringInfoString(s, quote_identifier(NameStr(attr->attname)));
+
+ /* print attribute type */
+ appendStringInfoChar(s, '[');
+ appendStringInfoString(s, format_type_be(typid));
+ appendStringInfoChar(s, ']');
+
+ /* query output function */
+ getTypeOutputInfo(typid,
+ &typoutput, &typisvarlena);
+
+ /* print separator */
+ appendStringInfoChar(s, ':');
+
+ /* print data */
+ if (isnull)
+ appendStringInfoString(s, "null");
+ else if (typisvarlena && VARATT_IS_EXTERNAL_ONDISK(origval))
+ appendStringInfoString(s, "unchanged-toast-datum");
+ else if (!typisvarlena)
+ print_literal(s, typid,
+ OidOutputFunctionCall(typoutput, origval));
+ else
+ {
+ Datum val; /* definitely detoasted Datum */
+ val = PointerGetDatum(PG_DETOAST_DATUM(origval));
+ print_literal(s, typid, OidOutputFunctionCall(typoutput, val));
+ }
+ }
+}
+
+/*
+ * callback for individual changed tuples
+ */
+static void
+pg_decode_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+ Relation relation, ReorderBufferChange *change)
+{
+ TestDecodingData *data;
+ Form_pg_class class_form;
+ TupleDesc tupdesc;
+ MemoryContext old;
+
+ data = ctx->output_plugin_private;
+ class_form = RelationGetForm(relation);
+ tupdesc = RelationGetDescr(relation);
+
+ /* Avoid leaking memory by using and resetting our own context */
+ old = MemoryContextSwitchTo(data->context);
+
+ OutputPluginPrepareWrite(ctx, true);
+
+ appendStringInfoString(ctx->out, "table ");
+ appendStringInfoString(ctx->out,
+ quote_qualified_identifier(
+ get_namespace_name(
+ get_rel_namespace(RelationGetRelid(relation))),
+ NameStr(class_form->relname)));
+ appendStringInfoString(ctx->out, ":");
+
+ switch (change->action)
+ {
+ case REORDER_BUFFER_CHANGE_INSERT:
+ appendStringInfoString(ctx->out, " INSERT:");
+ if (change->tp.newtuple == NULL)
+ appendStringInfoString(ctx->out, " (no-tuple-data)");
+ else
+ tuple_to_stringinfo(ctx->out, tupdesc,
+ &change->tp.newtuple->tuple,
+ false);
+ break;
+ case REORDER_BUFFER_CHANGE_UPDATE:
+ appendStringInfoString(ctx->out, " UPDATE:");
+ if (change->tp.oldtuple != NULL)
+ {
+ appendStringInfoString(ctx->out, " old-key:");
+ tuple_to_stringinfo(ctx->out, tupdesc,
+ &change->tp.oldtuple->tuple,
+ true);
+ appendStringInfoString(ctx->out, " new-tuple:");
+ }
+
+ if (change->tp.newtuple == NULL)
+ appendStringInfoString(ctx->out, " (no-tuple-data)");
+ else
+ tuple_to_stringinfo(ctx->out, tupdesc,
+ &change->tp.newtuple->tuple,
+ false);
+ break;
+ case REORDER_BUFFER_CHANGE_DELETE:
+ appendStringInfoString(ctx->out, " DELETE:");
+
+ /* if there was no PK, we only know that a delete happened */
+ if (change->tp.oldtuple == NULL)
+ appendStringInfoString(ctx->out, " (no-tuple-data)");
+ /* In DELETE, only the replica identity is present; display that */
+ else
+ tuple_to_stringinfo(ctx->out, tupdesc,
+ &change->tp.oldtuple->tuple,
+ true);
+ break;
+ }
+
+ MemoryContextSwitchTo(old);
+ MemoryContextReset(data->context);
+
+ OutputPluginWrite(ctx, true);
+}
&sslinfo;
&tablefunc;
&tcn;
+ &test-decoding;
&test-parser;
&test-shm-mq;
&tsearch2;
<!ENTITY sslinfo SYSTEM "sslinfo.sgml">
<!ENTITY tablefunc SYSTEM "tablefunc.sgml">
<!ENTITY tcn SYSTEM "tcn.sgml">
+<!ENTITY test-decoding SYSTEM "test-decoding.sgml">
<!ENTITY test-parser SYSTEM "test-parser.sgml">
<!ENTITY test-shm-mq SYSTEM "test-shm-mq.sgml">
<!ENTITY tsearch2 SYSTEM "tsearch2.sgml">
--- /dev/null
+<!-- doc/src/sgml/test-decoding.sgml -->
+
+<sect1 id="test-decoding" xreflabel="test_decoding">
+ <title>test_decoding</title>
+
+ <indexterm zone="test-decoding">
+ <primary>test_decoding</primary>
+ </indexterm>
+
+ <para>
+ <filename>test_decoding</> is an example of a logical decoding
+ output plugin. It doesn't do anything especially useful, but can serve as
+ a starting point for developing your own decoder.
+ </para>
+
+ <para>
+ <filename>test_decoding</> receives WAL through the logical decoding
+ mechanism and decodes it into text representations of the operations
+ performed.
+ </para>
+
+ <para>
+ Typical output from this plugin, used over the SQL logical decoding
+ interface, might be:
+
+ <programlisting>
+postgres=# SELECT * FROM pg_logical_slot_get_changes('test_slot', 'now', 'include-xids', '0');
+ location | xid | data
+-----------+-----+--------------------------------------------------
+ 0/16D30F8 | 691 | BEGIN
+ 0/16D32A0 | 691 | table public.data: INSERT: id[int4]:2 data[text]:'arg'
+ 0/16D32A0 | 691 | table public.data: INSERT: id[int4]:3 data[text]:'demo'
+ 0/16D32A0 | 691 | COMMIT
+ 0/16D32D8 | 692 | BEGIN
+ 0/16D3398 | 692 | table public.data: DELETE: id[int4]:2
+ 0/16D3398 | 692 | table public.data: DELETE: id[int4]:3
+ 0/16D3398 | 692 | COMMIT
+(8 rows)
+ </programlisting>
+ </para>
+
+</sect1>
pg_regress_clean_files = results/ regression.diffs regression.out tmp_check/ log/
+pg_isolation_regress_check = $(top_builddir)/src/test/isolation/pg_isolation_regress --inputdir=$(srcdir) --temp-install=./tmp_check --top-builddir=$(top_builddir) $(pg_regress_locale_flags)
+pg_isolation_regress_installcheck = $(top_builddir)/src/test/isolation/pg_isolation_regress --inputdir=$(srcdir) --top-builddir=$(top_builddir) $(pg_regress_locale_flags)
##########################################################################
#
/*
* Prune and repair fragmentation for the whole page, if possible.
*/
- Assert(TransactionIdIsValid(RecentGlobalXmin));
- heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin);
+ heap_page_prune_opt(scan->rs_rd, buffer);
/*
* We must hold share lock on the buffer content while examining tuple
*/
if (!skip)
{
+ /*
+ * For the benefit of logical decoding, have t_self point at the
+ * element of the HOT chain we're currently investigating instead
+ * of the root tuple of the HOT chain. This is important because
+ * the *Satisfies routine for historical mvcc snapshots needs the
+ * correct tid to decide about the visibility in some cases.
+ */
+ ItemPointerSet(&(heapTuple->t_self), BufferGetBlockNumber(buffer), offnum);
+
/* If it's visible per the snapshot, we must return it */
valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
CheckForSerializableConflictOut(valid, relation, heapTuple,
buffer, snapshot);
+ /* reset to original, non-redirected, tid */
+ heapTuple->t_self = *tid;
+
if (valid)
{
ItemPointerSetOffsetNumber(tid, offnum);
* decoding.
*/
break;
+ case XLOG_HEAP2_REWRITE:
+ heap_xlog_logical_rewrite(lsn, record);
+ break;
default:
elog(PANIC, "heap2_redo: unknown op code %u", info);
}
#include "access/heapam_xlog.h"
#include "access/transam.h"
#include "access/htup_details.h"
+#include "catalog/catalog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
+#include "utils/snapmgr.h"
#include "utils/rel.h"
#include "utils/tqual.h"
-
/* Working data for heap_page_prune and subroutines */
typedef struct
{
* or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
*/
void
-heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
+heap_page_prune_opt(Relation relation, Buffer buffer)
{
Page page = BufferGetPage(buffer);
Size minfree;
+ TransactionId OldestXmin;
+
+ /*
+ * We can't write WAL in recovery mode, so there's no point trying to
+ * clean the page. The master will likely issue a cleaning WAL record soon
+ * anyway, so this is no particular loss.
+ */
+ if (RecoveryInProgress())
+ return;
+
+ /*
+ * Use the appropriate xmin horizon for this relation. If it's a proper
+ * catalog relation or a user defined, additional, catalog relation, we
+ * need to use the horizon that includes slots, otherwise the data-only
+ * horizon can be used. Note that the toast relation of user defined
+ * relations are *not* considered catalog relations.
+ */
+ if (IsCatalogRelation(relation) ||
+ RelationIsAccessibleInLogicalDecoding(relation))
+ OldestXmin = RecentGlobalXmin;
+ else
+ OldestXmin = RecentGlobalDataXmin;
+
+ Assert(TransactionIdIsValid(OldestXmin));
/*
* Let's see if we really need pruning.
if (!PageIsPrunable(page, OldestXmin))
return;
- /*
- * We can't write WAL in recovery mode, so there's no point trying to
- * clean the page. The master will likely issue a cleaning WAL record soon
- * anyway, so this is no particular loss.
- */
- if (RecoveryInProgress())
- return;
-
/*
* We prune when a previous UPDATE failed to find enough space on the page
* for a new tuple version, or when free space falls below the relation's
*/
#include "postgres.h"
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "miscadmin.h"
+
#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
#include "access/tuptoaster.h"
+#include "access/xact.h"
+
+#include "catalog/catalog.h"
+
+#include "lib/ilist.h"
+
+#include "replication/logical.h"
+#include "replication/slot.h"
+
#include "storage/bufmgr.h"
+#include "storage/fd.h"
#include "storage/smgr.h"
+
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/tqual.h"
+#include "storage/procarray.h"
/*
* State associated with a rewrite operation. This is opaque to the user
*/
typedef struct RewriteStateData
{
+ Relation rs_old_rel; /* source heap */
Relation rs_new_rel; /* destination heap */
Page rs_buffer; /* page currently being built */
BlockNumber rs_blockno; /* block where page will go */
bool rs_buffer_valid; /* T if any tuples in buffer */
bool rs_use_wal; /* must we WAL-log inserts? */
+ bool rs_logical_rewrite; /* do we need to do logical rewriting */
TransactionId rs_oldest_xmin; /* oldest xmin used by caller to
* determine tuple visibility */
TransactionId rs_freeze_xid;/* Xid that will be used as freeze cutoff
* point */
+ TransactionId rs_logical_xmin; /* Xid that will be used as cutoff
+ * point for logical rewrites */
MultiXactId rs_cutoff_multi;/* MultiXactId that will be used as cutoff
* point for multixacts */
MemoryContext rs_cxt; /* for hash tables and entries and tuples in
* them */
+ XLogRecPtr rs_begin_lsn; /* XLogInsertLsn when starting the rewrite */
HTAB *rs_unresolved_tups; /* unmatched A tuples */
HTAB *rs_old_new_tid_map; /* unmatched B tuples */
+ HTAB *rs_logical_mappings; /* logical remapping files */
+ uint32 rs_num_rewrite_mappings; /* # in memory mappings */
} RewriteStateData;
/*
typedef OldToNewMappingData *OldToNewMapping;
+/*
+ * In-Memory data for a xid that might need logical remapping entries
+ * to be logged.
+ */
+typedef struct RewriteMappingFile
+{
+ TransactionId xid; /* xid that might need to see the row */
+ int vfd; /* fd of mappings file */
+ off_t off; /* how far have we written yet */
+ uint32 num_mappings; /* number of in-memory mappings */
+ dlist_head mappings; /* list of in-memory mappings */
+ char path[MAXPGPATH]; /* path, for error messages */
+} RewriteMappingFile;
+
+/*
+ * A single In-Memeory logical rewrite mapping, hanging of
+ * RewriteMappingFile->mappings.
+ */
+typedef struct RewriteMappingDataEntry
+{
+ LogicalRewriteMappingData map; /* map between old and new location of
+ * the tuple */
+ dlist_node node;
+} RewriteMappingDataEntry;
+
/* prototypes for internal functions */
static void raw_heap_insert(RewriteState state, HeapTuple tup);
+/* internal logical remapping prototypes */
+static void logical_begin_heap_rewrite(RewriteState state);
+static void logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid, HeapTuple new_tuple);
+static void logical_end_heap_rewrite(RewriteState state);
+
/*
* Begin a rewrite of a table
*
+ * old_heap old, locked heap relation tuples will be read from
* new_heap new, locked heap relation to insert tuples to
* oldest_xmin xid used by the caller to determine which tuples are dead
* freeze_xid xid before which tuples will be frozen
* to be used in subsequent calls to the other functions.
*/
RewriteState
-begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin,
+begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin,
TransactionId freeze_xid, MultiXactId cutoff_multi,
bool use_wal)
{
/* Create and fill in the state struct */
state = palloc0(sizeof(RewriteStateData));
+ state->rs_old_rel = old_heap;
state->rs_new_rel = new_heap;
state->rs_buffer = (Page) palloc(BLCKSZ);
/* new_heap needn't be empty, just locked */
MemoryContextSwitchTo(old_cxt);
+ logical_begin_heap_rewrite(state);
+
return state;
}
if (RelationNeedsWAL(state->rs_new_rel))
heap_sync(state->rs_new_rel);
+ logical_end_heap_rewrite(state);
+
/* Deleting the context frees everything */
MemoryContextDelete(state->rs_cxt);
}
raw_heap_insert(state, new_tuple);
new_tid = new_tuple->t_self;
+ logical_rewrite_heap_tuple(state, old_tid, new_tuple);
+
/*
* If the tuple is the updated version of a row, and the prior version
* wouldn't be DEAD yet, then we need to either resolve the prior
if (heaptup != tup)
heap_freetuple(heaptup);
}
+
+/* ------------------------------------------------------------------------
+ * Logical rewrite support
+ *
+ * When doing logical decoding - which relies on using cmin/cmax of catalog
+ * tuples, via xl_heap_new_cid records - heap rewrites have to log enough
+ * information to allow the decoding backend to updates its internal mapping
+ * of (relfilenode,ctid) => (cmin, cmax) to be correct for the rewritten heap.
+ *
+ * For that, every time we find a tuple that's been modified in a catalog
+ * relation within the xmin horizon of any decoding slot, we log a mapping
+ * from the old to the new location.
+ *
+ * To deal with rewrites that abort the filename of a mapping file contains
+ * the xid of the transaction performing the rewrite, which then can be
+ * checked before being read in.
+ *
+ * For efficiency we don't immediately spill every single map mapping for a
+ * row to disk but only do so in batches when we've collected several of them
+ * in memory or when end_heap_rewrite() has been called.
+ *
+ * Crash-Safety: This module diverts from the usual patterns of doing WAL
+ * since it cannot rely on checkpoint flushing out all buffers and thus
+ * waiting for exlusive locks on buffers. Usually the XLogInsert() covering
+ * buffer modifications is performed while the buffer(s) that are being
+ * modified are exlusively locked guaranteeing that both the WAL record and
+ * the modified heap are on either side of the checkpoint. But since the
+ * mapping files we log aren't in shared_buffers that interlock doesn't work.
+ *
+ * Instead we simply write the mapping files out to disk, *before* the
+ * XLogInsert() is performed. That guarantees that either the XLogInsert() is
+ * inserted after the checkpoint's redo pointer or that the checkpoint (via
+ * LogicalRewriteHeapCheckpoint()) has flushed the (partial) mapping file to
+ * disk. That leaves the tail end that has not yet been flushed open to
+ * corruption, which is solved by including the current offset in the
+ * xl_heap_rewrite_mapping records and truncating the mapping file to it
+ * during replay. Every time a rewrite is finished all generated mapping files
+ * are synced to disk.
+ *
+ * Note that if we were only concerned about crash safety we wouldn't have to
+ * deal with WAL logging at all - an fsync() at the end of a rewrite would be
+ * sufficient for crash safety. Any mapping that hasn't been safely flushed to
+ * disk has to be by an aborted (explicitly or via a crash) transaction and is
+ * ignored by virtue of the xid in it's name being subject to a
+ * TransactionDidCommit() check. But we want to support having standbys via
+ * physical replication, both for availability and to to do logical decoding
+ * there.
+ * ------------------------------------------------------------------------
+ */
+
+/*
+ * Do preparations for logging logical mappings during a rewrite if
+ * necessary. If we detect that we don't need to log anything we'll prevent
+ * any further action by the various logical rewrite functions.
+ */
+static void
+logical_begin_heap_rewrite(RewriteState state)
+{
+ HASHCTL hash_ctl;
+ TransactionId logical_xmin;
+
+ /*
+ * We only need to persist these mappings if the rewritten table can be
+ * accessed during logical decoding, if not, we can skip doing any
+ * additional work.
+ */
+ state->rs_logical_rewrite =
+ RelationIsAccessibleInLogicalDecoding(state->rs_old_rel);
+
+ if (!state->rs_logical_rewrite)
+ return;
+
+ Assert(ReplicationSlotCtl != NULL);
+
+ ProcArrayGetReplicationSlotXmin(NULL, &logical_xmin);
+
+ /*
+ * If there are no logical slots in progress we don't need to do anything,
+ * there cannot be any remappings for relevant rows yet. The relation's
+ * lock protects us against races.
+ */
+ if (logical_xmin == InvalidTransactionId)
+ {
+ state->rs_logical_rewrite = false;
+ return;
+ }
+
+ state->rs_logical_xmin = logical_xmin;
+ state->rs_begin_lsn = GetXLogInsertRecPtr();
+ state->rs_num_rewrite_mappings = 0;
+
+ memset(&hash_ctl, 0, sizeof(hash_ctl));
+ hash_ctl.keysize = sizeof(TransactionId);
+ hash_ctl.entrysize = sizeof(RewriteMappingFile);
+ hash_ctl.hcxt = state->rs_cxt;
+ hash_ctl.hash = tag_hash;
+
+ state->rs_logical_mappings =
+ hash_create("Logical rewrite mapping",
+ 128, /* arbitrary initial size */
+ &hash_ctl,
+ HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+}
+
+/*
+ * Flush all logical in-memory mappings to disk, but don't fsync them yet.
+ */
+static void
+logical_heap_rewrite_flush_mappings(RewriteState state)
+{
+ HASH_SEQ_STATUS seq_status;
+ RewriteMappingFile *src;
+ dlist_mutable_iter iter;
+
+ Assert(state->rs_logical_rewrite);
+
+ /* no logical rewrite in progress, no need to iterate over mappings */
+ if (state->rs_num_rewrite_mappings == 0)
+ return;
+
+ elog(DEBUG1, "flushing %u logical rewrite mapping entries",
+ state->rs_num_rewrite_mappings);
+
+ hash_seq_init(&seq_status, state->rs_logical_mappings);
+ while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
+ {
+ XLogRecData rdata[2];
+ char *waldata;
+ char *waldata_start;
+ xl_heap_rewrite_mapping xlrec;
+ Oid dboid;
+ uint32 len;
+ int written;
+
+ /* this file hasn't got any new mappings */
+ if (src->num_mappings == 0)
+ continue;
+
+ if (state->rs_old_rel->rd_rel->relisshared)
+ dboid = InvalidOid;
+ else
+ dboid = MyDatabaseId;
+
+ xlrec.num_mappings = src->num_mappings;
+ xlrec.mapped_rel = RelationGetRelid(state->rs_old_rel);
+ xlrec.mapped_xid = src->xid;
+ xlrec.mapped_db = dboid;
+ xlrec.offset = src->off;
+ xlrec.start_lsn = state->rs_begin_lsn;
+
+ rdata[0].data = (char *) (&xlrec);
+ rdata[0].len = sizeof(xlrec);
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].next = &(rdata[1]);
+
+ /* write all mappings consecutively */
+ len = src->num_mappings * sizeof(LogicalRewriteMappingData);
+ waldata = palloc(len);
+ waldata_start = waldata;
+
+ /*
+ * collect data we need to write out, but don't modify ondisk data yet
+ */
+ dlist_foreach_modify(iter, &src->mappings)
+ {
+ RewriteMappingDataEntry *pmap;
+
+ pmap = dlist_container(RewriteMappingDataEntry, node, iter.cur);
+
+ memcpy(waldata, &pmap->map, sizeof(pmap->map));
+ waldata += sizeof(pmap->map);
+
+ /* remove from the list and free */
+ dlist_delete(&pmap->node);
+ pfree(pmap);
+
+ /* update bookkeeping */
+ state->rs_num_rewrite_mappings--;
+ src->num_mappings--;
+ }
+
+ /*
+ * Note that we deviate from the usual WAL coding practices here,
+ * check the above "Logical rewrite support" comment for reasoning.
+ */
+ written = FileWrite(src->vfd, waldata_start, len);
+ if (written != len)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write to file \"%s\", wrote %d of %d: %m", src->path,
+ written, len)));
+ src->off += len;
+
+ Assert(src->num_mappings == 0);
+
+ rdata[1].data = waldata_start;
+ rdata[1].len = len;
+ rdata[1].buffer = InvalidBuffer;
+ rdata[1].next = NULL;
+
+ /* write xlog record */
+ XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, rdata);
+
+ }
+ Assert(state->rs_num_rewrite_mappings == 0);
+}
+
+/*
+ * Logical remapping part of end_heap_rewrite().
+ */
+static void
+logical_end_heap_rewrite(RewriteState state)
+{
+ HASH_SEQ_STATUS seq_status;
+ RewriteMappingFile *src;
+
+ /* done, no logical rewrite in progress */
+ if (!state->rs_logical_rewrite)
+ return;
+
+ /* writeout remaining in-memory entries */
+ if (state->rs_num_rewrite_mappings > 0 )
+ logical_heap_rewrite_flush_mappings(state);
+
+ /* Iterate over all mappings we have written and fsync the files. */
+ hash_seq_init(&seq_status, state->rs_logical_mappings);
+ while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
+ {
+ if(FileSync(src->vfd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m", src->path)));
+ FileClose(src->vfd);
+ }
+ /* memory context cleanup will deal with the rest */
+}
+
+/*
+ * Log a single (old->new) mapping for 'xid'.
+ */
+static void
+logical_rewrite_log_mapping(RewriteState state, TransactionId xid,
+ LogicalRewriteMappingData *map)
+{
+ RewriteMappingFile *src;
+ RewriteMappingDataEntry *pmap;
+ Oid relid;
+ bool found;
+
+ relid = RelationGetRelid(state->rs_old_rel);
+
+ /* look for existing mappings for this 'mapped' xid */
+ src = hash_search(state->rs_logical_mappings, &xid,
+ HASH_ENTER, &found);
+
+ /*
+ * We haven't yet had the need to map anything for this xid, create
+ * per-xid data structures.
+ */
+ if (!found)
+ {
+ char path[MAXPGPATH];
+ Oid dboid;
+
+ if (state->rs_old_rel->rd_rel->relisshared)
+ dboid = InvalidOid;
+ else
+ dboid = MyDatabaseId;
+
+ snprintf(path, MAXPGPATH,
+ "pg_llog/mappings/" LOGICAL_REWRITE_FORMAT,
+ dboid, relid,
+ (uint32) (state->rs_begin_lsn >> 32),
+ (uint32) state->rs_begin_lsn,
+ xid, GetCurrentTransactionId());
+
+ dlist_init(&src->mappings);
+ src->num_mappings = 0;
+ src->off = 0;
+ memcpy(src->path, path, sizeof(path));
+ src->vfd = PathNameOpenFile(path,
+ O_CREAT | O_EXCL | O_WRONLY | PG_BINARY,
+ S_IRUSR | S_IWUSR);
+ if (src->vfd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m", path)));
+ }
+
+ pmap = MemoryContextAlloc(state->rs_cxt,
+ sizeof(RewriteMappingDataEntry));
+ memcpy(&pmap->map, map, sizeof(LogicalRewriteMappingData));
+ dlist_push_tail(&src->mappings, &pmap->node);
+ src->num_mappings++;
+ state->rs_num_rewrite_mappings++;
+
+ /*
+ * Write out buffer every time we've too many in-memory entries across all
+ * mapping files.
+ */
+ if (state->rs_num_rewrite_mappings >= 1000 /* arbitrary number */)
+ logical_heap_rewrite_flush_mappings(state);
+}
+
+/*
+ * Perform logical remapping for a tuple that's mapped from old_tid to
+ * new_tuple->t_self by rewrite_heap_tuple() iff necessary for the tuple.
+ */
+static void
+logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid,
+ HeapTuple new_tuple)
+{
+ ItemPointerData new_tid = new_tuple->t_self;
+ TransactionId cutoff = state->rs_logical_xmin;
+ TransactionId xmin;
+ TransactionId xmax;
+ bool do_log_xmin = false;
+ bool do_log_xmax = false;
+ LogicalRewriteMappingData map;
+
+ /* no logical rewrite in progress, we don't need to log anything */
+ if (!state->rs_logical_rewrite)
+ return;
+
+ xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);
+ /* use *GetUpdateXid to correctly deal with multixacts */
+ xmax = HeapTupleHeaderGetUpdateXid(new_tuple->t_data);
+
+ /*
+ * Log the mapping iff the tuple has been created recently.
+ */
+ if (TransactionIdIsNormal(xmin) && !TransactionIdPrecedes(xmin, cutoff))
+ do_log_xmin = true;
+
+ if (!TransactionIdIsNormal(xmax))
+ {
+ /*
+ * no xmax is set, can't have any permanent ones, so this check is
+ * sufficient
+ */
+ }
+ else if (HEAP_XMAX_IS_LOCKED_ONLY(new_tuple->t_data->t_infomask))
+ {
+ /* only locked, we don't care */
+ }
+ else if (!TransactionIdPrecedes(xmax, cutoff))
+ {
+ /* tuple has been deleted recently, log */
+ do_log_xmax = true;
+ }
+
+ /* if neither needs to be logged, we're done */
+ if (!do_log_xmin && !do_log_xmax)
+ return;
+
+ /* fill out mapping information */
+ map.old_node = state->rs_old_rel->rd_node;
+ map.old_tid = old_tid;
+ map.new_node = state->rs_new_rel->rd_node;
+ map.new_tid = new_tid;
+
+ /* ---
+ * Now persist the mapping for the individual xids that are affected. We
+ * need to log for both xmin and xmax if they aren't the same transaction
+ * since the mapping files are per "affected" xid.
+ * We don't muster all that much effort detecting whether xmin and xmax
+ * are actually the same transaction, we just check whether the xid is the
+ * same disregarding subtransactions. Logging too much is relatively
+ * harmless and we could never do the check fully since subtransaction
+ * data is thrown away during restarts.
+ * ---
+ */
+ if (do_log_xmin)
+ logical_rewrite_log_mapping(state, xmin, &map);
+ /* separately log mapping for xmax unless it'd be redundant */
+ if (do_log_xmax && !TransactionIdEquals(xmin, xmax))
+ logical_rewrite_log_mapping(state, xmax, &map);
+}
+
+/*
+ * Replay XLOG_HEAP2_REWRITE records
+ */
+void
+heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
+{
+ char path[MAXPGPATH];
+ int fd;
+ xl_heap_rewrite_mapping *xlrec;
+ uint32 len;
+ char *data;
+
+ xlrec = (xl_heap_rewrite_mapping *) XLogRecGetData(r);
+
+ snprintf(path, MAXPGPATH,
+ "pg_llog/mappings/" LOGICAL_REWRITE_FORMAT,
+ xlrec->mapped_db, xlrec->mapped_rel,
+ (uint32) (xlrec->start_lsn >> 32),
+ (uint32) xlrec->start_lsn,
+ xlrec->mapped_xid, r->xl_xid);
+
+ fd = OpenTransientFile(path,
+ O_CREAT | O_WRONLY | PG_BINARY,
+ S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create file \"%s\": %m", path)));
+ /*
+ * Truncate all data that's not guaranteed to have been safely fsynced (by
+ * previous record or by the last checkpoint).
+ */
+ if (ftruncate(fd, xlrec->offset) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not truncate file \"%s\" to %u: %m",
+ path, (uint32) xlrec->offset)));
+
+ /* now seek to the position we want to write our data to */
+ if (lseek(fd, xlrec->offset, SEEK_SET) != xlrec->offset)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not seek to the end of file \"%s\": %m",
+ path)));
+
+ data = XLogRecGetData(r) + sizeof(*xlrec);
+
+ len = xlrec->num_mappings * sizeof(LogicalRewriteMappingData);
+
+ /* write out tail end of mapping file (again) */
+ if (write(fd, data, len) != len)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write to file \"%s\": %m", path)));
+ /*
+ * Now fsync all previously written data. We could improve things and only
+ * do this for the last write to a file, but the required bookkeeping
+ * doesn't seem worth the trouble.
+ */
+ if (pg_fsync(fd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m", path)));
+
+ CloseTransientFile(fd);
+}
+
+/* ---
+ * Perform a checkpoint for logical rewrite mappings
+ *
+ * This serves two tasks:
+ * 1) Remove all mappings not needed anymore based on the logical restart LSN
+ * 2) Flush all remaining mappings to disk, so that replay after a checkpoint
+ * only has to deal with the parts of a mapping that have been written out
+ * after the checkpoint started.
+ * ---
+ */
+void
+CheckPointLogicalRewriteHeap(void)
+{
+ XLogRecPtr cutoff;
+ XLogRecPtr redo;
+ DIR *mappings_dir;
+ struct dirent *mapping_de;
+ char path[MAXPGPATH];
+
+ /*
+ * We start of with a minimum of the last redo pointer. No new decoding
+ * slot will start before that, so that's a safe upper bound for removal.
+ */
+ redo = GetRedoRecPtr();
+
+ /* now check for the restart ptrs from existing slots */
+ cutoff = ReplicationSlotsComputeLogicalRestartLSN();
+
+ /* don't start earlier than the restart lsn */
+ if (cutoff != InvalidXLogRecPtr && redo < cutoff)
+ cutoff = redo;
+
+ mappings_dir = AllocateDir("pg_llog/mappings");
+ while ((mapping_de = ReadDir(mappings_dir, "pg_llog/mappings")) != NULL)
+ {
+ struct stat statbuf;
+ Oid dboid;
+ Oid relid;
+ XLogRecPtr lsn;
+ TransactionId rewrite_xid;
+ TransactionId create_xid;
+ uint32 hi, lo;
+
+ if (strcmp(mapping_de->d_name, ".") == 0 ||
+ strcmp(mapping_de->d_name, "..") == 0)
+ continue;
+
+ snprintf(path, MAXPGPATH, "pg_llog/mappings/%s", mapping_de->d_name);
+ if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
+ continue;
+
+ /* Skip over files that cannot be ours. */
+ if (strncmp(mapping_de->d_name, "map-", 4) != 0)
+ continue;
+
+ if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
+ &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6)
+ elog(ERROR,"could not parse filename \"%s\"", mapping_de->d_name);
+
+ lsn = ((uint64) hi) << 32 | lo;
+
+ if (lsn < cutoff || cutoff == InvalidXLogRecPtr)
+ {
+ elog(DEBUG1, "removing logical rewrite file \"%s\"", path);
+ if (unlink(path) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not unlink file \"%s\": %m", path)));
+ }
+ else
+ {
+ int fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
+
+ /*
+ * The file cannot vanish due to concurrency since this function
+ * is the only one removing logical mappings and it's run while
+ * CheckpointLock is held exclusively.
+ */
+ if (fd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m", path)));
+ /*
+ * We could try to avoid fsyncing files that either haven't
+ * changed or have only been created since the checkpoint's start,
+ * but it's currently not deemed worth the effort.
+ */
+ else if (pg_fsync(fd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m", path)));
+ CloseTransientFile(fd);
+ }
+ }
+ FreeDir(mappings_dir);
+}
#undef TOAST_DEBUG
-/*
- * Testing whether an externally-stored value is compressed now requires
- * comparing extsize (the actual length of the external data) to rawsize
- * (the original uncompressed datum's size). The latter includes VARHDRSZ
- * overhead, the former doesn't. We never use compression unless it actually
- * saves space, so we expect either equality or less-than.
- */
-#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
- ((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ)
-
-/*
- * Macro to fetch the possibly-unaligned contents of an EXTERNAL datum
- * into a local "struct varatt_external" toast pointer. This should be
- * just a memcpy, but some versions of gcc seem to produce broken code
- * that assumes the datum contents are aligned. Introducing an explicit
- * intermediate "varattrib_1b_e *" variable seems to fix it.
- */
-#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \
-do { \
- varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \
- Assert(VARATT_IS_EXTERNAL(attre)); \
- Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \
- memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \
-} while (0)
-
-
static void toast_delete_datum(Relation rel, Datum value);
static Datum toast_save_datum(Relation rel, Datum value,
struct varlena * oldexternal, int options);
#include "access/relscan.h"
#include "access/transam.h"
+#include "access/xlog.h"
+
#include "catalog/index.h"
+#include "catalog/catalog.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
* Prune page, but only if we weren't already on this page
*/
if (prev_buf != scan->xs_cbuf)
- heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
- RecentGlobalXmin);
+ heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf);
}
/* Obtain share-lock on the buffer so we can examine visibility */
xlrec->node.relNode, xlrec->block,
xlrec->cutoff_xid, xlrec->ntuples);
}
+ else if (info == XLOG_HEAP2_REWRITE)
+ {
+ appendStringInfoString(buf, "heap rewrite:");
+ }
else if (info == XLOG_HEAP2_CLEANUP_INFO)
{
xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec;
/*
* Do we need the long commit record? If not, use the compact format.
+ *
+ * For now always use the non-compact version if wal_level=logical, so
+ * we can hide commits from other databases. TODO: In the future we
+ * should merge compact and non-compact commits and use a flags
+ * variable to determine if it contains subxacts, relations or
+ * invalidation messages, that's more extensible and degrades more
+ * gracefully. Till then, it's just 20 bytes of overhead.
*/
- if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit)
+ if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit ||
+ XLogLogicalInfoActive())
{
XLogRecData rdata[4];
int lastrdata = 0;
#include "access/clog.h"
#include "access/multixact.h"
+#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
+#include "replication/logical.h"
#include "replication/slot.h"
+#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/barrier.h"
}
}
+/*
+ * Return the last WAL segment removed, or 0 if no segment has been removed
+ * since startup.
+ *
+ * NB: the result can be out of date arbitrarily fast, the caller has to deal
+ * with that.
+ */
+XLogSegNo
+XLogGetLastRemovedSegno(void)
+{
+ /* use volatile pointer to prevent code rearrangement */
+ volatile XLogCtlData *xlogctl = XLogCtl;
+ XLogSegNo lastRemovedSegNo;
+
+ SpinLockAcquire(&xlogctl->info_lck);
+ lastRemovedSegNo = xlogctl->lastRemovedSegNo;
+ SpinLockRelease(&xlogctl->info_lck);
+
+ return lastRemovedSegNo;
+}
+
/*
* Update the last removed segno pointer in shared memory, to reflect
* that the given XLOG file has been removed.
*/
StartupReplicationSlots(checkPoint.redo);
+ /*
+ * Startup logical state, needs to be setup now so we have proper data
+ * during crash recovery.
+ */
+ StartupReorderBuffer();
+
/*
* Startup MultiXact. We need to do this early for two reasons: one
* is that we might try to access multixacts when we do tuple freezing,
* StartupSUBTRANS hasn't been called yet.
*/
if (!RecoveryInProgress())
- TruncateSUBTRANS(GetOldestXmin(true, false));
+ TruncateSUBTRANS(GetOldestXmin(NULL, false));
/* Real work is done, but log and update stats before releasing lock. */
LogCheckpointEnd(false);
CheckPointPredicate();
CheckPointRelationMap();
CheckPointReplicationSlots();
+ CheckPointSnapBuild();
+ CheckPointLogicalRewriteHeap();
CheckPointBuffers(flags); /* performs all required fsyncs */
/* We deliberately delay 2PC checkpointing as long as possible */
CheckPointTwoPhase(checkPointRedo);
* this because StartupSUBTRANS hasn't been called yet.
*/
if (EnableHotStandby)
- TruncateSUBTRANS(GetOldestXmin(true, false));
+ TruncateSUBTRANS(GetOldestXmin(NULL, false));
/* Real work is done, but log and update before releasing lock. */
LogCheckpointEnd(true);
{
snapshot = SnapshotAny;
/* okay to ignore lazy VACUUMs here */
- OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
+ OldestXmin = GetOldestXmin(heapRelation, true);
}
scan = heap_beginscan_strat(heapRelation, /* relation */
CREATE VIEW pg_replication_slots AS
SELECT
L.slot_name,
+ L.plugin,
L.slot_type,
L.datoid,
D.datname AS database,
L.active,
L.xmin,
+ L.catalog_xmin,
L.restart_lsn
FROM pg_get_replication_slots() AS L
LEFT JOIN pg_database D ON (L.datoid = D.oid);
CREATE OR REPLACE FUNCTION
json_populate_recordset(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
RETURNS SETOF anyelement LANGUAGE internal STABLE ROWS 100 AS 'json_populate_recordset';
+
+CREATE OR REPLACE FUNCTION pg_logical_slot_get_changes(
+ IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
+ OUT location pg_lsn, OUT xid xid, OUT data text)
+RETURNS SETOF RECORD
+LANGUAGE INTERNAL
+VOLATILE ROWS 1000 COST 1000
+AS 'pg_logical_slot_get_changes';
+
+CREATE OR REPLACE FUNCTION pg_logical_slot_peek_changes(
+ IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
+ OUT location pg_lsn, OUT xid xid, OUT data text)
+RETURNS SETOF RECORD
+LANGUAGE INTERNAL
+VOLATILE ROWS 1000 COST 1000
+AS 'pg_logical_slot_peek_changes';
+
+CREATE OR REPLACE FUNCTION pg_logical_slot_get_binary_changes(
+ IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
+ OUT location pg_lsn, OUT xid xid, OUT data bytea)
+RETURNS SETOF RECORD
+LANGUAGE INTERNAL
+VOLATILE ROWS 1000 COST 1000
+AS 'pg_logical_slot_get_binary_changes';
+
+CREATE OR REPLACE FUNCTION pg_logical_slot_peek_binary_changes(
+ IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
+ OUT location pg_lsn, OUT xid xid, OUT data bytea)
+RETURNS SETOF RECORD
+LANGUAGE INTERNAL
+VOLATILE ROWS 1000 COST 1000
+AS 'pg_logical_slot_peek_binary_changes';
#include "access/tuptoaster.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
+#include "catalog/catalog.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/pg_collation.h"
totalblocks = RelationGetNumberOfBlocks(onerel);
/* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
- OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true);
+ OldestXmin = GetOldestXmin(onerel, true);
/* Prepare for sampling block numbers */
BlockSampler_Init(&bs, totalblocks, targrows);
* Since we're going to rewrite the whole table anyway, there's no reason
* not to be aggressive about this.
*/
- vacuum_set_xid_limits(0, 0, 0, 0, OldHeap->rd_rel->relisshared,
+ vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
&OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
NULL);
is_system_catalog = IsSystemRelation(OldHeap);
/* Initialize the rewrite operation */
- rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid,
+ rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
MultiXactCutoff, use_wal);
/*
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
+#include "replication/slot.h"
#include "storage/copydir.h"
#include "storage/fd.h"
#include "storage/lmgr.h"
HeapTuple tup;
int notherbackends;
int npreparedxacts;
+ int nslots, nslots_active;
/*
* Look up the target database's OID, and get exclusive lock on it. We
(errcode(ERRCODE_OBJECT_IN_USE),
errmsg("cannot drop the currently open database")));
+ /*
+ * Check whether there are, possibly unconnected, logical slots that refer
+ * to the to-be-dropped database. The database lock we are holding
+ * prevents the creation of new slots using the database.
+ */
+ if (ReplicationSlotsCountDBSlots(db_id, &nslots, &nslots_active))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_IN_USE),
+ errmsg("database \"%s\" is used by a logical decoding slot",
+ dbname),
+ errdetail("There are %d slot(s), %d of them active",
+ nslots, nslots_active)));
+
/*
* Check for other backends in the target database. (Because we hold the
* database lock, no new ones can start after this.)
* not interested.
*/
void
-vacuum_set_xid_limits(int freeze_min_age,
+vacuum_set_xid_limits(Relation rel,
+ int freeze_min_age,
int freeze_table_age,
int multixact_freeze_min_age,
int multixact_freeze_table_age,
- bool sharedRel,
TransactionId *oldestXmin,
TransactionId *freezeLimit,
TransactionId *xidFullScanLimit,
* working on a particular table at any time, and that each vacuum is
* always an independent transaction.
*/
- *oldestXmin = GetOldestXmin(sharedRel, true);
+ *oldestXmin = GetOldestXmin(rel, true);
Assert(TransactionIdIsNormal(*oldestXmin));
* committed pg_class entries for new tables; see AddNewRelationTuple().
* So we cannot produce a wrong minimum by starting with this.
*/
- newFrozenXid = GetOldestXmin(true, true);
+ newFrozenXid = GetOldestXmin(NULL, true);
/*
* Similarly, initialize the MultiXact "min" with the value that would be
#include "access/multixact.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
+#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
vac_strategy = bstrategy;
- vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
+ vacuum_set_xid_limits(onerel,
+ vacstmt->freeze_min_age, vacstmt->freeze_table_age,
vacstmt->multixact_freeze_min_age,
vacstmt->multixact_freeze_table_age,
- onerel->rd_rel->relisshared,
&OldestXmin, &FreezeLimit, &xidFullScanLimit,
&MultiXactCutoff, &mxactFullScanLimit);
/*
* Prune and repair fragmentation for the whole page, if possible.
*/
- Assert(TransactionIdIsValid(RecentGlobalXmin));
- heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin);
+ heap_page_prune_opt(scan->rs_rd, buffer);
/*
* We must hold share lock on the buffer content while examining tuple
OBJS = walsender.o walreceiverfuncs.o walreceiver.o basebackup.o \
repl_gram.o slot.o slotfuncs.o syncrep.o
+SUBDIRS = logical
+
include $(top_srcdir)/src/backend/common.mk
# repl_scanner is compiled as part of repl_gram
--- /dev/null
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for src/backend/replication/logical
+#
+# IDENTIFICATION
+# src/backend/replication/logical/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/replication/logical
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
+
+OBJS = decode.o logical.o logicalfuncs.o reorderbuffer.o snapbuild.o
+
+include $(top_srcdir)/src/backend/common.mk
--- /dev/null
+/* -------------------------------------------------------------------------
+ *
+ * decode.c
+ * This module decodes WAL records read using xlogreader.h's APIs for the
+ * purpose of logical decoding by passing information to the
+ * reorderbuffer module (containing the actual changes) and to the
+ * snapbuild module to build a fitting catalog snapshot (to be able to
+ * properly decode the changes in the reorderbuffer).
+ *
+ * NOTE:
+ * This basically tries to handle all low level xlog stuff for
+ * reorderbuffer.c and snapbuild.c. There's some minor leakage where a
+ * specific record's struct is used to pass data along, but those just
+ * happen to contain the right amount of data in a convenient
+ * format. There isn't and shouldn't be much intelligence about the
+ * contents of records in here except turning them into a more usable
+ * format.
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/replication/logical/decode.c
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/heapam_xlog.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/xlog_internal.h"
+#include "access/xlogreader.h"
+
+#include "catalog/pg_control.h"
+
+#include "replication/decode.h"
+#include "replication/logical.h"
+#include "replication/reorderbuffer.h"
+#include "replication/snapbuild.h"
+
+#include "storage/standby.h"
+
+typedef struct XLogRecordBuffer
+{
+ XLogRecPtr origptr;
+ XLogRecPtr endptr;
+ XLogRecord record;
+ char *record_data;
+} XLogRecordBuffer;
+
+/* RMGR Handlers */
+static void DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+
+/* individual record(group)'s handlers */
+static void DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
+ TransactionId xid, Oid dboid,
+ TimestampTz commit_time,
+ int nsubxacts, TransactionId *sub_xids,
+ int ninval_msgs, SharedInvalidationMessage *msg);
+static void DecodeAbort(LogicalDecodingContext *ctx, XLogRecPtr lsn,
+ TransactionId xid, TransactionId *sub_xids, int nsubxacts);
+
+/* common function to decode tuples */
+static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tup);
+
+/*
+ * Take every XLogReadRecord()ed record and perform the actions required to
+ * decode it using the output plugin already setup in the logical decoding
+ * context.
+ */
+void
+LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
+{
+ XLogRecordBuffer buf;
+
+ buf.origptr = ctx->reader->ReadRecPtr;
+ buf.endptr = ctx->reader->EndRecPtr;
+ buf.record = *record;
+ buf.record_data = XLogRecGetData(record);
+
+ /* cast so we get a warning when new rmgrs are added */
+ switch ((RmgrIds) buf.record.xl_rmid)
+ {
+ /*
+ * Rmgrs we care about for logical decoding. Add new rmgrs in
+ * rmgrlist.h's order.
+ */
+ case RM_XLOG_ID:
+ DecodeXLogOp(ctx, &buf);
+ break;
+
+ case RM_XACT_ID:
+ DecodeXactOp(ctx, &buf);
+ break;
+
+ case RM_STANDBY_ID:
+ DecodeStandbyOp(ctx, &buf);
+ break;
+
+ case RM_HEAP2_ID:
+ DecodeHeap2Op(ctx, &buf);
+ break;
+
+ case RM_HEAP_ID:
+ DecodeHeapOp(ctx, &buf);
+ break;
+
+ /*
+ * Rmgrs irrelevant for logical decoding; they describe stuff not
+ * represented in logical decoding. Add new rmgrs in rmgrlist.h's
+ * order.
+ */
+ case RM_SMGR_ID:
+ case RM_CLOG_ID:
+ case RM_DBASE_ID:
+ case RM_TBLSPC_ID:
+ case RM_MULTIXACT_ID:
+ case RM_RELMAP_ID:
+ case RM_BTREE_ID:
+ case RM_HASH_ID:
+ case RM_GIN_ID:
+ case RM_GIST_ID:
+ case RM_SEQ_ID:
+ case RM_SPGIST_ID:
+ break;
+ case RM_NEXT_ID:
+ elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record.xl_rmid);
+ }
+}
+
+/*
+ * Handle rmgr XLOG_ID records for DecodeRecordIntoReorderBuffer().
+ */
+static void
+DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+ SnapBuild *builder = ctx->snapshot_builder;
+ uint8 info = buf->record.xl_info & ~XLR_INFO_MASK;
+
+ switch (info)
+ {
+ /* this is also used in END_OF_RECOVERY checkpoints */
+ case XLOG_CHECKPOINT_SHUTDOWN:
+ case XLOG_END_OF_RECOVERY:
+ SnapBuildSerializationPoint(builder, buf->origptr);
+
+ break;
+ case XLOG_CHECKPOINT_ONLINE:
+ /*
+ * a RUNNING_XACTS record will have been logged near to this, we
+ * can restart from there.
+ */
+ break;
+ case XLOG_NOOP:
+ case XLOG_NEXTOID:
+ case XLOG_SWITCH:
+ case XLOG_BACKUP_END:
+ case XLOG_PARAMETER_CHANGE:
+ case XLOG_RESTORE_POINT:
+ case XLOG_FPW_CHANGE:
+ case XLOG_FPI:
+ break;
+ default:
+ elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info);
+ }
+}
+
+/*
+ * Handle rmgr XACT_ID records for DecodeRecordIntoReorderBuffer().
+ */
+static void
+DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+ SnapBuild *builder = ctx->snapshot_builder;
+ ReorderBuffer *reorder = ctx->reorder;
+ XLogRecord *r = &buf->record;
+ uint8 info = r->xl_info & ~XLR_INFO_MASK;
+
+ /* no point in doing anything yet, data could not be decoded anyway */
+ if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
+ return;
+
+ switch (info)
+ {
+ case XLOG_XACT_COMMIT:
+ {
+ xl_xact_commit *xlrec;
+ TransactionId *subxacts = NULL;
+ SharedInvalidationMessage *invals = NULL;
+
+ xlrec = (xl_xact_commit *) buf->record_data;
+
+ subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+ invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
+
+ DecodeCommit(ctx, buf, r->xl_xid, xlrec->dbId,
+ xlrec->xact_time,
+ xlrec->nsubxacts, subxacts,
+ xlrec->nmsgs, invals);
+
+ break;
+ }
+ case XLOG_XACT_COMMIT_PREPARED:
+ {
+ xl_xact_commit_prepared *prec;
+ xl_xact_commit *xlrec;
+ TransactionId *subxacts;
+ SharedInvalidationMessage *invals = NULL;
+
+ /* Prepared commits contain a normal commit record... */
+ prec = (xl_xact_commit_prepared *) buf->record_data;
+ xlrec = &prec->crec;
+
+ subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+ invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
+
+ DecodeCommit(ctx, buf, r->xl_xid, xlrec->dbId,
+ xlrec->xact_time,
+ xlrec->nsubxacts, subxacts,
+ xlrec->nmsgs, invals);
+
+ break;
+ }
+ case XLOG_XACT_COMMIT_COMPACT:
+ {
+ xl_xact_commit_compact *xlrec;
+
+ xlrec = (xl_xact_commit_compact *) buf->record_data;
+
+ DecodeCommit(ctx, buf, r->xl_xid, InvalidOid,
+ xlrec->xact_time,
+ xlrec->nsubxacts, xlrec->subxacts,
+ 0, NULL);
+ break;
+ }
+ case XLOG_XACT_ABORT:
+ {
+ xl_xact_abort *xlrec;
+ TransactionId *sub_xids;
+
+ xlrec = (xl_xact_abort *) buf->record_data;
+
+ sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+
+ DecodeAbort(ctx, buf->origptr, r->xl_xid,
+ sub_xids, xlrec->nsubxacts);
+ break;
+ }
+ case XLOG_XACT_ABORT_PREPARED:
+ {
+ xl_xact_abort_prepared *prec;
+ xl_xact_abort *xlrec;
+ TransactionId *sub_xids;
+
+ /* prepared abort contain a normal commit abort... */
+ prec = (xl_xact_abort_prepared *) buf->record_data;
+ xlrec = &prec->arec;
+
+ sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+
+ /* r->xl_xid is committed in a separate record */
+ DecodeAbort(ctx, buf->origptr, prec->xid,
+ sub_xids, xlrec->nsubxacts);
+ break;
+ }
+
+ case XLOG_XACT_ASSIGNMENT:
+ {
+ xl_xact_assignment *xlrec;
+ int i;
+ TransactionId *sub_xid;
+
+ xlrec = (xl_xact_assignment *) buf->record_data;
+
+ sub_xid = &xlrec->xsub[0];
+
+ for (i = 0; i < xlrec->nsubxacts; i++)
+ {
+ ReorderBufferAssignChild(reorder, xlrec->xtop,
+ *(sub_xid++), buf->origptr);
+ }
+ break;
+ }
+ case XLOG_XACT_PREPARE:
+ /*
+ * Currently decoding ignores PREPARE TRANSACTION and will just
+ * decode the transaction when the COMMIT PREPARED is sent or
+ * throw away the transaction's contents when a ROLLBACK PREPARED
+ * is received. In the future we could add code to expose prepared
+ * transactions in the changestream allowing for a kind of
+ * distributed 2PC.
+ */
+ break;
+ default:
+ elog(ERROR, "unexpected RM_XACT_ID record type: %u", info);
+ }
+}
+
+/*
+ * Handle rmgr STANDBY_ID records for DecodeRecordIntoReorderBuffer().
+ */
+static void
+DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+ SnapBuild *builder = ctx->snapshot_builder;
+ XLogRecord *r = &buf->record;
+ uint8 info = r->xl_info & ~XLR_INFO_MASK;
+
+ switch (info)
+ {
+ case XLOG_RUNNING_XACTS:
+ {
+ xl_running_xacts *running = (xl_running_xacts *) buf->record_data;
+ SnapBuildProcessRunningXacts(builder, buf->origptr, running);
+ /*
+ * Abort all transactions that we keep track of, that are
+ * older than the record's oldestRunningXid. This is the most
+ * convenient spot for doing so since, in contrast to shutdown
+ * or end-of-recovery checkpoints, we have information about
+ * all running transactions which includes prepared ones,
+ * while shutdown checkpoints just know that no non-prepared
+ * transactions are in progress.
+ */
+ ReorderBufferAbortOld(ctx->reorder, running->oldestRunningXid);
+ }
+ break;
+ case XLOG_STANDBY_LOCK:
+ break;
+ default:
+ elog(ERROR, "unexpected RM_STANDBY_ID record type: %u", info);
+ }
+}
+
+/*
+ * Handle rmgr HEAP2_ID records for DecodeRecordIntoReorderBuffer().
+ */
+static void
+DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+ uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK;
+ TransactionId xid = buf->record.xl_xid;
+ SnapBuild *builder = ctx->snapshot_builder;
+
+ /* no point in doing anything yet */
+ if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
+ return;
+
+ switch (info)
+ {
+ case XLOG_HEAP2_MULTI_INSERT:
+ if (SnapBuildProcessChange(builder, xid, buf->origptr))
+ DecodeMultiInsert(ctx, buf);
+ break;
+ case XLOG_HEAP2_NEW_CID:
+ {
+ xl_heap_new_cid *xlrec;
+ xlrec = (xl_heap_new_cid *) buf->record_data;
+ SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec);
+
+ break;
+ }
+ case XLOG_HEAP2_REWRITE:
+ /*
+ * Although these records only exist to serve the needs of logical
+ * decoding, all the work happens as part of crash or archive
+ * recovery, so we don't need to do anything here.
+ */
+ break;
+ /*
+ * Everything else here is just low level physical stuff we're
+ * not interested in.
+ */
+ case XLOG_HEAP2_FREEZE_PAGE:
+ case XLOG_HEAP2_CLEAN:
+ case XLOG_HEAP2_CLEANUP_INFO:
+ case XLOG_HEAP2_VISIBLE:
+ case XLOG_HEAP2_LOCK_UPDATED:
+ break;
+ default:
+ elog(ERROR, "unexpected RM_HEAP2_ID record type: %u", info);
+ }
+}
+
+/*
+ * Handle rmgr HEAP_ID records for DecodeRecordIntoReorderBuffer().
+ */
+static void
+DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+ uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK;
+ TransactionId xid = buf->record.xl_xid;
+ SnapBuild *builder = ctx->snapshot_builder;
+
+ /* no point in doing anything yet */
+ if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT)
+ return;
+
+ switch (info)
+ {
+ case XLOG_HEAP_INSERT:
+ if (SnapBuildProcessChange(builder, xid, buf->origptr))
+ DecodeInsert(ctx, buf);
+ break;
+
+ /*
+ * Treat HOT update as normal updates. There is no useful
+ * information in the fact that we could make it a HOT update
+ * locally and the WAL layout is compatible.
+ */
+ case XLOG_HEAP_HOT_UPDATE:
+ case XLOG_HEAP_UPDATE:
+ if (SnapBuildProcessChange(builder, xid, buf->origptr))
+ DecodeUpdate(ctx, buf);
+ break;
+
+ case XLOG_HEAP_DELETE:
+ if (SnapBuildProcessChange(builder, xid, buf->origptr))
+ DecodeDelete(ctx, buf);
+ break;
+
+ case XLOG_HEAP_NEWPAGE:
+ /*
+ * This is only used in places like indexams and CLUSTER which
+ * don't contain changes relevant for logical replication.
+ */
+ break;
+
+ case XLOG_HEAP_INPLACE:
+ /*
+ * Inplace updates are only ever performed on catalog tuples and
+ * can, per definition, not change tuple visibility. Since we
+ * don't decode catalog tuples, we're not interested in the
+ * record's contents.
+ *
+ * In-place updates can be used either by XID-bearing transactions
+ * (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less
+ * transactions (e.g. VACUUM). In the former case, the commit
+ * record will include cache invalidations, so we mark the
+ * transaction as catalog modifying here. Currently that's
+ * redundant because the commit will do that as well, but once we
+ * support decoding in-progress relations, this will be important.
+ */
+ if (!TransactionIdIsValid(xid))
+ break;
+
+ SnapBuildProcessChange(builder, xid, buf->origptr);
+ ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
+ break;
+
+ case XLOG_HEAP_LOCK:
+ /* we don't care about row level locks for now */
+ break;
+
+ default:
+ elog(ERROR, "unexpected RM_HEAP_ID record type: %u", info);
+ break;
+ }
+}
+
+/*
+ * Consolidated commit record handling between the different form of commit
+ * records.
+ */
+static void
+DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
+ TransactionId xid, Oid dboid,
+ TimestampTz commit_time,
+ int nsubxacts, TransactionId *sub_xids,
+ int ninval_msgs, SharedInvalidationMessage *msgs)
+{
+ int i;
+
+ /*
+ * Process invalidation messages, even if we're not interested in the
+ * transaction's contents, since the various caches need to always be
+ * consistent.
+ */
+ if (ninval_msgs > 0)
+ {
+ ReorderBufferAddInvalidations(ctx->reorder, xid, buf->origptr,
+ ninval_msgs, msgs);
+ ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
+ }
+
+ SnapBuildCommitTxn(ctx->snapshot_builder, buf->origptr, xid,
+ nsubxacts, sub_xids);
+
+ /* ----
+ * Check whether we are interested in this specific transaction, and tell
+ * the the reorderbuffer to forget the content of the (sub-)transactions
+ * if not.
+ *
+ * There basically two reasons we might not be interested in this
+ * transaction:
+ * 1) We might not be interested in decoding transactions up to this
+ * LSN. This can happen because we previously decoded it and now just
+ * are restarting or if we haven't assembled a consistent snapshot yet.
+ * 2) The transaction happened in another database.
+ *
+ * We can't just use ReorderBufferAbort() here, because we need to execute
+ * the transaction's invalidations. This currently won't be needed if
+ * we're just skipping over the transaction because currently we only do
+ * so during startup, to get to the first transaction the client needs. As
+ * we have reset the catalog caches before starting to read WAL, and we
+ * haven't yet touched any catalogs, there can't be anything to invalidate.
+ * But if we're "forgetting" this commit because it's it happened in
+ * another database, the invalidations might be important, because they
+ * could be for shared catalogs and we might have loaded data into the
+ * relevant syscaches.
+ * ---
+ */
+ if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) ||
+ (dboid != InvalidOid && dboid != ctx->slot->data.database))
+ {
+ for (i = 0; i < nsubxacts; i++)
+ {
+ ReorderBufferForget(ctx->reorder, *sub_xids, buf->origptr);
+ sub_xids++;
+ }
+ ReorderBufferForget(ctx->reorder, xid, buf->origptr);
+
+ return;
+ }
+
+ /* tell the reorderbuffer about the surviving subtransactions */
+ for (i = 0; i < nsubxacts; i++)
+ {
+ ReorderBufferCommitChild(ctx->reorder, xid, *sub_xids,
+ buf->origptr, buf->endptr);
+ sub_xids++;
+ }
+
+ /* replay actions of all transaction + subtransactions in order */
+ ReorderBufferCommit(ctx->reorder, xid, buf->origptr, buf->endptr,
+ commit_time);
+}
+
+/*
+ * Get the data from the various forms of abort records and pass it on to
+ * snapbuild.c and reorderbuffer.c
+ */
+static void
+DecodeAbort(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
+ TransactionId *sub_xids, int nsubxacts)
+{
+ int i;
+
+ SnapBuildAbortTxn(ctx->snapshot_builder, lsn, xid, nsubxacts, sub_xids);
+
+ for (i = 0; i < nsubxacts; i++)
+ {
+ ReorderBufferAbort(ctx->reorder, *sub_xids, lsn);
+ sub_xids++;
+ }
+
+ ReorderBufferAbort(ctx->reorder, xid, lsn);
+}
+
+/*
+ * Parse XLOG_HEAP_INSERT (not MULTI_INSERT!) records into tuplebufs.
+ *
+ * Deletes can contain the new tuple.
+ */
+static void
+DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+ XLogRecord *r = &buf->record;
+ xl_heap_insert *xlrec;
+ ReorderBufferChange *change;
+
+ xlrec = (xl_heap_insert *) buf->record_data;
+
+ /* only interested in our database */
+ if (xlrec->target.node.dbNode != ctx->slot->data.database)
+ return;
+
+ change = ReorderBufferGetChange(ctx->reorder);
+ change->action = REORDER_BUFFER_CHANGE_INSERT;
+ memcpy(&change->tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+
+ if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
+ {
+ Assert(r->xl_len > (SizeOfHeapInsert + SizeOfHeapHeader));
+
+ change->tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
+
+ DecodeXLogTuple((char *) xlrec + SizeOfHeapInsert,
+ r->xl_len - SizeOfHeapInsert,
+ change->tp.newtuple);
+ }
+
+ ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change);
+}
+
+/*
+ * Parse XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE, which have the same layout
+ * in the record, from wal into proper tuplebufs.
+ *
+ * Updates can possibly contain a new tuple and the old primary key.
+ */
+static void
+DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+ XLogRecord *r = &buf->record;
+ xl_heap_update *xlrec;
+ xl_heap_header_len *xlhdr;
+ ReorderBufferChange *change;
+ char *data;
+
+ xlrec = (xl_heap_update *) buf->record_data;
+ xlhdr = (xl_heap_header_len *) (buf->record_data + SizeOfHeapUpdate);
+
+ /* only interested in our database */
+ if (xlrec->target.node.dbNode != ctx->slot->data.database)
+ return;
+
+ change = ReorderBufferGetChange(ctx->reorder);
+ change->action = REORDER_BUFFER_CHANGE_UPDATE;
+ memcpy(&change->tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+
+ data = (char *) &xlhdr->header;
+
+ if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
+ {
+ Assert(r->xl_len > (SizeOfHeapUpdate + SizeOfHeapHeaderLen));
+
+ change->tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
+
+ DecodeXLogTuple(data,
+ xlhdr->t_len + SizeOfHeapHeader,
+ change->tp.newtuple);
+ /* skip over the rest of the tuple header */
+ data += SizeOfHeapHeader;
+ /* skip over the tuple data */
+ data += xlhdr->t_len;
+ }
+
+ if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
+ {
+ xlhdr = (xl_heap_header_len *) data;
+ change->tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
+ DecodeXLogTuple((char *) &xlhdr->header,
+ xlhdr->t_len + SizeOfHeapHeader,
+ change->tp.oldtuple);
+ data = (char *) &xlhdr->header;
+ data += SizeOfHeapHeader;
+ data += xlhdr->t_len;
+ }
+
+ ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change);
+}
+
+/*
+ * Parse XLOG_HEAP_DELETE from wal into proper tuplebufs.
+ *
+ * Deletes can possibly contain the old primary key.
+ */
+static void
+DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+ XLogRecord *r = &buf->record;
+ xl_heap_delete *xlrec;
+ ReorderBufferChange *change;
+
+ xlrec = (xl_heap_delete *) buf->record_data;
+
+ /* only interested in our database */
+ if (xlrec->target.node.dbNode != ctx->slot->data.database)
+ return;
+
+ change = ReorderBufferGetChange(ctx->reorder);
+ change->action = REORDER_BUFFER_CHANGE_DELETE;
+
+ memcpy(&change->tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+
+ /* old primary key stored */
+ if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
+ {
+ Assert(r->xl_len > (SizeOfHeapDelete + SizeOfHeapHeader));
+
+ change->tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
+
+ DecodeXLogTuple((char *) xlrec + SizeOfHeapDelete,
+ r->xl_len - SizeOfHeapDelete,
+ change->tp.oldtuple);
+ }
+ ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change);
+}
+
+/*
+ * Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs.
+ *
+ * Currently MULTI_INSERT will always contain the full tuples.
+ */
+static void
+DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+ XLogRecord *r = &buf->record;
+ xl_heap_multi_insert *xlrec;
+ int i;
+ char *data;
+ bool isinit = (r->xl_info & XLOG_HEAP_INIT_PAGE) != 0;
+
+ xlrec = (xl_heap_multi_insert *) buf->record_data;
+
+ /* only interested in our database */
+ if (xlrec->node.dbNode != ctx->slot->data.database)
+ return;
+
+ data = buf->record_data + SizeOfHeapMultiInsert;
+
+ /*
+ * OffsetNumbers (which are not of interest to us) are stored when
+ * XLOG_HEAP_INIT_PAGE is not set -- skip over them.
+ */
+ if (!isinit)
+ data += sizeof(OffsetNumber) * xlrec->ntuples;
+
+ for (i = 0; i < xlrec->ntuples; i++)
+ {
+ ReorderBufferChange *change;
+ xl_multi_insert_tuple *xlhdr;
+ int datalen;
+ ReorderBufferTupleBuf *tuple;
+
+ change = ReorderBufferGetChange(ctx->reorder);
+ change->action = REORDER_BUFFER_CHANGE_INSERT;
+ memcpy(&change->tp.relnode, &xlrec->node, sizeof(RelFileNode));
+
+ /*
+ * CONTAINS_NEW_TUPLE will always be set currently as multi_insert
+ * isn't used for catalogs, but better be future proof.
+ *
+ * We decode the tuple in pretty much the same way as DecodeXLogTuple,
+ * but since the layout is slightly different, we can't use it here.
+ */
+ if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
+ {
+ change->tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
+
+ tuple = change->tp.newtuple;
+
+ /* not a disk based tuple */
+ ItemPointerSetInvalid(&tuple->tuple.t_self);
+
+ xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(data);
+ data = ((char *) xlhdr) + SizeOfMultiInsertTuple;
+ datalen = xlhdr->datalen;
+
+ /*
+ * We can only figure this out after reassembling the
+ * transactions.
+ */
+ tuple->tuple.t_tableOid = InvalidOid;
+ tuple->tuple.t_data = &tuple->header;
+ tuple->tuple.t_len = datalen
+ + offsetof(HeapTupleHeaderData, t_bits);
+
+ memset(&tuple->header, 0, sizeof(HeapTupleHeaderData));
+
+ memcpy((char *) &tuple->header
+ + offsetof(HeapTupleHeaderData, t_bits),
+ (char *) data,
+ datalen);
+ data += datalen;
+
+ tuple->header.t_infomask = xlhdr->t_infomask;
+ tuple->header.t_infomask2 = xlhdr->t_infomask2;
+ tuple->header.t_hoff = xlhdr->t_hoff;
+ }
+
+ ReorderBufferQueueChange(ctx->reorder, r->xl_xid,
+ buf->origptr, change);
+ }
+}
+
+/*
+ * Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete
+ * (but not by heap_multi_insert) into a tuplebuf.
+ *
+ * The size 'len' and the pointer 'data' in the record need to be
+ * computed outside as they are record specific.
+ */
+static void
+DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)
+{
+ xl_heap_header xlhdr;
+ int datalen = len - SizeOfHeapHeader;
+
+ Assert(datalen >= 0);
+ Assert(datalen <= MaxHeapTupleSize);
+
+ tuple->tuple.t_len = datalen + offsetof(HeapTupleHeaderData, t_bits);
+
+ /* not a disk based tuple */
+ ItemPointerSetInvalid(&tuple->tuple.t_self);
+
+ /* we can only figure this out after reassembling the transactions */
+ tuple->tuple.t_tableOid = InvalidOid;
+ tuple->tuple.t_data = &tuple->header;
+
+ /* data is not stored aligned, copy to aligned storage */
+ memcpy((char *) &xlhdr,
+ data,
+ SizeOfHeapHeader);
+
+ memset(&tuple->header, 0, sizeof(HeapTupleHeaderData));
+
+ memcpy((char *) &tuple->header + offsetof(HeapTupleHeaderData, t_bits),
+ data + SizeOfHeapHeader,
+ datalen);
+
+ tuple->header.t_infomask = xlhdr.t_infomask;
+ tuple->header.t_infomask2 = xlhdr.t_infomask2;
+ tuple->header.t_hoff = xlhdr.t_hoff;
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ * logical.c
+ * PostgreSQL logical decoding coordination
+ *
+ * Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/replication/logical/logical.c
+ *
+ * NOTES
+ * This file coordinates interaction between the various modules that
+ * together providethe logical decoding, primarily by providing so
+ * called LogicalDecodingContexts. The goal is to encapsulate most of the
+ * internal complexity for consumers of logical decoding, so they can
+ * create and consume a changestream with a low amount of code.
+ *
+ * The idea is that a consumer provides three callbacks, one to read WAL,
+ * one to prepare a data write, and a final one for actually writing since
+ * their implementation depends on the type of consumer. Check
+ * logicalfunc.c for an example implementations of a fairly simple consumer
+ * and a implementation of a WAL reading callback that's suitable for
+ * simpler consumers.
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "miscadmin.h"
+
+#include "access/xact.h"
+
+#include "replication/decode.h"
+#include "replication/logical.h"
+#include "replication/reorderbuffer.h"
+#include "replication/snapbuild.h"
+
+#include "storage/proc.h"
+#include "storage/procarray.h"
+
+#include "utils/memutils.h"
+
+/* data for errcontext callback */
+typedef struct LogicalErrorCallbackState
+{
+ LogicalDecodingContext *ctx;
+ const char *callback_name;
+ XLogRecPtr report_location;
+} LogicalErrorCallbackState;
+
+/* wrappers around output plugin callbacks */
+static void output_plugin_error_callback(void *arg);
+static void startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
+ bool is_init);
+static void shutdown_cb_wrapper(LogicalDecodingContext *ctx);
+static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn);
+static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
+ XLogRecPtr commit_lsn);
+static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
+ Relation relation, ReorderBufferChange *change);
+
+static void LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin);
+
+/*
+ * Make sure the current settings & environment are capable of doing logical
+ * decoding.
+ */
+void
+CheckLogicalDecodingRequirements(void)
+{
+ CheckSlotRequirements();
+
+ if (wal_level < WAL_LEVEL_LOGICAL)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("logical decoding requires wal_level >= logical")));
+
+ if (MyDatabaseId == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("logical decoding requires a database connection")));
+
+ /* ----
+ * TODO: We got to change that someday soon...
+ *
+ * There's basically three things missing to allow this:
+ * 1) We need to be able to correctly and quickly identify the timeline a
+ * LSN belongs to
+ * 2) We need to force hot_standby_feedback to be enabled at all times so
+ * the primary cannot remove rows we need.
+ * 3) support dropping replication slots referring to a database, in
+ * dbase_redo. There can't be any active ones due to HS recovery
+ * conflicts, so that should be relatively easy.
+ * ----
+ */
+ if (RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("logical decoding cannot be used while in recovery")));
+}
+
+/*
+ * Helper function for CreateInitialDecodingContext() and
+ * CreateDecodingContext() performing common tasks.
+ */
+static LogicalDecodingContext *
+StartupDecodingContext(List *output_plugin_options,
+ XLogRecPtr start_lsn,
+ TransactionId xmin_horizon,
+ XLogPageReadCB read_page,
+ LogicalOutputPluginWriterPrepareWrite prepare_write,
+ LogicalOutputPluginWriterWrite do_write)
+{
+ ReplicationSlot *slot;
+ MemoryContext context, old_context;
+ LogicalDecodingContext *ctx;
+
+ /* shorter lines... */
+ slot = MyReplicationSlot;
+
+ context = AllocSetContextCreate(CurrentMemoryContext,
+ "Changeset Extraction Context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ old_context = MemoryContextSwitchTo(context);
+ ctx = palloc0(sizeof(LogicalDecodingContext));
+
+ ctx->context = context;
+
+ /* (re-)load output plugins, so we detect a bad (removed) output plugin now. */
+ LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));
+
+ /*
+ * Now that the slot's xmin has been set, we can announce ourselves as a
+ * logical decoding backend which doesn't need to be checked individually
+ * when computing the xmin horizon because the xmin is enforced via
+ * replication slots.
+ */
+ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ MyPgXact->vacuumFlags |= PROC_IN_LOGICAL_DECODING;
+ LWLockRelease(ProcArrayLock);
+
+ ctx->slot = slot;
+
+ ctx->reader = XLogReaderAllocate(read_page, ctx);
+ ctx->reader->private_data = ctx;
+
+ ctx->reorder = ReorderBufferAllocate();
+ ctx->snapshot_builder =
+ AllocateSnapshotBuilder(ctx->reorder, xmin_horizon, start_lsn);
+
+ ctx->reorder->private_data = ctx;
+
+ /* wrap output plugin callbacks, so we can add error context information */
+ ctx->reorder->begin = begin_cb_wrapper;
+ ctx->reorder->apply_change = change_cb_wrapper;
+ ctx->reorder->commit = commit_cb_wrapper;
+
+ ctx->out = makeStringInfo();
+ ctx->prepare_write = prepare_write;
+ ctx->write = do_write;
+
+ ctx->output_plugin_options = output_plugin_options;
+
+ MemoryContextSwitchTo(old_context);
+
+ return ctx;
+}
+
+/*
+ * Create a new decoding context, for a new logical slot.
+ *
+ * plugin contains the name of the output plugin
+ * output_plugin_options contains options passed to the output plugin
+ * read_page, prepare_write, do_write are callbacks that have to be filled to
+ * perform the use-case dependent, actual, work.
+ *
+ * Needs to be called while in a memory context that's at least as long lived
+ * as the the decoding context because further memory contexts will be created
+ * inside it.
+ *
+ * Returns an initialized decoding context after calling the output plugin's
+ * startup function.
+ */
+LogicalDecodingContext *
+CreateInitDecodingContext(char *plugin,
+ List *output_plugin_options,
+ XLogPageReadCB read_page,
+ LogicalOutputPluginWriterPrepareWrite prepare_write,
+ LogicalOutputPluginWriterWrite do_write)
+{
+ TransactionId xmin_horizon = InvalidTransactionId;
+ ReplicationSlot *slot;
+ LogicalDecodingContext *ctx;
+ MemoryContext old_context;
+
+ /* shorter lines... */
+ slot = MyReplicationSlot;
+
+ /* first some sanity checks that are unlikely to be violated */
+ if (slot == NULL)
+ elog(ERROR, "cannot perform logical decoding without a acquired slot");
+
+ if (plugin == NULL)
+ elog(ERROR, "cannot initialize logical decoding without a specified plugin");
+
+ /* Make sure the passed slot is suitable. These are user facing errors. */
+ if (slot->data.database == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot use physical replication slot created for logical decoding")));
+
+ if (slot->data.database != MyDatabaseId)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("replication slot \"%s\" was not created in this database",
+ NameStr(slot->data.name))));
+
+ if (IsTransactionState() &&
+ GetTopTransactionIdIfAny() != InvalidTransactionId)
+ ereport(ERROR,
+ (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
+ errmsg("cannot create logical replication slot in transaction that has performed writes")));
+
+ /* register output plugin name with slot */
+ SpinLockAcquire(&slot->mutex);
+ strncpy(NameStr(slot->data.plugin), plugin,
+ NAMEDATALEN);
+ NameStr(slot->data.plugin)[NAMEDATALEN - 1] = '\0';
+ SpinLockRelease(&slot->mutex);
+
+ /*
+ * The replication slot mechanism is used to prevent removal of required
+ * WAL. As there is no interlock between this and checkpoints required WAL
+ * could be removed before ReplicationSlotsComputeRequiredLSN() has been
+ * called to prevent that. In the very unlikely case that this happens
+ * we'll just retry.
+ */
+ while (true)
+ {
+ XLogSegNo segno;
+
+ /*
+ * Let's start with enough information if we can, so log a standby
+ * snapshot and start decoding at exactly that position.
+ */
+ if (!RecoveryInProgress())
+ {
+ XLogRecPtr flushptr;
+
+ /* start at current insert position*/
+ slot->data.restart_lsn = GetXLogInsertRecPtr();
+
+ /* make sure we have enough information to start */
+ flushptr = LogStandbySnapshot();
+
+ /* and make sure it's fsynced to disk */
+ XLogFlush(flushptr);
+ }
+ else
+ slot->data.restart_lsn = GetRedoRecPtr();
+
+ /* prevent WAL removal as fast as possible */
+ ReplicationSlotsComputeRequiredLSN();
+
+ /*
+ * If all required WAL is still there, great, otherwise retry. The
+ * slot should prevent further removal of WAL, unless there's a
+ * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
+ * the new restart_lsn above, so normally we should never need to loop
+ * more than twice.
+ */
+ XLByteToSeg(slot->data.restart_lsn, segno);
+ if (XLogGetLastRemovedSegno() < segno)
+ break;
+ }
+
+
+ /* ----
+ * This is a bit tricky: We need to determine a safe xmin horizon to start
+ * decoding from, to avoid starting from a running xacts record referring
+ * to xids whose rows have been vacuumed or pruned
+ * already. GetOldestSafeDecodingTransactionId() returns such a value, but
+ * without further interlock it's return value might immediately be out of
+ * date.
+ *
+ * So we have to acquire the ProcArrayLock to prevent computation of new
+ * xmin horizons by other backends, get the safe decoding xid, and inform
+ * the slot machinery about the new limit. Once that's done the
+ * ProcArrayLock can be be released as the slot machinery now is
+ * protecting against vacuum.
+ * ----
+ */
+ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+
+ slot->effective_catalog_xmin = GetOldestSafeDecodingTransactionId();
+ slot->data.catalog_xmin = slot->effective_catalog_xmin;
+
+ ReplicationSlotsComputeRequiredXmin(true);
+
+ LWLockRelease(ProcArrayLock);
+
+ /*
+ * tell the snapshot builder to only assemble snapshot once reaching
+ * the a running_xact's record with the respective xmin.
+ */
+ xmin_horizon = slot->data.catalog_xmin;
+
+ ReplicationSlotMarkDirty();
+ ReplicationSlotSave();
+
+ ctx = StartupDecodingContext(NIL, InvalidXLogRecPtr, xmin_horizon,
+ read_page, prepare_write, do_write);
+
+ /* call output plugin initialization callback */
+ old_context = MemoryContextSwitchTo(ctx->context);
+ if (ctx->callbacks.startup_cb != NULL)
+ startup_cb_wrapper(ctx, &ctx->options, true);
+ MemoryContextSwitchTo(old_context);
+
+ return ctx;
+}
+
+/*
+ * Create a new decoding context, for a logical slot that has previously been
+ * used already.
+ *
+ * start_lsn contains the LSN of the last received data or InvalidXLogRecPtr
+ * output_plugin_options contains options passed to the output plugin
+ * read_page, prepare_write, do_write are callbacks that have to be filled to
+ * perform the use-case dependent, actual, work.
+ *
+ * Needs to be called while in a memory context that's at least as long lived
+ * as the the decoding context because further memory contexts will be created
+ * inside it.
+ *
+ * Returns an initialized decoding context after calling the output plugin's
+ * startup function.
+ */
+LogicalDecodingContext *
+CreateDecodingContext(XLogRecPtr start_lsn,
+ List *output_plugin_options,
+ XLogPageReadCB read_page,
+ LogicalOutputPluginWriterPrepareWrite prepare_write,
+ LogicalOutputPluginWriterWrite do_write)
+{
+ LogicalDecodingContext *ctx;
+ ReplicationSlot *slot;
+ MemoryContext old_context;
+
+ /* shorter lines... */
+ slot = MyReplicationSlot;
+
+ /* first some sanity checks that are unlikely to be violated */
+ if (slot == NULL)
+ elog(ERROR, "cannot perform logical decoding without a acquired slot");
+
+ /* make sure the passed slot is suitable, these are user facing errors */
+ if (slot->data.database == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ (errmsg("cannot use physical replication slot for logical decoding"))));
+
+ if (slot->data.database != MyDatabaseId)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ (errmsg("replication slot \"%s\" was not created in this database",
+ NameStr(slot->data.name)))));
+
+ if (start_lsn == InvalidXLogRecPtr)
+ {
+ /* continue from last position */
+ start_lsn = slot->data.confirmed_flush;
+ }
+ else if (start_lsn < slot->data.confirmed_flush)
+ {
+ /*
+ * It might seem like we should error out in this case, but it's
+ * pretty common for a client to acknowledge a LSN it doesn't have to
+ * do anything for, and thus didn't store persistently, because the
+ * xlog records didn't result in anything relevant for logical
+ * decoding. Clients have to be able to do that to support
+ * synchronous replication.
+ */
+ start_lsn = slot->data.confirmed_flush;
+ elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
+ (uint32)(start_lsn >> 32), (uint32)start_lsn,
+ (uint32)(slot->data.confirmed_flush >> 32),
+ (uint32)slot->data.confirmed_flush);
+ }
+
+ ctx = StartupDecodingContext(output_plugin_options,
+ start_lsn, InvalidTransactionId,
+ read_page, prepare_write, do_write);
+
+ /* call output plugin initialization callback */
+ old_context = MemoryContextSwitchTo(ctx->context);
+ if (ctx->callbacks.startup_cb != NULL)
+ startup_cb_wrapper(ctx, &ctx->options, true);
+ MemoryContextSwitchTo(old_context);
+
+ ereport(LOG,
+ (errmsg("starting logical decoding for slot %s",
+ NameStr(slot->data.name)),
+ errdetail("streaming transactions committing after %X/%X, reading WAL from %X/%X",
+ (uint32)(slot->data.confirmed_flush >> 32),
+ (uint32)slot->data.confirmed_flush,
+ (uint32)(slot->data.restart_lsn >> 32),
+ (uint32)slot->data.restart_lsn)));
+
+ return ctx;
+}
+
+/*
+ * Returns true if an consistent initial decoding snapshot has been built.
+ */
+bool
+DecodingContextReady(LogicalDecodingContext *ctx)
+{
+ return SnapBuildCurrentState(ctx->snapshot_builder) == SNAPBUILD_CONSISTENT;
+}
+
+/*
+ * Read from the decoding slot, until it is ready to start extracting changes.
+ */
+void
+DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
+{
+ XLogRecPtr startptr;
+
+ /* Initialize from where to start reading WAL. */
+ startptr = ctx->slot->data.restart_lsn;
+
+ elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
+ (uint32)(ctx->slot->data.restart_lsn >> 32),
+ (uint32)ctx->slot->data.restart_lsn);
+
+ /* Wait for a consistent starting point */
+ for (;;)
+ {
+ XLogRecord *record;
+ char *err = NULL;
+
+ /*
+ * If the caller requires that interrupts be checked, the read_page
+ * callback should do so, as those will often wait.
+ */
+
+ /* the read_page callback waits for new WAL */
+ record = XLogReadRecord(ctx->reader, startptr, &err);
+ if (err)
+ elog(ERROR, "%s", err);
+
+ Assert(record);
+
+ startptr = InvalidXLogRecPtr;
+
+ LogicalDecodingProcessRecord(ctx, record);
+
+ /* only continue till we found a consistent spot */
+ if (DecodingContextReady(ctx))
+ break;
+ }
+
+ ctx->slot->data.confirmed_flush = ctx->reader->EndRecPtr;
+}
+
+/*
+ * Free a previously allocated decoding context, invoking the shutdown
+ * callback if necessary.
+ */
+void
+FreeDecodingContext(LogicalDecodingContext *ctx)
+{
+ if (ctx->callbacks.shutdown_cb != NULL)
+ shutdown_cb_wrapper(ctx);
+
+ ReorderBufferFree(ctx->reorder);
+ FreeSnapshotBuilder(ctx->snapshot_builder);
+ XLogReaderFree(ctx->reader);
+ MemoryContextDelete(ctx->context);
+}
+
+/*
+ * Prepare a write using the context's output routine.
+ */
+void
+OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write)
+{
+ if (!ctx->accept_writes)
+ elog(ERROR, "writes are only accepted in commit, begin and change callbacks");
+
+ ctx->prepare_write(ctx, ctx->write_location, ctx->write_xid, last_write);
+ ctx->prepared_write = true;
+}
+
+/*
+ * Perform a write using the context's output routine.
+ */
+void
+OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write)
+{
+ if (!ctx->prepared_write)
+ elog(ERROR, "OutputPluginPrepareWrite needs to be called before OutputPluginWrite");
+
+ ctx->write(ctx, ctx->write_location, ctx->write_xid, last_write);
+ ctx->prepared_write = false;
+}
+
+/*
+ * Load the output plugin, lookup its output plugin init function, and check
+ * that it provides the required callbacks.
+ */
+static void
+LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin)
+{
+ LogicalOutputPluginInit plugin_init;
+
+ plugin_init = (LogicalOutputPluginInit)
+ load_external_function(plugin, "_PG_output_plugin_init", false, NULL);
+
+ if (plugin_init == NULL)
+ elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
+
+ /* ask the output plugin to fill the callback struct */
+ plugin_init(callbacks);
+
+ if (callbacks->begin_cb == NULL)
+ elog(ERROR, "output plugins have to register a begin callback");
+ if (callbacks->change_cb == NULL)
+ elog(ERROR, "output plugins have to register a change callback");
+ if (callbacks->commit_cb == NULL)
+ elog(ERROR, "output plugins have to register a commit callback");
+}
+
+static void
+output_plugin_error_callback(void *arg)
+{
+ LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg;
+ /* not all callbacks have an associated LSN */
+ if (state->report_location != InvalidXLogRecPtr)
+ errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
+ NameStr(state->ctx->slot->data.name),
+ NameStr(state->ctx->slot->data.plugin),
+ state->callback_name,
+ (uint32)(state->report_location >> 32),
+ (uint32)state->report_location);
+ else
+ errcontext("slot \"%s\", output plugin \"%s\", in the %s callback",
+ NameStr(state->ctx->slot->data.name),
+ NameStr(state->ctx->slot->data.plugin),
+ state->callback_name);
+}
+
+static void
+startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init)
+{
+ LogicalErrorCallbackState state;
+ ErrorContextCallback errcallback;
+
+ /* Push callback + info on the error context stack */
+ state.ctx = ctx;
+ state.callback_name = "startup";
+ state.report_location = InvalidXLogRecPtr;
+ errcallback.callback = output_plugin_error_callback;
+ errcallback.arg = (void *) &state;
+ errcallback.previous = error_context_stack;
+ error_context_stack = &errcallback;
+
+ /* set output state */
+ ctx->accept_writes = false;
+
+ /* do the actual work: call callback */
+ ctx->callbacks.startup_cb(ctx, opt, is_init);
+
+ /* Pop the error context stack */
+ error_context_stack = errcallback.previous;
+}
+
+static void
+shutdown_cb_wrapper(LogicalDecodingContext *ctx)
+{
+ LogicalErrorCallbackState state;
+ ErrorContextCallback errcallback;
+
+ /* Push callback + info on the error context stack */
+ state.ctx = ctx;
+ state.callback_name = "shutdown";
+ state.report_location = InvalidXLogRecPtr;
+ errcallback.callback = output_plugin_error_callback;
+ errcallback.arg = (void *) &state;
+ errcallback.previous = error_context_stack;
+ error_context_stack = &errcallback;
+
+ /* set output state */
+ ctx->accept_writes = false;
+
+ /* do the actual work: call callback */
+ ctx->callbacks.shutdown_cb(ctx);
+
+ /* Pop the error context stack */
+ error_context_stack = errcallback.previous;
+}
+
+
+/*
+ * Callbacks for ReorderBuffer which add in some more information and then call
+ * output_plugin.h plugins.
+ */
+static void
+begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn)
+{
+ LogicalDecodingContext *ctx = cache->private_data;
+ LogicalErrorCallbackState state;
+ ErrorContextCallback errcallback;
+
+ /* Push callback + info on the error context stack */
+ state.ctx = ctx;
+ state.callback_name = "begin";
+ state.report_location = txn->first_lsn;
+ errcallback.callback = output_plugin_error_callback;
+ errcallback.arg = (void *) &state;
+ errcallback.previous = error_context_stack;
+ error_context_stack = &errcallback;
+
+ /* set output state */
+ ctx->accept_writes = true;
+ ctx->write_xid = txn->xid;
+ ctx->write_location = txn->first_lsn;
+
+ /* do the actual work: call callback */
+ ctx->callbacks.begin_cb(ctx, txn);
+
+ /* Pop the error context stack */
+ error_context_stack = errcallback.previous;
+}
+
+static void
+commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
+ XLogRecPtr commit_lsn)
+{
+ LogicalDecodingContext *ctx = cache->private_data;
+ LogicalErrorCallbackState state;
+ ErrorContextCallback errcallback;
+
+ /* Push callback + info on the error context stack */
+ state.ctx = ctx;
+ state.callback_name = "commit";
+ state.report_location = txn->final_lsn; /* beginning of commit record */
+ errcallback.callback = output_plugin_error_callback;
+ errcallback.arg = (void *) &state;
+ errcallback.previous = error_context_stack;
+ error_context_stack = &errcallback;
+
+ /* set output state */
+ ctx->accept_writes = true;
+ ctx->write_xid = txn->xid;
+ ctx->write_location = txn->end_lsn; /* points to the end of the record */
+
+ /* do the actual work: call callback */
+ ctx->callbacks.commit_cb(ctx, txn, commit_lsn);
+
+ /* Pop the error context stack */
+ error_context_stack = errcallback.previous;
+}
+
+static void
+change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
+ Relation relation, ReorderBufferChange *change)
+{
+ LogicalDecodingContext *ctx = cache->private_data;
+ LogicalErrorCallbackState state;
+ ErrorContextCallback errcallback;
+
+ /* Push callback + info on the error context stack */
+ state.ctx = ctx;
+ state.callback_name = "change";
+ state.report_location = change->lsn;
+ errcallback.callback = output_plugin_error_callback;
+ errcallback.arg = (void *) &state;
+ errcallback.previous = error_context_stack;
+ error_context_stack = &errcallback;
+
+ /* set output state */
+ ctx->accept_writes = true;
+ ctx->write_xid = txn->xid;
+ /*
+ * report this change's lsn so replies from clients can give an up2date
+ * answer. This won't ever be enough (and shouldn't be!) to confirm
+ * receipt of this transaction, but it might allow another transaction's
+ * commit to be confirmed with one message.
+ */
+ ctx->write_location = change->lsn;
+
+ ctx->callbacks.change_cb(ctx, txn, relation, change);
+
+ /* Pop the error context stack */
+ error_context_stack = errcallback.previous;
+}
+
+/*
+ * Set the required catalog xmin horizon for historic snapshots in the current
+ * replication slot.
+ *
+ * Note that in the most cases, we won't be able to immediately use the xmin
+ * to increase the xmin horizon, we need to wait till the client has confirmed
+ * receiving current_lsn with LogicalConfirmReceivedLocation().
+ */
+void
+LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
+{
+ bool updated_xmin = false;
+ ReplicationSlot *slot;
+
+ slot = MyReplicationSlot;
+
+ Assert(slot != NULL);
+
+ SpinLockAcquire(&slot->mutex);
+
+ /*
+ * don't overwrite if we already have a newer xmin. This can
+ * happen if we restart decoding in a slot.
+ */
+ if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
+ {
+ }
+ /*
+ * If the client has already confirmed up to this lsn, we directly
+ * can mark this as accepted. This can happen if we restart
+ * decoding in a slot.
+ */
+ else if (current_lsn <= slot->data.confirmed_flush)
+ {
+ slot->candidate_catalog_xmin = xmin;
+ slot->candidate_xmin_lsn = current_lsn;
+
+ /* our candidate can directly be used */
+ updated_xmin = true;
+ }
+ /*
+ * Only increase if the previous values have been applied, otherwise we
+ * might never end up updating if the receiver acks too slowly.
+ */
+ else if (slot->candidate_xmin_lsn == InvalidXLogRecPtr)
+ {
+ slot->candidate_catalog_xmin = xmin;
+ slot->candidate_xmin_lsn = current_lsn;
+ }
+ SpinLockRelease(&slot->mutex);
+
+ /* candidate already valid with the current flush position, apply */
+ if (updated_xmin)
+ LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
+}
+
+/*
+ * Mark the minimal LSN (restart_lsn) we need to read to replay all
+ * transactions that have not yet committed at current_lsn.
+ *
+ * Just like IncreaseRestartDecodingForSlot this nly takes effect when the
+ * client has confirmed to have received current_lsn.
+ */
+void
+LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
+{
+ bool updated_lsn = false;
+ ReplicationSlot *slot;
+
+ slot = MyReplicationSlot;
+
+ Assert(slot != NULL);
+ Assert(restart_lsn != InvalidXLogRecPtr);
+ Assert(current_lsn != InvalidXLogRecPtr);
+
+ SpinLockAcquire(&slot->mutex);
+
+ /* don't overwrite if have a newer restart lsn*/
+ if (restart_lsn <= slot->data.restart_lsn)
+ {
+ }
+ /*
+ * We might have already flushed far enough to directly accept this lsn, in
+ * this case there is no need to check for existing candidate LSNs
+ */
+ else if (current_lsn <= slot->data.confirmed_flush)
+ {
+ slot->candidate_restart_valid = current_lsn;
+ slot->candidate_restart_lsn = restart_lsn;
+
+ /* our candidate can directly be used */
+ updated_lsn = true;
+ }
+ /*
+ * Only increase if the previous values have been applied, otherwise we
+ * might never end up updating if the receiver acks too slowly. A missed
+ * value here will just cause some extra effort after reconnecting.
+ */
+ if (slot->candidate_restart_valid == InvalidXLogRecPtr)
+ {
+ slot->candidate_restart_valid = current_lsn;
+ slot->candidate_restart_lsn = restart_lsn;
+
+ elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
+ (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
+ (uint32) (current_lsn >> 32), (uint32) current_lsn);
+ }
+ else
+ {
+ elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
+ (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
+ (uint32) (current_lsn >> 32), (uint32) current_lsn,
+ (uint32) (slot->candidate_restart_lsn >> 32),
+ (uint32) slot->candidate_restart_lsn,
+ (uint32) (slot->candidate_restart_valid >> 32),
+ (uint32) slot->candidate_restart_valid,
+ (uint32) (slot->data.confirmed_flush >> 32),
+ (uint32) slot->data.confirmed_flush
+ );
+ }
+ SpinLockRelease(&slot->mutex);
+
+ /* candidates are already valid with the current flush position, apply */
+ if (updated_lsn)
+ LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
+}
+
+/*
+ * Handle a consumer's conformation having received all changes up to lsn.
+ */
+void
+LogicalConfirmReceivedLocation(XLogRecPtr lsn)
+{
+ Assert(lsn != InvalidXLogRecPtr);
+
+ /* Do an unlocked check for candidate_lsn first. */
+ if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr ||
+ MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr)
+ {
+ bool updated_xmin = false;
+ bool updated_restart = false;
+
+ /* use volatile pointer to prevent code rearrangement */
+ volatile ReplicationSlot *slot = MyReplicationSlot;
+
+ SpinLockAcquire(&slot->mutex);
+
+ slot->data.confirmed_flush = lsn;
+
+ /* if were past the location required for bumping xmin, do so */
+ if (slot->candidate_xmin_lsn != InvalidXLogRecPtr &&
+ slot->candidate_xmin_lsn <= lsn)
+ {
+ /*
+ * We have to write the changed xmin to disk *before* we change
+ * the in-memory value, otherwise after a crash we wouldn't know
+ * that some catalog tuples might have been removed already.
+ *
+ * Ensure that by first writing to ->xmin and only update
+ * ->effective_xmin once the new state is synced to disk. After a
+ * crash ->effective_xmin is set to ->xmin.
+ */
+ if (TransactionIdIsValid(slot->candidate_catalog_xmin) &&
+ slot->data.catalog_xmin != slot->candidate_catalog_xmin)
+ {
+ slot->data.catalog_xmin = slot->candidate_catalog_xmin;
+ slot->candidate_catalog_xmin = InvalidTransactionId;
+ slot->candidate_xmin_lsn = InvalidXLogRecPtr;
+ updated_xmin = true;
+ }
+ }
+
+ if (slot->candidate_restart_valid != InvalidXLogRecPtr &&
+ slot->candidate_restart_valid <= lsn)
+ {
+ Assert(slot->candidate_restart_lsn != InvalidXLogRecPtr);
+
+ slot->data.restart_lsn = slot->candidate_restart_lsn;
+ slot->candidate_restart_lsn = InvalidXLogRecPtr;
+ slot->candidate_restart_valid = InvalidXLogRecPtr;
+ updated_restart = true;
+ }
+
+ SpinLockRelease(&slot->mutex);
+
+ /* first write new xmin to disk, so we know whats up after a crash */
+ if (updated_xmin || updated_restart)
+ {
+ ReplicationSlotMarkDirty();
+ ReplicationSlotSave();
+ elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
+ }
+ /*
+ * Now the new xmin is safely on disk, we can let the global value
+ * advance. We do not take ProcArrayLock or similar since we only
+ * advance xmin here and there's not much harm done by a concurrent
+ * computation missing that.
+ */
+ if (updated_xmin)
+ {
+ SpinLockAcquire(&slot->mutex);
+ slot->effective_catalog_xmin = slot->data.catalog_xmin;
+ SpinLockRelease(&slot->mutex);
+
+ ReplicationSlotsComputeRequiredXmin(false);
+ ReplicationSlotsComputeRequiredLSN();
+ }
+ }
+ else
+ {
+ volatile ReplicationSlot *slot = MyReplicationSlot;
+
+ SpinLockAcquire(&slot->mutex);
+ slot->data.confirmed_flush = lsn;
+ SpinLockRelease(&slot->mutex);
+ }
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * logicalfuncs.c
+ *
+ * Support functions for using logical decoding and managemnt of
+ * logical replication slots via SQL.
+ *
+ *
+ * Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/replication/logicalfuncs.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <unistd.h>
+
+#include "fmgr.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+
+#include "catalog/pg_type.h"
+
+#include "nodes/makefuncs.h"
+
+#include "mb/pg_wchar.h"
+
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/inval.h"
+#include "utils/memutils.h"
+#include "utils/pg_lsn.h"
+#include "utils/resowner.h"
+#include "utils/lsyscache.h"
+
+#include "replication/decode.h"
+#include "replication/logical.h"
+#include "replication/logicalfuncs.h"
+
+#include "storage/fd.h"
+
+/* private date for writing out data */
+typedef struct DecodingOutputState {
+ Tuplestorestate *tupstore;
+ TupleDesc tupdesc;
+ bool binary_output;
+ int64 returned_rows;
+} DecodingOutputState;
+
+/*
+ * Prepare for a output plugin write.
+ */
+static void
+LogicalOutputPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
+ bool last_write)
+{
+ resetStringInfo(ctx->out);
+}
+
+/*
+ * Perform output plugin write into tuplestore.
+ */
+static void
+LogicalOutputWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
+ bool last_write)
+{
+ Datum values[3];
+ bool nulls[3];
+ DecodingOutputState *p;
+
+ /* SQL Datums can only be of a limited length... */
+ if (ctx->out->len > MaxAllocSize - VARHDRSZ)
+ elog(ERROR, "too much output for sql interface");
+
+ p = (DecodingOutputState *) ctx->output_writer_private;
+
+ memset(nulls, 0, sizeof(nulls));
+ values[0] = LSNGetDatum(lsn);
+ values[1] = TransactionIdGetDatum(xid);
+
+ /*
+ * Assert ctx->out is in database encoding when we're writing textual
+ * output.
+ */
+ if (!p->binary_output)
+ Assert(pg_verify_mbstr(GetDatabaseEncoding(),
+ ctx->out->data, ctx->out->len,
+ false));
+
+ /* ick, but cstring_to_text_with_len works for bytea perfectly fine */
+ values[2] = PointerGetDatum(
+ cstring_to_text_with_len(ctx->out->data, ctx->out->len));
+
+ tuplestore_putvalues(p->tupstore, p->tupdesc, values, nulls);
+ p->returned_rows++;
+}
+
+/*
+ * TODO: This is duplicate code with pg_xlogdump, similar to walsender.c, but
+ * we currently don't have the infrastructure (elog!) to share it.
+ */
+static void
+XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
+{
+ char *p;
+ XLogRecPtr recptr;
+ Size nbytes;
+
+ static int sendFile = -1;
+ static XLogSegNo sendSegNo = 0;
+ static uint32 sendOff = 0;
+
+ p = buf;
+ recptr = startptr;
+ nbytes = count;
+
+ while (nbytes > 0)
+ {
+ uint32 startoff;
+ int segbytes;
+ int readbytes;
+
+ startoff = recptr % XLogSegSize;
+
+ if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo))
+ {
+ char path[MAXPGPATH];
+
+ /* Switch to another logfile segment */
+ if (sendFile >= 0)
+ close(sendFile);
+
+ XLByteToSeg(recptr, sendSegNo);
+
+ XLogFilePath(path, tli, sendSegNo);
+
+ sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
+
+ if (sendFile < 0)
+ {
+ if (errno == ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("requested WAL segment %s has already been removed",
+ path)));
+ else
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m",
+ path)));
+ }
+ sendOff = 0;
+ }
+
+ /* Need to seek in the file? */
+ if (sendOff != startoff)
+ {
+ if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
+ {
+ char path[MAXPGPATH];
+
+ XLogFilePath(path, tli, sendSegNo);
+
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not seek in log segment %s to offset %u: %m",
+ path, startoff)));
+ }
+ sendOff = startoff;
+ }
+
+ /* How many bytes are within this segment? */
+ if (nbytes > (XLogSegSize - startoff))
+ segbytes = XLogSegSize - startoff;
+ else
+ segbytes = nbytes;
+
+ readbytes = read(sendFile, p, segbytes);
+ if (readbytes <= 0)
+ {
+ char path[MAXPGPATH];
+
+ XLogFilePath(path, tli, sendSegNo);
+
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read from log segment %s, offset %u, length %lu: %m",
+ path, sendOff, (unsigned long) segbytes)));
+ }
+
+ /* Update state for read */
+ recptr += readbytes;
+
+ sendOff += readbytes;
+ nbytes -= readbytes;
+ p += readbytes;
+ }
+}
+
+static void
+check_permissions(void)
+{
+ if (!superuser() && !has_rolreplication(GetUserId()))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser or replication role to use replication slots"))));
+}
+
+/*
+ * read_page callback for logical decoding contexts.
+ *
+ * Public because it would likely be very helpful for someone writing another
+ * output method outside walsender, e.g. in a bgworker.
+ *
+ * TODO: The walsender has it's own version of this, but it relies on the
+ * walsender's latch being set whenever WAL is flushed. No such infrastructure
+ * exists for normal backends, so we have to do a check/sleep/repeat style of
+ * loop for now.
+ */
+int
+logical_read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
+ int reqLen, XLogRecPtr targetRecPtr, char *cur_page, TimeLineID *pageTLI)
+{
+ XLogRecPtr flushptr,
+ loc;
+ int count;
+
+ loc = targetPagePtr + reqLen;
+ while (1)
+ {
+ /*
+ * TODO: we're going to have to do something more intelligent about
+ * timelines on standbys. Use readTimeLineHistory() and
+ * tliOfPointInHistory() to get the proper LSN? For now we'll catch
+ * that case earlier, but the code and TODO is left in here for when
+ * that changes.
+ */
+ if (!RecoveryInProgress())
+ {
+ *pageTLI = ThisTimeLineID;
+ flushptr = GetFlushRecPtr();
+ }
+ else
+ flushptr = GetXLogReplayRecPtr(pageTLI);
+
+ if (loc <= flushptr)
+ break;
+
+ CHECK_FOR_INTERRUPTS();
+ pg_usleep(1000L);
+ }
+
+ /* more than one block available */
+ if (targetPagePtr + XLOG_BLCKSZ <= flushptr)
+ count = XLOG_BLCKSZ;
+ /* not enough data there */
+ else if (targetPagePtr + reqLen > flushptr)
+ return -1;
+ /* part of the page available */
+ else
+ count = flushptr - targetPagePtr;
+
+ XLogRead(cur_page, *pageTLI, targetPagePtr, XLOG_BLCKSZ);
+
+ return count;
+}
+
+/*
+ * Helper function for the various SQL callable logical decoding functions.
+ */
+static Datum
+pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool binary)
+{
+ Name name = PG_GETARG_NAME(0);
+ XLogRecPtr upto_lsn;
+ int32 upto_nchanges;
+
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ MemoryContext per_query_ctx;
+ MemoryContext oldcontext;
+
+ XLogRecPtr end_of_wal;
+ XLogRecPtr startptr;
+
+ LogicalDecodingContext *ctx;
+
+ ResourceOwner old_resowner = CurrentResourceOwner;
+ ArrayType *arr;
+ Size ndim;
+ List *options = NIL;
+ DecodingOutputState *p;
+
+ if (PG_ARGISNULL(1))
+ upto_lsn = InvalidXLogRecPtr;
+ else
+ upto_lsn = PG_GETARG_LSN(1);
+
+ if (PG_ARGISNULL(2))
+ upto_nchanges = InvalidXLogRecPtr;
+ else
+ upto_nchanges = PG_GETARG_INT32(2);
+
+ /* check to see if caller supports us returning a tuplestore */
+ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("set-valued function called in context that cannot accept a set")));
+ if (!(rsinfo->allowedModes & SFRM_Materialize))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("materialize mode required, but it is not allowed in this context")));
+
+ /* state to write output to */
+ p = palloc0(sizeof(DecodingOutputState));
+
+ p->binary_output = binary;
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &p->tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ check_permissions();
+
+ CheckLogicalDecodingRequirements();
+
+ arr = PG_GETARG_ARRAYTYPE_P(3);
+ ndim = ARR_NDIM(arr);
+
+ per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+ oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+ if (ndim > 1)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("array must be one-dimensional")));
+ }
+ else if (array_contains_nulls(arr))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("array must not contain nulls")));
+ }
+ else if (ndim == 1)
+ {
+ int nelems;
+ Datum *datum_opts;
+ int i;
+
+ Assert(ARR_ELEMTYPE(arr) == TEXTOID);
+
+ deconstruct_array(arr, TEXTOID, -1, false, 'i',
+ &datum_opts, NULL, &nelems);
+
+ if (nelems % 2 != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("array must have even number of elements")));
+
+ for (i = 0; i < nelems; i += 2)
+ {
+ char *name = TextDatumGetCString(datum_opts[i]);
+ char *opt = TextDatumGetCString(datum_opts[i + 1]);
+
+ options = lappend(options, makeDefElem(name, (Node *) makeString(opt)));
+ }
+ }
+
+ p->tupstore = tuplestore_begin_heap(true, false, work_mem);
+ rsinfo->returnMode = SFRM_Materialize;
+ rsinfo->setResult = p->tupstore;
+ rsinfo->setDesc = p->tupdesc;
+
+ /* compute the current end-of-wal */
+ if (!RecoveryInProgress())
+ end_of_wal = GetFlushRecPtr();
+ else
+ end_of_wal = GetXLogReplayRecPtr(NULL);
+
+ CheckLogicalDecodingRequirements();
+ ReplicationSlotAcquire(NameStr(*name));
+
+ PG_TRY();
+ {
+ ctx = CreateDecodingContext(InvalidXLogRecPtr,
+ options,
+ logical_read_local_xlog_page,
+ LogicalOutputPrepareWrite,
+ LogicalOutputWrite);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ /*
+ * Check whether the output pluggin writes textual output if that's
+ * what we need.
+ */
+ if (!binary &&
+ ctx->options.output_type != OUTPUT_PLUGIN_TEXTUAL_OUTPUT)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("output plugin cannot produce text output")));
+
+ ctx->output_writer_private = p;
+
+ startptr = MyReplicationSlot->data.restart_lsn;
+
+ CurrentResourceOwner = ResourceOwnerCreate(CurrentResourceOwner, "logical decoding");
+
+ /* invalidate non-timetravel entries */
+ InvalidateSystemCaches();
+
+ while ((startptr != InvalidXLogRecPtr && startptr < end_of_wal) ||
+ (ctx->reader->EndRecPtr && ctx->reader->EndRecPtr < end_of_wal))
+ {
+ XLogRecord *record;
+ char *errm = NULL;
+
+ record = XLogReadRecord(ctx->reader, startptr, &errm);
+ if (errm)
+ elog(ERROR, "%s", errm);
+
+ startptr = InvalidXLogRecPtr;
+
+ /*
+ * The {begin_txn,change,commit_txn}_wrapper callbacks above will
+ * store the description into our tuplestore.
+ */
+ if (record != NULL)
+ LogicalDecodingProcessRecord(ctx, record);
+
+ /* check limits */
+ if (upto_lsn != InvalidXLogRecPtr &&
+ upto_lsn <= ctx->reader->EndRecPtr)
+ break;
+ if (upto_nchanges != 0 &&
+ upto_nchanges <= p->returned_rows)
+ break;
+ }
+ }
+ PG_CATCH();
+ {
+ /* clear all timetravel entries */
+ InvalidateSystemCaches();
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ tuplestore_donestoring(tupstore);
+
+ CurrentResourceOwner = old_resowner;
+
+ /*
+ * Next time, start where we left off. (Hunting things, the family
+ * business..)
+ */
+ if (ctx->reader->EndRecPtr != InvalidXLogRecPtr && confirm)
+ LogicalConfirmReceivedLocation(ctx->reader->EndRecPtr);
+
+ /* free context, call shutdown callback */
+ FreeDecodingContext(ctx);
+
+ ReplicationSlotRelease();
+ InvalidateSystemCaches();
+
+ return (Datum) 0;
+}
+
+/*
+ * SQL function returning the changestream as text, consuming the data.
+ */
+Datum
+pg_logical_slot_get_changes(PG_FUNCTION_ARGS)
+{
+ Datum ret = pg_logical_slot_get_changes_guts(fcinfo, true, false);
+ return ret;
+}
+
+/*
+ * SQL function returning the changestream as text, only peeking ahead.
+ */
+Datum
+pg_logical_slot_peek_changes(PG_FUNCTION_ARGS)
+{
+ Datum ret = pg_logical_slot_get_changes_guts(fcinfo, false, false);
+ return ret;
+}
+
+/*
+ * SQL function returning the changestream in binary, consuming the data.
+ */
+Datum
+pg_logical_slot_get_binary_changes(PG_FUNCTION_ARGS)
+{
+ Datum ret = pg_logical_slot_get_changes_guts(fcinfo, true, true);
+ return ret;
+}
+
+/*
+ * SQL function returning the changestream in binary, only peeking ahead.
+ */
+Datum
+pg_logical_slot_peek_binary_changes(PG_FUNCTION_ARGS)
+{
+ Datum ret = pg_logical_slot_get_changes_guts(fcinfo, false, true);
+ return ret;
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * reorderbuffer.c
+ * PostgreSQL logical replay/reorder buffer management
+ *
+ *
+ * Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/replication/reorderbuffer.c
+ *
+ * NOTES
+ * This module gets handed individual pieces of transactions in the order
+ * they are written to the WAL and is responsible to reassemble them into
+ * toplevel transaction sized pieces. When a transaction is completely
+ * reassembled - signalled by reading the transaction commit record - it
+ * will then call the output plugin (c.f. ReorderBufferCommit()) with the
+ * individual changes. The output plugins rely on snapshots built by
+ * snapbuild.c which hands them to us.
+ *
+ * Transactions and subtransactions/savepoints in postgres are not
+ * immediately linked to each other from outside the performing
+ * backend. Only at commit/abort (or special xact_assignment records) they
+ * are linked together. Which means that we will have to splice together a
+ * toplevel transaction from its subtransactions. To do that efficiently we
+ * build a binary heap indexed by the smallest current lsn of the individual
+ * subtransactions' changestreams. As the individual streams are inherently
+ * ordered by LSN - since that is where we build them from - the transaction
+ * can easily be reassembled by always using the subtransaction with the
+ * smallest current LSN from the heap.
+ *
+ * In order to cope with large transactions - which can be several times as
+ * big as the available memory - this module supports spooling the contents
+ * of a large transactions to disk. When the transaction is replayed the
+ * contents of individual (sub-)transactions will be read from disk in
+ * chunks.
+ *
+ * This module also has to deal with reassembling toast records from the
+ * individual chunks stored in WAL. When a new (or initial) version of a
+ * tuple is stored in WAL it will always be preceded by the toast chunks
+ * emitted for the columns stored out of line. Within a single toplevel
+ * transaction there will be no other data carrying records between a row's
+ * toast chunks and the row data itself. See ReorderBufferToast* for
+ * details.
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "miscadmin.h"
+
+#include "access/rewriteheap.h"
+#include "access/transam.h"
+#include "access/tuptoaster.h"
+#include "access/xact.h"
+
+#include "catalog/catalog.h"
+
+#include "common/relpath.h"
+
+#include "lib/binaryheap.h"
+
+#include "replication/logical.h"
+#include "replication/reorderbuffer.h"
+#include "replication/slot.h"
+#include "replication/snapbuild.h" /* just for SnapBuildSnapDecRefcount */
+
+#include "storage/bufmgr.h"
+#include "storage/fd.h"
+#include "storage/sinval.h"
+
+#include "utils/builtins.h"
+#include "utils/combocid.h"
+#include "utils/memdebug.h"
+#include "utils/memutils.h"
+#include "utils/relcache.h"
+#include "utils/relfilenodemap.h"
+#include "utils/tqual.h"
+
+/*
+ * For efficiency and simplicity reasons we want to keep Snapshots, CommandIds
+ * and ComboCids in the same list with the user visible INSERT/UPDATE/DELETE
+ * changes. We don't want to leak those internal values to external users
+ * though (they would just use switch()...default:) because that would make it
+ * harder to add to new user visible values.
+ *
+ * This needs to be synchronized with ReorderBufferChangeType! Adjust the
+ * StaticAssertExpr's in ReorderBufferAllocate if you add anything!
+ */
+typedef enum
+{
+ REORDER_BUFFER_CHANGE_INTERNAL_INSERT,
+ REORDER_BUFFER_CHANGE_INTERNAL_UPDATE,
+ REORDER_BUFFER_CHANGE_INTERNAL_DELETE,
+ REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT,
+ REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID,
+ REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID
+} ReorderBufferChangeTypeInternal;
+
+/* entry for a hash table we use to map from xid to our transaction state */
+typedef struct ReorderBufferTXNByIdEnt
+{
+ TransactionId xid;
+ ReorderBufferTXN *txn;
+} ReorderBufferTXNByIdEnt;
+
+/* data structures for (relfilenode, ctid) => (cmin, cmax) mapping */
+typedef struct ReorderBufferTupleCidKey
+{
+ RelFileNode relnode;
+ ItemPointerData tid;
+} ReorderBufferTupleCidKey;
+
+typedef struct ReorderBufferTupleCidEnt
+{
+ ReorderBufferTupleCidKey key;
+ CommandId cmin;
+ CommandId cmax;
+ CommandId combocid; /* just for debugging */
+} ReorderBufferTupleCidEnt;
+
+/* k-way in-order change iteration support structures */
+typedef struct ReorderBufferIterTXNEntry
+{
+ XLogRecPtr lsn;
+ ReorderBufferChange *change;
+ ReorderBufferTXN *txn;
+ int fd;
+ XLogSegNo segno;
+} ReorderBufferIterTXNEntry;
+
+typedef struct ReorderBufferIterTXNState
+{
+ binaryheap *heap;
+ Size nr_txns;
+ dlist_head old_change;
+ ReorderBufferIterTXNEntry entries[FLEXIBLE_ARRAY_MEMBER];
+} ReorderBufferIterTXNState;
+
+/* toast datastructures */
+typedef struct ReorderBufferToastEnt
+{
+ Oid chunk_id; /* toast_table.chunk_id */
+ int32 last_chunk_seq; /* toast_table.chunk_seq of the last chunk we
+ * have seen */
+ Size num_chunks; /* number of chunks we've already seen */
+ Size size; /* combined size of chunks seen */
+ dlist_head chunks; /* linked list of chunks */
+ struct varlena *reconstructed; /* reconstructed varlena now pointed
+ * to in main tup */
+} ReorderBufferToastEnt;
+
+/* Disk serialization support datastructures */
+typedef struct ReorderBufferDiskChange
+{
+ Size size;
+ ReorderBufferChange change;
+ /* data follows */
+} ReorderBufferDiskChange;
+
+/*
+ * Maximum number of changes kept in memory, per transaction. After that,
+ * changes are spooled to disk.
+ *
+ * The current value should be sufficient to decode the entire transaction
+ * without hitting disk in OLTP workloads, while starting to spool to disk in
+ * other workloads reasonably fast.
+ *
+ * At some point in the future it probaly makes sense to have a more elaborate
+ * resource management here, but it's not entirely clear what that would look
+ * like.
+ */
+static const Size max_changes_in_memory = 4096;
+
+/*
+ * We use a very simple form of a slab allocator for frequently allocated
+ * objects, simply keeping a fixed number in a linked list when unused,
+ * instead pfree()ing them. Without that in many workloads aset.c becomes a
+ * major bottleneck, especially when spilling to disk while decoding batch
+ * workloads.
+ */
+static const Size max_cached_changes = 4096 * 2;
+static const Size max_cached_tuplebufs = 4096 * 2; /* ~8MB */
+static const Size max_cached_transactions = 512;
+
+
+/* ---------------------------------------
+ * primary reorderbuffer support routines
+ * ---------------------------------------
+ */
+static ReorderBufferTXN *ReorderBufferGetTXN(ReorderBuffer *rb);
+static void ReorderBufferReturnTXN(ReorderBuffer *rb, ReorderBufferTXN *txn);
+static ReorderBufferTXN *ReorderBufferTXNByXid(ReorderBuffer *rb,
+ TransactionId xid, bool create, bool *is_new,
+ XLogRecPtr lsn, bool create_as_top);
+
+static void AssertTXNLsnOrder(ReorderBuffer *rb);
+
+/* ---------------------------------------
+ * support functions for lsn-order iterating over the ->changes of a
+ * transaction and its subtransactions
+ *
+ * used for iteration over the k-way heap merge of a transaction and its
+ * subtransactions
+ * ---------------------------------------
+ */
+static ReorderBufferIterTXNState *ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn);
+static ReorderBufferChange *
+ ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state);
+static void ReorderBufferIterTXNFinish(ReorderBuffer *rb,
+ ReorderBufferIterTXNState *state);
+static void ReorderBufferExecuteInvalidations(ReorderBuffer *rb, ReorderBufferTXN *txn);
+
+/*
+ * ---------------------------------------
+ * Disk serialization support functions
+ * ---------------------------------------
+ */
+static void ReorderBufferCheckSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn);
+static void ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn);
+static void ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
+ int fd, ReorderBufferChange *change);
+static Size ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
+ int *fd, XLogSegNo *segno);
+static void ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
+ char *change);
+static void ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn);
+
+static void ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap);
+static Snapshot ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap,
+ ReorderBufferTXN *txn, CommandId cid);
+
+/* ---------------------------------------
+ * toast reassembly support
+ * ---------------------------------------
+ */
+static void ReorderBufferToastInitHash(ReorderBuffer *rb, ReorderBufferTXN *txn);
+static void ReorderBufferToastReset(ReorderBuffer *rb, ReorderBufferTXN *txn);
+static void ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
+ Relation relation, ReorderBufferChange *change);
+static void ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn,
+ Relation relation, ReorderBufferChange *change);
+
+
+/*
+ * Allocate a new ReorderBuffer
+ */
+ReorderBuffer *
+ReorderBufferAllocate(void)
+{
+ ReorderBuffer *buffer;
+ HASHCTL hash_ctl;
+ MemoryContext new_ctx;
+
+ StaticAssertExpr((int) REORDER_BUFFER_CHANGE_INTERNAL_INSERT == (int) REORDER_BUFFER_CHANGE_INSERT, "out of sync enums");
+ StaticAssertExpr((int) REORDER_BUFFER_CHANGE_INTERNAL_UPDATE == (int) REORDER_BUFFER_CHANGE_UPDATE, "out of sync enums");
+ StaticAssertExpr((int) REORDER_BUFFER_CHANGE_INTERNAL_DELETE == (int) REORDER_BUFFER_CHANGE_DELETE, "out of sync enums");
+
+ /* allocate memory in own context, to have better accountability */
+ new_ctx = AllocSetContextCreate(CurrentMemoryContext,
+ "ReorderBuffer",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+ buffer =
+ (ReorderBuffer *) MemoryContextAlloc(new_ctx, sizeof(ReorderBuffer));
+
+ memset(&hash_ctl, 0, sizeof(hash_ctl));
+
+ buffer->context = new_ctx;
+
+ hash_ctl.keysize = sizeof(TransactionId);
+ hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
+ hash_ctl.hash = tag_hash;
+ hash_ctl.hcxt = buffer->context;
+
+ buffer->by_txn = hash_create("ReorderBufferByXid", 1000, &hash_ctl,
+ HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+
+ buffer->by_txn_last_xid = InvalidTransactionId;
+ buffer->by_txn_last_txn = NULL;
+
+ buffer->nr_cached_transactions = 0;
+ buffer->nr_cached_changes = 0;
+ buffer->nr_cached_tuplebufs = 0;
+
+ buffer->outbuf = NULL;
+ buffer->outbufsize = 0;
+
+ buffer->current_restart_decoding_lsn = InvalidXLogRecPtr;
+
+ dlist_init(&buffer->toplevel_by_lsn);
+ dlist_init(&buffer->cached_transactions);
+ dlist_init(&buffer->cached_changes);
+ slist_init(&buffer->cached_tuplebufs);
+
+ return buffer;
+}
+
+/*
+ * Free a ReorderBuffer
+ */
+void
+ReorderBufferFree(ReorderBuffer *rb)
+{
+ MemoryContext context = rb->context;
+
+ /*
+ * We free separately allocated data by entirely scrapping reorderbuffer's
+ * memory context.
+ */
+ MemoryContextDelete(context);
+}
+
+/*
+ * Get a unused, possibly preallocated, ReorderBufferTXN.
+ */
+static ReorderBufferTXN *
+ReorderBufferGetTXN(ReorderBuffer *rb)
+{
+ ReorderBufferTXN *txn;
+
+ /* check the slab cache */
+ if (rb->nr_cached_transactions > 0)
+ {
+ rb->nr_cached_transactions--;
+ txn = (ReorderBufferTXN *)
+ dlist_container(ReorderBufferTXN, node,
+ dlist_pop_head_node(&rb->cached_transactions));
+ }
+ else
+ {
+ txn = (ReorderBufferTXN *)
+ MemoryContextAlloc(rb->context, sizeof(ReorderBufferTXN));
+ }
+
+ memset(txn, 0, sizeof(ReorderBufferTXN));
+
+ dlist_init(&txn->changes);
+ dlist_init(&txn->tuplecids);
+ dlist_init(&txn->subtxns);
+
+ return txn;
+}
+
+/*
+ * Free a ReorderBufferTXN.
+ *
+ * Deallocation might be delayed for efficiency purposes, for details check
+ * the comments above max_cached_changes's definition.
+ */
+void
+ReorderBufferReturnTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
+{
+ /* clean the lookup cache if we were cached (quite likely) */
+ if (rb->by_txn_last_xid == txn->xid)
+ {
+ rb->by_txn_last_xid = InvalidTransactionId;
+ rb->by_txn_last_txn = NULL;
+ }
+
+ /* free data that's contained */
+
+ if (txn->tuplecid_hash != NULL)
+ {
+ hash_destroy(txn->tuplecid_hash);
+ txn->tuplecid_hash = NULL;
+ }
+
+ if (txn->invalidations)
+ {
+ pfree(txn->invalidations);
+ txn->invalidations = NULL;
+ }
+
+ /* check whether to put into the slab cache */
+ if (rb->nr_cached_transactions < max_cached_transactions)
+ {
+ rb->nr_cached_transactions++;
+ dlist_push_head(&rb->cached_transactions, &txn->node);
+ VALGRIND_MAKE_MEM_UNDEFINED(txn, sizeof(ReorderBufferTXN));
+ VALGRIND_MAKE_MEM_DEFINED(&txn->node, sizeof(txn->node));
+ }
+ else
+ {
+ pfree(txn);
+ }
+}
+
+/*
+ * Get a unused, possibly preallocated, ReorderBufferChange.
+ */
+ReorderBufferChange *
+ReorderBufferGetChange(ReorderBuffer *rb)
+{
+ ReorderBufferChange *change;
+
+ /* check the slab cache */
+ if (rb->nr_cached_changes)
+ {
+ rb->nr_cached_changes--;
+ change = (ReorderBufferChange *)
+ dlist_container(ReorderBufferChange, node,
+ dlist_pop_head_node(&rb->cached_changes));
+ }
+ else
+ {
+ change = (ReorderBufferChange *)
+ MemoryContextAlloc(rb->context, sizeof(ReorderBufferChange));
+ }
+
+ memset(change, 0, sizeof(ReorderBufferChange));
+ return change;
+}
+
+/*
+ * Free an ReorderBufferChange.
+ *
+ * Deallocation might be delayed for efficiency purposes, for details check
+ * the comments above max_cached_changes's definition.
+ */
+void
+ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change)
+{
+ /* free contained data */
+ switch ((ReorderBufferChangeTypeInternal) change->action_internal)
+ {
+ case REORDER_BUFFER_CHANGE_INTERNAL_INSERT:
+ case REORDER_BUFFER_CHANGE_INTERNAL_UPDATE:
+ case REORDER_BUFFER_CHANGE_INTERNAL_DELETE:
+ if (change->tp.newtuple)
+ {
+ ReorderBufferReturnTupleBuf(rb, change->tp.newtuple);
+ change->tp.newtuple = NULL;
+ }
+
+ if (change->tp.oldtuple)
+ {
+ ReorderBufferReturnTupleBuf(rb, change->tp.oldtuple);
+ change->tp.oldtuple = NULL;
+ }
+ break;
+ case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
+ if (change->snapshot)
+ {
+ ReorderBufferFreeSnap(rb, change->snapshot);
+ change->snapshot = NULL;
+ }
+ break;
+ case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
+ break;
+ case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
+ break;
+ }
+
+ /* check whether to put into the slab cache */
+ if (rb->nr_cached_changes < max_cached_changes)
+ {
+ rb->nr_cached_changes++;
+ dlist_push_head(&rb->cached_changes, &change->node);
+ VALGRIND_MAKE_MEM_UNDEFINED(change, sizeof(ReorderBufferChange));
+ VALGRIND_MAKE_MEM_DEFINED(&change->node, sizeof(change->node));
+ }
+ else
+ {
+ pfree(change);
+ }
+}
+
+
+/*
+ * Get a unused, possibly preallocated, ReorderBufferTupleBuf
+ */
+ReorderBufferTupleBuf *
+ReorderBufferGetTupleBuf(ReorderBuffer *rb)
+{
+ ReorderBufferTupleBuf *tuple;
+
+ /* check the slab cache */
+ if (rb->nr_cached_tuplebufs)
+ {
+ rb->nr_cached_tuplebufs--;
+ tuple = slist_container(ReorderBufferTupleBuf, node,
+ slist_pop_head_node(&rb->cached_tuplebufs));
+#ifdef USE_ASSERT_CHECKING
+ memset(tuple, 0xdeadbeef, sizeof(ReorderBufferTupleBuf));
+#endif
+ }
+ else
+ {
+ tuple = (ReorderBufferTupleBuf *)
+ MemoryContextAlloc(rb->context, sizeof(ReorderBufferTupleBuf));
+ }
+
+ return tuple;
+}
+
+/*
+ * Free an ReorderBufferTupleBuf.
+ *
+ * Deallocation might be delayed for efficiency purposes, for details check
+ * the comments above max_cached_changes's definition.
+ */
+void
+ReorderBufferReturnTupleBuf(ReorderBuffer *rb, ReorderBufferTupleBuf *tuple)
+{
+ /* check whether to put into the slab cache */
+ if (rb->nr_cached_tuplebufs < max_cached_tuplebufs)
+ {
+ rb->nr_cached_tuplebufs++;
+ slist_push_head(&rb->cached_tuplebufs, &tuple->node);
+ VALGRIND_MAKE_MEM_UNDEFINED(tuple, sizeof(ReorderBufferTupleBuf));
+ VALGRIND_MAKE_MEM_DEFINED(&tuple->node, sizeof(tuple->node));
+ }
+ else
+ {
+ pfree(tuple);
+ }
+}
+
+/*
+ * Return the ReorderBufferTXN from the given buffer, specified by Xid.
+ * If create is true, and a transaction doesn't already exist, create it
+ * (with the given LSN, and as top transaction if that's specified);
+ * when this happens, is_new is set to true.
+ */
+static ReorderBufferTXN *
+ReorderBufferTXNByXid(ReorderBuffer *rb, TransactionId xid, bool create,
+ bool *is_new, XLogRecPtr lsn, bool create_as_top)
+{
+ ReorderBufferTXN *txn;
+ ReorderBufferTXNByIdEnt *ent;
+ bool found;
+
+ Assert(TransactionIdIsValid(xid));
+ Assert(!create || lsn != InvalidXLogRecPtr);
+
+ /*
+ * Check the one-entry lookup cache first
+ */
+ if (TransactionIdIsValid(rb->by_txn_last_xid) &&
+ rb->by_txn_last_xid == xid)
+ {
+ txn = rb->by_txn_last_txn;
+
+ if (txn != NULL)
+ {
+ /* found it, and it's valid */
+ if (is_new)
+ *is_new = false;
+ return txn;
+ }
+
+ /*
+ * cached as non-existant, and asked not to create? Then nothing else
+ * to do.
+ */
+ if (!create)
+ return NULL;
+ /* otherwise fall through to create it */
+ }
+
+ /*
+ * If the cache wasn't hit or it yielded an "does-not-exist" and we want
+ * to create an entry.
+ */
+
+ /* search the lookup table */
+ ent = (ReorderBufferTXNByIdEnt *)
+ hash_search(rb->by_txn,
+ (void *) &xid,
+ create ? HASH_ENTER : HASH_FIND,
+ &found);
+ if (found)
+ txn = ent->txn;
+ else if (create)
+ {
+ /* initialize the new entry, if creation was requested */
+ Assert(ent != NULL);
+
+ ent->txn = ReorderBufferGetTXN(rb);
+ ent->txn->xid = xid;
+ txn = ent->txn;
+ txn->first_lsn = lsn;
+ txn->restart_decoding_lsn = rb->current_restart_decoding_lsn;
+
+ if (create_as_top)
+ {
+ dlist_push_tail(&rb->toplevel_by_lsn, &txn->node);
+ AssertTXNLsnOrder(rb);
+ }
+ }
+ else
+ txn = NULL; /* not found and not asked to create */
+
+ /* update cache */
+ rb->by_txn_last_xid = xid;
+ rb->by_txn_last_txn = txn;
+
+ if (is_new)
+ *is_new = !found;
+
+ Assert(!create || !!txn);
+ return txn;
+}
+
+/*
+ * Queue a change into a transaction so it can be replayed upon commit.
+ */
+void
+ReorderBufferQueueChange(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn,
+ ReorderBufferChange *change)
+{
+ ReorderBufferTXN *txn;
+
+ txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
+
+ change->lsn = lsn;
+ Assert(InvalidXLogRecPtr != lsn);
+ dlist_push_tail(&txn->changes, &change->node);
+ txn->nentries++;
+ txn->nentries_mem++;
+
+ ReorderBufferCheckSerializeTXN(rb, txn);
+}
+
+static void
+AssertTXNLsnOrder(ReorderBuffer *rb)
+{
+#ifdef USE_ASSERT_CHECKING
+ dlist_iter iter;
+ XLogRecPtr prev_first_lsn = InvalidXLogRecPtr;
+
+ dlist_foreach(iter, &rb->toplevel_by_lsn)
+ {
+ ReorderBufferTXN *cur_txn;
+
+ cur_txn = dlist_container(ReorderBufferTXN, node, iter.cur);
+ Assert(cur_txn->first_lsn != InvalidXLogRecPtr);
+
+ if (cur_txn->end_lsn != InvalidXLogRecPtr)
+ Assert(cur_txn->first_lsn <= cur_txn->end_lsn);
+
+ if (prev_first_lsn != InvalidXLogRecPtr)
+ Assert(prev_first_lsn < cur_txn->first_lsn);
+
+ Assert(!cur_txn->is_known_as_subxact);
+ prev_first_lsn = cur_txn->first_lsn;
+ }
+#endif
+}
+
+ReorderBufferTXN *
+ReorderBufferGetOldestTXN(ReorderBuffer *rb)
+{
+ ReorderBufferTXN *txn;
+
+ if (dlist_is_empty(&rb->toplevel_by_lsn))
+ return NULL;
+
+ AssertTXNLsnOrder(rb);
+
+ txn = dlist_head_element(ReorderBufferTXN, node, &rb->toplevel_by_lsn);
+
+ Assert(!txn->is_known_as_subxact);
+ Assert(txn->first_lsn != InvalidXLogRecPtr);
+ return txn;
+}
+
+void
+ReorderBufferSetRestartPoint(ReorderBuffer *rb, XLogRecPtr ptr)
+{
+ rb->current_restart_decoding_lsn = ptr;
+}
+
+void
+ReorderBufferAssignChild(ReorderBuffer *rb, TransactionId xid,
+ TransactionId subxid, XLogRecPtr lsn)
+{
+ ReorderBufferTXN *txn;
+ ReorderBufferTXN *subtxn;
+ bool new_top;
+ bool new_sub;
+
+ txn = ReorderBufferTXNByXid(rb, xid, true, &new_top, lsn, true);
+ subtxn = ReorderBufferTXNByXid(rb, subxid, true, &new_sub, lsn, false);
+
+ if (new_sub)
+ {
+ /*
+ * we assign subtransactions to top level transaction even if we don't
+ * have data for it yet, assignment records frequently reference xids
+ * that have not yet produced any records. Knowing those aren't top
+ * level xids allows us to make processing cheaper in some places.
+ */
+ dlist_push_tail(&txn->subtxns, &subtxn->node);
+ txn->nsubtxns++;
+ }
+ else if (!subtxn->is_known_as_subxact)
+ {
+ subtxn->is_known_as_subxact = true;
+ Assert(subtxn->nsubtxns == 0);
+
+ /* remove from lsn order list of top-level transactions */
+ dlist_delete(&subtxn->node);
+
+ /* add to toplevel transaction */
+ dlist_push_tail(&txn->subtxns, &subtxn->node);
+ txn->nsubtxns++;
+ }
+ else if (new_top)
+ {
+ elog(ERROR, "existing subxact assigned to unknown toplevel xact");
+ }
+}
+
+/*
+ * Associate a subtransaction with its toplevel transaction at commit
+ * time. There may be no further changes added after this.
+ */
+void
+ReorderBufferCommitChild(ReorderBuffer *rb, TransactionId xid,
+ TransactionId subxid, XLogRecPtr commit_lsn,
+ XLogRecPtr end_lsn)
+{
+ ReorderBufferTXN *txn;
+ ReorderBufferTXN *subtxn;
+
+ subtxn = ReorderBufferTXNByXid(rb, subxid, false, NULL,
+ InvalidXLogRecPtr, false);
+
+ /*
+ * No need to do anything if that subtxn didn't contain any changes
+ */
+ if (!subtxn)
+ return;
+
+ txn = ReorderBufferTXNByXid(rb, xid, false, NULL, commit_lsn, true);
+
+ if (txn == NULL)
+ elog(ERROR, "subxact logged without previous toplevel record");
+
+ /*
+ * Pass the our base snapshot to the parent transaction if it doesn't have
+ * one, or ours is older. That can happen if there are no changes in the
+ * toplevel transaction but in one of the child transactions. This allows
+ * the parent to simply use it's base snapshot initially.
+ */
+ if (txn->base_snapshot == NULL ||
+ txn->base_snapshot_lsn > subtxn->base_snapshot_lsn)
+ {
+ txn->base_snapshot = subtxn->base_snapshot;
+ txn->base_snapshot_lsn = subtxn->base_snapshot_lsn;
+ subtxn->base_snapshot = NULL;
+ subtxn->base_snapshot_lsn = InvalidXLogRecPtr;
+ }
+
+ subtxn->final_lsn = commit_lsn;
+ subtxn->end_lsn = end_lsn;
+
+ if (!subtxn->is_known_as_subxact)
+ {
+ subtxn->is_known_as_subxact = true;
+ Assert(subtxn->nsubtxns == 0);
+
+ /* remove from lsn order list of top-level transactions */
+ dlist_delete(&subtxn->node);
+
+ /* add to subtransaction list */
+ dlist_push_tail(&txn->subtxns, &subtxn->node);
+ txn->nsubtxns++;
+ }
+}
+
+
+/*
+ * Support for efficiently iterating over a transaction's and its
+ * subtransactions' changes.
+ *
+ * We do by doing a k-way merge between transactions/subtransactions. For that
+ * we model the current heads of the different transactions as a binary heap
+ * so we easily know which (sub-)transaction has the change with the smallest
+ * lsn next.
+ *
+ * We assume the changes in individual transactions are already sorted by LSN.
+ */
+
+/*
+ * Binary heap comparison function.
+ */
+static int
+ReorderBufferIterCompare(Datum a, Datum b, void *arg)
+{
+ ReorderBufferIterTXNState *state = (ReorderBufferIterTXNState *) arg;
+ XLogRecPtr pos_a = state->entries[DatumGetInt32(a)].lsn;
+ XLogRecPtr pos_b = state->entries[DatumGetInt32(b)].lsn;
+
+ if (pos_a < pos_b)
+ return 1;
+ else if (pos_a == pos_b)
+ return 0;
+ return -1;
+}
+
+/*
+ * Allocate & initialize an iterator which iterates in lsn order over a
+ * transaction and all its subtransactions.
+ */
+static ReorderBufferIterTXNState *
+ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
+{
+ Size nr_txns = 0;
+ ReorderBufferIterTXNState *state;
+ dlist_iter cur_txn_i;
+ int32 off;
+
+ /*
+ * Calculate the size of our heap: one element for every transaction that
+ * contains changes. (Besides the transactions already in the reorder
+ * buffer, we count the one we were directly passed.)
+ */
+ if (txn->nentries > 0)
+ nr_txns++;
+
+ dlist_foreach(cur_txn_i, &txn->subtxns)
+ {
+ ReorderBufferTXN *cur_txn;
+
+ cur_txn = dlist_container(ReorderBufferTXN, node, cur_txn_i.cur);
+
+ if (cur_txn->nentries > 0)
+ nr_txns++;
+ }
+
+ /*
+ * TODO: Consider adding fastpath for the rather common nr_txns=1 case, no
+ * need to allocate/build a heap then.
+ */
+
+ /* allocate iteration state */
+ state = (ReorderBufferIterTXNState *)
+ MemoryContextAllocZero(rb->context,
+ sizeof(ReorderBufferIterTXNState) +
+ sizeof(ReorderBufferIterTXNEntry) * nr_txns);
+
+ state->nr_txns = nr_txns;
+ dlist_init(&state->old_change);
+
+ for (off = 0; off < state->nr_txns; off++)
+ {
+ state->entries[off].fd = -1;
+ state->entries[off].segno = 0;
+ }
+
+ /* allocate heap */
+ state->heap = binaryheap_allocate(state->nr_txns,
+ ReorderBufferIterCompare,
+ state);
+
+ /*
+ * Now insert items into the binary heap, in an unordered fashion. (We
+ * will run a heap assembly step at the end; this is more efficient.)
+ */
+
+ off = 0;
+
+ /* add toplevel transaction if it contains changes */
+ if (txn->nentries > 0)
+ {
+ ReorderBufferChange *cur_change;
+
+ if (txn->nentries != txn->nentries_mem)
+ ReorderBufferRestoreChanges(rb, txn, &state->entries[off].fd,
+ &state->entries[off].segno);
+
+ cur_change = dlist_head_element(ReorderBufferChange, node,
+ &txn->changes);
+
+ state->entries[off].lsn = cur_change->lsn;
+ state->entries[off].change = cur_change;
+ state->entries[off].txn = txn;
+
+ binaryheap_add_unordered(state->heap, Int32GetDatum(off++));
+ }
+
+ /* add subtransactions if they contain changes */
+ dlist_foreach(cur_txn_i, &txn->subtxns)
+ {
+ ReorderBufferTXN *cur_txn;
+
+ cur_txn = dlist_container(ReorderBufferTXN, node, cur_txn_i.cur);
+
+ if (cur_txn->nentries > 0)
+ {
+ ReorderBufferChange *cur_change;
+
+ if (txn->nentries != txn->nentries_mem)
+ ReorderBufferRestoreChanges(rb, cur_txn,
+ &state->entries[off].fd,
+ &state->entries[off].segno);
+
+ cur_change = dlist_head_element(ReorderBufferChange, node,
+ &cur_txn->changes);
+
+ state->entries[off].lsn = cur_change->lsn;
+ state->entries[off].change = cur_change;
+ state->entries[off].txn = cur_txn;
+
+ binaryheap_add_unordered(state->heap, Int32GetDatum(off++));
+ }
+ }
+
+ /* assemble a valid binary heap */
+ binaryheap_build(state->heap);
+
+ return state;
+}
+
+/*
+ * Return the next change when iterating over a transaction and its
+ * subtransactions.
+ *
+ * Returns NULL when no further changes exist.
+ */
+static ReorderBufferChange *
+ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
+{
+ ReorderBufferChange *change;
+ ReorderBufferIterTXNEntry *entry;
+ int32 off;
+
+ /* nothing there anymore */
+ if (state->heap->bh_size == 0)
+ return NULL;
+
+ off = DatumGetInt32(binaryheap_first(state->heap));
+ entry = &state->entries[off];
+
+ /* free memory we might have "leaked" in the previous *Next call */
+ if (!dlist_is_empty(&state->old_change))
+ {
+ change = dlist_container(ReorderBufferChange, node,
+ dlist_pop_head_node(&state->old_change));
+ ReorderBufferReturnChange(rb, change);
+ Assert(dlist_is_empty(&state->old_change));
+ }
+
+ change = entry->change;
+
+ /*
+ * update heap with information about which transaction has the next
+ * relevant change in LSN order
+ */
+
+ /* there are in-memory changes */
+ if (dlist_has_next(&entry->txn->changes, &entry->change->node))
+ {
+ dlist_node *next = dlist_next_node(&entry->txn->changes, &change->node);
+ ReorderBufferChange *next_change =
+ dlist_container(ReorderBufferChange, node, next);
+
+ /* txn stays the same */
+ state->entries[off].lsn = next_change->lsn;
+ state->entries[off].change = next_change;
+
+ binaryheap_replace_first(state->heap, Int32GetDatum(off));
+ return change;
+ }
+
+ /* try to load changes from disk */
+ if (entry->txn->nentries != entry->txn->nentries_mem)
+ {
+ /*
+ * Ugly: restoring changes will reuse *Change records, thus delete the
+ * current one from the per-tx list and only free in the next call.
+ */
+ dlist_delete(&change->node);
+ dlist_push_tail(&state->old_change, &change->node);
+
+ if (ReorderBufferRestoreChanges(rb, entry->txn, &entry->fd,
+ &state->entries[off].segno))
+ {
+ /* successfully restored changes from disk */
+ ReorderBufferChange *next_change =
+ dlist_head_element(ReorderBufferChange, node,
+ &entry->txn->changes);
+
+ elog(DEBUG2, "restored %u/%u changes from disk",
+ (uint32) entry->txn->nentries_mem,
+ (uint32) entry->txn->nentries);
+
+ Assert(entry->txn->nentries_mem);
+ /* txn stays the same */
+ state->entries[off].lsn = next_change->lsn;
+ state->entries[off].change = next_change;
+ binaryheap_replace_first(state->heap, Int32GetDatum(off));
+
+ return change;
+ }
+ }
+
+ /* ok, no changes there anymore, remove */
+ binaryheap_remove_first(state->heap);
+
+ return change;
+}
+
+/*
+ * Deallocate the iterator
+ */
+static void
+ReorderBufferIterTXNFinish(ReorderBuffer *rb,
+ ReorderBufferIterTXNState *state)
+{
+ int32 off;
+
+ for (off = 0; off < state->nr_txns; off++)
+ {
+ if (state->entries[off].fd != -1)
+ CloseTransientFile(state->entries[off].fd);
+ }
+
+ /* free memory we might have "leaked" in the last *Next call */
+ if (!dlist_is_empty(&state->old_change))
+ {
+ ReorderBufferChange *change;
+
+ change = dlist_container(ReorderBufferChange, node,
+ dlist_pop_head_node(&state->old_change));
+ ReorderBufferReturnChange(rb, change);
+ Assert(dlist_is_empty(&state->old_change));
+ }
+
+ binaryheap_free(state->heap);
+ pfree(state);
+}
+
+/*
+ * Cleanup the contents of a transaction, usually after the transaction
+ * committed or aborted.
+ */
+static void
+ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
+{
+ bool found;
+ dlist_mutable_iter iter;
+
+ /* cleanup subtransactions & their changes */
+ dlist_foreach_modify(iter, &txn->subtxns)
+ {
+ ReorderBufferTXN *subtxn;
+
+ subtxn = dlist_container(ReorderBufferTXN, node, iter.cur);
+
+ /*
+ * Subtransactions are always associated to the toplevel TXN, even if
+ * they originally were happening inside another subtxn, so we won't
+ * ever recurse more than one level deep here.
+ */
+ Assert(subtxn->is_known_as_subxact);
+ Assert(subtxn->nsubtxns == 0);
+
+ ReorderBufferCleanupTXN(rb, subtxn);
+ }
+
+ /* cleanup changes in the toplevel txn */
+ dlist_foreach_modify(iter, &txn->changes)
+ {
+ ReorderBufferChange *change;
+
+ change = dlist_container(ReorderBufferChange, node, iter.cur);
+
+ ReorderBufferReturnChange(rb, change);
+ }
+
+ /*
+ * Cleanup the tuplecids we stored for decoding catalog snapshot
+ * access. They are always stored in the toplevel transaction.
+ */
+ dlist_foreach_modify(iter, &txn->tuplecids)
+ {
+ ReorderBufferChange *change;
+
+ change = dlist_container(ReorderBufferChange, node, iter.cur);
+ Assert(change->action_internal == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID);
+ ReorderBufferReturnChange(rb, change);
+ }
+
+ if (txn->base_snapshot != NULL)
+ {
+ SnapBuildSnapDecRefcount(txn->base_snapshot);
+ txn->base_snapshot = NULL;
+ txn->base_snapshot_lsn = InvalidXLogRecPtr;
+ }
+
+ /* delete from list of known subxacts */
+ if (txn->is_known_as_subxact)
+ {
+ /* NB: nsubxacts count of parent will be too high now */
+ dlist_delete(&txn->node);
+ }
+ /* delete from LSN ordered list of toplevel TXNs */
+ else
+ {
+ dlist_delete(&txn->node);
+ }
+
+ /* now remove reference from buffer */
+ hash_search(rb->by_txn,
+ (void *) &txn->xid,
+ HASH_REMOVE,
+ &found);
+ Assert(found);
+
+ /* remove entries spilled to disk */
+ if (txn->nentries != txn->nentries_mem)
+ ReorderBufferRestoreCleanup(rb, txn);
+
+ /* deallocate */
+ ReorderBufferReturnTXN(rb, txn);
+}
+
+/*
+ * Build a hash with a (relfilenode, ctid) -> (cmin, cmax) mapping for use by
+ * tqual.c's HeapTupleSatisfiesHistoricMVCC.
+ */
+static void
+ReorderBufferBuildTupleCidHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
+{
+ dlist_iter iter;
+ HASHCTL hash_ctl;
+
+ if (!txn->has_catalog_changes || dlist_is_empty(&txn->tuplecids))
+ return;
+
+ memset(&hash_ctl, 0, sizeof(hash_ctl));
+
+ hash_ctl.keysize = sizeof(ReorderBufferTupleCidKey);
+ hash_ctl.entrysize = sizeof(ReorderBufferTupleCidEnt);
+ hash_ctl.hash = tag_hash;
+ hash_ctl.hcxt = rb->context;
+
+ /*
+ * create the hash with the exact number of to-be-stored tuplecids from
+ * the start
+ */
+ txn->tuplecid_hash =
+ hash_create("ReorderBufferTupleCid", txn->ntuplecids, &hash_ctl,
+ HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+
+ dlist_foreach(iter, &txn->tuplecids)
+ {
+ ReorderBufferTupleCidKey key;
+ ReorderBufferTupleCidEnt *ent;
+ bool found;
+ ReorderBufferChange *change;
+
+ change = dlist_container(ReorderBufferChange, node, iter.cur);
+
+ Assert(change->action_internal == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID);
+
+ /* be careful about padding */
+ memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
+
+ key.relnode = change->tuplecid.node;
+
+ ItemPointerCopy(&change->tuplecid.tid,
+ &key.tid);
+
+ ent = (ReorderBufferTupleCidEnt *)
+ hash_search(txn->tuplecid_hash,
+ (void *) &key,
+ HASH_ENTER | HASH_FIND,
+ &found);
+ if (!found)
+ {
+ ent->cmin = change->tuplecid.cmin;
+ ent->cmax = change->tuplecid.cmax;
+ ent->combocid = change->tuplecid.combocid;
+ }
+ else
+ {
+ Assert(ent->cmin == change->tuplecid.cmin);
+ Assert(ent->cmax == InvalidCommandId ||
+ ent->cmax == change->tuplecid.cmax);
+
+ /*
+ * if the tuple got valid in this transaction and now got deleted
+ * we already have a valid cmin stored. The cmax will be
+ * InvalidCommandId though.
+ */
+ ent->cmax = change->tuplecid.cmax;
+ }
+ }
+}
+
+/*
+ * Copy a provided snapshot so we can modify it privately. This is needed so
+ * that catalog modifying transactions can look into intermediate catalog
+ * states.
+ */
+static Snapshot
+ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap,
+ ReorderBufferTXN *txn, CommandId cid)
+{
+ Snapshot snap;
+ dlist_iter iter;
+ int i = 0;
+ Size size;
+
+ size = sizeof(SnapshotData) +
+ sizeof(TransactionId) * orig_snap->xcnt +
+ sizeof(TransactionId) * (txn->nsubtxns + 1);
+
+ snap = MemoryContextAllocZero(rb->context, size);
+ memcpy(snap, orig_snap, sizeof(SnapshotData));
+
+ snap->copied = true;
+ snap->active_count = 0;
+ snap->regd_count = 1;
+ snap->xip = (TransactionId *) (snap + 1);
+
+ memcpy(snap->xip, orig_snap->xip, sizeof(TransactionId) * snap->xcnt);
+
+ /*
+ * snap->subxip contains all txids that belong to our transaction which we
+ * need to check via cmin/cmax. Thats why we store the toplevel
+ * transaction in there as well.
+ */
+ snap->subxip = snap->xip + snap->xcnt;
+ snap->subxip[i++] = txn->xid;
+
+ /*
+ * nsubxcnt isn't decreased when subtransactions abort, so count
+ * manually. Since it's an upper boundary it is safe to use it for the
+ * allocation above.
+ */
+ snap->subxcnt = 1;
+
+ dlist_foreach(iter, &txn->subtxns)
+ {
+ ReorderBufferTXN *sub_txn;
+
+ sub_txn = dlist_container(ReorderBufferTXN, node, iter.cur);
+ snap->subxip[i++] = sub_txn->xid;
+ snap->subxcnt++;
+ }
+
+ /* sort so we can bsearch() later */
+ qsort(snap->subxip, snap->subxcnt, sizeof(TransactionId), xidComparator);
+
+ /* store the specified current CommandId */
+ snap->curcid = cid;
+
+ return snap;
+}
+
+/*
+ * Free a previously ReorderBufferCopySnap'ed snapshot
+ */
+static void
+ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap)
+{
+ if (snap->copied)
+ pfree(snap);
+ else
+ SnapBuildSnapDecRefcount(snap);
+}
+
+/*
+ * Perform the replay of a transaction and it's non-aborted subtransactions.
+ *
+ * Subtransactions previously have to be processed by
+ * ReorderBufferCommitChild(), even if previously assigned to the toplevel
+ * transaction with ReorderBufferAssignChild.
+ *
+ * We currently can only decode a transaction's contents in when their commit
+ * record is read because that's currently the only place where we know about
+ * cache invalidations. Thus, once a toplevel commit is read, we iterate over
+ * the top and subtransactions (using a k-way merge) and replay the changes in
+ * lsn order.
+ */
+void
+ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
+ XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
+ TimestampTz commit_time)
+{
+ ReorderBufferTXN *txn;
+ ReorderBufferIterTXNState *iterstate = NULL;
+ ReorderBufferChange *change;
+
+ volatile CommandId command_id = FirstCommandId;
+ volatile Snapshot snapshot_now = NULL;
+ volatile bool txn_started = false;
+ volatile bool subtxn_started = false;
+
+ txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
+ false);
+
+ /* unknown transaction, nothing to replay */
+ if (txn == NULL)
+ return;
+
+ txn->final_lsn = commit_lsn;
+ txn->end_lsn = end_lsn;
+ txn->commit_time = commit_time;
+
+ /* serialize the last bunch of changes if we need start earlier anyway */
+ if (txn->nentries_mem != txn->nentries)
+ ReorderBufferSerializeTXN(rb, txn);
+
+ /*
+ * If this transaction didn't have any real changes in our database, it's
+ * OK not to have a snapshot. Note that ReorderBufferCommitChild will have
+ * transferred its snapshot to this transaction if it had one and the
+ * toplevel tx didn't.
+ */
+ if (txn->base_snapshot == NULL)
+ {
+ Assert(txn->ninvalidations == 0);
+ ReorderBufferCleanupTXN(rb, txn);
+ return;
+ }
+
+ snapshot_now = txn->base_snapshot;
+
+ /* build data to be able to lookup the CommandIds of catalog tuples */
+ ReorderBufferBuildTupleCidHash(rb, txn);
+
+ /* setup the initial snapshot */
+ SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
+
+ PG_TRY();
+ {
+ txn_started = false;
+
+ /*
+ * Decoding needs access to syscaches et al., which in turn use
+ * heavyweight locks and such. Thus we need to have enough state around
+ * to keep track of those. The easiest way is to simply use a
+ * transaction internally. That also allows us to easily enforce that
+ * nothing writes to the database by checking for xid assignments.
+ *
+ * When we're called via the SQL SRF there's already a transaction
+ * started, so start an explicit subtransaction there.
+ */
+ if (IsTransactionOrTransactionBlock())
+ {
+ BeginInternalSubTransaction("replay");
+ subtxn_started = true;
+ }
+ else
+ {
+ StartTransactionCommand();
+ txn_started = true;
+ }
+
+ rb->begin(rb, txn);
+
+ iterstate = ReorderBufferIterTXNInit(rb, txn);
+ while ((change = ReorderBufferIterTXNNext(rb, iterstate)))
+ {
+ Relation relation = NULL;
+ Oid reloid;
+
+ switch ((ReorderBufferChangeTypeInternal) change->action_internal)
+ {
+ case REORDER_BUFFER_CHANGE_INTERNAL_INSERT:
+ case REORDER_BUFFER_CHANGE_INTERNAL_UPDATE:
+ case REORDER_BUFFER_CHANGE_INTERNAL_DELETE:
+ Assert(snapshot_now);
+
+ reloid = RelidByRelfilenode(change->tp.relnode.spcNode,
+ change->tp.relnode.relNode);
+
+ /*
+ * Catalog tuple without data, emitted while catalog was
+ * in the process of being rewritten.
+ */
+ if (reloid == InvalidOid &&
+ change->tp.newtuple == NULL &&
+ change->tp.oldtuple == NULL)
+ continue;
+ else if (reloid == InvalidOid)
+ elog(ERROR, "could not lookup relation %s",
+ relpathperm(change->tp.relnode, MAIN_FORKNUM));
+
+ relation = RelationIdGetRelation(reloid);
+
+ if (relation == NULL)
+ elog(ERROR, "could open relation descriptor %s",
+ relpathperm(change->tp.relnode, MAIN_FORKNUM));
+
+ if (RelationIsLogicallyLogged(relation))
+ {
+ /*
+ * For now ignore sequence changes entirely. Most of
+ * the time they don't log changes using records we
+ * understand, so it doesn't make sense to handle the
+ * few cases we do.
+ */
+ if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
+ {
+ }
+ /* user-triggered change */
+ else if (!IsToastRelation(relation))
+ {
+ ReorderBufferToastReplace(rb, txn, relation, change);
+ rb->apply_change(rb, txn, relation, change);
+ ReorderBufferToastReset(rb, txn);
+ }
+ /* we're not interested in toast deletions */
+ else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
+ {
+ /*
+ * Need to reassemble the full toasted Datum in
+ * memory, to ensure the chunks don't get reused
+ * till we're done remove it from the list of this
+ * transaction's changes. Otherwise it will get
+ * freed/reused while restoring spooled data from
+ * disk.
+ */
+ dlist_delete(&change->node);
+ ReorderBufferToastAppendChunk(rb, txn, relation,
+ change);
+ }
+
+ }
+ RelationClose(relation);
+ break;
+ case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
+ /* get rid of the old */
+ TeardownHistoricSnapshot(false);
+
+ if (snapshot_now->copied)
+ {
+ ReorderBufferFreeSnap(rb, snapshot_now);
+ snapshot_now =
+ ReorderBufferCopySnap(rb, change->snapshot,
+ txn, command_id);
+ }
+ /*
+ * Restored from disk, need to be careful not to double
+ * free. We could introduce refcounting for that, but for
+ * now this seems infrequent enough not to care.
+ */
+ else if (change->snapshot->copied)
+ {
+ snapshot_now =
+ ReorderBufferCopySnap(rb, change->snapshot,
+ txn, command_id);
+ }
+ else
+ {
+ snapshot_now = change->snapshot;
+ }
+
+
+ /* and continue with the new one */
+ SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
+ break;
+
+ case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
+ Assert(change->command_id != InvalidCommandId);
+
+ if (command_id < change->command_id)
+ {
+ command_id = change->command_id;
+
+ if (!snapshot_now->copied)
+ {
+ /* we don't use the global one anymore */
+ snapshot_now = ReorderBufferCopySnap(rb, snapshot_now,
+ txn, command_id);
+ }
+
+ snapshot_now->curcid = command_id;
+
+ TeardownHistoricSnapshot(false);
+ SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash);
+
+ /*
+ * Every time the CommandId is incremented, we could
+ * see new catalog contents, so execute all
+ * invalidations.
+ */
+ ReorderBufferExecuteInvalidations(rb, txn);
+ }
+
+ break;
+
+ case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
+ elog(ERROR, "tuplecid value in changequeue");
+ break;
+ }
+ }
+
+ ReorderBufferIterTXNFinish(rb, iterstate);
+
+ /* call commit callback */
+ rb->commit(rb, txn, commit_lsn);
+
+ /* this is just a sanity check against bad output plugin behaviour */
+ if (GetCurrentTransactionIdIfAny() != InvalidTransactionId)
+ elog(ERROR, "output plugin used xid %u",
+ GetCurrentTransactionId());
+
+ /* make sure there's no cache pollution */
+ ReorderBufferExecuteInvalidations(rb, txn);
+
+ /* cleanup */
+ TeardownHistoricSnapshot(false);
+
+ /*
+ * Abort subtransaction or the transaction as a whole has the right
+ * semantics. We want all locks acquired in here to be released, not
+ * reassigned to the parent and we do not want any database access
+ * have persistent effects.
+ */
+ if (subtxn_started)
+ RollbackAndReleaseCurrentSubTransaction();
+ else if (txn_started)
+ AbortCurrentTransaction();
+
+ if (snapshot_now->copied)
+ ReorderBufferFreeSnap(rb, snapshot_now);
+
+ /* remove potential on-disk data, and deallocate */
+ ReorderBufferCleanupTXN(rb, txn);
+ }
+ PG_CATCH();
+ {
+ /* TODO: Encapsulate cleanup from the PG_TRY and PG_CATCH blocks */
+ if (iterstate)
+ ReorderBufferIterTXNFinish(rb, iterstate);
+
+ TeardownHistoricSnapshot(true);
+
+ if (snapshot_now->copied)
+ ReorderBufferFreeSnap(rb, snapshot_now);
+
+ if (subtxn_started)
+ RollbackAndReleaseCurrentSubTransaction();
+ else if (txn_started)
+ AbortCurrentTransaction();
+
+ /*
+ * Invalidations in an aborted transactions aren't allowed to do
+ * catalog access, so we don't need to still have the snapshot setup.
+ */
+ ReorderBufferExecuteInvalidations(rb, txn);
+
+ /* remove potential on-disk data, and deallocate */
+ ReorderBufferCleanupTXN(rb, txn);
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+}
+
+/*
+ * Abort a transaction that possibly has previous changes. Needs to be first
+ * called for subtransactions and then for the toplevel xid.
+ *
+ * NB: Transactions handled here have to have actively aborted (i.e. have
+ * produced an abort record). Implicitly aborted transactions are handled via
+ * ReorderBufferAbortOld(); transactions we're just not interesteded in, but
+ * which have committed are handled in ReorderBufferForget().
+ *
+ * This function purges this transaction and its contents from memory and
+ * disk.
+ */
+void
+ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
+{
+ ReorderBufferTXN *txn;
+
+ txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
+ false);
+
+ /* unknown, nothing to remove */
+ if (txn == NULL)
+ return;
+
+ /* cosmetic... */
+ txn->final_lsn = lsn;
+
+ /* remove potential on-disk data, and deallocate */
+ ReorderBufferCleanupTXN(rb, txn);
+}
+
+/*
+ * Abort all transactions that aren't actually running anymore because the
+ * server restarted.
+ *
+ * NB: These really have to be transactions that have aborted due to a server
+ * crash/immediate restart, as we don't deal with invalidations here.
+ */
+void
+ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid)
+{
+ dlist_mutable_iter it;
+
+ /*
+ * Iterate through all (potential) toplevel TXNs and abort all that are
+ * older than what possibly can be running. Once we've found the first
+ * that is alive we stop, there might be some that acquired an xid earlier
+ * but started writing later, but it's unlikely and they will cleaned up
+ * in a later call to ReorderBufferAbortOld().
+ */
+ dlist_foreach_modify(it, &rb->toplevel_by_lsn)
+ {
+ ReorderBufferTXN * txn;
+
+ txn = dlist_container(ReorderBufferTXN, node, it.cur);
+
+ if (TransactionIdPrecedes(txn->xid, oldestRunningXid))
+ {
+ elog(DEBUG1, "aborting old transaction %u", txn->xid);
+
+ /* remove potential on-disk data, and deallocate this tx */
+ ReorderBufferCleanupTXN(rb, txn);
+ }
+ else
+ return;
+ }
+}
+
+/*
+ * Forget the contents of a transaction if we aren't interested in it's
+ * contents. Needs to be first called for subtransactions and then for the
+ * toplevel xid.
+ *
+ * This is significantly different to ReorderBufferAbort() because
+ * transactions that have committed need to be treated differenly from aborted
+ * ones since they may have modified the catalog.
+ *
+ * Note that this is only allowed to be called in the moment a transaction
+ * commit has just been read, not earlier; otherwise later records referring
+ * to this xid might re-create the transaction incompletely.
+ */
+void
+ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
+{
+ ReorderBufferTXN *txn;
+
+ txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
+ false);
+
+ /* unknown, nothing to forget */
+ if (txn == NULL)
+ return;
+
+ /* cosmetic... */
+ txn->final_lsn = lsn;
+
+ /*
+ * Proccess cache invalidation messages if there are any. Even if we're
+ * not interested in the transaction's contents, it could have manipulated
+ * the catalog and we need to update the caches according to that.
+ */
+ if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
+ {
+ /* setup snapshot to perform the invalidations in */
+ SetupHistoricSnapshot(txn->base_snapshot, txn->tuplecid_hash);
+ PG_TRY();
+ {
+ ReorderBufferExecuteInvalidations(rb, txn);
+ TeardownHistoricSnapshot(false);
+ }
+ PG_CATCH();
+ {
+ /* cleanup */
+ TeardownHistoricSnapshot(true);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+ }
+ else
+ Assert(txn->ninvalidations == 0);
+
+ /* remove potential on-disk data, and deallocate */
+ ReorderBufferCleanupTXN(rb, txn);
+}
+
+
+/*
+ * Check whether a transaction is already known in this module.xs
+ */
+bool
+ReorderBufferIsXidKnown(ReorderBuffer *rb, TransactionId xid)
+{
+ ReorderBufferTXN *txn;
+
+ txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
+ false);
+ return txn != NULL;
+}
+
+/*
+ * Add a new snapshot to this transaction that may only used after lsn 'lsn'
+ * because the previous snapshot doesn't describe the catalog correctly for
+ * following rows.
+ */
+void
+ReorderBufferAddSnapshot(ReorderBuffer *rb, TransactionId xid,
+ XLogRecPtr lsn, Snapshot snap)
+{
+ ReorderBufferChange *change = ReorderBufferGetChange(rb);
+
+ change->snapshot = snap;
+ change->action_internal = REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT;
+
+ ReorderBufferQueueChange(rb, xid, lsn, change);
+}
+
+/*
+ * Setup the base snapshot of a transaction. The base snapshot is the snapshot
+ * that is used to decode all changes until either this transaction modifies
+ * the catalog or another catalog modifying transaction commits.
+ *
+ * Needs to be called before any changes are added with
+ * ReorderBufferQueueChange().
+ */
+void
+ReorderBufferSetBaseSnapshot(ReorderBuffer *rb, TransactionId xid,
+ XLogRecPtr lsn, Snapshot snap)
+{
+ ReorderBufferTXN *txn;
+ bool is_new;
+
+ txn = ReorderBufferTXNByXid(rb, xid, true, &is_new, lsn, true);
+ Assert(txn->base_snapshot == NULL);
+ Assert(snap != NULL);
+
+ txn->base_snapshot = snap;
+ txn->base_snapshot_lsn = lsn;
+}
+
+/*
+ * Access the catalog with this CommandId at this point in the changestream.
+ *
+ * May only be called for command ids > 1
+ */
+void
+ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid,
+ XLogRecPtr lsn, CommandId cid)
+{
+ ReorderBufferChange *change = ReorderBufferGetChange(rb);
+
+ change->command_id = cid;
+ change->action_internal = REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID;
+
+ ReorderBufferQueueChange(rb, xid, lsn, change);
+}
+
+
+/*
+ * Add new (relfilenode, tid) -> (cmin, cmax) mappings.
+ */
+void
+ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid,
+ XLogRecPtr lsn, RelFileNode node,
+ ItemPointerData tid, CommandId cmin,
+ CommandId cmax, CommandId combocid)
+{
+ ReorderBufferChange *change = ReorderBufferGetChange(rb);
+ ReorderBufferTXN *txn;
+
+ txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
+
+ change->tuplecid.node = node;
+ change->tuplecid.tid = tid;
+ change->tuplecid.cmin = cmin;
+ change->tuplecid.cmax = cmax;
+ change->tuplecid.combocid = combocid;
+ change->lsn = lsn;
+ change->action_internal = REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID;
+
+ dlist_push_tail(&txn->tuplecids, &change->node);
+ txn->ntuplecids++;
+}
+
+/*
+ * Setup the invalidation of the toplevel transaction.
+ *
+ * This needs to be done before ReorderBufferCommit is called!
+ */
+void
+ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid,
+ XLogRecPtr lsn, Size nmsgs,
+ SharedInvalidationMessage *msgs)
+{
+ ReorderBufferTXN *txn;
+
+ txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
+
+ if (txn->ninvalidations != 0)
+ elog(ERROR, "only ever add one set of invalidations");
+
+ Assert(nmsgs > 0);
+
+ txn->ninvalidations = nmsgs;
+ txn->invalidations = (SharedInvalidationMessage *)
+ MemoryContextAlloc(rb->context,
+ sizeof(SharedInvalidationMessage) * nmsgs);
+ memcpy(txn->invalidations, msgs,
+ sizeof(SharedInvalidationMessage) * nmsgs);
+}
+
+/*
+ * Apply all invalidations we know. Possibly we only need parts at this point
+ * in the changestream but we don't know which those are.
+ */
+static void
+ReorderBufferExecuteInvalidations(ReorderBuffer *rb, ReorderBufferTXN *txn)
+{
+ int i;
+
+ for (i = 0; i < txn->ninvalidations; i++)
+ LocalExecuteInvalidationMessage(&txn->invalidations[i]);
+}
+
+/*
+ * Mark a transaction as containing catalog changes
+ */
+void
+ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid,
+ XLogRecPtr lsn)
+{
+ ReorderBufferTXN *txn;
+
+ txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
+
+ txn->has_catalog_changes = true;
+}
+
+/*
+ * Query whether a transaction is already *known* to contain catalog
+ * changes. This can be wrong until directly before the commit!
+ */
+bool
+ReorderBufferXidHasCatalogChanges(ReorderBuffer *rb, TransactionId xid)
+{
+ ReorderBufferTXN *txn;
+
+ txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
+ false);
+ if (txn == NULL)
+ return false;
+
+ return txn->has_catalog_changes;
+}
+
+/*
+ * Have we already added the first snapshot?
+ */
+bool
+ReorderBufferXidHasBaseSnapshot(ReorderBuffer *rb, TransactionId xid)
+{
+ ReorderBufferTXN *txn;
+
+ txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr,
+ false);
+
+ /* transaction isn't known yet, ergo no snapshot */
+ if (txn == NULL)
+ return false;
+
+ /*
+ * TODO: It would be a nice improvement if we would check the toplevel
+ * transaction in subtransactions, but we'd need to keep track of a bit
+ * more state.
+ */
+ return txn->base_snapshot != NULL;
+}
+
+
+/*
+ * ---------------------------------------
+ * Disk serialization support
+ * ---------------------------------------
+ */
+
+/*
+ * Ensure the IO buffer is >= sz.
+ */
+static void
+ReorderBufferSerializeReserve(ReorderBuffer *rb, Size sz)
+{
+ if (!rb->outbufsize)
+ {
+ rb->outbuf = MemoryContextAlloc(rb->context, sz);
+ rb->outbufsize = sz;
+ }
+ else if (rb->outbufsize < sz)
+ {
+ rb->outbuf = repalloc(rb->outbuf, sz);
+ rb->outbufsize = sz;
+ }
+}
+
+/*
+ * Check whether the transaction tx should spill its data to disk.
+ */
+static void
+ReorderBufferCheckSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
+{
+ /*
+ * TODO: improve accounting so we cheaply can take subtransactions into
+ * account here.
+ */
+ if (txn->nentries_mem >= max_changes_in_memory)
+ {
+ ReorderBufferSerializeTXN(rb, txn);
+ Assert(txn->nentries_mem == 0);
+ }
+}
+
+/*
+ * Spill data of a large transaction (and its subtransactions) to disk.
+ */
+static void
+ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
+{
+ dlist_iter subtxn_i;
+ dlist_mutable_iter change_i;
+ int fd = -1;
+ XLogSegNo curOpenSegNo = 0;
+ Size spilled = 0;
+ char path[MAXPGPATH];
+
+ elog(DEBUG2, "spill %u changes in tx %u to disk",
+ (uint32) txn->nentries_mem, txn->xid);
+
+ /* do the same to all child TXs */
+ dlist_foreach(subtxn_i, &txn->subtxns)
+ {
+ ReorderBufferTXN *subtxn;
+
+ subtxn = dlist_container(ReorderBufferTXN, node, subtxn_i.cur);
+ ReorderBufferSerializeTXN(rb, subtxn);
+ }
+
+ /* serialize changestream */
+ dlist_foreach_modify(change_i, &txn->changes)
+ {
+ ReorderBufferChange *change;
+
+ change = dlist_container(ReorderBufferChange, node, change_i.cur);
+
+ /*
+ * store in segment in which it belongs by start lsn, don't split over
+ * multiple segments tho
+ */
+ if (fd == -1 || XLByteInSeg(change->lsn, curOpenSegNo))
+ {
+ XLogRecPtr recptr;
+
+ if (fd != -1)
+ CloseTransientFile(fd);
+
+ XLByteToSeg(change->lsn, curOpenSegNo);
+ XLogSegNoOffsetToRecPtr(curOpenSegNo, 0, recptr);
+
+ /*
+ * No need to care about TLIs here, only used during a single run,
+ * so each LSN only maps to a specific WAL record.
+ */
+ sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap",
+ NameStr(MyReplicationSlot->data.name), txn->xid,
+ (uint32) (recptr >> 32), (uint32) recptr);
+
+ /* open segment, create it if necessary */
+ fd = OpenTransientFile(path,
+ O_CREAT | O_WRONLY | O_APPEND | PG_BINARY,
+ S_IRUSR | S_IWUSR);
+
+ if (fd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m",
+ path)));
+ }
+
+ ReorderBufferSerializeChange(rb, txn, fd, change);
+ dlist_delete(&change->node);
+ ReorderBufferReturnChange(rb, change);
+
+ spilled++;
+ }
+
+ Assert(spilled == txn->nentries_mem);
+ Assert(dlist_is_empty(&txn->changes));
+ txn->nentries_mem = 0;
+
+ if (fd != -1)
+ CloseTransientFile(fd);
+}
+
+/*
+ * Serialize individual change to disk.
+ */
+static void
+ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
+ int fd, ReorderBufferChange *change)
+{
+ ReorderBufferDiskChange *ondisk;
+ Size sz = sizeof(ReorderBufferDiskChange);
+
+ ReorderBufferSerializeReserve(rb, sz);
+
+ ondisk = (ReorderBufferDiskChange *) rb->outbuf;
+ memcpy(&ondisk->change, change, sizeof(ReorderBufferChange));
+
+ switch ((ReorderBufferChangeTypeInternal) change->action_internal)
+ {
+ case REORDER_BUFFER_CHANGE_INTERNAL_INSERT:
+ /* fall through */
+ case REORDER_BUFFER_CHANGE_INTERNAL_UPDATE:
+ /* fall through */
+ case REORDER_BUFFER_CHANGE_INTERNAL_DELETE:
+ {
+ char *data;
+ Size oldlen = 0;
+ Size newlen = 0;
+
+ if (change->tp.oldtuple)
+ oldlen = offsetof(ReorderBufferTupleBuf, data)
+ + change->tp.oldtuple->tuple.t_len
+ - offsetof(HeapTupleHeaderData, t_bits);
+
+ if (change->tp.newtuple)
+ newlen = offsetof(ReorderBufferTupleBuf, data)
+ + change->tp.newtuple->tuple.t_len
+ - offsetof(HeapTupleHeaderData, t_bits);
+
+ sz += oldlen;
+ sz += newlen;
+
+ /* make sure we have enough space */
+ ReorderBufferSerializeReserve(rb, sz);
+
+ data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
+ /* might have been reallocated above */
+ ondisk = (ReorderBufferDiskChange *) rb->outbuf;
+
+ if (oldlen)
+ {
+ memcpy(data, change->tp.oldtuple, oldlen);
+ data += oldlen;
+ Assert(&change->tp.oldtuple->header == change->tp.oldtuple->tuple.t_data);
+ }
+
+ if (newlen)
+ {
+ memcpy(data, change->tp.newtuple, newlen);
+ data += newlen;
+ Assert(&change->tp.newtuple->header == change->tp.newtuple->tuple.t_data);
+ }
+ break;
+ }
+ case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
+ {
+ char *data;
+
+ sz += sizeof(SnapshotData) +
+ sizeof(TransactionId) * change->snapshot->xcnt +
+ sizeof(TransactionId) * change->snapshot->subxcnt
+ ;
+
+ /* make sure we have enough space */
+ ReorderBufferSerializeReserve(rb, sz);
+ data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange);
+ /* might have been reallocated above */
+ ondisk = (ReorderBufferDiskChange *) rb->outbuf;
+
+ memcpy(data, change->snapshot, sizeof(SnapshotData));
+ data += sizeof(SnapshotData);
+
+ if (change->snapshot->xcnt)
+ {
+ memcpy(data, change->snapshot->xip,
+ sizeof(TransactionId) + change->snapshot->xcnt);
+ data += sizeof(TransactionId) + change->snapshot->xcnt;
+ }
+
+ if (change->snapshot->subxcnt)
+ {
+ memcpy(data, change->snapshot->subxip,
+ sizeof(TransactionId) + change->snapshot->subxcnt);
+ data += sizeof(TransactionId) + change->snapshot->subxcnt;
+ }
+ break;
+ }
+ case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
+ /* ReorderBufferChange contains everything important */
+ break;
+ case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
+ /* ReorderBufferChange contains everything important */
+ break;
+ }
+
+ ondisk->size = sz;
+
+ if (write(fd, rb->outbuf, ondisk->size) != ondisk->size)
+ {
+ CloseTransientFile(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write to xid %u's data file: %m",
+ txn->xid)));
+ }
+
+ Assert(ondisk->change.action_internal == change->action_internal);
+}
+
+/*
+ * Restore a number of changes spilled to disk back into memory.
+ */
+static Size
+ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
+ int *fd, XLogSegNo *segno)
+{
+ Size restored = 0;
+ XLogSegNo last_segno;
+ dlist_mutable_iter cleanup_iter;
+
+ Assert(txn->first_lsn != InvalidXLogRecPtr);
+ Assert(txn->final_lsn != InvalidXLogRecPtr);
+
+ /* free current entries, so we have memory for more */
+ dlist_foreach_modify(cleanup_iter, &txn->changes)
+ {
+ ReorderBufferChange *cleanup =
+ dlist_container(ReorderBufferChange, node, cleanup_iter.cur);
+
+ dlist_delete(&cleanup->node);
+ ReorderBufferReturnChange(rb, cleanup);
+ }
+ txn->nentries_mem = 0;
+ Assert(dlist_is_empty(&txn->changes));
+
+ XLByteToSeg(txn->final_lsn, last_segno);
+
+ while (restored < max_changes_in_memory && *segno <= last_segno)
+ {
+ int readBytes;
+ ReorderBufferDiskChange *ondisk;
+
+ if (*fd == -1)
+ {
+ XLogRecPtr recptr;
+ char path[MAXPGPATH];
+
+ /* first time in */
+ if (*segno == 0)
+ {
+ XLByteToSeg(txn->first_lsn, *segno);
+ }
+
+ Assert(*segno != 0 || dlist_is_empty(&txn->changes));
+ XLogSegNoOffsetToRecPtr(*segno, 0, recptr);
+
+ /*
+ * No need to care about TLIs here, only used during a single run,
+ * so each LSN only maps to a specific WAL record.
+ */
+ sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap",
+ NameStr(MyReplicationSlot->data.name), txn->xid,
+ (uint32) (recptr >> 32), (uint32) recptr);
+
+ *fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
+ if (*fd < 0 && errno == ENOENT)
+ {
+ *fd = -1;
+ (*segno)++;
+ continue;
+ }
+ else if (*fd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m",
+ path)));
+
+ }
+
+ ReorderBufferSerializeReserve(rb, sizeof(ReorderBufferDiskChange));
+
+
+ /*
+ * Read the statically sized part of a change which has information
+ * about the total size. If we couldn't read a record, we're at the
+ * end of this file.
+ */
+
+ readBytes = read(*fd, rb->outbuf, sizeof(ReorderBufferDiskChange));
+
+ /* eof */
+ if (readBytes == 0)
+ {
+ CloseTransientFile(*fd);
+ *fd = -1;
+ (*segno)++;
+ continue;
+ }
+ else if (readBytes < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read from reorderbuffer spill file: %m")));
+ else if (readBytes != sizeof(ReorderBufferDiskChange))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("incomplete read from reorderbuffer spill file: read %d instead of %u",
+ readBytes,
+ (uint32) sizeof(ReorderBufferDiskChange))));
+
+ ondisk = (ReorderBufferDiskChange *) rb->outbuf;
+
+ ReorderBufferSerializeReserve(rb,
+ sizeof(ReorderBufferDiskChange) + ondisk->size);
+ ondisk = (ReorderBufferDiskChange *) rb->outbuf;
+
+ readBytes = read(*fd, rb->outbuf + sizeof(ReorderBufferDiskChange),
+ ondisk->size - sizeof(ReorderBufferDiskChange));
+
+ if (readBytes < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read from reorderbuffer spill file: %m")));
+ else if (readBytes != ondisk->size - sizeof(ReorderBufferDiskChange))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read from reorderbuffer spill file: read %d instead of %u",
+ readBytes,
+ (uint32) (ondisk->size - sizeof(ReorderBufferDiskChange)))));
+
+ /*
+ * ok, read a full change from disk, now restore it into proper
+ * in-memory format
+ */
+ ReorderBufferRestoreChange(rb, txn, rb->outbuf);
+ restored++;
+ }
+
+ return restored;
+}
+
+/*
+ * Convert change from its on-disk format to in-memory format and queue it onto
+ * the TXN's ->changes list.
+ */
+static void
+ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
+ char *data)
+{
+ ReorderBufferDiskChange *ondisk;
+ ReorderBufferChange *change;
+
+ ondisk = (ReorderBufferDiskChange *) data;
+
+ change = ReorderBufferGetChange(rb);
+
+ /* copy static part */
+ memcpy(change, &ondisk->change, sizeof(ReorderBufferChange));
+
+ data += sizeof(ReorderBufferDiskChange);
+
+ /* restore individual stuff */
+ switch ((ReorderBufferChangeTypeInternal) change->action_internal)
+ {
+ case REORDER_BUFFER_CHANGE_INTERNAL_INSERT:
+ /* fall through */
+ case REORDER_BUFFER_CHANGE_INTERNAL_UPDATE:
+ /* fall through */
+ case REORDER_BUFFER_CHANGE_INTERNAL_DELETE:
+ if (change->tp.newtuple)
+ {
+ Size len = offsetof(ReorderBufferTupleBuf, data)
+ +((ReorderBufferTupleBuf *) data)->tuple.t_len
+ - offsetof(HeapTupleHeaderData, t_bits);
+
+ change->tp.newtuple = ReorderBufferGetTupleBuf(rb);
+ memcpy(change->tp.newtuple, data, len);
+ change->tp.newtuple->tuple.t_data = &change->tp.newtuple->header;
+
+ data += len;
+ }
+
+ if (change->tp.oldtuple)
+ {
+ Size len = offsetof(ReorderBufferTupleBuf, data)
+ +((ReorderBufferTupleBuf *) data)->tuple.t_len
+ - offsetof(HeapTupleHeaderData, t_bits);
+
+ change->tp.oldtuple = ReorderBufferGetTupleBuf(rb);
+ memcpy(change->tp.oldtuple, data, len);
+ change->tp.oldtuple->tuple.t_data = &change->tp.oldtuple->header;
+ data += len;
+ }
+ break;
+ case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
+ {
+ Snapshot oldsnap = (Snapshot) data;
+ Size size = sizeof(SnapshotData) +
+ sizeof(TransactionId) * oldsnap->xcnt +
+ sizeof(TransactionId) * (oldsnap->subxcnt + 0)
+ ;
+
+ Assert(change->snapshot != NULL);
+
+ change->snapshot = MemoryContextAllocZero(rb->context, size);
+
+ memcpy(change->snapshot, data, size);
+ change->snapshot->xip = (TransactionId *)
+ (((char *) change->snapshot) + sizeof(SnapshotData));
+ change->snapshot->subxip =
+ change->snapshot->xip + change->snapshot->xcnt + 0;
+ change->snapshot->copied = true;
+ break;
+ }
+ /* the base struct contains all the data, easy peasy */
+ case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
+ case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
+ break;
+ }
+
+ dlist_push_tail(&txn->changes, &change->node);
+ txn->nentries_mem++;
+}
+
+/*
+ * Remove all on-disk stored for the passed in transaction.
+ */
+static void
+ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn)
+{
+ XLogSegNo first;
+ XLogSegNo cur;
+ XLogSegNo last;
+
+ Assert(txn->first_lsn != InvalidXLogRecPtr);
+ Assert(txn->final_lsn != InvalidXLogRecPtr);
+
+ XLByteToSeg(txn->first_lsn, first);
+ XLByteToSeg(txn->final_lsn, last);
+
+ /* iterate over all possible filenames, and delete them */
+ for (cur = first; cur <= last; cur++)
+ {
+ char path[MAXPGPATH];
+ XLogRecPtr recptr;
+
+ XLogSegNoOffsetToRecPtr(cur, 0, recptr);
+
+ sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap",
+ NameStr(MyReplicationSlot->data.name), txn->xid,
+ (uint32) (recptr >> 32), (uint32) recptr);
+ if (unlink(path) != 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not unlink file \"%s\": %m", path)));
+ }
+}
+
+/*
+ * Delete all data spilled to disk after we've restarted/crashed. It will be
+ * recreated when the respective slots are reused.
+ */
+void
+StartupReorderBuffer(void)
+{
+ DIR *logical_dir;
+ struct dirent *logical_de;
+
+ DIR *spill_dir;
+ struct dirent *spill_de;
+
+ logical_dir = AllocateDir("pg_replslot");
+ while ((logical_de = ReadDir(logical_dir, "pg_replslot")) != NULL)
+ {
+ struct stat statbuf;
+ char path[MAXPGPATH];
+
+ if (strcmp(logical_de->d_name, ".") == 0 ||
+ strcmp(logical_de->d_name, "..") == 0)
+ continue;
+
+ /* if it cannot be a slot, skip the directory */
+ if (!ReplicationSlotValidateName(logical_de->d_name, DEBUG2))
+ continue;
+
+ /*
+ * ok, has to be a surviving logical slot, iterate and delete
+ * everythign starting with xid-*
+ */
+ sprintf(path, "pg_replslot/%s", logical_de->d_name);
+
+ /* we're only creating directories here, skip if it's not our's */
+ if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
+ continue;
+
+ spill_dir = AllocateDir(path);
+ while ((spill_de = ReadDir(spill_dir, path)) != NULL)
+ {
+ if (strcmp(spill_de->d_name, ".") == 0 ||
+ strcmp(spill_de->d_name, "..") == 0)
+ continue;
+
+ /* only look at names that can be ours */
+ if (strncmp(spill_de->d_name, "xid", 3) == 0)
+ {
+ sprintf(path, "pg_replslot/%s/%s", logical_de->d_name,
+ spill_de->d_name);
+
+ if (unlink(path) != 0)
+ ereport(PANIC,
+ (errcode_for_file_access(),
+ errmsg("could not unlink file \"%s\": %m",
+ path)));
+ }
+ }
+ FreeDir(spill_dir);
+ }
+ FreeDir(logical_dir);
+}
+
+/* ---------------------------------------
+ * toast reassembly support
+ * ---------------------------------------
+ */
+
+/*
+ * Initialize per tuple toast reconstruction support.
+ */
+static void
+ReorderBufferToastInitHash(ReorderBuffer *rb, ReorderBufferTXN *txn)
+{
+ HASHCTL hash_ctl;
+
+ Assert(txn->toast_hash == NULL);
+
+ memset(&hash_ctl, 0, sizeof(hash_ctl));
+ hash_ctl.keysize = sizeof(Oid);
+ hash_ctl.entrysize = sizeof(ReorderBufferToastEnt);
+ hash_ctl.hash = tag_hash;
+ hash_ctl.hcxt = rb->context;
+ txn->toast_hash = hash_create("ReorderBufferToastHash", 5, &hash_ctl,
+ HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+}
+
+/*
+ * Per toast-chunk handling for toast reconstruction
+ *
+ * Appends a toast chunk so we can reconstruct it when the tuple "owning" the
+ * toasted Datum comes along.
+ */
+static void
+ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn,
+ Relation relation, ReorderBufferChange *change)
+{
+ ReorderBufferToastEnt *ent;
+ bool found;
+ int32 chunksize;
+ bool isnull;
+ Pointer chunk;
+ TupleDesc desc = RelationGetDescr(relation);
+ Oid chunk_id;
+ Oid chunk_seq;
+
+ if (txn->toast_hash == NULL)
+ ReorderBufferToastInitHash(rb, txn);
+
+ Assert(IsToastRelation(relation));
+
+ chunk_id = DatumGetObjectId(fastgetattr(&change->tp.newtuple->tuple, 1, desc, &isnull));
+ Assert(!isnull);
+ chunk_seq = DatumGetInt32(fastgetattr(&change->tp.newtuple->tuple, 2, desc, &isnull));
+ Assert(!isnull);
+
+ ent = (ReorderBufferToastEnt *)
+ hash_search(txn->toast_hash,
+ (void *) &chunk_id,
+ HASH_ENTER,
+ &found);
+
+ if (!found)
+ {
+ Assert(ent->chunk_id == chunk_id);
+ ent->num_chunks = 0;
+ ent->last_chunk_seq = 0;
+ ent->size = 0;
+ ent->reconstructed = NULL;
+ dlist_init(&ent->chunks);
+
+ if (chunk_seq != 0)
+ elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq 0",
+ chunk_seq, chunk_id);
+ }
+ else if (found && chunk_seq != ent->last_chunk_seq + 1)
+ elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq %d",
+ chunk_seq, chunk_id, ent->last_chunk_seq + 1);
+
+ chunk = DatumGetPointer(fastgetattr(&change->tp.newtuple->tuple, 3, desc, &isnull));
+ Assert(!isnull);
+
+ /* calculate size so we can allocate the right size at once later */
+ if (!VARATT_IS_EXTENDED(chunk))
+ chunksize = VARSIZE(chunk) - VARHDRSZ;
+ else if (VARATT_IS_SHORT(chunk))
+ /* could happen due to heap_form_tuple doing its thing */
+ chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
+ else
+ elog(ERROR, "unexpected type of toast chunk");
+
+ ent->size += chunksize;
+ ent->last_chunk_seq = chunk_seq;
+ ent->num_chunks++;
+ dlist_push_tail(&ent->chunks, &change->node);
+}
+
+/*
+ * Rejigger change->newtuple to point to in-memory toast tuples instead to
+ * on-disk toast tuples that may not longer exist (think DROP TABLE or VACUUM).
+ *
+ * We cannot replace unchanged toast tuples though, so those will still point
+ * to on-disk toast data.
+ */
+static void
+ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
+ Relation relation, ReorderBufferChange *change)
+{
+ TupleDesc desc;
+ int natt;
+ Datum *attrs;
+ bool *isnull;
+ bool *free;
+ HeapTuple newtup;
+ Relation toast_rel;
+ TupleDesc toast_desc;
+ MemoryContext oldcontext;
+
+ /* no toast tuples changed */
+ if (txn->toast_hash == NULL)
+ return;
+
+ oldcontext = MemoryContextSwitchTo(rb->context);
+
+ /* we should only have toast tuples in an INSERT or UPDATE */
+ Assert(change->tp.newtuple);
+
+ desc = RelationGetDescr(relation);
+
+ toast_rel = RelationIdGetRelation(relation->rd_rel->reltoastrelid);
+ toast_desc = RelationGetDescr(toast_rel);
+
+ /* should we allocate from stack instead? */
+ attrs = palloc0(sizeof(Datum) * desc->natts);
+ isnull = palloc0(sizeof(bool) * desc->natts);
+ free = palloc0(sizeof(bool) * desc->natts);
+
+ heap_deform_tuple(&change->tp.newtuple->tuple, desc,
+ attrs, isnull);
+
+ for (natt = 0; natt < desc->natts; natt++)
+ {
+ Form_pg_attribute attr = desc->attrs[natt];
+ ReorderBufferToastEnt *ent;
+ struct varlena *varlena;
+
+ /* va_rawsize is the size of the original datum -- including header */
+ struct varatt_external toast_pointer;
+ struct varatt_indirect redirect_pointer;
+ struct varlena *new_datum = NULL;
+ struct varlena *reconstructed;
+ dlist_iter it;
+ Size data_done = 0;
+
+ /* system columns aren't toasted */
+ if (attr->attnum < 0)
+ continue;
+
+ if (attr->attisdropped)
+ continue;
+
+ /* not a varlena datatype */
+ if (attr->attlen != -1)
+ continue;
+
+ /* no data */
+ if (isnull[natt])
+ continue;
+
+ /* ok, we know we have a toast datum */
+ varlena = (struct varlena *) DatumGetPointer(attrs[natt]);
+
+ /* no need to do anything if the tuple isn't external */
+ if (!VARATT_IS_EXTERNAL(varlena))
+ continue;
+
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, varlena);
+
+ /*
+ * Check whether the toast tuple changed, replace if so.
+ */
+ ent = (ReorderBufferToastEnt *)
+ hash_search(txn->toast_hash,
+ (void *) &toast_pointer.va_valueid,
+ HASH_FIND,
+ NULL);
+ if (ent == NULL)
+ continue;
+
+ new_datum =
+ (struct varlena *) palloc0(INDIRECT_POINTER_SIZE);
+
+ free[natt] = true;
+
+ reconstructed = palloc0(toast_pointer.va_rawsize);
+
+ ent->reconstructed = reconstructed;
+
+ /* stitch toast tuple back together from its parts */
+ dlist_foreach(it, &ent->chunks)
+ {
+ bool isnull;
+ ReorderBufferTupleBuf *tup =
+ dlist_container(ReorderBufferChange, node, it.cur)->tp.newtuple;
+ Pointer chunk =
+ DatumGetPointer(fastgetattr(&tup->tuple, 3, toast_desc, &isnull));
+
+ Assert(!isnull);
+ Assert(!VARATT_IS_EXTERNAL(chunk));
+ Assert(!VARATT_IS_SHORT(chunk));
+
+ memcpy(VARDATA(reconstructed) + data_done,
+ VARDATA(chunk),
+ VARSIZE(chunk) - VARHDRSZ);
+ data_done += VARSIZE(chunk) - VARHDRSZ;
+ }
+ Assert(data_done == toast_pointer.va_extsize);
+
+ /* make sure its marked as compressed or not */
+ if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+ SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ);
+ else
+ SET_VARSIZE(reconstructed, data_done + VARHDRSZ);
+
+ memset(&redirect_pointer, 0, sizeof(redirect_pointer));
+ redirect_pointer.pointer = reconstructed;
+
+ SET_VARTAG_EXTERNAL(new_datum, VARTAG_INDIRECT);
+ memcpy(VARDATA_EXTERNAL(new_datum), &redirect_pointer,
+ sizeof(redirect_pointer));
+
+ attrs[natt] = PointerGetDatum(new_datum);
+ }
+
+ /*
+ * Build tuple in separate memory & copy tuple back into the tuplebuf
+ * passed to the output plugin. We can't directly heap_fill_tuple() into
+ * the tuplebuf because attrs[] will point back into the current content.
+ */
+ newtup = heap_form_tuple(desc, attrs, isnull);
+ Assert(change->tp.newtuple->tuple.t_len <= MaxHeapTupleSize);
+ Assert(&change->tp.newtuple->header == change->tp.newtuple->tuple.t_data);
+
+ memcpy(change->tp.newtuple->tuple.t_data,
+ newtup->t_data,
+ newtup->t_len);
+ change->tp.newtuple->tuple.t_len = newtup->t_len;
+
+ /*
+ * free resources we won't further need, more persistent stuff will be
+ * free'd in ReorderBufferToastReset().
+ */
+ RelationClose(toast_rel);
+ pfree(newtup);
+ for (natt = 0; natt < desc->natts; natt++)
+ {
+ if (free[natt])
+ pfree(DatumGetPointer(attrs[natt]));
+ }
+ pfree(attrs);
+ pfree(free);
+ pfree(isnull);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Free all resources allocated for toast reconstruction.
+ */
+static void
+ReorderBufferToastReset(ReorderBuffer *rb, ReorderBufferTXN *txn)
+{
+ HASH_SEQ_STATUS hstat;
+ ReorderBufferToastEnt *ent;
+
+ if (txn->toast_hash == NULL)
+ return;
+
+ /* sequentially walk over the hash and free everything */
+ hash_seq_init(&hstat, txn->toast_hash);
+ while ((ent = (ReorderBufferToastEnt *) hash_seq_search(&hstat)) != NULL)
+ {
+ dlist_mutable_iter it;
+
+ if (ent->reconstructed != NULL)
+ pfree(ent->reconstructed);
+
+ dlist_foreach_modify(it, &ent->chunks)
+ {
+ ReorderBufferChange *change =
+ dlist_container(ReorderBufferChange, node, it.cur);
+
+ dlist_delete(&change->node);
+ ReorderBufferReturnChange(rb, change);
+ }
+ }
+
+ hash_destroy(txn->toast_hash);
+ txn->toast_hash = NULL;
+}
+
+
+/* ---------------------------------------
+ * Visibility support for logical decoding
+ *
+ *
+ * Lookup actual cmin/cmax values when using decoding snapshot. We can't
+ * always rely on stored cmin/cmax values because of two scenarios:
+ *
+ * * A tuple got changed multiple times during a single transaction and thus
+ * has got a combocid. Combocid's are only valid for the duration of a
+ * single transaction.
+ * * A tuple with a cmin but no cmax (and thus no combocid) got
+ * deleted/updated in another transaction than the one which created it
+ * which we are looking at right now. As only one of cmin, cmax or combocid
+ * is actually stored in the heap we don't have access to the the value we
+ * need anymore.
+ *
+ * To resolve those problems we have a per-transaction hash of (cmin,
+ * cmax) tuples keyed by (relfilenode, ctid) which contains the actual
+ * (cmin, cmax) values. That also takes care of combocids by simply
+ * not caring about them at all. As we have the real cmin/cmax values
+ * combocids aren't interesting.
+ *
+ * As we only care about catalog tuples here the overhead of this
+ * hashtable should be acceptable.
+ *
+ * Heap rewrites complicate this a bit, check rewriteheap.c for
+ * details.
+ * -------------------------------------------------------------------------
+ */
+
+/* struct for qsort()ing mapping files by lsn somewhat efficiently */
+typedef struct RewriteMappingFile
+{
+ XLogRecPtr lsn;
+ char fname[MAXPGPATH];
+} RewriteMappingFile;
+
+#if NOT_USED
+static void
+DisplayMapping(HTAB *tuplecid_data)
+{
+ HASH_SEQ_STATUS hstat;
+ ReorderBufferTupleCidEnt *ent;
+
+ hash_seq_init(&hstat, tuplecid_data);
+ while ((ent = (ReorderBufferTupleCidEnt *) hash_seq_search(&hstat)) != NULL)
+ {
+ elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u",
+ ent->key.relnode.dbNode,
+ ent->key.relnode.spcNode,
+ ent->key.relnode.relNode,
+ BlockIdGetBlockNumber(&ent->key.tid.ip_blkid),
+ ent->key.tid.ip_posid,
+ ent->cmin,
+ ent->cmax
+ );
+ }
+}
+#endif
+
+/*
+ * Apply a single mapping file to tuplecid_data.
+ *
+ * The mapping file has to have been verified to be a) committed b) for our
+ * transaction c) applied in LSN order.
+ */
+static void
+ApplyLogicalMappingFile(HTAB *tuplecid_data, Oid relid, const char *fname)
+{
+ char path[MAXPGPATH];
+ int fd;
+ int readBytes;
+ LogicalRewriteMappingData map;
+
+ sprintf(path, "pg_llog/mappings/%s", fname);
+ fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
+ if (fd < 0)
+ ereport(ERROR,
+ (errmsg("could not open file \"%s\": %m", path)));
+
+ while (true)
+ {
+ ReorderBufferTupleCidKey key;
+ ReorderBufferTupleCidEnt *ent;
+ ReorderBufferTupleCidEnt *new_ent;
+ bool found;
+
+ /* be careful about padding */
+ memset(&key, 0, sizeof(ReorderBufferTupleCidKey));
+
+ /* read all mappings till the end of the file */
+ readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData));
+
+ if (readBytes < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ path)));
+ else if (readBytes == 0) /* EOF */
+ break;
+ else if (readBytes != sizeof(LogicalRewriteMappingData))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\", read %d instead of %d",
+ path, readBytes,
+ (int32) sizeof(LogicalRewriteMappingData))));
+
+ key.relnode = map.old_node;
+ ItemPointerCopy(&map.old_tid,
+ &key.tid);
+
+
+ ent = (ReorderBufferTupleCidEnt *)
+ hash_search(tuplecid_data,
+ (void *) &key,
+ HASH_FIND,
+ NULL);
+
+ /* no existing mapping, no need to update */
+ if (!ent)
+ continue;
+
+ key.relnode = map.new_node;
+ ItemPointerCopy(&map.new_tid,
+ &key.tid);
+
+ new_ent = (ReorderBufferTupleCidEnt *)
+ hash_search(tuplecid_data,
+ (void *) &key,
+ HASH_ENTER,
+ &found);
+
+ if (found)
+ {
+ /*
+ * Make sure the existing mapping makes sense. We sometime update
+ * old records that did not yet have a cmax (e.g. pg_class' own
+ * entry while rewriting it) during rewrites, so allow that.
+ */
+ Assert(ent->cmin == InvalidCommandId || ent->cmin == new_ent->cmin);
+ Assert(ent->cmax == InvalidCommandId || ent->cmax == new_ent->cmax);
+ }
+ else
+ {
+ /* update mapping */
+ new_ent->cmin = ent->cmin;
+ new_ent->cmax = ent->cmax;
+ new_ent->combocid = ent->combocid;
+ }
+ }
+}
+
+
+/*
+ * Check whether the TransactionOId 'xid' is in the pre-sorted array 'xip'.
+ */
+static bool
+TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
+{
+ return bsearch(&xid, xip, num,
+ sizeof(TransactionId), xidComparator) != NULL;
+}
+
+/*
+ * qsort() comparator for sorting RewriteMappingFiles in LSN order.
+ */
+static int
+file_sort_by_lsn(const void *a_p, const void *b_p)
+{
+ RewriteMappingFile *a = *(RewriteMappingFile **)a_p;
+ RewriteMappingFile *b = *(RewriteMappingFile **)b_p;
+
+ if (a->lsn < b->lsn)
+ return -1;
+ else if (a->lsn > b->lsn)
+ return 1;
+ return 0;
+}
+
+/*
+ * Apply any existing logical remapping files if there are any targeted at our
+ * transaction for relid.
+ */
+static void
+UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
+{
+ DIR *mapping_dir;
+ struct dirent *mapping_de;
+ List *files = NIL;
+ ListCell *file;
+ RewriteMappingFile **files_a;
+ size_t off;
+ Oid dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId;
+
+ mapping_dir = AllocateDir("pg_llog/mappings");
+ while ((mapping_de = ReadDir(mapping_dir, "pg_llog/mappings")) != NULL)
+ {
+ Oid f_dboid;
+ Oid f_relid;
+ TransactionId f_mapped_xid;
+ TransactionId f_create_xid;
+ XLogRecPtr f_lsn;
+ uint32 f_hi, f_lo;
+ RewriteMappingFile *f;
+
+ if (strcmp(mapping_de->d_name, ".") == 0 ||
+ strcmp(mapping_de->d_name, "..") == 0)
+ continue;
+
+ /* Ignore files that aren't ours*/
+ if (strncmp(mapping_de->d_name, "map-", 4) != 0)
+ continue;
+
+ if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
+ &f_dboid, &f_relid, &f_hi, &f_lo,
+ &f_mapped_xid, &f_create_xid) != 6)
+ elog(ERROR, "could not parse fname %s", mapping_de->d_name);
+
+ f_lsn = ((uint64) f_hi) << 32 | f_lo;
+
+ /* mapping for another database */
+ if (f_dboid != dboid)
+ continue;
+
+ /* mapping for another relation */
+ if (f_relid != relid)
+ continue;
+
+ /* did the creating transaction abort? */
+ if (!TransactionIdDidCommit(f_create_xid))
+ continue;
+
+ /* not for our transaction */
+ if (!TransactionIdInArray(f_mapped_xid, snapshot->subxip, snapshot->subxcnt))
+ continue;
+
+ /* ok, relevant, queue for apply */
+ f = palloc(sizeof(RewriteMappingFile));
+ f->lsn = f_lsn;
+ strcpy(f->fname, mapping_de->d_name);
+ files = lappend(files, f);
+ }
+ FreeDir(mapping_dir);
+
+ /* build array we can easily sort */
+ files_a = palloc(list_length(files) * sizeof(RewriteMappingFile *));
+ off = 0;
+ foreach(file, files)
+ {
+ files_a[off++] = lfirst(file);
+ }
+
+ /* sort files so we apply them in LSN order */
+ qsort(files_a, list_length(files), sizeof(RewriteMappingFile *),
+ file_sort_by_lsn);
+
+ for(off = 0; off < list_length(files); off++)
+ {
+ RewriteMappingFile *f = files_a[off];
+ elog(DEBUG1, "applying mapping: %s in %u", f->fname,
+ snapshot->subxip[0]);
+ ApplyLogicalMappingFile(tuplecid_data, relid, f->fname);
+ pfree(f);
+ }
+}
+
+/*
+ * Lookup cmin/cmax of a tuple, during logical decoding where we can't rely on
+ * combocids.
+ */
+bool
+ResolveCminCmaxDuringDecoding(HTAB *tuplecid_data,
+ Snapshot snapshot,
+ HeapTuple htup, Buffer buffer,
+ CommandId *cmin, CommandId *cmax)
+{
+ ReorderBufferTupleCidKey key;
+ ReorderBufferTupleCidEnt *ent;
+ ForkNumber forkno;
+ BlockNumber blockno;
+ bool updated_mapping = false;
+
+ /* be careful about padding */
+ memset(&key, 0, sizeof(key));
+
+ Assert(!BufferIsLocal(buffer));
+
+ /*
+ * get relfilenode from the buffer, no convenient way to access it other
+ * than that.
+ */
+ BufferGetTag(buffer, &key.relnode, &forkno, &blockno);
+
+ /* tuples can only be in the main fork */
+ Assert(forkno == MAIN_FORKNUM);
+ Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
+
+ ItemPointerCopy(&htup->t_self,
+ &key.tid);
+
+restart:
+ ent = (ReorderBufferTupleCidEnt *)
+ hash_search(tuplecid_data,
+ (void *) &key,
+ HASH_FIND,
+ NULL);
+
+ /*
+ * failed to find a mapping, check whether the table was rewritten and
+ * apply mapping if so, but only do that once - there can be no new
+ * mappings while we are in here since we have to hold a lock on the
+ * relation.
+ */
+ if (ent == NULL && !updated_mapping)
+ {
+ UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
+ /* now check but don't update for a mapping again */
+ updated_mapping = true;
+ goto restart;
+ }
+ else if (ent == NULL)
+ return false;
+
+ if (cmin)
+ *cmin = ent->cmin;
+ if (cmax)
+ *cmax = ent->cmax;
+ return true;
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * snapbuild.c
+ *
+ * Infrastructure for building historic catalog snapshots based on contents
+ * of the WAL, for the purpose of decoding heapam.c style values in the
+ * WAL.
+ *
+ * NOTES:
+ *
+ * We build snapshots which can *only* be used to read catalog contents and we
+ * do so by reading and interpreting the WAL stream. The aim is to build a
+ * snapshot that behaves the same as a freshly taken MVCC snapshot would have
+ * at the time the XLogRecord was generated.
+ *
+ * To build the snapshots we reuse the infrastructure built for Hot
+ * Standby. The in-memory snapshots we build look different than HS' because
+ * we have different needs. To successfully decode data from the WAL we only
+ * need to access catalog tables and (sys|rel|cat)cache, not the actual user
+ * tables since the data we decode is wholly contained in the WAL
+ * records. Also, our snapshots need to be different in comparison to normal
+ * MVCC ones because in contrast to those we cannot fully rely on the clog and
+ * pg_subtrans for information about committed transactions because they might
+ * commit in the future from the POV of the WAL entry we're currently
+ * decoding. This definition has the advantage that we only need to prevent
+ * removal of catalog rows, while normal table's rows can still be
+ * removed. This is achieved by using the replication slot mechanism.
+ *
+ * As the percentage of transactions modifying the catalog normally is fairly
+ * small in comparisons to ones only manipulating user data, we keep track of
+ * the committed catalog modifying ones inside (xmin, xmax) instead of keeping
+ * track of all running transactions like its done in a normal snapshot. Note
+ * that we're generally only looking at transactions that have acquired an
+ * xid. That is we keep a list of transactions between snapshot->(xmin, xmax)
+ * that we consider committed, everything else is considered aborted/in
+ * progress. That also allows us not to care about subtransactions before they
+ * have committed which means this modules, in contrast to HS, doesn't have to
+ * care about suboverflowed subtransactions and similar.
+ *
+ * One complexity of doing this is that to e.g. handle mixed DDL/DML
+ * transactions we need Snapshots that see intermediate versions of the
+ * catalog in a transaction. During normal operation this is achieved by using
+ * CommandIds/cmin/cmax. The problem with that however is that for space
+ * efficiency reasons only one value of that is stored
+ * (c.f. combocid.c). Since ComboCids are only available in memory we log
+ * additional information which allows us to get the original (cmin, cmax)
+ * pair during visibility checks. Check the reorderbuffer.c's comment above
+ * ResolveCminCmaxDuringDecoding() for details.
+ *
+ * To facilitate all this we need our own visibility routine, as the normal
+ * ones are optimized for different usecases.
+ *
+ * To replace the normal catalog snapshots with decoding ones use the
+ * SetupHistoricSnapshot() and TeardownHistoricSnapshot() functions.
+ *
+ *
+ *
+ * The snapbuild machinery is starting up in in several stages, as illustrated
+ * by the following graph:
+ * +-------------------------+
+ * +----|SNAPBUILD_START |-------------+
+ * | +-------------------------+ |
+ * | | |
+ * | | |
+ * | running_xacts with running xacts |
+ * | | |
+ * | | |
+ * | v |
+ * | +-------------------------+ v
+ * | |SNAPBUILD_FULL_SNAPSHOT |------------>|
+ * | +-------------------------+ |
+ * running_xacts | saved snapshot
+ * with zero xacts | at running_xacts's lsn
+ * | | |
+ * | all running toplevel TXNs finished |
+ * | | |
+ * | v |
+ * | +-------------------------+ |
+ * +--->|SNAPBUILD_CONSISTENT |<------------+
+ * +-------------------------+
+ *
+ * Initially the machinery is in the START stage. When a xl_running_xacts
+ * record is read that is sufficiently new (above the safe xmin horizon),
+ * there's a state transation. If there were no running xacts when the
+ * runnign_xacts record was generated, we'll directly go into CONSISTENT
+ * state, otherwise we'll switch to the FULL_SNAPSHOT state. Having a full
+ * snapshot means that all transactions that start henceforth can be decoded
+ * in their entirety, but transactions that started previously can't. In
+ * FULL_SNAPSHOT we'll switch into CONSISTENT once all those previously
+ * running transactions have committed or aborted.
+ *
+ * Only transactions that commit after CONSISTENT state has been reached will
+ * be replayed, even though they might have started while still in
+ * FULL_SNAPSHOT. That ensures that we'll reach a point where no previous
+ * changes has been exported, but all the following ones will be. That point
+ * is a convenient point to initialize replication from, which is why we
+ * export a snapshot at that point, which *can* be used to read normal data.
+ *
+ * Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/replication/snapbuild.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "miscadmin.h"
+
+#include "access/heapam_xlog.h"
+#include "access/transam.h"
+#include "access/xact.h"
+
+#include "replication/logical.h"
+#include "replication/reorderbuffer.h"
+#include "replication/snapbuild.h"
+
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/snapshot.h"
+#include "utils/snapmgr.h"
+#include "utils/tqual.h"
+
+#include "storage/block.h" /* debugging output */
+#include "storage/fd.h"
+#include "storage/lmgr.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/standby.h"
+
+/*
+ * This struct contains the current state of the snapshot building
+ * machinery. Besides a forward declaration in the header, it is not exposed
+ * to the public, so we can easily change it's contents.
+ */
+struct SnapBuild
+{
+ /* how far are we along building our first full snapshot */
+ SnapBuildState state;
+
+ /* private memory context used to allocate memory for this module. */
+ MemoryContext context;
+
+ /* all transactions < than this have committed/aborted */
+ TransactionId xmin;
+
+ /* all transactions >= than this are uncommitted */
+ TransactionId xmax;
+
+ /*
+ * Don't replay commits from an LSN <= this LSN. This can be set
+ * externally but it will also be advanced (never retreat) from within
+ * snapbuild.c.
+ */
+ XLogRecPtr transactions_after;
+
+ /*
+ * Don't start decoding WAL until the "xl_running_xacts" information
+ * indicates there are no running xids with a xid smaller than this.
+ */
+ TransactionId initial_xmin_horizon;
+
+ /*
+ * Snapshot that's valid to see the catalog state seen at this moment.
+ */
+ Snapshot snapshot;
+
+ /*
+ * LSN of the last location we are sure a snapshot has been serialized to.
+ */
+ XLogRecPtr last_serialized_snapshot;
+
+ /*
+ * The reorderbuffer we need to update with usable snapshots et al.
+ */
+ ReorderBuffer *reorder;
+
+ /*
+ * Information about initially running transactions
+ *
+ * When we start building a snapshot there already may be transactions in
+ * progress. Those are stored in running.xip. We don't have enough
+ * information about those to decode their contents, so until they are
+ * finished (xcnt=0) we cannot switch to a CONSISTENT state.
+ */
+ struct
+ {
+ /*
+ * As long as running.xcnt all XIDs < running.xmin and > running.xmax
+ * have to be checked whether they still are running.
+ */
+ TransactionId xmin;
+ TransactionId xmax;
+
+ size_t xcnt; /* number of used xip entries */
+ size_t xcnt_space; /* allocated size of xip */
+ TransactionId *xip; /* running xacts array, xidComparator-sorted */
+ } running;
+
+ /*
+ * Array of transactions which could have catalog changes that committed
+ * between xmin and xmax.
+ */
+ struct
+ {
+ /* number of committed transactions */
+ size_t xcnt;
+
+ /* available space for committed transactions */
+ size_t xcnt_space;
+
+ /*
+ * Until we reach a CONSISTENT state, we record commits of all
+ * transactions, not just the catalog changing ones. Record when that
+ * changes so we know we cannot export a snapshot safely anymore.
+ */
+ bool includes_all_transactions;
+
+ /*
+ * Array of committed transactions that have modified the catalog.
+ *
+ * As this array is frequently modified we do *not* keep it in
+ * xidComparator order. Instead we sort the array when building &
+ * distributing a snapshot.
+ *
+ * TODO: It's unclear whether that reasoning has much merit. Every
+ * time we add something here after becoming consistent will also
+ * require distributing a snapshot. Storing them sorted would
+ * potentially also make it easier to purge (but more complicated wrt
+ * wraparound?). Should be improved if sorting while building the
+ * snapshot shows up in profiles.
+ */
+ TransactionId *xip;
+ } committed;
+};
+
+/*
+ * Starting a transaction -- which we need to do while exporting a snapshot --
+ * removes knowledge about the previously used resowner, so we save it here.
+ */
+ResourceOwner SavedResourceOwnerDuringExport = NULL;
+bool ExportInProgress = false;
+
+/* transaction state manipulation functions */
+static void SnapBuildEndTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid);
+
+/* ->running manipulation */
+static bool SnapBuildTxnIsRunning(SnapBuild *builder, TransactionId xid);
+
+/* ->committed manipulation */
+static void SnapBuildPurgeCommittedTxn(SnapBuild *builder);
+
+/* snapshot building/manipulation/distribution functions */
+static Snapshot SnapBuildBuildSnapshot(SnapBuild *builder, TransactionId xid);
+
+static void SnapBuildFreeSnapshot(Snapshot snap);
+
+static void SnapBuildSnapIncRefcount(Snapshot snap);
+
+static void SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn);
+
+/* xlog reading helper functions for SnapBuildProcessRecord */
+static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running);
+
+/* serialization functions */
+static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn);
+static bool SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn);
+
+
+/*
+ * Allocate a new snapshot builder.
+ *
+ * xmin_horizon is the xid >=which we can be sure no catalog rows have been
+ * removed, start_lsn is the LSN >= we want to replay commits.
+ */
+SnapBuild *
+AllocateSnapshotBuilder(ReorderBuffer *reorder,
+ TransactionId xmin_horizon,
+ XLogRecPtr start_lsn)
+{
+ MemoryContext context;
+ MemoryContext oldcontext;
+ SnapBuild *builder;
+
+ /* allocate memory in own context, to have better accountability */
+ context = AllocSetContextCreate(CurrentMemoryContext,
+ "snapshot builder context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ oldcontext = MemoryContextSwitchTo(context);
+
+ builder = palloc0(sizeof(SnapBuild));
+
+ builder->state = SNAPBUILD_START;
+ builder->context = context;
+ builder->reorder = reorder;
+ /* Other struct members initialized by zeroing via palloc0 above */
+
+ builder->committed.xcnt = 0;
+ builder->committed.xcnt_space = 128; /* arbitrary number */
+ builder->committed.xip =
+ palloc0(builder->committed.xcnt_space * sizeof(TransactionId));
+ builder->committed.includes_all_transactions = true;
+ builder->committed.xip =
+ palloc0(builder->committed.xcnt_space * sizeof(TransactionId));
+ builder->initial_xmin_horizon = xmin_horizon;
+ builder->transactions_after = start_lsn;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return builder;
+}
+
+/*
+ * Free a snapshot builder.
+ */
+void
+FreeSnapshotBuilder(SnapBuild *builder)
+{
+ MemoryContext context = builder->context;
+
+ /* free snapshot explicitly, that contains some error checking */
+ if (builder->snapshot != NULL)
+ {
+ SnapBuildSnapDecRefcount(builder->snapshot);
+ builder->snapshot = NULL;
+ }
+
+ /* other resources are deallocated via memory context reset */
+ MemoryContextDelete(context);
+}
+
+/*
+ * Free an unreferenced snapshot that has previously been built by us.
+ */
+static void
+SnapBuildFreeSnapshot(Snapshot snap)
+{
+ /* make sure we don't get passed an external snapshot */
+ Assert(snap->satisfies == HeapTupleSatisfiesHistoricMVCC);
+
+ /* make sure nobody modified our snapshot */
+ Assert(snap->curcid == FirstCommandId);
+ Assert(!snap->suboverflowed);
+ Assert(!snap->takenDuringRecovery);
+ Assert(snap->regd_count == 1);
+
+ /* slightly more likely, so it's checked even without c-asserts */
+ if (snap->copied)
+ elog(ERROR, "cannot free a copied snapshot");
+
+ if (snap->active_count)
+ elog(ERROR, "cannot free an active snapshot");
+
+ pfree(snap);
+}
+
+/*
+ * In which state of snapshot building are we?
+ */
+SnapBuildState
+SnapBuildCurrentState(SnapBuild *builder)
+{
+ return builder->state;
+}
+
+/*
+ * Should the contents of transaction ending at 'ptr' be decoded?
+ */
+bool
+SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr)
+{
+ return ptr <= builder->transactions_after;
+}
+
+/*
+ * Increase refcount of a snapshot.
+ *
+ * This is used when handing out a snapshot to some external resource or when
+ * adding a Snapshot as builder->snapshot.
+ */
+static void
+SnapBuildSnapIncRefcount(Snapshot snap)
+{
+ snap->active_count++;
+}
+
+/*
+ * Decrease refcount of a snapshot and free if the refcount reaches zero.
+ *
+ * Externally visible, so that external resources that have been handed an
+ * IncRef'ed Snapshot can adjust its refcount easily.
+ */
+void
+SnapBuildSnapDecRefcount(Snapshot snap)
+{
+ /* make sure we don't get passed an external snapshot */
+ Assert(snap->satisfies == HeapTupleSatisfiesHistoricMVCC);
+
+ /* make sure nobody modified our snapshot */
+ Assert(snap->curcid == FirstCommandId);
+ Assert(!snap->suboverflowed);
+ Assert(!snap->takenDuringRecovery);
+
+ Assert(snap->regd_count == 1);
+
+ Assert(snap->active_count);
+
+ /* slightly more likely, so its checked even without casserts */
+ if (snap->copied)
+ elog(ERROR, "cannot free a copied snapshot");
+
+ snap->active_count--;
+ if (!snap->active_count)
+ SnapBuildFreeSnapshot(snap);
+}
+
+/*
+ * Build a new snapshot, based on currently committed catalog-modifying
+ * transactions.
+ *
+ * In-progress transactions with catalog access are *not* allowed to modify
+ * these snapshots; they have to copy them and fill in appropriate ->curcid
+ * and ->subxip/subxcnt values.
+ */
+static Snapshot
+SnapBuildBuildSnapshot(SnapBuild *builder, TransactionId xid)
+{
+ Snapshot snapshot;
+ Size ssize;
+
+ Assert(builder->state >= SNAPBUILD_FULL_SNAPSHOT);
+
+ ssize = sizeof(SnapshotData)
+ + sizeof(TransactionId) * builder->committed.xcnt
+ + sizeof(TransactionId) * 1 /* toplevel xid */ ;
+
+ snapshot = MemoryContextAllocZero(builder->context, ssize);
+
+ snapshot->satisfies = HeapTupleSatisfiesHistoricMVCC;
+
+ /*
+ * We misuse the original meaning of SnapshotData's xip and subxip fields
+ * to make the more fitting for our needs.
+ *
+ * In the 'xip' array we store transactions that have to be treated as
+ * committed. Since we will only ever look at tuples from transactions
+ * that have modified the catalog its more efficient to store those few
+ * that exist between xmin and xmax (frequently there are none).
+ *
+ * Snapshots that are used in transactions that have modified the catalog
+ * also use the 'subxip' array to store their toplevel xid and all the
+ * subtransaction xids so we can recognize when we need to treat rows as
+ * visible that are not in xip but still need to be visible. Subxip only
+ * gets filled when the transaction is copied into the context of a
+ * catalog modifying transaction since we otherwise share a snapshot
+ * between transactions. As long as a txn hasn't modified the catalog it
+ * doesn't need to treat any uncommitted rows as visible, so there is no
+ * need for those xids.
+ *
+ * Both arrays are qsort'ed so that we can use bsearch() on them.
+ */
+ Assert(TransactionIdIsNormal(builder->xmin));
+ Assert(TransactionIdIsNormal(builder->xmax));
+
+ snapshot->xmin = builder->xmin;
+ snapshot->xmax = builder->xmax;
+
+ /* store all transactions to be treated as committed by this snapshot */
+ snapshot->xip =
+ (TransactionId *) ((char *) snapshot + sizeof(SnapshotData));
+ snapshot->xcnt = builder->committed.xcnt;
+ memcpy(snapshot->xip,
+ builder->committed.xip,
+ builder->committed.xcnt * sizeof(TransactionId));
+
+ /* sort so we can bsearch() */
+ qsort(snapshot->xip, snapshot->xcnt, sizeof(TransactionId), xidComparator);
+
+ /*
+ * Initially, subxip is empty, i.e. it's a snapshot to be used by
+ * transactions that don't modify the catalog. Will be filled by
+ * ReorderBufferCopySnap() if necessary.
+ */
+ snapshot->subxcnt = 0;
+ snapshot->subxip = NULL;
+
+ snapshot->suboverflowed = false;
+ snapshot->takenDuringRecovery = false;
+ snapshot->copied = false;
+ snapshot->curcid = FirstCommandId;
+ snapshot->active_count = 0;
+ snapshot->regd_count = 1; /* mark as registered so nobody frees it */
+
+ return snapshot;
+}
+
+/*
+ * Export a snapshot so it can be set in another session with SET TRANSACTION
+ * SNAPSHOT.
+ *
+ * For that we need to start a transaction in the current backend as the
+ * importing side checks whether the source transaction is still open to make
+ * sure the xmin horizon hasn't advanced since then.
+ *
+ * After that we convert a locally built snapshot into the normal variant
+ * understood by HeapTupleSatisfiesMVCC et al.
+ */
+const char *
+SnapBuildExportSnapshot(SnapBuild *builder)
+{
+ Snapshot snap;
+ char *snapname;
+ TransactionId xid;
+ TransactionId *newxip;
+ int newxcnt = 0;
+
+ if (builder->state != SNAPBUILD_CONSISTENT)
+ elog(ERROR, "cannot export a snapshot before reaching a consistent state");
+
+ if (!builder->committed.includes_all_transactions)
+ elog(ERROR, "cannot export a snapshot, not all transactions are monitored anymore");
+
+ /* so we don't overwrite the existing value */
+ if (TransactionIdIsValid(MyPgXact->xmin))
+ elog(ERROR, "cannot export a snapshot when MyPgXact->xmin already is valid");
+
+ if (IsTransactionOrTransactionBlock())
+ elog(ERROR, "cannot export a snapshot from within a transaction");
+
+ if (SavedResourceOwnerDuringExport)
+ elog(ERROR, "can only export one snapshot at a time");
+
+ SavedResourceOwnerDuringExport = CurrentResourceOwner;
+ ExportInProgress = true;
+
+ StartTransactionCommand();
+
+ Assert(!FirstSnapshotSet);
+
+ /* There doesn't seem to a nice API to set these */
+ XactIsoLevel = XACT_REPEATABLE_READ;
+ XactReadOnly = true;
+
+ snap = SnapBuildBuildSnapshot(builder, GetTopTransactionId());
+
+ /*
+ * We know that snap->xmin is alive, enforced by the logical xmin
+ * mechanism. Due to that we can do this without locks, we're only
+ * changing our own value.
+ */
+ MyPgXact->xmin = snap->xmin;
+
+ /* allocate in transaction context */
+ newxip = (TransactionId *)
+ palloc(sizeof(TransactionId) * GetMaxSnapshotXidCount());
+
+ /*
+ * snapbuild.c builds transactions in an "inverted" manner, which means it
+ * stores committed transactions in ->xip, not ones in progress. Build a
+ * classical snapshot by marking all non-committed transactions as
+ * in-progress. This can be expensive.
+ */
+ for (xid = snap->xmin; NormalTransactionIdPrecedes(xid, snap->xmax);)
+ {
+ void *test;
+
+ /*
+ * Check whether transaction committed using the decoding snapshot
+ * meaning of ->xip.
+ */
+ test = bsearch(&xid, snap->xip, snap->xcnt,
+ sizeof(TransactionId), xidComparator);
+
+ if (test == NULL)
+ {
+ if (newxcnt >= GetMaxSnapshotXidCount())
+ elog(ERROR, "snapshot too large");
+
+ newxip[newxcnt++] = xid;
+ }
+
+ TransactionIdAdvance(xid);
+ }
+
+ snap->xcnt = newxcnt;
+ snap->xip = newxip;
+
+ /*
+ * now that we've built a plain snapshot, use the normal mechanisms for
+ * exporting it
+ */
+ snapname = ExportSnapshot(snap);
+
+ ereport(LOG,
+ (errmsg("exported logical decoding snapshot: \"%s\" with %u xids",
+ snapname, snap->xcnt)));
+ return snapname;
+}
+
+/*
+ * Reset a previously SnapBuildExportSnapshot()'ed snapshot if there is
+ * any. Aborts the previously started transaction and resets the resource
+ * owner back to it's original value.
+ */
+void
+SnapBuildClearExportedSnapshot()
+{
+ /* nothing exported, thats the usual case */
+ if (!ExportInProgress)
+ return;
+
+ if (!IsTransactionState())
+ elog(ERROR, "clearing exported snapshot in wrong transaction state");
+
+ /* make sure nothing could have ever happened */
+ AbortCurrentTransaction();
+
+ CurrentResourceOwner = SavedResourceOwnerDuringExport;
+ SavedResourceOwnerDuringExport = NULL;
+ ExportInProgress = false;
+}
+
+/*
+ * Handle the effects of a single heap change, appropriate to the current state
+ * of the snapshot builder and returns whether changes made at (xid, lsn) can
+ * be decoded.
+ */
+bool
+SnapBuildProcessChange(SnapBuild *builder, TransactionId xid, XLogRecPtr lsn)
+{
+ bool is_old_tx;
+
+ /*
+ * We can't handle data in transactions if we haven't built a snapshot
+ * yet, so don't store them.
+ */
+ if (builder->state < SNAPBUILD_FULL_SNAPSHOT)
+ return false;
+
+ /*
+ * No point in keeping track of changes in transactions that we don't have
+ * enough information about to decode. This means that they started before
+ * we got into the SNAPBUILD_FULL_SNAPSHOT state.
+ */
+ if (builder->state < SNAPBUILD_CONSISTENT &&
+ SnapBuildTxnIsRunning(builder, xid))
+ return false;
+
+ /*
+ * If the reorderbuffer doesn't yet have a snapshot, add one now, it will
+ * be needed to decode the change we're currently processing.
+ */
+ is_old_tx = ReorderBufferIsXidKnown(builder->reorder, xid);
+
+ if (!is_old_tx || !ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
+ {
+ /* only build a new snapshot if we don't have a prebuilt one */
+ if (builder->snapshot == NULL)
+ {
+ builder->snapshot = SnapBuildBuildSnapshot(builder, xid);
+ /* inrease refcount for the snapshot builder */
+ SnapBuildSnapIncRefcount(builder->snapshot);
+ }
+
+ /*
+ * Increase refcount for the transaction we're handing the snapshot
+ * out to.
+ */
+ SnapBuildSnapIncRefcount(builder->snapshot);
+ ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn,
+ builder->snapshot);
+ }
+
+ return true;
+}
+
+/*
+ * Do CommandId/ComboCid handling after reading a xl_heap_new_cid record. This
+ * implies that a transaction has done some form of write to system catalogs.
+ */
+void
+SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
+ XLogRecPtr lsn, xl_heap_new_cid *xlrec)
+{
+ CommandId cid;
+
+ /*
+ * we only log new_cid's if a catalog tuple was modified, so mark
+ * the transaction as containing catalog modifications
+ */
+ ReorderBufferXidSetCatalogChanges(builder->reorder, xid,lsn);
+
+ ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
+ xlrec->target.node, xlrec->target.tid,
+ xlrec->cmin, xlrec->cmax,
+ xlrec->combocid);
+
+ /* figure out new command id */
+ if (xlrec->cmin != InvalidCommandId &&
+ xlrec->cmax != InvalidCommandId)
+ cid = Max(xlrec->cmin, xlrec->cmax);
+ else if (xlrec->cmax != InvalidCommandId)
+ cid = xlrec->cmax;
+ else if (xlrec->cmin != InvalidCommandId)
+ cid = xlrec->cmin;
+ else
+ {
+ cid = InvalidCommandId; /* silence compiler */
+ elog(ERROR, "xl_heap_new_cid record without a valid CommandId");
+ }
+
+ ReorderBufferAddNewCommandId(builder->reorder, xid, lsn, cid + 1);
+}
+
+/*
+ * Check whether `xid` is currently 'running'.
+ *
+ * Running transactions in our parlance are transactions which we didn't
+ * observe from the start so we can't properly decode their contents. They
+ * only exist after we freshly started from an < CONSISTENT snapshot.
+ */
+static bool
+SnapBuildTxnIsRunning(SnapBuild *builder, TransactionId xid)
+{
+ Assert(builder->state < SNAPBUILD_CONSISTENT);
+ Assert(TransactionIdIsNormal(builder->running.xmin));
+ Assert(TransactionIdIsNormal(builder->running.xmax));
+
+ if (builder->running.xcnt &&
+ NormalTransactionIdFollows(xid, builder->running.xmin) &&
+ NormalTransactionIdPrecedes(xid, builder->running.xmax))
+ {
+ TransactionId *search =
+ bsearch(&xid, builder->running.xip, builder->running.xcnt_space,
+ sizeof(TransactionId), xidComparator);
+
+ if (search != NULL)
+ {
+ Assert(*search == xid);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Add a new Snapshot to all transactions we're decoding that currently are
+ * in-progress so they can see new catalog contents made by the transaction
+ * that just committed. This is necessary because those in-progress
+ * transactions will use the new catalog's contents from here on (at the very
+ * least everything they do needs to be compatible with newer catalog
+ * contents).
+ */
+static void
+SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn)
+{
+ dlist_iter txn_i;
+ ReorderBufferTXN *txn;
+
+ /*
+ * Iterate through all toplevel transactions. This can include
+ * subtransactions which we just don't yet know to be that, but that's
+ * fine, they will just get an unneccesary snapshot queued.
+ */
+ dlist_foreach(txn_i, &builder->reorder->toplevel_by_lsn)
+ {
+ txn = dlist_container(ReorderBufferTXN, node, txn_i.cur);
+
+ Assert(TransactionIdIsValid(txn->xid));
+
+ /*
+ * If we don't have a base snapshot yet, there are no changes in this
+ * transaction which in turn implies we don't yet need a snapshot at
+ * all. We'll add add a snapshot when the first change gets queued.
+ *
+ * NB: This works correctly even for subtransactions because
+ * ReorderBufferCommitChild() takes care to pass the parent the base
+ * snapshot, and while iterating the changequeue we'll get the change
+ * from the subtxn.
+ */
+ if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, txn->xid))
+ continue;
+
+ elog(DEBUG2, "adding a new snapshot to %u at %X/%X",
+ txn->xid, (uint32) (lsn >> 32), (uint32) lsn);
+
+ /*
+ * increase the snapshot's refcount for the transaction we are handing
+ * it out to
+ */
+ SnapBuildSnapIncRefcount(builder->snapshot);
+ ReorderBufferAddSnapshot(builder->reorder, txn->xid, lsn,
+ builder->snapshot);
+ }
+}
+
+/*
+ * Keep track of a new catalog changing transaction that has committed.
+ */
+static void
+SnapBuildAddCommittedTxn(SnapBuild *builder, TransactionId xid)
+{
+ Assert(TransactionIdIsValid(xid));
+
+ if (builder->committed.xcnt == builder->committed.xcnt_space)
+ {
+ builder->committed.xcnt_space = builder->committed.xcnt_space * 2 + 1;
+
+ elog(DEBUG1, "increasing space for committed transactions to %u",
+ (uint32) builder->committed.xcnt_space);
+
+ builder->committed.xip = repalloc(builder->committed.xip,
+ builder->committed.xcnt_space * sizeof(TransactionId));
+ }
+
+ /*
+ * TODO: It might make sense to keep the array sorted here instead of
+ * doing it every time we build a new snapshot. On the other hand this
+ * gets called repeatedly when a transaction with subtransactions commits.
+ */
+ builder->committed.xip[builder->committed.xcnt++] = xid;
+}
+
+/*
+ * Remove knowledge about transactions we treat as committed that are smaller
+ * than ->xmin. Those won't ever get checked via the ->commited array but via
+ * the clog machinery, so we don't need to waste memory on them.
+ */
+static void
+SnapBuildPurgeCommittedTxn(SnapBuild *builder)
+{
+ int off;
+ TransactionId *workspace;
+ int surviving_xids = 0;
+
+ /* not ready yet */
+ if (!TransactionIdIsNormal(builder->xmin))
+ return;
+
+ /* TODO: Neater algorithm than just copying and iterating? */
+ workspace =
+ MemoryContextAlloc(builder->context,
+ builder->committed.xcnt * sizeof(TransactionId));
+
+ /* copy xids that still are interesting to workspace */
+ for (off = 0; off < builder->committed.xcnt; off++)
+ {
+ if (NormalTransactionIdPrecedes(builder->committed.xip[off],
+ builder->xmin))
+ ; /* remove */
+ else
+ workspace[surviving_xids++] = builder->committed.xip[off];
+ }
+
+ /* copy workspace back to persistent state */
+ memcpy(builder->committed.xip, workspace,
+ surviving_xids * sizeof(TransactionId));
+
+ elog(DEBUG3, "purged committed transactions from %u to %u, xmin: %u, xmax: %u",
+ (uint32) builder->committed.xcnt, (uint32) surviving_xids,
+ builder->xmin, builder->xmax);
+ builder->committed.xcnt = surviving_xids;
+
+ pfree(workspace);
+}
+
+/*
+ * Common logic for SnapBuildAbortTxn and SnapBuildCommitTxn dealing with
+ * keeping track of the amount of running transactions.
+ */
+static void
+SnapBuildEndTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid)
+{
+ if (builder->state == SNAPBUILD_CONSISTENT)
+ return;
+
+ /*
+ * NB: This handles subtransactions correctly even if we started from
+ * suboverflowed xl_running_xacts because we only keep track of toplevel
+ * transactions. Since the latter are always are allocated before their
+ * subxids and since they end at the same time it's sufficient to deal
+ * with them here.
+ */
+ if (SnapBuildTxnIsRunning(builder, xid))
+ {
+ Assert(builder->running.xcnt > 0);
+
+ if (!--builder->running.xcnt)
+ {
+ /*
+ * None of the originally running transaction is running anymore,
+ * so our incrementaly built snapshot now is consistent.
+ */
+ ereport(LOG,
+ (errmsg("logical decoding found consistent point at %X/%X",
+ (uint32)(lsn >> 32), (uint32)lsn),
+ errdetail("xid %u finished, no running transactions anymore",
+ xid)));
+ builder->state = SNAPBUILD_CONSISTENT;
+ }
+ }
+}
+
+/*
+ * Abort a transaction, throw away all state we kept.
+ */
+void
+SnapBuildAbortTxn(SnapBuild *builder, XLogRecPtr lsn,
+ TransactionId xid,
+ int nsubxacts, TransactionId *subxacts)
+{
+ int i;
+
+ for (i = 0; i < nsubxacts; i++)
+ {
+ TransactionId subxid = subxacts[i];
+
+ SnapBuildEndTxn(builder, lsn, subxid);
+ }
+
+ SnapBuildEndTxn(builder, lsn, xid);
+}
+
+/*
+ * Handle everything that needs to be done when a transaction commits
+ */
+void
+SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid,
+ int nsubxacts, TransactionId *subxacts)
+{
+ int nxact;
+
+ bool forced_timetravel = false;
+ bool sub_needs_timetravel = false;
+ bool top_needs_timetravel = false;
+
+ TransactionId xmax = xid;
+
+ /*
+ * If we couldn't observe every change of a transaction because it was
+ * already running at the point we started to observe we have to assume it
+ * made catalog changes.
+ *
+ * This has the positive benefit that we afterwards have enough
+ * information to build an exportable snapshot that's usable by pg_dump et
+ * al.
+ */
+ if (builder->state < SNAPBUILD_CONSISTENT)
+ {
+ /* ensure that only commits after this are getting replayed */
+ if (builder->transactions_after < lsn)
+ builder->transactions_after = lsn;
+
+ /*
+ * We could avoid treating !SnapBuildTxnIsRunning transactions as
+ * timetravel ones, but we want to be able to export a snapshot when
+ * we reached consistency.
+ */
+ forced_timetravel = true;
+ elog(DEBUG1, "forced to assume catalog changes for xid %u because it was running to early", xid);
+ }
+
+ for (nxact = 0; nxact < nsubxacts; nxact++)
+ {
+ TransactionId subxid = subxacts[nxact];
+
+ /*
+ * make sure txn is not tracked in running txn's anymore, switch state
+ */
+ SnapBuildEndTxn(builder, lsn, subxid);
+
+ /*
+ * If we're forcing timetravel we also need visibility information
+ * about subtransaction, so keep track of subtransaction's state.
+ */
+ if (forced_timetravel)
+ {
+ SnapBuildAddCommittedTxn(builder, subxid);
+ if (NormalTransactionIdFollows(subxid, xmax))
+ xmax = subxid;
+ }
+
+ /*
+ * Add subtransaction to base snapshot if it DDL, we don't distinguish
+ * to toplevel transactions there.
+ */
+ else if (ReorderBufferXidHasCatalogChanges(builder->reorder, subxid))
+ {
+ sub_needs_timetravel = true;
+
+ elog(DEBUG1, "found subtransaction %u:%u with catalog changes.",
+ xid, subxid);
+
+ SnapBuildAddCommittedTxn(builder, subxid);
+
+ if (NormalTransactionIdFollows(subxid, xmax))
+ xmax = subxid;
+ }
+ }
+
+ /*
+ * Make sure toplevel txn is not tracked in running txn's anymore, switch
+ * state to consistent if possible.
+ */
+ SnapBuildEndTxn(builder, lsn, xid);
+
+ if (forced_timetravel)
+ {
+ elog(DEBUG2, "forced transaction %u to do timetravel.", xid);
+
+ SnapBuildAddCommittedTxn(builder, xid);
+ }
+ /* add toplevel transaction to base snapshot */
+ else if (ReorderBufferXidHasCatalogChanges(builder->reorder, xid))
+ {
+ elog(DEBUG2, "found top level transaction %u, with catalog changes!",
+ xid);
+
+ top_needs_timetravel = true;
+ SnapBuildAddCommittedTxn(builder, xid);
+ }
+ else if (sub_needs_timetravel)
+ {
+ /* mark toplevel txn as timetravel as well */
+ SnapBuildAddCommittedTxn(builder, xid);
+ }
+
+ /* if there's any reason to build a historic snapshot, to so now */
+ if (forced_timetravel || top_needs_timetravel || sub_needs_timetravel)
+ {
+ /*
+ * Adjust xmax of the snapshot builder, we only do that for committed,
+ * catalog modifying, transactions, everything else isn't interesting
+ * for us since we'll never look at the respective rows.
+ */
+ if (!TransactionIdIsValid(builder->xmax) ||
+ TransactionIdFollowsOrEquals(xmax, builder->xmax))
+ {
+ builder->xmax = xmax;
+ TransactionIdAdvance(builder->xmax);
+ }
+
+ /*
+ * If we haven't built a complete snapshot yet there's no need to hand
+ * it out, it wouldn't (and couldn't) be used anyway.
+ */
+ if (builder->state < SNAPBUILD_FULL_SNAPSHOT)
+ return;
+
+ /*
+ * Decrease the snapshot builder's refcount of the old snapshot, note
+ * that it still will be used if it has been handed out to the
+ * reorderbuffer earlier.
+ */
+ if (builder->snapshot)
+ SnapBuildSnapDecRefcount(builder->snapshot);
+
+ builder->snapshot = SnapBuildBuildSnapshot(builder, xid);
+
+ /* we might need to execute invalidations, add snapshot */
+ if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, xid))
+ {
+ SnapBuildSnapIncRefcount(builder->snapshot);
+ ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn,
+ builder->snapshot);
+ }
+
+ /* refcount of the snapshot builder for the new snapshot */
+ SnapBuildSnapIncRefcount(builder->snapshot);
+
+ /* add a new SnapshotNow to all currently running transactions */
+ SnapBuildDistributeNewCatalogSnapshot(builder, lsn);
+ }
+ else
+ {
+ /* record that we cannot export a general snapshot anymore */
+ builder->committed.includes_all_transactions = false;
+ }
+}
+
+
+/* -----------------------------------
+ * Snapshot building functions dealing with xlog records
+ * -----------------------------------
+ */
+
+/*
+ * Process a running xacts record, and use it's information to first build a
+ * historic snapshot and later to release resources that aren't needed
+ * anymore.
+ */
+void
+SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running)
+{
+ ReorderBufferTXN *txn;
+
+ /*
+ * If we're not consistent yet, inspect the record to see whether it
+ * allows to get closer to being consistent. If we are consistent, dump
+ * our snapshot so others or we, after a restart, can use it.
+ */
+ if (builder->state < SNAPBUILD_CONSISTENT)
+ {
+ /* returns false if there's no point in performing cleanup just yet */
+ if (!SnapBuildFindSnapshot(builder, lsn, running))
+ return;
+ }
+ else
+ SnapBuildSerialize(builder, lsn);
+
+ /*
+ * Update range of interesting xids base don the running xacts
+ * information. We don't increase ->xmax using it, because once we are in
+ * a consistent state we can do that ourselves and much more efficiently
+ * so, because we only need to do it for catalog transactions since we
+ * only ever look at those.
+ *
+ * NB: Because of that xmax can be lower than xmin, because we only
+ * increase xmax when a catalog modifying transaction commits. While odd
+ * looking, its correct and actually more efficient this way since we hit
+ * fast paths in tqual.c.
+ */
+ builder->xmin = running->oldestRunningXid;
+
+ /* Remove transactions we don't need to keep track off anymore */
+ SnapBuildPurgeCommittedTxn(builder);
+
+ elog(DEBUG3, "xmin: %u, xmax: %u, oldestrunning: %u",
+ builder->xmin, builder->xmax,
+ running->oldestRunningXid);
+
+ /*
+ * Inrease shared memory limits, so vacuum can work on tuples we prevented
+ * from being pruned till now.
+ */
+ LogicalIncreaseXminForSlot(lsn, running->oldestRunningXid);
+
+ /*
+ * Also tell the slot where we can restart decoding from. We don't want to
+ * do that after every commit because changing that implies an fsync of
+ * the logical slot's state file, so we only do it every time we see a
+ * running xacts record.
+ *
+ * Do so by looking for the oldest in progress transaction (determined by
+ * the first LSN of any of its relevant records). Every transaction
+ * remembers the last location we stored the snapshot to disk before its
+ * beginning. That point is where we can restart from.
+ */
+
+ /*
+ * Can't know about a serialized snapshot's location if we're not
+ * consistent.
+ */
+ if (builder->state < SNAPBUILD_CONSISTENT)
+ return;
+
+ txn = ReorderBufferGetOldestTXN(builder->reorder);
+
+ /*
+ * oldest ongoing txn might have started when we didn't yet serialize
+ * anything because we hadn't reached a consistent state yet.
+ */
+ if (txn != NULL && txn->restart_decoding_lsn != InvalidXLogRecPtr)
+ LogicalIncreaseRestartDecodingForSlot(lsn, txn->restart_decoding_lsn);
+ /*
+ * No in-progress transaction, can reuse the last serialized snapshot if
+ * we have one.
+ */
+ else if (txn == NULL &&
+ builder->reorder->current_restart_decoding_lsn != InvalidXLogRecPtr &&
+ builder->last_serialized_snapshot != InvalidXLogRecPtr)
+ LogicalIncreaseRestartDecodingForSlot(lsn,
+ builder->last_serialized_snapshot);
+}
+
+
+/*
+ * Build the start of a snapshot that's capable of decoding the catalog.
+ *
+ * Helper function for SnapBuildProcessRunningXacts() while we're not yet
+ * consistent.
+ *
+ * Returns true if there is a point in performing internal maintenance/cleanup
+ * using the xl_running_xacts record.
+ */
+static bool
+SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running)
+{
+ /* ---
+ * Build catalog decoding snapshot incrementally using information about
+ * the currently running transactions. There are several ways to do that:
+ *
+ * a) There were no running transactions when the xl_running_xacts record
+ * was inserted, jump to CONSISTENT immediately. We might find such a
+ * state we were waiting for b) and c).
+ *
+ * b) Wait for all toplevel transactions that were running to end. We
+ * simply track the number of in-progress toplevel transactions and
+ * lower it whenever one commits or aborts. When that number
+ * (builder->running.xcnt) reaches zero, we can go from FULL_SNAPSHOT
+ * to CONSISTENT.
+ * NB: We need to search running.xip when seeing a transaction's end to
+ * make sure it's a toplevel transaction and it's been one of the
+ * intially running ones.
+ * Interestingly, in contrast to HS, this allows us not to care about
+ * subtransactions - and by extension suboverflowed xl_running_xacts -
+ * at all.
+ *
+ * c) This (in a previous run) or another decoding slot serialized a
+ * snapshot to disk that we can use.
+ * ---
+ */
+
+ /*
+ * xl_running_xact record is older than what we can use, we might not have
+ * all necessary catalog rows anymore.
+ */
+ if (TransactionIdIsNormal(builder->initial_xmin_horizon) &&
+ NormalTransactionIdPrecedes(running->oldestRunningXid,
+ builder->initial_xmin_horizon))
+ {
+ ereport(DEBUG1,
+ (errmsg("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low",
+ (uint32) (lsn >> 32), (uint32) lsn),
+ errdetail("initial xmin horizon of %u vs the snapshot's %u",
+ builder->initial_xmin_horizon, running->oldestRunningXid)));
+ return true;
+ }
+
+ /*
+ * a) No transaction were running, we can jump to consistent.
+ *
+ * NB: We might have already started to incrementally assemble a snapshot,
+ * so we need to be careful to deal with that.
+ */
+ if (running->xcnt == 0)
+ {
+ if (builder->transactions_after == InvalidXLogRecPtr ||
+ builder->transactions_after < lsn)
+ builder->transactions_after = lsn;
+
+ builder->xmin = running->oldestRunningXid;
+ builder->xmax = running->latestCompletedXid;
+ TransactionIdAdvance(builder->xmax);
+
+ Assert(TransactionIdIsNormal(builder->xmin));
+ Assert(TransactionIdIsNormal(builder->xmax));
+
+ /* no transactions running now */
+ builder->running.xcnt = 0;
+ builder->running.xmin = InvalidTransactionId;
+ builder->running.xmax = InvalidTransactionId;
+
+ builder->state = SNAPBUILD_CONSISTENT;
+
+ ereport(LOG,
+ (errmsg("logical decoding found consistent point at %X/%X",
+ (uint32)(lsn >> 32), (uint32)lsn),
+ errdetail("running xacts with xcnt == 0")));
+
+ return false;
+ }
+ /* c) valid on disk state */
+ else if (SnapBuildRestore(builder, lsn))
+ {
+ /* there won't be any state to cleanup */
+ return false;
+ }
+ /*
+ * b) first encounter of a useable xl_running_xacts record. If we had
+ * found one earlier we would either track running transactions
+ * (i.e. builder->running.xcnt != 0) or be consistent (this function
+ * wouldn't get called).
+ */
+ else if (!builder->running.xcnt)
+ {
+ int off;
+
+ /*
+ * We only care about toplevel xids as those are the ones we
+ * definitely see in the wal stream. As snapbuild.c tracks committed
+ * instead of running transactions we don't need to know anything
+ * about uncommitted subtransactions.
+ */
+ builder->xmin = running->oldestRunningXid;
+ builder->xmax = running->latestCompletedXid;
+ TransactionIdAdvance(builder->xmax);
+
+ /* so we can safely use the faster comparisons */
+ Assert(TransactionIdIsNormal(builder->xmin));
+ Assert(TransactionIdIsNormal(builder->xmax));
+
+ builder->running.xcnt = running->xcnt;
+ builder->running.xcnt_space = running->xcnt;
+ builder->running.xip =
+ MemoryContextAlloc(builder->context,
+ builder->running.xcnt * sizeof(TransactionId));
+ memcpy(builder->running.xip, running->xids,
+ builder->running.xcnt * sizeof(TransactionId));
+
+ /* sort so we can do a binary search */
+ qsort(builder->running.xip, builder->running.xcnt,
+ sizeof(TransactionId), xidComparator);
+
+ builder->running.xmin = builder->running.xip[0];
+ builder->running.xmax = builder->running.xip[running->xcnt - 1];
+
+ /* makes comparisons cheaper later */
+ TransactionIdRetreat(builder->running.xmin);
+ TransactionIdAdvance(builder->running.xmax);
+
+ builder->state = SNAPBUILD_FULL_SNAPSHOT;
+
+ ereport(LOG,
+ (errmsg("logical decoding found initial starting point at %X/%X",
+ (uint32)(lsn >> 32), (uint32)lsn),
+ errdetail("%u xacts need to finish", (uint32) builder->running.xcnt)));
+
+ /*
+ * Iterate through all xids, wait for them to finish.
+ *
+ * This isn't required for the correctness of decoding, but to allow
+ * isolationtester to notice that we're currently waiting for
+ * something.
+ */
+ for(off = 0; off < builder->running.xcnt; off++)
+ {
+ TransactionId xid = builder->running.xip[off];
+
+ /*
+ * Upper layers should prevent that we ever need to wait on
+ * ourselves. Check anyway, since failing to do so would either
+ * result in an endless wait or an Assert() failure.
+ */
+ if (TransactionIdIsCurrentTransactionId(xid))
+ elog(ERROR, "waiting for ourselves");
+
+ XactLockTableWait(xid);
+ }
+
+ /* nothing could have built up so far, so don't perform cleanup */
+ return false;
+ }
+
+ /*
+ * We already started to track running xacts and need to wait for all
+ * in-progress ones to finish. We fall through to the normal processing of
+ * records so incremental cleanup can be performed.
+ */
+ return true;
+}
+
+
+/* -----------------------------------
+ * Snapshot serialization support
+ * -----------------------------------
+ */
+
+/*
+ * We store current state of struct SnapBuild on disk in the following manner:
+ *
+ * struct SnapBuildOnDisk;
+ * TransactionId * running.xcnt_space;
+ * TransactionId * committed.xcnt; (*not xcnt_space*)
+ *
+ */
+typedef struct SnapBuildOnDisk
+{
+ /* first part of this struct needs to be version independent */
+
+ /* data not covered by checksum */
+ uint32 magic;
+ pg_crc32 checksum;
+
+ /* data covered by checksum */
+
+ /* version, in case we want to support pg_upgrade */
+ uint32 version;
+ /* how large is the on disk data, excluding the constant sized part */
+ uint32 length;
+
+ /* version dependent part */
+ SnapBuild builder;
+
+ /* variable amount of TransactionIds follows */
+} SnapBuildOnDisk;
+
+#define SnapBuildOnDiskConstantSize \
+ offsetof(SnapBuildOnDisk, builder)
+#define SnapBuildOnDiskNotChecksummedSize \
+ offsetof(SnapBuildOnDisk, version)
+
+#define SNAPBUILD_MAGIC 0x51A1E001
+#define SNAPBUILD_VERSION 1
+
+/*
+ * Store/Load a snapshot from disk, depending on the snapshot builder's state.
+ *
+ * Supposed to be used by external (i.e. not snapbuild.c) code that just reada
+ * record that's a potential location for a serialized snapshot.
+ */
+void
+SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn)
+{
+ if (builder->state < SNAPBUILD_CONSISTENT)
+ SnapBuildRestore(builder, lsn);
+ else
+ SnapBuildSerialize(builder, lsn);
+}
+
+/*
+ * Serialize the snapshot 'builder' at the location 'lsn' if it hasn't already
+ * been done by another decoding process.
+ */
+static void
+SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
+{
+ Size needed_length;
+ SnapBuildOnDisk *ondisk;
+ char *ondisk_c;
+ int fd;
+ char tmppath[MAXPGPATH];
+ char path[MAXPGPATH];
+ int ret;
+ struct stat stat_buf;
+ uint32 sz;
+
+ Assert(lsn != InvalidXLogRecPtr);
+ Assert(builder->last_serialized_snapshot == InvalidXLogRecPtr ||
+ builder->last_serialized_snapshot <= lsn);
+
+ /*
+ * no point in serializing if we cannot continue to work immediately after
+ * restoring the snapshot
+ */
+ if (builder->state < SNAPBUILD_CONSISTENT)
+ return;
+
+ /*
+ * We identify snapshots by the LSN they are valid for. We don't need to
+ * include timelines in the name as each LSN maps to exactly one timeline
+ * unless the user used pg_resetxlog or similar. If a user did so, there's
+ * no hope continuing to decode anyway.
+ */
+ sprintf(path, "pg_llog/snapshots/%X-%X.snap",
+ (uint32) (lsn >> 32), (uint32) lsn);
+
+ /*
+ * first check whether some other backend already has written the snapshot
+ * for this LSN. It's perfectly fine if there's none, so we accept ENOENT
+ * as a valid state. Everything else is an unexpected error.
+ */
+ ret = stat(path, &stat_buf);
+
+ if (ret != 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errmsg("could not stat file \"%s\": %m", path)));
+
+ else if (ret == 0)
+ {
+ /*
+ * somebody else has already serialized to this point, don't overwrite
+ * but remember location, so we don't need to read old data again.
+ *
+ * To be sure it has been synced to disk after the rename() from the
+ * tempfile filename to the real filename, we just repeat the
+ * fsync. That ought to be cheap because in most scenarios it should
+ * already be safely on disk.
+ */
+ fsync_fname(path, false);
+ fsync_fname("pg_llog/snapshots", true);
+
+ builder->last_serialized_snapshot = lsn;
+ goto out;
+ }
+
+ /*
+ * there is an obvious race condition here between the time we stat(2) the
+ * file and us writing the file. But we rename the file into place
+ * atomically and all files created need to contain the same data anyway,
+ * so this is perfectly fine, although a bit of a resource waste. Locking
+ * seems like pointless complication.
+ */
+ elog(DEBUG1, "serializing snapshot to %s", path);
+
+ /* to make sure only we will write to this tempfile, include pid */
+ sprintf(tmppath, "pg_llog/snapshots/%X-%X.snap.%u.tmp",
+ (uint32) (lsn >> 32), (uint32) lsn, MyProcPid);
+
+ /*
+ * Unlink temporary file if it already exists, needs to have been before a
+ * crash/error since we won't enter this function twice from within a
+ * single decoding slot/backend and the temporary file contains the pid of
+ * the current process.
+ */
+ if (unlink(tmppath) != 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not unlink file \"%s\": %m", path)));
+
+ needed_length = sizeof(SnapBuildOnDisk) +
+ sizeof(TransactionId) * builder->running.xcnt_space +
+ sizeof(TransactionId) * builder->committed.xcnt;
+
+ ondisk_c = MemoryContextAllocZero(builder->context, needed_length);
+ ondisk = (SnapBuildOnDisk *) ondisk_c;
+ ondisk->magic = SNAPBUILD_MAGIC;
+ ondisk->version = SNAPBUILD_VERSION;
+ ondisk->length = needed_length;
+ INIT_CRC32(ondisk->checksum);
+ COMP_CRC32(ondisk->checksum,
+ ((char *) ondisk) + SnapBuildOnDiskNotChecksummedSize,
+ SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize);
+ ondisk_c += sizeof(SnapBuildOnDisk);
+
+ memcpy(&ondisk->builder, builder, sizeof(SnapBuild));
+ /* NULL-ify memory-only data */
+ ondisk->builder.context = NULL;
+ ondisk->builder.snapshot = NULL;
+ ondisk->builder.reorder = NULL;
+ ondisk->builder.running.xip = NULL;
+ ondisk->builder.committed.xip = NULL;
+
+ COMP_CRC32(ondisk->checksum,
+ &ondisk->builder,
+ sizeof(SnapBuild));
+
+ /* copy running xacts */
+ sz = sizeof(TransactionId) * builder->running.xcnt_space;
+ memcpy(ondisk_c, builder->running.xip, sz);
+ COMP_CRC32(ondisk->checksum, ondisk_c, sz);
+ ondisk_c += sz;
+
+ /* copy committed xacts */
+ sz = sizeof(TransactionId) * builder->committed.xcnt;
+ memcpy(ondisk_c, builder->committed.xip, sz);
+ COMP_CRC32(ondisk->checksum, ondisk_c, sz);
+ ondisk_c += sz;
+
+ /* we have valid data now, open tempfile and write it there */
+ fd = OpenTransientFile(tmppath,
+ O_CREAT | O_EXCL | O_WRONLY | PG_BINARY,
+ S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ ereport(ERROR,
+ (errmsg("could not open file \"%s\": %m", path)));
+
+ if ((write(fd, ondisk, needed_length)) != needed_length)
+ {
+ CloseTransientFile(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write to file \"%s\": %m", tmppath)));
+ }
+
+ /*
+ * fsync the file before renaming so that even if we crash after this we
+ * have either a fully valid file or nothing.
+ *
+ * TODO: Do the fsync() via checkpoints/restartpoints, doing it here has
+ * some noticeable overhead since it's performed synchronously during
+ * decoding?
+ */
+ if (pg_fsync(fd) != 0)
+ {
+ CloseTransientFile(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync file \"%s\": %m", tmppath)));
+ }
+ CloseTransientFile(fd);
+
+ fsync_fname("pg_llog/snapshots", true);
+
+ /*
+ * We may overwrite the work from some other backend, but that's ok, our
+ * snapshot is valid as well, we'll just have done some superflous work.
+ */
+ if (rename(tmppath, path) != 0)
+ {
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not rename file \"%s\" to \"%s\": %m",
+ tmppath, path)));
+ }
+
+ /* make sure we persist */
+ fsync_fname(path, false);
+ fsync_fname("pg_llog/snapshots", true);
+
+ /*
+ * Now there's no way we can loose the dumped state anymore, remember
+ * this as a serialization point.
+ */
+ builder->last_serialized_snapshot = lsn;
+
+out:
+ ReorderBufferSetRestartPoint(builder->reorder,
+ builder->last_serialized_snapshot);
+}
+
+/*
+ * Restore a snapshot into 'builder' if previously one has been stored at the
+ * location indicated by 'lsn'. Returns true if successful, false otherwise.
+ */
+static bool
+SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
+{
+ SnapBuildOnDisk ondisk;
+ int fd;
+ char path[MAXPGPATH];
+ Size sz;
+ int readBytes;
+ pg_crc32 checksum;
+
+ /* no point in loading a snapshot if we're already there */
+ if (builder->state == SNAPBUILD_CONSISTENT)
+ return false;
+
+ sprintf(path, "pg_llog/snapshots/%X-%X.snap",
+ (uint32) (lsn >> 32), (uint32) lsn);
+
+ fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
+
+ if (fd < 0 && errno == ENOENT)
+ return false;
+ else if (fd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m", path)));
+
+ /* ----
+ * Make sure the snapshot had been stored safely to disk, that's normally
+ * cheap.
+ * Note that we do not need PANIC here, nobody will be able to use the
+ * slot without fsyncing, and saving it won't suceed without an fsync()
+ * either...
+ * ----
+ */
+ fsync_fname(path, false);
+ fsync_fname("pg_llog/snapshots", true);
+
+
+ /* read statically sized portion of snapshot */
+ readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
+ if (readBytes != SnapBuildOnDiskConstantSize)
+ {
+ CloseTransientFile(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\", read %d of %d: %m",
+ path, readBytes, (int) SnapBuildOnDiskConstantSize)));
+ }
+
+ if (ondisk.magic != SNAPBUILD_MAGIC)
+ ereport(ERROR,
+ (errmsg("snapbuild state file \"%s\" has wrong magic %u instead of %u",
+ path, ondisk.magic, SNAPBUILD_MAGIC)));
+
+ if (ondisk.version != SNAPBUILD_VERSION)
+ ereport(ERROR,
+ (errmsg("snapbuild state file \"%s\" has unsupported version %u instead of %u",
+ path, ondisk.version, SNAPBUILD_VERSION)));
+
+ INIT_CRC32(checksum);
+ COMP_CRC32(checksum,
+ ((char *) &ondisk) + SnapBuildOnDiskNotChecksummedSize,
+ SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize);
+
+ /* read SnapBuild */
+ readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
+ if (readBytes != sizeof(SnapBuild))
+ {
+ CloseTransientFile(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\", read %d of %d: %m",
+ path, readBytes, (int) sizeof(SnapBuild))));
+ }
+ COMP_CRC32(checksum, &ondisk.builder, sizeof(SnapBuild));
+
+ /* restore running xacts information */
+ sz = sizeof(TransactionId) * ondisk.builder.running.xcnt_space;
+ ondisk.builder.running.xip = MemoryContextAlloc(builder->context, sz);
+ readBytes = read(fd, ondisk.builder.running.xip, sz);
+ if (readBytes != sz)
+ {
+ CloseTransientFile(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\", read %d of %d: %m",
+ path, readBytes, (int) sz)));
+ }
+ COMP_CRC32(checksum, ondisk.builder.running.xip, sz);
+
+ /* restore committed xacts information */
+ sz = sizeof(TransactionId) * ondisk.builder.committed.xcnt;
+ ondisk.builder.committed.xip = MemoryContextAlloc(builder->context, sz);
+ readBytes = read(fd, ondisk.builder.committed.xip, sz);
+ if (readBytes != sz)
+ {
+ CloseTransientFile(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\", read %d of %d: %m",
+ path, readBytes, (int) sz)));
+ }
+ COMP_CRC32(checksum, ondisk.builder.committed.xip, sz);
+
+ CloseTransientFile(fd);
+
+ /* verify checksum of what we've read */
+ if (!EQ_CRC32(checksum, ondisk.checksum))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("snapbuild state file %s: checksum mismatch, is %u, should be %u",
+ path, checksum, ondisk.checksum)));
+
+ /*
+ * ok, we now have a sensible snapshot here, figure out if it has more
+ * information than we have.
+ */
+
+ /*
+ * We are only interested in consistent snapshots for now, comparing
+ * whether one imcomplete snapshot is more "advanced" seems to be
+ * unnecessarily complex.
+ */
+ if (ondisk.builder.state < SNAPBUILD_CONSISTENT)
+ goto snapshot_not_interesting;
+
+ /*
+ * Don't use a snapshot that requires an xmin that we cannot guarantee to
+ * be available.
+ */
+ if (TransactionIdPrecedes(ondisk.builder.xmin, builder->initial_xmin_horizon))
+ goto snapshot_not_interesting;
+
+
+ /* ok, we think the snapshot is sensible, copy over everything important */
+ builder->xmin = ondisk.builder.xmin;
+ builder->xmax = ondisk.builder.xmax;
+ builder->state = ondisk.builder.state;
+
+ builder->committed.xcnt = ondisk.builder.committed.xcnt;
+ /* We only allocated/stored xcnt, not xcnt_space xids ! */
+ /* don't overwrite preallocated xip, if we don't have anything here */
+ if (builder->committed.xcnt > 0)
+ {
+ pfree(builder->committed.xip);
+ builder->committed.xcnt_space = ondisk.builder.committed.xcnt;
+ builder->committed.xip = ondisk.builder.committed.xip;
+ }
+ ondisk.builder.committed.xip = NULL;
+
+ builder->running.xcnt = ondisk.builder.committed.xcnt;
+ if (builder->running.xip)
+ pfree(builder->running.xip);
+ builder->running.xcnt_space = ondisk.builder.committed.xcnt_space;
+ builder->running.xip = ondisk.builder.running.xip;
+
+ /* our snapshot is not interesting anymore, build a new one */
+ if (builder->snapshot != NULL)
+ {
+ SnapBuildSnapDecRefcount(builder->snapshot);
+ }
+ builder->snapshot = SnapBuildBuildSnapshot(builder, InvalidTransactionId);
+ SnapBuildSnapIncRefcount(builder->snapshot);
+
+ ReorderBufferSetRestartPoint(builder->reorder, lsn);
+
+ Assert(builder->state == SNAPBUILD_CONSISTENT);
+
+ ereport(LOG,
+ (errmsg("logical decoding found consistent point at %X/%X",
+ (uint32)(lsn >> 32), (uint32)lsn),
+ errdetail("found initial snapshot in snapbuild file")));
+ return true;
+
+snapshot_not_interesting:
+ if (ondisk.builder.running.xip != NULL)
+ pfree(ondisk.builder.running.xip);
+ if (ondisk.builder.committed.xip != NULL)
+ pfree(ondisk.builder.committed.xip);
+ return false;
+}
+
+/*
+ * Remove all serialized snapshots that are not required anymore because no
+ * slot can need them. This doesn't actually have to run during a checkpoint,
+ * but it's a convenient point to schedule this.
+ *
+ * NB: We run this during checkpoints even if logical decoding is disabled so
+ * we cleanup old slots at some point after it got disabled.
+ */
+void
+CheckPointSnapBuild(void)
+{
+ XLogRecPtr cutoff;
+ XLogRecPtr redo;
+ DIR *snap_dir;
+ struct dirent *snap_de;
+ char path[MAXPGPATH];
+
+ /*
+ * We start of with a minimum of the last redo pointer. No new replication
+ * slot will start before that, so that's a safe upper bound for removal.
+ */
+ redo = GetRedoRecPtr();
+
+ /* now check for the restart ptrs from existing slots */
+ cutoff = ReplicationSlotsComputeLogicalRestartLSN();
+
+ /* don't start earlier than the restart lsn */
+ if (redo < cutoff)
+ cutoff = redo;
+
+ snap_dir = AllocateDir("pg_llog/snapshots");
+ while ((snap_de = ReadDir(snap_dir, "pg_llog/snapshots")) != NULL)
+ {
+ uint32 hi;
+ uint32 lo;
+ XLogRecPtr lsn;
+ struct stat statbuf;
+
+ if (strcmp(snap_de->d_name, ".") == 0 ||
+ strcmp(snap_de->d_name, "..") == 0)
+ continue;
+
+ snprintf(path, MAXPGPATH, "pg_llog/snapshots/%s", snap_de->d_name);
+
+ if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
+ {
+ elog(DEBUG1, "only regular files expected: %s", path);
+ continue;
+ }
+
+ /*
+ * temporary filenames from SnapBuildSerialize() include the LSN and
+ * everything but are postfixed by .$pid.tmp. We can just remove them
+ * the same as other files because there can be none that are currently
+ * being written that are older than cutoff.
+ *
+ * We just log a message if a file doesn't fit the pattern, it's
+ * probably some editors lock/state file or similar...
+ */
+ if (sscanf(snap_de->d_name, "%X-%X.snap", &hi, &lo) != 2)
+ {
+ ereport(LOG,
+ (errmsg("could not parse filename \"%s\"", path)));
+ continue;
+ }
+
+ lsn = ((uint64) hi) << 32 | lo;
+
+ /* check whether we still need it */
+ if (lsn < cutoff || cutoff == InvalidXLogRecPtr)
+ {
+ elog(DEBUG1, "removing snapbuild snapshot %s", path);
+
+ /*
+ * It's not particularly harmful, though strange, if we can't
+ * remove the file here. Don't prevent the checkpoint from
+ * completing, that'd be cure worse than the disease.
+ */
+ if (unlink(path) < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not unlink file \"%s\": %m",
+ path)));
+ continue;
+ }
+ }
+ }
+ FreeDir(snap_dir);
+}
#include "miscadmin.h"
#include "replication/slot.h"
#include "storage/fd.h"
+#include "storage/proc.h"
#include "storage/procarray.h"
/*
/* GUCs */
int max_replication_slots = 0; /* the maximum number of replication slots */
+static void ReplicationSlotDropAcquired(void);
+
/* internal persistency functions */
static void RestoreSlotFromDisk(const char *name);
static void CreateSlotOnDisk(ReplicationSlot *slot);
* Create a new replication slot and mark it as used by this backend.
*
* name: Name of the slot
- * db_specific: changeset extraction is db specific, if the slot is going to
+ * db_specific: logical decoding is db specific; if the slot is going to
* be used for that pass true, otherwise false.
*/
void
-ReplicationSlotCreate(const char *name, bool db_specific)
+ReplicationSlotCreate(const char *name, bool db_specific,
+ ReplicationSlotPersistency persistency)
{
ReplicationSlot *slot = NULL;
int i;
*/
Assert(!slot->in_use);
Assert(!slot->active);
+ slot->data.persistency = persistency;
slot->data.xmin = InvalidTransactionId;
slot->effective_xmin = InvalidTransactionId;
strncpy(NameStr(slot->data.name), name, NAMEDATALEN);
Assert(slot != NULL && slot->active);
- /* Mark slot inactive. We're not freeing it, just disconnecting. */
+ if (slot->data.persistency == RS_EPHEMERAL)
+ {
+ /*
+ * Delete the slot. There is no !PANIC case where this is allowed to
+ * fail, all that may happen is an incomplete cleanup of the on-disk
+ * data.
+ */
+ ReplicationSlotDropAcquired();
+ }
+ else
{
+ /* Mark slot inactive. We're not freeing it, just disconnecting. */
volatile ReplicationSlot *vslot = slot;
SpinLockAcquire(&slot->mutex);
vslot->active = false;
SpinLockRelease(&slot->mutex);
- MyReplicationSlot = NULL;
}
+
+ MyReplicationSlot = NULL;
+
+ /* might not have been set when we've been a plain slot */
+ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ MyPgXact->vacuumFlags &= ~PROC_IN_LOGICAL_DECODING;
+ LWLockRelease(ProcArrayLock);
}
/*
void
ReplicationSlotDrop(const char *name)
{
- ReplicationSlot *slot = NULL;
- int i;
- bool active;
+ Assert(MyReplicationSlot == NULL);
+
+ ReplicationSlotAcquire(name);
+
+ ReplicationSlotDropAcquired();
+}
+
+/*
+ * Permanently drop the currently acquired replication slot which will be
+ * released by the point this function returns.
+ */
+static void
+ReplicationSlotDropAcquired(void)
+{
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
+ ReplicationSlot *slot = MyReplicationSlot;
- ReplicationSlotValidateName(name, ERROR);
+ Assert(MyReplicationSlot != NULL);
+
+ /* slot isn't acquired anymore */
+ MyReplicationSlot = NULL;
/*
- * If some other backend ran this code currently with us, we might both
- * try to free the same slot at the same time. Or we might try to delete
- * a slot with a certain name while someone else was trying to create a
- * slot with the same name.
+ * If some other backend ran this code concurrently with us, we might try
+ * to delete a slot with a certain name while someone else was trying to
+ * create a slot with the same name.
*/
LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
- /* Search for the named slot and mark it active if we find it. */
- LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
- for (i = 0; i < max_replication_slots; i++)
- {
- ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
-
- if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
- {
- volatile ReplicationSlot *vslot = s;
-
- SpinLockAcquire(&s->mutex);
- active = vslot->active;
- vslot->active = true;
- SpinLockRelease(&s->mutex);
- slot = s;
- break;
- }
- }
- LWLockRelease(ReplicationSlotControlLock);
-
- /* If we did not find the slot or it was already active, error out. */
- if (slot == NULL)
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("replication slot \"%s\" does not exist", name)));
- if (active)
- ereport(ERROR,
- (errcode(ERRCODE_OBJECT_IN_USE),
- errmsg("replication slot \"%s\" is already active", name)));
-
/* Generate pathnames. */
sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));
/*
* Rename the slot directory on disk, so that we'll no longer recognize
* this as a valid slot. Note that if this fails, we've got to mark the
- * slot inactive again before bailing out.
+ * slot inactive before bailing out. If we're dropping a ephemeral slot,
+ * we better never fail hard as the caller won't expect the slot to
+ * survive and this might get called during error handling.
*/
- if (rename(path, tmppath) != 0)
+ if (rename(path, tmppath) == 0)
+ {
+ /*
+ * We need to fsync() the directory we just renamed and its parent to
+ * make sure that our changes are on disk in a crash-safe fashion. If
+ * fsync() fails, we can't be sure whether the changes are on disk or
+ * not. For now, we handle that by panicking;
+ * StartupReplicationSlots() will try to straighten it out after
+ * restart.
+ */
+ START_CRIT_SECTION();
+ fsync_fname(tmppath, true);
+ fsync_fname("pg_replslot", true);
+ END_CRIT_SECTION();
+ }
+ else
{
volatile ReplicationSlot *vslot = slot;
+ bool fail_softly = slot->data.persistency == RS_EPHEMERAL;
SpinLockAcquire(&slot->mutex);
vslot->active = false;
SpinLockRelease(&slot->mutex);
- ereport(ERROR,
+ ereport(fail_softly ? WARNING : ERROR,
(errcode_for_file_access(),
errmsg("could not rename \"%s\" to \"%s\": %m",
path, tmppath)));
}
- /*
- * We need to fsync() the directory we just renamed and its parent to make
- * sure that our changes are on disk in a crash-safe fashion. If fsync()
- * fails, we can't be sure whether the changes are on disk or not. For
- * now, we handle that by panicking; StartupReplicationSlots() will
- * try to straighten it out after restart.
- */
- START_CRIT_SECTION();
- fsync_fname(tmppath, true);
- fsync_fname("pg_replslot", true);
- END_CRIT_SECTION();
-
/*
* The slot is definitely gone. Lock out concurrent scans of the array
* long enough to kill it. It's OK to clear the active flag here without
* Slot is dead and doesn't prevent resource removal anymore, recompute
* limits.
*/
- ReplicationSlotsComputeRequiredXmin();
+ ReplicationSlotsComputeRequiredXmin(false);
ReplicationSlotsComputeRequiredLSN();
/*
}
}
+/*
+ * Convert a slot that's marked as RS_DROP_ON_ERROR to a RS_PERSISTENT slot,
+ * guaranteeing it will be there after a eventual crash.
+ */
+void
+ReplicationSlotPersist(void)
+{
+ ReplicationSlot *slot = MyReplicationSlot;
+
+ Assert(slot != NULL);
+ Assert(slot->data.persistency != RS_PERSISTENT);
+
+ {
+ volatile ReplicationSlot *vslot = slot;
+
+ SpinLockAcquire(&slot->mutex);
+ vslot->data.persistency = RS_PERSISTENT;
+ SpinLockRelease(&slot->mutex);
+ }
+
+ ReplicationSlotMarkDirty();
+ ReplicationSlotSave();
+}
+
/*
* Compute the oldest xmin across all slots and store it in the ProcArray.
*/
void
-ReplicationSlotsComputeRequiredXmin(void)
+ReplicationSlotsComputeRequiredXmin(bool already_locked)
{
int i;
TransactionId agg_xmin = InvalidTransactionId;
+ TransactionId agg_catalog_xmin = InvalidTransactionId;
Assert(ReplicationSlotCtl != NULL);
- LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
+ if (!already_locked)
+ LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
+
for (i = 0; i < max_replication_slots; i++)
{
ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
TransactionId effective_xmin;
+ TransactionId effective_catalog_xmin;
if (!s->in_use)
continue;
SpinLockAcquire(&s->mutex);
effective_xmin = vslot->effective_xmin;
+ effective_catalog_xmin = vslot->effective_catalog_xmin;
SpinLockRelease(&s->mutex);
}
(!TransactionIdIsValid(agg_xmin) ||
TransactionIdPrecedes(effective_xmin, agg_xmin)))
agg_xmin = effective_xmin;
+
+ /* check the catalog xmin */
+ if (TransactionIdIsValid(effective_catalog_xmin) &&
+ (!TransactionIdIsValid(agg_catalog_xmin) ||
+ TransactionIdPrecedes(effective_catalog_xmin, agg_catalog_xmin)))
+ agg_catalog_xmin = effective_catalog_xmin;
}
- LWLockRelease(ReplicationSlotControlLock);
- ProcArraySetReplicationSlotXmin(agg_xmin);
+ if (!already_locked)
+ LWLockRelease(ReplicationSlotControlLock);
+
+ ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
}
/*
XLogSetReplicationSlotMinimumLSN(min_required);
}
+/*
+ * Compute the oldest WAL LSN required by *logical* decoding slots..
+ *
+ * Returns InvalidXLogRecPtr if logical decoding is disabled or no logicals
+ * slots exist.
+ *
+ * NB: this returns a value >= ReplicationSlotsComputeRequiredLSN(), since it
+ * ignores physical replication slots.
+ *
+ * The results aren't required frequently, so we don't maintain a precomputed
+ * value like we do for ComputeRequiredLSN() and ComputeRequiredXmin().
+ */
+XLogRecPtr
+ReplicationSlotsComputeLogicalRestartLSN(void)
+{
+ XLogRecPtr result = InvalidXLogRecPtr;
+ int i;
+
+ if (max_replication_slots <= 0)
+ return InvalidXLogRecPtr;
+
+ LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
+
+ for (i = 0; i < max_replication_slots; i++)
+ {
+ volatile ReplicationSlot *s;
+ XLogRecPtr restart_lsn;
+
+ s = &ReplicationSlotCtl->replication_slots[i];
+
+ /* cannot change while ReplicationSlotCtlLock is held */
+ if (!s->in_use)
+ continue;
+
+ /* we're only interested in logical slots */
+ if (s->data.database == InvalidOid)
+ continue;
+
+ /* read once, it's ok if it increases while we're checking */
+ SpinLockAcquire(&s->mutex);
+ restart_lsn = s->data.restart_lsn;
+ SpinLockRelease(&s->mutex);
+
+ if (result == InvalidXLogRecPtr ||
+ restart_lsn < result)
+ result = restart_lsn;
+ }
+
+ LWLockRelease(ReplicationSlotControlLock);
+
+ return result;
+}
+
+/*
+ * ReplicationSlotsCountDBSlots -- count the number of slots that refer to the
+ * passed database oid.
+ *
+ * Returns true if there are any slots referencing the database. *nslots will
+ * be set to the absolute number of slots in the database, *nactive to ones
+ * currently active.
+ */
+bool
+ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
+{
+ int i;
+
+ *nslots = *nactive = 0;
+
+ if (max_replication_slots <= 0)
+ return false;
+
+ LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
+ for (i = 0; i < max_replication_slots; i++)
+ {
+ volatile ReplicationSlot *s;
+
+ s = &ReplicationSlotCtl->replication_slots[i];
+
+ /* cannot change while ReplicationSlotCtlLock is held */
+ if (!s->in_use)
+ continue;
+
+ /* not database specific, skip */
+ if (s->data.database == InvalidOid)
+
+ /* not our database, skip */
+ if (s->data.database != dboid)
+ continue;
+
+ /* count slots with spinlock held */
+ SpinLockAcquire(&s->mutex);
+ (*nslots)++;
+ if (s->active)
+ (*nactive)++;
+ SpinLockRelease(&s->mutex);
+ }
+ LWLockRelease(ReplicationSlotControlLock);
+
+ if (*nslots > 0)
+ return true;
+ return false;
+}
+
+
/*
* Check whether the server's configuration supports using replication
* slots.
return;
/* Now that we have recovered all the data, compute replication xmin */
- ReplicationSlotsComputeRequiredXmin();
+ ReplicationSlotsComputeRequiredXmin(false);
ReplicationSlotsComputeRequiredLSN();
}
memcpy(&slot->data, &cp.slotdata,
sizeof(ReplicationSlotPersistentData));
+ /* Don't restore the slot if it's not parked as persistent. */
+ if (slot->data.persistency != RS_PERSISTENT)
+ return;
+
/* initialize in memory state */
slot->effective_xmin = cp.slotdata.xmin;
+ slot->effective_catalog_xmin = cp.slotdata.catalog_xmin;
+
+ slot->candidate_catalog_xmin = InvalidTransactionId;
+ slot->candidate_xmin_lsn = InvalidXLogRecPtr;
+ slot->candidate_restart_lsn = InvalidXLogRecPtr;
+ slot->candidate_restart_valid = InvalidXLogRecPtr;
+
slot->in_use = true;
slot->active = false;
#include "funcapi.h"
#include "miscadmin.h"
+
#include "access/htup_details.h"
+#include "replication/slot.h"
+#include "replication/logical.h"
+#include "replication/logicalfuncs.h"
#include "utils/builtins.h"
#include "utils/pg_lsn.h"
-#include "replication/slot.h"
-
-Datum pg_create_physical_replication_slot(PG_FUNCTION_ARGS);
-Datum pg_drop_replication_slot(PG_FUNCTION_ARGS);
static void
check_permissions(void)
elog(ERROR, "return type must be a row type");
/* acquire replication slot, this will check for conflicting names*/
- ReplicationSlotCreate(NameStr(*name), false);
+ ReplicationSlotCreate(NameStr(*name), false, RS_PERSISTENT);
values[0] = NameGetDatum(&MyReplicationSlot->data.name);
PG_RETURN_DATUM(result);
}
+
+/*
+ * SQL function for creating a new logical replication slot.
+ */
+Datum
+pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
+{
+ Name name = PG_GETARG_NAME(0);
+ Name plugin = PG_GETARG_NAME(1);
+
+ LogicalDecodingContext *ctx = NULL;
+
+ TupleDesc tupdesc;
+ HeapTuple tuple;
+ Datum result;
+ Datum values[2];
+ bool nulls[2];
+
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ check_permissions();
+
+ CheckLogicalDecodingRequirements();
+
+ Assert(!MyReplicationSlot);
+
+ /*
+ * Acquire a logical decoding slot, this will check for conflicting
+ * names.
+ */
+ ReplicationSlotCreate(NameStr(*name), true, RS_EPHEMERAL);
+
+ /*
+ * Create logical decoding context, to build the initial snapshot.
+ */
+ ctx = CreateInitDecodingContext(
+ NameStr(*plugin), NIL,
+ logical_read_local_xlog_page, NULL, NULL);
+
+ /* build initial snapshot, might take a while */
+ DecodingContextFindStartpoint(ctx);
+
+ values[0] = CStringGetTextDatum(NameStr(MyReplicationSlot->data.name));
+ values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush);
+
+ /* don't need the decoding context anymore */
+ FreeDecodingContext(ctx);
+
+ memset(nulls, 0, sizeof(nulls));
+
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+ result = HeapTupleGetDatum(tuple);
+
+ /* ok, slot is now fully created, mark it as persistent */
+ ReplicationSlotPersist();
+ ReplicationSlotRelease();
+
+ PG_RETURN_DATUM(result);
+}
+
+
/*
* SQL function for dropping a replication slot.
*/
Datum
pg_get_replication_slots(PG_FUNCTION_ARGS)
{
-#define PG_STAT_GET_REPLICATION_SLOTS_COLS 6
+#define PG_GET_REPLICATION_SLOTS_COLS 8
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
TupleDesc tupdesc;
Tuplestorestate *tupstore;
for (slotno = 0; slotno < max_replication_slots; slotno++)
{
ReplicationSlot *slot = &ReplicationSlotCtl->replication_slots[slotno];
- Datum values[PG_STAT_GET_REPLICATION_SLOTS_COLS];
- bool nulls[PG_STAT_GET_REPLICATION_SLOTS_COLS];
+ Datum values[PG_GET_REPLICATION_SLOTS_COLS];
+ bool nulls[PG_GET_REPLICATION_SLOTS_COLS];
TransactionId xmin;
+ TransactionId catalog_xmin;
XLogRecPtr restart_lsn;
bool active;
Oid database;
NameData slot_name;
-
+ NameData plugin;
int i;
SpinLockAcquire(&slot->mutex);
else
{
xmin = slot->data.xmin;
+ catalog_xmin = slot->data.catalog_xmin;
database = slot->data.database;
restart_lsn = slot->data.restart_lsn;
namecpy(&slot_name, &slot->data.name);
+ namecpy(&plugin, &slot->data.plugin);
active = slot->active;
}
i = 0;
values[i++] = NameGetDatum(&slot_name);
+
+ if (database == InvalidOid)
+ nulls[i++] = true;
+ else
+ values[i++] = NameGetDatum(&plugin);
+
if (database == InvalidOid)
values[i++] = CStringGetTextDatum("physical");
else
values[i++] = CStringGetTextDatum("logical");
+
if (database == InvalidOid)
nulls[i++] = true;
else
values[i++] = database;
+
values[i++] = BoolGetDatum(active);
+
if (xmin != InvalidTransactionId)
values[i++] = TransactionIdGetDatum(xmin);
else
nulls[i++] = true;
+
+ if (catalog_xmin != InvalidTransactionId)
+ values[i++] = TransactionIdGetDatum(catalog_xmin);
+ else
+ nulls[i++] = true;
+
if (restart_lsn != InvalidTransactionId)
values[i++] = LSNGetDatum(restart_lsn);
else
* everything else has been checked.
*/
if (hot_standby_feedback)
- xmin = GetOldestXmin(true, false);
+ xmin = GetOldestXmin(NULL, false);
else
xmin = InvalidTransactionId;
#include "replication/basebackup.h"
#include "replication/slot.h"
#include "replication/syncrep.h"
+#include "replication/slot.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
if (MyReplicationSlot->data.database != InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- (errmsg("cannot use a replication slot created for changeset extraction for streaming replication"))));
+ (errmsg("cannot use a logical replication slot for physical replication"))));
}
/*
sendTimeLineIsHistoric = false;
sendTimeLine = ThisTimeLineID;
- ReplicationSlotCreate(cmd->slotname, cmd->kind == REPLICATION_KIND_LOGICAL);
+ ReplicationSlotCreate(cmd->slotname,
+ cmd->kind == REPLICATION_KIND_LOGICAL,
+ RS_PERSISTENT);
initStringInfo(&output_message);
if (cmd->kind == REPLICATION_KIND_PHYSICAL)
StartReplication(cmd);
else
- elog(ERROR, "cannot handle changeset extraction yet");
+ elog(ERROR, "cannot handle logical decoding yet");
break;
}
if (MyReplicationSlot && flushPtr != InvalidXLogRecPtr)
{
if (MyReplicationSlot->data.database != InvalidOid)
- elog(ERROR, "cannot handle changeset extraction yet");
+ elog(ERROR, "cannot handle logical decoding yet");
else
PhysicalConfirmReceivedLocation(flushPtr);
}
if (changed)
{
ReplicationSlotMarkDirty();
- ReplicationSlotsComputeRequiredXmin();
+ ReplicationSlotsComputeRequiredXmin(false);
}
}
#include "access/transam.h"
#include "access/xact.h"
#include "access/twophase.h"
+#include "catalog/catalog.h"
#include "miscadmin.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/spin.h"
#include "utils/builtins.h"
+#include "utils/rel.h"
#include "utils/snapmgr.h"
/* oldest xmin of any replication slot */
TransactionId replication_slot_xmin;
+ /* oldest catalog xmin of any replication slot */
+ TransactionId replication_slot_catalog_xmin;
/*
* We declare pgprocnos[] as 1 entry because C wants a fixed-size array,
* GetOldestXmin -- returns oldest transaction that was running
* when any current transaction was started.
*
- * If allDbs is TRUE then all backends are considered; if allDbs is FALSE
- * then only backends running in my own database are considered.
+ * If rel is NULL or a shared relation, all backends are considered, otherwise
+ * only backends running in this database are considered.
*
* If ignoreVacuum is TRUE then backends with the PROC_IN_VACUUM flag set are
* ignored.
*
- * This is used by VACUUM to decide which deleted tuples must be preserved
- * in a table. allDbs = TRUE is needed for shared relations, but allDbs =
- * FALSE is sufficient for non-shared relations, since only backends in my
- * own database could ever see the tuples in them. Also, we can ignore
- * concurrently running lazy VACUUMs because (a) they must be working on other
- * tables, and (b) they don't need to do snapshot-based lookups.
+ * This is used by VACUUM to decide which deleted tuples must be preserved in
+ * the passed in table. For shared relations backends in all databases must be
+ * considered, but for non-shared relations that's not required, since only
+ * backends in my own database could ever see the tuples in them. Also, we can
+ * ignore concurrently running lazy VACUUMs because (a) they must be working
+ * on other tables, and (b) they don't need to do snapshot-based lookups.
*
- * This is also used to determine where to truncate pg_subtrans. allDbs
- * must be TRUE for that case, and ignoreVacuum FALSE.
+ * This is also used to determine where to truncate pg_subtrans. For that
+ * backends in all databases have to be considered, so rel = NULL has to be
+ * passed in.
*
* Note: we include all currently running xids in the set of considered xids.
* This ensures that if a just-started xact has not yet set its snapshot,
* backwards on repeated calls. The calculated value is conservative, so that
* anything older is definitely not considered as running by anyone anymore,
* but the exact value calculated depends on a number of things. For example,
- * if allDbs is FALSE and there are no transactions running in the current
+ * if rel = NULL and there are no transactions running in the current
* database, GetOldestXmin() returns latestCompletedXid. If a transaction
* begins after that, its xmin will include in-progress transactions in other
* databases that started earlier, so another call will return a lower value.
* GetOldestXmin() move backwards, with no consequences for data integrity.
*/
TransactionId
-GetOldestXmin(bool allDbs, bool ignoreVacuum)
+GetOldestXmin(Relation rel, bool ignoreVacuum)
{
ProcArrayStruct *arrayP = procArray;
TransactionId result;
int index;
+ bool allDbs;
+
volatile TransactionId replication_slot_xmin = InvalidTransactionId;
+ volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
+
+ /*
+ * If we're not computing a relation specific limit, or if a shared
+ * relation has been passed in, backends in all databases have to be
+ * considered.
+ */
+ allDbs = rel == NULL || rel->rd_rel->relisshared;
/* Cannot look for individual databases during recovery */
Assert(allDbs || !RecoveryInProgress());
volatile PGPROC *proc = &allProcs[pgprocno];
volatile PGXACT *pgxact = &allPgXact[pgprocno];
+ /*
+ * Backend is doing logical decoding which manages xmin separately,
+ * check below.
+ */
+ if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
+ continue;
+
if (ignoreVacuum && (pgxact->vacuumFlags & PROC_IN_VACUUM))
continue;
/* fetch into volatile var while ProcArrayLock is held */
replication_slot_xmin = procArray->replication_slot_xmin;
+ replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
if (RecoveryInProgress())
{
NormalTransactionIdPrecedes(replication_slot_xmin, result))
result = replication_slot_xmin;
+ /*
+ * After locks have been released and defer_cleanup_age has been applied,
+ * check whether we need to back up further to make logical decoding
+ * possible. We need to do so if we're computing the global limit (rel =
+ * NULL) or if the passed relation is a catalog relation of some kind.
+ */
+ if ((rel == NULL ||
+ RelationIsAccessibleInLogicalDecoding(rel)) &&
+ TransactionIdIsValid(replication_slot_catalog_xmin) &&
+ NormalTransactionIdPrecedes(replication_slot_catalog_xmin, result))
+ result = replication_slot_catalog_xmin;
+
return result;
}
* RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
* running transactions, except those running LAZY VACUUM). This is
* the same computation done by GetOldestXmin(true, true).
+ * RecentGlobalDataXmin: the global xmin for non-catalog tables
+ * >= RecentGlobalXmin
*
* Note: this function should probably not be called with an argument that's
* not statically allocated (see xip allocation below).
int subcount = 0;
bool suboverflowed = false;
volatile TransactionId replication_slot_xmin = InvalidTransactionId;
+ volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
Assert(snapshot != NULL);
volatile PGXACT *pgxact = &allPgXact[pgprocno];
TransactionId xid;
+ /*
+ * Backend is doing logical decoding which manages xmin
+ * separately, check below.
+ */
+ if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
+ continue;
+
/* Ignore procs running LAZY VACUUM */
if (pgxact->vacuumFlags & PROC_IN_VACUUM)
continue;
/* fetch into volatile var while ProcArrayLock is held */
replication_slot_xmin = procArray->replication_slot_xmin;
+ replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
if (!TransactionIdIsValid(MyPgXact->xmin))
MyPgXact->xmin = TransactionXmin = xmin;
NormalTransactionIdPrecedes(replication_slot_xmin, RecentGlobalXmin))
RecentGlobalXmin = replication_slot_xmin;
+ /* Non-catalog tables can be vacuumed if older than this xid */
+ RecentGlobalDataXmin = RecentGlobalXmin;
+
+ /*
+ * Check whether there's a replication slot requiring an older catalog
+ * xmin.
+ */
+ if (TransactionIdIsNormal(replication_slot_catalog_xmin) &&
+ NormalTransactionIdPrecedes(replication_slot_catalog_xmin, RecentGlobalXmin))
+ RecentGlobalXmin = replication_slot_catalog_xmin;
+
RecentXmin = xmin;
snapshot->xmin = xmin;
* Similar to GetSnapshotData but returns more information. We include
* all PGXACTs with an assigned TransactionId, even VACUUM processes.
*
- * We acquire XidGenLock, but the caller is responsible for releasing it.
- * This ensures that no new XIDs enter the proc array until the caller has
- * WAL-logged this snapshot, and releases the lock.
+ * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
+ * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
+ * array until the caller has WAL-logged this snapshot, and releases the
+ * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
+ * lock is released.
*
* The returned data structure is statically allocated; caller should not
* modify it, and must not assume it is valid past the next call.
}
}
+ /*
+ * It's important *not* to include the limits set by slots here because
+ * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
+ * were to be included here the initial value could never increase because
+ * of a circular dependency where slots only increase their limits when
+ * running xacts increases oldestRunningXid and running xacts only
+ * increases if slots do.
+ */
+
CurrentRunningXacts->xcnt = count - subcount;
CurrentRunningXacts->subxcnt = subcount;
CurrentRunningXacts->subxid_overflow = suboverflowed;
CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
- /* We don't release XidGenLock here, the caller is responsible for that */
- LWLockRelease(ProcArrayLock);
-
Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
+ /* We don't release the locks here, the caller is responsible for that */
+
return CurrentRunningXacts;
}
return oldestRunningXid;
}
+/*
+ * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
+ *
+ * Returns the oldest xid that we can guarantee not to have been affected by
+ * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
+ * transaction aborted. Note that the value can (and most of the time will) be
+ * much more conservative than what really has been affected by vacuum, but we
+ * currently don't have better data available.
+ *
+ * This is useful to initalize the cutoff xid after which a new changeset
+ * extraction replication slot can start decoding changes.
+ *
+ * Must be called with ProcArrayLock held either shared or exclusively,
+ * although most callers will want to use exclusive mode since it is expected
+ * that the caller will immediately use the xid to peg the xmin horizon.
+ */
+TransactionId
+GetOldestSafeDecodingTransactionId(void)
+{
+ ProcArrayStruct *arrayP = procArray;
+ TransactionId oldestSafeXid;
+ int index;
+ bool recovery_in_progress = RecoveryInProgress();
+
+ Assert(LWLockHeldByMe(ProcArrayLock));
+
+ /*
+ * Acquire XidGenLock, so no transactions can acquire an xid while we're
+ * running. If no transaction with xid were running concurrently a new xid
+ * could influence the the RecentXmin et al.
+ *
+ * We initialize the computation to nextXid since that's guaranteed to be
+ * a safe, albeit pessimal, value.
+ */
+ LWLockAcquire(XidGenLock, LW_SHARED);
+ oldestSafeXid = ShmemVariableCache->nextXid;
+
+ /*
+ * If there's already a slot pegging the xmin horizon, we can start with
+ * that value, it's guaranteed to be safe since it's computed by this
+ * routine initally and has been enforced since.
+ */
+ if (TransactionIdIsValid(procArray->replication_slot_catalog_xmin) &&
+ TransactionIdPrecedes(procArray->replication_slot_catalog_xmin,
+ oldestSafeXid))
+ oldestSafeXid = procArray->replication_slot_catalog_xmin;
+
+ /*
+ * If we're not in recovery, we walk over the procarray and collect the
+ * lowest xid. Since we're called with ProcArrayLock held and have
+ * acquired XidGenLock, no entries can vanish concurrently, since
+ * PGXACT->xid is only set with XidGenLock held and only cleared with
+ * ProcArrayLock held.
+ *
+ * In recovery we can't lower the safe value besides what we've computed
+ * above, so we'll have to wait a bit longer there. We unfortunately can
+ * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
+ * machinery can miss values and return an older value than is safe.
+ */
+ if (!recovery_in_progress)
+ {
+ /*
+ * Spin over procArray collecting all min(PGXACT->xid)
+ */
+ for (index = 0; index < arrayP->numProcs; index++)
+ {
+ int pgprocno = arrayP->pgprocnos[index];
+ volatile PGXACT *pgxact = &allPgXact[pgprocno];
+ TransactionId xid;
+
+ /* Fetch xid just once - see GetNewTransactionId */
+ xid = pgxact->xid;
+
+ if (!TransactionIdIsNormal(xid))
+ continue;
+
+ if (TransactionIdPrecedes(xid, oldestSafeXid))
+ oldestSafeXid = xid;
+ }
+ }
+
+ LWLockRelease(XidGenLock);
+
+ return oldestSafeXid;
+}
+
/*
* GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
* delaying checkpoint because they have critical actions in progress.
* replicaton slots.
*/
void
-ProcArraySetReplicationSlotXmin(TransactionId xmin)
+ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin,
+ bool already_locked)
{
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
+
+ if (!already_locked)
+ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+
procArray->replication_slot_xmin = xmin;
+ procArray->replication_slot_catalog_xmin = catalog_xmin;
+
+ if (!already_locked)
+ LWLockRelease(ProcArrayLock);
+}
+
+/*
+ * ProcArrayGetReplicationSlotXmin
+ *
+ * Return the current slot xmin limits. That's useful to be able to remove
+ * data that's older than those limits.
+ */
+void
+ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
+ TransactionId *catalog_xmin)
+{
+ LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+ if (xmin != NULL)
+ *xmin = procArray->replication_slot_xmin;
+
+ if (catalog_xmin != NULL)
+ *catalog_xmin = procArray->replication_slot_catalog_xmin;
+
LWLockRelease(ProcArrayLock);
}
/*
* Log details of the current snapshot to WAL. This allows the snapshot state
- * to be reconstructed on the standby.
+ * to be reconstructed on the standby and for logical decoding.
+ *
+ * This is used for Hot Standby as follows:
*
* We can move directly to STANDBY_SNAPSHOT_READY at startup if we
* start from a shutdown checkpoint because we know nothing was running
* Zero xids should no longer be possible, but we may be replaying WAL
* from a time when they were possible.
*
+ * For logical decoding only the running xacts information is needed;
+ * there's no need to look at the locking information, but it's logged anyway,
+ * as there's no independent knob to just enable logical decoding. For
+ * details of how this is used, check snapbuild.c's introductory comment.
+ *
+ *
* Returns the RecPtr of the last inserted record.
*/
XLogRecPtr
* record we write, because standby will open up when it sees this.
*/
running = GetRunningTransactionData();
+
+ /*
+ * GetRunningTransactionData() acquired ProcArrayLock, we must release
+ * it. For Hot Standby this can be done before inserting the WAL record
+ * because ProcArrayApplyRecoveryInfo() rechecks the commit status using
+ * the clog. For logical decoding, though, the lock can't be released
+ * early becuase the clog might be "in the future" from the POV of the
+ * historic snapshot. This would allow for situations where we're waiting
+ * for the end of a transaction listed in the xl_running_xacts record
+ * which, according to the WAL, have commit before the xl_running_xacts
+ * record. Fortunately this routine isn't executed frequently, and it's
+ * only a shared lock.
+ */
+ if (wal_level < WAL_LEVEL_LOGICAL)
+ LWLockRelease(ProcArrayLock);
+
recptr = LogCurrentRunningXacts(running);
+ /* Release lock if we kept it longer ... */
+ if (wal_level >= WAL_LEVEL_LOGICAL)
+ LWLockRelease(ProcArrayLock);
+
/* GetRunningTransactionData() acquired XidGenLock, we must release it */
LWLockRelease(XidGenLock);
/* Make sure we're out of the sync rep lists */
SyncRepCleanupAtProcExit();
- /* Make sure active replication slots are released */
- if (MyReplicationSlot != NULL)
- ReplicationSlotRelease();
-
#ifdef USE_ASSERT_CHECKING
if (assert_enabled)
{
*/
LWLockReleaseAll();
+ /* Make sure active replication slots are released */
+ if (MyReplicationSlot != NULL)
+ ReplicationSlotRelease();
+
/*
* Clear MyProc first; then disown the process latch. This is so that
* signal handlers won't try to clear the process latch after it's no
#include "pg_getopt.h"
#include "postmaster/autovacuum.h"
#include "postmaster/postmaster.h"
+#include "replication/slot.h"
#include "replication/walsender.h"
#include "rewrite/rewriteHandler.h"
#include "storage/bufmgr.h"
if (am_walsender)
WalSndErrorCleanup();
+ /*
+ * We can't release replication slots inside AbortTransaction() as we
+ * need to be able to start and abort transactions while having a slot
+ * acquired. But we never need to hold them across top level errors,
+ * so releasing here is fine. There's another cleanup in ProcKill()
+ * ensuring we'll correctly cleanup on FATAL errors as well.
+ */
+ if (MyReplicationSlot != NULL)
+ ReplicationSlotRelease();
+
/*
* Now return to normal top-level context and clear ErrorContext for
* next time.
* Only the local caches are flushed; this does not transmit the message
* to other backends.
*/
-static void
+void
LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
{
if (msg->id >= 0)
* since that tells us we've lost some shared-inval messages and hence
* don't know what needs to be invalidated.
*/
-static void
+void
InvalidateSystemCaches(void)
{
int i;
#include "utils/memutils.h"
#include "utils/relmapper.h"
#include "utils/resowner_private.h"
+#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
bool isshared, bool hasoids,
int natts, const FormData_pg_attribute *attrs);
-static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
+static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic);
static Relation AllocateRelationDesc(Form_pg_class relp);
static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
static void RelationBuildTupleDesc(Relation relation);
* and must eventually be freed with heap_freetuple.
*/
static HeapTuple
-ScanPgRelation(Oid targetRelId, bool indexOK)
+ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic)
{
HeapTuple pg_class_tuple;
Relation pg_class_desc;
SysScanDesc pg_class_scan;
ScanKeyData key[1];
+ Snapshot snapshot;
/*
* If something goes wrong during backend startup, we might find ourselves
* scan by setting indexOK == false.
*/
pg_class_desc = heap_open(RelationRelationId, AccessShareLock);
+
+ /*
+ * The caller might need a tuple that's newer than the one the historic
+ * snapshot; currently the only case requiring to do so is looking up the
+ * relfilenode of non mapped system relations during decoding.
+ */
+ if (force_non_historic)
+ snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId);
+ else
+ snapshot = GetCatalogSnapshot(RelationRelationId);
+
pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId,
indexOK && criticalRelcachesBuilt,
- NULL,
+ snapshot,
1, key);
pg_class_tuple = systable_getnext(pg_class_scan);
/*
* find the tuple in pg_class corresponding to the given relation id
*/
- pg_class_tuple = ScanPgRelation(targetRelId, true);
+ pg_class_tuple = ScanPgRelation(targetRelId, true, false);
/*
* if no such tuple exists, return NULL
relation->rd_node.dbNode = InvalidOid;
else
relation->rd_node.dbNode = MyDatabaseId;
+
if (relation->rd_rel->relfilenode)
+ {
+ /*
+ * Even if we are using a decoding snapshot that doesn't represent
+ * the current state of the catalog we need to make sure the
+ * filenode points to the current file since the older file will
+ * be gone (or truncated). The new file will still contain older
+ * rows so lookups in them will work correctly. This wouldn't work
+ * correctly if rewrites were allowed to change the schema in a
+ * noncompatible way, but those are prevented both on catalog
+ * tables and on user tables declared as additional catalog
+ * tables.
+ */
+ if (HistoricSnapshotActive()
+ && RelationIsAccessibleInLogicalDecoding(relation)
+ && IsTransactionState())
+ {
+ HeapTuple phys_tuple;
+ Form_pg_class physrel;
+
+ phys_tuple = ScanPgRelation(RelationGetRelid(relation),
+ RelationGetRelid(relation) != ClassOidIndexId,
+ true);
+ if (!HeapTupleIsValid(phys_tuple))
+ elog(ERROR, "could not find pg_class entry for %u",
+ RelationGetRelid(relation));
+ physrel = (Form_pg_class) GETSTRUCT(phys_tuple);
+
+ relation->rd_rel->reltablespace = physrel->reltablespace;
+ relation->rd_rel->relfilenode = physrel->relfilenode;
+ heap_freetuple(phys_tuple);
+ }
+
relation->rd_node.relNode = relation->rd_rel->relfilenode;
+ }
else
{
/* Consult the relation mapper */
* for pg_class_oid_index ...
*/
indexOK = (RelationGetRelid(relation) != ClassOidIndexId);
- pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK);
+ pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false);
if (!HeapTupleIsValid(pg_class_tuple))
elog(ERROR, "could not find pg_class tuple for index %u",
RelationGetRelid(relation));
* have regd_count = 1 and are counted in RegisteredSnapshots, but are not
* tracked by any resource owner.
*
+ * The same is true for historic snapshots used during logical decoding,
+ * their lifetime is managed separately (as they life longer as one xact.c
+ * transaction).
+ *
* These arrangements let us reset MyPgXact->xmin when there are no snapshots
* referenced by this transaction. (One possible improvement would be to be
* able to advance Xmin when the snapshot with the earliest Xmin is no longer
*/
static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC};
static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC};
-static SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC};
+SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC};
/* Pointers to valid snapshots */
static Snapshot CurrentSnapshot = NULL;
static Snapshot SecondarySnapshot = NULL;
static Snapshot CatalogSnapshot = NULL;
+static Snapshot HistoricSnapshot = NULL;
/*
* Staleness detection for CatalogSnapshot.
* for the convenience of TransactionIdIsInProgress: even in bootstrap
* mode, we don't want it to say that BootstrapTransactionId is in progress.
*
- * RecentGlobalXmin is initialized to InvalidTransactionId, to ensure that no
- * one tries to use a stale value. Readers should ensure that it has been set
- * to something else before using it.
+ * RecentGlobalXmin and RecentGlobalDataXmin are initialized to
+ * InvalidTransactionId, to ensure that no one tries to use a stale
+ * value. Readers should ensure that it has been set to something else
+ * before using it.
*/
TransactionId TransactionXmin = FirstNormalTransactionId;
TransactionId RecentXmin = FirstNormalTransactionId;
TransactionId RecentGlobalXmin = InvalidTransactionId;
+TransactionId RecentGlobalDataXmin = InvalidTransactionId;
+
+/* (table, ctid) => (cmin, cmax) mapping during timetravel */
+static HTAB *tuplecid_data = NULL;
/*
* Elements of the active snapshot stack.
Snapshot
GetTransactionSnapshot(void)
{
+ /*
+ * Return historic snapshot if doing logical decoding. We'll never
+ * need a non-historic transaction snapshot in this (sub-)transaction, so
+ * there's no need to be careful to set one up for later calls to
+ * GetTransactionSnapshot().
+ */
+ if (HistoricSnapshotActive())
+ {
+ Assert(!FirstSnapshotSet);
+ return HistoricSnapshot;
+ }
+
/* First call in transaction? */
if (!FirstSnapshotSet)
{
Snapshot
GetLatestSnapshot(void)
{
+ /*
+ * So far there are no cases requiring support for GetLatestSnapshot()
+ * during logical decoding, but it wouldn't be hard to add if
+ * required.
+ */
+ Assert(!HistoricSnapshotActive());
+
/* If first call in transaction, go ahead and set the xact snapshot */
if (!FirstSnapshotSet)
return GetTransactionSnapshot();
*/
Snapshot
GetCatalogSnapshot(Oid relid)
+{
+ /*
+ * Return historic snapshot if we're doing logical decoding, but
+ * return a non-historic, snapshot if we temporarily are doing up2date
+ * lookups.
+ */
+ if (HistoricSnapshotActive())
+ return HistoricSnapshot;
+
+ return GetNonHistoricCatalogSnapshot(relid);
+}
+
+/*
+ * GetNonHistoricCatalogSnapshot
+ * Get a snapshot that is sufficiently up-to-date for scan of the system
+ * catalog with the specified OID, even while historic snapshots are set
+ * up.
+ */
+Snapshot
+GetNonHistoricCatalogSnapshot(Oid relid)
{
/*
* If the caller is trying to scan a relation that has no syscache,
Assert(RegisteredSnapshots == 0);
Assert(FirstXactSnapshot == NULL);
+ Assert(HistoricSnapshotActive());
/*
* Even though we are not going to use the snapshot it computes, we must
* Returns the token (the file name) that can be used to import this
* snapshot.
*/
-static char *
+char *
ExportSnapshot(Snapshot snapshot)
{
TransactionId topXid;
return false;
}
+
+/*
+ * Setup a snapshot that replaces normal catalog snapshots that allows catalog
+ * access to behave just like it did at a certain point in the past.
+ *
+ * Needed for logical decoding.
+ */
+void
+SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
+{
+ Assert(historic_snapshot != NULL);
+
+ /* setup the timetravel snapshot */
+ HistoricSnapshot = historic_snapshot;
+
+ /* setup (cmin, cmax) lookup hash */
+ tuplecid_data = tuplecids;
+}
+
+
+/*
+ * Make catalog snapshots behave normally again.
+ */
+void
+TeardownHistoricSnapshot(bool is_error)
+{
+ HistoricSnapshot = NULL;
+ tuplecid_data = NULL;
+}
+
+bool
+HistoricSnapshotActive(void)
+{
+ return HistoricSnapshot != NULL;
+}
+
+HTAB *
+HistoricSnapshotGetTupleCids(void)
+{
+ Assert(HistoricSnapshotActive());
+ return tuplecid_data;
+}
#include "access/xact.h"
#include "storage/bufmgr.h"
#include "storage/procarray.h"
+#include "utils/builtins.h"
+#include "utils/combocid.h"
+#include "utils/snapmgr.h"
#include "utils/tqual.h"
/* local functions */
static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);
-
/*
* SetHintBits()
*
*/
return true;
}
+
+/*
+ * check whether the transaciont id 'xid' in in the pre-sorted array 'xip'.
+ */
+static bool
+TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
+{
+ return bsearch(&xid, xip, num,
+ sizeof(TransactionId), xidComparator) != NULL;
+}
+
+/*
+ * See the comments for HeapTupleSatisfiesMVCC for the semantics this function
+ * obeys.
+ *
+ * Only usable on tuples from catalog tables!
+ *
+ * We don't need to support HEAP_MOVED_(IN|OFF) for now because we only support
+ * reading catalog pages which couldn't have been created in an older version.
+ *
+ * We don't set any hint bits in here as it seems unlikely to be beneficial as
+ * those should already be set by normal access and it seems to be too
+ * dangerous to do so as the semantics of doing so during timetravel are more
+ * complicated than when dealing "only" with the present.
+ */
+bool
+HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
+ Buffer buffer)
+{
+ HeapTupleHeader tuple = htup->t_data;
+ TransactionId xmin = HeapTupleHeaderGetXmin(tuple);
+ TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple);
+
+ Assert(ItemPointerIsValid(&htup->t_self));
+ Assert(htup->t_tableOid != InvalidOid);
+
+ /* inserting transaction aborted */
+ if (HeapTupleHeaderXminInvalid(tuple))
+ {
+ Assert(!TransactionIdDidCommit(xmin));
+ return false;
+ }
+ /* check if its one of our txids, toplevel is also in there */
+ else if (TransactionIdInArray(xmin, snapshot->subxip, snapshot->subxcnt))
+ {
+ bool resolved;
+ CommandId cmin = HeapTupleHeaderGetRawCommandId(tuple);
+ CommandId cmax = InvalidCommandId;
+
+ /*
+ * another transaction might have (tried to) delete this tuple or
+ * cmin/cmax was stored in a combocid. S we need to to lookup the
+ * actual values externally.
+ */
+ resolved = ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(), snapshot,
+ htup, buffer,
+ &cmin, &cmax);
+
+ if (!resolved)
+ elog(ERROR, "could not resolve cmin/cmax of catalog tuple");
+
+ Assert(cmin != InvalidCommandId);
+
+ if (cmin >= snapshot->curcid)
+ return false; /* inserted after scan started */
+ /* fall through */
+ }
+ /* committed before our xmin horizon. Do a normal visibility check. */
+ else if (TransactionIdPrecedes(xmin, snapshot->xmin))
+ {
+ Assert(!(HeapTupleHeaderXminCommitted(tuple) &&
+ !TransactionIdDidCommit(xmin)));
+
+ /* check for hint bit first, consult clog afterwards */
+ if (!HeapTupleHeaderXminCommitted(tuple) &&
+ !TransactionIdDidCommit(xmin))
+ return false;
+ /* fall through */
+ }
+ /* beyond our xmax horizon, i.e. invisible */
+ else if (TransactionIdFollowsOrEquals(xmin, snapshot->xmax))
+ {
+ return false;
+ }
+ /* check if it's a committed transaction in [xmin, xmax) */
+ else if(TransactionIdInArray(xmin, snapshot->xip, snapshot->xcnt))
+ {
+ /* fall through */
+ }
+ /*
+ * none of the above, i.e. between [xmin, xmax) but hasn't
+ * committed. I.e. invisible.
+ */
+ else
+ {
+ return false;
+ }
+
+ /* at this point we know xmin is visible, go on to check xmax */
+
+ /* xid invalid or aborted */
+ if (tuple->t_infomask & HEAP_XMAX_INVALID)
+ return true;
+ /* locked tuples are always visible */
+ else if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
+ return true;
+ /*
+ * We can see multis here if we're looking at user tables or if
+ * somebody SELECT ... FOR SHARE/UPDATE a system table.
+ */
+ else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ xmax = HeapTupleGetUpdateXid(tuple);
+ }
+
+ /* check if its one of our txids, toplevel is also in there */
+ if (TransactionIdInArray(xmax, snapshot->subxip, snapshot->subxcnt))
+ {
+ bool resolved;
+ CommandId cmin;
+ CommandId cmax = HeapTupleHeaderGetRawCommandId(tuple);
+
+ /* Lookup actual cmin/cmax values */
+ resolved = ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(), snapshot,
+ htup, buffer,
+ &cmin, &cmax);
+
+ if (!resolved)
+ elog(ERROR, "could not resolve combocid to cmax");
+
+ Assert(cmax != InvalidCommandId);
+
+ if (cmax >= snapshot->curcid)
+ return true; /* deleted after scan started */
+ else
+ return false; /* deleted before scan started */
+ }
+ /* below xmin horizon, normal transaction state is valid */
+ else if (TransactionIdPrecedes(xmax, snapshot->xmin))
+ {
+ Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED &&
+ !TransactionIdDidCommit(xmax)));
+
+ /* check hint bit first */
+ if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
+ return false;
+
+ /* check clog */
+ return !TransactionIdDidCommit(xmax);
+ }
+ /* above xmax horizon, we cannot possibly see the deleting transaction */
+ else if (TransactionIdFollowsOrEquals(xmax, snapshot->xmax))
+ return true;
+ /* xmax is between [xmin, xmax), check known committed array */
+ else if (TransactionIdInArray(xmax, snapshot->xip, snapshot->xcnt))
+ return false;
+ /* xmax is between [xmin, xmax), but known not to have committed yet */
+ else
+ return true;
+}
"pg_replslot",
"pg_tblspc",
"pg_stat",
- "pg_stat_tmp"
+ "pg_stat_tmp",
+ "pg_llog",
+ "pg_llog/snapshots",
+ "pg_llog/mappings"
};
extern void heap_sync(Relation relation);
/* in heap/pruneheap.c */
-extern void heap_page_prune_opt(Relation relation, Buffer buffer,
- TransactionId OldestXmin);
+extern void heap_page_prune_opt(Relation relation, Buffer buffer);
extern int heap_page_prune(Relation relation, Buffer buffer,
TransactionId OldestXmin,
bool report_stats, TransactionId *latestRemovedXid);
* the ones above associated with RM_HEAP_ID. XLOG_HEAP_OPMASK applies to
* these, too.
*/
-/* 0x00 is free, was XLOG_HEAP2_FREEZE */
+#define XLOG_HEAP2_REWRITE 0x00
#define XLOG_HEAP2_CLEAN 0x10
#define XLOG_HEAP2_FREEZE_PAGE 0x20
#define XLOG_HEAP2_CLEANUP_INFO 0x30
xl_heaptid target;
} xl_heap_new_cid;
+/* logical rewrite xlog record header */
+typedef struct xl_heap_rewrite_mapping
+{
+ TransactionId mapped_xid; /* xid that might need to see the row */
+ Oid mapped_db; /* DbOid or InvalidOid for shared rels */
+ Oid mapped_rel; /* Oid of the mapped relation */
+ off_t offset; /* How far have we written so far */
+ uint32 num_mappings; /* Number of in-memory mappings */
+ XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */
+} xl_heap_rewrite_mapping;
+
#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target) + SizeOfHeapTid)
extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
extern void heap2_redo(XLogRecPtr lsn, XLogRecord *rptr);
extern void heap2_desc(StringInfo buf, uint8 xl_info, char *rec);
+extern void heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r);
extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode,
TransactionId latestRemovedXid);
#define REWRITE_HEAP_H
#include "access/htup.h"
+#include "storage/itemptr.h"
+#include "storage/relfilenode.h"
#include "utils/relcache.h"
/* struct definition is private to rewriteheap.c */
typedef struct RewriteStateData *RewriteState;
-extern RewriteState begin_heap_rewrite(Relation NewHeap,
+extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
TransactionId OldestXmin, TransactionId FreezeXid,
MultiXactId MultiXactCutoff, bool use_wal);
extern void end_heap_rewrite(RewriteState state);
HeapTuple newTuple);
extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple);
+/*
+ * On-Disk data format for an individual logical rewrite mapping.
+ */
+typedef struct LogicalRewriteMappingData
+{
+ RelFileNode old_node;
+ RelFileNode new_node;
+ ItemPointerData old_tid;
+ ItemPointerData new_tid;
+} LogicalRewriteMappingData;
+
+/* ---
+ * The filename consists out of the following, dash separated,
+ * components:
+ * 1) database oid or InvalidOid for shared relations
+ * 2) the oid of the relation
+ * 3) xid we are mapping for
+ * 4) upper 32bit of the LSN at which a rewrite started
+ * 5) lower 32bit of the LSN at which a rewrite started
+ * 6) xid of the xact performing the mapping
+ * ---
+ */
+#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x"
+void CheckPointLogicalRewriteHeap(void);
+
#endif /* REWRITE_HEAP_H */
(AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \
(int32) ((id1) - (id2)) < 0)
+/* compare two XIDs already known to be normal; this is a macro for speed */
+#define NormalTransactionIdFollows(id1, id2) \
+ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \
+ (int32) ((id1) - (id2)) > 0)
+
/* ----------
* Object ID (OID) zero is InvalidOid.
*
/* Size of an EXTERNAL datum that contains a standard TOAST pointer */
#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_external))
-/* Size of an indirect datum that contains an indirect TOAST pointer */
+/* Size of an indirect datum that contains a standard TOAST pointer */
#define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_indirect))
+/*
+ * Testing whether an externally-stored value is compressed now requires
+ * comparing extsize (the actual length of the external data) to rawsize
+ * (the original uncompressed datum's size). The latter includes VARHDRSZ
+ * overhead, the former doesn't. We never use compression unless it actually
+ * saves space, so we expect either equality or less-than.
+ */
+#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
+ ((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ)
+
+/*
+ * Macro to fetch the possibly-unaligned contents of an EXTERNAL datum
+ * into a local "struct varatt_external" toast pointer. This should be
+ * just a memcpy, but some versions of gcc seem to produce broken code
+ * that assumes the datum contents are aligned. Introducing an explicit
+ * intermediate "varattrib_1b_e *" variable seems to fix it.
+ */
+#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \
+do { \
+ varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \
+ Assert(VARATT_IS_EXTERNAL(attre)); \
+ Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \
+ memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \
+} while (0)
+
/* ----------
* toast_insert_or_update -
*
extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
+extern XLogSegNo XLogGetLastRemovedSegno(void);
extern void XLogSetAsyncXactLSN(XLogRecPtr record);
extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn);
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201403031
+#define CATALOG_VERSION_NO 201403032
#endif
DESCR("create a physical replication slot");
DATA(insert OID = 3780 ( pg_drop_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "19" _null_ _null_ _null_ _null_ pg_drop_replication_slot _null_ _null_ _null_ ));
DESCR("drop a replication slot");
-DATA(insert OID = 3781 ( pg_get_replication_slots PGNSP PGUID 12 1 10 0 0 f f f f f t s 0 0 2249 "" "{19,25,26,16,28,3220}" "{o,o,o,o,o,o}" "{slot_name,slot_type,datoid,active,xmin,restart_lsn}" _null_ pg_get_replication_slots _null_ _null_ _null_ ));
+DATA(insert OID = 3781 ( pg_get_replication_slots PGNSP PGUID 12 1 10 0 0 f f f f f t s 0 0 2249 "" "{19,19,25,26,16,28,28,3220}" "{o,o,o,o,o,o,o,o}" "{slot_name,plugin,slot_type,datoid,active,xmin,catalog_xmin,restart_lsn}" _null_ pg_get_replication_slots _null_ _null_ _null_ ));
DESCR("information about replication slots currently in use");
+DATA(insert OID = 3786 ( pg_create_logical_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f f f v 2 0 2249 "19 19" "{19,19,25,3220}" "{i,i,o,o}" "{slotname,plugin,slotname,xlog_position}" _null_ pg_create_logical_replication_slot _null_ _null_ _null_ ));
+DESCR("set up a logical replication slot");
+DATA(insert OID = 3782 ( pg_logical_slot_get_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,25}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_get_changes _null_ _null_ _null_ ));
+DESCR("get changes from replication slot");
+DATA(insert OID = 3783 ( pg_logical_slot_get_binary_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,17}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_get_binary_changes _null_ _null_ _null_ ));
+DESCR("get binary changes from replication slot");
+DATA(insert OID = 3784 ( pg_logical_slot_peek_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,25}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_peek_changes _null_ _null_ _null_ ));
+DESCR("peek at changes from replication slot");
+DATA(insert OID = 3785 ( pg_logical_slot_peek_binary_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,17}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_peek_binary_changes _null_ _null_ _null_ ));
+DESCR("peek at binary changes from replication slot");
/* event triggers */
DATA(insert OID = 3566 ( pg_event_trigger_dropped_objects PGNSP PGUID 12 10 100 0 0 f f f f t t s 0 0 2249 "" "{26,26,23,25,25,25,25}" "{o,o,o,o,o,o,o}" "{classid, objid, objsubid, object_type, schema_name, object_name, object_identity}" _null_ pg_event_trigger_dropped_objects _null_ _null_ _null_ ));
bool hasindex,
TransactionId frozenxid,
MultiXactId minmulti);
-extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
+extern void vacuum_set_xid_limits(Relation rel,
+ int freeze_min_age, int freeze_table_age,
int multixact_freeze_min_age,
int multixact_freeze_table_age,
- bool sharedRel,
TransactionId *oldestXmin,
TransactionId *freezeLimit,
TransactionId *xidFullScanLimit,
--- /dev/null
+/*-------------------------------------------------------------------------
+ * decode.h
+ * PostgreSQL WAL to logical transformation
+ *
+ * Portions Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef DECODE_H
+#define DECODE_H
+
+#include "access/xlogreader.h"
+#include "replication/reorderbuffer.h"
+#include "replication/logical.h"
+
+void LogicalDecodingProcessRecord(LogicalDecodingContext *ctx,
+ XLogRecord *record);
+
+#endif
--- /dev/null
+/*-------------------------------------------------------------------------
+ * logical.h
+ * PostgreSQL logical decoding coordination
+ *
+ * Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef LOGICAL_H
+#define LOGICAL_H
+
+#include "replication/slot.h"
+
+#include "access/xlog.h"
+#include "access/xlogreader.h"
+#include "replication/output_plugin.h"
+
+struct LogicalDecodingContext;
+
+typedef void (*LogicalOutputPluginWriterWrite) (
+ struct LogicalDecodingContext *lr,
+ XLogRecPtr Ptr,
+ TransactionId xid,
+ bool last_write
+);
+
+typedef LogicalOutputPluginWriterWrite LogicalOutputPluginWriterPrepareWrite;
+
+typedef struct LogicalDecodingContext
+{
+ /* memory context this is all allocated in */
+ MemoryContext context;
+
+ /* infrastructure pieces */
+ XLogReaderState *reader;
+ ReplicationSlot *slot;
+ struct ReorderBuffer *reorder;
+ struct SnapBuild *snapshot_builder;
+
+ OutputPluginCallbacks callbacks;
+ OutputPluginOptions options;
+
+ /*
+ * User specified options
+ */
+ List *output_plugin_options;
+
+ /*
+ * User-Provided callback for writing/streaming out data.
+ */
+ LogicalOutputPluginWriterPrepareWrite prepare_write;
+ LogicalOutputPluginWriterWrite write;
+
+ /*
+ * Output buffer.
+ */
+ StringInfo out;
+
+ /*
+ * Private data pointer of the output plugin.
+ */
+ void *output_plugin_private;
+
+ /*
+ * Private data pointer for the data writer.
+ */
+ void *output_writer_private;
+
+ /*
+ * State for writing output.
+ */
+ bool accept_writes;
+ bool prepared_write;
+ XLogRecPtr write_location;
+ TransactionId write_xid;
+} LogicalDecodingContext;
+
+extern void CheckLogicalDecodingRequirements(void);
+
+extern LogicalDecodingContext *CreateInitDecodingContext(char *plugin,
+ List *output_plugin_options,
+ XLogPageReadCB read_page,
+ LogicalOutputPluginWriterPrepareWrite prepare_write,
+ LogicalOutputPluginWriterWrite do_write);
+extern LogicalDecodingContext *CreateDecodingContext(
+ XLogRecPtr start_lsn,
+ List *output_plugin_options,
+ XLogPageReadCB read_page,
+ LogicalOutputPluginWriterPrepareWrite prepare_write,
+ LogicalOutputPluginWriterWrite do_write);
+extern void DecodingContextFindStartpoint(LogicalDecodingContext *ctx);
+extern bool DecodingContextReady(LogicalDecodingContext *ctx);
+extern void FreeDecodingContext(LogicalDecodingContext *ctx);
+
+extern void LogicalIncreaseXminForSlot(XLogRecPtr lsn, TransactionId xmin);
+extern void LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn,
+ XLogRecPtr restart_lsn);
+extern void LogicalConfirmReceivedLocation(XLogRecPtr lsn);
+
+#endif
--- /dev/null
+/*-------------------------------------------------------------------------
+ * logicalfuncs.h
+ * PostgreSQL WAL to logical transformation support functions
+ *
+ * Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef LOGICALFUNCS_H
+#define LOGICALFUNCS_H
+
+#include "replication/logical.h"
+
+extern int logical_read_local_xlog_page(XLogReaderState *state,
+ XLogRecPtr targetPagePtr,
+ int reqLen, XLogRecPtr targetRecPtr,
+ char *cur_page, TimeLineID *pageTLI);
+
+extern Datum pg_logical_slot_get_changes(PG_FUNCTION_ARGS);
+extern Datum pg_logical_slot_get_binary_changes(PG_FUNCTION_ARGS);
+extern Datum pg_logical_slot_peek_changes(PG_FUNCTION_ARGS);
+extern Datum pg_logical_slot_peek_binary_changes(PG_FUNCTION_ARGS);
+
+#endif
--- /dev/null
+/*-------------------------------------------------------------------------
+ * output_plugin.h
+ * PostgreSQL Logical Decode Plugin Interface
+ *
+ * Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef OUTPUT_PLUGIN_H
+#define OUTPUT_PLUGIN_H
+
+#include "replication/reorderbuffer.h"
+
+struct LogicalDecodingContext;
+struct OutputPluginCallbacks;
+
+typedef enum OutputPluginOutputType
+{
+ OUTPUT_PLUGIN_BINARY_OUTPUT,
+ OUTPUT_PLUGIN_TEXTUAL_OUTPUT
+} OutputPluginOutputType;
+
+/*
+ * Options set by the output plugin, in the startup callback.
+ */
+typedef struct OutputPluginOptions
+{
+ OutputPluginOutputType output_type;
+} OutputPluginOptions;
+
+/*
+ * Type of the shared library symbol _PG_output_plugin_init that is looked up
+ * when loading an output plugin shared library.
+ */
+typedef void (*LogicalOutputPluginInit)(struct OutputPluginCallbacks *cb);
+
+/*
+ * Callback that gets called in a user-defined plugin. ctx->private_data can
+ * be set to some private data.
+ *
+ * "is_init" will be set to "true" if the decoding slot just got defined. When
+ * the same slot is used from there one, it will be "false".
+ */
+typedef void (*LogicalDecodeStartupCB) (
+ struct LogicalDecodingContext *ctx,
+ OutputPluginOptions *options,
+ bool is_init
+);
+
+/*
+ * Callback called for every (explicit or implicit) BEGIN of a successful
+ * transaction.
+ */
+typedef void (*LogicalDecodeBeginCB) (
+ struct LogicalDecodingContext *,
+ ReorderBufferTXN *txn);
+
+/*
+ * Callback for every individual change in a successful transaction.
+ */
+typedef void (*LogicalDecodeChangeCB) (
+ struct LogicalDecodingContext *,
+ ReorderBufferTXN *txn,
+ Relation relation,
+ ReorderBufferChange *change
+);
+
+/*
+ * Called for every (explicit or implicit) COMMIT of a successful transaction.
+ */
+typedef void (*LogicalDecodeCommitCB) (
+ struct LogicalDecodingContext *,
+ ReorderBufferTXN *txn,
+ XLogRecPtr commit_lsn);
+
+/*
+ * Called to shutdown an output plugin.
+ */
+typedef void (*LogicalDecodeShutdownCB) (
+ struct LogicalDecodingContext *
+);
+
+/*
+ * Output plugin callbacks
+ */
+typedef struct OutputPluginCallbacks
+{
+ LogicalDecodeStartupCB startup_cb;
+ LogicalDecodeBeginCB begin_cb;
+ LogicalDecodeChangeCB change_cb;
+ LogicalDecodeCommitCB commit_cb;
+ LogicalDecodeShutdownCB shutdown_cb;
+} OutputPluginCallbacks;
+
+void OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write);
+void OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write);
+
+#endif /* OUTPUT_PLUGIN_H */
--- /dev/null
+/*
+ * reorderbuffer.h
+ * PostgreSQL logical replay/reorder buffer management.
+ *
+ * Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ * src/include/replication/reorderbuffer.h
+ */
+#ifndef REORDERBUFFER_H
+#define REORDERBUFFER_H
+
+#include "access/htup_details.h"
+
+#include "lib/ilist.h"
+
+#include "storage/sinval.h"
+
+#include "utils/hsearch.h"
+#include "utils/rel.h"
+#include "utils/snapshot.h"
+#include "utils/timestamp.h"
+
+/* an individual tuple, stored in one chunk of memory */
+typedef struct ReorderBufferTupleBuf
+{
+ /* position in preallocated list */
+ slist_node node;
+
+ /* tuple, stored sequentially */
+ HeapTupleData tuple;
+ HeapTupleHeaderData header;
+ char data[MaxHeapTupleSize];
+} ReorderBufferTupleBuf;
+
+/* types of the change passed to a 'change' callback */
+enum ReorderBufferChangeType
+{
+ REORDER_BUFFER_CHANGE_INSERT,
+ REORDER_BUFFER_CHANGE_UPDATE,
+ REORDER_BUFFER_CHANGE_DELETE
+};
+
+/*
+ * a single 'change', can be an insert (with one tuple), an update (old, new),
+ * or a delete (old).
+ *
+ * The same struct is also used internally for other purposes but that should
+ * never be visible outside reorderbuffer.c.
+ */
+typedef struct ReorderBufferChange
+{
+ XLogRecPtr lsn;
+
+ /* type of change */
+ union
+ {
+ enum ReorderBufferChangeType action;
+ /* do not leak internal enum values to the outside */
+ int action_internal;
+ };
+
+ /*
+ * Context data for the change, which part of the union is valid depends
+ * on action/action_internal.
+ */
+ union
+ {
+ /* old, new tuples when action == *_INSERT|UPDATE|DELETE */
+ struct
+ {
+ /* relation that has been changed */
+ RelFileNode relnode;
+ /* valid for DELETE || UPDATE */
+ ReorderBufferTupleBuf *oldtuple;
+ /* valid for INSERT || UPDATE */
+ ReorderBufferTupleBuf *newtuple;
+ } tp;
+
+ /* new snapshot */
+ Snapshot snapshot;
+
+ /* new command id for existing snapshot in a catalog changing tx */
+ CommandId command_id;
+
+ /* new cid mapping for catalog changing transaction */
+ struct
+ {
+ RelFileNode node;
+ ItemPointerData tid;
+ CommandId cmin;
+ CommandId cmax;
+ CommandId combocid;
+ } tuplecid;
+ };
+
+ /*
+ * While in use this is how a change is linked into a transactions,
+ * otherwise it's the preallocated list.
+ */
+ dlist_node node;
+} ReorderBufferChange;
+
+typedef struct ReorderBufferTXN
+{
+ /*
+ * The transactions transaction id, can be a toplevel or sub xid.
+ */
+ TransactionId xid;
+
+ /* did the TX have catalog changes */
+ bool has_catalog_changes;
+
+ /*
+ * Do we know this is a subxact?
+ */
+ bool is_known_as_subxact;
+
+ /*
+ * LSN of the first data carrying, WAL record with knowledge about this
+ * xid. This is allowed to *not* be first record adorned with this xid, if
+ * the previous records aren't relevant for logical decoding.
+ */
+ XLogRecPtr first_lsn;
+
+ /* ----
+ * LSN of the record that lead to this xact to be committed or
+ * aborted. This can be a
+ * * plain commit record
+ * * plain commit record, of a parent transaction
+ * * prepared transaction commit
+ * * plain abort record
+ * * prepared transaction abort
+ * * error during decoding
+ * ----
+ */
+ XLogRecPtr final_lsn;
+
+ /*
+ * LSN pointing to the end of the commit record + 1.
+ */
+ XLogRecPtr end_lsn;
+
+ /*
+ * LSN of the last lsn at which snapshot information reside, so we can
+ * restart decoding from there and fully recover this transaction from
+ * WAL.
+ */
+ XLogRecPtr restart_decoding_lsn;
+
+ /*
+ * Commit time, only known when we read the actual commit record.
+ */
+ TimestampTz commit_time;
+
+ /*
+ * Base snapshot or NULL.
+ */
+ Snapshot base_snapshot;
+ XLogRecPtr base_snapshot_lsn;
+
+ /*
+ * How many ReorderBufferChange's do we have in this txn.
+ *
+ * Changes in subtransactions are *not* included but tracked separately.
+ */
+ uint64 nentries;
+
+ /*
+ * How many of the above entries are stored in memory in contrast to being
+ * spilled to disk.
+ */
+ uint64 nentries_mem;
+
+ /*
+ * List of ReorderBufferChange structs, including new Snapshots and new
+ * CommandIds
+ */
+ dlist_head changes;
+
+ /*
+ * List of (relation, ctid) => (cmin, cmax) mappings for catalog tuples.
+ * Those are always assigned to the toplevel transaction. (Keep track of
+ * #entries to create a hash of the right size)
+ */
+ dlist_head tuplecids;
+ uint64 ntuplecids;
+
+ /*
+ * On-demand built hash for looking up the above values.
+ */
+ HTAB *tuplecid_hash;
+
+ /*
+ * Hash containing (potentially partial) toast entries. NULL if no toast
+ * tuples have been found for the current change.
+ */
+ HTAB *toast_hash;
+
+ /*
+ * non-hierarchical list of subtransactions that are *not* aborted. Only
+ * used in toplevel transactions.
+ */
+ dlist_head subtxns;
+ uint32 nsubtxns;
+
+ /*
+ * Stored cache invalidations. This is not a linked list because we get
+ * all the invalidations at once.
+ */
+ uint32 ninvalidations;
+ SharedInvalidationMessage *invalidations;
+
+ /* ---
+ * Position in one of three lists:
+ * * list of subtransactions if we are *known* to be subxact
+ * * list of toplevel xacts (can be a as-yet unknown subxact)
+ * * list of preallocated ReorderBufferTXNs
+ * ---
+ */
+ dlist_node node;
+
+} ReorderBufferTXN;
+
+/* so we can define the callbacks used inside struct ReorderBuffer itself */
+typedef struct ReorderBuffer ReorderBuffer;
+
+/* change callback signature */
+typedef void (*ReorderBufferApplyChangeCB) (
+ ReorderBuffer *rb,
+ ReorderBufferTXN *txn,
+ Relation relation,
+ ReorderBufferChange *change);
+
+/* begin callback signature */
+typedef void (*ReorderBufferBeginCB) (
+ ReorderBuffer *rb,
+ ReorderBufferTXN *txn);
+
+/* commit callback signature */
+typedef void (*ReorderBufferCommitCB) (
+ ReorderBuffer *rb,
+ ReorderBufferTXN *txn,
+ XLogRecPtr commit_lsn);
+
+struct ReorderBuffer
+{
+ /*
+ * xid => ReorderBufferTXN lookup table
+ */
+ HTAB *by_txn;
+
+ /*
+ * Transactions that could be a toplevel xact, ordered by LSN of the first
+ * record bearing that xid..
+ */
+ dlist_head toplevel_by_lsn;
+
+ /*
+ * one-entry sized cache for by_txn. Very frequently the same txn gets
+ * looked up over and over again.
+ */
+ TransactionId by_txn_last_xid;
+ ReorderBufferTXN *by_txn_last_txn;
+
+ /*
+ * Callacks to be called when a transactions commits.
+ */
+ ReorderBufferBeginCB begin;
+ ReorderBufferApplyChangeCB apply_change;
+ ReorderBufferCommitCB commit;
+
+ /*
+ * Pointer that will be passed untouched to the callbacks.
+ */
+ void *private_data;
+
+ /*
+ * Private memory context.
+ */
+ MemoryContext context;
+
+ /*
+ * Data structure slab cache.
+ *
+ * We allocate/deallocate some structures very frequently, to avoid bigger
+ * overhead we cache some unused ones here.
+ *
+ * The maximum number of cached entries is controlled by const variables
+ * ontop of reorderbuffer.c
+ */
+
+ /* cached ReorderBufferTXNs */
+ dlist_head cached_transactions;
+ Size nr_cached_transactions;
+
+ /* cached ReorderBufferChanges */
+ dlist_head cached_changes;
+ Size nr_cached_changes;
+
+ /* cached ReorderBufferTupleBufs */
+ slist_head cached_tuplebufs;
+ Size nr_cached_tuplebufs;
+
+ XLogRecPtr current_restart_decoding_lsn;
+
+ /* buffer for disk<->memory conversions */
+ char *outbuf;
+ Size outbufsize;
+};
+
+
+ReorderBuffer *ReorderBufferAllocate(void);
+void ReorderBufferFree(ReorderBuffer *);
+
+ReorderBufferTupleBuf *ReorderBufferGetTupleBuf(ReorderBuffer *);
+void ReorderBufferReturnTupleBuf(ReorderBuffer *, ReorderBufferTupleBuf *tuple);
+ReorderBufferChange *ReorderBufferGetChange(ReorderBuffer *);
+void ReorderBufferReturnChange(ReorderBuffer *, ReorderBufferChange *);
+
+void ReorderBufferQueueChange(ReorderBuffer *, TransactionId, XLogRecPtr lsn, ReorderBufferChange *);
+void ReorderBufferCommit(ReorderBuffer *, TransactionId,
+ XLogRecPtr commit_lsn, XLogRecPtr end_lsn,
+ TimestampTz commit_time);
+void ReorderBufferAssignChild(ReorderBuffer *, TransactionId, TransactionId, XLogRecPtr commit_lsn);
+void ReorderBufferCommitChild(ReorderBuffer *, TransactionId, TransactionId,
+ XLogRecPtr commit_lsn, XLogRecPtr end_lsn);
+void ReorderBufferAbort(ReorderBuffer *, TransactionId, XLogRecPtr lsn);
+void ReorderBufferAbortOld(ReorderBuffer *, TransactionId xid);
+void ReorderBufferForget(ReorderBuffer *, TransactionId, XLogRecPtr lsn);
+
+void ReorderBufferSetBaseSnapshot(ReorderBuffer *, TransactionId, XLogRecPtr lsn, struct SnapshotData *snap);
+void ReorderBufferAddSnapshot(ReorderBuffer *, TransactionId, XLogRecPtr lsn, struct SnapshotData *snap);
+void ReorderBufferAddNewCommandId(ReorderBuffer *, TransactionId, XLogRecPtr lsn,
+ CommandId cid);
+void ReorderBufferAddNewTupleCids(ReorderBuffer *, TransactionId, XLogRecPtr lsn,
+ RelFileNode node, ItemPointerData pt,
+ CommandId cmin, CommandId cmax, CommandId combocid);
+void ReorderBufferAddInvalidations(ReorderBuffer *, TransactionId, XLogRecPtr lsn,
+ Size nmsgs, SharedInvalidationMessage *msgs);
+bool ReorderBufferIsXidKnown(ReorderBuffer *, TransactionId xid);
+void ReorderBufferXidSetCatalogChanges(ReorderBuffer *, TransactionId xid, XLogRecPtr lsn);
+bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *, TransactionId xid);
+bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *, TransactionId xid);
+
+ReorderBufferTXN *ReorderBufferGetOldestTXN(ReorderBuffer *);
+
+void ReorderBufferSetRestartPoint(ReorderBuffer *, XLogRecPtr ptr);
+
+void StartupReorderBuffer(void);
+
+#endif
#include "storage/shmem.h"
#include "storage/spin.h"
+/*
+ * Behaviour of replication slots, upon release or crash.
+ *
+ * Slots marked as PERSISTENT are crashsafe and will not be dropped when
+ * released. Slots marked as EPHEMERAL will be dropped when released or after
+ * restarts.
+ *
+ * EPHEMERAL slots can be made PERSISTENT by calling ReplicationSlotPersist().
+ */
+typedef enum ReplicationSlotPersistency
+{
+ RS_PERSISTENT,
+ RS_EPHEMERAL
+} ReplicationSlotPersistency;
+
+/*
+ * On-Disk data of a replication slot, preserved across restarts.
+ */
typedef struct ReplicationSlotPersistentData
{
/* The slot's identifier */
/* database the slot is active on */
Oid database;
+ /*
+ * The slot's behaviour when being dropped (or restored after a crash).
+ */
+ ReplicationSlotPersistency persistency;
+
/*
* xmin horizon for data
*
*/
TransactionId xmin;
+ /*
+ * xmin horizon for catalog tuples
+ *
+ * NB: This may represent a value that hasn't been written to disk yet;
+ * see notes for effective_xmin, below.
+ */
+ TransactionId catalog_xmin;
+
/* oldest LSN that might be required by this replication slot */
XLogRecPtr restart_lsn;
+ /* oldest LSN that the client has acked receipt for */
+ XLogRecPtr confirmed_flush;
+
+ /* plugin name */
+ NameData plugin;
} ReplicationSlotPersistentData;
/*
* same as the persistent value (data.xmin).
*/
TransactionId effective_xmin;
+ TransactionId effective_catalog_xmin;
/* data surviving shutdowns and crashes */
ReplicationSlotPersistentData data;
/* is somebody performing io on this slot? */
LWLock *io_in_progress_lock;
+
+ /* all the remaining data is only used for logical slots */
+
+ /* ----
+ * When the client has confirmed flushes >= candidate_xmin_lsn we can
+ * advance the catalog xmin, when restart_valid has been passed,
+ * restart_lsn can be increased.
+ * ----
+ */
+ TransactionId candidate_catalog_xmin;
+ XLogRecPtr candidate_xmin_lsn;
+ XLogRecPtr candidate_restart_valid;
+ XLogRecPtr candidate_restart_lsn;
} ReplicationSlot;
/*
extern void ReplicationSlotsShmemInit(void);
/* management of individual slots */
-extern void ReplicationSlotCreate(const char *name, bool db_specific);
+extern void ReplicationSlotCreate(const char *name, bool db_specific,
+ ReplicationSlotPersistency p);
+extern void ReplicationSlotPersist(void);
extern void ReplicationSlotDrop(const char *name);
+
extern void ReplicationSlotAcquire(const char *name);
extern void ReplicationSlotRelease(void);
extern void ReplicationSlotSave(void);
/* misc stuff */
extern bool ReplicationSlotValidateName(const char *name, int elevel);
-extern void ReplicationSlotsComputeRequiredXmin(void);
+extern void ReplicationSlotsComputeRequiredXmin(bool already_locked);
extern void ReplicationSlotsComputeRequiredLSN(void);
+extern XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void);
+extern bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive);
+
extern void StartupReplicationSlots(XLogRecPtr checkPointRedo);
extern void CheckPointReplicationSlots(void);
extern void CheckSlotRequirements(void);
-extern void ReplicationSlotAtProcExit(void);
/* SQL callable functions */
+extern Datum pg_create_physical_replication_slot(PG_FUNCTION_ARGS);
+extern Datum pg_create_logical_replication_slot(PG_FUNCTION_ARGS);
+extern Datum pg_drop_replication_slot(PG_FUNCTION_ARGS);
extern Datum pg_get_replication_slots(PG_FUNCTION_ARGS);
#endif /* SLOT_H */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * snapbuild.h
+ * Exports from replication/logical/snapbuild.c.
+ *
+ * Copyright (c) 2012-2014, PostgreSQL Global Development Group
+ *
+ * src/include/replication/snapbuild.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef SNAPBUILD_H
+#define SNAPBUILD_H
+
+#include "access/xlogdefs.h"
+#include "utils/snapmgr.h"
+
+typedef enum
+{
+ /*
+ * Initial state, we can't do much yet.
+ */
+ SNAPBUILD_START,
+
+ /*
+ * We have collected enough information to decode tuples in transactions
+ * that started after this.
+ *
+ * Once we reached this we start to collect changes. We cannot apply them
+ * yet because the might be based on transactions that were still running
+ * when we reached them yet.
+ */
+ SNAPBUILD_FULL_SNAPSHOT,
+
+ /*
+ * Found a point after hitting built_full_snapshot where all transactions
+ * that were running at that point finished. Till we reach that we hold
+ * off calling any commit callbacks.
+ */
+ SNAPBUILD_CONSISTENT
+} SnapBuildState;
+
+/* forward declare so we don't have to expose the struct to the public */
+struct SnapBuild;
+typedef struct SnapBuild SnapBuild;
+
+/* forward declare so we don't have to include reorderbuffer.h */
+struct ReorderBuffer;
+
+/* forward declare so we don't have to include heapam_xlog.h */
+struct xl_heap_new_cid;
+struct xl_running_xacts;
+
+extern void CheckPointSnapBuild(void);
+
+extern SnapBuild *AllocateSnapshotBuilder(struct ReorderBuffer *cache,
+ TransactionId xmin_horizon, XLogRecPtr start_lsn);
+extern void FreeSnapshotBuilder(SnapBuild *cache);
+
+extern void SnapBuildSnapDecRefcount(Snapshot snap);
+
+extern const char *SnapBuildExportSnapshot(SnapBuild *snapstate);
+extern void SnapBuildClearExportedSnapshot(void);
+
+extern SnapBuildState SnapBuildCurrentState(SnapBuild *snapstate);
+
+extern bool SnapBuildXactNeedsSkip(SnapBuild *snapstate, XLogRecPtr ptr);
+
+extern void SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn,
+ TransactionId xid, int nsubxacts,
+ TransactionId *subxacts);
+extern void SnapBuildAbortTxn(SnapBuild *builder, XLogRecPtr lsn,
+ TransactionId xid, int nsubxacts,
+ TransactionId *subxacts);
+extern bool SnapBuildProcessChange(SnapBuild *builder, TransactionId xid,
+ XLogRecPtr lsn);
+extern void SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
+ XLogRecPtr lsn, struct xl_heap_new_cid *cid);
+extern void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn,
+ struct xl_running_xacts *running);
+extern void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn);
+
+#endif /* SNAPBUILD_H */
/*
* ItemPointerCopy
* Copies the contents of one disk item pointer to another.
+ *
+ * Should there ever be padding in an ItemPointer this would need to be handled
+ * differently as it's used as hash key.
*/
#define ItemPointerCopy(fromPointer, toPointer) \
( \
#define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */
#define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */
#define PROC_IN_ANALYZE 0x04 /* currently running analyze */
-#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */
+#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */
+#define PROC_IN_LOGICAL_DECODING 0x10 /* currently doing logical decoding */
/* flags reset at EOXact */
-#define PROC_VACUUM_STATE_MASK (0x0E)
+#define PROC_VACUUM_STATE_MASK \
+ (PROC_IN_VACUUM | PROC_IN_ANALYZE | PROC_VACUUM_FOR_WRAPAROUND)
/*
* We allow a small number of "weak" relation locks (AccesShareLock,
#define PROCARRAY_H
#include "storage/standby.h"
+#include "utils/relcache.h"
#include "utils/snapshot.h"
extern bool TransactionIdIsInProgress(TransactionId xid);
extern bool TransactionIdIsActive(TransactionId xid);
-extern TransactionId GetOldestXmin(bool allDbs, bool ignoreVacuum);
+extern TransactionId GetOldestXmin(Relation rel, bool ignoreVacuum);
extern TransactionId GetOldestActiveTransactionId(void);
+extern TransactionId GetOldestSafeDecodingTransactionId(void);
extern VirtualTransactionId *GetVirtualXIDsDelayingChkpt(int *nvxids);
extern bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids);
int nxids, const TransactionId *xids,
TransactionId latestXid);
-extern void ProcArraySetReplicationSlotXmin(TransactionId xmin);
+extern void ProcArraySetReplicationSlotXmin(TransactionId xmin,
+ TransactionId catalog_xmin, bool already_locked);
+
+extern void ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
+ TransactionId *catalog_xmin);
#endif /* PROCARRAY_H */
int nmsgs, bool RelcacheInitFileInval,
Oid dbid, Oid tsid);
+extern void LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg);
+
#endif /* SINVAL_H */
extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
+extern void InvalidateSystemCaches(void);
#endif /* INVAL_H */
extern TransactionId TransactionXmin;
extern TransactionId RecentXmin;
extern TransactionId RecentGlobalXmin;
+extern TransactionId RecentGlobalDataXmin;
extern Snapshot GetTransactionSnapshot(void);
extern Snapshot GetLatestSnapshot(void);
extern void SnapshotSetCommandId(CommandId curcid);
extern Snapshot GetCatalogSnapshot(Oid relid);
+extern Snapshot GetNonHistoricCatalogSnapshot(Oid relid);
extern void InvalidateCatalogSnapshot(void);
extern void PushActiveSnapshot(Snapshot snapshot);
extern void DeleteAllExportedSnapshotFiles(void);
extern bool ThereAreNoPriorRegisteredSnapshots(void);
+extern char *ExportSnapshot(Snapshot snapshot);
+
+/* Support for catalog timetravel for logical decoding */
+struct HTAB;
+extern struct HTAB *HistoricSnapshotGetTupleCids(void);
+extern void SetupHistoricSnapshot(Snapshot snapshot_now, struct HTAB *tuplecids);
+extern void TeardownHistoricSnapshot(bool is_error);
+extern bool HistoricSnapshotActive(void);
+
#endif /* SNAPMGR_H */
typedef bool (*SnapshotSatisfiesFunc) (HeapTuple htup,
Snapshot snapshot, Buffer buffer);
+/*
+ * Struct representing all kind of possible snapshots.
+ *
+ * There are several different kinds of snapshots:
+ * * Normal MVCC snapshots
+ * * MVCC snapshots taken during recovery (in Hot-Standby mode)
+ * * Historic MVCC snapshots used during logical decoding
+ * * snapshots passed to HeapTupleSatisfiesDirty()
+ * * snapshots used for SatisfiesAny, Toast, Self where no members are
+ * accessed.
+ *
+ * TODO: It's probably a good idea to split this struct using a NodeTag
+ * similar to how parser and executor nodes are handled, with one type for
+ * each different kind of snapshot to avoid overloading the meaning of
+ * individual fields.
+ */
typedef struct SnapshotData
{
SnapshotSatisfiesFunc satisfies; /* tuple test function */
*/
TransactionId xmin; /* all XID < xmin are visible to me */
TransactionId xmax; /* all XID >= xmax are invisible to me */
- TransactionId *xip; /* array of xact IDs in progress */
+ /*
+ * For normal MVCC snapshot this contains the all xact IDs that are in
+ * progress, unless the snapshot was taken during recovery in which case
+ * it's empty. For historic MVCC snapshots, the meaning is inverted,
+ * i.e. it contains *committed* transactions between xmin and xmax.
+ */
+ TransactionId *xip;
uint32 xcnt; /* # of xact ids in xip[] */
/* note: all ids in xip[] satisfy xmin <= xip[i] < xmax */
int32 subxcnt; /* # of xact ids in subxip[] */
- TransactionId *subxip; /* array of subxact IDs in progress */
+ /*
+ * For non-historic MVCC snapshots, this contains subxact IDs that are in
+ * progress (and other transactions that are in progress if taken during
+ * recovery). For historic snapshot it contains *all* xids assigned to the
+ * replayed transaction, including the toplevel xid.
+ */
+ TransactionId *subxip;
bool suboverflowed; /* has the subxip array overflowed? */
bool takenDuringRecovery; /* recovery-shaped snapshot? */
bool copied; /* false if it's a static snapshot */
extern PGDLLIMPORT SnapshotData SnapshotSelfData;
extern PGDLLIMPORT SnapshotData SnapshotAnyData;
extern PGDLLIMPORT SnapshotData SnapshotToastData;
+extern PGDLLIMPORT SnapshotData CatalogSnapshotData;
#define SnapshotSelf (&SnapshotSelfData)
#define SnapshotAny (&SnapshotAnyData)
/* This macro encodes the knowledge of which snapshots are MVCC-safe */
#define IsMVCCSnapshot(snapshot) \
- ((snapshot)->satisfies == HeapTupleSatisfiesMVCC)
+ ((snapshot)->satisfies == HeapTupleSatisfiesMVCC || \
+ (snapshot)->satisfies == HeapTupleSatisfiesHistoricMVCC)
/*
* HeapTupleSatisfiesVisibility
Snapshot snapshot, Buffer buffer);
extern bool HeapTupleSatisfiesDirty(HeapTuple htup,
Snapshot snapshot, Buffer buffer);
+extern bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup,
+ Snapshot snapshot, Buffer buffer);
/* Special "satisfies" routines with different APIs */
extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTuple htup,
uint16 infomask, TransactionId xid);
extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
+/*
+ * To avoid leaking to much knowledge about reorderbuffer implementation
+ * details this is implemented in reorderbuffer.c not tqual.c.
+ */
+extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
+ Snapshot snapshot,
+ HeapTuple htup,
+ Buffer buffer,
+ CommandId *cmin, CommandId *cmax);
#endif /* TQUAL_H */
LEFT JOIN pg_authid u ON ((p.ownerid = u.oid)))
LEFT JOIN pg_database d ON ((p.dbid = d.oid)));
pg_replication_slots| SELECT l.slot_name,
+ l.plugin,
l.slot_type,
l.datoid,
d.datname AS database,
l.active,
l.xmin,
+ l.catalog_xmin,
l.restart_lsn
- FROM (pg_get_replication_slots() l(slot_name, slot_type, datoid, active, xmin, restart_lsn)
+ FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, active, xmin, catalog_xmin, restart_lsn)
LEFT JOIN pg_database d ON ((l.datoid = d.oid)));
pg_roles| SELECT pg_authid.rolname,
pg_authid.rolsuper,
LockingClause
LogOpts
LogStmtLevel
+LogicalDecodeBeginCB
+LogicalDecodeChangeCB
+LogicalDecodeCleanupCB
+LogicalDecodeCommitCB
+LogicalDecodeInitCB
+LogicalDecodingCheckpointData
+LogicalDecodingContext
+LogicalDecodingCtlData
+LogicalDecodingSlot
+LogicalOutputPluginWriterPrepareWrite
+LogicalOutputPluginWriterWrite
LogicalTape
LogicalTapeSet
MAGIC
OprProofCacheEntry
OprProofCacheKey
OutputContext
+OutputPluginCallbacks
OverrideSearchPath
OverrideStackEntry
PACE_HEADER
RelocationBufferInfo
RenameStmt
ReopenPtr
+ReorderBuffer
+ReorderBufferApplyChangeCB
+ReorderBufferBeginCB
+ReorderBufferChange
+ReorderBufferChangeTypeInternal
+ReorderBufferCommitCB
+ReorderBufferDiskChange
+ReorderBufferIterTXNEntry
+ReorderBufferIterTXNState
+ReorderBufferToastEnt
+ReorderBufferTupleBuf
+ReorderBufferTupleCidEnt
+ReorderBufferTupleCidKey
+ReorderBufferTXN
+ReorderBufferTXNByIdEnt
ReplaceVarsFromTargetList_context
ReplaceVarsNoMatchOption
ResTarget
SISeg
SMgrRelation
SMgrRelationData
+SnapBuildAction
+SnapBuildState
SOCKADDR
SOCKET
SPELL
Snapshot
SnapshotData
SnapshotSatisfiesFunc
+Snapstate
+SnapstateOnDisk
SockAddr
Sort
SortBy
XLogRecData
XLogRecPtr
XLogRecord
+XLogRecordBuffer
XLogSegNo
XLogSource
XLogwrtResult
tablespaceinfo
teReqs
teSection
+TestDecodingData
temp_tablespaces_extra
text
timeKEY