The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a tablesample method (TSM). (Raw inserts into system catalogs are
not an acceptable thing to do, because we can't undo them during DROP
EXTENSION, nor will pg_upgrade behave sanely.) Instead, adopt an API more
like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.
Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which would be undesirable even if it weren't
outright unsafe), and discourage sampling methods from looking at invisible
tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.
Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.
Back-patch to 9.5 so that we don't have to support the original API
in production.
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+ FROM generate_series(0, 30) s(i);
ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1000);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
+ count
+-------
+ 0
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
+ count
+-------
+ 1
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
+ count
+-------
+ 10
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
count
-------
31
(1 row)
-SELECT id FROM test_tablesample TABLESAMPLE system_rows (8) REPEATABLE (5432);
- id
-----
- 7
- 14
- 21
- 28
- 4
- 11
- 18
- 25
-(8 rows)
-
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_rows (20) REPEATABLE (10);
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+ FROM generate_series(0, 30) s(i);
ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1000);
-SELECT id FROM test_tablesample TABLESAMPLE system_rows (8) REPEATABLE (5432);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
+
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) REPEATABLE (0);
+
+-- but a join should be allowed:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (VALUES (0),(10),(100)) v(nrows),
+ LATERAL (SELECT count(*) FROM test_tablesample
+ TABLESAMPLE system_rows (nrows)) ss;
+
+SELECT * FROM
+ (VALUES (0),(10),(100)) v(nrows),
+ LATERAL (SELECT count(*) FROM test_tablesample
+ TABLESAMPLE system_rows (nrows)) ss;
+
+CREATE VIEW vv AS
+ SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (20);
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_rows (20) REPEATABLE (10);
+SELECT * FROM vv;
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_rows; -- fail, view depends on extension
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+ FROM generate_series(0, 30) s(i);
ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
+-- It's a bit tricky to test SYSTEM_TIME in a platform-independent way.
+-- We can test the zero-time corner case ...
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (0);
count
-------
- 31
+ 0
(1 row)
-SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
- id
-----
- 7
- 14
- 21
- 28
- 4
- 11
- 18
- 25
- 1
- 8
- 15
- 22
- 29
- 5
- 12
- 19
- 26
- 2
- 9
- 16
- 23
- 30
- 6
- 13
- 20
- 27
- 3
- 10
- 17
- 24
- 0
-(31 rows)
-
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+ FROM generate_series(0, 30) s(i);
ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
-SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
+-- It's a bit tricky to test SYSTEM_TIME in a platform-independent way.
+-- We can test the zero-time corner case ...
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (0);
+-- ... and we assume that this will finish before running out of time:
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (100000);
+
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_time (10) REPEATABLE (0);
+
+-- since it's not repeatable, we expect a Materialize node in these plans:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (100000)) ss;
+
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (100000)) ss;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (time)) ss;
+
+SELECT * FROM
+ (VALUES (0),(100000)) v(time),
+ LATERAL (SELECT COUNT(*) FROM test_tablesample
+ TABLESAMPLE system_time (time)) ss;
+
+CREATE VIEW vv AS
+ SELECT * FROM test_tablesample TABLESAMPLE system_time (20);
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
+EXPLAIN (COSTS OFF) SELECT * FROM vv;
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_time; -- fail, view depends on extension
@@ -3734,6+3734,16 @@ DATA(insert OID = 3116 ( fdw_handler_in PGNSP PGUID 12 1 0 0 0 f f f f f f i 1
DESCR("I/O");
DATA(insert OID = 3117 ( fdw_handler_out PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3115" _null_ _null_ _null_ _null_ _null_ fdw_handler_out _null_ _null_ _null_ ));
DESCR("I/O");
+DATA(insert OID = 3311 ( tsm_handler_in PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 3310 "2275" _null_ _null_ _null_ _null_ _null_ tsm_handler_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3312 ( tsm_handler_out PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3310" _null_ _null_ _null_ _null_ _null_ tsm_handler_out _null_ _null_ _null_ ));
+DESCR("I/O");
+
+/* tablesample method handlers */
+DATA(insert OID = 3313 ( bernoulli PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_handler _null_ _null_ _null_ ));
+DESCR("BERNOULLI tablesample method handler");
+DATA(insert OID = 3314 ( system PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_handler _null_ _null_ _null_ ));
+DESCR("SYSTEM tablesample method handler");
/* cryptographic */
DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ ));
@@ -5321,33+5331,6 @@ DESCR("get an individual replication origin's replication progress");
DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ ));
DESCR("get progress for all replication origins");
-/* tablesample */
-DATA(insert OID = 3335 ( tsm_system_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_system_init _null_ _null_ _null_ ));
-DESCR("tsm_system_init(internal)");
-DATA(insert OID = 3336 ( tsm_system_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nextblock _null_ _null_ _null_ ));
-DESCR("tsm_system_nextblock(internal)");
-DATA(insert OID = 3337 ( tsm_system_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nexttuple _null_ _null_ _null_ ));
-DESCR("tsm_system_nexttuple(internal)");
-DATA(insert OID = 3338 ( tsm_system_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_end _null_ _null_ _null_ ));
-DESCR("tsm_system_end(internal)");
-DATA(insert OID = 3339 ( tsm_system_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_reset _null_ _null_ _null_ ));
-DESCR("tsm_system_reset(internal)");
-DATA(insert OID = 3340 ( tsm_system_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_system_cost _null_ _null_ _null_ ));
-DESCR("tsm_system_cost(internal)");
-
-DATA(insert OID = 3341 ( tsm_bernoulli_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_init _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_init(internal)");
-DATA(insert OID = 3342 ( tsm_bernoulli_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nextblock _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_nextblock(internal)");
-DATA(insert OID = 3343 ( tsm_bernoulli_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nexttuple _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_nexttuple(internal)");
-DATA(insert OID = 3344 ( tsm_bernoulli_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_end _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_end(internal)");
-DATA(insert OID = 3345 ( tsm_bernoulli_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_reset _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_reset(internal)");
-DATA(insert OID = 3346 ( tsm_bernoulli_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_cost _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_cost(internal)");
-
/*
* Symbolic values for provolatile column: these indicate whether the result
* of a function is dependent *only* on the values of its explicit arguments,
@@ -694,6+694,8 @@ DATA(insert OID = 3500 ( anyenum PGNSP PGUID 4 t p P f t \054 0 0 0 anyenum_in
#define ANYENUMOID 3500
DATA(insert OID = 3115 ( fdw_handler PGNSP PGUID 4 t p P f t \054 0 0 0 fdw_handler_in fdw_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
#define FDW_HANDLEROID 3115
+DATA(insert OID = 3310 ( tsm_handler PGNSP PGUID 4 t p P f t \054 0 0 0 tsm_handler_in tsm_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
+#define TSM_HANDLEROID 3310
DATA(insert OID = 3831 ( anyrange PGNSP PGUID -1 f p P f t \054 0 0 0 anyrange_in anyrange_out - - - - - d x f 0 -1 0 0 _null_ _null_ _null_ ));
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
+INSERT INTO test_tablesample
+ SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
id
----
- 0
- 1
- 2
3
4
5
- 9
-(7 rows)
-
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
- id
-----
6
7
8
-(3 rows)
+(6 rows)
-SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
- count
--------
- 10
-(1 row)
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+ id
+----
+(0 rows)
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
id
----
- 0
- 1
- 2
+ 3
+ 4
+ 5
6
7
8
- 9
-(7 rows)
+(6 rows)
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0);
id
----
- 0
- 1
- 3
4
5
+ 6
+ 7
+ 8
(5 rows)
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0);
id
----
- 0
- 5
-(2 rows)
+ 7
+(1 row)
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
+INSERT INTO test_tablesample
+ SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0);
+
+-- 100% should give repeatable count results (ie, all rows) in any case
SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);