Redesign tablesample method API, and do extensive code review.

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 25 Jul 2015 18:39:00 +0000 (14:39 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 25 Jul 2015 18:39:00 +0000 (14:39 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 25 Jul 2015 18:39:00 +0000 (14:39 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 25 Jul 2015 18:39:00 +0000 (14:39 -0400)
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c

index 0eb991cdf0e86f50e182ef07b7bd2a22f7a5b0b6..59b8a2e2b3d9cd99ae6eb38746a85de44a74862d 100644 (file)
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2297,6 +2297,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
                 {
                         case RTE_RELATION:
                                 APP_JUMB(rte->relid);
+                               JumbleExpr(jstate, (Node *) rte->tablesample);
                                 break;
                         case RTE_SUBQUERY:
                                 JumbleQuery(jstate, rte->subquery);
@@ -2767,6 +2768,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
                                 JumbleExpr(jstate, rtfunc->funcexpr);
                         }
                         break;
+               case T_TableSampleClause:
+                       {
+                               TableSampleClause *tsc = (TableSampleClause *) node;
+
+                               APP_JUMB(tsc->tsmhandler);
+                               JumbleExpr(jstate, (Node *) tsc->args);
+                               JumbleExpr(jstate, (Node *) tsc->repeatable);
+                       }
+                       break;
                 default:
                         /* Only a warning, since we can stumble along anyway */
                         elog(WARNING, "unrecognized node type: %d",
diff --git a/contrib/tsm_system_rows/Makefile b/contrib/tsm_system_rows/Makefile

index 700ab276db2e95b546dee914751387ce3bb940b6..609af463c5c2438b340c8ec6c32fdcddb7d627a1 100644 (file)
--- a/contrib/tsm_system_rows/Makefile
+++ b/contrib/tsm_system_rows/Makefile
@@ -1,8 +1,8 @@
-# src/test/modules/tsm_system_rows/Makefile
+# contrib/tsm_system_rows/Makefile
  
  MODULE_big = tsm_system_rows
  OBJS = tsm_system_rows.o $(WIN32RES)
-PGFILEDESC = "tsm_system_rows - SYSTEM TABLESAMPLE method which accepts number of rows as a limit"
+PGFILEDESC = "tsm_system_rows - TABLESAMPLE method which accepts number of rows as a limit"
  
  EXTENSION = tsm_system_rows
  DATA = tsm_system_rows--1.0.sql
diff --git a/contrib/tsm_system_rows/expected/tsm_system_rows.out b/contrib/tsm_system_rows/expected/tsm_system_rows.out

index 7e0f72b02b7df38392293f314341058ceb293d06..87b4a8fc64bd222438b14dbce830923f862b0c6a 100644 (file)
--- a/contrib/tsm_system_rows/expected/tsm_system_rows.out
+++ b/contrib/tsm_system_rows/expected/tsm_system_rows.out
@@ -1,31 +1,83 @@
  CREATE EXTENSION tsm_system_rows;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
  ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1000);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
+ count 
+-------
+     0
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
+ count 
+-------
+     1
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
+ count 
+-------
+    10
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
   count 
  -------
      31
  (1 row)
  
-SELECT id FROM test_tablesample TABLESAMPLE system_rows (8) REPEATABLE (5432);
- id 
-----
-  7
- 14
- 21
- 28
-  4
- 11
- 18
- 25
-(8 rows)
-
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_rows (20) REPEATABLE (10);
-                                    QUERY PLAN                                     
------------------------------------------------------------------------------------
- Sample Scan (system_rows) on test_tablesample  (cost=0.00..80.20 rows=20 width=4)
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+               QUERY PLAN               
+----------------------------------------
+ Sample Scan on test_tablesample
+   Sampling: system_rows ('-1'::bigint)
+(2 rows)
+
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+ERROR:  sample size must not be negative
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) REPEATABLE (0);
+ERROR:  tablesample method system_rows does not support REPEATABLE
+LINE 1: SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) ...
+                                                   ^
+-- but a join should be allowed:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Nested Loop
+   ->  Values Scan on "*VALUES*"
+   ->  Aggregate
+         ->  Sample Scan on test_tablesample
+               Sampling: system_rows ("*VALUES*".column1)
+(5 rows)
+
+SELECT * FROM
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+ nrows | count 
+-------+-------
+     0 |     0
+    10 |    10
+   100 |    31
+(3 rows)
+
+CREATE VIEW vv AS
+  SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (20);
+SELECT * FROM vv;
+ count 
+-------
+    20
  (1 row)
  
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_rows;  -- fail, view depends on extension
+ERROR:  cannot drop extension tsm_system_rows because other objects depend on it
+DETAIL:  view vv depends on function system_rows(internal)
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
diff --git a/contrib/tsm_system_rows/sql/tsm_system_rows.sql b/contrib/tsm_system_rows/sql/tsm_system_rows.sql

index bd812220ed98dcab5f8e51128062fc65a50b6f95..e3ab4204eea5ae601aaf9e43c1edec6d0bda29d5 100644 (file)
--- a/contrib/tsm_system_rows/sql/tsm_system_rows.sql
+++ b/contrib/tsm_system_rows/sql/tsm_system_rows.sql
@@ -1,14 +1,39 @@
  CREATE EXTENSION tsm_system_rows;
  
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
  ANALYZE test_tablesample;
  
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1000);
-SELECT id FROM test_tablesample TABLESAMPLE system_rows (8) REPEATABLE (5432);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
+
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) REPEATABLE (0);
+
+-- but a join should be allowed:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+
+SELECT * FROM
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+
+CREATE VIEW vv AS
+  SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (20);
  
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_rows (20) REPEATABLE (10);
+SELECT * FROM vv;
  
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_rows;  -- fail, view depends on extension
diff --git a/contrib/tsm_system_rows/tsm_system_rows--1.0.sql b/contrib/tsm_system_rows/tsm_system_rows--1.0.sql

index 1a29c584b5a8386180083120e25fb80edd8004e7..de508ed72675fe3cb51fd1dce770227b97d31acc 100644 (file)
--- a/contrib/tsm_system_rows/tsm_system_rows--1.0.sql
+++ b/contrib/tsm_system_rows/tsm_system_rows--1.0.sql
@@ -1,44 +1,9 @@
-/* src/test/modules/tablesample/tsm_system_rows--1.0.sql */
+/* contrib/tsm_system_rows/tsm_system_rows--1.0.sql */
  
  -- complain if script is sourced in psql, rather than via CREATE EXTENSION
  \echo Use "CREATE EXTENSION tsm_system_rows" to load this file. \quit
  
-CREATE FUNCTION tsm_system_rows_init(internal, int4, int4)
-RETURNS void
-AS 'MODULE_PATHNAME'
+CREATE FUNCTION system_rows(internal)
+RETURNS tsm_handler
+AS 'MODULE_PATHNAME', 'tsm_system_rows_handler'
  LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_nextblock(internal)
-RETURNS int4
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_nexttuple(internal, int4, int2)
-RETURNS int2
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_examinetuple(internal, int4, internal, bool)
-RETURNS bool
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_end(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_reset(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_rows_cost(internal, internal, internal, internal, internal, internal, internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-INSERT INTO pg_tablesample_method VALUES('system_rows', false, true,
-       'tsm_system_rows_init', 'tsm_system_rows_nextblock',
-       'tsm_system_rows_nexttuple', 'tsm_system_rows_examinetuple',
-       'tsm_system_rows_end', 'tsm_system_rows_reset', 'tsm_system_rows_cost');
diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c

index e325eaff498972b46595b3be60b2f5ce92c8ed6d..f251e3e5e06dd9416d5a8bac14417cfa70311ca2 100644 (file)
--- a/contrib/tsm_system_rows/tsm_system_rows.c
+++ b/contrib/tsm_system_rows/tsm_system_rows.c
@@ -1,240 +1,356 @@
  /*-------------------------------------------------------------------------
   *
   * tsm_system_rows.c
- *       interface routines for system_rows tablesample method
+ *       support routines for SYSTEM_ROWS tablesample method
   *
+ * The desire here is to produce a random sample with a given number of rows
+ * (or the whole relation, if that is fewer rows).  We use a block-sampling
+ * approach.  To ensure that the whole relation will be visited if necessary,
+ * we start at a randomly chosen block and then advance with a stride that
+ * is randomly chosen but is relatively prime to the relation's nblocks.
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Because of the dependence on nblocks, this method cannot be repeatable
+ * across queries.  (Even if the user hasn't explicitly changed the relation,
+ * maintenance activities such as autovacuum might change nblocks.)  However,
+ * we can at least make it repeatable across scans, by determining the
+ * sampling pattern only once on the first scan.  This means that rescans
+ * won't visit blocks added after the first scan, but that is fine since
+ * such blocks shouldn't contain any visible tuples anyway.
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       contrib/tsm_system_rows_rowlimit/tsm_system_rows.c
+ *       contrib/tsm_system_rows/tsm_system_rows.c
   *
   *-------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "fmgr.h"
-
-#include "access/tablesample.h"
  #include "access/relscan.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
  #include "miscadmin.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
  #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
+#include "optimizer/cost.h"
  #include "utils/sampling.h"
  
  PG_MODULE_MAGIC;
  
-/*
- * State
- */
+PG_FUNCTION_INFO_V1(tsm_system_rows_handler);
+
+
+/* Private state */
  typedef struct
  {
-       SamplerRandomState randstate;
         uint32          seed;                   /* random seed */
-       BlockNumber nblocks;            /* number of block in relation */
-       int32           ntuples;                /* number of tuples to return */
-       int32           donetuples;             /* tuples already returned */
+       int64           ntuples;                /* number of tuples to return */
+       int64           donetuples;             /* number of tuples already returned */
         OffsetNumber lt;                        /* last tuple returned from current block */
-       BlockNumber step;                       /* step size */
+       BlockNumber doneblocks;         /* number of already-scanned blocks */
         BlockNumber lb;                         /* last block visited */
-       BlockNumber doneblocks;         /* number of already returned blocks */
-} SystemSamplerData;
-
-
-PG_FUNCTION_INFO_V1(tsm_system_rows_init);
-PG_FUNCTION_INFO_V1(tsm_system_rows_nextblock);
-PG_FUNCTION_INFO_V1(tsm_system_rows_nexttuple);
-PG_FUNCTION_INFO_V1(tsm_system_rows_examinetuple);
-PG_FUNCTION_INFO_V1(tsm_system_rows_end);
-PG_FUNCTION_INFO_V1(tsm_system_rows_reset);
-PG_FUNCTION_INFO_V1(tsm_system_rows_cost);
-
+       /* these three values are not changed during a rescan: */
+       BlockNumber nblocks;            /* number of blocks in relation */
+       BlockNumber firstblock;         /* first block to sample from */
+       BlockNumber step;                       /* step size, or 0 if not set yet */
+} SystemRowsSamplerData;
+
+static void system_rows_samplescangetsamplesize(PlannerInfo *root,
+                                                                       RelOptInfo *baserel,
+                                                                       List *paramexprs,
+                                                                       BlockNumber *pages,
+                                                                       double *tuples);
+static void system_rows_initsamplescan(SampleScanState *node,
+                                                  int eflags);
+static void system_rows_beginsamplescan(SampleScanState *node,
+                                                       Datum *params,
+                                                       int nparams,
+                                                       uint32 seed);
+static BlockNumber system_rows_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_rows_nextsampletuple(SampleScanState *node,
+                                                       BlockNumber blockno,
+                                                       OffsetNumber maxoffset);
+static bool SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan);
  static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
  
+
  /*
- * Initializes the state.
+ * Create a TsmRoutine descriptor for the SYSTEM_ROWS method.
   */
  Datum
-tsm_system_rows_init(PG_FUNCTION_ARGS)
+tsm_system_rows_handler(PG_FUNCTION_ARGS)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       uint32          seed = PG_GETARG_UINT32(1);
-       int32           ntuples = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
-       HeapScanDesc scan = tsdesc->heapScan;
-       SystemSamplerData *sampler;
+       TsmRoutine *tsm = makeNode(TsmRoutine);
  
-       if (ntuples < 1)
-               ereport(ERROR,
-                               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                                errmsg("invalid sample size"),
-                                errhint("Sample size must be positive integer value.")));
+       tsm->parameterTypes = list_make1_oid(INT8OID);
  
-       sampler = palloc0(sizeof(SystemSamplerData));
+       /* See notes at head of file */
+       tsm->repeatable_across_queries = false;
+       tsm->repeatable_across_scans = true;
  
-       /* Remember initial values for reinit */
-       sampler->seed = seed;
-       sampler->nblocks = scan->rs_nblocks;
-       sampler->ntuples = ntuples;
-       sampler->donetuples = 0;
-       sampler->lt = InvalidOffsetNumber;
-       sampler->doneblocks = 0;
-
-       sampler_random_init_state(sampler->seed, sampler->randstate);
-
-       /* Find relative prime as step size for linear probing. */
-       sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-
-       /*
-        * Randomize start position so that blocks close to step size don't have
-        * higher probability of being chosen on very short scan.
-        */
-       sampler->lb = sampler_random_fract(sampler->randstate) *
-               (sampler->nblocks / sampler->step);
+       tsm->SampleScanGetSampleSize = system_rows_samplescangetsamplesize;
+       tsm->InitSampleScan = system_rows_initsamplescan;
+       tsm->BeginSampleScan = system_rows_beginsamplescan;
+       tsm->NextSampleBlock = system_rows_nextsampleblock;
+       tsm->NextSampleTuple = system_rows_nextsampletuple;
+       tsm->EndSampleScan = NULL;
  
-       tsdesc->tsmdata = (void *) sampler;
-
-       PG_RETURN_VOID();
+       PG_RETURN_POINTER(tsm);
  }
  
  /*
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses linear probing algorithm for picking next block.
+ * Sample size estimation.
   */
-Datum
-tsm_system_rows_nextblock(PG_FUNCTION_ARGS)
+static void
+system_rows_samplescangetsamplesize(PlannerInfo *root,
+                                                                       RelOptInfo *baserel,
+                                                                       List *paramexprs,
+                                                                       BlockNumber *pages,
+                                                                       double *tuples)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+       Node       *limitnode;
+       int64           ntuples;
+       double          npages;
  
-       sampler->lb = (sampler->lb + sampler->step) % sampler->nblocks;
-       sampler->doneblocks++;
+       /* Try to extract an estimate for the limit rowcount */
+       limitnode = (Node *) linitial(paramexprs);
+       limitnode = estimate_expression_value(root, limitnode);
  
-       /* All blocks have been read, we're done */
-       if (sampler->doneblocks > sampler->nblocks ||
-               sampler->donetuples >= sampler->ntuples)
-               PG_RETURN_UINT32(InvalidBlockNumber);
+       if (IsA(limitnode, Const) &&
+               !((Const *) limitnode)->constisnull)
+       {
+               ntuples = DatumGetInt64(((Const *) limitnode)->constvalue);
+               if (ntuples < 0)
+               {
+                       /* Default ntuples if the value is bogus */
+                       ntuples = 1000;
+               }
+       }
+       else
+       {
+               /* Default ntuples if we didn't obtain a non-null Const */
+               ntuples = 1000;
+       }
  
-       PG_RETURN_UINT32(sampler->lb);
-}
+       /* Clamp to the estimated relation size */
+       if (ntuples > baserel->tuples)
+               ntuples = (int64) baserel->tuples;
+       ntuples = clamp_row_est(ntuples);
  
-/*
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
- */
-Datum
-tsm_system_rows_nexttuple(PG_FUNCTION_ARGS)
-{
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-       SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-       OffsetNumber tupoffset = sampler->lt;
+       if (baserel->tuples > 0 && baserel->pages > 0)
+       {
+               /* Estimate number of pages visited based on tuple density */
+               double          density = baserel->tuples / (double) baserel->pages;
  
-       if (tupoffset == InvalidOffsetNumber)
-               tupoffset = FirstOffsetNumber;
+               npages = ntuples / density;
+       }
         else
-               tupoffset++;
-
-       if (tupoffset > maxoffset ||
-               sampler->donetuples >= sampler->ntuples)
-               tupoffset = InvalidOffsetNumber;
+       {
+               /* For lack of data, assume one tuple per page */
+               npages = ntuples;
+       }
  
-       sampler->lt = tupoffset;
+       /* Clamp to sane value */
+       npages = clamp_row_est(Min((double) baserel->pages, npages));
  
-       PG_RETURN_UINT16(tupoffset);
+       *pages = npages;
+       *tuples = ntuples;
  }
  
  /*
- * Examine tuple and decide if it should be returned.
+ * Initialize during executor setup.
   */
-Datum
-tsm_system_rows_examinetuple(PG_FUNCTION_ARGS)
+static void
+system_rows_initsamplescan(SampleScanState *node, int eflags)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       bool            visible = PG_GETARG_BOOL(3);
-       SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-
-       if (!visible)
-               PG_RETURN_BOOL(false);
-
-       sampler->donetuples++;
-
-       PG_RETURN_BOOL(true);
+       node->tsm_state = palloc0(sizeof(SystemRowsSamplerData));
+       /* Note the above leaves tsm_state->step equal to zero */
  }
  
  /*
- * Cleanup method.
+ * Examine parameters and prepare for a sample scan.
   */
-Datum
-tsm_system_rows_end(PG_FUNCTION_ARGS)
+static void
+system_rows_beginsamplescan(SampleScanState *node,
+                                                       Datum *params,
+                                                       int nparams,
+                                                       uint32 seed)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+       SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+       int64           ntuples = DatumGetInt64(params[0]);
+
+       if (ntuples < 0)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                                errmsg("sample size must not be negative")));
  
-       pfree(tsdesc->tsmdata);
+       sampler->seed = seed;
+       sampler->ntuples = ntuples;
+       sampler->donetuples = 0;
+       sampler->lt = InvalidOffsetNumber;
+       sampler->doneblocks = 0;
+       /* lb will be initialized during first NextSampleBlock call */
+       /* we intentionally do not change nblocks/firstblock/step here */
  
-       PG_RETURN_VOID();
+       /*
+        * We *must* use pagemode visibility checking in this module, so force
+        * that even though it's currently default.
+        */
+       node->use_pagemode = true;
  }
  
  /*
- * Reset state (called by ReScan).
+ * Select next block to sample.
+ *
+ * Uses a linear probing algorithm for picking the next block.
   */
-Datum
-tsm_system_rows_reset(PG_FUNCTION_ARGS)
+static BlockNumber
+system_rows_nextsampleblock(SampleScanState *node)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+       SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+       HeapScanDesc scan = node->ss.ss_currentScanDesc;
  
-       sampler->lt = InvalidOffsetNumber;
-       sampler->donetuples = 0;
-       sampler->doneblocks = 0;
+       /* First call within scan? */
+       if (sampler->doneblocks == 0)
+       {
+               /* First scan within query? */
+               if (sampler->step == 0)
+               {
+                       /* Initialize now that we have scan descriptor */
+                       SamplerRandomState randstate;
+
+                       /* If relation is empty, there's nothing to scan */
+                       if (scan->rs_nblocks == 0)
+                               return InvalidBlockNumber;
+
+                       /* We only need an RNG during this setup step */
+                       sampler_random_init_state(sampler->seed, randstate);
+
+                       /* Compute nblocks/firstblock/step only once per query */
+                       sampler->nblocks = scan->rs_nblocks;
  
-       sampler_random_init_state(sampler->seed, sampler->randstate);
-       sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-       sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
+                       /* Choose random starting block within the relation */
+                       /* (Actually this is the predecessor of the first block visited) */
+                       sampler->firstblock = sampler_random_fract(randstate) *
+                               sampler->nblocks;
+
+                       /* Find relative prime as step size for linear probing */
+                       sampler->step = random_relative_prime(sampler->nblocks, randstate);
+               }
+
+               /* Reinitialize lb */
+               sampler->lb = sampler->firstblock;
+       }
+
+       /* If we've read all blocks or returned all needed tuples, we're done */
+       if (++sampler->doneblocks > sampler->nblocks ||
+               sampler->donetuples >= sampler->ntuples)
+               return InvalidBlockNumber;
+
+       /*
+        * It's probably impossible for scan->rs_nblocks to decrease between scans
+        * within a query; but just in case, loop until we select a block number
+        * less than scan->rs_nblocks.  We don't care if scan->rs_nblocks has
+        * increased since the first scan.
+        */
+       do
+       {
+               /* Advance lb, using uint64 arithmetic to forestall overflow */
+               sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
+       } while (sampler->lb >= scan->rs_nblocks);
  
-       PG_RETURN_VOID();
+       return sampler->lb;
  }
  
  /*
- * Costing function.
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * When we reach the end of the block, return InvalidOffsetNumber, which
+ * tells SampleScan to go to the next block.
   */
-Datum
-tsm_system_rows_cost(PG_FUNCTION_ARGS)
+static OffsetNumber
+system_rows_nextsampletuple(SampleScanState *node,
+                                                       BlockNumber blockno,
+                                                       OffsetNumber maxoffset)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       Path       *path = (Path *) PG_GETARG_POINTER(1);
-       RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-       List       *args = (List *) PG_GETARG_POINTER(3);
-       BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-       double     *tuples = (double *) PG_GETARG_POINTER(5);
-       Node       *limitnode;
-       int32           ntuples;
+       SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+       HeapScanDesc scan = node->ss.ss_currentScanDesc;
+       OffsetNumber tupoffset = sampler->lt;
  
-       limitnode = linitial(args);
-       limitnode = estimate_expression_value(root, limitnode);
+       /* Quit if we've returned all needed tuples */
+       if (sampler->donetuples >= sampler->ntuples)
+               return InvalidOffsetNumber;
  
-       if (IsA(limitnode, RelabelType))
-               limitnode = (Node *) ((RelabelType *) limitnode)->arg;
+       /*
+        * Because we should only count visible tuples as being returned, we need
+        * to search for a visible tuple rather than just let the core code do it.
+        */
  
-       if (IsA(limitnode, Const))
-               ntuples = DatumGetInt32(((Const *) limitnode)->constvalue);
-       else
+       /* We rely on the data accumulated in pagemode access */
+       Assert(scan->rs_pageatatime);
+       for (;;)
         {
-               /* Default ntuples if the estimation didn't return Const. */
-               ntuples = 1000;
+               /* Advance to next possible offset on page */
+               if (tupoffset == InvalidOffsetNumber)
+                       tupoffset = FirstOffsetNumber;
+               else
+                       tupoffset++;
+
+               /* Done? */
+               if (tupoffset > maxoffset)
+               {
+                       tupoffset = InvalidOffsetNumber;
+                       break;
+               }
+
+               /* Found a candidate? */
+               if (SampleOffsetVisible(tupoffset, scan))
+               {
+                       sampler->donetuples++;
+                       break;
+               }
         }
  
-       *pages = Min(baserel->pages, ntuples);
-       *tuples = ntuples;
-       path->rows = *tuples;
+       sampler->lt = tupoffset;
  
-       PG_RETURN_VOID();
+       return tupoffset;
  }
  
+/*
+ * Check if tuple offset is visible
+ *
+ * In pageatatime mode, heapgetpage() already did visibility checks,
+ * so just look at the info it left in rs_vistuples[].
+ */
+static bool
+SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan)
+{
+       int                     start = 0,
+                               end = scan->rs_ntuples - 1;
+
+       while (start <= end)
+       {
+               int                     mid = (start + end) / 2;
+               OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+               if (tupoffset == curoffset)
+                       return true;
+               else if (tupoffset < curoffset)
+                       end = mid - 1;
+               else
+                       start = mid + 1;
+       }
+
+       return false;
+}
  
+/*
+ * Compute greatest common divisor of two uint32's.
+ */
  static uint32
  gcd(uint32 a, uint32 b)
  {
@@ -250,22 +366,29 @@ gcd(uint32 a, uint32 b)
         return b;
  }
  
+/*
+ * Pick a random value less than and relatively prime to n, if possible
+ * (else return 1).
+ */
  static uint32
  random_relative_prime(uint32 n, SamplerRandomState randstate)
  {
-       /* Pick random starting number, with some limits on what it can be. */
-       uint32          r = (uint32) sampler_random_fract(randstate) * n / 2 + n / 4,
-                               t;
+       uint32          r;
+
+       /* Safety check to avoid infinite loop or zero result for small n. */
+       if (n <= 1)
+               return 1;
  
         /*
          * This should only take 2 or 3 iterations as the probability of 2 numbers
-        * being relatively prime is ~61%.
+        * being relatively prime is ~61%; but just in case, we'll include a
+        * CHECK_FOR_INTERRUPTS in the loop.
          */
-       while ((t = gcd(r, n)) > 1)
+       do
         {
                 CHECK_FOR_INTERRUPTS();
-               r /= t;
-       }
+               r = (uint32) (sampler_random_fract(randstate) * n);
+       } while (r == 0 || gcd(r, n) > 1);
  
         return r;
  }
diff --git a/contrib/tsm_system_rows/tsm_system_rows.control b/contrib/tsm_system_rows/tsm_system_rows.control

index 84ea7adb49a261247b936ba4c884b2edb165529a..4bd0232f97215933516bf5ea801a49f43f8c0d6c 100644 (file)
--- a/contrib/tsm_system_rows/tsm_system_rows.control
+++ b/contrib/tsm_system_rows/tsm_system_rows.control
@@ -1,5 +1,5 @@
  # tsm_system_rows extension
-comment = 'SYSTEM TABLESAMPLE method which accepts number rows as a limit'
+comment = 'TABLESAMPLE method which accepts number of rows as a limit'
  default_version = '1.0'
  module_pathname = '$libdir/tsm_system_rows'
  relocatable = true
diff --git a/contrib/tsm_system_time/Makefile b/contrib/tsm_system_time/Makefile

index c42c1c6bb61f22f1e01925c02e41ecba53f184e6..168becf54e2ff225a583437d87d590602b6677e4 100644 (file)
--- a/contrib/tsm_system_time/Makefile
+++ b/contrib/tsm_system_time/Makefile
@@ -1,8 +1,8 @@
-# src/test/modules/tsm_system_time/Makefile
+# contrib/tsm_system_time/Makefile
  
  MODULE_big = tsm_system_time
  OBJS = tsm_system_time.o $(WIN32RES)
-PGFILEDESC = "tsm_system_time - SYSTEM TABLESAMPLE method which accepts number rows of as a limit"
+PGFILEDESC = "tsm_system_time - TABLESAMPLE method which accepts time in milliseconds as a limit"
  
  EXTENSION = tsm_system_time
  DATA = tsm_system_time--1.0.sql
diff --git a/contrib/tsm_system_time/expected/tsm_system_time.out b/contrib/tsm_system_time/expected/tsm_system_time.out

index 32ad03c4bdcef47e9bfce5ed7ab2060b528e77bc..ac44f30be90386407273b213239aa234a5dd0bd1 100644 (file)
--- a/contrib/tsm_system_time/expected/tsm_system_time.out
+++ b/contrib/tsm_system_time/expected/tsm_system_time.out
@@ -1,54 +1,100 @@
  CREATE EXTENSION tsm_system_time;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
  ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
+-- It's a bit tricky to test SYSTEM_TIME in a platform-independent way.
+-- We can test the zero-time corner case ...
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (0);
   count 
  -------
-    31
+     0
  (1 row)
  
-SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
- id 
-----
-  7
- 14
- 21
- 28
-  4
- 11
- 18
- 25
-  1
-  8
- 15
- 22
- 29
-  5
- 12
- 19
- 26
-  2
-  9
- 16
- 23
- 30
-  6
- 13
- 20
- 27
-  3
- 10
- 17
- 24
-  0
-(31 rows)
-
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
-                                     QUERY PLAN                                     
-------------------------------------------------------------------------------------
- Sample Scan (system_time) on test_tablesample  (cost=0.00..100.25 rows=25 width=4)
+-- ... and we assume that this will finish before running out of time:
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (100000);
+ count 
+-------
+    31
  (1 row)
  
--- done
-DROP TABLE test_tablesample CASCADE;
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+                    QUERY PLAN                    
+--------------------------------------------------
+ Sample Scan on test_tablesample
+   Sampling: system_time ('-1'::double precision)
+(2 rows)
+
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+ERROR:  sample collection time must not be negative
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_time (10) REPEATABLE (0);
+ERROR:  tablesample method system_time does not support REPEATABLE
+LINE 1: SELECT * FROM test_tablesample TABLESAMPLE system_time (10) ...
+                                                   ^
+-- since it's not repeatable, we expect a Materialize node in these plans:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Nested Loop
+   ->  Aggregate
+         ->  Materialize
+               ->  Sample Scan on test_tablesample
+                     Sampling: system_time ('100000'::double precision)
+   ->  Values Scan on "*VALUES*"
+(6 rows)
+
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+  time  | count 
+--------+-------
+      0 |    31
+ 100000 |    31
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Nested Loop
+   ->  Values Scan on "*VALUES*"
+   ->  Aggregate
+         ->  Materialize
+               ->  Sample Scan on test_tablesample
+                     Sampling: system_time ("*VALUES*".column1)
+(6 rows)
+
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+  time  | count 
+--------+-------
+      0 |     0
+ 100000 |    31
+(2 rows)
+
+CREATE VIEW vv AS
+  SELECT * FROM test_tablesample TABLESAMPLE system_time (20);
+EXPLAIN (COSTS OFF) SELECT * FROM vv;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Sample Scan on test_tablesample
+   Sampling: system_time ('20'::double precision)
+(2 rows)
+
+DROP EXTENSION tsm_system_time;  -- fail, view depends on extension
+ERROR:  cannot drop extension tsm_system_time because other objects depend on it
+DETAIL:  view vv depends on function system_time(internal)
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
diff --git a/contrib/tsm_system_time/sql/tsm_system_time.sql b/contrib/tsm_system_time/sql/tsm_system_time.sql

index 68dbbf98afd2df5f66e0aa5258c4e00166613d33..117de163d85059f362cda878ef94f5f6b43aa65e 100644 (file)
--- a/contrib/tsm_system_time/sql/tsm_system_time.sql
+++ b/contrib/tsm_system_time/sql/tsm_system_time.sql
@@ -1,14 +1,51 @@
  CREATE EXTENSION tsm_system_time;
  
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
  ANALYZE test_tablesample;
  
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
-SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
+-- It's a bit tricky to test SYSTEM_TIME in a platform-independent way.
+-- We can test the zero-time corner case ...
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (0);
+-- ... and we assume that this will finish before running out of time:
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (100000);
+
+-- bad parameters should get through planning, but not execution:
+EXPLAIN (COSTS OFF)
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_time (10) REPEATABLE (0);
+
+-- since it's not repeatable, we expect a Materialize node in these plans:
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+
+SELECT * FROM
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+
+CREATE VIEW vv AS
+  SELECT * FROM test_tablesample TABLESAMPLE system_time (20);
  
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
+EXPLAIN (COSTS OFF) SELECT * FROM vv;
  
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_time;  -- fail, view depends on extension
diff --git a/contrib/tsm_system_time/tsm_system_time--1.0.sql b/contrib/tsm_system_time/tsm_system_time--1.0.sql

index 1f390d6ed7acac601567e67a42fdbc2220802ac8..c59d2e84efdabfa9e9163c036eb702e8ad981d26 100644 (file)
--- a/contrib/tsm_system_time/tsm_system_time--1.0.sql
+++ b/contrib/tsm_system_time/tsm_system_time--1.0.sql
@@ -1,39 +1,9 @@
-/* src/test/modules/tablesample/tsm_system_time--1.0.sql */
+/* contrib/tsm_system_time/tsm_system_time--1.0.sql */
  
  -- complain if script is sourced in psql, rather than via CREATE EXTENSION
  \echo Use "CREATE EXTENSION tsm_system_time" to load this file. \quit
  
-CREATE FUNCTION tsm_system_time_init(internal, int4, int4)
-RETURNS void
-AS 'MODULE_PATHNAME'
+CREATE FUNCTION system_time(internal)
+RETURNS tsm_handler
+AS 'MODULE_PATHNAME', 'tsm_system_time_handler'
  LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_nextblock(internal)
-RETURNS int4
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_nexttuple(internal, int4, int2)
-RETURNS int2
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_end(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_reset(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-CREATE FUNCTION tsm_system_time_cost(internal, internal, internal, internal, internal, internal, internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE C STRICT;
-
-INSERT INTO pg_tablesample_method VALUES('system_time', false, true,
-       'tsm_system_time_init', 'tsm_system_time_nextblock',
-       'tsm_system_time_nexttuple', '-', 'tsm_system_time_end',
-       'tsm_system_time_reset', 'tsm_system_time_cost');
diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c

index 7708fc07617488e9a57128a72eba9707004dc9f3..83f1455c5fa248b3028e095acd8ceedd6ae4c9e1 100644 (file)
--- a/contrib/tsm_system_time/tsm_system_time.c
+++ b/contrib/tsm_system_time/tsm_system_time.c
@@ -1,286 +1,320 @@
  /*-------------------------------------------------------------------------
   *
   * tsm_system_time.c
- *       interface routines for system_time tablesample method
+ *       support routines for SYSTEM_TIME tablesample method
   *
+ * The desire here is to produce a random sample with as many rows as possible
+ * in no more than the specified amount of time.  We use a block-sampling
+ * approach.  To ensure that the whole relation will be visited if necessary,
+ * we start at a randomly chosen block and then advance with a stride that
+ * is randomly chosen but is relatively prime to the relation's nblocks.
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Because of the time dependence, this method is necessarily unrepeatable.
+ * However, we do what we can to reduce surprising behavior by selecting
+ * the sampling pattern just once per query, much as in tsm_system_rows.
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       contrib/tsm_system_time_rowlimit/tsm_system_time.c
+ *       contrib/tsm_system_time/tsm_system_time.c
   *
   *-------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h>                             /* for _isnan */
+#endif
+#include <math.h>
  
-#include "access/tablesample.h"
  #include "access/relscan.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
  #include "miscadmin.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
  #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
+#include "optimizer/cost.h"
  #include "utils/sampling.h"
  #include "utils/spccache.h"
-#include "utils/timestamp.h"
  
  PG_MODULE_MAGIC;
  
-/*
- * State
- */
+PG_FUNCTION_INFO_V1(tsm_system_time_handler);
+
+
+/* Private state */
  typedef struct
  {
-       SamplerRandomState randstate;
         uint32          seed;                   /* random seed */
-       BlockNumber nblocks;            /* number of block in relation */
-       int32           time;                   /* time limit for sampling */
-       TimestampTz start_time;         /* start time of sampling */
-       TimestampTz end_time;           /* end time of sampling */
+       double          millis;                 /* time limit for sampling */
+       instr_time      start_time;             /* scan start time */
         OffsetNumber lt;                        /* last tuple returned from current block */
-       BlockNumber step;                       /* step size */
+       BlockNumber doneblocks;         /* number of already-scanned blocks */
         BlockNumber lb;                         /* last block visited */
-       BlockNumber estblocks;          /* estimated number of returned blocks
-                                                                * (moving) */
-       BlockNumber doneblocks;         /* number of already returned blocks */
-} SystemSamplerData;
-
-
-PG_FUNCTION_INFO_V1(tsm_system_time_init);
-PG_FUNCTION_INFO_V1(tsm_system_time_nextblock);
-PG_FUNCTION_INFO_V1(tsm_system_time_nexttuple);
-PG_FUNCTION_INFO_V1(tsm_system_time_end);
-PG_FUNCTION_INFO_V1(tsm_system_time_reset);
-PG_FUNCTION_INFO_V1(tsm_system_time_cost);
-
+       /* these three values are not changed during a rescan: */
+       BlockNumber nblocks;            /* number of blocks in relation */
+       BlockNumber firstblock;         /* first block to sample from */
+       BlockNumber step;                       /* step size, or 0 if not set yet */
+} SystemTimeSamplerData;
+
+static void system_time_samplescangetsamplesize(PlannerInfo *root,
+                                                                       RelOptInfo *baserel,
+                                                                       List *paramexprs,
+                                                                       BlockNumber *pages,
+                                                                       double *tuples);
+static void system_time_initsamplescan(SampleScanState *node,
+                                                  int eflags);
+static void system_time_beginsamplescan(SampleScanState *node,
+                                                       Datum *params,
+                                                       int nparams,
+                                                       uint32 seed);
+static BlockNumber system_time_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_time_nextsampletuple(SampleScanState *node,
+                                                       BlockNumber blockno,
+                                                       OffsetNumber maxoffset);
  static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
  
+
  /*
- * Initializes the state.
+ * Create a TsmRoutine descriptor for the SYSTEM_TIME method.
   */
  Datum
-tsm_system_time_init(PG_FUNCTION_ARGS)
+tsm_system_time_handler(PG_FUNCTION_ARGS)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       uint32          seed = PG_GETARG_UINT32(1);
-       int32           time = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
-       HeapScanDesc scan = tsdesc->heapScan;
-       SystemSamplerData *sampler;
-
-       if (time < 1)
-               ereport(ERROR,
-                               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                                errmsg("invalid time limit"),
-                                errhint("Time limit must be positive integer value.")));
+       TsmRoutine *tsm = makeNode(TsmRoutine);
  
-       sampler = palloc0(sizeof(SystemSamplerData));
+       tsm->parameterTypes = list_make1_oid(FLOAT8OID);
  
-       /* Remember initial values for reinit */
-       sampler->seed = seed;
-       sampler->nblocks = scan->rs_nblocks;
-       sampler->lt = InvalidOffsetNumber;
-       sampler->estblocks = 2;
-       sampler->doneblocks = 0;
-       sampler->time = time;
-       sampler->start_time = GetCurrentTimestamp();
-       sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time,
-                                                                                                       sampler->time);
+       /* See notes at head of file */
+       tsm->repeatable_across_queries = false;
+       tsm->repeatable_across_scans = false;
  
-       sampler_random_init_state(sampler->seed, sampler->randstate);
+       tsm->SampleScanGetSampleSize = system_time_samplescangetsamplesize;
+       tsm->InitSampleScan = system_time_initsamplescan;
+       tsm->BeginSampleScan = system_time_beginsamplescan;
+       tsm->NextSampleBlock = system_time_nextsampleblock;
+       tsm->NextSampleTuple = system_time_nextsampletuple;
+       tsm->EndSampleScan = NULL;
  
-       /* Find relative prime as step size for linear probing. */
-       sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-
-       /*
-        * Randomize start position so that blocks close to step size don't have
-        * higher probability of being chosen on very short scan.
-        */
-       sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
-
-       tsdesc->tsmdata = (void *) sampler;
-
-       PG_RETURN_VOID();
+       PG_RETURN_POINTER(tsm);
  }
  
  /*
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses linear probing algorithm for picking next block.
+ * Sample size estimation.
   */
-Datum
-tsm_system_time_nextblock(PG_FUNCTION_ARGS)
+static void
+system_time_samplescangetsamplesize(PlannerInfo *root,
+                                                                       RelOptInfo *baserel,
+                                                                       List *paramexprs,
+                                                                       BlockNumber *pages,
+                                                                       double *tuples)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-
-       sampler->lb = (sampler->lb + sampler->step) % sampler->nblocks;
-       sampler->doneblocks++;
+       Node       *limitnode;
+       double          millis;
+       double          spc_random_page_cost;
+       double          npages;
+       double          ntuples;
  
-       /* All blocks have been read, we're done */
-       if (sampler->doneblocks > sampler->nblocks)
-               PG_RETURN_UINT32(InvalidBlockNumber);
+       /* Try to extract an estimate for the limit time spec */
+       limitnode = (Node *) linitial(paramexprs);
+       limitnode = estimate_expression_value(root, limitnode);
  
-       /*
-        * Update the estimations for time limit at least 10 times per estimated
-        * number of returned blocks to handle variations in block read speed.
-        */
-       if (sampler->doneblocks % Max(sampler->estblocks / 10, 1) == 0)
+       if (IsA(limitnode, Const) &&
+               !((Const *) limitnode)->constisnull)
+       {
+               millis = DatumGetFloat8(((Const *) limitnode)->constvalue);
+               if (millis < 0 || isnan(millis))
+               {
+                       /* Default millis if the value is bogus */
+                       millis = 1000;
+               }
+       }
+       else
         {
-               TimestampTz now = GetCurrentTimestamp();
-               long            secs;
-               int                     usecs;
-               int                     usecs_remaining;
-               int                     time_per_block;
+               /* Default millis if we didn't obtain a non-null Const */
+               millis = 1000;
+       }
  
-               TimestampDifference(sampler->start_time, now, &secs, &usecs);
-               usecs += (int) secs *1000000;
+       /* Get the planner's idea of cost per page read */
+       get_tablespace_page_costs(baserel->reltablespace,
+                                                         &spc_random_page_cost,
+                                                         NULL);
  
-               time_per_block = usecs / sampler->doneblocks;
+       /*
+        * Estimate the number of pages we can read by assuming that the cost
+        * figure is expressed in milliseconds.  This is completely, unmistakably
+        * bogus, but we have to do something to produce an estimate and there's
+        * no better answer.
+        */
+       if (spc_random_page_cost > 0)
+               npages = millis / spc_random_page_cost;
+       else
+               npages = millis;                /* even more bogus, but whatcha gonna do? */
  
-               /* No time left, end. */
-               TimestampDifference(now, sampler->end_time, &secs, &usecs);
-               if (secs <= 0 && usecs <= 0)
-                       PG_RETURN_UINT32(InvalidBlockNumber);
+       /* Clamp to sane value */
+       npages = clamp_row_est(Min((double) baserel->pages, npages));
  
-               /* Remaining microseconds */
-               usecs_remaining = usecs + (int) secs *1000000;
+       if (baserel->tuples > 0 && baserel->pages > 0)
+       {
+               /* Estimate number of tuples returned based on tuple density */
+               double          density = baserel->tuples / (double) baserel->pages;
  
-               /* Recalculate estimated returned number of blocks */
-               if (time_per_block < usecs_remaining && time_per_block > 0)
-                       sampler->estblocks = sampler->time * time_per_block;
+               ntuples = npages * density;
         }
-
-       PG_RETURN_UINT32(sampler->lb);
-}
-
-/*
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
- */
-Datum
-tsm_system_time_nexttuple(PG_FUNCTION_ARGS)
-{
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-       SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-       OffsetNumber tupoffset = sampler->lt;
-
-       if (tupoffset == InvalidOffsetNumber)
-               tupoffset = FirstOffsetNumber;
         else
-               tupoffset++;
-
-       if (tupoffset > maxoffset)
-               tupoffset = InvalidOffsetNumber;
+       {
+               /* For lack of data, assume one tuple per page */
+               ntuples = npages;
+       }
  
-       sampler->lt = tupoffset;
+       /* Clamp to the estimated relation size */
+       ntuples = clamp_row_est(Min(baserel->tuples, ntuples));
  
-       PG_RETURN_UINT16(tupoffset);
+       *pages = npages;
+       *tuples = ntuples;
  }
  
  /*
- * Cleanup method.
+ * Initialize during executor setup.
   */
-Datum
-tsm_system_time_end(PG_FUNCTION_ARGS)
+static void
+system_time_initsamplescan(SampleScanState *node, int eflags)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
-       pfree(tsdesc->tsmdata);
-
-       PG_RETURN_VOID();
+       node->tsm_state = palloc0(sizeof(SystemTimeSamplerData));
+       /* Note the above leaves tsm_state->step equal to zero */
  }
  
  /*
- * Reset state (called by ReScan).
+ * Examine parameters and prepare for a sample scan.
   */
-Datum
-tsm_system_time_reset(PG_FUNCTION_ARGS)
+static void
+system_time_beginsamplescan(SampleScanState *node,
+                                                       Datum *params,
+                                                       int nparams,
+                                                       uint32 seed)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+       SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+       double          millis = DatumGetFloat8(params[0]);
+
+       if (millis < 0 || isnan(millis))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                                errmsg("sample collection time must not be negative")));
  
+       sampler->seed = seed;
+       sampler->millis = millis;
         sampler->lt = InvalidOffsetNumber;
-       sampler->start_time = GetCurrentTimestamp();
-       sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time,
-                                                                                                       sampler->time);
-       sampler->estblocks = 2;
         sampler->doneblocks = 0;
-
-       sampler_random_init_state(sampler->seed, sampler->randstate);
-       sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-       sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
-
-       PG_RETURN_VOID();
+       /* start_time, lb will be initialized during first NextSampleBlock call */
+       /* we intentionally do not change nblocks/firstblock/step here */
  }
  
  /*
- * Costing function.
+ * Select next block to sample.
+ *
+ * Uses a linear probing algorithm to pick the next block.
   */
-Datum
-tsm_system_time_cost(PG_FUNCTION_ARGS)
+static BlockNumber
+system_time_nextsampleblock(SampleScanState *node)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       Path       *path = (Path *) PG_GETARG_POINTER(1);
-       RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-       List       *args = (List *) PG_GETARG_POINTER(3);
-       BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-       double     *tuples = (double *) PG_GETARG_POINTER(5);
-       Node       *limitnode;
-       int32           time;
-       BlockNumber relpages;
-       double          reltuples;
-       double          density;
-       double          spc_random_page_cost;
-
-       limitnode = linitial(args);
-       limitnode = estimate_expression_value(root, limitnode);
-
-       if (IsA(limitnode, RelabelType))
-               limitnode = (Node *) ((RelabelType *) limitnode)->arg;
+       SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+       HeapScanDesc scan = node->ss.ss_currentScanDesc;
+       instr_time      cur_time;
  
-       if (IsA(limitnode, Const))
-               time = DatumGetInt32(((Const *) limitnode)->constvalue);
-       else
+       /* First call within scan? */
+       if (sampler->doneblocks == 0)
         {
-               /* Default time (1s) if the estimation didn't return Const. */
-               time = 1000;
+               /* First scan within query? */
+               if (sampler->step == 0)
+               {
+                       /* Initialize now that we have scan descriptor */
+                       SamplerRandomState randstate;
+
+                       /* If relation is empty, there's nothing to scan */
+                       if (scan->rs_nblocks == 0)
+                               return InvalidBlockNumber;
+
+                       /* We only need an RNG during this setup step */
+                       sampler_random_init_state(sampler->seed, randstate);
+
+                       /* Compute nblocks/firstblock/step only once per query */
+                       sampler->nblocks = scan->rs_nblocks;
+
+                       /* Choose random starting block within the relation */
+                       /* (Actually this is the predecessor of the first block visited) */
+                       sampler->firstblock = sampler_random_fract(randstate) *
+                               sampler->nblocks;
+
+                       /* Find relative prime as step size for linear probing */
+                       sampler->step = random_relative_prime(sampler->nblocks, randstate);
+               }
+
+               /* Reinitialize lb and start_time */
+               sampler->lb = sampler->firstblock;
+               INSTR_TIME_SET_CURRENT(sampler->start_time);
         }
  
-       relpages = baserel->pages;
-       reltuples = baserel->tuples;
+       /* If we've read all blocks in relation, we're done */
+       if (++sampler->doneblocks > sampler->nblocks)
+               return InvalidBlockNumber;
  
-       /* estimate the tuple density */
-       if (relpages > 0)
-               density = reltuples / (double) relpages;
-       else
-               density = (BLCKSZ - SizeOfPageHeaderData) / baserel->width;
+       /* If we've used up all the allotted time, we're done */
+       INSTR_TIME_SET_CURRENT(cur_time);
+       INSTR_TIME_SUBTRACT(cur_time, sampler->start_time);
+       if (INSTR_TIME_GET_MILLISEC(cur_time) >= sampler->millis)
+               return InvalidBlockNumber;
  
         /*
-        * We equal random page cost value to number of ms it takes to read the
-        * random page here which is far from accurate but we don't have anything
-        * better to base our predicted page reads.
+        * It's probably impossible for scan->rs_nblocks to decrease between scans
+        * within a query; but just in case, loop until we select a block number
+        * less than scan->rs_nblocks.  We don't care if scan->rs_nblocks has
+        * increased since the first scan.
          */
-       get_tablespace_page_costs(baserel->reltablespace,
-                                                         &spc_random_page_cost,
-                                                         NULL);
+       do
+       {
+               /* Advance lb, using uint64 arithmetic to forestall overflow */
+               sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
+       } while (sampler->lb >= scan->rs_nblocks);
  
-       /*
-        * Assumption here is that we'll never read less than 1% of table pages,
-        * this is here mainly because it is much less bad to overestimate than
-        * underestimate and using just spc_random_page_cost will probably lead to
-        * underestimations in general.
-        */
-       *pages = Min(baserel->pages, Max(time / spc_random_page_cost, baserel->pages / 100));
-       *tuples = rint(density * (double) *pages * path->rows / baserel->tuples);
-       path->rows = *tuples;
+       return sampler->lb;
+}
+
+/*
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * When we reach the end of the block, return InvalidOffsetNumber, which
+ * tells SampleScan to go to the next block.
+ */
+static OffsetNumber
+system_time_nextsampletuple(SampleScanState *node,
+                                                       BlockNumber blockno,
+                                                       OffsetNumber maxoffset)
+{
+       SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+       OffsetNumber tupoffset = sampler->lt;
+
+       /* Advance to next possible offset on page */
+       if (tupoffset == InvalidOffsetNumber)
+               tupoffset = FirstOffsetNumber;
+       else
+               tupoffset++;
+
+       /* Done? */
+       if (tupoffset > maxoffset)
+               tupoffset = InvalidOffsetNumber;
+
+       sampler->lt = tupoffset;
  
-       PG_RETURN_VOID();
+       return tupoffset;
  }
  
+/*
+ * Compute greatest common divisor of two uint32's.
+ */
  static uint32
  gcd(uint32 a, uint32 b)
  {
@@ -296,22 +330,29 @@ gcd(uint32 a, uint32 b)
         return b;
  }
  
+/*
+ * Pick a random value less than and relatively prime to n, if possible
+ * (else return 1).
+ */
  static uint32
  random_relative_prime(uint32 n, SamplerRandomState randstate)
  {
-       /* Pick random starting number, with some limits on what it can be. */
-       uint32          r = (uint32) sampler_random_fract(randstate) * n / 2 + n / 4,
-                               t;
+       uint32          r;
+
+       /* Safety check to avoid infinite loop or zero result for small n. */
+       if (n <= 1)
+               return 1;
  
         /*
          * This should only take 2 or 3 iterations as the probability of 2 numbers
-        * being relatively prime is ~61%.
+        * being relatively prime is ~61%; but just in case, we'll include a
+        * CHECK_FOR_INTERRUPTS in the loop.
          */
-       while ((t = gcd(r, n)) > 1)
+       do
         {
                 CHECK_FOR_INTERRUPTS();
-               r /= t;
-       }
+               r = (uint32) (sampler_random_fract(randstate) * n);
+       } while (r == 0 || gcd(r, n) > 1);
  
         return r;
  }
diff --git a/contrib/tsm_system_time/tsm_system_time.control b/contrib/tsm_system_time/tsm_system_time.control

index ebcee19d23a0db519f1597335e80588d7a56b59d..c247987c66d14b9a2cb75fb07bbe623366d4b458 100644 (file)
--- a/contrib/tsm_system_time/tsm_system_time.control
+++ b/contrib/tsm_system_time/tsm_system_time.control
@@ -1,5 +1,5 @@
  # tsm_system_time extension
-comment = 'SYSTEM TABLESAMPLE method which accepts time in milliseconds as a limit'
+comment = 'TABLESAMPLE method which accepts time in milliseconds as a limit'
  default_version = '1.0'
  module_pathname = '$libdir/tsm_system_time'
  relocatable = true
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml

index 2c2190f13d373e0ff0567f7052bf73838cc3d770..9096ee5d517de88aff7d8cd0c233cc8862c13b3c 100644 (file)
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -278,11 +278,6 @@
        <entry>planner statistics</entry>
       </row>
  
-     <row>
-      <entry><link linkend="catalog-pg-tablesample-method"><structname>pg_tablesample_method</structname></link></entry>
-      <entry>table sampling methods</entry>
-     </row>
-
       <row>
        <entry><link linkend="catalog-pg-tablespace"><structname>pg_tablespace</structname></link></entry>
        <entry>tablespaces within this database cluster</entry>
@@ -6132,121 +6127,6 @@
   </sect1>
  
  
- <sect1 id="catalog-pg-tablesample-method">
-  <title><structname>pg_tabesample_method</structname></title>
-
-  <indexterm zone="catalog-pg-tablesample-method">
-   <primary>pg_am</primary>
-  </indexterm>
-
-  <para>
-   The catalog <structname>pg_tablesample_method</structname> stores
-   information about table sampling methods which can be used in
-   <command>TABLESAMPLE</command> clause of a <command>SELECT</command>
-   statement.
-  </para>
-
-  <table>
-   <title><structname>pg_tablesample_method</> Columns</title>
-
-   <tgroup cols="4">
-    <thead>
-     <row>
-      <entry>Name</entry>
-      <entry>Type</entry>
-      <entry>References</entry>
-      <entry>Description</entry>
-     </row>
-    </thead>
-    <tbody>
-
-     <row>
-      <entry><structfield>oid</structfield></entry>
-      <entry><type>oid</type></entry>
-      <entry></entry>
-      <entry>Row identifier (hidden attribute; must be explicitly selected)</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmname</structfield></entry>
-      <entry><type>name</type></entry>
-      <entry></entry>
-      <entry>Name of the sampling method</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmseqscan</structfield></entry>
-      <entry><type>bool</type></entry>
-      <entry></entry>
-      <entry>If true, the sampling method scans the whole table sequentially.
-      </entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmpagemode</structfield></entry>
-      <entry><type>bool</type></entry>
-      <entry></entry>
-      <entry>If true, the sampling method always reads the pages completely.
-      </entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsminit</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Initialize the sampling scan</quote> function</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmnextblock</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Get next block number</quote> function</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmnexttuple</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Get next tuple offset</quote> function</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmexaminetuple</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry>Function which examines the tuple contents and decides if to
-        return it, or zero if none</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmend</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>End the sampling scan</quote> function</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmreset</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Restart the state of sampling scan</quote> function</entry>
-     </row>
-
-     <row>
-      <entry><structfield>tsmcost</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry>Costing function</entry>
-     </row>
-
-    </tbody>
-   </tgroup>
-  </table>
-
- </sect1>
-
-
   <sect1 id="catalog-pg-tablespace">
    <title><structname>pg_tablespace</structname></title>
  
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml

index 8e13555a3aa5518d11eac71c1536bb6acf4032be..8113ddf8179f10e1b0f031d2d4106837656e23e3 100644 (file)
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4346,7 +4346,7 @@ SET xmloption TO { DOCUMENT | CONTENT };
      an object identifier.  There are also several alias types for
      <type>oid</>: <type>regproc</>, <type>regprocedure</>,
      <type>regoper</>, <type>regoperator</>, <type>regclass</>,
-    <type>regtype</>, <type>regrole</>, <type>regnamespace</>, 
+    <type>regtype</>, <type>regrole</>, <type>regnamespace</>,
      <type>regconfig</>, and <type>regdictionary</>.
      <xref linkend="datatype-oid-table"> shows an overview.
     </para>
@@ -4622,6 +4622,10 @@ SELECT * FROM pg_attribute
      <primary>fdw_handler</primary>
     </indexterm>
  
+   <indexterm zone="datatype-pseudo">
+    <primary>tsm_handler</primary>
+   </indexterm>
+
     <indexterm zone="datatype-pseudo">
      <primary>cstring</primary>
     </indexterm>
@@ -4716,6 +4720,11 @@ SELECT * FROM pg_attribute
          <entry>A foreign-data wrapper handler is declared to return <type>fdw_handler</>.</entry>
         </row>
  
+       <row>
+        <entry><type>tsm_handler</></entry>
+        <entry>A tablesample method handler is declared to return <type>tsm_handler</>.</entry>
+       </row>
+
         <row>
          <entry><type>record</></entry>
          <entry>Identifies a function returning an unspecified row type.</entry>
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml

index d1703e9c01ff87eb99f48e8e00b3bd5ff0db187f..7e82cdc3b124b870f39ac6d4a8077372d1cd8cd8 100644 (file)
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -243,6 +243,7 @@
    &nls;
    &plhandler;
    &fdwhandler;
+  &tablesample-method;
    &custom-scan;
    &geqo;
    &indexam;
@@ -250,7 +251,6 @@
    &spgist;
    &gin;
    &brin;
-  &tablesample-method;
    &storage;
    &bki;
    &planstats;
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml

index 632d7935cb41fe946cbbd6d356ba927af6c1cf27..44810f4909c06cdfaac8db5a9cf2a0ad5746db50 100644 (file)
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -49,7 +49,8 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
  
  <phrase>where <replaceable class="parameter">from_item</replaceable> can be one of:</phrase>
  
-    [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ] [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
+    [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
+                [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
      [ LATERAL ] ( <replaceable class="parameter">select</replaceable> ) [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ]
      <replaceable class="parameter">with_query_name</replaceable> [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
      [ LATERAL ] <replaceable class="parameter">function_name</replaceable> ( [ <replaceable class="parameter">argument</replaceable> [, ...] ] )
@@ -325,50 +326,6 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
        </listitem>
       </varlistentry>
  
-     <varlistentry>
-      <term>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</term>
-      <listitem>
-       <para>
-        Table sample clause after
-        <replaceable class="parameter">table_name</replaceable> indicates that
-        a <replaceable class="parameter">sampling_method</replaceable> should
-        be used to retrieve subset of rows in the table.
-        The <replaceable class="parameter">sampling_method</replaceable> can be
-        any sampling method installed in the database. There are currently two
-        sampling methods available in the standard
-        <productname>PostgreSQL</productname> distribution:
-        <itemizedlist>
-         <listitem>
-          <para><literal>SYSTEM</literal></para>
-         </listitem>
-         <listitem>
-          <para><literal>BERNOULLI</literal></para>
-         </listitem>
-        </itemizedlist>
-        Both of these sampling methods currently accept only single argument
-        which is the percent (floating point from 0 to 100) of the rows to
-        be returned.
-        The <literal>SYSTEM</literal> sampling method does block level
-        sampling with each block having the same chance of being selected and
-        returns all rows from each selected block.
-        The <literal>BERNOULLI</literal> scans whole table and returns
-        individual rows with equal probability. Additional sampling methods
-        may be installed in the database via extensions.
-       </para>
-       <para>
-        The optional parameter <literal>REPEATABLE</literal> uses the seed
-        parameter, which can be a number or expression producing a number, as
-        a random seed for sampling. Note that subsequent commands may return
-        different results even if same <literal>REPEATABLE</literal> clause was
-        specified. This happens because <acronym>DML</acronym> statements and
-        maintenance operations such as <command>VACUUM</> may affect physical
-        distribution of data. The <function>setseed()</> function will not
-        affect the sampling result when the <literal>REPEATABLE</literal>
-        parameter is used.
-       </para>
-      </listitem>
-     </varlistentry>
-
       <varlistentry>
        <term><replaceable class="parameter">alias</replaceable></term>
        <listitem>
@@ -387,6 +344,61 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
        </listitem>
       </varlistentry>
  
+     <varlistentry>
+      <term><literal>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</literal></term>
+      <listitem>
+       <para>
+        A <literal>TABLESAMPLE</> clause after
+        a <replaceable class="parameter">table_name</> indicates that the
+        specified <replaceable class="parameter">sampling_method</replaceable>
+        should be used to retrieve a subset of the rows in that table.
+        This sampling precedes the application of any other filters such
+        as <literal>WHERE</> clauses.
+        The standard <productname>PostgreSQL</productname> distribution
+        includes two sampling methods, <literal>BERNOULLI</literal>
+        and <literal>SYSTEM</literal>, and other sampling methods can be
+        installed in the database via extensions.
+       </para>
+
+       <para>
+        The <literal>BERNOULLI</> and <literal>SYSTEM</> sampling methods
+        each accept a single <replaceable class="parameter">argument</>
+        which is the fraction of the table to sample, expressed as a
+        percentage between 0 and 100.  This argument can be
+        any <type>real</>-valued expression.  (Other sampling methods might
+        accept more or different arguments.)  These two methods each return
+        a randomly-chosen sample of the table that will contain
+        approximately the specified percentage of the table's rows.
+        The <literal>BERNOULLI</literal> method scans the whole table and
+        selects or ignores individual rows independently with the specified
+        probability.
+        The <literal>SYSTEM</literal> method does block-level sampling with
+        each block having the specified chance of being selected; all rows
+        in each selected block are returned.
+        The <literal>SYSTEM</literal> method is significantly faster than
+        the <literal>BERNOULLI</literal> method when small sampling
+        percentages are specified, but it may return a less-random sample of
+        the table as a result of clustering effects.
+       </para>
+
+       <para>
+        The optional <literal>REPEATABLE</literal> clause specifies
+        a <replaceable class="parameter">seed</> number or expression to use
+        for generating random numbers within the sampling method.  The seed
+        value can be any non-null floating-point value.  Two queries that
+        specify the same seed and <replaceable class="parameter">argument</>
+        values will select the same sample of the table, if the table has
+        not been changed meanwhile.  But different seed values will usually
+        produce different samples.
+        If <literal>REPEATABLE</literal> is not given then a new random
+        sample is selected for each query.
+        Note that some add-on sampling methods do not
+        accept <literal>REPEATABLE</literal>, and will always produce new
+        samples on each use.
+       </para>
+      </listitem>
+     </varlistentry>
+
       <varlistentry>
        <term><replaceable class="parameter">select</replaceable></term>
        <listitem>
@@ -1870,6 +1882,16 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
     </para>
    </refsect2>
  
+  <refsect2>
+   <title><literal>TABLESAMPLE</literal> Clause Restrictions</title>
+
+   <para>
+    The <literal>TABLESAMPLE</> clause is currently accepted only on
+    regular tables and materialized views.  According to the SQL standard
+    it should be possible to apply it to any <literal>FROM</> item.
+   </para>
+  </refsect2>
+
    <refsect2>
     <title>Function Calls in <literal>FROM</literal></title>
  
@@ -1993,19 +2015,5 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
     </para>
    </refsect2>
  
-  <refsect2>
-   <title><literal>TABLESAMPLE</literal> clause</title>
-
-   <para>
-    The <literal>TABLESAMPLE</> clause is currently accepted only on physical
-    relations and materialized views.
-   </para>
-
-   <para>
-    Additional modules allow you to install custom sampling methods and use
-    them instead of the SQL standard methods.
-   </para>
-  </refsect2>
-
   </refsect1>
  </refentry>
diff --git a/doc/src/sgml/tablesample-method.sgml b/doc/src/sgml/tablesample-method.sgml

index 48eb7fe84ea93e61166d7b689582cce2b2a1720c..22f8bbe19aa4b4c5166a8de98e1c8b26624d0d44 100644 (file)
--- a/doc/src/sgml/tablesample-method.sgml
+++ b/doc/src/sgml/tablesample-method.sgml
@@ -1,139 +1,301 @@
  <!-- doc/src/sgml/tablesample-method.sgml -->
  
  <chapter id="tablesample-method">
- <title>Writing A TABLESAMPLE Sampling Method</title>
+ <title>Writing A Table Sampling Method</title>
  
   <indexterm zone="tablesample-method">
-  <primary>tablesample method</primary>
+  <primary>table sampling method</primary>
+ </indexterm>
+
+ <indexterm zone="tablesample-method">
+  <primary><literal>TABLESAMPLE</literal> method</primary>
   </indexterm>
  
   <para>
-  The <command>TABLESAMPLE</command> clause implementation in
-  <productname>PostgreSQL</> supports creating a custom sampling methods.
-  These methods control what sample of the table will be returned when the
-  <command>TABLESAMPLE</command> clause is used.
+  <productname>PostgreSQL</>'s implementation of the <literal>TABLESAMPLE</>
+  clause supports custom table sampling methods, in addition to
+  the <literal>BERNOULLI</> and <literal>SYSTEM</> methods that are required
+  by the SQL standard.  The sampling method determines which rows of the
+  table will be selected when the <literal>TABLESAMPLE</> clause is used.
   </para>
  
- <sect1 id="tablesample-method-functions">
-  <title>Tablesample Method Functions</title>
+ <para>
+  At the SQL level, a table sampling method is represented by a single SQL
+  function, typically implemented in C, having the signature
+<programlisting>
+method_name(internal) RETURNS tsm_handler
+</programlisting>
+  The name of the function is the same as the method name appearing in the
+  <literal>TABLESAMPLE</> clause.  The <type>internal</> argument is a dummy
+  (always having value zero) that simply serves to prevent this function from
+  being called directly from a SQL command.
+  The result of the function must be a palloc'd struct of
+  type <type>TsmRoutine</>, which contains pointers to support functions for
+  the sampling method.  These support functions are plain C functions and
+  are not visible or callable at the SQL level.  The support functions are
+  described in <xref linkend="tablesample-support-functions">.
+ </para>
+
+ <para>
+  In addition to function pointers, the <type>TsmRoutine</> struct must
+  provide these additional fields:
+ </para>
+
+ <variablelist>
+  <varlistentry>
+   <term><literal>List *parameterTypes</literal></term>
+   <listitem>
+    <para>
+     This is an OID list containing the data type OIDs of the parameter(s)
+     that will be accepted by the <literal>TABLESAMPLE</> clause when this
+     sampling method is used.  For example, for the built-in methods, this
+     list contains a single item with value <literal>FLOAT4OID</>, which
+     represents the sampling percentage.  Custom sampling methods can have
+     more or different parameters.
+    </para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><literal>bool repeatable_across_queries</literal></term>
+   <listitem>
+    <para>
+     If <literal>true</>, the sampling method can deliver identical samples
+     across successive queries, if the same parameters
+     and <literal>REPEATABLE</> seed value are supplied each time and the
+     table contents have not changed.  When this is <literal>false</>,
+     the <literal>REPEATABLE</> clause is not accepted for use with the
+     sampling method.
+    </para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><literal>bool repeatable_across_scans</literal></term>
+   <listitem>
+    <para>
+     If <literal>true</>, the sampling method can deliver identical samples
+     across successive scans in the same query (assuming unchanging
+     parameters, seed value, and snapshot).
+     When this is <literal>false</>, the planner will not select plans that
+     would require scanning the sampled table more than once, since that
+     might result in inconsistent query output.
+    </para>
+   </listitem>
+  </varlistentry>
+ </variablelist>
+
+ <para>
+  The <type>TsmRoutine</> struct type is declared
+  in <filename>src/include/access/tsmapi.h</>; see that file for additional
+  details.
+ </para>
+
+ <para>
+  The table sampling methods included in the standard distribution are good
+  references when trying to write your own.  Look into
+  the <filename>src/backend/access/tablesample</> subdirectory of the source
+  tree for the built-in sampling methods, and into the <filename>contrib</>
+  subdirectory for add-on methods.
+ </para>
+
+ <sect1 id="tablesample-support-functions">
+  <title>Sampling Method Support Functions</title>
  
    <para>
-   The tablesample method must provide following set of functions:
+   The TSM handler function returns a palloc'd <type>TsmRoutine</> struct
+   containing pointers to the support functions described below.  Most of
+   the functions are required, but some are optional, and those pointers can
+   be NULL.
    </para>
  
    <para>
  <programlisting>
  void
-tsm_init (TableSampleDesc *desc,
-         uint32 seed, ...);
+SampleScanGetSampleSize (PlannerInfo *root,
+                         RelOptInfo *baserel,
+                         List *paramexprs,
+                         BlockNumber *pages,
+                         double *tuples);
  </programlisting>
-   Initialize the tablesample scan. The function is called at the beginning
-   of each relation scan.
+
+   This function is called during planning.  It must estimate the number of
+   relation pages that will be read during a sample scan, and the number of
+   tuples that will be selected by the scan.  (For example, these might be
+   determined by estimating the sampling fraction, and then multiplying
+   the <literal>baserel-&gt;pages</> and <literal>baserel-&gt;tuples</>
+   numbers by that, being sure to round the results to integral values.)
+   The <literal>paramexprs</> list holds the expression(s) that are
+   parameters to the <literal>TABLESAMPLE</> clause.  It is recommended to
+   use <function>estimate_expression_value()</> to try to reduce these
+   expressions to constants, if their values are needed for estimation
+   purposes; but the function must provide size estimates even if they cannot
+   be reduced, and it should not fail even if the values appear invalid
+   (remember that they're only estimates of what the run-time values will be).
+   The <literal>pages</> and <literal>tuples</> parameters are outputs.
    </para>
+
    <para>
-   Note that the first two parameters are required but you can specify
-   additional parameters which then will be used by the <command>TABLESAMPLE</>
-   clause to determine the required user input in the query itself.
-   This means that if your function will specify additional float4 parameter
-   named percent, the user will have to call the tablesample method with
-   expression which evaluates (or can be coerced) to float4.
-   For example this definition:
  <programlisting>
-tsm_init (TableSampleDesc *desc,
-          uint32 seed, float4 pct);
-</programlisting>
-Will lead to SQL call like this:
-<programlisting>
-... TABLESAMPLE yourmethod(0.5) ...
+void
+InitSampleScan (SampleScanState *node,
+                int eflags);
  </programlisting>
+
+   Initialize for execution of a SampleScan plan node.
+   This is called during executor startup.
+   It should perform any initialization needed before processing can start.
+   The <structname>SampleScanState</> node has already been created, but
+   its <structfield>tsm_state</> field is NULL.
+   The <function>InitSampleScan</> function can palloc whatever internal
+   state data is needed by the sampling method, and store a pointer to
+   it in <literal>node-&gt;tsm_state</>.
+   Information about the table to scan is accessible through other fields
+   of the <structname>SampleScanState</> node (but note that the
+   <literal>node-&gt;ss.ss_currentScanDesc</> scan descriptor is not set
+   up yet).
+   <literal>eflags</> contains flag bits describing the executor's
+   operating mode for this plan node.
    </para>
  
    <para>
-<programlisting>
-BlockNumber
-tsm_nextblock (TableSampleDesc *desc);
-</programlisting>
-   Returns the block number of next page to be scanned. InvalidBlockNumber
-   should be returned if the sampling has reached end of the relation.
+   When <literal>(eflags &amp; EXEC_FLAG_EXPLAIN_ONLY)</> is true,
+   the scan will not actually be performed, so this function should only do
+   the minimum required to make the node state valid for <command>EXPLAIN</>
+   and <function>EndSampleScan</>.
    </para>
  
    <para>
-<programlisting>
-OffsetNumber
-tsm_nexttuple (TableSampleDesc *desc, BlockNumber blockno,
-               OffsetNumber maxoffset);
-</programlisting>
-   Return next tuple offset for the current page. InvalidOffsetNumber should
-   be returned if the sampling has reached end of the page.
+   This function can be omitted (set the pointer to NULL), in which case
+   <function>BeginSampleScan</> must perform all initialization needed
+   by the sampling method.
    </para>
  
    <para>
  <programlisting>
  void
-tsm_end (TableSampleDesc *desc);
+BeginSampleScan (SampleScanState *node,
+                 Datum *params,
+                 int nparams,
+                 uint32 seed);
  </programlisting>
-   The scan has finished, cleanup any left over state.
+
+   Begin execution of a sampling scan.
+   This is called just before the first attempt to fetch a tuple, and
+   may be called again if the scan needs to be restarted.
+   Information about the table to scan is accessible through fields
+   of the <structname>SampleScanState</> node (but note that the
+   <literal>node-&gt;ss.ss_currentScanDesc</> scan descriptor is not set
+   up yet).
+   The <literal>params</> array, of length <literal>nparams</>, contains the
+   values of the parameters supplied in the <literal>TABLESAMPLE</> clause.
+   These will have the number and types specified in the sampling
+   method's <literal>parameterTypes</literal> list, and have been checked
+   to not be null.
+   <literal>seed</> contains a seed to use for any random numbers generated
+   within the sampling method; it is either a hash derived from the
+   <literal>REPEATABLE</> value if one was given, or the result
+   of <literal>random()</> if not.
    </para>
  
    <para>
-<programlisting>
-void
-tsm_reset (TableSampleDesc *desc);
-</programlisting>
-   The scan needs to rescan the relation again, reset any tablesample method
-   state.
+   This function may adjust the fields <literal>node-&gt;use_bulkread</>
+   and <literal>node-&gt;use_pagemode</>.
+   If <literal>node-&gt;use_bulkread</> is <literal>true</>, which it is by
+   default, the scan will use a buffer access strategy that encourages
+   recycling buffers after use.  It might be reasonable to set this
+   to <literal>false</> if the scan will visit only a small fraction of the
+   table's pages.
+   If <literal>node-&gt;use_pagemode</> is <literal>true</>, which it is by
+   default, the scan will perform visibility checking in a single pass for
+   all tuples on each visited page.  It might be reasonable to set this
+   to <literal>false</> if the scan will select only a small fraction of the
+   tuples on each visited page.  That will result in fewer tuple visibility
+   checks being performed, though each one will be more expensive because it
+   will require more locking.
+  </para>
+
+  <para>
+   If the sampling method is
+   marked <literal>repeatable_across_scans</literal>, it must be able to
+   select the same set of tuples during a rescan as it did originally; that is,
+   a fresh call of <function>BeginSampleScan</> must lead to selecting the
+   same tuples as before (if the <literal>TABLESAMPLE</> parameters
+   and seed don't change).
    </para>
  
    <para>
  <programlisting>
-void
-tsm_cost (PlannerInfo *root, Path *path, RelOptInfo *baserel,
-          List *args, BlockNumber *pages, double *tuples);
+BlockNumber
+NextSampleBlock (SampleScanState *node);
  </programlisting>
-   This function is used by optimizer to decide best plan and is also used
-   for output of <command>EXPLAIN</>.
+
+   Returns the block number of the next page to be scanned, or
+   <literal>InvalidBlockNumber</> if no pages remain to be scanned.
    </para>
  
    <para>
-   There is one more function which tablesampling method can implement in order
-   to gain more fine grained control over sampling. This function is optional:
+   This function can be omitted (set the pointer to NULL), in which case
+   the core code will perform a sequential scan of the entire relation.
+   Such a scan can use synchronized scanning, so the sampling method
+   cannot assume that the relation pages are visited in the same order on
+   each scan.
    </para>
  
    <para>
  <programlisting>
-bool
-tsm_examinetuple (TableSampleDesc *desc, BlockNumber blockno,
-                  HeapTuple tuple, bool visible);
+OffsetNumber
+NextSampleTuple (SampleScanState *node,
+                 BlockNumber blockno,
+                 OffsetNumber maxoffset);
  </programlisting>
-   Function that enables the sampling method to examine contents of the tuple
-   (for example to collect some internal statistics). The return value of this
-   function is used to determine if the tuple should be returned to client.
-   Note that this function will receive even invisible tuples but it is not
-   allowed to return true for such tuple (if it does,
-   <productname>PostgreSQL</> will raise an error).
+
+   Returns the offset number of the next tuple to be sampled on the
+   specified page, or <literal>InvalidOffsetNumber</> if no tuples remain to
+   be sampled.  <literal>maxoffset</> is the largest offset number in use
+   on the page.
    </para>
  
+  <note>
+   <para>
+    <function>NextSampleTuple</> is not explicitly told which of the offset
+    numbers in the range <literal>1 .. maxoffset</> actually contain valid
+    tuples.  This is not normally a problem since the core code ignores
+    requests to sample missing or invisible tuples; that should not result in
+    any bias in the sample.  However, if necessary, the function can
+    examine <literal>node-&gt;ss.ss_currentScanDesc-&gt;rs_vistuples[]</>
+    to identify which tuples are valid and visible.  (This
+    requires <literal>node-&gt;use_pagemode</> to be <literal>true</>.)
+   </para>
+  </note>
+
+  <note>
+   <para>
+    <function>NextSampleTuple</> must <emphasis>not</> assume
+    that <literal>blockno</> is the same page number returned by the most
+    recent <function>NextSampleBlock</> call.  It was returned by some
+    previous <function>NextSampleBlock</> call, but the core code is allowed
+    to call <function>NextSampleBlock</> in advance of actually scanning
+    pages, so as to support prefetching.  It is OK to assume that once
+    sampling of a given page begins, successive <function>NextSampleTuple</>
+    calls all refer to the same page until <literal>InvalidOffsetNumber</> is
+    returned.
+   </para>
+  </note>
+
    <para>
-  As you can see most of the tablesample method interfaces get the
-  <structname>TableSampleDesc</> as a first parameter. This structure holds
-  state of the current scan and also provides storage for the tablesample
-  method's state. It is defined as following:
  <programlisting>
-typedef struct TableSampleDesc {
-    HeapScanDesc    heapScan;
-    TupleDesc       tupDesc;
-
-    void           *tsmdata;
-} TableSampleDesc;
+void
+EndSampleScan (SampleScanState *node);
  </programlisting>
-  Where <structfield>heapScan</> is the descriptor of the physical table scan.
-  It's possible to get table size info from it. The <structfield>tupDesc</>
-  represents the tuple descriptor of the tuples returned by the scan and passed
-  to the <function>tsm_examinetuple()</> interface. The <structfield>tsmdata</>
-  can be used by tablesample method itself to store any state info it might
-  need during the scan. If used by the method, it should be <function>pfree</>d
-  in <function>tsm_end()</> function.
+
+   End the scan and release resources.  It is normally not important
+   to release palloc'd memory, but any externally-visible resources
+   should be cleaned up.
+   This function can be omitted (set the pointer to NULL) in the common
+   case where no such resources exist.
    </para>
+
   </sect1>
  
  </chapter>
diff --git a/doc/src/sgml/tsm-system-rows.sgml b/doc/src/sgml/tsm-system-rows.sgml

index 0c2f1779c9ad8750db938a6edb38480ae25b57cf..93aa5366649bda2b93f19f141982a77b8f1e8a01 100644 (file)
--- a/doc/src/sgml/tsm-system-rows.sgml
+++ b/doc/src/sgml/tsm-system-rows.sgml
@@ -8,24 +8,37 @@
   </indexterm>
  
   <para>
-  The <filename>tsm_system_rows</> module provides the tablesample method
-  <literal>SYSTEM_ROWS</literal>, which can be used inside the
-  <command>TABLESAMPLE</command> clause of a <command>SELECT</command>.
+  The <filename>tsm_system_rows</> module provides the table sampling method
+  <literal>SYSTEM_ROWS</literal>, which can be used in
+  the <literal>TABLESAMPLE</> clause of a <xref linkend="sql-select">
+  command.
   </para>
  
   <para>
-  This tablesample method uses a linear probing algorithm to read sample
-  of a table and uses actual number of rows as limit (unlike the
-  <literal>SYSTEM</literal> tablesample method which limits by percentage
-  of a table).
+  This table sampling method accepts a single integer argument that is the
+  maximum number of rows to read.  The resulting sample will always contain
+  exactly that many rows, unless the table does not contain enough rows, in
+  which case the whole table is selected.
+ </para>
+
+ <para>
+  Like the built-in <literal>SYSTEM</literal> sampling
+  method, <literal>SYSTEM_ROWS</literal> performs block-level sampling, so
+  that the sample is not completely random but may be subject to clustering
+  effects, especially if only a small number of rows are requested.
+ </para>
+
+ <para>
+  <literal>SYSTEM_ROWS</literal> does not support
+  the <literal>REPEATABLE</literal> clause.
   </para>
  
   <sect2>
    <title>Examples</title>
  
    <para>
-   Here is an example of selecting sample of a table with
-   <literal>SYSTEM_ROWS</>. First install the extension:
+   Here is an example of selecting a sample of a table with
+   <literal>SYSTEM_ROWS</>.  First install the extension:
    </para>
  
  <programlisting>
@@ -33,8 +46,7 @@ CREATE EXTENSION tsm_system_rows;
  </programlisting>
  
    <para>
-   Then you can use it in <command>SELECT</command> command same way as other
-   tablesample methods:
+   Then you can use it in a <command>SELECT</command> command, for instance:
  
  <programlisting>
  SELECT * FROM my_table TABLESAMPLE SYSTEM_ROWS(100);
@@ -42,8 +54,9 @@ SELECT * FROM my_table TABLESAMPLE SYSTEM_ROWS(100);
    </para>
  
    <para>
-   The above command will return a sample of 100 rows from the table my_table
-   (less if the table does not have 100 visible rows).
+   This command will return a sample of 100 rows from the
+   table <structname>my_table</> (unless the table does not have 100
+   visible rows, in which case all its rows are returned).
    </para>
   </sect2>
  
diff --git a/doc/src/sgml/tsm-system-time.sgml b/doc/src/sgml/tsm-system-time.sgml

index 2343ab16d4f2bf6e3cadd4f599822f27106c26ab..3f8ff1a026f2e1a719dccb2ab7cd29ed6f293139 100644 (file)
--- a/doc/src/sgml/tsm-system-time.sgml
+++ b/doc/src/sgml/tsm-system-time.sgml
@@ -8,25 +8,39 @@
   </indexterm>
  
   <para>
-  The <filename>tsm_system_time</> module provides the tablesample method
-  <literal>SYSTEM_TIME</literal>, which can be used inside the
-  <command>TABLESAMPLE</command> clause of a <command>SELECT</command>.
+  The <filename>tsm_system_time</> module provides the table sampling method
+  <literal>SYSTEM_TIME</literal>, which can be used in
+  the <literal>TABLESAMPLE</> clause of a <xref linkend="sql-select">
+  command.
   </para>
  
   <para>
-  This tablesample method uses a linear probing algorithm to read sample
-  of a table and uses time in milliseconds as limit (unlike the
-  <literal>SYSTEM</literal> tablesample method which limits by percentage
-  of a table). This gives you some control over the length of execution
-  of your query.
+  This table sampling method accepts a single floating-point argument that
+  is the maximum number of milliseconds to spend reading the table.  This
+  gives you direct control over how long the query takes, at the price that
+  the size of the sample becomes hard to predict.  The resulting sample will
+  contain as many rows as could be read in the specified time, unless the
+  whole table has been read first.
+ </para>
+
+ <para>
+  Like the built-in <literal>SYSTEM</literal> sampling
+  method, <literal>SYSTEM_TIME</literal> performs block-level sampling, so
+  that the sample is not completely random but may be subject to clustering
+  effects, especially if only a small number of rows are selected.
+ </para>
+
+ <para>
+  <literal>SYSTEM_TIME</literal> does not support
+  the <literal>REPEATABLE</literal> clause.
   </para>
  
   <sect2>
    <title>Examples</title>
  
    <para>
-   Here is an example of selecting sample of a table with
-   <literal>SYSTEM_TIME</>. First install the extension:
+   Here is an example of selecting a sample of a table with
+   <literal>SYSTEM_TIME</>.  First install the extension:
    </para>
  
  <programlisting>
@@ -34,8 +48,7 @@ CREATE EXTENSION tsm_system_time;
  </programlisting>
  
    <para>
-   Then you can use it in a <command>SELECT</command> command the same way as
-   other tablesample methods:
+   Then you can use it in a <command>SELECT</command> command, for instance:
  
  <programlisting>
  SELECT * FROM my_table TABLESAMPLE SYSTEM_TIME(1000);
@@ -43,8 +56,9 @@ SELECT * FROM my_table TABLESAMPLE SYSTEM_TIME(1000);
    </para>
  
    <para>
-   The above command will return as large a sample of my_table as it can read in
-   1 second (or less if it reads whole table faster).
+   This command will return as large a sample of <structname>my_table</> as
+   it can read in 1 second (1000 milliseconds).  Of course, if the whole
+   table can be read in under 1 second, all its rows will be returned.
    </para>
   </sect2>
  
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c

index 6f4ff2718fed8d224837d2aeb46da44cb5cadecd..050efdc4806a716df0f0515619fe7a04bd9577d5 100644 (file)
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -80,8 +80,11 @@ bool         synchronize_seqscans = true;
  static HeapScanDesc heap_beginscan_internal(Relation relation,
                                                 Snapshot snapshot,
                                                 int nkeys, ScanKey key,
-                                         bool allow_strat, bool allow_sync, bool allow_pagemode,
-                                               bool is_bitmapscan, bool is_samplescan,
+                                               bool allow_strat,
+                                               bool allow_sync,
+                                               bool allow_pagemode,
+                                               bool is_bitmapscan,
+                                               bool is_samplescan,
                                                 bool temp_snap);
  static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
                                         TransactionId xid, CommandId cid, int options);
@@ -207,7 +210,7 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
   * ----------------
   */
  static void
-initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
+initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
  {
         bool            allow_strat;
         bool            allow_sync;
@@ -257,12 +260,12 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
                 scan->rs_strategy = NULL;
         }
  
-       if (is_rescan)
+       if (keep_startblock)
         {
                 /*
-                * If rescan, keep the previous startblock setting so that rewinding a
-                * cursor doesn't generate surprising results.  Reset the syncscan
-                * setting, though.
+                * When rescanning, we want to keep the previous startblock setting,
+                * so that rewinding a cursor doesn't generate surprising results.
+                * Reset the active syncscan setting, though.
                  */
                 scan->rs_syncscan = (allow_sync && synchronize_seqscans);
         }
@@ -1313,6 +1316,10 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
  /* ----------------
   *             heap_beginscan  - begin relation scan
   *
+ * heap_beginscan is the "standard" case.
+ *
+ * heap_beginscan_catalog differs in setting up its own temporary snapshot.
+ *
   * heap_beginscan_strat offers an extended API that lets the caller control
   * whether a nondefault buffer access strategy can be used, and whether
   * syncscan can be chosen (possibly resulting in the scan not starting from
@@ -1323,8 +1330,11 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
   * really quite unlike a standard seqscan, there is just enough commonality
   * to make it worth using the same data structure.
   *
- * heap_beginscan_samplingscan is alternate entry point for setting up a
- * HeapScanDesc for a TABLESAMPLE scan.
+ * heap_beginscan_sampling is an alternative entry point for setting up a
+ * HeapScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
+ * using the same data structure although the behavior is rather different.
+ * In addition to the options offered by heap_beginscan_strat, this call
+ * also allows control of whether page-mode visibility checking is used.
   * ----------------
   */
  HeapScanDesc
@@ -1366,18 +1376,22 @@ heap_beginscan_bm(Relation relation, Snapshot snapshot,
  HeapScanDesc
  heap_beginscan_sampling(Relation relation, Snapshot snapshot,
                                                 int nkeys, ScanKey key,
-                                               bool allow_strat, bool allow_pagemode)
+                                         bool allow_strat, bool allow_sync, bool allow_pagemode)
  {
         return heap_beginscan_internal(relation, snapshot, nkeys, key,
-                                                                  allow_strat, false, allow_pagemode,
+                                                                  allow_strat, allow_sync, allow_pagemode,
                                                                    false, true, false);
  }
  
  static HeapScanDesc
  heap_beginscan_internal(Relation relation, Snapshot snapshot,
                                                 int nkeys, ScanKey key,
-                                         bool allow_strat, bool allow_sync, bool allow_pagemode,
-                                         bool is_bitmapscan, bool is_samplescan, bool temp_snap)
+                                               bool allow_strat,
+                                               bool allow_sync,
+                                               bool allow_pagemode,
+                                               bool is_bitmapscan,
+                                               bool is_samplescan,
+                                               bool temp_snap)
  {
         HeapScanDesc scan;
  
@@ -1461,6 +1475,27 @@ heap_rescan(HeapScanDesc scan,
         initscan(scan, key, true);
  }
  
+/* ----------------
+ *             heap_rescan_set_params  - restart a relation scan after changing params
+ *
+ * This call allows changing the buffer strategy, syncscan, and pagemode
+ * options before starting a fresh scan.  Note that although the actual use
+ * of syncscan might change (effectively, enabling or disabling reporting),
+ * the previously selected startblock will be kept.
+ * ----------------
+ */
+void
+heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+                                          bool allow_strat, bool allow_sync, bool allow_pagemode)
+{
+       /* adjust parameters */
+       scan->rs_allow_strat = allow_strat;
+       scan->rs_allow_sync = allow_sync;
+       scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot);
+       /* ... and rescan */
+       heap_rescan(scan, key);
+}
+
  /* ----------------
   *             heap_endscan    - end relation scan
   *
diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile

index 46eeb59f9c468075c53d241fcb529175461e7a64..68d9ab281472d976e41aea3350fa768b5c296160 100644 (file)
--- a/src/backend/access/tablesample/Makefile
+++ b/src/backend/access/tablesample/Makefile
@@ -1,10 +1,10 @@
  #-------------------------------------------------------------------------
  #
  # Makefile--
-#    Makefile for utils/tablesample
+#    Makefile for access/tablesample
  #
  # IDENTIFICATION
-#    src/backend/utils/tablesample/Makefile
+#    src/backend/access/tablesample/Makefile
  #
  #-------------------------------------------------------------------------
  
@@ -12,6 +12,6 @@ subdir = src/backend/access/tablesample
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
-OBJS = tablesample.o system.o bernoulli.o
+OBJS = bernoulli.o system.o tablesample.o
  
  include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c

index 0a539008221a5592febbeb9cf1a652eb9da0a1d6..cf88f95e757b1754da8b4d074c9abfc367560208 100644 (file)
--- a/src/backend/access/tablesample/bernoulli.c
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -1,233 +1,231 @@
  /*-------------------------------------------------------------------------
   *
   * bernoulli.c
- *       interface routines for BERNOULLI tablesample method
+ *       support routines for BERNOULLI tablesample method
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
+ *
+ * To achieve that, we proceed by hashing each candidate TID together with
+ * the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       src/backend/utils/tablesample/bernoulli.c
+ *       src/backend/access/tablesample/bernoulli.c
   *
   *-------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h>                             /* for _isnan */
+#endif
+#include <math.h>
  
-#include "access/tablesample.h"
-#include "access/relscan.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
+#include "access/hash.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
  #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
-#include "utils/sampling.h"
+#include "optimizer/cost.h"
+#include "utils/builtins.h"
  
  
-/* tsdesc */
+/* Private state */
  typedef struct
  {
+       uint64          cutoff;                 /* select tuples with hash less than this */
         uint32          seed;                   /* random seed */
-       BlockNumber startblock;         /* starting block, we use ths for syncscan
-                                                                * support */
-       BlockNumber nblocks;            /* number of blocks */
-       BlockNumber blockno;            /* current block */
-       float4          probability;    /* probabilty that tuple will be returned
-                                                                * (0.0-1.0) */
         OffsetNumber lt;                        /* last tuple returned from current block */
-       SamplerRandomState randstate;           /* random generator tsdesc */
  } BernoulliSamplerData;
  
+
+static void bernoulli_samplescangetsamplesize(PlannerInfo *root,
+                                                                 RelOptInfo *baserel,
+                                                                 List *paramexprs,
+                                                                 BlockNumber *pages,
+                                                                 double *tuples);
+static void bernoulli_initsamplescan(SampleScanState *node,
+                                                int eflags);
+static void bernoulli_beginsamplescan(SampleScanState *node,
+                                                 Datum *params,
+                                                 int nparams,
+                                                 uint32 seed);
+static OffsetNumber bernoulli_nextsampletuple(SampleScanState *node,
+                                                 BlockNumber blockno,
+                                                 OffsetNumber maxoffset);
+
+
  /*
- * Initialize the state.
+ * Create a TsmRoutine descriptor for the BERNOULLI method.
   */
  Datum
-tsm_bernoulli_init(PG_FUNCTION_ARGS)
+tsm_bernoulli_handler(PG_FUNCTION_ARGS)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       uint32          seed = PG_GETARG_UINT32(1);
-       float4          percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
-       HeapScanDesc scan = tsdesc->heapScan;
-       BernoulliSamplerData *sampler;
+       TsmRoutine *tsm = makeNode(TsmRoutine);
+
+       tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+       tsm->repeatable_across_queries = true;
+       tsm->repeatable_across_scans = true;
+       tsm->SampleScanGetSampleSize = bernoulli_samplescangetsamplesize;
+       tsm->InitSampleScan = bernoulli_initsamplescan;
+       tsm->BeginSampleScan = bernoulli_beginsamplescan;
+       tsm->NextSampleBlock = NULL;
+       tsm->NextSampleTuple = bernoulli_nextsampletuple;
+       tsm->EndSampleScan = NULL;
+
+       PG_RETURN_POINTER(tsm);
+}
  
-       if (percent < 0 || percent > 100)
-               ereport(ERROR,
-                               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                                errmsg("invalid sample size"),
-                                errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+/*
+ * Sample size estimation.
+ */
+static void
+bernoulli_samplescangetsamplesize(PlannerInfo *root,
+                                                                 RelOptInfo *baserel,
+                                                                 List *paramexprs,
+                                                                 BlockNumber *pages,
+                                                                 double *tuples)
+{
+       Node       *pctnode;
+       float4          samplefract;
  
-       sampler = palloc0(sizeof(BernoulliSamplerData));
+       /* Try to extract an estimate for the sample percentage */
+       pctnode = (Node *) linitial(paramexprs);
+       pctnode = estimate_expression_value(root, pctnode);
  
-       /* Remember initial values for reinit */
-       sampler->seed = seed;
-       sampler->startblock = scan->rs_startblock;
-       sampler->nblocks = scan->rs_nblocks;
-       sampler->blockno = InvalidBlockNumber;
-       sampler->probability = percent / 100;
-       sampler->lt = InvalidOffsetNumber;
-       sampler_random_init_state(sampler->seed, sampler->randstate);
+       if (IsA(pctnode, Const) &&
+               !((Const *) pctnode)->constisnull)
+       {
+               samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+               if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+                       samplefract /= 100.0f;
+               else
+               {
+                       /* Default samplefract if the value is bogus */
+                       samplefract = 0.1f;
+               }
+       }
+       else
+       {
+               /* Default samplefract if we didn't obtain a non-null Const */
+               samplefract = 0.1f;
+       }
+
+       /* We'll visit all pages of the baserel */
+       *pages = baserel->pages;
  
-       tsdesc->tsmdata = (void *) sampler;
+       *tuples = clamp_row_est(baserel->tuples * samplefract);
+}
  
-       PG_RETURN_VOID();
+/*
+ * Initialize during executor setup.
+ */
+static void
+bernoulli_initsamplescan(SampleScanState *node, int eflags)
+{
+       node->tsm_state = palloc0(sizeof(BernoulliSamplerData));
  }
  
  /*
- * Get next block number to read or InvalidBlockNumber if we are at the
- * end of the relation.
+ * Examine parameters and prepare for a sample scan.
   */
-Datum
-tsm_bernoulli_nextblock(PG_FUNCTION_ARGS)
+static void
+bernoulli_beginsamplescan(SampleScanState *node,
+                                                 Datum *params,
+                                                 int nparams,
+                                                 uint32 seed)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
+       BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
+       double          percent = DatumGetFloat4(params[0]);
+
+       if (percent < 0 || percent > 100 || isnan(percent))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                                errmsg("sample percentage must be between 0 and 100")));
  
         /*
-        * Bernoulli sampling scans all blocks on the table and supports syncscan
-        * so loop from startblock to startblock instead of from 0 to nblocks.
+        * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+        * store that as a uint64, of course.  Note that this gives strictly
+        * correct behavior at the limits of zero or one probability.
          */
-       if (sampler->blockno == InvalidBlockNumber)
-               sampler->blockno = sampler->startblock;
-       else
-       {
-               sampler->blockno++;
-
-               if (sampler->blockno >= sampler->nblocks)
-                       sampler->blockno = 0;
-
-               if (sampler->blockno == sampler->startblock)
-                       PG_RETURN_UINT32(InvalidBlockNumber);
-       }
+       sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+       sampler->seed = seed;
+       sampler->lt = InvalidOffsetNumber;
  
-       PG_RETURN_UINT32(sampler->blockno);
+       /*
+        * Use bulkread, since we're scanning all pages.  But pagemode visibility
+        * checking is a win only at larger sampling fractions.  The 25% cutoff
+        * here is based on very limited experimentation.
+        */
+       node->use_bulkread = true;
+       node->use_pagemode = (percent >= 25);
  }
  
  /*
- * Get next tuple from current block.
- *
- * This method implements the main logic in bernoulli sampling.
- * The algorithm simply generates new random number (in 0.0-1.0 range) and if
- * it falls within user specified probability (in the same range) return the
- * tuple offset.
- *
- * It is ok here to return tuple offset without knowing if tuple is visible
- * and not check it via examinetuple. The reason for that is that we do the
- * coinflip (random number generation) for every tuple in the table. Since all
- * tuples have same probability of being returned the visible and invisible
- * tuples will be returned in same ratio as they have in the actual table.
- * This means that there is no skew towards either visible or invisible tuples
- * and the number of visible tuples returned from the executor node should
- * match the fraction of visible tuples which was specified by user.
+ * Select next sampled tuple in current block.
   *
- * This is faster than doing the coinflip in examinetuple because we don't
- * have to do visibility checks on uninteresting tuples.
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists).  The reason is that we do the coinflip for every tuple
+ * offset in the table.  Since all tuples have the same probability of being
+ * returned, it doesn't matter if we do extra coinflips for invisible tuples.
   *
- * If we reach end of the block return InvalidOffsetNumber which tells
+ * When we reach the end of the block, return InvalidOffsetNumber, which tells
   * SampleScan to go to next block.
   */
-Datum
-tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS)
+static OffsetNumber
+bernoulli_nextsampletuple(SampleScanState *node,
+                                                 BlockNumber blockno,
+                                                 OffsetNumber maxoffset)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-       BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
+       BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
         OffsetNumber tupoffset = sampler->lt;
-       float4          probability = sampler->probability;
+       uint32          hashinput[3];
  
+       /* Advance to first/next tuple in block */
         if (tupoffset == InvalidOffsetNumber)
                 tupoffset = FirstOffsetNumber;
         else
                 tupoffset++;
  
         /*
-        * Loop over tuple offsets until the random generator returns value that
-        * is within the probability of returning the tuple or until we reach end
-        * of the block.
+        * We compute the hash by applying hash_any to an array of 3 uint32's
+        * containing the block, offset, and seed.  This is efficient to set up,
+        * and with the current implementation of hash_any, it gives
+        * machine-independent results, which is a nice property for regression
+        * testing.
          *
-        * (This is our implementation of bernoulli trial)
+        * These words in the hash input are the same throughout the block:
          */
-       while (sampler_random_fract(sampler->randstate) > probability)
+       hashinput[0] = blockno;
+       hashinput[2] = sampler->seed;
+
+       /*
+        * Loop over tuple offsets until we find a suitable TID or reach the end
+        * of the block.
+        */
+       for (; tupoffset <= maxoffset; tupoffset++)
         {
-               tupoffset++;
+               uint32          hash;
  
-               if (tupoffset > maxoffset)
+               hashinput[1] = tupoffset;
+
+               hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+                                                                          (int) sizeof(hashinput)));
+               if (hash < sampler->cutoff)
                         break;
         }
  
         if (tupoffset > maxoffset)
-               /* Tell SampleScan that we want next block. */
                 tupoffset = InvalidOffsetNumber;
  
         sampler->lt = tupoffset;
  
-       PG_RETURN_UINT16(tupoffset);
-}
-
-/*
- * Cleanup method.
- */
-Datum
-tsm_bernoulli_end(PG_FUNCTION_ARGS)
-{
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
-       pfree(tsdesc->tsmdata);
-
-       PG_RETURN_VOID();
-}
-
-/*
- * Reset tsdesc (called by ReScan).
- */
-Datum
-tsm_bernoulli_reset(PG_FUNCTION_ARGS)
-{
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
-
-       sampler->blockno = InvalidBlockNumber;
-       sampler->lt = InvalidOffsetNumber;
-       sampler_random_init_state(sampler->seed, sampler->randstate);
-
-       PG_RETURN_VOID();
-}
-
-/*
- * Costing function.
- */
-Datum
-tsm_bernoulli_cost(PG_FUNCTION_ARGS)
-{
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       Path       *path = (Path *) PG_GETARG_POINTER(1);
-       RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-       List       *args = (List *) PG_GETARG_POINTER(3);
-       BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-       double     *tuples = (double *) PG_GETARG_POINTER(5);
-       Node       *pctnode;
-       float4          samplesize;
-
-       *pages = baserel->pages;
-
-       pctnode = linitial(args);
-       pctnode = estimate_expression_value(root, pctnode);
-
-       if (IsA(pctnode, RelabelType))
-               pctnode = (Node *) ((RelabelType *) pctnode)->arg;
-
-       if (IsA(pctnode, Const))
-       {
-               samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
-               samplesize /= 100.0;
-       }
-       else
-       {
-               /* Default samplesize if the estimation didn't return Const. */
-               samplesize = 0.1f;
-       }
-
-       *tuples = path->rows * samplesize;
-       path->rows = *tuples;
-
-       PG_RETURN_VOID();
+       return tupoffset;
  }
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c

index 1d834369a4bd11fbf6127d9d8c8d7e3e4859ca01..43c5dab71619a7a6d8e2ee22bc306e56674191c0 100644 (file)
--- a/src/backend/access/tablesample/system.c
+++ b/src/backend/access/tablesample/system.c
@@ -1,186 +1,260 @@
  /*-------------------------------------------------------------------------
   *
   * system.c
- *       interface routines for system tablesample method
+ *       support routines for SYSTEM tablesample method
   *
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * To achieve that, we proceed by hashing each candidate block number together
+ * with the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       src/backend/utils/tablesample/system.c
+ *       src/backend/access/tablesample/system.c
   *
   *-------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h>                             /* for _isnan */
+#endif
+#include <math.h>
  
-#include "access/tablesample.h"
+#include "access/hash.h"
  #include "access/relscan.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
  #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
-#include "utils/sampling.h"
+#include "optimizer/cost.h"
+#include "utils/builtins.h"
  
  
-/*
- * State
- */
+/* Private state, kept in SampleScanState.tsm_state */
  typedef struct
  {
-       BlockSamplerData bs;
+       uint64          cutoff;                 /* select blocks with hash less than this */
         uint32          seed;                   /* random seed */
-       BlockNumber nblocks;            /* number of block in relation */
-       int                     samplesize;             /* number of blocks to return */
+       BlockNumber nextblock;          /* next block to consider sampling */
         OffsetNumber lt;                        /* last tuple returned from current block */
  } SystemSamplerData;
  
  
-/*
- * Initializes the state.
- */
-Datum
-tsm_system_init(PG_FUNCTION_ARGS)
-{
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       uint32          seed = PG_GETARG_UINT32(1);
-       float4          percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
-       HeapScanDesc scan = tsdesc->heapScan;
-       SystemSamplerData *sampler;
+static void system_samplescangetsamplesize(PlannerInfo *root,
+                                                          RelOptInfo *baserel,
+                                                          List *paramexprs,
+                                                          BlockNumber *pages,
+                                                          double *tuples);
+static void system_initsamplescan(SampleScanState *node,
+                                         int eflags);
+static void system_beginsamplescan(SampleScanState *node,
+                                          Datum *params,
+                                          int nparams,
+                                          uint32 seed);
+static BlockNumber system_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_nextsampletuple(SampleScanState *node,
+                                          BlockNumber blockno,
+                                          OffsetNumber maxoffset);
  
-       if (percent < 0 || percent > 100)
-               ereport(ERROR,
-                               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                                errmsg("invalid sample size"),
-                                errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
-
-       sampler = palloc0(sizeof(SystemSamplerData));
-
-       /* Remember initial values for reinit */
-       sampler->seed = seed;
-       sampler->nblocks = scan->rs_nblocks;
-       sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0));
-       sampler->lt = InvalidOffsetNumber;
-
-       BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
-                                         sampler->seed);
-
-       tsdesc->tsmdata = (void *) sampler;
-
-       PG_RETURN_VOID();
-}
  
  /*
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses the same logic as ANALYZE for picking the random blocks.
+ * Create a TsmRoutine descriptor for the SYSTEM method and return it as a pointer Datum.
   */
  Datum
-tsm_system_nextblock(PG_FUNCTION_ARGS)
+tsm_system_handler(PG_FUNCTION_ARGS)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-       BlockNumber blockno;
-
-       if (!BlockSampler_HasMore(&sampler->bs))
-               PG_RETURN_UINT32(InvalidBlockNumber);
-
-       blockno = BlockSampler_Next(&sampler->bs);
-
-       PG_RETURN_UINT32(blockno);
+       TsmRoutine *tsm = makeNode(TsmRoutine);
+
+       tsm->parameterTypes = list_make1_oid(FLOAT4OID);  /* one float4 parameter: the sample percentage */
+       tsm->repeatable_across_queries = true;
+       tsm->repeatable_across_scans = true;
+       tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
+       tsm->InitSampleScan = system_initsamplescan;
+       tsm->BeginSampleScan = system_beginsamplescan;
+       tsm->NextSampleBlock = system_nextsampleblock;
+       tsm->NextSampleTuple = system_nextsampletuple;
+       tsm->EndSampleScan = NULL;  /* no end-of-scan callback is supplied */
+
+       PG_RETURN_POINTER(tsm);
  }
  
  /*
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
+ * Sample size estimation: sets *pages and *tuples from baserel and the estimated percentage.
   */
-Datum
-tsm_system_nexttuple(PG_FUNCTION_ARGS)
+static void
+system_samplescangetsamplesize(PlannerInfo *root,
+                                                          RelOptInfo *baserel,
+                                                          List *paramexprs,
+                                                          BlockNumber *pages,
+                                                          double *tuples)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-       SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-       OffsetNumber tupoffset = sampler->lt;
+       Node       *pctnode;
+       float4          samplefract;
  
-       if (tupoffset == InvalidOffsetNumber)
-               tupoffset = FirstOffsetNumber;
-       else
-               tupoffset++;
+       /* Try to extract an estimate for the sample percentage */
+       pctnode = (Node *) linitial(paramexprs);
+       pctnode = estimate_expression_value(root, pctnode);
  
-       if (tupoffset > maxoffset)
-               tupoffset = InvalidOffsetNumber;
+       if (IsA(pctnode, Const) &&
+               !((Const *) pctnode)->constisnull)
+       {
+               samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+               if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+                       samplefract /= 100.0f;  /* convert percentage to fraction */
+               else
+               {
+                       /* Default samplefract if the value is bogus */
+                       samplefract = 0.1f;
+               }
+       }
+       else
+       {
+               /* Default samplefract if we didn't obtain a non-null Const */
+               samplefract = 0.1f;
+       }
  
-       sampler->lt = tupoffset;
+       /* We'll visit a sample of the pages ... */
+       *pages = clamp_row_est(baserel->pages * samplefract);
  
-       PG_RETURN_UINT16(tupoffset);
+       /* ... and hopefully get a representative number of tuples from them */
+       *tuples = clamp_row_est(baserel->tuples * samplefract);
  }
  
  /*
- * Cleanup method.
+ * Initialize during executor setup: palloc0 the private state into node->tsm_state.
   */
-Datum
-tsm_system_end(PG_FUNCTION_ARGS)
+static void
+system_initsamplescan(SampleScanState *node, int eflags)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-
-       pfree(tsdesc->tsmdata);
-
-       PG_RETURN_VOID();
+       node->tsm_state = palloc0(sizeof(SystemSamplerData));
  }
  
  /*
- * Reset state (called by ReScan).
+ * Examine parameters and prepare for a sample scan; reports an error on a bad percentage.
   */
-Datum
-tsm_system_reset(PG_FUNCTION_ARGS)
+static void
+system_beginsamplescan(SampleScanState *node,
+                                          Datum *params,
+                                          int nparams,
+                                          uint32 seed)
  {
-       TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-       SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+       SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+       double          percent = DatumGetFloat4(params[0]);  /* params[0] is the sample percentage */
  
+       if (percent < 0 || percent > 100 || isnan(percent))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                                errmsg("sample percentage must be between 0 and 100")));
+
+       /*
+        * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+        * store that as a uint64, of course.  Note that this gives strictly
+        * correct behavior at the limits of zero or one probability.
+        */
+       sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+       sampler->seed = seed;
+       sampler->nextblock = 0;
         sampler->lt = InvalidOffsetNumber;
-       BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
-                                         sampler->seed);
  
-       PG_RETURN_VOID();
+       /*
+        * Bulkread buffer access strategy probably makes sense unless we're
+        * scanning a very small fraction of the table.  The 1% cutoff here is a
+        * guess.  We should use pagemode visibility checking, since we scan all
+        * tuples on each selected page.
+        */
+       node->use_bulkread = (percent >= 1);
+       node->use_pagemode = true;
  }
  
  /*
- * Costing function.
+ * Select next block to sample; returns InvalidBlockNumber once no block remains.
   */
-Datum
-tsm_system_cost(PG_FUNCTION_ARGS)
+static BlockNumber
+system_nextsampleblock(SampleScanState *node)
  {
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-       Path       *path = (Path *) PG_GETARG_POINTER(1);
-       RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-       List       *args = (List *) PG_GETARG_POINTER(3);
-       BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-       double     *tuples = (double *) PG_GETARG_POINTER(5);
-       Node       *pctnode;
-       float4          samplesize;
+       SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+       HeapScanDesc scan = node->ss.ss_currentScanDesc;
+       BlockNumber nextblock = sampler->nextblock;
+       uint32          hashinput[2];
+
+       /*
+        * We compute the hash by applying hash_any to an array of 2 uint32's
+        * containing the block number and seed.  This is efficient to set up, and
+        * with the current implementation of hash_any, it gives
+        * machine-independent results, which is a nice property for regression
+        * testing.
+        *
+        * Only the seed word of the hash input stays the same for every block:
+        */
+       hashinput[1] = sampler->seed;
+
+       /*
+        * Loop over block numbers until finding suitable block or reaching end of
+        * relation.
+        */
+       for (; nextblock < scan->rs_nblocks; nextblock++)
+       {
+               uint32          hash;
  
-       pctnode = linitial(args);
-       pctnode = estimate_expression_value(root, pctnode);
+               hashinput[0] = nextblock;
  
-       if (IsA(pctnode, RelabelType))
-               pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+               hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+                                                                          (int) sizeof(hashinput)));
+               if (hash < sampler->cutoff)
+                       break;
+       }
  
-       if (IsA(pctnode, Const))
+       if (nextblock < scan->rs_nblocks)
         {
-               samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
-               samplesize /= 100.0;
+               /* Found a suitable block; remember where we should start next time */
+               sampler->nextblock = nextblock + 1;
+               return nextblock;
         }
+
+       /* Done, but let's reset nextblock to 0 for safety. */
+       sampler->nextblock = 0;
+       return InvalidBlockNumber;
+}
+
+/*
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists); nodeSamplescan.c will deal with that.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+static OffsetNumber
+system_nextsampletuple(SampleScanState *node,
+                                          BlockNumber blockno,
+                                          OffsetNumber maxoffset)
+{
+       SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+       OffsetNumber tupoffset = sampler->lt;
+
+       /* Advance to next possible offset on page */
+       if (tupoffset == InvalidOffsetNumber)
+               tupoffset = FirstOffsetNumber;
         else
-       {
-               /* Default samplesize if the estimation didn't return Const. */
-               samplesize = 0.1f;
-       }
+               tupoffset++;
  
-       *pages = baserel->pages * samplesize;
-       *tuples = path->rows * samplesize;
-       path->rows = *tuples;
+       /* Done? */
+       if (tupoffset > maxoffset)
+               tupoffset = InvalidOffsetNumber;
+
+       sampler->lt = tupoffset;  /* saved for the next call; Invalid here restarts at FirstOffsetNumber */
  
-       PG_RETURN_VOID();
+       return tupoffset;
  }
diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c

index f21d42c8e38ca04b82579967f54c613fd84290ee..b8ad7ced743cba99021c4752fc8131aa46c99789 100644 (file)
--- a/src/backend/access/tablesample/tablesample.c
+++ b/src/backend/access/tablesample/tablesample.c
@@ -1,7 +1,7 @@
  /*-------------------------------------------------------------------------
   *
   * tablesample.c
- *               TABLESAMPLE internal API
+ *               Support functions for TABLESAMPLE feature
   *
   * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
@@ -10,356 +10,31 @@
   * IDENTIFICATION
   *               src/backend/access/tablesample/tablesample.c
   *
- * TABLESAMPLE is the SQL standard clause for sampling the relations.
- *
- * The API is interface between the Executor and the TABLESAMPLE Methods.
- *
- * TABLESAMPLE Methods are implementations of actual sampling algorithms which
- * can be used for returning a sample of the source relation.
- * Methods don't read the table directly but are asked for block number and
- * tuple offset which they want to examine (or return) and the tablesample
- * interface implemented here does the reading for them.
- *
- * We currently only support sampling of the physical relations, but in the
- * future we might extend the API to support subqueries as well.
- *
   * -------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
-#include "access/tablesample.h"
-
-#include "catalog/pg_tablesample_method.h"
-#include "miscadmin.h"
-#include "pgstat.h"
-#include "storage/bufmgr.h"
-#include "storage/predicate.h"
-#include "utils/rel.h"
-#include "utils/tqual.h"
-
-
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan);
-
-
-/*
- * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method.
- */
-TableSampleDesc *
-tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
-{
-       FunctionCallInfoData fcinfo;
-       int                     i;
-       List       *args = tablesample->args;
-       ListCell   *arg;
-       ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
-       TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));
-
-       /* Load functions */
-       fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
-       fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
-       fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
-       if (OidIsValid(tablesample->tsmexaminetuple))
-               fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
-       else
-               tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
-       fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
-       fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));
-
-       InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
-                                                        list_length(args) + 2,
-                                                        InvalidOid, NULL, NULL);
-
-       tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
-       tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;
-
-       /* First argument for init function is always TableSampleDesc */
-       fcinfo.arg[0] = PointerGetDatum(tsdesc);
-       fcinfo.argnull[0] = false;
+#include "access/tsmapi.h"
  
-       /*
-        * Second arg for init function is always REPEATABLE.
-        *
-        * If tablesample->repeatable is NULL then REPEATABLE clause was not
-        * specified, and we insert a random value as default.
-        *
-        * When specified, the expression cannot evaluate to NULL.
-        */
-       if (tablesample->repeatable)
-       {
-               ExprState  *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
-                                                                                       (PlanState *) scanstate);
-
-               fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
-                                                                        &fcinfo.argnull[1], NULL);
-               if (fcinfo.argnull[1])
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
-                               errmsg("REPEATABLE clause must be NOT NULL numeric value")));
-       }
-       else
-       {
-               fcinfo.arg[1] = UInt32GetDatum(random());
-               fcinfo.argnull[1] = false;
-       }
-
-       /* Rest of the arguments come from user. */
-       i = 2;
-       foreach(arg, args)
-       {
-               Expr       *argexpr = (Expr *) lfirst(arg);
-               ExprState  *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);
-
-               fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
-                                                                        &fcinfo.argnull[i], NULL);
-               i++;
-       }
-       Assert(i == fcinfo.nargs);
-
-       (void) FunctionCallInvoke(&fcinfo);
-
-       return tsdesc;
-}
  
  /*
- * Get next tuple from TABLESAMPLE Method.
- */
-HeapTuple
-tablesample_getnext(TableSampleDesc *desc)
-{
-       HeapScanDesc scan = desc->heapScan;
-       HeapTuple       tuple = &(scan->rs_ctup);
-       bool            pagemode = scan->rs_pageatatime;
-       BlockNumber blockno;
-       Page            page;
-       bool            page_all_visible;
-       ItemId          itemid;
-       OffsetNumber tupoffset,
-                               maxoffset;
-
-       if (!scan->rs_inited)
-       {
-               /*
-                * return null immediately if relation is empty
-                */
-               if (scan->rs_nblocks == 0)
-               {
-                       Assert(!BufferIsValid(scan->rs_cbuf));
-                       tuple->t_data = NULL;
-                       return NULL;
-               }
-               blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
-                                                                                         PointerGetDatum(desc)));
-               if (!BlockNumberIsValid(blockno))
-               {
-                       tuple->t_data = NULL;
-                       return NULL;
-               }
-
-               heapgetpage(scan, blockno);
-               scan->rs_inited = true;
-       }
-       else
-       {
-               /* continue from previously returned page/tuple */
-               blockno = scan->rs_cblock;              /* current page */
-       }
-
-       /*
-        * When pagemode is disabled, the scan will do visibility checks for each
-        * tuple it finds so the buffer needs to be locked.
-        */
-       if (!pagemode)
-               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-       page = (Page) BufferGetPage(scan->rs_cbuf);
-       page_all_visible = PageIsAllVisible(page);
-       maxoffset = PageGetMaxOffsetNumber(page);
-
-       for (;;)
-       {
-               CHECK_FOR_INTERRUPTS();
-
-               tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple,
-                                                                                                PointerGetDatum(desc),
-                                                                                                UInt32GetDatum(blockno),
-                                                                                                UInt16GetDatum(maxoffset)));
-
-               if (OffsetNumberIsValid(tupoffset))
-               {
-                       bool            visible;
-                       bool            found;
-
-                       /* Skip invalid tuple pointers. */
-                       itemid = PageGetItemId(page, tupoffset);
-                       if (!ItemIdIsNormal(itemid))
-                               continue;
-
-                       tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
-                       tuple->t_len = ItemIdGetLength(itemid);
-                       ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
-
-                       if (page_all_visible)
-                               visible = true;
-                       else
-                               visible = SampleTupleVisible(tuple, tupoffset, scan);
-
-                       /*
-                        * Let the sampling method examine the actual tuple and decide if
-                        * we should return it.
-                        *
-                        * Note that we let it examine even invisible tuples for
-                        * statistical purposes, but not return them since user should
-                        * never see invisible tuples.
-                        */
-                       if (OidIsValid(desc->tsmexaminetuple.fn_oid))
-                       {
-                               found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple,
-                                                                                                  PointerGetDatum(desc),
-                                                                                                  UInt32GetDatum(blockno),
-                                                                                                  PointerGetDatum(tuple),
-                                                                                                  BoolGetDatum(visible)));
-                               /* Should not happen if sampling method is well written. */
-                               if (found && !visible)
-                                       elog(ERROR, "Sampling method wanted to return invisible tuple");
-                       }
-                       else
-                               found = visible;
-
-                       /* Found visible tuple, return it. */
-                       if (found)
-                       {
-                               if (!pagemode)
-                                       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-                               break;
-                       }
-                       else
-                       {
-                               /* Try next tuple from same page. */
-                               continue;
-                       }
-               }
-
-
-               if (!pagemode)
-                       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
-               blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
-                                                                                         PointerGetDatum(desc)));
-
-               /*
-                * Report our new scan position for synchronization purposes. We don't
-                * do that when moving backwards, however. That would just mess up any
-                * other forward-moving scanners.
-                *
-                * Note: we do this before checking for end of scan so that the final
-                * state of the position hint is back at the start of the rel.  That's
-                * not strictly necessary, but otherwise when you run the same query
-                * multiple times the starting position would shift a little bit
-                * backwards on every invocation, which is confusing. We don't
-                * guarantee any specific ordering in general, though.
-                */
-               if (scan->rs_syncscan)
-                       ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ?
-                                                          blockno : scan->rs_startblock);
-
-               /*
-                * Reached end of scan.
-                */
-               if (!BlockNumberIsValid(blockno))
-               {
-                       if (BufferIsValid(scan->rs_cbuf))
-                               ReleaseBuffer(scan->rs_cbuf);
-                       scan->rs_cbuf = InvalidBuffer;
-                       scan->rs_cblock = InvalidBlockNumber;
-                       tuple->t_data = NULL;
-                       scan->rs_inited = false;
-                       return NULL;
-               }
-
-               heapgetpage(scan, blockno);
-
-               if (!pagemode)
-                       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-               page = (Page) BufferGetPage(scan->rs_cbuf);
-               page_all_visible = PageIsAllVisible(page);
-               maxoffset = PageGetMaxOffsetNumber(page);
-       }
-
-       pgstat_count_heap_getnext(scan->rs_rd);
-
-       return &(scan->rs_ctup);
-}
-
-/*
- * Reset the sampling to starting state
- */
-void
-tablesample_reset(TableSampleDesc *desc)
-{
-       (void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc));
-}
-
-/*
- * Signal the sampling method that the scan has finished.
- */
-void
-tablesample_end(TableSampleDesc *desc)
-{
-       (void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc));
-}
-
-/*
- * Check visibility of the tuple.
+ * GetTsmRoutine --- get a TsmRoutine struct by invoking the handler.
+ *
+ * This is a convenience routine that's just meant to check for errors.
   */
-static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+TsmRoutine *
+GetTsmRoutine(Oid tsmhandler)
  {
-       /*
-        * If this scan is reading whole pages at a time, there is already
-        * visibility info present in rs_vistuples so we can just search it for
-        * the tupoffset.
-        */
-       if (scan->rs_pageatatime)
-       {
-               int                     start = 0,
-                                       end = scan->rs_ntuples - 1;
-
-               /*
-                * Do the binary search over rs_vistuples, it's already sorted by
-                * OffsetNumber so we don't need to do any sorting ourselves here.
-                *
-                * We could use bsearch() here but it's slower for integers because of
-                * the function call overhead and because it needs boiler plate code
-                * it would not save us anything code-wise anyway.
-                */
-               while (start <= end)
-               {
-                       int                     mid = start + (end - start) / 2;
-                       OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-                       if (curoffset == tupoffset)
-                               return true;
-                       else if (curoffset > tupoffset)
-                               end = mid - 1;
-                       else
-                               start = mid + 1;
-               }
-
-               return false;
-       }
-       else
-       {
-               /* No pagemode, we have to check the tuple itself. */
-               Snapshot        snapshot = scan->rs_snapshot;
-               Buffer          buffer = scan->rs_cbuf;
+       Datum           datum;
+       TsmRoutine *routine;
  
-               bool            visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+       datum = OidFunctionCall1(tsmhandler, PointerGetDatum(NULL));  /* handler is invoked with one NULL pointer argument */
+       routine = (TsmRoutine *) DatumGetPointer(datum);
  
-               CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer,
-                                                                               snapshot);
+       if (routine == NULL || !IsA(routine, TsmRoutine))
+               elog(ERROR, "tablesample handler function %u did not return a TsmRoutine struct",
+                        tsmhandler);
  
-               return visible;
-       }
+       return routine;
  }
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile

index 3d1139b5ba0bfb7e41041fedc1adf42f022e41ed..25130ecf124805565f61c17045c9589445c10e8c 100644 (file)
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -40,8 +40,9 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
         pg_ts_parser.h pg_ts_template.h pg_extension.h \
         pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
         pg_foreign_table.h pg_policy.h pg_replication_origin.h \
-       pg_tablesample_method.h pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
-       pg_collation.h pg_range.h pg_transform.h toasting.h indexing.h \
+       pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
+       pg_collation.h pg_range.h pg_transform.h \
+       toasting.h indexing.h \
      )
  
  # location of Catalog.pm
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c

index 5d7c441739cec7a45090bab0f331c0ad2fc130c5..90b1cd835f89edad6200872360d4dabee620ce37 100644 (file)
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -1911,6 +1911,14 @@ find_expr_references_walker(Node *node,
                                                                    context->addrs);
                 }
         }
+       else if (IsA(node, TableSampleClause))
+       {
+               TableSampleClause *tsc = (TableSampleClause *) node;
+
+               add_object_address(OCLASS_PROC, tsc->tsmhandler, 0,
+                                                  context->addrs);
+               /* fall through to examine arguments */
+       }
  
         return expression_tree_walker(node, find_expr_references_walker,
                                                                   (void *) context);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c

index 0d1ecc2a3edbb85276c3e707ad7b90840e5fd35f..5d06fa4ea65c4a751c38daaefb05b032a0b7aaca 100644 (file)
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -96,6 +96,8 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
                                          List *ancestors, ExplainState *es);
  static void show_sortorder_options(StringInfo buf, Node *sortexpr,
                                            Oid sortOperator, Oid collation, bool nullsFirst);
+static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+                                List *ancestors, ExplainState *es);
  static void show_sort_info(SortState *sortstate, ExplainState *es);
  static void show_hash_info(HashState *hashstate, ExplainState *es);
  static void show_tidbitmap_info(BitmapHeapScanState *planstate,
@@ -116,7 +118,7 @@ static void ExplainMemberNodes(List *plans, PlanState **planstates,
  static void ExplainSubPlans(List *plans, List *ancestors,
                                 const char *relationship, ExplainState *es);
  static void ExplainCustomChildren(CustomScanState *css,
-                                                                 List *ancestors, ExplainState *es);
+                                         List *ancestors, ExplainState *es);
  static void ExplainProperty(const char *qlabel, const char *value,
                                 bool numeric, ExplainState *es);
  static void ExplainOpenGroup(const char *objtype, const char *labelname,
@@ -730,6 +732,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
         switch (nodeTag(plan))
         {
                 case T_SeqScan:
+               case T_SampleScan:
                 case T_IndexScan:
                 case T_IndexOnlyScan:
                 case T_BitmapHeapScan:
@@ -739,7 +742,6 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
                 case T_ValuesScan:
                 case T_CteScan:
                 case T_WorkTableScan:
-               case T_SampleScan:
                         *rels_used = bms_add_member(*rels_used,
                                                                                 ((Scan *) plan)->scanrelid);
                         break;
@@ -935,6 +937,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
                 case T_SeqScan:
                         pname = sname = "Seq Scan";
                         break;
+               case T_SampleScan:
+                       pname = sname = "Sample Scan";
+                       break;
                 case T_IndexScan:
                         pname = sname = "Index Scan";
                         break;
@@ -976,23 +981,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
                         else
                                 pname = sname;
                         break;
-               case T_SampleScan:
-                       {
-                               /*
-                                * Fetch the tablesample method name from RTE.
-                                *
-                                * It would be nice to also show parameters, but since we
-                                * support arbitrary expressions as parameter it might get
-                                * quite messy.
-                                */
-                               RangeTblEntry *rte;
-
-                               rte = rt_fetch(((SampleScan *) plan)->scanrelid, es->rtable);
-                               custom_name = get_tablesample_method_name(rte->tablesample->tsmid);
-                               pname = psprintf("Sample Scan (%s)", custom_name);
-                               sname = "Sample Scan";
-                       }
-                       break;
                 case T_Material:
                         pname = sname = "Materialize";
                         break;
@@ -1101,6 +1089,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
         switch (nodeTag(plan))
         {
                 case T_SeqScan:
+               case T_SampleScan:
                 case T_BitmapHeapScan:
                 case T_TidScan:
                 case T_SubqueryScan:
@@ -1115,9 +1104,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
                         if (((Scan *) plan)->scanrelid > 0)
                                 ExplainScanTarget((Scan *) plan, es);
                         break;
-               case T_SampleScan:
-                       ExplainScanTarget((Scan *) plan, es);
-                       break;
                 case T_IndexScan:
                         {
                                 IndexScan  *indexscan = (IndexScan *) plan;
@@ -1363,12 +1349,15 @@ ExplainNode(PlanState *planstate, List *ancestors,
                         if (es->analyze)
                                 show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
                         break;
+               case T_SampleScan:
+                       show_tablesample(((SampleScan *) plan)->tablesample,
+                                                        planstate, ancestors, es);
+                       /* FALL THRU to print additional fields the same as SeqScan */
                 case T_SeqScan:
                 case T_ValuesScan:
                 case T_CteScan:
                 case T_WorkTableScan:
                 case T_SubqueryScan:
-               case T_SampleScan:
                         show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
                         if (plan->qual)
                                 show_instrumentation_count("Rows Removed by Filter", 1,
@@ -2109,6 +2098,72 @@ show_sortorder_options(StringInfo buf, Node *sortexpr,
         }
  }
  
+/*
+ * Show TABLESAMPLE properties: method name, parameters, REPEATABLE (if any)
+ */
+static void
+show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+                                List *ancestors, ExplainState *es)
+{
+       List       *context;
+       bool            useprefix;
+       char       *method_name;
+       List       *params = NIL;
+       char       *repeatable;
+       ListCell   *lc;
+
+       /* Set up deparsing context; useprefix is set only if rtable has >1 entry */
+       context = set_deparse_context_planstate(es->deparse_cxt,
+                                                                                       (Node *) planstate,
+                                                                                       ancestors);
+       useprefix = list_length(es->rtable) > 1;
+
+       /* Get the tablesample method name (the name of its handler function) */
+       method_name = get_func_name(tsc->tsmhandler);
+
+       /* Deparse parameter expressions, collecting the resulting strings */
+       foreach(lc, tsc->args)
+       {
+               Node       *arg = (Node *) lfirst(lc);
+
+               params = lappend(params,
+                                                deparse_expression(arg, context,
+                                                                                       useprefix, false));
+       }
+       if (tsc->repeatable)
+               repeatable = deparse_expression((Node *) tsc->repeatable, context,
+                                                                               useprefix, false);
+       else
+               repeatable = NULL;
+
+       /* Print results: one "Sampling:" line in text format, else properties */
+       if (es->format == EXPLAIN_FORMAT_TEXT)
+       {
+               bool            first = true;
+
+               appendStringInfoSpaces(es->str, es->indent * 2);
+               appendStringInfo(es->str, "Sampling: %s (", method_name);
+               foreach(lc, params)
+               {
+                       if (!first)
+                               appendStringInfoString(es->str, ", ");
+                       appendStringInfoString(es->str, (const char *) lfirst(lc));
+                       first = false;
+               }
+               appendStringInfoChar(es->str, ')');
+               if (repeatable)
+                       appendStringInfo(es->str, " REPEATABLE (%s)", repeatable);
+               appendStringInfoChar(es->str, '\n');
+       }
+       else
+       {
+               ExplainPropertyText("Sampling Method", method_name, es);
+               ExplainPropertyList("Sampling Parameters", params, es);
+               if (repeatable)
+                       ExplainPropertyText("Repeatable Seed", repeatable, es);
+       }
+}
+
  /*
   * If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node
   */
@@ -2366,13 +2421,13 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
         switch (nodeTag(plan))
         {
                 case T_SeqScan:
+               case T_SampleScan:
                 case T_IndexScan:
                 case T_IndexOnlyScan:
                 case T_BitmapHeapScan:
                 case T_TidScan:
                 case T_ForeignScan:
                 case T_CustomScan:
-               case T_SampleScan:
                 case T_ModifyTable:
                         /* Assert it's on a real relation */
                         Assert(rte->rtekind == RTE_RELATION);
@@ -2663,9 +2718,9 @@ ExplainCustomChildren(CustomScanState *css, List *ancestors, ExplainState *es)
  {
         ListCell   *cell;
         const char *label =
-               (list_length(css->custom_ps) != 1 ? "children" : "child");
+       (list_length(css->custom_ps) != 1 ? "children" : "child");
  
-       foreach (cell, css->custom_ps)
+       foreach(cell, css->custom_ps)
                 ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, es);
  }
  
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c

index 04073d3f9f916f23a750ad0ce2c45e5b0169b802..93e1e9a691c507b08aa58beddbcb74c66a1a8501 100644 (file)
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -463,6 +463,10 @@ ExecSupportsBackwardScan(Plan *node)
                 case T_CteScan:
                         return TargetListSupportsBackwardScan(node->targetlist);
  
+               case T_SampleScan:
+                       /* Simplify life for tablesample methods by disallowing this */
+                       return false;
+
                 case T_IndexScan:
                         return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) &&
                                 TargetListSupportsBackwardScan(node->targetlist);
@@ -485,9 +489,6 @@ ExecSupportsBackwardScan(Plan *node)
                         }
                         return false;
  
-               case T_SampleScan:
-                       return false;
-
                 case T_Material:
                 case T_Sort:
                         /* these don't evaluate tlist */
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c

index 4c1c5237b7d203c5bd19f48375d87586980776d5..dbe84b0baa86886be548194b2630e6f39497293b 100644 (file)
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -3,7 +3,7 @@
   * nodeSamplescan.c
   *       Support routines for sample scans of relations (table sampling).
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
@@ -14,22 +14,23 @@
   */
  #include "postgres.h"
  
-#include "access/tablesample.h"
+#include "access/hash.h"
+#include "access/relscan.h"
+#include "access/tsmapi.h"
  #include "executor/executor.h"
  #include "executor/nodeSamplescan.h"
  #include "miscadmin.h"
-#include "parser/parsetree.h"
  #include "pgstat.h"
-#include "storage/bufmgr.h"
  #include "storage/predicate.h"
  #include "utils/rel.h"
-#include "utils/syscache.h"
  #include "utils/tqual.h"
  
-static void InitScanRelation(SampleScanState *node, EState *estate,
-                                int eflags, TableSampleClause *tablesample);
+static void InitScanRelation(SampleScanState *node, EState *estate, int eflags);
  static TupleTableSlot *SampleNext(SampleScanState *node);
-
+static void tablesample_init(SampleScanState *scanstate);
+static HeapTuple tablesample_getnext(SampleScanState *scanstate);
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
+                                  HeapScanDesc scan);
  
  /* ----------------------------------------------------------------
   *                                             Scan Support
@@ -45,23 +46,26 @@ static TupleTableSlot *SampleNext(SampleScanState *node);
  static TupleTableSlot *
  SampleNext(SampleScanState *node)
  {
-       TupleTableSlot *slot;
-       TableSampleDesc *tsdesc;
         HeapTuple       tuple;
+       TupleTableSlot *slot;
  
         /*
-        * get information from the scan state
+        * if this is first call within a scan, initialize
          */
-       slot = node->ss.ss_ScanTupleSlot;
-       tsdesc = node->tsdesc;
+       if (!node->begun)
+               tablesample_init(node);
+
+       /*
+        * get the next tuple, and store it in our result slot
+        */
+       tuple = tablesample_getnext(node);
  
-       tuple = tablesample_getnext(tsdesc);
+       slot = node->ss.ss_ScanTupleSlot;
  
         if (tuple)
                 ExecStoreTuple(tuple,   /* tuple to store */
                                            slot,        /* slot to store in */
-                                          tsdesc->heapScan->rs_cbuf,           /* buffer associated
-                                                                                                                * with this tuple */
+                                          node->ss.ss_currentScanDesc->rs_cbuf,        /* tuple's buffer */
                                            false);      /* don't pfree this pointer */
         else
                 ExecClearTuple(slot);
@@ -75,7 +79,10 @@ SampleNext(SampleScanState *node)
  static bool
  SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
  {
-       /* No need to recheck for SampleScan */
+       /*
+        * No need to recheck for SampleScan, since like SeqScan we don't pass any
+        * checkable keys to heap_beginscan.
+        */
         return true;
  }
  
@@ -103,8 +110,7 @@ ExecSampleScan(SampleScanState *node)
   * ----------------------------------------------------------------
   */
  static void
-InitScanRelation(SampleScanState *node, EState *estate, int eflags,
-                                TableSampleClause *tablesample)
+InitScanRelation(SampleScanState *node, EState *estate, int eflags)
  {
         Relation        currentRelation;
  
@@ -113,19 +119,13 @@ InitScanRelation(SampleScanState *node, EState *estate, int eflags,
          * open that relation and acquire appropriate lock on it.
          */
         currentRelation = ExecOpenScanRelation(estate,
-                                                               ((SampleScan *) node->ss.ps.plan)->scanrelid,
+                                                  ((SampleScan *) node->ss.ps.plan)->scan.scanrelid,
                                                                                    eflags);
  
         node->ss.ss_currentRelation = currentRelation;
  
-       /*
-        * Even though we aren't going to do a conventional seqscan, it is useful
-        * to create a HeapScanDesc --- many of the fields in it are usable.
-        */
-       node->ss.ss_currentScanDesc =
-               heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL,
-                                                               tablesample->tsmseqscan,
-                                                               tablesample->tsmpagemode);
+       /* we won't set up the HeapScanDesc till later */
+       node->ss.ss_currentScanDesc = NULL;
  
         /* and report the scan tuple slot's rowtype */
         ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
@@ -140,12 +140,11 @@ SampleScanState *
  ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
  {
         SampleScanState *scanstate;
-       RangeTblEntry *rte = rt_fetch(node->scanrelid,
-                                                                 estate->es_range_table);
+       TableSampleClause *tsc = node->tablesample;
+       TsmRoutine *tsm;
  
         Assert(outerPlan(node) == NULL);
         Assert(innerPlan(node) == NULL);
-       Assert(rte->tablesample != NULL);
  
         /*
          * create state structure
@@ -165,10 +164,17 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
          * initialize child expressions
          */
         scanstate->ss.ps.targetlist = (List *)
-               ExecInitExpr((Expr *) node->plan.targetlist,
+               ExecInitExpr((Expr *) node->scan.plan.targetlist,
                                          (PlanState *) scanstate);
         scanstate->ss.ps.qual = (List *)
-               ExecInitExpr((Expr *) node->plan.qual,
+               ExecInitExpr((Expr *) node->scan.plan.qual,
+                                        (PlanState *) scanstate);
+
+       scanstate->args = (List *)
+               ExecInitExpr((Expr *) tsc->args,
+                                        (PlanState *) scanstate);
+       scanstate->repeatable =
+               ExecInitExpr(tsc->repeatable,
                                          (PlanState *) scanstate);
  
         /*
@@ -180,7 +186,7 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
         /*
          * initialize scan relation
          */
-       InitScanRelation(scanstate, estate, eflags, rte->tablesample);
+       InitScanRelation(scanstate, estate, eflags);
  
         scanstate->ss.ps.ps_TupFromTlist = false;
  
@@ -190,7 +196,25 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
         ExecAssignResultTypeFromTL(&scanstate->ss.ps);
         ExecAssignScanProjectionInfo(&scanstate->ss);
  
-       scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample);
+       /*
+        * If we don't have a REPEATABLE clause, select a random seed.  We want to
+        * do this just once, since the seed shouldn't change over rescans.
+        */
+       if (tsc->repeatable == NULL)
+               scanstate->seed = random();
+
+       /*
+        * Finally, initialize the TABLESAMPLE method handler.
+        */
+       tsm = GetTsmRoutine(tsc->tsmhandler);
+       scanstate->tsmroutine = tsm;
+       scanstate->tsm_state = NULL;
+
+       if (tsm->InitSampleScan)
+               tsm->InitSampleScan(scanstate, eflags);
+
+       /* We'll do BeginSampleScan later; we can't evaluate params yet */
+       scanstate->begun = false;
  
         return scanstate;
  }
@@ -207,7 +231,8 @@ ExecEndSampleScan(SampleScanState *node)
         /*
          * Tell sampling function that we finished the scan.
          */
-       tablesample_end(node->tsdesc);
+       if (node->tsmroutine->EndSampleScan)
+               node->tsmroutine->EndSampleScan(node);
  
         /*
          * Free the exprcontext
@@ -223,7 +248,8 @@ ExecEndSampleScan(SampleScanState *node)
         /*
          * close heap scan
          */
-       heap_endscan(node->ss.ss_currentScanDesc);
+       if (node->ss.ss_currentScanDesc)
+               heap_endscan(node->ss.ss_currentScanDesc);
  
         /*
          * close the heap relation.
@@ -231,11 +257,6 @@ ExecEndSampleScan(SampleScanState *node)
         ExecCloseScanRelation(node->ss.ss_currentRelation);
  }
  
-/* ----------------------------------------------------------------
- *                                             Join Support
- * ----------------------------------------------------------------
- */
-
  /* ----------------------------------------------------------------
   *             ExecReScanSampleScan
   *
@@ -246,12 +267,336 @@ ExecEndSampleScan(SampleScanState *node)
  void
  ExecReScanSampleScan(SampleScanState *node)
  {
-       heap_rescan(node->ss.ss_currentScanDesc, NULL);
+       /* Remember we need to do BeginSampleScan again (if we did it at all) */
+       node->begun = false;
+
+       ExecScanReScan(&node->ss);
+}
+
+
+/*
+ * Evaluate TABLESAMPLE params and seed, call BeginSampleScan, set up the scan.
+ */
+static void
+tablesample_init(SampleScanState *scanstate)
+{
+       TsmRoutine *tsm = scanstate->tsmroutine;
+       ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+       Datum      *params;
+       Datum           datum;
+       bool            isnull;
+       uint32          seed;
+       bool            allow_sync;
+       int                     i;
+       ListCell   *arg;
+
+       params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
+
+       i = 0;
+       foreach(arg, scanstate->args)
+       {
+               ExprState  *argstate = (ExprState *) lfirst(arg);
+
+               params[i] = ExecEvalExprSwitchContext(argstate,
+                                                                                         econtext,
+                                                                                         &isnull,
+                                                                                         NULL);
+               if (isnull)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                                        errmsg("TABLESAMPLE parameter cannot be null")));
+               i++;
+       }
+
+       if (scanstate->repeatable)
+       {
+               datum = ExecEvalExprSwitchContext(scanstate->repeatable,
+                                                                                 econtext,
+                                                                                 &isnull,
+                                                                                 NULL);
+               if (isnull)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
+                                errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
+
+               /*
+                * The REPEATABLE parameter has been coerced to float8 by the parser.
+                * The reason for using float8 at the SQL level is that it will
+                * produce unsurprising results both for users used to databases that
+                * accept only integers in the REPEATABLE clause and for those who
+                * might expect that REPEATABLE works like setseed() (a float in the
+                * range from -1 to 1).
+                *
+                * We use hashfloat8() to convert the supplied value into a suitable
+                * seed.  For regression-testing purposes, that has the convenient
+                * property that REPEATABLE(0) gives a machine-independent result.
+                */
+               seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
+       }
+       else
+       {
+               /* Use the seed selected by ExecInitSampleScan */
+               seed = scanstate->seed;
+       }
+
+       /* Set default values for params that BeginSampleScan can adjust */
+       scanstate->use_bulkread = true;
+       scanstate->use_pagemode = true;
+
+       /* Let tablesample method do its thing (it may adjust the flags above) */
+       tsm->BeginSampleScan(scanstate,
+                                                params,
+                                                list_length(scanstate->args),
+                                                seed);
+
+       /* We'll use syncscan if there's no NextSampleBlock function */
+       allow_sync = (tsm->NextSampleBlock == NULL);
+
+       /* Create the HeapScanDesc if none exists yet, else reset its params */
+       if (scanstate->ss.ss_currentScanDesc == NULL)
+       {
+               scanstate->ss.ss_currentScanDesc =
+                       heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
+                                                                       scanstate->ss.ps.state->es_snapshot,
+                                                                       0, NULL,
+                                                                       scanstate->use_bulkread,
+                                                                       allow_sync,
+                                                                       scanstate->use_pagemode);
+       }
+       else
+       {
+               heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
+                                                          scanstate->use_bulkread,
+                                                          allow_sync,
+                                                          scanstate->use_pagemode);
+       }
+
+       pfree(params);
+
+       /* And we're initialized. */
+       scanstate->begun = true;
+}
+
+/*
+ * Get next visible tuple chosen by the TABLESAMPLE method; NULL at scan end.
+ *
+ * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
+ * perhaps be better to refactor to share more code.
+ */
+static HeapTuple
+tablesample_getnext(SampleScanState *scanstate)
+{
+       TsmRoutine *tsm = scanstate->tsmroutine;
+       HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
+       HeapTuple       tuple = &(scan->rs_ctup);
+       Snapshot        snapshot = scan->rs_snapshot;
+       bool            pagemode = scan->rs_pageatatime;
+       BlockNumber blockno;
+       Page            page;
+       bool            all_visible;
+       OffsetNumber maxoffset;
+
+       if (!scan->rs_inited)
+       {
+               /*
+                * return null immediately if relation is empty
+                */
+               if (scan->rs_nblocks == 0)
+               {
+                       Assert(!BufferIsValid(scan->rs_cbuf));
+                       tuple->t_data = NULL;
+                       return NULL;
+               }
+               if (tsm->NextSampleBlock)
+               {
+                       blockno = tsm->NextSampleBlock(scanstate);
+                       if (!BlockNumberIsValid(blockno))
+                       {
+                               tuple->t_data = NULL;
+                               return NULL;
+                       }
+               }
+               else
+                       blockno = scan->rs_startblock;
+               Assert(blockno < scan->rs_nblocks);
+               heapgetpage(scan, blockno);
+               scan->rs_inited = true;
+       }
+       else
+       {
+               /* continue from previously returned page/tuple */
+               blockno = scan->rs_cblock;              /* current page */
+       }
+
+       /*
+        * When not using pagemode, we must lock the buffer during tuple
+        * visibility checks.
+        */
+       if (!pagemode)
+               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+       page = (Page) BufferGetPage(scan->rs_cbuf);
+       all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+       maxoffset = PageGetMaxOffsetNumber(page);
+
+       for (;;)
+       {
+               OffsetNumber tupoffset;
+               bool            finished;
+
+               CHECK_FOR_INTERRUPTS();
+
+               /* Ask the tablesample method which tuples to check on this page. */
+               tupoffset = tsm->NextSampleTuple(scanstate,
+                                                                                blockno,
+                                                                                maxoffset);
+
+               if (OffsetNumberIsValid(tupoffset))
+               {
+                       ItemId          itemid;
+                       bool            visible;
+
+                       /* Skip item pointers that are not in the normal state. */
+                       itemid = PageGetItemId(page, tupoffset);
+                       if (!ItemIdIsNormal(itemid))
+                               continue;
+
+                       tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+                       tuple->t_len = ItemIdGetLength(itemid);
+                       ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+                       if (all_visible)
+                               visible = true;
+                       else
+                               visible = SampleTupleVisible(tuple, tupoffset, scan);
+
+                       /* in pagemode, heapgetpage did this for us */
+                       if (!pagemode)
+                               CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
+                                                                                               scan->rs_cbuf, snapshot);
+
+                       if (visible)
+                       {
+                               /* Found visible tuple, return it. */
+                               if (!pagemode)
+                                       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+                               break;
+                       }
+                       else
+                       {
+                               /* Try next tuple from same page. */
+                               continue;
+                       }
+               }
+
+               /*
+                * if we get here, it means we've exhausted the items on this page and
+                * it's time to move to the next.
+                */
+               if (!pagemode)
+                       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+               if (tsm->NextSampleBlock)
+               {
+                       blockno = tsm->NextSampleBlock(scanstate);
+                       Assert(!scan->rs_syncscan);
+                       finished = !BlockNumberIsValid(blockno);
+               }
+               else
+               {
+                       /* Without NextSampleBlock, just do a plain forward seqscan. */
+                       blockno++;
+                       if (blockno >= scan->rs_nblocks)
+                               blockno = 0;
+
+                       /*
+                        * Report our new scan position for synchronization purposes.
+                        *
+                        * Note: we do this before checking for end of scan so that the
+                        * final state of the position hint is back at the start of the
+                        * rel.  That's not strictly necessary, but otherwise when you run
+                        * the same query multiple times the starting position would shift
+                        * a little bit backwards on every invocation, which is confusing.
+                        * We don't guarantee any specific ordering in general, though.
+                        */
+                       if (scan->rs_syncscan)
+                               ss_report_location(scan->rs_rd, blockno);
+
+                       finished = (blockno == scan->rs_startblock);
+               }
+
+               /*
+                * Reached end of scan?
+                */
+               if (finished)
+               {
+                       if (BufferIsValid(scan->rs_cbuf))
+                               ReleaseBuffer(scan->rs_cbuf);
+                       scan->rs_cbuf = InvalidBuffer;
+                       scan->rs_cblock = InvalidBlockNumber;
+                       tuple->t_data = NULL;
+                       scan->rs_inited = false;
+                       return NULL;
+               }
+
+               Assert(blockno < scan->rs_nblocks);
+               heapgetpage(scan, blockno);
+
+               /* Re-establish state for new page */
+               if (!pagemode)
+                       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+               page = (Page) BufferGetPage(scan->rs_cbuf);
+               all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+               maxoffset = PageGetMaxOffsetNumber(page);
+       }
+
+       /* Count successfully-fetched tuples as heap fetches */
+       pgstat_count_heap_getnext(scan->rs_rd);
+
+       return &(scan->rs_ctup);
+}
  
-       ExecScanReScan(&node->ss);
+/*
+ * Check visibility of the tuple: returns true if visible, false otherwise.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+{
+       if (scan->rs_pageatatime)
+       {
+               /*
+                * In pageatatime mode, heapgetpage() already did visibility checks,
+                * so just look at the info it left in rs_vistuples[].
+                *
+                * We use a binary search over the known-sorted array.  Note: we could
+                * save some effort if we insisted that NextSampleTuple select tuples
+                * in increasing order, but it's not clear that there would be enough
+                * gain to justify the restriction.
+                */
+               int                     start = 0,
+                                       end = scan->rs_ntuples - 1;
+
+               while (start <= end)
+               {
+                       int                     mid = (start + end) / 2;
+                       OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+                       if (tupoffset == curoffset)
+                               return true;
+                       else if (tupoffset < curoffset)
+                               end = mid - 1;
+                       else
+                               start = mid + 1;
+               }
+
+               return false;
+       }
+       else
+       {
+               /* Otherwise, we have to check the tuple individually. */
+               return HeapTupleSatisfiesVisibility(tuple,
+                                                                                       scan->rs_snapshot,
+                                                                                       scan->rs_cbuf);
+       }
  }
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index 6a08c2db211b4e65a103b4aacf5e52c5f41b5adc..7248440ead363a0960b20a2f5b73f8662e4c85d0 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -359,6 +359,27 @@ _copySeqScan(const SeqScan *from)
         return newnode;
  }
  
+/*
+ * _copySampleScan - copy a SampleScan node: the Scan fields plus tablesample
+ */
+static SampleScan *
+_copySampleScan(const SampleScan *from)
+{
+       SampleScan *newnode = makeNode(SampleScan);     /* the COPY_* macro below presumably relies on the names 'from' and 'newnode' */
+
+       /*
+        * copy node superclass fields
+        */
+       CopyScanFields((const Scan *) from, (Scan *) newnode);
+
+       /*
+        * copy remainder of node
+        */
+       COPY_NODE_FIELD(tablesample);   /* copied as a sub-node, not as a scalar */
+
+       return newnode;
+}
+
  /*
   * _copyIndexScan
   */
@@ -641,22 +662,6 @@ _copyCustomScan(const CustomScan *from)
         return newnode;
  }
  
-/*
- * _copySampleScan
- */
-static SampleScan *
-_copySampleScan(const SampleScan *from)
-{
-       SampleScan *newnode = makeNode(SampleScan);
-
-       /*
-        * copy node superclass fields
-        */
-       CopyScanFields((const Scan *) from, (Scan *) newnode);
-
-       return newnode;
-}
-
  /*
   * CopyJoinFields
   *
@@ -2143,6 +2148,18 @@ _copyRangeTblFunction(const RangeTblFunction *from)
         return newnode;
  }
  
+static TableSampleClause *             /* returns the newly made copy of 'from' */
+_copyTableSampleClause(const TableSampleClause *from)
+{
+       TableSampleClause *newnode = makeNode(TableSampleClause);
+
+       COPY_SCALAR_FIELD(tsmhandler);  /* the value the planner passes to GetTsmRoutine() */
+       COPY_NODE_FIELD(args);          /* same field list as _equalTableSampleClause and the out/read functions */
+       COPY_NODE_FIELD(repeatable);
+
+       return newnode;
+}
+
  static WithCheckOption *
  _copyWithCheckOption(const WithCheckOption *from)
  {
@@ -2271,40 +2288,6 @@ _copyCommonTableExpr(const CommonTableExpr *from)
         return newnode;
  }
  
-static RangeTableSample *
-_copyRangeTableSample(const RangeTableSample *from)
-{
-       RangeTableSample *newnode = makeNode(RangeTableSample);
-
-       COPY_NODE_FIELD(relation);
-       COPY_STRING_FIELD(method);
-       COPY_NODE_FIELD(repeatable);
-       COPY_NODE_FIELD(args);
-
-       return newnode;
-}
-
-static TableSampleClause *
-_copyTableSampleClause(const TableSampleClause *from)
-{
-       TableSampleClause *newnode = makeNode(TableSampleClause);
-
-       COPY_SCALAR_FIELD(tsmid);
-       COPY_SCALAR_FIELD(tsmseqscan);
-       COPY_SCALAR_FIELD(tsmpagemode);
-       COPY_SCALAR_FIELD(tsminit);
-       COPY_SCALAR_FIELD(tsmnextblock);
-       COPY_SCALAR_FIELD(tsmnexttuple);
-       COPY_SCALAR_FIELD(tsmexaminetuple);
-       COPY_SCALAR_FIELD(tsmend);
-       COPY_SCALAR_FIELD(tsmreset);
-       COPY_SCALAR_FIELD(tsmcost);
-       COPY_NODE_FIELD(repeatable);
-       COPY_NODE_FIELD(args);
-
-       return newnode;
-}
-
  static A_Expr *
  _copyAExpr(const A_Expr *from)
  {
@@ -2532,6 +2515,20 @@ _copyRangeFunction(const RangeFunction *from)
         return newnode;
  }
  
+static RangeTableSample *              /* returns the newly made copy of 'from' */
+_copyRangeTableSample(const RangeTableSample *from)
+{
+       RangeTableSample *newnode = makeNode(RangeTableSample);
+
+       COPY_NODE_FIELD(relation);
+       COPY_NODE_FIELD(method);        /* copied as a node, not as a C string */
+       COPY_NODE_FIELD(args);
+       COPY_NODE_FIELD(repeatable);
+       COPY_LOCATION_FIELD(location);  /* the value exprLocation() reports for this node type */
+
+       return newnode;
+}
+
  static TypeCast *
  _copyTypeCast(const TypeCast *from)
  {
@@ -4237,6 +4234,9 @@ copyObject(const void *from)
                 case T_SeqScan:
                         retval = _copySeqScan(from);
                         break;
+               case T_SampleScan:
+                       retval = _copySampleScan(from);
+                       break;
                 case T_IndexScan:
                         retval = _copyIndexScan(from);
                         break;
@@ -4273,9 +4273,6 @@ copyObject(const void *from)
                 case T_CustomScan:
                         retval = _copyCustomScan(from);
                         break;
-               case T_SampleScan:
-                       retval = _copySampleScan(from);
-                       break;
                 case T_Join:
                         retval = _copyJoin(from);
                         break;
@@ -4897,6 +4894,9 @@ copyObject(const void *from)
                 case T_RangeFunction:
                         retval = _copyRangeFunction(from);
                         break;
+               case T_RangeTableSample:
+                       retval = _copyRangeTableSample(from);
+                       break;
                 case T_TypeName:
                         retval = _copyTypeName(from);
                         break;
@@ -4921,6 +4921,9 @@ copyObject(const void *from)
                 case T_RangeTblFunction:
                         retval = _copyRangeTblFunction(from);
                         break;
+               case T_TableSampleClause:
+                       retval = _copyTableSampleClause(from);
+                       break;
                 case T_WithCheckOption:
                         retval = _copyWithCheckOption(from);
                         break;
@@ -4948,12 +4951,6 @@ copyObject(const void *from)
                 case T_CommonTableExpr:
                         retval = _copyCommonTableExpr(from);
                         break;
-               case T_RangeTableSample:
-                       retval = _copyRangeTableSample(from);
-                       break;
-               case T_TableSampleClause:
-                       retval = _copyTableSampleClause(from);
-                       break;
                 case T_FuncWithArgs:
                         retval = _copyFuncWithArgs(from);
                         break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index faf5eedab4ed4b7412970b82621d6a42704d008c..6597dbc33e12f9d7e942eb7fa4ca72a566475989 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2290,6 +2290,18 @@ _equalRangeFunction(const RangeFunction *a, const RangeFunction *b)
         return true;
  }
  
+static bool                            /* field-by-field comparison of two RangeTableSample nodes */
+_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
+{
+       COMPARE_NODE_FIELD(relation);
+       COMPARE_NODE_FIELD(method);
+       COMPARE_NODE_FIELD(args);
+       COMPARE_NODE_FIELD(repeatable);
+       COMPARE_LOCATION_FIELD(location);       /* NOTE(review): this macro may deliberately ignore locations - confirm in equalfuncs.c */
+
+       return true;            /* presumably reached only when no COMPARE_* macro returned early */
+}
+
  static bool
  _equalIndexElem(const IndexElem *a, const IndexElem *b)
  {
@@ -2428,6 +2440,16 @@ _equalRangeTblFunction(const RangeTblFunction *a, const RangeTblFunction *b)
         return true;
  }
  
+static bool                            /* field-by-field comparison of two TableSampleClause nodes */
+_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
+{
+       COMPARE_SCALAR_FIELD(tsmhandler);       /* same three fields that _copyTableSampleClause copies */
+       COMPARE_NODE_FIELD(args);
+       COMPARE_NODE_FIELD(repeatable);
+
+       return true;            /* presumably reached only when no COMPARE_* macro returned early */
+}
+
  static bool
  _equalWithCheckOption(const WithCheckOption *a, const WithCheckOption *b)
  {
@@ -2538,36 +2560,6 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
         return true;
  }
  
-static bool
-_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
-{
-       COMPARE_NODE_FIELD(relation);
-       COMPARE_STRING_FIELD(method);
-       COMPARE_NODE_FIELD(repeatable);
-       COMPARE_NODE_FIELD(args);
-
-       return true;
-}
-
-static bool
-_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
-{
-       COMPARE_SCALAR_FIELD(tsmid);
-       COMPARE_SCALAR_FIELD(tsmseqscan);
-       COMPARE_SCALAR_FIELD(tsmpagemode);
-       COMPARE_SCALAR_FIELD(tsminit);
-       COMPARE_SCALAR_FIELD(tsmnextblock);
-       COMPARE_SCALAR_FIELD(tsmnexttuple);
-       COMPARE_SCALAR_FIELD(tsmexaminetuple);
-       COMPARE_SCALAR_FIELD(tsmend);
-       COMPARE_SCALAR_FIELD(tsmreset);
-       COMPARE_SCALAR_FIELD(tsmcost);
-       COMPARE_NODE_FIELD(repeatable);
-       COMPARE_NODE_FIELD(args);
-
-       return true;
-}
-
  static bool
  _equalXmlSerialize(const XmlSerialize *a, const XmlSerialize *b)
  {
@@ -3260,6 +3252,9 @@ equal(const void *a, const void *b)
                 case T_RangeFunction:
                         retval = _equalRangeFunction(a, b);
                         break;
+               case T_RangeTableSample:
+                       retval = _equalRangeTableSample(a, b);
+                       break;
                 case T_TypeName:
                         retval = _equalTypeName(a, b);
                         break;
@@ -3284,6 +3279,9 @@ equal(const void *a, const void *b)
                 case T_RangeTblFunction:
                         retval = _equalRangeTblFunction(a, b);
                         break;
+               case T_TableSampleClause:
+                       retval = _equalTableSampleClause(a, b);
+                       break;
                 case T_WithCheckOption:
                         retval = _equalWithCheckOption(a, b);
                         break;
@@ -3311,12 +3309,6 @@ equal(const void *a, const void *b)
                 case T_CommonTableExpr:
                         retval = _equalCommonTableExpr(a, b);
                         break;
-               case T_RangeTableSample:
-                       retval = _equalRangeTableSample(a, b);
-                       break;
-               case T_TableSampleClause:
-                       retval = _equalTableSampleClause(a, b);
-                       break;
                 case T_FuncWithArgs:
                         retval = _equalFuncWithArgs(a, b);
                         break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c

index b1e3e6e489320086dce3500b1418178095e99714..c517dfd9d69c6264ecdd0c4904b8b8337ccea099 100644 (file)
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -1486,6 +1486,9 @@ exprLocation(const Node *expr)
                 case T_WindowDef:
                         loc = ((const WindowDef *) expr)->location;
                         break;
+               case T_RangeTableSample:
+                       loc = ((const RangeTableSample *) expr)->location;
+                       break;
                 case T_TypeName:
                         loc = ((const TypeName *) expr)->location;
                         break;
@@ -1995,6 +1998,17 @@ expression_tree_walker(Node *node,
                         return walker(((PlaceHolderInfo *) node)->ph_var, context);
                 case T_RangeTblFunction:
                         return walker(((RangeTblFunction *) node)->funcexpr, context);
+               case T_TableSampleClause:
+                       {
+                               TableSampleClause *tsc = (TableSampleClause *) node;
+
+                               if (expression_tree_walker((Node *) tsc->args,
+                                                                                  walker, context))
+                                       return true;
+                               if (walker((Node *) tsc->repeatable, context))
+                                       return true;
+                       }
+                       break;
                 default:
                         elog(ERROR, "unrecognized node type: %d",
                                  (int) nodeTag(node));
@@ -2082,13 +2096,8 @@ range_table_walker(List *rtable,
                 switch (rte->rtekind)
                 {
                         case RTE_RELATION:
-                               if (rte->tablesample)
-                               {
-                                       if (walker(rte->tablesample->args, context))
-                                               return true;
-                                       if (walker(rte->tablesample->repeatable, context))
-                                               return true;
-                               }
+                               if (walker(rte->tablesample, context))
+                                       return true;
                                 break;
                         case RTE_CTE:
                                 /* nothing to do */
@@ -2782,6 +2791,17 @@ expression_tree_mutator(Node *node,
                                 return (Node *) newnode;
                         }
                         break;
+               case T_TableSampleClause:
+                       {
+                               TableSampleClause *tsc = (TableSampleClause *) node;
+                               TableSampleClause *newnode;
+
+                               FLATCOPY(newnode, tsc, TableSampleClause);
+                               MUTATE(newnode->args, tsc->args, List *);
+                               MUTATE(newnode->repeatable, tsc->repeatable, Expr *);
+                               return (Node *) newnode;
+                       }
+                       break;
                 default:
                         elog(ERROR, "unrecognized node type: %d",
                                  (int) nodeTag(node));
@@ -2868,20 +2888,12 @@ range_table_mutator(List *rtable,
                 switch (rte->rtekind)
                 {
                         case RTE_RELATION:
-                               if (rte->tablesample)
-                               {
-                                       CHECKFLATCOPY(newrte->tablesample, rte->tablesample,
-                                                                 TableSampleClause);
-                                       MUTATE(newrte->tablesample->args,
-                                                  newrte->tablesample->args,
-                                                  List *);
-                                       MUTATE(newrte->tablesample->repeatable,
-                                                  newrte->tablesample->repeatable,
-                                                  Node *);
-                               }
+                               MUTATE(newrte->tablesample, rte->tablesample,
+                                          TableSampleClause *);
+                               /* we don't bother to copy eref, aliases, etc; OK? */
                                 break;
                         case RTE_CTE:
-                               /* we don't bother to copy eref, aliases, etc; OK? */
+                               /* nothing to do */
                                 break;
                         case RTE_SUBQUERY:
                                 if (!(flags & QTW_IGNORE_RT_SUBQUERIES))
@@ -3316,6 +3328,19 @@ raw_expression_tree_walker(Node *node,
                                         return true;
                         }
                         break;
+               case T_RangeTableSample:
+                       {
+                               RangeTableSample *rts = (RangeTableSample *) node;
+
+                               if (walker(rts->relation, context))
+                                       return true;
+                               /* method name is deemed uninteresting */
+                               if (walker(rts->args, context))
+                                       return true;
+                               if (walker(rts->repeatable, context))
+                                       return true;
+                       }
+                       break;
                 case T_TypeName:
                         {
                                 TypeName   *tn = (TypeName *) node;
@@ -3380,18 +3405,6 @@ raw_expression_tree_walker(Node *node,
                         break;
                 case T_CommonTableExpr:
                         return walker(((CommonTableExpr *) node)->ctequery, context);
-               case T_RangeTableSample:
-                       {
-                               RangeTableSample *rts = (RangeTableSample *) node;
-
-                               if (walker(rts->relation, context))
-                                       return true;
-                               if (walker(rts->repeatable, context))
-                                       return true;
-                               if (walker(rts->args, context))
-                                       return true;
-                       }
-                       break;
                 default:
                         elog(ERROR, "unrecognized node type: %d",
                                  (int) nodeTag(node));
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 87304ba9bf65df548c5361bcf33eb7f45aaa0c83..81725d6e59a20d2e2dfc9efea995202e843afb7b 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -444,6 +444,16 @@ _outSeqScan(StringInfo str, const SeqScan *node)
         _outScanInfo(str, (const Scan *) node);
  }
  
+static void                            /* emits a SampleScan node; output presumably goes to 'str' */
+_outSampleScan(StringInfo str, const SampleScan *node)
+{
+       WRITE_NODE_TYPE("SAMPLESCAN");  /* node label comes first */
+
+       _outScanInfo(str, (const Scan *) node); /* fields shared with the other Scan node types */
+
+       WRITE_NODE_FIELD(tablesample);  /* then the tablesample sub-node */
+}
+
  static void
  _outIndexScan(StringInfo str, const IndexScan *node)
  {
@@ -591,14 +601,6 @@ _outCustomScan(StringInfo str, const CustomScan *node)
                 node->methods->TextOutCustomScan(str, node);
  }
  
-static void
-_outSampleScan(StringInfo str, const SampleScan *node)
-{
-       WRITE_NODE_TYPE("SAMPLESCAN");
-
-       _outScanInfo(str, (const Scan *) node);
-}
-
  static void
  _outJoin(StringInfo str, const Join *node)
  {
@@ -2478,36 +2480,6 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
         WRITE_NODE_FIELD(ctecolcollations);
  }
  
-static void
-_outRangeTableSample(StringInfo str, const RangeTableSample *node)
-{
-       WRITE_NODE_TYPE("RANGETABLESAMPLE");
-
-       WRITE_NODE_FIELD(relation);
-       WRITE_STRING_FIELD(method);
-       WRITE_NODE_FIELD(repeatable);
-       WRITE_NODE_FIELD(args);
-}
-
-static void
-_outTableSampleClause(StringInfo str, const TableSampleClause *node)
-{
-       WRITE_NODE_TYPE("TABLESAMPLECLAUSE");
-
-       WRITE_OID_FIELD(tsmid);
-       WRITE_BOOL_FIELD(tsmseqscan);
-       WRITE_BOOL_FIELD(tsmpagemode);
-       WRITE_OID_FIELD(tsminit);
-       WRITE_OID_FIELD(tsmnextblock);
-       WRITE_OID_FIELD(tsmnexttuple);
-       WRITE_OID_FIELD(tsmexaminetuple);
-       WRITE_OID_FIELD(tsmend);
-       WRITE_OID_FIELD(tsmreset);
-       WRITE_OID_FIELD(tsmcost);
-       WRITE_NODE_FIELD(repeatable);
-       WRITE_NODE_FIELD(args);
-}
-
  static void
  _outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
  {
@@ -2594,6 +2566,16 @@ _outRangeTblFunction(StringInfo str, const RangeTblFunction *node)
         WRITE_BITMAPSET_FIELD(funcparams);
  }
  
+static void                            /* emits a TableSampleClause node; output presumably goes to 'str' */
+_outTableSampleClause(StringInfo str, const TableSampleClause *node)
+{
+       WRITE_NODE_TYPE("TABLESAMPLECLAUSE");   /* label that parseNodeString() matches to pick _readTableSampleClause */
+
+       WRITE_OID_FIELD(tsmhandler);    /* field order here mirrors _readTableSampleClause */
+       WRITE_NODE_FIELD(args);
+       WRITE_NODE_FIELD(repeatable);
+}
+
  static void
  _outAExpr(StringInfo str, const A_Expr *node)
  {
@@ -2845,6 +2827,18 @@ _outRangeFunction(StringInfo str, const RangeFunction *node)
         WRITE_NODE_FIELD(coldeflist);
  }
  
+static void                            /* emits a RangeTableSample node; output presumably goes to 'str' */
+_outRangeTableSample(StringInfo str, const RangeTableSample *node)
+{
+       WRITE_NODE_TYPE("RANGETABLESAMPLE");    /* NOTE(review): readfuncs.c appears to have no reader for this label - confirm */
+
+       WRITE_NODE_FIELD(relation);
+       WRITE_NODE_FIELD(method);       /* written as a node, not as a string */
+       WRITE_NODE_FIELD(args);
+       WRITE_NODE_FIELD(repeatable);
+       WRITE_LOCATION_FIELD(location);
+}
+
  static void
  _outConstraint(StringInfo str, const Constraint *node)
  {
@@ -3002,6 +2996,9 @@ _outNode(StringInfo str, const void *obj)
                         case T_SeqScan:
                                 _outSeqScan(str, obj);
                                 break;
+                       case T_SampleScan:
+                               _outSampleScan(str, obj);
+                               break;
                         case T_IndexScan:
                                 _outIndexScan(str, obj);
                                 break;
@@ -3038,9 +3035,6 @@ _outNode(StringInfo str, const void *obj)
                         case T_CustomScan:
                                 _outCustomScan(str, obj);
                                 break;
-                       case T_SampleScan:
-                               _outSampleScan(str, obj);
-                               break;
                         case T_Join:
                                 _outJoin(str, obj);
                                 break;
@@ -3393,12 +3387,6 @@ _outNode(StringInfo str, const void *obj)
                         case T_CommonTableExpr:
                                 _outCommonTableExpr(str, obj);
                                 break;
-                       case T_RangeTableSample:
-                               _outRangeTableSample(str, obj);
-                               break;
-                       case T_TableSampleClause:
-                               _outTableSampleClause(str, obj);
-                               break;
                         case T_SetOperationStmt:
                                 _outSetOperationStmt(str, obj);
                                 break;
@@ -3408,6 +3396,9 @@ _outNode(StringInfo str, const void *obj)
                         case T_RangeTblFunction:
                                 _outRangeTblFunction(str, obj);
                                 break;
+                       case T_TableSampleClause:
+                               _outTableSampleClause(str, obj);
+                               break;
                         case T_A_Expr:
                                 _outAExpr(str, obj);
                                 break;
@@ -3450,6 +3441,9 @@ _outNode(StringInfo str, const void *obj)
                         case T_RangeFunction:
                                 _outRangeFunction(str, obj);
                                 break;
+                       case T_RangeTableSample:
+                               _outRangeTableSample(str, obj);
+                               break;
                         case T_Constraint:
                                 _outConstraint(str, obj);
                                 break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index f5a40fbfb44b8d648a9aa32c1089055c4d3c70a6..71be840eac9f76a44dfbf258fcec629cdd2268d7 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -367,46 +367,6 @@ _readCommonTableExpr(void)
         READ_DONE();
  }
  
-/*
- * _readRangeTableSample
- */
-static RangeTableSample *
-_readRangeTableSample(void)
-{
-       READ_LOCALS(RangeTableSample);
-
-       READ_NODE_FIELD(relation);
-       READ_STRING_FIELD(method);
-       READ_NODE_FIELD(repeatable);
-       READ_NODE_FIELD(args);
-
-       READ_DONE();
-}
-
-/*
- * _readTableSampleClause
- */
-static TableSampleClause *
-_readTableSampleClause(void)
-{
-       READ_LOCALS(TableSampleClause);
-
-       READ_OID_FIELD(tsmid);
-       READ_BOOL_FIELD(tsmseqscan);
-       READ_BOOL_FIELD(tsmpagemode);
-       READ_OID_FIELD(tsminit);
-       READ_OID_FIELD(tsmnextblock);
-       READ_OID_FIELD(tsmnexttuple);
-       READ_OID_FIELD(tsmexaminetuple);
-       READ_OID_FIELD(tsmend);
-       READ_OID_FIELD(tsmreset);
-       READ_OID_FIELD(tsmcost);
-       READ_NODE_FIELD(repeatable);
-       READ_NODE_FIELD(args);
-
-       READ_DONE();
-}
-
  /*
   * _readSetOperationStmt
   */
@@ -1391,6 +1351,21 @@ _readRangeTblFunction(void)
         READ_DONE();
  }
  
+/*
+ * _readTableSampleClause - read back the fields written by _outTableSampleClause
+ */
+static TableSampleClause *
+_readTableSampleClause(void)
+{
+       READ_LOCALS(TableSampleClause); /* presumably declares and allocates the node being filled in */
+
+       READ_OID_FIELD(tsmhandler);     /* field order must mirror _outTableSampleClause */
+       READ_NODE_FIELD(args);
+       READ_NODE_FIELD(repeatable);
+
+       READ_DONE();            /* presumably returns the node; there is no explicit return statement */
+}
+
  
  /*
   * parseNodeString
@@ -1426,10 +1401,6 @@ parseNodeString(void)
                 return_value = _readRowMarkClause();
         else if (MATCH("COMMONTABLEEXPR", 15))
                 return_value = _readCommonTableExpr();
-       else if (MATCH("RANGETABLESAMPLE", 16))
-               return_value = _readRangeTableSample();
-       else if (MATCH("TABLESAMPLECLAUSE", 17))
-               return_value = _readTableSampleClause();
         else if (MATCH("SETOPERATIONSTMT", 16))
                 return_value = _readSetOperationStmt();
         else if (MATCH("ALIAS", 5))
@@ -1528,6 +1499,8 @@ parseNodeString(void)
                 return_value = _readRangeTblEntry();
         else if (MATCH("RANGETBLFUNCTION", 16))
                 return_value = _readRangeTblFunction();
+       else if (MATCH("TABLESAMPLECLAUSE", 17))
+               return_value = _readTableSampleClause();
         else if (MATCH("NOTIFY", 6))
                 return_value = _readNotifyStmt();
         else if (MATCH("DECLARECURSOR", 13))
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c

index 888eeac5151842a285fb16c94da8f338ce89567b..1590be116750846b8957fe8a9ae1ed03b89d6917 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -18,6 +18,7 @@
  #include <math.h>
  
  #include "access/sysattr.h"
+#include "access/tsmapi.h"
  #include "catalog/pg_class.h"
  #include "catalog/pg_operator.h"
  #include "foreign/fdwapi.h"
@@ -390,7 +391,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                                 }
                                 else if (rte->tablesample != NULL)
                                 {
-                                       /* Build sample scan on relation */
+                                       /* Sampled relation */
                                         set_tablesample_rel_pathlist(root, rel, rte);
                                 }
                                 else
@@ -480,11 +481,40 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  
  /*
   * set_tablesample_rel_size
- *       Set size estimates for a sampled relation.
+ *       Set size estimates for a sampled relation, as reported by its sampling method
   */
  static void
  set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  {
+       TableSampleClause *tsc = rte->tablesample;
+       TsmRoutine *tsm;                /* result of GetTsmRoutine() for this clause */
+       BlockNumber pages;              /* filled in by the method: pages it will read */
+       double          tuples;         /* filled in by the method: tuples it will return */
+
+       /*
+        * Test any partial indexes of rel for applicability.  We must do this
+        * first since partial unique indexes can affect size estimates.
+        */
+       check_partial_indexes(root, rel);
+
+       /*
+        * Call the sampling method's estimation function to estimate the number
+        * of pages it will read and the number of tuples it will return.  (Note:
+        * we assume the function returns sane values.)
+        */
+       tsm = GetTsmRoutine(tsc->tsmhandler);
+       tsm->SampleScanGetSampleSize(root, rel, tsc->args,
+                                                                &pages, &tuples);
+
+       /*
+        * For the moment, because we will only consider a SampleScan path for the
+        * rel, it's okay to just overwrite the pages and tuples estimates for the
+        * whole relation.  If we ever consider multiple path types for sampled
+        * rels, we'll need more complication.
+        */
+       rel->pages = pages;
+       rel->tuples = tuples;
+
         /* Mark rel with estimated output rows, width, etc */
         set_baserel_size_estimates(root, rel);
  }
@@ -492,8 +522,6 @@ set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  /*
   * set_tablesample_rel_pathlist
   *       Build access paths for a sampled relation
- *
- * There is only one possible path - sampling scan
   */
  static void
  set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
@@ -502,15 +530,41 @@ set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *
         Path       *path;
  
         /*
-        * We don't support pushing join clauses into the quals of a seqscan, but
-        * it could still have required parameterization due to LATERAL refs in
-        * its tlist.
+        * We don't support pushing join clauses into the quals of a samplescan,
+        * but it could still have required parameterization due to LATERAL refs
+        * in its tlist or TABLESAMPLE arguments.
          */
         required_outer = rel->lateral_relids;
  
-       /* We only do sample scan if it was requested */
+       /* Consider sampled scan */
         path = create_samplescan_path(root, rel, required_outer);
-       rel->pathlist = list_make1(path);
+
+       /*
+        * If the sampling method does not support repeatable scans, we must avoid
+        * plans that would scan the rel multiple times.  Ideally, we'd simply
+        * avoid putting the rel on the inside of a nestloop join; but adding such
+        * a consideration to the planner seems like a great deal of complication
+        * to support an uncommon usage of second-rate sampling methods.  Instead,
+        * if there is a risk that the query might perform an unsafe join, just
+        * wrap the SampleScan in a Materialize node.  We can check for joins by
+        * counting the membership of all_baserels (note that this correctly
+        * counts inheritance trees as single rels).  If we're inside a subquery,
+        * we can't easily check whether a join might occur in the outer query, so
+        * just assume one is possible.
+        *
+        * GetTsmRoutine is relatively expensive compared to the other tests here,
+        * so check repeatable_across_scans last, even though that's a bit odd.
+        */
+       if ((root->query_level > 1 ||
+                bms_membership(root->all_baserels) != BMS_SINGLETON) &&
+        !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
+       {
+               path = (Path *) create_material_path(rel, path);
+       }
+
+       add_path(rel, path);
+
+       /* For the moment, at least, there are no other paths to consider */
  }
  
  /*
@@ -2450,7 +2504,33 @@ print_path(PlannerInfo *root, Path *path, int indent)
         switch (nodeTag(path))
         {
                 case T_Path:
-                       ptype = "SeqScan";
+                       switch (path->pathtype)
+                       {
+                               case T_SeqScan:
+                                       ptype = "SeqScan";
+                                       break;
+                               case T_SampleScan:
+                                       ptype = "SampleScan";
+                                       break;
+                               case T_SubqueryScan:
+                                       ptype = "SubqueryScan";
+                                       break;
+                               case T_FunctionScan:
+                                       ptype = "FunctionScan";
+                                       break;
+                               case T_ValuesScan:
+                                       ptype = "ValuesScan";
+                                       break;
+                               case T_CteScan:
+                                       ptype = "CteScan";
+                                       break;
+                               case T_WorkTableScan:
+                                       ptype = "WorkTableScan";
+                                       break;
+                               default:
+                                       ptype = "???Path";
+                                       break;
+                       }
                         break;
                 case T_IndexPath:
                         ptype = "IdxScan";
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 0d302f66bee4c478dc4cc99729c79a75af727982..7069f6041102e6cb995316a3d951fe61adc0d367 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -74,6 +74,7 @@
  #include <math.h>
  
  #include "access/htup_details.h"
+#include "access/tsmapi.h"
  #include "executor/executor.h"
  #include "executor/nodeHash.h"
  #include "miscadmin.h"
@@ -223,64 +224,66 @@ cost_seqscan(Path *path, PlannerInfo *root,
   * cost_samplescan
   *       Determines and returns the cost of scanning a relation using sampling.
   *
- * From planner/optimizer perspective, we don't care all that much about cost
- * itself since there is always only one scan path to consider when sampling
- * scan is present, but number of rows estimation is still important.
- *
   * 'baserel' is the relation to be scanned
   * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
   */
  void
-cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+cost_samplescan(Path *path, PlannerInfo *root,
+                               RelOptInfo *baserel, ParamPathInfo *param_info)
  {
         Cost            startup_cost = 0;
         Cost            run_cost = 0;
+       RangeTblEntry *rte;
+       TableSampleClause *tsc;
+       TsmRoutine *tsm;
         double          spc_seq_page_cost,
                                 spc_random_page_cost,
                                 spc_page_cost;
         QualCost        qpqual_cost;
         Cost            cpu_per_tuple;
-       BlockNumber pages;
-       double          tuples;
-       RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
-       TableSampleClause *tablesample = rte->tablesample;
  
-       /* Should only be applied to base relations */
+       /* Should only be applied to base relations with tablesample clauses */
         Assert(baserel->relid > 0);
-       Assert(baserel->rtekind == RTE_RELATION);
+       rte = planner_rt_fetch(baserel->relid, root);
+       Assert(rte->rtekind == RTE_RELATION);
+       tsc = rte->tablesample;
+       Assert(tsc != NULL);
+       tsm = GetTsmRoutine(tsc->tsmhandler);
  
         /* Mark the path with the correct row estimate */
-       if (path->param_info)
-               path->rows = path->param_info->ppi_rows;
+       if (param_info)
+               path->rows = param_info->ppi_rows;
         else
                 path->rows = baserel->rows;
  
-       /* Call the sampling method's costing function. */
-       OidFunctionCall6(tablesample->tsmcost, PointerGetDatum(root),
-                                        PointerGetDatum(path), PointerGetDatum(baserel),
-                                        PointerGetDatum(tablesample->args),
-                                        PointerGetDatum(&pages), PointerGetDatum(&tuples));
-
         /* fetch estimated page cost for tablespace containing table */
         get_tablespace_page_costs(baserel->reltablespace,
                                                           &spc_random_page_cost,
                                                           &spc_seq_page_cost);
  
-
-       spc_page_cost = tablesample->tsmseqscan ? spc_seq_page_cost :
-               spc_random_page_cost;
+       /* if NextSampleBlock is used, assume random access, else sequential */
+       spc_page_cost = (tsm->NextSampleBlock != NULL) ?
+               spc_random_page_cost : spc_seq_page_cost;
  
         /*
-        * disk costs
+        * disk costs (recall that baserel->pages has already been set to the
+        * number of pages the sampling method will visit)
          */
-       run_cost += spc_page_cost * pages;
+       run_cost += spc_page_cost * baserel->pages;
  
-       /* CPU costs */
-       get_restriction_qual_cost(root, baserel, path->param_info, &qpqual_cost);
+       /*
+        * CPU costs (recall that baserel->tuples has already been set to the
+        * number of tuples the sampling method will select).  Note that we ignore
+        * execution cost of the TABLESAMPLE parameter expressions; they will be
+        * evaluated only once per scan, and in most usages they'll likely be
+        * simple constants anyway.  We also don't charge anything for the
+        * calculations the sampling method might do internally.
+        */
+       get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
  
         startup_cost += qpqual_cost.startup;
         cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
-       run_cost += cpu_per_tuple * tuples;
+       run_cost += cpu_per_tuple * baserel->tuples;
  
         path->startup_cost = startup_cost;
         path->total_cost = startup_cost + run_cost;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 8d15c8ede90f9be93dec263ce61a0eb20dea5e54..f461586e08c5b3a2711eb55c003c26d2907388c7 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -102,7 +102,8 @@ static List *order_qual_clauses(PlannerInfo *root, List *clauses);
  static void copy_path_costsize(Plan *dest, Path *src);
  static void copy_plan_costsize(Plan *dest, Plan *src);
  static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
-static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid);
+static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid,
+                               TableSampleClause *tsc);
  static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
                            Oid indexid, List *indexqual, List *indexqualorig,
                            List *indexorderby, List *indexorderbyorig,
@@ -1148,7 +1149,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path,
  
  /*
   * create_samplescan_plan
- *      Returns a samplecan plan for the base relation scanned by 'best_path'
+ *      Returns a samplescan plan for the base relation scanned by 'best_path'
   *      with restriction clauses 'scan_clauses' and targetlist 'tlist'.
   */
  static SampleScan *
@@ -1157,11 +1158,15 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path,
  {
         SampleScan *scan_plan;
         Index           scan_relid = best_path->parent->relid;
+       RangeTblEntry *rte;
+       TableSampleClause *tsc;
  
-       /* it should be a base rel with tablesample clause... */
+       /* it should be a base rel with a tablesample clause... */
         Assert(scan_relid > 0);
-       Assert(best_path->parent->rtekind == RTE_RELATION);
-       Assert(best_path->pathtype == T_SampleScan);
+       rte = planner_rt_fetch(scan_relid, root);
+       Assert(rte->rtekind == RTE_RELATION);
+       tsc = rte->tablesample;
+       Assert(tsc != NULL);
  
         /* Sort clauses into best execution order */
         scan_clauses = order_qual_clauses(root, scan_clauses);
@@ -1174,13 +1179,16 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path,
         {
                 scan_clauses = (List *)
                         replace_nestloop_params(root, (Node *) scan_clauses);
+               tsc = (TableSampleClause *)
+                       replace_nestloop_params(root, (Node *) tsc);
         }
  
         scan_plan = make_samplescan(tlist,
                                                                 scan_clauses,
-                                                               scan_relid);
+                                                               scan_relid,
+                                                               tsc);
  
-       copy_path_costsize(&scan_plan->plan, best_path);
+       copy_path_costsize(&scan_plan->scan.plan, best_path);
  
         return scan_plan;
  }
@@ -2161,9 +2169,9 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
         ListCell   *lc;
  
         /* Recursively transform child paths. */
-       foreach (lc, best_path->custom_paths)
+       foreach(lc, best_path->custom_paths)
         {
-               Plan   *plan = create_plan_recurse(root, (Path *) lfirst(lc));
+               Plan       *plan = create_plan_recurse(root, (Path *) lfirst(lc));
  
                 custom_plans = lappend(custom_plans, plan);
         }
@@ -3437,17 +3445,19 @@ make_seqscan(List *qptlist,
  static SampleScan *
  make_samplescan(List *qptlist,
                                 List *qpqual,
-                               Index scanrelid)
+                               Index scanrelid,
+                               TableSampleClause *tsc)
  {
         SampleScan *node = makeNode(SampleScan);
-       Plan       *plan = &node->plan;
+       Plan       *plan = &node->scan.plan;
  
         /* cost should be inserted by caller */
         plan->targetlist = qptlist;
         plan->qual = qpqual;
         plan->lefttree = NULL;
         plan->righttree = NULL;
-       node->scanrelid = scanrelid;
+       node->scan.scanrelid = scanrelid;
+       node->tablesample = tsc;
  
         return node;
  }
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index 00b2625d342ee375e884bf30945e4b29230a2118..701b99254db0d1745f3c2965bb8e445f58d4a45b 100644 (file)
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -306,7 +306,9 @@ extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex)
                 return;
  
         /* Fetch the appropriate variables */
-       if (rte->rtekind == RTE_SUBQUERY)
+       if (rte->rtekind == RTE_RELATION)
+               vars = pull_vars_of_level((Node *) rte->tablesample, 0);
+       else if (rte->rtekind == RTE_SUBQUERY)
                 vars = pull_vars_of_level((Node *) rte->subquery, 1);
         else if (rte->rtekind == RTE_FUNCTION)
                 vars = pull_vars_of_level((Node *) rte->functions, 0);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index a6ce96efc48623c187233a1b04e45a64f8eeeae0..b95cc95e5d9a201949d89d713e0cfa77be6a1a22 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -505,14 +505,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
                 if (rte->rtekind == RTE_RELATION)
                 {
                         if (rte->tablesample)
-                       {
-                               rte->tablesample->args = (List *)
-                                       preprocess_expression(root, (Node *) rte->tablesample->args,
-                                                                                 EXPRKIND_TABLESAMPLE);
-                               rte->tablesample->repeatable = (Node *)
-                                       preprocess_expression(root, rte->tablesample->repeatable,
+                               rte->tablesample = (TableSampleClause *)
+                                       preprocess_expression(root,
+                                                                                 (Node *) rte->tablesample,
                                                                                   EXPRKIND_TABLESAMPLE);
-                       }
                 }
                 else if (rte->rtekind == RTE_SUBQUERY)
                 {
@@ -697,11 +693,14 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
          * If the query has any join RTEs, replace join alias variables with
          * base-relation variables.  We must do this before sublink processing,
          * else sublinks expanded out from join aliases would not get processed.
-        * We can skip it in non-lateral RTE functions and VALUES lists, however,
-        * since they can't contain any Vars of the current query level.
+        * We can skip it in non-lateral RTE functions, VALUES lists, and
+        * TABLESAMPLE clauses, however, since they can't contain any Vars of the
+        * current query level.
          */
         if (root->hasJoinRTEs &&
-               !(kind == EXPRKIND_RTFUNC || kind == EXPRKIND_VALUES))
+               !(kind == EXPRKIND_RTFUNC ||
+                 kind == EXPRKIND_VALUES ||
+                 kind == EXPRKIND_TABLESAMPLE))
                 expr = flatten_join_alias_vars(root, expr);
  
         /*
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c

index 258e541754aa165612ff41eb141fc2bc1db9198b..ea185d4b4cff6b98cb1da5a709b376595eb6d652 100644 (file)
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -372,9 +372,8 @@ flatten_rtes_walker(Node *node, PlannerGlobal *glob)
   *
   * In the flat rangetable, we zero out substructure pointers that are not
   * needed by the executor; this reduces the storage space and copying cost
- * for cached plans.  We keep only the tablesample field (which we'd otherwise
- * have to put in the plan tree, anyway); the ctename, alias and eref Alias
- * fields, which are needed by EXPLAIN; and the selectedCols, insertedCols and
+ * for cached plans.  We keep only the ctename, alias and eref Alias fields,
+ * which are needed by EXPLAIN, and the selectedCols, insertedCols and
   * updatedCols bitmaps, which are needed for executor-startup permissions
   * checking and for trigger event checking.
   */
@@ -388,6 +387,7 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte)
         memcpy(newrte, rte, sizeof(RangeTblEntry));
  
         /* zap unneeded sub-structure */
+       newrte->tablesample = NULL;
         newrte->subquery = NULL;
         newrte->joinaliasvars = NIL;
         newrte->functions = NIL;
@@ -456,11 +456,13 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
                         {
                                 SampleScan *splan = (SampleScan *) plan;
  
-                               splan->scanrelid += rtoffset;
-                               splan->plan.targetlist =
-                                       fix_scan_list(root, splan->plan.targetlist, rtoffset);
-                               splan->plan.qual =
-                                       fix_scan_list(root, splan->plan.qual, rtoffset);
+                               splan->scan.scanrelid += rtoffset;
+                               splan->scan.plan.targetlist =
+                                       fix_scan_list(root, splan->scan.plan.targetlist, rtoffset);
+                               splan->scan.plan.qual =
+                                       fix_scan_list(root, splan->scan.plan.qual, rtoffset);
+                               splan->tablesample = (TableSampleClause *)
+                                       fix_scan_expr(root, (Node *) splan->tablesample, rtoffset);
                         }
                         break;
                 case T_IndexScan:
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c

index 4708b87f330b6145505afeac4a0be5eb00d441cb..f3038cdffda3ad9467935327df6c1cf7913798f1 100644 (file)
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2216,7 +2216,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
                         break;
  
                 case T_SeqScan:
+                       context.paramids = bms_add_members(context.paramids, scan_params);
+                       break;
+
                 case T_SampleScan:
+                       finalize_primnode((Node *) ((SampleScan *) plan)->tablesample,
+                                                         &context);
                         context.paramids = bms_add_members(context.paramids, scan_params);
                         break;
  
@@ -2384,7 +2389,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
                                         bms_add_members(context.paramids, scan_params);
  
                                 /* child nodes if any */
-                               foreach (lc, cscan->custom_plans)
+                               foreach(lc, cscan->custom_plans)
                                 {
                                         context.paramids =
                                                 bms_add_members(context.paramids,
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c

index 92b0562843458b403517d2c008c9db5cd26a1f79..34144ccaf0fa69161541bf5785d9d20d9a162cda 100644 (file)
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1091,12 +1091,15 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
  
                         switch (child_rte->rtekind)
                         {
+                               case RTE_RELATION:
+                                       if (child_rte->tablesample)
+                                               child_rte->lateral = true;
+                                       break;
                                 case RTE_SUBQUERY:
                                 case RTE_FUNCTION:
                                 case RTE_VALUES:
                                         child_rte->lateral = true;
                                         break;
-                               case RTE_RELATION:
                                 case RTE_JOIN:
                                 case RTE_CTE:
                                         /* these can't contain any lateral references */
@@ -1909,6 +1912,13 @@ replace_vars_in_jointree(Node *jtnode,
                         {
                                 switch (rte->rtekind)
                                 {
+                                       case RTE_RELATION:
+                                               /* shouldn't be marked LATERAL unless tablesample */
+                                               Assert(rte->tablesample);
+                                               rte->tablesample = (TableSampleClause *)
+                                                       pullup_replace_vars((Node *) rte->tablesample,
+                                                                                               context);
+                                               break;
                                         case RTE_SUBQUERY:
                                                 rte->subquery =
                                                         pullup_replace_vars_subquery(rte->subquery,
@@ -1924,7 +1934,6 @@ replace_vars_in_jointree(Node *jtnode,
                                                         pullup_replace_vars((Node *) rte->values_lists,
                                                                                                 context);
                                                 break;
-                                       case RTE_RELATION:
                                         case RTE_JOIN:
                                         case RTE_CTE:
                                                 /* these shouldn't be marked LATERAL */
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index f7f33bbe7721b6a01865f1deaf2f25d0d6d96a96..935bc2b9667d33e7e8ddc9a7469b42ea0a2c0fdf 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -713,7 +713,7 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
  
  /*
   * create_samplescan_path
- *       Like seqscan but uses sampling function while scanning.
+ *       Creates a path node for a sampled table scan.
   */
  Path *
  create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
@@ -726,7 +726,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer
                                                                                                          required_outer);
         pathnode->pathkeys = NIL;       /* samplescan has unordered result */
  
-       cost_samplescan(pathnode, root, rel);
+       cost_samplescan(pathnode, root, rel, pathnode->param_info);
  
         return pathnode;
  }
@@ -1773,6 +1773,8 @@ reparameterize_path(PlannerInfo *root, Path *path,
         {
                 case T_SeqScan:
                         return create_seqscan_path(root, rel, required_outer);
+               case T_SampleScan:
+                       return (Path *) create_samplescan_path(root, rel, required_outer);
                 case T_IndexScan:
                 case T_IndexOnlyScan:
                         {
@@ -1805,8 +1807,6 @@ reparameterize_path(PlannerInfo *root, Path *path,
                 case T_SubqueryScan:
                         return create_subqueryscan_path(root, rel, path->pathkeys,
                                                                                         required_outer);
-               case T_SampleScan:
-                       return (Path *) create_samplescan_path(root, rel, required_outer);
                 default:
                         break;
         }
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index 2b02a2e523380cf2a12d2171c63b4ed887cb7285..8f053e47e82df8aebb228138691dc131f61805f8 100644 (file)
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -457,8 +457,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
  %type <jexpr>  joined_table
  %type <range>  relation_expr
  %type <range>  relation_expr_opt_alias
+%type <node>   tablesample_clause opt_repeatable_clause
  %type <target> target_el single_set_clause set_target insert_column_item
-%type <node>   relation_expr_tablesample tablesample_clause opt_repeatable_clause
  
  %type <str>            generic_option_name
  %type <node>   generic_option_arg
@@ -10491,9 +10491,13 @@ table_ref:     relation_expr opt_alias_clause
                                         $1->alias = $2;
                                         $$ = (Node *) $1;
                                 }
-                       | relation_expr_tablesample
+                       | relation_expr opt_alias_clause tablesample_clause
                                 {
-                                       $$ = (Node *) $1;
+                                       RangeTableSample *n = (RangeTableSample *) $3;
+                                       $1->alias = $2;
+                                       /* relation_expr goes inside the RangeTableSample node */
+                                       n->relation = (Node *) $1;
+                                       $$ = (Node *) n;
                                 }
                         | func_table func_alias_clause
                                 {
@@ -10820,23 +10824,18 @@ relation_expr_opt_alias: relation_expr                                        %prec UMINUS
                                 }
                 ;
  
-
-relation_expr_tablesample: relation_expr opt_alias_clause tablesample_clause
-                               {
-                                       RangeTableSample *n = (RangeTableSample *) $3;
-                                       n->relation = $1;
-                                       n->relation->alias = $2;
-                                       $$ = (Node *) n;
-                               }
-               ;
-
+/*
+ * TABLESAMPLE decoration in a FROM item
+ */
  tablesample_clause:
-                       TABLESAMPLE ColId '(' expr_list ')' opt_repeatable_clause
+                       TABLESAMPLE func_name '(' expr_list ')' opt_repeatable_clause
                                 {
                                         RangeTableSample *n = makeNode(RangeTableSample);
+                                       /* n->relation will be filled in later */
                                         n->method = $2;
                                         n->args = $4;
                                         n->repeatable = $6;
+                                       n->location = @2;
                                         $$ = (Node *) n;
                                 }
                 ;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c

index e90e1d68e3a535ad9c549e5fd242e0c6d69232c4..4e490b23b4e272fadc4272fbc06c7453465f2a9d 100644 (file)
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -18,8 +18,8 @@
  #include "miscadmin.h"
  
  #include "access/heapam.h"
+#include "access/tsmapi.h"
  #include "catalog/catalog.h"
-#include "access/htup_details.h"
  #include "catalog/heap.h"
  #include "catalog/pg_constraint.h"
  #include "catalog/pg_type.h"
@@ -43,7 +43,7 @@
  #include "utils/guc.h"
  #include "utils/lsyscache.h"
  #include "utils/rel.h"
-#include "utils/syscache.h"
+
  
  /* Convenience macro for the most common makeNamespaceItem() case */
  #define makeDefaultNSItem(rte) makeNamespaceItem(rte, true, true, false, true)
@@ -63,6 +63,8 @@ static RangeTblEntry *transformRangeSubselect(ParseState *pstate,
                                                 RangeSubselect *r);
  static RangeTblEntry *transformRangeFunction(ParseState *pstate,
                                            RangeFunction *r);
+static TableSampleClause *transformRangeTableSample(ParseState *pstate,
+                                                 RangeTableSample *rts);
  static Node *transformFromClauseItem(ParseState *pstate, Node *n,
                                                 RangeTblEntry **top_rte, int *top_rti,
                                                 List **namespace);
@@ -423,40 +425,6 @@ transformJoinOnClause(ParseState *pstate, JoinExpr *j, List *namespace)
         return result;
  }
  
-static RangeTblEntry *
-transformTableSampleEntry(ParseState *pstate, RangeTableSample *rv)
-{
-       RangeTblEntry *rte = NULL;
-       CommonTableExpr *cte = NULL;
-       TableSampleClause *tablesample = NULL;
-
-       /* if relation has an unqualified name, it might be a CTE reference */
-       if (!rv->relation->schemaname)
-       {
-               Index           levelsup;
-
-               cte = scanNameSpaceForCTE(pstate, rv->relation->relname, &levelsup);
-       }
-
-       /* We first need to build a range table entry */
-       if (!cte)
-               rte = transformTableEntry(pstate, rv->relation);
-
-       if (!rte ||
-               (rte->relkind != RELKIND_RELATION &&
-                rte->relkind != RELKIND_MATVIEW))
-               ereport(ERROR,
-                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                errmsg("TABLESAMPLE clause can only be used on tables and materialized views"),
-                                parser_errposition(pstate, rv->relation->location)));
-
-       tablesample = ParseTableSample(pstate, rv->method, rv->repeatable,
-                                                                  rv->args, rv->relation->location);
-       rte->tablesample = tablesample;
-
-       return rte;
-}
-
  /*
   * transformTableEntry --- transform a RangeVar (simple relation reference)
   */
@@ -748,6 +716,109 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r)
         return rte;
  }
  
+/*
+ * transformRangeTableSample --- transform a TABLESAMPLE clause
+ *
+ * Caller has already transformed rts->relation; we just have to validate
+ * the remaining fields and create a TableSampleClause node.
+ *
+ * 'pstate' is the current parse state, used for expression transformation
+ * and for error cursor positions.
+ * 'rts' is the raw RangeTableSample node; its method name, args list,
+ * optional repeatable expression and location are consulted here.
+ *
+ * Returns a new TableSampleClause carrying the handler function's OID, the
+ * transformed and coerced argument list, and the REPEATABLE expression
+ * (NULL if none was given).
+ *
+ * Errors are raised if no handler function of that name exists, if it does
+ * not return tsm_handler, if the argument count doesn't match the method's
+ * parameterTypes list, or if REPEATABLE is given but the method does not
+ * set repeatable_across_queries.
+ */
+static TableSampleClause *
+transformRangeTableSample(ParseState *pstate, RangeTableSample *rts)
+{
+       TableSampleClause *tablesample;
+       Oid                     handlerOid;
+       Oid                     funcargtypes[1];
+       TsmRoutine *tsm;
+       List       *fargs;
+       ListCell   *larg,
+                          *ltyp;
+
+       /*
+        * To validate the sample method name, look up the handler function, which
+        * has the same name, one dummy INTERNAL argument, and a result type of
+        * tsm_handler.  (Note: tablesample method names are not schema-qualified
+        * in the SQL standard; but since they are just functions to us, we allow
+        * schema qualification to resolve any potential ambiguity.)
+        */
+       funcargtypes[0] = INTERNALOID;
+
+       handlerOid = LookupFuncName(rts->method, 1, funcargtypes, true);
+
+       /* we want error to complain about no-such-method, not no-such-function */
+       if (!OidIsValid(handlerOid))
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("tablesample method %s does not exist",
+                                               NameListToString(rts->method)),
+                                parser_errposition(pstate, rts->location)));
+
+       /* check that handler has correct return type */
+       if (get_func_rettype(handlerOid) != TSM_HANDLEROID)
+               ereport(ERROR,
+                               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                errmsg("function %s must return type \"tsm_handler\"",
+                                               NameListToString(rts->method)),
+                                parser_errposition(pstate, rts->location)));
+
+       /* OK, run the handler to get TsmRoutine, for argument type info */
+       tsm = GetTsmRoutine(handlerOid);
+
+       tablesample = makeNode(TableSampleClause);
+       tablesample->tsmhandler = handlerOid;
+
+       /* check user provided the expected number of arguments */
+       if (list_length(rts->args) != list_length(tsm->parameterTypes))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
+                 errmsg_plural("tablesample method %s requires %d argument, not %d",
+                                               "tablesample method %s requires %d arguments, not %d",
+                                               list_length(tsm->parameterTypes),
+                                               NameListToString(rts->method),
+                                               list_length(tsm->parameterTypes),
+                                               list_length(rts->args)),
+                                parser_errposition(pstate, rts->location)));
+
+       /*
+        * Transform the arguments, typecasting them as needed.  Note we must also
+        * assign collations now, because assign_query_collations() doesn't
+        * examine any substructure of RTEs.
+        */
+       fargs = NIL;
+       forboth(larg, rts->args, ltyp, tsm->parameterTypes)
+       {
+               Node       *arg = (Node *) lfirst(larg);
+               Oid                     argtype = lfirst_oid(ltyp);
+
+               arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
+               arg = coerce_to_specific_type(pstate, arg, argtype, "TABLESAMPLE");
+               assign_expr_collations(pstate, arg);
+               fargs = lappend(fargs, arg);
+       }
+       tablesample->args = fargs;
+
+       /* Process REPEATABLE (seed); the expression is coerced to float8 */
+       if (rts->repeatable != NULL)
+       {
+               Node       *arg;
+
+               if (!tsm->repeatable_across_queries)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                 errmsg("tablesample method %s does not support REPEATABLE",
+                                                NameListToString(rts->method)),
+                                        parser_errposition(pstate, rts->location)));
+
+               arg = transformExpr(pstate, rts->repeatable, EXPR_KIND_FROM_FUNCTION);
+               arg = coerce_to_specific_type(pstate, arg, FLOAT8OID, "REPEATABLE");
+               assign_expr_collations(pstate, arg);
+               tablesample->repeatable = (Expr *) arg;
+       }
+       else
+               tablesample->repeatable = NULL;
+
+       return tablesample;
+}
+
  
  /*
   * transformFromClauseItem -
@@ -844,6 +915,33 @@ transformFromClauseItem(ParseState *pstate, Node *n,
                 rtr->rtindex = rtindex;
                 return (Node *) rtr;
         }
+       else if (IsA(n, RangeTableSample))
+       {
+               /* TABLESAMPLE clause (wrapping some other valid FROM node) */
+               RangeTableSample *rts = (RangeTableSample *) n;
+               Node       *rel;
+               RangeTblRef *rtr;
+               RangeTblEntry *rte;
+
+               /* Recursively transform the contained relation */
+               rel = transformFromClauseItem(pstate, rts->relation,
+                                                                         top_rte, top_rti, namespace);
+               /* Currently, grammar could only return a RangeVar as contained rel */
+               Assert(IsA(rel, RangeTblRef));
+               rtr = (RangeTblRef *) rel;
+               rte = rt_fetch(rtr->rtindex, pstate->p_rtable);
+               /* We only support this on plain relations and matviews */
+               if (rte->relkind != RELKIND_RELATION &&
+                       rte->relkind != RELKIND_MATVIEW)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("TABLESAMPLE clause can only be applied to tables and materialized views"),
+                                  parser_errposition(pstate, exprLocation(rts->relation))));
+
+               /* Transform TABLESAMPLE details and attach to the RTE */
+               rte->tablesample = transformRangeTableSample(pstate, rts);
+               return (Node *) rtr;
+       }
         else if (IsA(n, JoinExpr))
         {
                 /* A newfangled join expression */
@@ -1165,26 +1263,6 @@ transformFromClauseItem(ParseState *pstate, Node *n,
  
                 return (Node *) j;
         }
-       else if (IsA(n, RangeTableSample))
-       {
-               /* Tablesample reference */
-               RangeTableSample *rv = (RangeTableSample *) n;
-               RangeTblRef *rtr;
-               RangeTblEntry *rte = NULL;
-               int                     rtindex;
-
-               rte = transformTableSampleEntry(pstate, rv);
-
-               /* assume new rte is at end */
-               rtindex = list_length(pstate->p_rtable);
-               Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
-               *top_rte = rte;
-               *top_rti = rtindex;
-               *namespace = list_make1(makeDefaultNSItem(rte));
-               rtr = makeNode(RangeTblRef);
-               rtr->rtindex = rtindex;
-               return (Node *) rtr;
-       }
         else
                 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
         return NULL;                            /* can't get here, keep compiler quiet */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c

index 430baff11652721778e3f37ba49b1707fea62247..554ca9d8c47e5f38eddb5579dd70160a2d5d363b 100644 (file)
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -18,7 +18,6 @@
  #include "catalog/pg_aggregate.h"
  #include "catalog/pg_proc.h"
  #include "catalog/pg_type.h"
-#include "catalog/pg_tablesample_method.h"
  #include "funcapi.h"
  #include "lib/stringinfo.h"
  #include "nodes/makefuncs.h"
@@ -27,7 +26,6 @@
  #include "parser/parse_clause.h"
  #include "parser/parse_coerce.h"
  #include "parser/parse_func.h"
-#include "parser/parse_expr.h"
  #include "parser/parse_relation.h"
  #include "parser/parse_target.h"
  #include "parser/parse_type.h"
@@ -769,148 +767,6 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
  }
  
  
-/*
- * ParseTableSample
- *
- * Parse TABLESAMPLE clause and process the arguments
- */
-TableSampleClause *
-ParseTableSample(ParseState *pstate, char *samplemethod, Node *repeatable,
-                                List *sampleargs, int location)
-{
-       HeapTuple       tuple;
-       Form_pg_tablesample_method tsm;
-       Form_pg_proc procform;
-       TableSampleClause *tablesample;
-       List       *fargs;
-       ListCell   *larg;
-       int                     nargs,
-                               initnargs;
-       Oid                     init_arg_types[FUNC_MAX_ARGS];
-
-       /* Load the tablesample method */
-       tuple = SearchSysCache1(TABLESAMPLEMETHODNAME, PointerGetDatum(samplemethod));
-       if (!HeapTupleIsValid(tuple))
-               ereport(ERROR,
-                               (errcode(ERRCODE_UNDEFINED_OBJECT),
-                                errmsg("tablesample method \"%s\" does not exist",
-                                               samplemethod),
-                                parser_errposition(pstate, location)));
-
-       tablesample = makeNode(TableSampleClause);
-       tablesample->tsmid = HeapTupleGetOid(tuple);
-
-       tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
-
-       tablesample->tsmseqscan = tsm->tsmseqscan;
-       tablesample->tsmpagemode = tsm->tsmpagemode;
-       tablesample->tsminit = tsm->tsminit;
-       tablesample->tsmnextblock = tsm->tsmnextblock;
-       tablesample->tsmnexttuple = tsm->tsmnexttuple;
-       tablesample->tsmexaminetuple = tsm->tsmexaminetuple;
-       tablesample->tsmend = tsm->tsmend;
-       tablesample->tsmreset = tsm->tsmreset;
-       tablesample->tsmcost = tsm->tsmcost;
-
-       ReleaseSysCache(tuple);
-
-       /* Validate the parameters against init function definition. */
-       tuple = SearchSysCache1(PROCOID,
-                                                       ObjectIdGetDatum(tablesample->tsminit));
-
-       if (!HeapTupleIsValid(tuple))           /* should not happen */
-               elog(ERROR, "cache lookup failed for function %u",
-                        tablesample->tsminit);
-
-       procform = (Form_pg_proc) GETSTRUCT(tuple);
-       initnargs = procform->pronargs;
-       Assert(initnargs >= 3);
-
-       /*
-        * First parameter is used to pass the SampleScanState, second is seed
-        * (REPEATABLE), skip the processing for them here, just assert that the
-        * types are correct.
-        */
-       Assert(procform->proargtypes.values[0] == INTERNALOID);
-       Assert(procform->proargtypes.values[1] == INT4OID);
-       initnargs -= 2;
-       memcpy(init_arg_types, procform->proargtypes.values + 2,
-                  initnargs * sizeof(Oid));
-
-       /* Now we are done with the catalog */
-       ReleaseSysCache(tuple);
-
-       /* Process repeatable (seed) */
-       if (repeatable != NULL)
-       {
-               Node       *arg = repeatable;
-
-               if (arg && IsA(arg, A_Const))
-               {
-                       A_Const    *con = (A_Const *) arg;
-
-                       if (con->val.type == T_Null)
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                 errmsg("REPEATABLE clause must be NOT NULL numeric value"),
-                                                parser_errposition(pstate, con->location)));
-
-               }
-
-               arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
-               arg = coerce_to_specific_type(pstate, arg, INT4OID, "REPEATABLE");
-               tablesample->repeatable = arg;
-       }
-       else
-               tablesample->repeatable = NULL;
-
-       /* Check user provided expected number of arguments. */
-       if (list_length(sampleargs) != initnargs)
-               ereport(ERROR,
-                               (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
-               errmsg_plural("tablesample method \"%s\" expects %d argument got %d",
-                                         "tablesample method \"%s\" expects %d arguments got %d",
-                                         initnargs,
-                                         samplemethod,
-                                         initnargs, list_length(sampleargs)),
-                                parser_errposition(pstate, location)));
-
-       /* Transform the arguments, typecasting them as needed. */
-       fargs = NIL;
-       nargs = 0;
-       foreach(larg, sampleargs)
-       {
-               Node       *inarg = (Node *) lfirst(larg);
-               Node       *arg = transformExpr(pstate, inarg, EXPR_KIND_FROM_FUNCTION);
-               Oid                     argtype = exprType(arg);
-
-               if (argtype != init_arg_types[nargs])
-               {
-                       if (!can_coerce_type(1, &argtype, &init_arg_types[nargs],
-                                                                COERCION_IMPLICIT))
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                  errmsg("wrong parameter %d for tablesample method \"%s\"",
-                                                 nargs + 1, samplemethod),
-                                                errdetail("Expected type %s got %s.",
-                                                                  format_type_be(init_arg_types[nargs]),
-                                                                  format_type_be(argtype)),
-                                                parser_errposition(pstate, exprLocation(inarg))));
-
-                       arg = coerce_type(pstate, arg, argtype, init_arg_types[nargs], -1,
-                                                         COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
-               }
-
-               fargs = lappend(fargs, arg);
-               nargs++;
-       }
-
-       /* Pass the arguments down */
-       tablesample->args = fargs;
-
-       return tablesample;
-}
-
  /* func_match_argtypes()
   *
   * Given a list of candidate functions (having the right name and number
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c

index bbd6b77c5eab640ee1af8accaaaa6ae686aa1313..1734e48241ada102ac66cfd2788ffff0837dfcab 100644 (file)
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -418,6 +418,10 @@ rewriteRuleAction(Query *parsetree,
  
                         switch (rte->rtekind)
                         {
+                               case RTE_RELATION:
+                                       sub_action->hasSubLinks =
+                                               checkExprHasSubLink((Node *) rte->tablesample);
+                                       break;
                                 case RTE_FUNCTION:
                                         sub_action->hasSubLinks =
                                                 checkExprHasSubLink((Node *) rte->functions);
diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c

index 9ad460abfbdbcc8f1e75613b00841b7926af5592..5b809aa7d4996d55d467aa570e8dca9650031a31 100644 (file)
--- a/src/backend/utils/adt/pseudotypes.c
+++ b/src/backend/utils/adt/pseudotypes.c
@@ -373,6 +373,33 @@ fdw_handler_out(PG_FUNCTION_ARGS)
  }
  
  
+/*
+ * tsm_handler_in - input routine for pseudo-type TSM_HANDLER; always fails.
+ */
+Datum
+tsm_handler_in(PG_FUNCTION_ARGS)
+{
+       ereport(ERROR,                  /* unconditional: every call is rejected */
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("cannot accept a value of type tsm_handler")));
+
+       PG_RETURN_VOID();                       /* presumably unreachable after the ERROR above; keeps compiler quiet */
+}
+
+/*
+ * tsm_handler_out - output routine for pseudo-type TSM_HANDLER; always fails.
+ */
+Datum
+tsm_handler_out(PG_FUNCTION_ARGS)
+{
+       ereport(ERROR,                  /* unconditional: every call is rejected */
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("cannot display a value of type tsm_handler")));
+
+       PG_RETURN_VOID();                       /* presumably unreachable after the ERROR above; keeps compiler quiet */
+}
+
+
  /*
   * internal_in         - input routine for pseudo-type INTERNAL.
   */
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c

index 5112cac90173595d56c7cc14beba8ebfcc9113e7..51391f6a4e0d16e4e647f7845425e11795ca7508 100644 (file)
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -32,7 +32,6 @@
  #include "catalog/pg_opclass.h"
  #include "catalog/pg_operator.h"
  #include "catalog/pg_proc.h"
-#include "catalog/pg_tablesample_method.h"
  #include "catalog/pg_trigger.h"
  #include "catalog/pg_type.h"
  #include "commands/defrem.h"
@@ -349,8 +348,6 @@ static void make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
                          int prettyFlags);
  static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
                          int prettyFlags, int wrapColumn);
-static void get_tablesample_def(TableSampleClause *tablesample,
-                                       deparse_context *context);
  static void get_query_def(Query *query, StringInfo buf, List *parentnamespace,
                           TupleDesc resultDesc,
                           int prettyFlags, int wrapColumn, int startIndent);
@@ -416,6 +413,8 @@ static void get_column_alias_list(deparse_columns *colinfo,
  static void get_from_clause_coldeflist(RangeTblFunction *rtfunc,
                                                    deparse_columns *colinfo,
                                                    deparse_context *context);
+static void get_tablesample_def(TableSampleClause *tablesample,
+                                       deparse_context *context);
  static void get_opclass_name(Oid opclass, Oid actual_datatype,
                                  StringInfo buf);
  static Node *processIndirection(Node *node, deparse_context *context,
@@ -4235,50 +4234,6 @@ make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
         heap_close(ev_relation, AccessShareLock);
  }
  
-/* ----------
- * get_tablesample_def                 - Convert TableSampleClause back to SQL
- * ----------
- */
-static void
-get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
-{
-       StringInfo      buf = context->buf;
-       HeapTuple       tuple;
-       Form_pg_tablesample_method tsm;
-       char       *tsmname;
-       int                     nargs;
-       ListCell   *l;
-
-       /* Load the tablesample method */
-       tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tablesample->tsmid));
-       if (!HeapTupleIsValid(tuple))
-               ereport(ERROR,
-                               (errcode(ERRCODE_UNDEFINED_OBJECT),
-                                errmsg("cache lookup failed for tablesample method %u",
-                                               tablesample->tsmid)));
-
-       tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
-       tsmname = NameStr(tsm->tsmname);
-       appendStringInfo(buf, " TABLESAMPLE %s (", quote_identifier(tsmname));
-
-       ReleaseSysCache(tuple);
-
-       nargs = 0;
-       foreach(l, tablesample->args)
-       {
-               if (nargs++ > 0)
-                       appendStringInfoString(buf, ", ");
-               get_rule_expr((Node *) lfirst(l), context, true);
-       }
-       appendStringInfoChar(buf, ')');
-
-       if (tablesample->repeatable != NULL)
-       {
-               appendStringInfoString(buf, " REPEATABLE (");
-               get_rule_expr(tablesample->repeatable, context, true);
-               appendStringInfoChar(buf, ')');
-       }
-}
  
  /* ----------
   * get_query_def                       - Parse back one query parsetree
@@ -8781,9 +8736,6 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
                                                                  only_marker(rte),
                                                                  generate_relation_name(rte->relid,
                                                                                                                 context->namespaces));
-
-                               if (rte->tablesample)
-                                       get_tablesample_def(rte->tablesample, context);
                                 break;
                         case RTE_SUBQUERY:
                                 /* Subquery RTE */
@@ -8963,6 +8915,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
                         /* Else print column aliases as needed */
                         get_column_alias_list(colinfo, context);
                 }
+
+               /* Tablesample clause must go after any alias */
+               if (rte->rtekind == RTE_RELATION && rte->tablesample)
+                       get_tablesample_def(rte->tablesample, context);
         }
         else if (IsA(jtnode, JoinExpr))
         {
@@ -9162,6 +9118,44 @@ get_from_clause_coldeflist(RangeTblFunction *rtfunc,
         appendStringInfoChar(buf, ')');
  }
  
+/*
+ * get_tablesample_def - append " TABLESAMPLE method (args) [REPEATABLE (seed)]" for a TableSampleClause to context->buf
+ */
+static void
+get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
+{
+       StringInfo      buf = context->buf;
+       Oid                     argtypes[1];
+       int                     nargs;
+       ListCell   *l;
+
+       /*
+        * The method is printed as its handler function's name.  We should
+        * qualify it if it wouldn't be resolved in the current search path.
+        */
+       argtypes[0] = INTERNALOID;      /* handler is looked up as a one-argument function taking "internal" */
+       appendStringInfo(buf, " TABLESAMPLE %s (",
+                                        generate_function_name(tablesample->tsmhandler, 1,
+                                                                                       NIL, argtypes,
+                                                                                       false, NULL, EXPR_KIND_NONE));
+
+       nargs = 0;                      /* number of arguments printed so far */
+       foreach(l, tablesample->args)
+       {
+               if (nargs++ > 0)        /* comma before every argument but the first */
+                       appendStringInfoString(buf, ", ");
+               get_rule_expr((Node *) lfirst(l), context, false);
+       }
+       appendStringInfoChar(buf, ')');
+
+       if (tablesample->repeatable != NULL)    /* REPEATABLE is optional; NULL means it was not given */
+       {
+               appendStringInfoString(buf, " REPEATABLE (");
+               get_rule_expr((Node *) tablesample->repeatable, context, false);
+               appendStringInfoChar(buf, ')');
+       }
+}
+
  /*
   * get_opclass_name                    - fetch name of an index operator class
   *
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c

index 7b32247d34eae9eb8dadc9514d0b7e2c88828ff0..1dc293297d93edb933b743dc302fb7bcad757290 100644 (file)
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -32,7 +32,6 @@
  #include "catalog/pg_range.h"
  #include "catalog/pg_statistic.h"
  #include "catalog/pg_transform.h"
-#include "catalog/pg_tablesample_method.h"
  #include "catalog/pg_type.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
@@ -2997,29 +2996,3 @@ get_range_subtype(Oid rangeOid)
         else
                 return InvalidOid;
  }
-
-/*                             ---------- PG_TABLESAMPLE_METHOD CACHE ----------                        */
-
-/*
- * get_tablesample_method_name - given a tablesample method OID,
- * look up the name or NULL if not found
- */
-char *
-get_tablesample_method_name(Oid tsmid)
-{
-       HeapTuple       tuple;
-
-       tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tsmid));
-       if (HeapTupleIsValid(tuple))
-       {
-               Form_pg_tablesample_method tup =
-               (Form_pg_tablesample_method) GETSTRUCT(tuple);
-               char       *result;
-
-               result = pstrdup(NameStr(tup->tsmname));
-               ReleaseSysCache(tuple);
-               return result;
-       }
-       else
-               return NULL;
-}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c

index b6333e362f018b467aa34b065457268be6dc3bde..efce7b9a3d13b0ce73aff22292ca9f42ee31b60c 100644 (file)
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -56,7 +56,6 @@
  #include "catalog/pg_shseclabel.h"
  #include "catalog/pg_replication_origin.h"
  #include "catalog/pg_statistic.h"
-#include "catalog/pg_tablesample_method.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_transform.h"
  #include "catalog/pg_ts_config.h"
@@ -667,28 +666,6 @@ static const struct cachedesc cacheinfo[] = {
                 },
                 128
         },
-       {TableSampleMethodRelationId,           /* TABLESAMPLEMETHODNAME */
-               TableSampleMethodNameIndexId,
-               1,
-               {
-                       Anum_pg_tablesample_method_tsmname,
-                       0,
-                       0,
-                       0,
-               },
-               2
-       },
-       {TableSampleMethodRelationId,           /* TABLESAMPLEMETHODOID */
-               TableSampleMethodOidIndexId,
-               1,
-               {
-                       ObjectIdAttributeNumber,
-                       0,
-                       0,
-                       0,
-               },
-               2
-       },
         {TableSpaceRelationId,          /* TABLESPACEOID */
                 TablespaceOidIndexId,
                 1,
diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt

index 6cc3ed96c447bd5b7a743d015cd14ec06b9b3f0a..7b97d45a53a12ed849c13bef3630753109ba1f49 100644 (file)
--- a/src/backend/utils/errcodes.txt
+++ b/src/backend/utils/errcodes.txt
@@ -177,6 +177,8 @@ Section: Class 22 - Data Exception
  2201B    E    ERRCODE_INVALID_REGULAR_EXPRESSION                             invalid_regular_expression
  2201W    E    ERRCODE_INVALID_ROW_COUNT_IN_LIMIT_CLAUSE                      invalid_row_count_in_limit_clause
  2201X    E    ERRCODE_INVALID_ROW_COUNT_IN_RESULT_OFFSET_CLAUSE              invalid_row_count_in_result_offset_clause
+2202H    E    ERRCODE_INVALID_TABLESAMPLE_ARGUMENT                           invalid_tablesample_argument
+2202G    E    ERRCODE_INVALID_TABLESAMPLE_REPEAT                             invalid_tablesample_repeat
  22009    E    ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE                   invalid_time_zone_displacement_value
  2200C    E    ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER                        invalid_use_of_escape_character
  2200G    E    ERRCODE_MOST_SPECIFIC_TYPE_MISMATCH                            most_specific_type_mismatch
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c

index 6191f7973441b2ac7dbc473cfc2058e35d0da4d3..4142e01123f79fe880cac07889e9227d4b6678d5 100644 (file)
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -228,7 +228,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
  void
  sampler_random_init_state(long seed, SamplerRandomState randstate)
  {
-       randstate[0] = RAND48_SEED_0;
+       randstate[0] = 0x330e;          /* same as pg_erand48, but could be anything */
         randstate[1] = (unsigned short) seed;
         randstate[2] = (unsigned short) (seed >> 16);
  }
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c

index 9596af6a7b35ad57e3c73e19b1841f20b8c0384c..ece05155490b8755cff88231be9a3e01b6e3b773 100644 (file)
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -738,13 +738,15 @@ static const SchemaQuery Query_for_list_of_matviews = {
  "  WHERE substring(pg_catalog.quote_ident(evtname),1,%d)='%s'"
  
  #define Query_for_list_of_tablesample_methods \
-" SELECT pg_catalog.quote_ident(tsmname) "\
-"   FROM pg_catalog.pg_tablesample_method "\
-"  WHERE substring(pg_catalog.quote_ident(tsmname),1,%d)='%s'"
+" SELECT pg_catalog.quote_ident(proname) "\
+"   FROM pg_catalog.pg_proc "\
+"  WHERE prorettype = 'pg_catalog.tsm_handler'::pg_catalog.regtype AND "\
+"        proargtypes[0] = 'pg_catalog.internal'::pg_catalog.regtype AND "\
+"        substring(pg_catalog.quote_ident(proname),1,%d)='%s'"
  
  #define Query_for_list_of_policies \
  " SELECT pg_catalog.quote_ident(polname) "\
-"   FROM pg_catalog.pg_policy " \
+"   FROM pg_catalog.pg_policy "\
  "  WHERE substring(pg_catalog.quote_ident(polname),1,%d)='%s'"
  
  #define Query_for_list_of_tables_for_policy \
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h

index 31139cbd0ccc736908afa9218dc10e9d61dedd99..75e6b72f9e0204913254548a42322a6fa7708d63 100644 (file)
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -116,11 +116,13 @@ extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
                                   int nkeys, ScanKey key);
  extern HeapScanDesc heap_beginscan_sampling(Relation relation,
                                                 Snapshot snapshot, int nkeys, ScanKey key,
-                                               bool allow_strat, bool allow_pagemode);
+                                        bool allow_strat, bool allow_sync, bool allow_pagemode);
  extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
                                    BlockNumber endBlk);
  extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
  extern void heap_rescan(HeapScanDesc scan, ScanKey key);
+extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+                                        bool allow_strat, bool allow_sync, bool allow_pagemode);
  extern void heap_endscan(HeapScanDesc scan);
  extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
  
diff --git a/src/include/access/tablesample.h b/src/include/access/tablesample.h

deleted file mode 100644 (file)

index a02e93d..0000000
--- a/src/include/access/tablesample.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * tablesample.h
- *               Public header file for TABLESAMPLE clause interface
- *
- *
- * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/access/tablesample.h
- *
- *-------------------------------------------------------------------------
- */
-#ifndef TABLESAMPLE_H
-#define TABLESAMPLE_H
-
-#include "access/relscan.h"
-#include "executor/executor.h"
-
-typedef struct TableSampleDesc
-{
-       HeapScanDesc heapScan;
-       TupleDesc       tupDesc;                /* Mostly useful for tsmexaminetuple */
-
-       void       *tsmdata;            /* private method data */
-
-       /* These point to he function of the TABLESAMPLE Method. */
-       FmgrInfo        tsminit;
-       FmgrInfo        tsmnextblock;
-       FmgrInfo        tsmnexttuple;
-       FmgrInfo        tsmexaminetuple;
-       FmgrInfo        tsmreset;
-       FmgrInfo        tsmend;
-} TableSampleDesc;
-
-
-extern TableSampleDesc *tablesample_init(SampleScanState *scanstate,
-                                TableSampleClause *tablesample);
-extern HeapTuple tablesample_getnext(TableSampleDesc *desc);
-extern void tablesample_reset(TableSampleDesc *desc);
-extern void tablesample_end(TableSampleDesc *desc);
-extern HeapTuple tablesample_source_getnext(TableSampleDesc *desc);
-extern HeapTuple tablesample_source_gettup(TableSampleDesc *desc, ItemPointer tid,
-                                                 bool *visible);
-
-extern Datum tsm_system_init(PG_FUNCTION_ARGS);
-extern Datum tsm_system_nextblock(PG_FUNCTION_ARGS);
-extern Datum tsm_system_nexttuple(PG_FUNCTION_ARGS);
-extern Datum tsm_system_end(PG_FUNCTION_ARGS);
-extern Datum tsm_system_reset(PG_FUNCTION_ARGS);
-extern Datum tsm_system_cost(PG_FUNCTION_ARGS);
-
-extern Datum tsm_bernoulli_init(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_nextblock(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_end(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_reset(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_cost(PG_FUNCTION_ARGS);
-
-
-#endif
diff --git a/src/include/access/tsmapi.h b/src/include/access/tsmapi.h

new file mode 100644 (file)

index 0000000..4b59ffa
--- /dev/null
+++ b/src/include/access/tsmapi.h
@@ -0,0 +1,81 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsmapi.h
+ *       API for tablesample methods
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * src/include/access/tsmapi.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TSMAPI_H
+#define TSMAPI_H
+
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+
+
+/*
+ * Callback function signatures (the member types of TsmRoutine) --- see tablesample-method.sgml for more info.
+ */
+
+typedef void (*SampleScanGetSampleSize_function) (PlannerInfo *root,
+                                                                                                                RelOptInfo *baserel,
+                                                                                                                       List *paramexprs,
+                                                                                                                 BlockNumber *pages,
+                                                                                                                         double *tuples);      /* planning-time callback */
+
+typedef void (*InitSampleScan_function) (SampleScanState *node,
+                                                                                                        int eflags);   /* execution-time callback; optional in TsmRoutine */
+
+typedef void (*BeginSampleScan_function) (SampleScanState *node,
+                                                                                                         Datum *params,
+                                                                                                         int nparams,
+                                                                                                         uint32 seed); /* execution-time callback */
+
+typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node);       /* execution-time callback; optional in TsmRoutine */
+
+typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node,
+                                                                                                                BlockNumber blockno,
+                                                                                                        OffsetNumber maxoffset);       /* execution-time callback */
+
+typedef void (*EndSampleScan_function) (SampleScanState *node);        /* execution-time callback; optional in TsmRoutine */
+
+/*
+ * TsmRoutine is the struct returned by a tablesample method's handler
+ * function.  It provides pointers to the callback functions needed by the
+ * planner and executor, as well as additional information about the method.
+ *
+ * More function pointers are likely to be added in the future.
+ * Therefore it's recommended that the handler initialize the struct with
+ * makeNode(TsmRoutine) so that all fields are set to NULL.  This will
+ * ensure that no fields are accidentally left undefined.
+ */
+typedef struct TsmRoutine
+{
+       NodeTag         type;           /* node tag, so the struct can be built with makeNode() */
+
+       /* List of datatype OIDs for the TABLESAMPLE arguments; parser checks the count and coerces each argument */
+       List       *parameterTypes;
+
+       /* Can method produce repeatable samples across, or even within, queries? */
+       bool            repeatable_across_queries;      /* if false, the parser rejects a REPEATABLE clause */
+       bool            repeatable_across_scans;        /* NOTE(review): presumably covers rescans within one query -- confirm */
+
+       /* Function for planning a SampleScan on a physical table */
+       SampleScanGetSampleSize_function SampleScanGetSampleSize;
+
+       /* Functions for executing a SampleScan on a physical table */
+       InitSampleScan_function InitSampleScan;         /* can be NULL */
+       BeginSampleScan_function BeginSampleScan;
+       NextSampleBlock_function NextSampleBlock;       /* can be NULL */
+       NextSampleTuple_function NextSampleTuple;
+       EndSampleScan_function EndSampleScan;           /* can be NULL */
+} TsmRoutine;
+
+
+/* Functions in access/tablesample/tablesample.c */
+extern TsmRoutine *GetTsmRoutine(Oid tsmhandler);
+
+#endif   /* TSMAPI_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 8f6685fd0cce89d8060dd8468b7575bd85bb412d..0e983279313cd59a8a0df57e9d17829cf5967b41 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201507171
+#define CATALOG_VERSION_NO     201507252
  
  #endif
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h

index 748aadde94598945c715a2244d8b12249792eba9..c38958d6c5e26985ceeef95ca0df4defe42bf711 100644 (file)
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -316,11 +316,6 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replicati
  DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname text_pattern_ops));
  #define ReplicationOriginNameIndex 6002
  
-DECLARE_UNIQUE_INDEX(pg_tablesample_method_name_index, 3331, on pg_tablesample_method using btree(tsmname name_ops));
-#define TableSampleMethodNameIndexId  3331
-DECLARE_UNIQUE_INDEX(pg_tablesample_method_oid_index, 3332, on pg_tablesample_method using btree(oid oid_ops));
-#define TableSampleMethodOidIndexId  3332
-
  /* last step of initialization script: build the indexes declared above */
  BUILD_INDICES
  
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index 1d68ad7209e1c65333491a4a646945dfc525f2ab..09bf1439c46f8f68f0f3ebbb275ebe84fc84c752 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3734,6 +3734,16 @@ DATA(insert OID = 3116 (  fdw_handler_in PGNSP PGUID 12 1 0 0 0 f f f f f f i 1
  DESCR("I/O");
  DATA(insert OID = 3117 (  fdw_handler_out      PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3115" _null_ _null_ _null_ _null_ _null_ fdw_handler_out _null_ _null_ _null_ ));
  DESCR("I/O");
+DATA(insert OID = 3311 (  tsm_handler_in       PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 3310 "2275" _null_ _null_ _null_ _null_ _null_ tsm_handler_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3312 (  tsm_handler_out      PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3310" _null_ _null_ _null_ _null_ _null_ tsm_handler_out _null_ _null_ _null_ ));
+DESCR("I/O");
+
+/* tablesample method handlers */
+DATA(insert OID = 3313 (  bernoulli                    PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_handler _null_ _null_ _null_ ));
+DESCR("BERNOULLI tablesample method handler");
+DATA(insert OID = 3314 (  system                       PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_handler _null_ _null_ _null_ ));
+DESCR("SYSTEM tablesample method handler");
  
  /* cryptographic */
  DATA(insert OID =  2311 (  md5    PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ ));
@@ -5321,33 +5331,6 @@ DESCR("get an individual replication origin's replication progress");
  DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ ));
  DESCR("get progress for all replication origins");
  
-/* tablesample */
-DATA(insert OID = 3335 (  tsm_system_init              PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_system_init _null_ _null_ _null_ ));
-DESCR("tsm_system_init(internal)");
-DATA(insert OID = 3336 (  tsm_system_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nextblock _null_ _null_ _null_ ));
-DESCR("tsm_system_nextblock(internal)");
-DATA(insert OID = 3337 (  tsm_system_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nexttuple _null_ _null_ _null_ ));
-DESCR("tsm_system_nexttuple(internal)");
-DATA(insert OID = 3338 (  tsm_system_end               PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_end _null_ _null_ _null_ ));
-DESCR("tsm_system_end(internal)");
-DATA(insert OID = 3339 (  tsm_system_reset             PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_reset _null_ _null_ _null_ ));
-DESCR("tsm_system_reset(internal)");
-DATA(insert OID = 3340 (  tsm_system_cost              PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_system_cost _null_ _null_ _null_ ));
-DESCR("tsm_system_cost(internal)");
-
-DATA(insert OID = 3341 (  tsm_bernoulli_init           PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_init _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_init(internal)");
-DATA(insert OID = 3342 (  tsm_bernoulli_nextblock      PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nextblock _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_nextblock(internal)");
-DATA(insert OID = 3343 (  tsm_bernoulli_nexttuple      PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nexttuple _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_nexttuple(internal)");
-DATA(insert OID = 3344 (  tsm_bernoulli_end                    PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_end _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_end(internal)");
-DATA(insert OID = 3345 (  tsm_bernoulli_reset          PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_reset _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_reset(internal)");
-DATA(insert OID = 3346 (  tsm_bernoulli_cost           PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_cost _null_ _null_ _null_ ));
-DESCR("tsm_bernoulli_cost(internal)");
-
  /*
   * Symbolic values for provolatile column: these indicate whether the result
   * of a function is dependent *only* on the values of its explicit arguments,
diff --git a/src/include/catalog/pg_tablesample_method.h b/src/include/catalog/pg_tablesample_method.h

deleted file mode 100644 (file)

index b422414..0000000
--- a/src/include/catalog/pg_tablesample_method.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * pg_tablesample_method.h
- *       definition of the table scan methods.
- *
- *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/catalog/pg_tablesample_method.h
- *
- *
- *-------------------------------------------------------------------------
- */
-#ifndef PG_TABLESAMPLE_METHOD_H
-#define PG_TABLESAMPLE_METHOD_H
-
-#include "catalog/genbki.h"
-#include "catalog/objectaddress.h"
-
-/* ----------------
- *             pg_tablesample_method definition.  cpp turns this into
- *             typedef struct FormData_pg_tablesample_method
- * ----------------
- */
-#define TableSampleMethodRelationId 3330
-
-CATALOG(pg_tablesample_method,3330)
-{
-       NameData        tsmname;                /* tablesample method name */
-       bool            tsmseqscan;             /* does this method scan whole table
-                                                                * sequentially? */
-       bool            tsmpagemode;    /* does this method scan page at a time? */
-       regproc         tsminit;                /* init scan function */
-       regproc         tsmnextblock;   /* function returning next block to sample or
-                                                                * InvalidBlockOffset if finished */
-       regproc         tsmnexttuple;   /* function returning next tuple offset from
-                                                                * current block or InvalidOffsetNumber if end
-                                                                * of the block was reacher */
-       regproc         tsmexaminetuple;/* optional function which can examine tuple
-                                                                * contents and decide if tuple should be
-                                                                * returned or not */
-       regproc         tsmend;                 /* end scan function */
-       regproc         tsmreset;               /* reset state - used by rescan */
-       regproc         tsmcost;                /* costing function */
-} FormData_pg_tablesample_method;
-
-/* ----------------
- *             Form_pg_tablesample_method corresponds to a pointer to a tuple with
- *             the format of pg_tablesample_method relation.
- * ----------------
- */
-typedef FormData_pg_tablesample_method *Form_pg_tablesample_method;
-
-/* ----------------
- *             compiler constants for pg_tablesample_method
- * ----------------
- */
-#define Natts_pg_tablesample_method                                    10
-#define Anum_pg_tablesample_method_tsmname                     1
-#define Anum_pg_tablesample_method_tsmseqscan          2
-#define Anum_pg_tablesample_method_tsmpagemode         3
-#define Anum_pg_tablesample_method_tsminit                     4
-#define Anum_pg_tablesample_method_tsmnextblock                5
-#define Anum_pg_tablesample_method_tsmnexttuple                6
-#define Anum_pg_tablesample_method_tsmexaminetuple     7
-#define Anum_pg_tablesample_method_tsmend                      8
-#define Anum_pg_tablesample_method_tsmreset                    9
-#define Anum_pg_tablesample_method_tsmcost                     10
-
-/* ----------------
- *             initial contents of pg_tablesample_method
- * ----------------
- */
-
-DATA(insert OID = 3333 ( system false true tsm_system_init tsm_system_nextblock tsm_system_nexttuple - tsm_system_end tsm_system_reset tsm_system_cost ));
-DESCR("SYSTEM table sampling method");
-DATA(insert OID = 3334 ( bernoulli true false tsm_bernoulli_init tsm_bernoulli_nextblock tsm_bernoulli_nexttuple - tsm_bernoulli_end tsm_bernoulli_reset tsm_bernoulli_cost ));
-DESCR("BERNOULLI table sampling method");
-
-#endif   /* PG_TABLESAMPLE_METHOD_H */
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h

index da123f6c4957e70b3fecc07b948609a6bd21bea2..7dc95c8d2c651ef3fc577af3d62cefd8f4d24169 100644 (file)
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -694,6 +694,8 @@ DATA(insert OID = 3500 ( anyenum            PGNSP PGUID  4 t p P f t \054 0 0 0 anyenum_in
  #define ANYENUMOID             3500
  DATA(insert OID = 3115 ( fdw_handler   PGNSP PGUID  4 t p P f t \054 0 0 0 fdw_handler_in fdw_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
  #define FDW_HANDLEROID 3115
+DATA(insert OID = 3310 ( tsm_handler   PGNSP PGUID  4 t p P f t \054 0 0 0 tsm_handler_in tsm_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
+#define TSM_HANDLEROID 3310
  DATA(insert OID = 3831 ( anyrange              PGNSP PGUID  -1 f p P f t \054 0 0 0 anyrange_in anyrange_out - - - - - d x f 0 -1 0 0 _null_ _null_ _null_ ));
  #define ANYRANGEOID            3831
  
diff --git a/src/include/executor/nodeSamplescan.h b/src/include/executor/nodeSamplescan.h

index 4b769daec8b917e90587597c92c61ba82ddd5bac..a0cc6ce467a9f58bf54b7eaeba9789c315a4fe55 100644 (file)
--- a/src/include/executor/nodeSamplescan.h
+++ b/src/include/executor/nodeSamplescan.h
@@ -4,7 +4,7 @@
   *
   *
   *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * src/include/executor/nodeSamplescan.h
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 541ee187356850c69da8e26fc2615cb968f1a62d..303fc3c1c77dca2f4c2abd51440c8614b1e88f61 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1257,13 +1257,22 @@ typedef struct ScanState
   */
  typedef ScanState SeqScanState;
  
-/*
- * SampleScan
+/* ----------------
+ *      SampleScanState information
+ * ----------------
   */
  typedef struct SampleScanState
  {
         ScanState       ss;
-       struct TableSampleDesc *tsdesc;
+       List       *args;                       /* expr states for TABLESAMPLE params */
+       ExprState  *repeatable;         /* expr state for REPEATABLE expr */
+       /* use struct pointer to avoid including tsmapi.h here */
+       struct TsmRoutine *tsmroutine;          /* descriptor for tablesample method */
+       void       *tsm_state;          /* tablesample method can keep state here */
+       bool            use_bulkread;   /* use bulkread buffer access strategy? */
+       bool            use_pagemode;   /* use page-at-a-time visibility checking? */
+       bool            begun;                  /* false means need to call BeginSampleScan */
+       uint32          seed;                   /* random seed */
  } SampleScanState;
  
  /*
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h

index f8acda4eede0190e444d0d37e84cd7743884d4e7..748e434a27a21a47874b3ae50844ff39c8f54a24 100644 (file)
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -51,6 +51,7 @@ typedef enum NodeTag
         T_BitmapOr,
         T_Scan,
         T_SeqScan,
+       T_SampleScan,
         T_IndexScan,
         T_IndexOnlyScan,
         T_BitmapIndexScan,
@@ -61,7 +62,6 @@ typedef enum NodeTag
         T_ValuesScan,
         T_CteScan,
         T_WorkTableScan,
-       T_SampleScan,
         T_ForeignScan,
         T_CustomScan,
         T_Join,
@@ -400,6 +400,7 @@ typedef enum NodeTag
         T_WindowDef,
         T_RangeSubselect,
         T_RangeFunction,
+       T_RangeTableSample,
         T_TypeName,
         T_ColumnDef,
         T_IndexElem,
@@ -407,6 +408,7 @@ typedef enum NodeTag
         T_DefElem,
         T_RangeTblEntry,
         T_RangeTblFunction,
+       T_TableSampleClause,
         T_WithCheckOption,
         T_SortGroupClause,
         T_GroupingSet,
@@ -425,8 +427,6 @@ typedef enum NodeTag
         T_OnConflictClause,
         T_CommonTableExpr,
         T_RoleSpec,
-       T_RangeTableSample,
-       T_TableSampleClause,
  
         /*
          * TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
@@ -452,7 +452,8 @@ typedef enum NodeTag
         T_WindowObjectData,                     /* private in nodeWindowAgg.c */
         T_TIDBitmap,                            /* in nodes/tidbitmap.h */
         T_InlineCodeBlock,                      /* in nodes/parsenodes.h */
-       T_FdwRoutine                            /* in foreign/fdwapi.h */
+       T_FdwRoutine,                           /* in foreign/fdwapi.h */
+       T_TsmRoutine                            /* in access/tsmapi.h */
  } NodeTag;
  
  /*
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h

index b336ff9c6abaf0bc40bc34d6b3bd527d0e2c5393..151c93a078ea009aa8bd229a64581cb6cce79fc9 100644 (file)
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -337,26 +337,6 @@ typedef struct FuncCall
         int                     location;               /* token location, or -1 if unknown */
  } FuncCall;
  
-/*
- * TableSampleClause - a sampling method information
- */
-typedef struct TableSampleClause
-{
-       NodeTag         type;
-       Oid                     tsmid;
-       bool            tsmseqscan;
-       bool            tsmpagemode;
-       Oid                     tsminit;
-       Oid                     tsmnextblock;
-       Oid                     tsmnexttuple;
-       Oid                     tsmexaminetuple;
-       Oid                     tsmend;
-       Oid                     tsmreset;
-       Oid                     tsmcost;
-       Node       *repeatable;
-       List       *args;
-} TableSampleClause;
-
  /*
   * A_Star - '*' representing all columns of a table or compound field
   *
@@ -558,19 +538,23 @@ typedef struct RangeFunction
  } RangeFunction;
  
  /*
- * RangeTableSample - represents <table> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ * RangeTableSample - TABLESAMPLE appearing in a raw FROM clause
   *
- * SQL Standard specifies only one parameter which is percentage. But we allow
- * custom tablesample methods which may need different input arguments so we
- * accept list of arguments.
+ * This node, appearing only in raw parse trees, represents
+ *             <relation> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ * Currently, the <relation> can only be a RangeVar, but we might in future
+ * allow RangeSubselect and other options.  Note that the RangeTableSample
+ * is wrapped around the node representing the <relation>, rather than being
+ * a subfield of it.
   */
  typedef struct RangeTableSample
  {
         NodeTag         type;
-       RangeVar   *relation;
-       char       *method;                     /* sampling method */
-       Node       *repeatable;
-       List       *args;                       /* arguments for sampling method */
+       Node       *relation;           /* relation to be sampled */
+       List       *method;                     /* sampling method name (possibly qualified) */
+       List       *args;                       /* argument(s) for sampling method */
+       Node       *repeatable;         /* REPEATABLE expression, or NULL if none */
+       int                     location;               /* method name location, or -1 if unknown */
  } RangeTableSample;
  
  /*
@@ -810,7 +794,7 @@ typedef struct RangeTblEntry
          */
         Oid                     relid;                  /* OID of the relation */
         char            relkind;                /* relation kind (see pg_class.relkind) */
-       TableSampleClause *tablesample;         /* sampling method and parameters */
+       struct TableSampleClause *tablesample;          /* sampling info, or NULL */
  
         /*
          * Fields valid for a subquery RTE (else NULL):
@@ -912,6 +896,19 @@ typedef struct RangeTblFunction
         Bitmapset  *funcparams;         /* PARAM_EXEC Param IDs affecting this func */
  } RangeTblFunction;
  
+/*
+ * TableSampleClause - TABLESAMPLE appearing in a transformed FROM clause
+ *
+ * Unlike RangeTableSample, this is a subnode of the relevant RangeTblEntry.
+ */
+typedef struct TableSampleClause
+{
+       NodeTag         type;
+       Oid                     tsmhandler;             /* OID of the tablesample handler function */
+       List       *args;                       /* tablesample argument expression(s) */
+       Expr       *repeatable;         /* REPEATABLE expression, or NULL if none */
+} TableSampleClause;
+
  /*
   * WithCheckOption -
   *             representation of WITH CHECK OPTION checks to be applied to new tuples
@@ -2520,7 +2517,7 @@ typedef struct RenameStmt
  typedef struct AlterObjectSchemaStmt
  {
         NodeTag         type;
-       ObjectType objectType;          /* OBJECT_TABLE, OBJECT_TYPE, etc */
+       ObjectType      objectType;             /* OBJECT_TABLE, OBJECT_TYPE, etc */
         RangeVar   *relation;           /* in case it's a table */
         List       *object;                     /* in case it's some other object */
         List       *objarg;                     /* argument types, if applicable */
@@ -2535,7 +2532,7 @@ typedef struct AlterObjectSchemaStmt
  typedef struct AlterOwnerStmt
  {
         NodeTag         type;
-       ObjectType objectType;          /* OBJECT_TABLE, OBJECT_TYPE, etc */
+       ObjectType      objectType;             /* OBJECT_TABLE, OBJECT_TYPE, etc */
         RangeVar   *relation;           /* in case it's a table */
         List       *object;                     /* in case it's some other object */
         List       *objarg;                     /* argument types, if applicable */
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h

index 5f538f3e8ccb5a9298af8d2a836d3c08fc8d611d..0654d0266cd6e7d17a7fb75c2834b6b6e793fbf4 100644 (file)
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -287,7 +287,12 @@ typedef Scan SeqScan;
   *             table sample scan node
   * ----------------
   */
-typedef Scan SampleScan;
+typedef struct SampleScan
+{
+       Scan            scan;
+       /* use struct pointer to avoid including parsenodes.h here */
+       struct TableSampleClause *tablesample;  /* sampling info */
+} SampleScan;
  
  /* ----------------
   *             index scan node
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index 24003ae3591b9a2e1d74709a5f60485297f7b146..dd43e45d0c0a5b6c98f54c654f06b0c08ed7bff9 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -68,7 +68,8 @@ extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
                                         double index_pages, PlannerInfo *root);
  extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
                          ParamPathInfo *param_info);
-extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
+extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
+                               ParamPathInfo *param_info);
  extern void cost_index(IndexPath *path, PlannerInfo *root,
                    double loop_count);
  extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
diff --git a/src/include/parser/parse_func.h b/src/include/parser/parse_func.h

index 3194da463948a34e96478738e30368c37d1efedf..32646918e20c4b8101d0b5936144e3f8ff3d5776 100644 (file)
--- a/src/include/parser/parse_func.h
+++ b/src/include/parser/parse_func.h
@@ -33,11 +33,6 @@ typedef enum
  extern Node *ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
                                   FuncCall *fn, int location);
  
-extern TableSampleClause *ParseTableSample(ParseState *pstate,
-                                char *samplemethod,
-                                Node *repeatable, List *args,
-                                int location);
-
  extern FuncDetailCode func_get_detail(List *funcname,
                                 List *fargs, List *fargnames,
                                 int nargs, Oid *argtypes,
diff --git a/src/include/port.h b/src/include/port.h

index 71113c03944bd7f88991ef9953ae4ea15e86f443..3787cbfb7614cd318a3885bc59554128fb063502 100644 (file)
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -357,10 +357,6 @@ extern off_t ftello(FILE *stream);
  #endif
  #endif
  
-#define RAND48_SEED_0  (0x330e)
-#define RAND48_SEED_1  (0xabcd)
-#define RAND48_SEED_2  (0x1234)
-
  extern double pg_erand48(unsigned short xseed[3]);
  extern long pg_lrand48(void);
  extern void pg_srand48(long seed);
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h

index fcb0bf0ce8e94c376c683dcf7a8bb18d2b83b0a7..49caa56557420ed96a8adf2ee89d8adfb6157304 100644 (file)
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -566,6 +566,8 @@ extern Datum language_handler_in(PG_FUNCTION_ARGS);
  extern Datum language_handler_out(PG_FUNCTION_ARGS);
  extern Datum fdw_handler_in(PG_FUNCTION_ARGS);
  extern Datum fdw_handler_out(PG_FUNCTION_ARGS);
+extern Datum tsm_handler_in(PG_FUNCTION_ARGS);
+extern Datum tsm_handler_out(PG_FUNCTION_ARGS);
  extern Datum internal_in(PG_FUNCTION_ARGS);
  extern Datum internal_out(PG_FUNCTION_ARGS);
  extern Datum opaque_in(PG_FUNCTION_ARGS);
@@ -1213,6 +1215,12 @@ extern Datum ginqueryarrayextract(PG_FUNCTION_ARGS);
  extern Datum ginarrayconsistent(PG_FUNCTION_ARGS);
  extern Datum ginarraytriconsistent(PG_FUNCTION_ARGS);
  
+/* access/tablesample/bernoulli.c */
+extern Datum tsm_bernoulli_handler(PG_FUNCTION_ARGS);
+
+/* access/tablesample/system.c */
+extern Datum tsm_system_handler(PG_FUNCTION_ARGS);
+
  /* access/transam/twophase.c */
  extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
  
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h

index a40c9b12732da07f80c1c410a1651cd0db748188..971153843296d55612f201ace510af8ccbee8cdd 100644 (file)
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -156,7 +156,6 @@ extern void free_attstatsslot(Oid atttype,
  extern char *get_namespace_name(Oid nspid);
  extern char *get_namespace_name_or_temp(Oid nspid);
  extern Oid     get_range_subtype(Oid rangeOid);
-extern char *get_tablesample_method_name(Oid tsmid);
  
  #define type_is_array(typid)  (get_element_type(typid) != InvalidOid)
  /* type_is_array_domain accepts both plain arrays and domains over arrays */
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h

index f06f03a996f260455d8d7c9a0cbae7c8badf4e22..18404e266eb63ec0384e7cf8c75d87a6683ef0ca 100644 (file)
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -81,8 +81,6 @@ enum SysCacheIdentifier
         REPLORIGNAME,
         RULERELNAME,
         STATRELATTINH,
-       TABLESAMPLEMETHODNAME,
-       TABLESAMPLEMETHODOID,
         TABLESPACEOID,
         TRFOID,
         TRFTYPELANG,
diff --git a/src/port/erand48.c b/src/port/erand48.c

index 12efd8193c4ed7b424961b44ba65de750debd1d6..9d471197c354056c8903a5a9e1c6b0023419f1d1 100644 (file)
--- a/src/port/erand48.c
+++ b/src/port/erand48.c
@@ -33,6 +33,9 @@
  
  #include <math.h>
  
+#define RAND48_SEED_0  (0x330e)
+#define RAND48_SEED_1  (0xabcd)
+#define RAND48_SEED_2  (0x1234)
  #define RAND48_MULT_0  (0xe66d)
  #define RAND48_MULT_1  (0xdeec)
  #define RAND48_MULT_2  (0x0005)
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out

index 414299a694114112410b8b62682cafcc448af2e8..e7c242cd22d480c0b566a9a8dc617f07fb03b6f6 100644 (file)
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -101,15 +101,17 @@ NOTICE:  f_leak => great manga
    44 |   8 |      1 | rls_regress_user2 | great manga           | manga
  (4 rows)
  
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
-NOTICE:  f_leak => my first novel
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+  WHERE f_leak(dtitle) ORDER BY did;
  NOTICE:  f_leak => my first manga
  NOTICE:  f_leak => great science fiction
+NOTICE:  f_leak => great manga
   did | cid | dlevel |      dauthor      |        dtitle         
  -----+-----+--------+-------------------+-----------------------
-   1 |  11 |      1 | rls_regress_user1 | my first novel
     4 |  44 |      1 | rls_regress_user1 | my first manga
     6 |  22 |      1 | rls_regress_user2 | great science fiction
+   8 |  44 |      1 | rls_regress_user2 | great manga
  (3 rows)
  
  -- viewpoint from rls_regress_user2
@@ -156,20 +158,20 @@ NOTICE:  f_leak => great manga
    44 |   8 |      1 | rls_regress_user2 | great manga           | manga
  (8 rows)
  
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
-NOTICE:  f_leak => my first novel
-NOTICE:  f_leak => my second novel
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+  WHERE f_leak(dtitle) ORDER BY did;
  NOTICE:  f_leak => my first manga
+NOTICE:  f_leak => my second manga
  NOTICE:  f_leak => great science fiction
-NOTICE:  f_leak => great technology book
+NOTICE:  f_leak => great manga
   did | cid | dlevel |      dauthor      |        dtitle         
  -----+-----+--------+-------------------+-----------------------
-   1 |  11 |      1 | rls_regress_user1 | my first novel
-   2 |  11 |      2 | rls_regress_user1 | my second novel
     4 |  44 |      1 | rls_regress_user1 | my first manga
+   5 |  44 |      2 | rls_regress_user1 | my second manga
     6 |  22 |      1 | rls_regress_user2 | great science fiction
-   7 |  33 |      2 | rls_regress_user2 | great technology book
-(5 rows)
+   8 |  44 |      1 | rls_regress_user2 | great manga
+(4 rows)
  
  EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
                          QUERY PLAN                        
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out

index cd5337531d4b41e90aa469c8f33c2a9f13ca8ddd..1e5b0b9a2c43a522d088417dfa249168b3e5eeab 100644 (file)
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2202,6 +2202,10 @@ street| SELECT r.name,
     FROM ONLY road r,
      real_city c
    WHERE (c.outline ## r.thepath);
+test_tablesample_v1| SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2);
+test_tablesample_v2| SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system (99);
  toyemp| SELECT emp.name,
      emp.age,
      emp.location,
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out

index 14acd16da3b3d00d2f1630ac3c7c941f0b4e986f..eb0bc88ef1fb27daee22dd4a2de684df35507417 100644 (file)
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -128,7 +128,6 @@ pg_shdepend|t
  pg_shdescription|t
  pg_shseclabel|t
  pg_statistic|t
-pg_tablesample_method|t
  pg_tablespace|t
  pg_transform|t
  pg_trigger|t
diff --git a/src/test/regress/expected/tablesample.out b/src/test/regress/expected/tablesample.out

index 04e5eb8b807e2d1c95adae29a57e40742fe0ccf0..727a83543973436293d6f6371374a25b54a66078 100644 (file)
--- a/src/test/regress/expected/tablesample.out
+++ b/src/test/regress/expected/tablesample.out
@@ -1,107 +1,123 @@
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
+INSERT INTO test_tablesample
+  SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
   id 
  ----
-  0
-  1
-  2
    3
    4
    5
-  9
-(7 rows)
-
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
- id 
-----
    6
    7
    8
-(3 rows)
+(6 rows)
  
-SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
- count 
--------
-    10
-(1 row)
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+ id 
+----
+(0 rows)
  
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
   id 
  ----
-  0
-  1
-  2
+  3
+  4
+  5
    6
    7
    8
-  9
-(7 rows)
+(6 rows)
  
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0);
   id 
  ----
-  0
-  1
-  3
    4
    5
+  6
+  7
+  8
  (5 rows)
  
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0);
   id 
  ----
-  0
-  5
-(2 rows)
+  7
+(1 row)
  
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
-                                 pg_get_viewdef                                 
---------------------------------------------------------------------------------
-  SELECT test_tablesample.id                                                   +
-    FROM test_tablesample TABLESAMPLE system (((10 * 2))::real) REPEATABLE (2);
+-- 100% should give repeatable count results (ie, all rows) in any case
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
+ count 
+-------
+    10
  (1 row)
  
-SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
-                      pg_get_viewdef                       
------------------------------------------------------------
-  SELECT test_tablesample.id                              +
-    FROM test_tablesample TABLESAMPLE system ((99)::real);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+ count 
+-------
+    10
+(1 row)
+
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
+ count 
+-------
+    10
  (1 row)
  
+CREATE VIEW test_tablesample_v1 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+\d+ test_tablesample_v1
+          View "public.test_tablesample_v1"
+ Column |  Type   | Modifiers | Storage | Description 
+--------+---------+-----------+---------+-------------
+ id     | integer |           | plain   | 
+View definition:
+ SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2);
+
+\d+ test_tablesample_v2
+          View "public.test_tablesample_v2"
+ Column |  Type   | Modifiers | Storage | Description 
+--------+---------+-----------+---------+-------------
+ id     | integer |           | plain   | 
+View definition:
+ SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system (99);
+
+-- check a sampled query doesn't affect cursor in progress
  BEGIN;
-DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+DECLARE tablesample_cur CURSOR FOR
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
  FETCH FIRST FROM tablesample_cur;
   id 
  ----
-  0
+  3
  (1 row)
  
  FETCH NEXT FROM tablesample_cur;
   id 
  ----
-  1
+  4
  (1 row)
  
  FETCH NEXT FROM tablesample_cur;
   id 
  ----
-  2
+  5
  (1 row)
  
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
   id 
  ----
-  0
-  1
-  2
    3
    4
    5
-  9
-(7 rows)
+  6
+  7
+  8
+(6 rows)
  
  FETCH NEXT FROM tablesample_cur;
   id 
@@ -124,19 +140,19 @@ FETCH NEXT FROM tablesample_cur;
  FETCH FIRST FROM tablesample_cur;
   id 
  ----
-  0
+  3
  (1 row)
  
  FETCH NEXT FROM tablesample_cur;
   id 
  ----
-  1
+  4
  (1 row)
  
  FETCH NEXT FROM tablesample_cur;
   id 
  ----
-  2
+  5
  (1 row)
  
  FETCH NEXT FROM tablesample_cur;
@@ -159,41 +175,129 @@ FETCH NEXT FROM tablesample_cur;
  
  CLOSE tablesample_cur;
  END;
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-                                  QUERY PLAN                                   
--------------------------------------------------------------------------------
- Sample Scan (system) on test_tablesample  (cost=0.00..26.35 rows=635 width=4)
+EXPLAIN (COSTS OFF)
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sample Scan on test_tablesample
+   Sampling: system ('50'::real) REPEATABLE ('2'::double precision)
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+  SELECT * FROM test_tablesample_v1;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sample Scan on test_tablesample
+   Sampling: system ('20'::real) REPEATABLE ('2'::double precision)
+(2 rows)
+
+-- check inheritance behavior
+explain (costs off)
+  select count(*) from person tablesample bernoulli (100);
+                   QUERY PLAN                    
+-------------------------------------------------
+ Aggregate
+   ->  Append
+         ->  Sample Scan on person
+               Sampling: bernoulli ('100'::real)
+         ->  Sample Scan on emp
+               Sampling: bernoulli ('100'::real)
+         ->  Sample Scan on student
+               Sampling: bernoulli ('100'::real)
+         ->  Sample Scan on stud_emp
+               Sampling: bernoulli ('100'::real)
+(10 rows)
+
+select count(*) from person tablesample bernoulli (100);
+ count 
+-------
+    58
  (1 row)
  
-EXPLAIN SELECT * FROM test_tablesample_v1;
-                                  QUERY PLAN                                   
--------------------------------------------------------------------------------
- Sample Scan (system) on test_tablesample  (cost=0.00..10.54 rows=254 width=4)
+select count(*) from person;
+ count 
+-------
+    58
+(1 row)
+
+-- check that collations get assigned within the tablesample arguments
+SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int);
+ count 
+-------
+     0
+(1 row)
+
+-- check behavior during rescans, as well as correct handling of min/max pct
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss;
+ pct | count 
+-----+-------
+   0 |     0
+ 100 | 10000
+(2 rows)
+
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample system (pct)) ss;
+ pct | count 
+-----+-------
+   0 |     0
+ 100 | 10000
+(2 rows)
+
+explain (costs off)
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ HashAggregate
+   Group Key: "*VALUES*".column1
+   ->  Nested Loop
+         ->  Values Scan on "*VALUES*"
+         ->  Sample Scan on tenk1
+               Sampling: bernoulli ("*VALUES*".column1)
+(6 rows)
+
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+ pct | count 
+-----+-------
+ 100 | 10000
+(1 row)
+
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample system (pct)) ss
+  group by pct;
+ pct | count 
+-----+-------
+ 100 | 10000
  (1 row)
  
  -- errors
  SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
-ERROR:  tablesample method "foobar" does not exist
+ERROR:  tablesample method foobar does not exist
  LINE 1: SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
-                       ^
+                                                    ^
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL);
+ERROR:  TABLESAMPLE parameter cannot be null
  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
-ERROR:  REPEATABLE clause must be NOT NULL numeric value
-LINE 1: ... test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
-                                                                 ^
+ERROR:  TABLESAMPLE REPEATABLE parameter cannot be null
  SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
  SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
  SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1);
-ERROR:  TABLESAMPLE clause can only be used on tables and materialized views
+ERROR:  TABLESAMPLE clause can only be applied to tables and materialized views
  LINE 1: SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1)...
                         ^
  INSERT INTO test_tablesample_v1 VALUES(1);
@@ -202,30 +306,10 @@ DETAIL:  Views containing TABLESAMPLE are not automatically updatable.
  HINT:  To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.
  WITH query_select AS (SELECT * FROM test_tablesample)
  SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
-ERROR:  TABLESAMPLE clause can only be used on tables and materialized views
+ERROR:  TABLESAMPLE clause can only be applied to tables and materialized views
  LINE 2: SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEA...
                        ^
  SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
  ERROR:  syntax error at or near "TABLESAMPLE"
  LINE 1: ...CT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPL...
                                                               ^
--- catalog sanity
-SELECT *
-FROM pg_tablesample_method
-WHERE tsminit IS NULL
-   OR tsmseqscan IS NULL
-   OR tsmpagemode IS NULL
-   OR tsmnextblock IS NULL
-   OR tsmnexttuple IS NULL
-   OR tsmend IS NULL
-   OR tsmreset IS NULL
-   OR tsmcost IS NULL;
- tsmname | tsmseqscan | tsmpagemode | tsminit | tsmnextblock | tsmnexttuple | tsmexaminetuple | tsmend | tsmreset | tsmcost 
----------+------------+-------------+---------+--------------+--------------+-----------------+--------+----------+---------
-(0 rows)
-
--- done
-DROP TABLE test_tablesample CASCADE;
-NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to view test_tablesample_v1
-drop cascades to view test_tablesample_v2
diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source

index 70c9cc356a642075b3df47ae897b70673e13e471..9eedb363d06be9602d35d90ff68bb84d7b923c7e 100644 (file)
--- a/src/test/regress/output/misc.source
+++ b/src/test/regress/output/misc.source
@@ -686,6 +686,9 @@ SELECT user_relns() AS user_relns
   test_range_excl
   test_range_gist
   test_range_spgist
+ test_tablesample
+ test_tablesample_v1
+ test_tablesample_v2
   test_tsvector
   testjsonb
   text_tbl
@@ -705,7 +708,7 @@ SELECT user_relns() AS user_relns
   tvvmv
   varchar_tbl
   xacttest
-(127 rows)
+(130 rows)
  
  SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer')));
   name 
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule

index 3a607cff46c235ff8b15fdbd095c2098ca3ac217..15d74d4e6eba90abc4476ae85412c0e4dc0b5081 100644 (file)
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -110,6 +110,7 @@ test: lock
  test: replica_identity
  test: rowsecurity
  test: object_address
+test: tablesample
  test: alter_generic
  test: alter_operator
  test: misc
@@ -156,4 +157,3 @@ test: with
  test: xml
  test: event_trigger
  test: stats
-test: tablesample
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql

index 039070b85b73370be2acdb27dfb115770a96427a..e86f8143142cbbee3f7d154874e9fd82bcff702a 100644 (file)
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -94,14 +94,18 @@ SET row_security TO ON;
  SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
  SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
  
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+  WHERE f_leak(dtitle) ORDER BY did;
  
  -- viewpoint from rls_regress_user2
  SET SESSION AUTHORIZATION rls_regress_user2;
  SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
  SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
  
-SELECT * FROM document TABLESAMPLE BERNOULLI (50) REPEATABLE(1) WHERE f_leak(dtitle) ORDER BY did;
+-- try a sampled version
+SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
+  WHERE f_leak(dtitle) ORDER BY did;
  
  EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
  EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
diff --git a/src/test/regress/sql/tablesample.sql b/src/test/regress/sql/tablesample.sql

index 7b3eb9bedf7bb3a82ccd127a65bb1382e10a415b..eec97934966966229800a47563153669b5ea353a 100644 (file)
--- a/src/test/regress/sql/tablesample.sql
+++ b/src/test/regress/sql/tablesample.sql
@@ -1,26 +1,37 @@
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
  
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
+INSERT INTO test_tablesample
+  SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
  
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (0);
+
+-- 100% should give repeatable count results (ie, all rows) in any case
  SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (50) REPEATABLE (100);
-SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
  
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
-SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
+CREATE VIEW test_tablesample_v1 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+\d+ test_tablesample_v1
+\d+ test_tablesample_v2
  
+-- check a sampled query doesn't affect cursor in progress
  BEGIN;
-DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+DECLARE tablesample_cur CURSOR FOR
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+
  FETCH FIRST FROM tablesample_cur;
  FETCH NEXT FROM tablesample_cur;
  FETCH NEXT FROM tablesample_cur;
  
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
  
  FETCH NEXT FROM tablesample_cur;
  FETCH NEXT FROM tablesample_cur;
@@ -36,12 +47,45 @@ FETCH NEXT FROM tablesample_cur;
  CLOSE tablesample_cur;
  END;
  
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-EXPLAIN SELECT * FROM test_tablesample_v1;
+EXPLAIN (COSTS OFF)
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
+EXPLAIN (COSTS OFF)
+  SELECT * FROM test_tablesample_v1;
+
+-- check inheritance behavior
+explain (costs off)
+  select count(*) from person tablesample bernoulli (100);
+select count(*) from person tablesample bernoulli (100);
+select count(*) from person;
+
+-- check that collations get assigned within the tablesample arguments
+SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int);
+
+-- check behavior during rescans, as well as correct handling of min/max pct
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss;
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample system (pct)) ss;
+explain (costs off)
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample system (pct)) ss
+  group by pct;
  
  -- errors
  SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
  
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (NULL);
  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
  
  SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
@@ -56,19 +100,3 @@ WITH query_select AS (SELECT * FROM test_tablesample)
  SELECT * FROM query_select TABLESAMPLE BERNOULLI (5.5) REPEATABLE (1);
  
  SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
-
--- catalog sanity
-
-SELECT *
-FROM pg_tablesample_method
-WHERE tsminit IS NULL
-   OR tsmseqscan IS NULL
-   OR tsmpagemode IS NULL
-   OR tsmnextblock IS NULL
-   OR tsmnexttuple IS NULL
-   OR tsmend IS NULL
-   OR tsmreset IS NULL
-   OR tsmcost IS NULL;
-
--- done
-DROP TABLE test_tablesample CASCADE;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 25 Jul 2015 18:39:00 +0000 (14:39 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 25 Jul 2015 18:39:00 +0000 (14:39 -0400)
contrib/pg_stat_statements/pg_stat_statements.c		patch \| blob \| history
contrib/tsm_system_rows/Makefile		patch \| blob \| history
contrib/tsm_system_rows/expected/tsm_system_rows.out		patch \| blob \| history
contrib/tsm_system_rows/sql/tsm_system_rows.sql		patch \| blob \| history
contrib/tsm_system_rows/tsm_system_rows--1.0.sql		patch \| blob \| history
contrib/tsm_system_rows/tsm_system_rows.c		patch \| blob \| history
contrib/tsm_system_rows/tsm_system_rows.control		patch \| blob \| history
contrib/tsm_system_time/Makefile		patch \| blob \| history
contrib/tsm_system_time/expected/tsm_system_time.out		patch \| blob \| history
contrib/tsm_system_time/sql/tsm_system_time.sql		patch \| blob \| history
contrib/tsm_system_time/tsm_system_time--1.0.sql		patch \| blob \| history
contrib/tsm_system_time/tsm_system_time.c		patch \| blob \| history
contrib/tsm_system_time/tsm_system_time.control		patch \| blob \| history
doc/src/sgml/catalogs.sgml		patch \| blob \| history
doc/src/sgml/datatype.sgml		patch \| blob \| history
doc/src/sgml/postgres.sgml		patch \| blob \| history
doc/src/sgml/ref/select.sgml		patch \| blob \| history
doc/src/sgml/tablesample-method.sgml		patch \| blob \| history
doc/src/sgml/tsm-system-rows.sgml		patch \| blob \| history
doc/src/sgml/tsm-system-time.sgml		patch \| blob \| history
src/backend/access/heap/heapam.c		patch \| blob \| history
src/backend/access/tablesample/Makefile		patch \| blob \| history
src/backend/access/tablesample/bernoulli.c		patch \| blob \| history
src/backend/access/tablesample/system.c		patch \| blob \| history
src/backend/access/tablesample/tablesample.c		patch \| blob \| history
src/backend/catalog/Makefile		patch \| blob \| history
src/backend/catalog/dependency.c		patch \| blob \| history
src/backend/commands/explain.c		patch \| blob \| history
src/backend/executor/execAmi.c		patch \| blob \| history
src/backend/executor/nodeSamplescan.c		patch \| blob \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| history
src/backend/nodes/nodeFuncs.c		patch \| blob \| history
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/nodes/readfuncs.c		patch \| blob \| history
src/backend/optimizer/path/allpaths.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| history
src/backend/optimizer/plan/initsplan.c		patch \| blob \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| history
src/backend/optimizer/plan/setrefs.c		patch \| blob \| history
src/backend/optimizer/plan/subselect.c		patch \| blob \| history
src/backend/optimizer/prep/prepjointree.c		patch \| blob \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| history
src/backend/parser/gram.y		patch \| blob \| history
src/backend/parser/parse_clause.c		patch \| blob \| history
src/backend/parser/parse_func.c		patch \| blob \| history
src/backend/rewrite/rewriteHandler.c		patch \| blob \| history
src/backend/utils/adt/pseudotypes.c		patch \| blob \| history
src/backend/utils/adt/ruleutils.c		patch \| blob \| history
src/backend/utils/cache/lsyscache.c		patch \| blob \| history
src/backend/utils/cache/syscache.c		patch \| blob \| history
src/backend/utils/errcodes.txt		patch \| blob \| history
src/backend/utils/misc/sampling.c		patch \| blob \| history
src/bin/psql/tab-complete.c		patch \| blob \| history
src/include/access/heapam.h		patch \| blob \| history
src/include/access/tablesample.h	[deleted file]	patch \| blob \| history
src/include/access/tsmapi.h	[new file with mode: 0644]	patch \| blob
src/include/catalog/catversion.h		patch \| blob \| history
src/include/catalog/indexing.h		patch \| blob \| history
src/include/catalog/pg_proc.h		patch \| blob \| history
src/include/catalog/pg_tablesample_method.h	[deleted file]	patch \| blob \| history
src/include/catalog/pg_type.h		patch \| blob \| history
src/include/executor/nodeSamplescan.h		patch \| blob \| history
src/include/nodes/execnodes.h		patch \| blob \| history
src/include/nodes/nodes.h		patch \| blob \| history
src/include/nodes/parsenodes.h		patch \| blob \| history
src/include/nodes/plannodes.h		patch \| blob \| history
src/include/optimizer/cost.h		patch \| blob \| history
src/include/parser/parse_func.h		patch \| blob \| history
src/include/port.h		patch \| blob \| history
src/include/utils/builtins.h		patch \| blob \| history
src/include/utils/lsyscache.h		patch \| blob \| history
src/include/utils/syscache.h		patch \| blob \| history
src/port/erand48.c		patch \| blob \| history
src/test/regress/expected/rowsecurity.out		patch \| blob \| history
src/test/regress/expected/rules.out		patch \| blob \| history
src/test/regress/expected/sanity_check.out		patch \| blob \| history
src/test/regress/expected/tablesample.out		patch \| blob \| history
src/test/regress/output/misc.source		patch \| blob \| history
src/test/regress/serial_schedule		patch \| blob \| history
src/test/regress/sql/rowsecurity.sql		patch \| blob \| history
src/test/regress/sql/tablesample.sql		patch \| blob \| history