Separate block sampling functions

author Simon Riggs <simon@2ndQuadrant.com>

Fri, 15 May 2015 02:02:54 +0000 (04:02 +0200)

committer Simon Riggs <simon@2ndQuadrant.com>

Fri, 15 May 2015 02:02:54 +0000 (04:02 +0200)
author Simon Riggs <simon@2ndQuadrant.com>
Fri, 15 May 2015 02:02:54 +0000 (04:02 +0200)
committer Simon Riggs <simon@2ndQuadrant.com>
Fri, 15 May 2015 02:02:54 +0000 (04:02 +0200)
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c

index 4c56444751b4f810de70bc10befccfa9e453252d..ea4ed4008296f2a7e8866808967caa48520c8d1c 100644 (file)
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -34,6 +34,7 @@
  #include "optimizer/var.h"
  #include "utils/memutils.h"
  #include "utils/rel.h"
+#include "utils/sampling.h"
  
  PG_MODULE_MAGIC;
  
@@ -1006,7 +1007,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
  {
         int                     numrows = 0;
         double          rowstoskip = -1;        /* -1 means not set yet */
-       double          rstate;
+       ReservoirStateData rstate;
         TupleDesc       tupDesc;
         Datum      *values;
         bool       *nulls;
@@ -1044,7 +1045,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
                                                                            ALLOCSET_DEFAULT_MAXSIZE);
  
         /* Prepare for sampling rows */
-       rstate = anl_init_selection_state(targrows);
+       reservoir_init_selection_state(&rstate, targrows);
  
         /* Set up callback to identify error line number. */
         errcallback.callback = CopyFromErrorCallback;
@@ -1088,7 +1089,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
                          * not-yet-incremented value of totalrows as t.
                          */
                         if (rowstoskip < 0)
-                               rowstoskip = anl_get_next_S(*totalrows, targrows, &rstate);
+                               rowstoskip = reservoir_get_next_S(&rstate, *totalrows, targrows);
  
                         if (rowstoskip <= 0)
                         {
@@ -1096,7 +1097,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
                                  * Found a suitable tuple, so save it, replacing one old tuple
                                  * at random
                                  */
-                               int                     k = (int) (targrows * anl_random_fract());
+                               int                     k = (int) (targrows * sampler_random_fract());
  
                                 Assert(k >= 0 && k < targrows);
                                 heap_freetuple(rows[k]);
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c

index 4f7123b84f478da6f5c4df00038cd06cb3e1e7de..43288c27b02b95349ac6497e018a58d8288400cb 100644 (file)
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -37,6 +37,7 @@
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
  #include "utils/rel.h"
+#include "utils/sampling.h"
  
  PG_MODULE_MAGIC;
  
@@ -202,7 +203,7 @@ typedef struct PgFdwAnalyzeState
         /* for random sampling */
         double          samplerows;             /* # of rows fetched */
         double          rowstoskip;             /* # of rows to skip before next sample */
-       double          rstate;                 /* random state */
+       ReservoirStateData rstate;              /* state for reservoir sampling*/
  
         /* working memory contexts */
         MemoryContext anl_cxt;          /* context for per-analyze lifespan data */
@@ -2411,7 +2412,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
         astate.numrows = 0;
         astate.samplerows = 0;
         astate.rowstoskip = -1;         /* -1 means not set yet */
-       astate.rstate = anl_init_selection_state(targrows);
+       reservoir_init_selection_state(&astate.rstate, targrows);
  
         /* Remember ANALYZE context, and create a per-tuple temp context */
         astate.anl_cxt = CurrentMemoryContext;
@@ -2551,13 +2552,12 @@ analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate)
                  * analyze.c; see Jeff Vitter's paper.
                  */
                 if (astate->rowstoskip < 0)
-                       astate->rowstoskip = anl_get_next_S(astate->samplerows, targrows,
-                                                                                               &astate->rstate);
+                       astate->rowstoskip = reservoir_get_next_S(&astate->rstate, astate->samplerows, targrows);
  
                 if (astate->rowstoskip <= 0)
                 {
                         /* Choose a random reservoir element to replace. */
-                       pos = (int) (targrows * anl_random_fract());
+                       pos = (int) (targrows * sampler_random_fract());
                         Assert(pos >= 0 && pos < targrows);
                         heap_freetuple(astate->rows[pos]);
                 }
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 15ec0ad551cdb1bc56576bad4a235122c1523230..952cf204a0ae0193dd6549457a72ae8153bbb02c 100644 (file)
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -50,23 +50,13 @@
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
  #include "utils/pg_rusage.h"
+#include "utils/sampling.h"
  #include "utils/sortsupport.h"
  #include "utils/syscache.h"
  #include "utils/timestamp.h"
  #include "utils/tqual.h"
  
  
-/* Data structure for Algorithm S from Knuth 3.4.2 */
-typedef struct
-{
-       BlockNumber N;                          /* number of blocks, known in advance */
-       int                     n;                              /* desired sample size */
-       BlockNumber t;                          /* current block number */
-       int                     m;                              /* blocks selected so far */
-} BlockSamplerData;
-
-typedef BlockSamplerData *BlockSampler;
-
  /* Per-index data for ANALYZE */
  typedef struct AnlIndexData
  {
@@ -89,10 +79,6 @@ static void do_analyze_rel(Relation onerel, int options,
                            VacuumParams *params, List *va_cols,
                            AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
                            bool inh, bool in_outer_xact, int elevel);
-static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
-                                 int samplesize);
-static bool BlockSampler_HasMore(BlockSampler bs);
-static BlockNumber BlockSampler_Next(BlockSampler bs);
  static void compute_index_stats(Relation onerel, double totalrows,
                                         AnlIndexData *indexdata, int nindexes,
                                         HeapTuple *rows, int numrows,
@@ -950,94 +936,6 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
         return stats;
  }
  
-/*
- * BlockSampler_Init -- prepare for random sampling of blocknumbers
- *
- * BlockSampler is used for stage one of our new two-stage tuple
- * sampling mechanism as discussed on pgsql-hackers 2004-04-02 (subject
- * "Large DB").  It selects a random sample of samplesize blocks out of
- * the nblocks blocks in the table.  If the table has less than
- * samplesize blocks, all blocks are selected.
- *
- * Since we know the total number of blocks in advance, we can use the
- * straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's
- * algorithm.
- */
-static void
-BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize)
-{
-       bs->N = nblocks;                        /* measured table size */
-
-       /*
-        * If we decide to reduce samplesize for tables that have less or not much
-        * more than samplesize blocks, here is the place to do it.
-        */
-       bs->n = samplesize;
-       bs->t = 0;                                      /* blocks scanned so far */
-       bs->m = 0;                                      /* blocks selected so far */
-}
-
-static bool
-BlockSampler_HasMore(BlockSampler bs)
-{
-       return (bs->t < bs->N) && (bs->m < bs->n);
-}
-
-static BlockNumber
-BlockSampler_Next(BlockSampler bs)
-{
-       BlockNumber K = bs->N - bs->t;          /* remaining blocks */
-       int                     k = bs->n - bs->m;              /* blocks still to sample */
-       double          p;                              /* probability to skip block */
-       double          V;                              /* random */
-
-       Assert(BlockSampler_HasMore(bs));       /* hence K > 0 and k > 0 */
-
-       if ((BlockNumber) k >= K)
-       {
-               /* need all the rest */
-               bs->m++;
-               return bs->t++;
-       }
-
-       /*----------
-        * It is not obvious that this code matches Knuth's Algorithm S.
-        * Knuth says to skip the current block with probability 1 - k/K.
-        * If we are to skip, we should advance t (hence decrease K), and
-        * repeat the same probabilistic test for the next block.  The naive
-        * implementation thus requires an anl_random_fract() call for each block
-        * number.  But we can reduce this to one anl_random_fract() call per
-        * selected block, by noting that each time the while-test succeeds,
-        * we can reinterpret V as a uniform random number in the range 0 to p.
-        * Therefore, instead of choosing a new V, we just adjust p to be
-        * the appropriate fraction of its former value, and our next loop
-        * makes the appropriate probabilistic test.
-        *
-        * We have initially K > k > 0.  If the loop reduces K to equal k,
-        * the next while-test must fail since p will become exactly zero
-        * (we assume there will not be roundoff error in the division).
-        * (Note: Knuth suggests a "<=" loop condition, but we use "<" just
-        * to be doubly sure about roundoff error.)  Therefore K cannot become
-        * less than k, which means that we cannot fail to select enough blocks.
-        *----------
-        */
-       V = anl_random_fract();
-       p = 1.0 - (double) k / (double) K;
-       while (V < p)
-       {
-               /* skip */
-               bs->t++;
-               K--;                                    /* keep K == N - t */
-
-               /* adjust p to be new cutoff point in reduced range */
-               p *= 1.0 - (double) k / (double) K;
-       }
-
-       /* select */
-       bs->m++;
-       return bs->t++;
-}
-
  /*
   * acquire_sample_rows -- acquire a random sample of rows from the table
   *
@@ -1084,7 +982,7 @@ acquire_sample_rows(Relation onerel, int elevel,
         BlockNumber totalblocks;
         TransactionId OldestXmin;
         BlockSamplerData bs;
-       double          rstate;
+       ReservoirStateData rstate;
  
         Assert(targrows > 0);
  
@@ -1094,9 +992,9 @@ acquire_sample_rows(Relation onerel, int elevel,
         OldestXmin = GetOldestXmin(onerel, true);
  
         /* Prepare for sampling block numbers */
-       BlockSampler_Init(&bs, totalblocks, targrows);
+       BlockSampler_Init(&bs, totalblocks, targrows, random());
         /* Prepare for sampling rows */
-       rstate = anl_init_selection_state(targrows);
+       reservoir_init_selection_state(&rstate, targrows);
  
         /* Outer loop over blocks to sample */
         while (BlockSampler_HasMore(&bs))
@@ -1244,8 +1142,7 @@ acquire_sample_rows(Relation onerel, int elevel,
                                          * t.
                                          */
                                         if (rowstoskip < 0)
-                                               rowstoskip = anl_get_next_S(samplerows, targrows,
-                                                                                                       &rstate);
+                                               rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
  
                                         if (rowstoskip <= 0)
                                         {
@@ -1253,7 +1150,7 @@ acquire_sample_rows(Relation onerel, int elevel,
                                                  * Found a suitable tuple, so save it, replacing one
                                                  * old tuple at random
                                                  */
-                                               int                     k = (int) (targrows * anl_random_fract());
+                                               int                     k = (int) (targrows * sampler_random_fract());
  
                                                 Assert(k >= 0 && k < targrows);
                                                 heap_freetuple(rows[k]);
@@ -1312,116 +1209,6 @@ acquire_sample_rows(Relation onerel, int elevel,
         return numrows;
  }
  
-/* Select a random value R uniformly distributed in (0 - 1) */
-double
-anl_random_fract(void)
-{
-       return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
-}
-
-/*
- * These two routines embody Algorithm Z from "Random sampling with a
- * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
- * (Mar. 1985), Pages 37-57.  Vitter describes his algorithm in terms
- * of the count S of records to skip before processing another record.
- * It is computed primarily based on t, the number of records already read.
- * The only extra state needed between calls is W, a random state variable.
- *
- * anl_init_selection_state computes the initial W value.
- *
- * Given that we've already read t records (t >= n), anl_get_next_S
- * determines the number of records to skip before the next record is
- * processed.
- */
-double
-anl_init_selection_state(int n)
-{
-       /* Initial value of W (for use when Algorithm Z is first applied) */
-       return exp(-log(anl_random_fract()) / n);
-}
-
-double
-anl_get_next_S(double t, int n, double *stateptr)
-{
-       double          S;
-
-       /* The magic constant here is T from Vitter's paper */
-       if (t <= (22.0 * n))
-       {
-               /* Process records using Algorithm X until t is large enough */
-               double          V,
-                                       quot;
-
-               V = anl_random_fract(); /* Generate V */
-               S = 0;
-               t += 1;
-               /* Note: "num" in Vitter's code is always equal to t - n */
-               quot = (t - (double) n) / t;
-               /* Find min S satisfying (4.1) */
-               while (quot > V)
-               {
-                       S += 1;
-                       t += 1;
-                       quot *= (t - (double) n) / t;
-               }
-       }
-       else
-       {
-               /* Now apply Algorithm Z */
-               double          W = *stateptr;
-               double          term = t - (double) n + 1;
-
-               for (;;)
-               {
-                       double          numer,
-                                               numer_lim,
-                                               denom;
-                       double          U,
-                                               X,
-                                               lhs,
-                                               rhs,
-                                               y,
-                                               tmp;
-
-                       /* Generate U and X */
-                       U = anl_random_fract();
-                       X = t * (W - 1.0);
-                       S = floor(X);           /* S is tentatively set to floor(X) */
-                       /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
-                       tmp = (t + 1) / term;
-                       lhs = exp(log(((U * tmp * tmp) * (term + S)) / (t + X)) / n);
-                       rhs = (((t + X) / (term + S)) * term) / t;
-                       if (lhs <= rhs)
-                       {
-                               W = rhs / lhs;
-                               break;
-                       }
-                       /* Test if U <= f(S)/cg(X) */
-                       y = (((U * (t + 1)) / term) * (t + S + 1)) / (t + X);
-                       if ((double) n < S)
-                       {
-                               denom = t;
-                               numer_lim = term + S;
-                       }
-                       else
-                       {
-                               denom = t - (double) n + S;
-                               numer_lim = t + 1;
-                       }
-                       for (numer = t + S; numer >= numer_lim; numer -= 1)
-                       {
-                               y *= numer / denom;
-                               denom -= 1;
-                       }
-                       W = exp(-log(anl_random_fract()) / n);          /* Generate W in advance */
-                       if (exp(log(y) / n) <= (t + X) / t)
-                               break;
-               }
-               *stateptr = W;
-       }
-       return S;
-}
-
  /*
   * qsort comparator for sorting rows[] array
   */
diff --git a/src/backend/utils/misc/Makefile b/src/backend/utils/misc/Makefile

index 378b77e0bc53c16994580cc9c75b14dfe44d05a9..788910157de48b3f2f1cf1d844aa0d7cac10a6bf 100644 (file)
--- a/src/backend/utils/misc/Makefile
+++ b/src/backend/utils/misc/Makefile
@@ -15,7 +15,7 @@ include $(top_builddir)/src/Makefile.global
  override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS)
  
  OBJS = guc.o help_config.o pg_rusage.o ps_status.o rls.o \
-       superuser.o timeout.o tzparser.o
+       sampling.o superuser.o timeout.o tzparser.o
  
  # This location might depend on the installation directories. Therefore
  # we can't subsitute it into pg_config.h.
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c

new file mode 100644 (file)

index 0000000..1eeabaf
--- /dev/null
+++ b/src/backend/utils/misc/sampling.c
@@ -0,0 +1,226 @@
+/*-------------------------------------------------------------------------
+ *
+ * sampling.c
+ *       Relation block sampling routines.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *       src/backend/utils/misc/sampling.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <math.h>
+
+#include "utils/sampling.h"
+
+
+/*
+ * BlockSampler_Init -- prepare for random sampling of blocknumbers
+ *
+ * BlockSampler provides algorithm for block level sampling of a relation
+ * as discussed on pgsql-hackers 2004-04-02 (subject "Large DB")
+ * It selects a random sample of samplesize blocks out of
+ * the nblocks blocks in the table. If the table has less than
+ * samplesize blocks, all blocks are selected.
+ *
+ * Since we know the total number of blocks in advance, we can use the
+ * straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's
+ * algorithm.
+ */
+void
+BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
+                                 long randseed)
+{
+       bs->N = nblocks;                        /* measured table size */
+
+       /*
+        * If we decide to reduce samplesize for tables that have less or not much
+        * more than samplesize blocks, here is the place to do it.
+        */
+       bs->n = samplesize;
+       bs->t = 0;                                      /* blocks scanned so far */
+       bs->m = 0;                                      /* blocks selected so far */
+}
+
+bool
+BlockSampler_HasMore(BlockSampler bs)
+{
+       return (bs->t < bs->N) && (bs->m < bs->n);
+}
+
+BlockNumber
+BlockSampler_Next(BlockSampler bs)
+{
+       BlockNumber K = bs->N - bs->t;          /* remaining blocks */
+       int                     k = bs->n - bs->m;              /* blocks still to sample */
+       double          p;                              /* probability to skip block */
+       double          V;                              /* random */
+
+       Assert(BlockSampler_HasMore(bs));       /* hence K > 0 and k > 0 */
+
+       if ((BlockNumber) k >= K)
+       {
+               /* need all the rest */
+               bs->m++;
+               return bs->t++;
+       }
+
+       /*----------
+        * It is not obvious that this code matches Knuth's Algorithm S.
+        * Knuth says to skip the current block with probability 1 - k/K.
+        * If we are to skip, we should advance t (hence decrease K), and
+        * repeat the same probabilistic test for the next block.  The naive
+        * implementation thus requires an sampler_random_fract() call for each
+        * block number.  But we can reduce this to one sampler_random_fract()
+        * call per selected block, by noting that each time the while-test
+        * succeeds, we can reinterpret V as a uniform random number in the range
+        * 0 to p. Therefore, instead of choosing a new V, we just adjust p to be
+        * the appropriate fraction of its former value, and our next loop
+        * makes the appropriate probabilistic test.
+        *
+        * We have initially K > k > 0.  If the loop reduces K to equal k,
+        * the next while-test must fail since p will become exactly zero
+        * (we assume there will not be roundoff error in the division).
+        * (Note: Knuth suggests a "<=" loop condition, but we use "<" just
+        * to be doubly sure about roundoff error.)  Therefore K cannot become
+        * less than k, which means that we cannot fail to select enough blocks.
+        *----------
+        */
+       V = sampler_random_fract();
+       p = 1.0 - (double) k / (double) K;
+       while (V < p)
+       {
+               /* skip */
+               bs->t++;
+               K--;                                    /* keep K == N - t */
+
+               /* adjust p to be new cutoff point in reduced range */
+               p *= 1.0 - (double) k / (double) K;
+       }
+
+       /* select */
+       bs->m++;
+       return bs->t++;
+}
+
+/*
+ * These two routines embody Algorithm Z from "Random sampling with a
+ * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
+ * (Mar. 1985), Pages 37-57.  Vitter describes his algorithm in terms
+ * of the count S of records to skip before processing another record.
+ * It is computed primarily based on t, the number of records already read.
+ * The only extra state needed between calls is W, a random state variable.
+ *
+ * reservoir_init_selection_state computes the initial W value.
+ *
+ * Given that we've already read t records (t >= n), reservoir_get_next_S
+ * determines the number of records to skip before the next record is
+ * processed.
+ */
+void
+reservoir_init_selection_state(ReservoirState rs, int n)
+{
+       /* Initial value of W (for use when Algorithm Z is first applied) */
+       *rs = exp(-log(sampler_random_fract()) / n);
+}
+
+double
+reservoir_get_next_S(ReservoirState rs, double t, int n)
+{
+       double          S;
+
+       /* The magic constant here is T from Vitter's paper */
+       if (t <= (22.0 * n))
+       {
+               /* Process records using Algorithm X until t is large enough */
+               double          V,
+                                       quot;
+
+               V = sampler_random_fract(); /* Generate V */
+               S = 0;
+               t += 1;
+               /* Note: "num" in Vitter's code is always equal to t - n */
+               quot = (t - (double) n) / t;
+               /* Find min S satisfying (4.1) */
+               while (quot > V)
+               {
+                       S += 1;
+                       t += 1;
+                       quot *= (t - (double) n) / t;
+               }
+       }
+       else
+       {
+               /* Now apply Algorithm Z */
+               double          W = *rs;
+               double          term = t - (double) n + 1;
+
+               for (;;)
+               {
+                       double          numer,
+                                               numer_lim,
+                                               denom;
+                       double          U,
+                                               X,
+                                               lhs,
+                                               rhs,
+                                               y,
+                                               tmp;
+
+                       /* Generate U and X */
+                       U = sampler_random_fract();
+                       X = t * (W - 1.0);
+                       S = floor(X);           /* S is tentatively set to floor(X) */
+                       /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
+                       tmp = (t + 1) / term;
+                       lhs = exp(log(((U * tmp * tmp) * (term + S)) / (t + X)) / n);
+                       rhs = (((t + X) / (term + S)) * term) / t;
+                       if (lhs <= rhs)
+                       {
+                               W = rhs / lhs;
+                               break;
+                       }
+                       /* Test if U <= f(S)/cg(X) */
+                       y = (((U * (t + 1)) / term) * (t + S + 1)) / (t + X);
+                       if ((double) n < S)
+                       {
+                               denom = t;
+                               numer_lim = term + S;
+                       }
+                       else
+                       {
+                               denom = t - (double) n + S;
+                               numer_lim = t + 1;
+                       }
+                       for (numer = t + S; numer >= numer_lim; numer -= 1)
+                       {
+                               y *= numer / denom;
+                               denom -= 1;
+                       }
+                       W = exp(-log(sampler_random_fract()) / n);              /* Generate W in advance */
+                       if (exp(log(y) / n) <= (t + X) / t)
+                               break;
+               }
+               *rs = W;
+       }
+       return S;
+}
+
+
+/*----------
+ * Random number generator used by sampling
+ *----------
+ */
+
+/* Select a random value R uniformly distributed in (0 - 1) */
+double
+sampler_random_fract()
+{
+       return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
+}
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h

index 71f016552bf33454f07bb888531530bd01b0f850..ce7b28d22c7850de8dd6a623857716f1d3efaa71 100644 (file)
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -197,8 +197,5 @@ extern void analyze_rel(Oid relid, RangeVar *relation, int options,
                         VacuumParams *params, List *va_cols, bool in_outer_xact,
                         BufferAccessStrategy bstrategy);
  extern bool std_typanalyze(VacAttrStats *stats);
-extern double anl_random_fract(void);
-extern double anl_init_selection_state(int n);
-extern double anl_get_next_S(double t, int n, double *stateptr);
  
  #endif   /* VACUUM_H */
diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h

new file mode 100644 (file)

index 0000000..e3e7f9c
--- /dev/null
+++ b/src/include/utils/sampling.h
@@ -0,0 +1,44 @@
+/*-------------------------------------------------------------------------
+ *
+ * sampling.h
+ *       definitions for sampling functions
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/sampling.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef SAMPLING_H
+#define SAMPLING_H
+
+#include "storage/bufmgr.h"
+
+extern double sampler_random_fract(void);
+
+/* Block sampling methods */
+/* Data structure for Algorithm S from Knuth 3.4.2 */
+typedef struct
+{
+       BlockNumber N;                          /* number of blocks, known in advance */
+       int                     n;                              /* desired sample size */
+       BlockNumber t;                          /* current block number */
+       int                     m;                              /* blocks selected so far */
+} BlockSamplerData;
+
+typedef BlockSamplerData *BlockSampler;
+
+extern void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
+                                 int samplesize, long randseed);
+extern bool BlockSampler_HasMore(BlockSampler bs);
+extern BlockNumber BlockSampler_Next(BlockSampler bs);
+
+/* Reservoid sampling methods */
+typedef double ReservoirStateData;
+typedef ReservoirStateData *ReservoirState;
+
+extern void reservoir_init_selection_state(ReservoirState rs, int n);
+extern double reservoir_get_next_S(ReservoirState rs, double t, int n);
+
+#endif /* SAMPLING_H */
author	Simon Riggs <simon@2ndQuadrant.com>
	Fri, 15 May 2015 02:02:54 +0000 (04:02 +0200)
committer	Simon Riggs <simon@2ndQuadrant.com>
	Fri, 15 May 2015 02:02:54 +0000 (04:02 +0200)
contrib/file_fdw/file_fdw.c		patch \| blob \| history
contrib/postgres_fdw/postgres_fdw.c		patch \| blob \| history
src/backend/commands/analyze.c		patch \| blob \| history
src/backend/utils/misc/Makefile		patch \| blob \| history
src/backend/utils/misc/sampling.c	[new file with mode: 0644]	patch \| blob
src/include/commands/vacuum.h		patch \| blob \| history
src/include/utils/sampling.h	[new file with mode: 0644]	patch \| blob