#945, expose and add selectivity to the 3d/4d index (&&&) bindings

author Paul Ramsey <pramsey@cleverelephant.ca>

Tue, 4 Dec 2012 19:54:29 +0000 (19:54 +0000)

committer Paul Ramsey <pramsey@cleverelephant.ca>

Tue, 4 Dec 2012 19:54:29 +0000 (19:54 +0000)
author Paul Ramsey <pramsey@cleverelephant.ca>
Tue, 4 Dec 2012 19:54:29 +0000 (19:54 +0000)
committer Paul Ramsey <pramsey@cleverelephant.ca>
Tue, 4 Dec 2012 19:54:29 +0000 (19:54 +0000)
diff --git a/NEWS b/NEWS

index ef32e6454067fa86e38cfb7be707f51e6b8a123a..54a4839685ea38cf30a9176d3e5934be1dcdac09 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -57,6 +57,8 @@ PostGIS 2.1.0
    - #1895, new r-tree node splitting algorithm (Alex Korotkov)
    - #1709, ST_NotSameAlignmentReason(raster, raster)
    - #1293, ST_Resize(raster) to resize rasters based upon width/height
+  - #945, improved join selectivity, N-D selectivity calculations, user
+    accessible selectivity and stats reader functions for testing
  
  * Enhancements *
    - #823,  tiger geocoder: Make loader_generate_script download portion 
diff --git a/liblwgeom/g_box.c b/liblwgeom/g_box.c

index 908539080c42ee1dfaf8899bdd71b1b25b487024..486dc5f4e5ae8ac905cafc4e1fe6db47e41c214e 100644 (file)
--- a/liblwgeom/g_box.c
+++ b/liblwgeom/g_box.c
@@ -142,6 +142,37 @@ int gbox_same(const GBOX *g1, const GBOX *g2)
         return LW_TRUE;
  }
  
+int gbox_is_valid(const GBOX *gbox)
+{
+       /* X */
+       if ( ! isfinite(gbox->xmin) || isnan(gbox->xmin) ||
+            ! isfinite(gbox->xmax) || isnan(gbox->xmax) )
+               return LW_FALSE;
+               
+       /* Y */
+       if ( ! isfinite(gbox->ymin) || isnan(gbox->ymin) ||
+            ! isfinite(gbox->ymax) || isnan(gbox->ymax) )
+               return LW_FALSE;
+               
+       /* Z */
+       if ( FLAGS_GET_GEODETIC(gbox->flags) || FLAGS_GET_Z(gbox->flags) )
+       {
+               if ( ! isfinite(gbox->zmin) || isnan(gbox->zmin) ||
+                    ! isfinite(gbox->zmax) || isnan(gbox->zmax) )
+                       return LW_FALSE;
+       }
+
+       /* M */
+       if ( FLAGS_GET_M(gbox->flags) )
+       {
+               if ( ! isfinite(gbox->mmin) || isnan(gbox->mmin) ||
+                    ! isfinite(gbox->mmax) || isnan(gbox->mmax) )
+                       return LW_FALSE;
+       }
+       
+       return LW_TRUE;         
+}
+
  int gbox_merge_point3d(const POINT3D *p, GBOX *gbox)
  {
         if ( gbox->xmin > p->x ) gbox->xmin = p->x;
diff --git a/liblwgeom/liblwgeom.h.in b/liblwgeom/liblwgeom.h.in

index 97dde86706abcd579cb44b47377736a413b4ae46..6053c0ffe18efeed3bb24b441082f1be9a28991b 100644 (file)
--- a/liblwgeom/liblwgeom.h.in
+++ b/liblwgeom/liblwgeom.h.in
@@ -1626,6 +1626,11 @@ extern int gbox_same(const GBOX *g1, const GBOX *g2);
   */
  extern void gbox_float_round(GBOX *gbox);
  
+/**
+* Return false if any of the dimensions is NaN or infinite
+*/
+extern int gbox_is_valid(const GBOX *gbox);
+
  /**
  * Utility function to get type number from string. For example, a string 'POINTZ' 
  * would return type of 1 and z of 1 and m of 0. Valid 
diff --git a/libpgcommon/gserialized_gist.c b/libpgcommon/gserialized_gist.c

index d28643a4740faf699c0a34f865623c962a60abca..3bfb1c0e183df59b7e87e6e109ed95efe99866a1 100644 (file)
--- a/libpgcommon/gserialized_gist.c
+++ b/libpgcommon/gserialized_gist.c
@@ -49,6 +49,16 @@ char* gidx_to_string(GIDX *a)
  #endif
  
  
+static uint8_t
+gserialized_datum_get_flags(Datum gsdatum)
+{
+       GSERIALIZED *gpart;
+       POSTGIS_DEBUG(4, "entered function");
+       gpart = (GSERIALIZED*)PG_DETOAST_DATUM_SLICE(gsdatum, 0, 40);
+       POSTGIS_DEBUGF(4, "got flags %d", gpart->flags);
+       return gpart->flags;
+}
+
  /**
  * Given a #GSERIALIZED datum, as quickly as possible (peaking into the top
  * of the memory) return the gbox extents. Does not deserialize the geometry,
@@ -66,6 +76,8 @@ gserialized_datum_get_gbox_p(Datum gsdatum, GBOX *gbox)
                 return LW_FAILURE;
                 
         gbox_from_gidx(gidx, gbox);
+       gbox->flags = gserialized_datum_get_flags(gsdatum);
+       
         return LW_SUCCESS;
  }
  
diff --git a/postgis/Makefile.in b/postgis/Makefile.in

index c1d8ba949182e5a3baf0d7cc7486ee1962d36460..017350542b4e0f7c3064941b2f3fce2954df4720 100644 (file)
--- a/postgis/Makefile.in
+++ b/postgis/Makefile.in
@@ -55,6 +55,7 @@ PG_OBJS= \
         gserialized_typmod.o \
         gserialized_gist_2d.o \
         gserialized_gist_nd.o \
+       gserialized_estimate.o \
         geography_inout.o \
         geography_btree.o \
         geography_estimate.o \
diff --git a/postgis/geography.sql.in.c b/postgis/geography.sql.in.c

index 811ca3ed93bae912d1a687a7e7171651c9523499..77d643d243676c07e2671cf0db75cdba710719fc 100644 (file)
--- a/postgis/geography.sql.in.c
+++ b/postgis/geography.sql.in.c
@@ -52,7 +52,7 @@ CREATE OR REPLACE FUNCTION geography_send(geography)
  -- Availability: 1.5.0
  CREATE OR REPLACE FUNCTION geography_analyze(internal)
         RETURNS bool
-       AS 'MODULE_PATHNAME','geography_analyze'
+       AS 'MODULE_PATHNAME','gserialized_analyze_nd'
         LANGUAGE 'c' VOLATILE STRICT; 
  
  -- Availability: 1.5.0
@@ -255,8 +255,8 @@ CREATE OR REPLACE FUNCTION geography_overlaps(geography, geography)
  CREATE OPERATOR && (
         LEFTARG = geography, RIGHTARG = geography, PROCEDURE = geography_overlaps,
         COMMUTATOR = '&&',
-       RESTRICT = geography_gist_selectivity, 
-       JOIN = geography_gist_join_selectivity
+       RESTRICT = gserialized_gist_sel_nd,
+       JOIN = gserialized_gist_joinsel_nd      
  );
  
  
diff --git a/postgis/gserialized_estimate.c b/postgis/gserialized_estimate.c

new file mode 100644 (file)

index 0000000..f1d3f95
--- /dev/null
+++ b/postgis/gserialized_estimate.c
@@ -0,0 +1,2175 @@
+/**********************************************************************
+ * PostGIS - Spatial Types for PostgreSQL
+ * http://postgis.refractions.net
+ *
+ * Copyright 2012 (C) Paul Ramsey <pramsey@cleverelephant.ca>
+ *
+ * This is free software; you can redistribute and/or modify it under
+ * the terms of the GNU General Public Licence. See the COPYING file.
+ *
+ **********************************************************************/
+
+
+/**********************************************************************
+ THEORY OF OPERATION 
+
+The ANALYZE command hooks to a callback (gserialized_analyze_nd) that
+calculates (compute_gserialized_stats_mode) two histograms of occurances of 
+features, once for the 2D domain (and the && operator) one for the 
+ND domain (and the &&& operator).
+
+Queries in PostgreSQL call into the selectivity sub-system to find out
+the relative effectiveness of different clauses in sub-setting 
+relations. Queries with constant arguments call gserialized_gist_sel,
+queries with relations on both sides call gserialized_gist_joinsel.
+
+gserialized_gist_sel sums up the values in the histogram that overlap
+the contant search box.
+
+gserialized_gist_joinsel sums up the product of the overlapping 
+cells in each relation's histogram.
+
+Depending on the operator and type, the mode of selectivity calculation 
+will be 2D or ND.
+
+- geometry && geometry ==> 2D
+- geometry &&& geometry ==> ND
+- geography && geography ==> ND
+
+The 2D mode is put in effect by retrieving the 2D histogram from the 
+statistics cache and then allowing the generic ND calculations to
+go to work.
+
+TO DO: More testing and examination of the &&& operator and mixed
+dimensionality cases. (2D geometry) &&& (3D column), etc.
+
+**********************************************************************/
+
+#include "postgres.h"
+#include "executor/spi.h"
+#include "fmgr.h"
+#include "commands/vacuum.h"
+#include "nodes/relation.h"
+#include "parser/parsetree.h"
+#include "utils/array.h"
+#include "utils/lsyscache.h"
+#include "utils/builtins.h"
+#include "utils/syscache.h"
+#include "utils/rel.h"
+
+#include "../postgis_config.h"
+
+#if POSTGIS_PGSQL_VERSION >= 93
+       #include "access/htup_details.h"
+#endif
+
+#include "stringbuffer.h"
+#include "liblwgeom.h"
+#include "lwgeom_pg.h"       /* For debugging macros. */
+#include "gserialized_gist.h" /* For index common functions */
+
+#include <math.h>
+#if HAVE_IEEEFP_H
+#include <ieeefp.h>
+#endif
+#include <float.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+
+/* Prototypes */
+Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS);
+Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS);
+Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS);
+Datum gserialized_gist_sel(PG_FUNCTION_ARGS);
+Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS);
+Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
+Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
+Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
+Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
+Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
+Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
+
+/**
+* Assign a number to the n-dimensional statistics kind
+*
+* tgl suggested:
+*
+* 1-100:       reserved for assignment by the core Postgres project
+* 100-199: reserved for assignment by PostGIS
+* 200-9999: reserved for other globally-known stats kinds
+* 10000-32767: reserved for private site-local use
+*/
+#define STATISTIC_KIND_ND 102
+#define STATISTIC_KIND_2D 103
+#define STATISTIC_SLOT_ND 0
+#define STATISTIC_SLOT_2D 1
+
+/*
+* The SD factor restricts the side of the statistics histogram
+* based on the standard deviation of the extent of the data.
+* SDFACTOR is the number of standard deviations from the mean
+* the histogram will extend.
+*/
+#define SDFACTOR 3.25
+
+/**
+* The maximum number of dimensions our code can handle.
+* We'll use this to statically allocate a bunch of
+* arrays below.
+*/
+#define ND_DIMS 4
+
+/**
+* Minimum width of a dimension that we'll bother trying to
+* compute statistics on. Bearing in mind we have no control
+* over units, but noting that for geographics, 10E-5 is in the
+* range of meters, we go lower than that.
+*/
+#define MIN_DIMENSION_WIDTH 0.000000001
+
+/**
+* Default geometry selectivity factor
+*/
+#define DEFAULT_ND_SEL 0.0001
+#define DEFAULT_ND_JOINSEL 0.001
+
+/**
+* More modest fallback selectivity factor
+*/
+#define FALLBACK_ND_SEL 0.2    
+#define FALLBACK_ND_JOINSEL 0.3        
+
+/**
+* N-dimensional box type for calculations, to avoid doing
+* explicit axis conversions from GBOX in all calculations
+* at every step.
+*/
+typedef struct ND_BOX_T
+{
+       float4 min[ND_DIMS];
+       float4 max[ND_DIMS];
+} ND_BOX;
+
+/**
+* N-dimensional box index type
+*/
+typedef struct ND_IBOX_T
+{
+       int min[ND_DIMS];
+       int max[ND_DIMS];
+} ND_IBOX;
+
+
+/**
+* N-dimensional statistics structure. Well, actually 
+* four-dimensional, but set up to handle arbirary dimensions
+* if necessary (really, we just want to get the 2,3,4-d cases
+* into one shared piece of code).
+*/
+typedef struct ND_STATS_T
+{
+       /* Dimensionality of the histogram. */
+       float4 ndims;
+       
+       /* Size of n-d histogram in each dimension. */
+       float4 size[ND_DIMS];
+
+       /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
+       ND_BOX extent;
+
+       /* How many rows in the table itself? */
+       float4 table_features;
+       
+       /* How many rows were in the sample that built this histogram? */
+       float4 sample_features;
+       
+       /* How many not-Null/Empty features were in the sample? */
+       float4 not_null_features;
+
+       /* How many features actually got sampled in the histogram? */
+       float4 histogram_features;
+
+       /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
+       float4 histogram_cells;
+       
+       /* How many cells did those histogram features cover? */
+       /* Since we are pro-rating coverage, this number should */
+       /* now always equal histogram_features */
+       float4 cells_covered;
+       
+       /* Variable length # of floats for histogram */
+       float4 value[1];
+} ND_STATS;
+
+
+
+
+/**
+* Given that geodetic boxes are X/Y/Z regardless of the 
+* underlying geometry dimensionality and other boxes
+* are guided by HAS_Z/HAS_M in their dimesionality,
+* we have a little utility function to make it easy.
+*/
+static int
+gbox_ndims(const GBOX* gbox)
+{
+       int dims = 2;
+       if ( FLAGS_GET_GEODETIC(gbox->flags) )
+               return 3;
+       if ( FLAGS_GET_Z(gbox->flags) )
+               dims++;
+       if ( FLAGS_GET_M(gbox->flags) )
+               dims++;
+       return dims;
+}
+
+/**
+* Utility function to see if the first
+* letter of the mode argument is 'N'. Used
+* by the _postgis_* functions.
+*/
+static int
+text_p_get_mode(const text *txt)
+{
+       int mode = 2;
+       char *modestr = text2cstring(txt);
+       if ( modestr[0] == 'N' )
+               mode = 0;
+       pfree(modestr);
+       return mode;
+}
+
+
+/**
+* Integer comparison function for qsort 
+*/
+static int 
+cmp_int (const void *a, const void *b)
+{
+       int ia = *((const int*)a);
+       int ib = *((const int*)b);
+
+       if ( ia == ib )
+               return 0;
+       else if ( ia > ib )
+               return 1;
+       else 
+               return -1;
+}
+
+/**
+* The difference between the fourth and first quintile values,
+* the "inter-quintile range"
+*/
+static int
+range_quintile(int *vals, int nvals)
+{
+       qsort(vals, nvals, sizeof(int), cmp_int);
+       return vals[4*nvals/5] - vals[nvals/5];
+}
+
+/**
+* Given int array, return sum of values.
+*/
+static int 
+total_int(const int *vals, int nvals)
+{
+       int i;
+       int total = 0;
+       /* Calculate total */
+       for ( i = 0; i < nvals; i++ )
+               total += vals[i];
+
+       return total;
+}
+
+/**
+* Given double array, return sum of values.
+*/
+static double 
+total_double(const double *vals, int nvals)
+{
+       int i;
+       float total = 0;
+       /* Calculate total */
+       for ( i = 0; i < nvals; i++ )
+               total += vals[i];
+
+       return total;
+}
+
+/**
+* The average of an array of integers.
+*/
+static double
+avg(const int *vals, int nvals)
+{
+       int t = total_int(vals, nvals);
+       return (double)t / (double)nvals;
+}
+
+/**
+* The standard deviation of an array of integers.
+*/
+static double
+stddev(const int *vals, int nvals)
+{
+       int i;
+       double sigma2 = 0;
+       double mean = avg(vals, nvals);
+       
+       /* Calculate sigma2 */
+       for ( i = 0; i < nvals; i++ )
+       {
+               double v = (double)(vals[i]);
+               sigma2 += (mean - v) * (mean - v);
+       }
+       return sqrt(sigma2 / nvals);
+}
+
+/**
+* Given a position in the n-d histogram (i,j,k) return the
+* position in the 1-d values array.
+*/
+static int 
+nd_stats_value_index(const ND_STATS *stats, int *indexes)
+{
+       int d;
+       int accum = 1, vdx = 0;
+       
+       /* Calculate the index into the 1-d values array that the (i,j,k,l) */
+       /* n-d histogram coordinate implies. */
+       /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
+       for ( d = 0; d < (int)(stats->ndims); d++ )
+       {
+               int size = (int)(stats->size[d]);
+               if ( indexes[d] < 0 || indexes[d] >= size )
+               {
+                       POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
+                       return -1;
+               }
+               vdx += indexes[d] * accum;
+               accum *= size;
+       }
+       return vdx;
+}
+
+/** 
+* Convert an #ND_BOX to a JSON string for printing 
+*/
+static char*
+nd_box_to_json(const ND_BOX *nd_box, int ndims)
+{
+       char *rv;
+       int i;
+       stringbuffer_t *sb = stringbuffer_create();
+       
+       stringbuffer_append(sb, "{\"min\":[");
+       for ( i = 0; i < ndims; i++ )
+       {
+               if ( i ) stringbuffer_append(sb, ",");
+               stringbuffer_aprintf(sb, "%.6g", nd_box->min[i]);
+       }
+       stringbuffer_append(sb,  "],\"max\":[");
+       for ( i = 0; i < ndims; i++ )
+       {
+               if ( i ) stringbuffer_append(sb, ",");
+               stringbuffer_aprintf(sb, "%.6g", nd_box->max[i]);
+       }
+       stringbuffer_append(sb,  "]}");
+
+       rv = stringbuffer_getstringcopy(sb);
+       stringbuffer_destroy(sb);
+       return rv;      
+}
+
+
+/**
+* Convert an #ND_STATS to a JSON representation for 
+* external use.
+*/
+static char*
+nd_stats_to_json(const ND_STATS *nd_stats)
+{
+       char *json_extent, *str;
+       int d;
+       stringbuffer_t *sb = stringbuffer_create();
+       int ndims = (int)roundf(nd_stats->ndims);
+       
+       stringbuffer_append(sb, "{");
+       stringbuffer_aprintf(sb, "\"ndims\":%d,", ndims);
+       
+       /* Size */
+       stringbuffer_append(sb, "\"size\":[");
+       for ( d = 0; d < ndims; d++ )
+       {
+               if ( d ) stringbuffer_append(sb, ",");
+               stringbuffer_aprintf(sb, "%d", (int)roundf(nd_stats->size[d]));
+       }
+       stringbuffer_append(sb, "],");
+
+       /* Extent */
+       json_extent = nd_box_to_json(&(nd_stats->extent), ndims);
+       stringbuffer_aprintf(sb, "\"extent\":%s,", json_extent);
+       pfree(json_extent);
+       
+       stringbuffer_aprintf(sb, "\"table_features\":%d,", (int)roundf(nd_stats->table_features));
+       stringbuffer_aprintf(sb, "\"sample_features\":%d,", (int)roundf(nd_stats->sample_features));
+       stringbuffer_aprintf(sb, "\"not_null_features\":%d,", (int)roundf(nd_stats->not_null_features));
+       stringbuffer_aprintf(sb, "\"histogram_features\":%d,", (int)roundf(nd_stats->histogram_features));
+       stringbuffer_aprintf(sb, "\"histogram_cells\":%d,", (int)roundf(nd_stats->histogram_cells));
+       stringbuffer_aprintf(sb, "\"cells_covered\":%d", (int)roundf(nd_stats->cells_covered));
+       stringbuffer_append(sb, "}");
+
+       str = stringbuffer_getstringcopy(sb);
+       stringbuffer_destroy(sb);
+       return str;
+}      
+
+
+/**
+* Create a printable view of the #ND_STATS histogram.
+* Caller is responsible for freeing.
+* Currently only prints first two dimensions.
+*/
+static char* 
+nd_stats_to_grid(const ND_STATS *stats)
+{
+       char *rv;
+       int j, k;
+//     int ndims = (int)roundf(stats->ndims);
+       int sizex = (int)roundf(stats->size[0]);
+       int sizey = (int)roundf(stats->size[1]);
+       stringbuffer_t *sb = stringbuffer_create();
+
+       for ( k = 0; k < sizey; k++ )
+       {
+               for ( j = 0; j < sizex; j++ )
+               {
+                       stringbuffer_aprintf(sb, "%3d ", (int)roundf(stats->value[j + k*sizex]));
+               }
+               stringbuffer_append(sb,  "\n");
+       }
+               
+       rv = stringbuffer_getstringcopy(sb);
+       stringbuffer_destroy(sb);
+       return rv;
+}
+
+
+/** Expand the bounds of target to include source */
+static int 
+nd_box_merge(const ND_BOX *source, ND_BOX *target)
+{
+       int d;
+       for ( d = 0; d < ND_DIMS; d++ )
+       {
+               target->min[d] = Min(target->min[d], source->min[d]);
+               target->max[d] = Max(target->max[d], source->max[d]);
+       }
+       return TRUE;
+}
+
+/** Zero out an ND_BOX */
+static int 
+nd_box_init(ND_BOX *a)
+{
+       memset(a, 0, sizeof(ND_BOX));
+       return TRUE;
+}
+
+/** 
+* Prepare an ND_BOX for bounds calculation: 
+* set the maxes to the smallest thing possible and
+* the mins to the largest. 
+*/
+static int 
+nd_box_init_bounds(ND_BOX *a)
+{
+       int d;
+       for ( d = 0; d < ND_DIMS; d++ )
+       {
+               a->min[d] = FLT_MAX;
+               a->max[d] = FLT_MIN;
+       }
+       return TRUE;
+}
+
+/** Set the values of an #ND_BOX from a #GBOX */
+static void 
+nd_box_from_gbox(const GBOX *gbox, ND_BOX *nd_box)
+{
+       int d = 0;
+       POSTGIS_DEBUGF(3, " %s", gbox_to_string(gbox));
+       
+       nd_box_init(nd_box);
+       nd_box->min[d] = gbox->xmin;
+       nd_box->max[d] = gbox->xmax;
+       d++;
+       nd_box->min[d] = gbox->ymin;
+       nd_box->max[d] = gbox->ymax;
+       d++;
+       if ( FLAGS_GET_GEODETIC(gbox->flags) )
+       {
+               nd_box->min[d] = gbox->zmin;
+               nd_box->max[d] = gbox->zmax;
+               return;
+       }
+       if ( FLAGS_GET_Z(gbox->flags) )
+       {
+               nd_box->min[d] = gbox->zmin;
+               nd_box->max[d] = gbox->zmax;
+               d++;
+       }
+       if ( FLAGS_GET_M(gbox->flags) )
+       {
+               nd_box->min[d] = gbox->mmin;
+               nd_box->max[d] = gbox->mmax;
+               d++;
+       }
+       return;
+}
+
+/** 
+* Return TRUE if #ND_BOX a overlaps b, false otherwise.
+*/
+static int
+nd_box_intersects(const ND_BOX *a, const ND_BOX *b, int ndims)
+{
+       int d;
+       for ( d = 0; d < ndims; d++ )
+       {
+               if ( (a->min[d] > b->max[d]) || (a->max[d] < b->min[d]) )
+                       return FALSE;
+       }
+       return TRUE;
+}
+
+/** 
+* Return TRUE if #ND_BOX a contains b, false otherwise.
+*/
+static int
+nd_box_contains(const ND_BOX *a, const ND_BOX *b, int ndims)
+{
+       int d;
+       for ( d = 0; d < ndims; d++ )
+       {
+               if ( ! ((a->min[d] < b->min[d]) && (a->max[d] > b->max[d])) )
+                       return FALSE;
+       }
+       return TRUE;
+}
+
+/**
+* Expand an #ND_BOX ever so slightly. Expand parameter is the proportion
+* of total width to add.
+*/
+static int 
+nd_box_expand(ND_BOX *nd_box, double expansion_factor)
+{
+       int d;
+       double size;
+       for ( d = 0; d < ND_DIMS; d++ )
+       {
+               size = nd_box->max[d] - nd_box->min[d];
+               if ( size <= 0 ) continue;
+               nd_box->min[d] -= size * expansion_factor / 2;
+               nd_box->max[d] += size * expansion_factor / 2;
+       }
+       return TRUE;
+}
+
+/** 
+* What stats cells overlap with this ND_BOX? Put the lowest cell
+* addresses in ND_IBOX->min and the highest in ND_IBOX->max
+*/
+static inline int
+nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
+{
+       int d;
+       
+       POSTGIS_DEBUGF(4, " nd_box: %s", nd_box_to_json(nd_box, nd_stats->ndims));
+       
+       /* Initialize ibox */
+       memset(nd_ibox, 0, sizeof(ND_IBOX));
+       
+       /* In each dimension... */
+       for ( d = 0; d < nd_stats->ndims; d++ )
+       {
+               double smin = nd_stats->extent.min[d];
+               double smax = nd_stats->extent.max[d];
+               double width = smax - smin;
+               int size = roundf(nd_stats->size[d]);
+               
+               /* ... find cells the box overlaps with in this dimension */
+               nd_ibox->min[d] = floor(size * (nd_box->min[d] - smin) / width);
+               nd_ibox->max[d] = floor(size * (nd_box->max[d] - smin) / width);
+
+               POSTGIS_DEBUGF(5, " stats: dim %d: min %g: max %g: width %g", d, smin, smax, width);
+               POSTGIS_DEBUGF(5, " overlap: dim %d: (%d, %d)", d, nd_ibox->min[d], nd_ibox->max[d]);
+               
+               /* Push any out-of range values into range */
+               nd_ibox->min[d] = Max(nd_ibox->min[d], 0);
+               nd_ibox->max[d] = Min(nd_ibox->max[d], size-1);
+       }
+       return TRUE;
+}
+
+/**
+* Returns the proportion of b2 that is covered by b1.
+*/
+static inline double
+nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
+{
+       int d;
+       bool covered = TRUE;
+       double ivol = 1.0;
+       double vol2 = 1.0;
+       double vol1 = 1.0;
+       
+       for ( d = 0 ; d < ndims; d++ )
+       {
+               if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] ) 
+                       return 0.0; /* Disjoint */
+               
+               if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
+                       covered = FALSE;
+       }
+       
+       if ( covered ) 
+               return 1.0;
+
+       for ( d = 0; d < ndims; d++ )
+       {
+               double width1 = b1->max[d] - b1->min[d];
+               double width2 = b2->max[d] - b2->min[d];
+               double imin, imax, iwidth;
+               
+               vol1 *= width1;
+               vol2 *= width2;
+               
+               imin = Max(b1->min[d], b2->min[d]);
+               imax = Min(b1->max[d], b2->max[d]);
+               iwidth = imax - imin;
+               iwidth = Max(0.0, iwidth);
+               
+               ivol *= iwidth;
+       }
+       
+       if ( vol2 == 0.0 ) 
+               return vol2;
+       
+       return ivol / vol2;
+}
+
+
+/**
+* Calculate how much a set of boxes is homogenously distributed
+* or contentrated within one dimension, returning the range_quintile of
+* of the overlap counts per cell in a uniform
+* partition of the extent of the dimension. 
+* A uniform distribution of counts will have a small range
+* and will require few cells in a selectivity histogram.
+* A diverse distribution of counts will have a larger range
+* and require more cells in a selectivity histogram (to
+* distinguish between areas of feature density and areas
+* of feature sparseness. This measurement should help us
+* identify cases like X/Y/Z data where there is lots of variability
+* in density in X/Y (diversely in a multi-kilometer range) and far 
+* less in Z (in a few-hundred meter range).
+*/
+static int
+nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
+{
+       /* How many bins shall we use in figuring out the distribution? */
+       static int num_bins = 50; 
+       int d, i, k, range;
+       int counts[num_bins];
+       double smin, smax;   /* Spatial min, spatial max */
+       double swidth;       /* Spatial width of dimension */
+       double average, sdev, sdev_ratio;
+       int   bmin, bmax;   /* Bin min, bin max */
+       const ND_BOX *ndb; 
+       
+       /* For each dimension... */     
+       for ( d = 0; d < ndims; d++ )
+       {
+               /* Initialize counts for this dimension */
+               memset(counts, 0, sizeof(int)*num_bins);
+               
+               smin = extent->min[d];
+               smax = extent->max[d];
+               swidth = smax - smin;
+               
+               /* Don't try and calculate distribution of overly narrow dimensions */
+               if ( swidth < MIN_DIMENSION_WIDTH )
+               {
+                       distribution[d] = 0;
+                       continue;
+               }
+
+               /* Sum up the overlaps of each feature with the dimensional bins */
+               for ( i = 0; i < num_boxes; i++ )
+               {
+                       double minoffset, maxoffset;
+                       
+                       /* Skip null entries */
+                       ndb = nd_boxes[i];      
+                       if ( ! ndb ) continue;
+                       
+                       /* Where does box fall relative to the working range */
+                       minoffset = ndb->min[d] - smin;
+                       maxoffset = ndb->max[d] - smin;                         
+
+                       /* Skip boxes that our outside our working range */
+                       if ( minoffset < 0 || minoffset > swidth ||
+                            maxoffset < 0 || maxoffset > swidth )
+                       {
+                               continue;
+                       }
+                               
+                       /* What bins does this range correspond to? */
+                       bmin = num_bins * (minoffset) / swidth;
+                       bmax = num_bins * (maxoffset) / swidth;
+                       
+                       POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
+               
+                       /* Increment the counts in all the bins this feature overlaps */
+                       for ( k = bmin; k <= bmax; k++ )
+                       {
+                               counts[k] += 1;
+                       }
+                       
+               }
+
+               /* How dispersed is the distribution of features across bins? */
+               range = range_quintile(counts, num_bins);
+               average = avg(counts, num_bins);
+               sdev = stddev(counts, num_bins);
+               sdev_ratio = sdev/average;
+               
+               POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);
+               POSTGIS_DEBUGF(3, " dimension %d: average = %.6g", d, average);
+               POSTGIS_DEBUGF(3, " dimension %d: stddev = %.6g", d, sdev);
+               POSTGIS_DEBUGF(3, " dimension %d: stddev_ratio = %.6g", d, sdev_ratio);
+               distribution[d] = range;
+       }
+       
+       return TRUE;
+}
+
+/**
+* Given an n-d index array (counter), and a domain to increment it
+* in (ibox) increment it by one, unless it's already at the max of 
+* the domain, in which case return false.
+*/
+static inline int
+nd_increment(ND_IBOX *ibox, int ndims, int *counter)
+{
+       int d = 0;
+
+       while ( d < ndims )
+       {
+               if ( counter[d] < ibox->max[d] )
+               {
+                       counter[d] += 1;
+                       break;
+               }
+               counter[d] = ibox->min[d];
+               d++;
+       }
+       /* That's it, cannot increment any more! */
+       if ( d == ndims )
+               return FALSE;
+
+       /* Increment complete! */
+       return TRUE;
+}
+
+
+/**
+* Pull the stats object from the PgSQL system catalogs. Used
+* by the selectivity functions and the debugging functions.
+*/
+static ND_STATS*
+pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode)
+{
+       HeapTuple stats_tuple;
+       float4 *floatptr;
+       ND_STATS *nd_stats;
+       int rv, nvalues;
+       int stats_kind = STATISTIC_KIND_ND;
+
+       /* First pull the stats tuple */
+       stats_tuple = SearchSysCache2(STATRELATT, table_oid, att_num);
+       if ( ! stats_tuple )
+       {
+               POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid));
+               return NULL;
+       }
+       
+       /* If we're in 2D mode, set the kind appropriately */
+       if ( mode == 2 )
+               stats_kind = STATISTIC_KIND_2D;
+                       
+       /* Then read the geom status histogram from that */
+       rv = get_attstatsslot(stats_tuple, 0, 0, stats_kind, InvalidOid, NULL, NULL, NULL, &floatptr, &nvalues);
+       if ( ! rv )
+       {
+               ReleaseSysCache(stats_tuple);
+               POSTGIS_DEBUGF(2, "histogram for \"%s\" does not exist?", get_rel_name(table_oid));
+               return NULL;
+       }
+       
+       /* Clone the stats here so we can release the attstatsslot immediately */
+       nd_stats = palloc(sizeof(float) * nvalues);
+       memcpy(nd_stats, floatptr, sizeof(float) * nvalues);
+       
+       /* Clean up */
+       free_attstatsslot(0, NULL, 0, floatptr, nvalues);
+       ReleaseSysCache(stats_tuple);
+
+       return nd_stats;
+}
+
+
+/**
+* Pull the stats object from the PgSQL system catalogs. The
+* debugging functions are taking human input (table names)
+* and columns, so we have to look those up first.
+*/
+static ND_STATS*
+pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode)
+{
+       const char *att_name = text2cstring(att_text);
+       AttrNumber att_num;
+
+       /* We know the name? Look up the num */
+       if ( att_text )
+       {
+               /* Get the attribute number */
+               att_num = get_attnum(table_oid, att_name);
+               if  ( ! att_num )
+                       elog(ERROR, "attribute \"%s\" does not exist", att_name);
+       }
+       else 
+       {
+               elog(ERROR, "attribute name is null");
+       }
+       
+       return pg_get_nd_stats(table_oid, att_num, mode);
+}
+
+/**
+* Given two statistics histograms, what is the selectivity
+* of a join driven by the && or &&& operator?
+* 
+* Join selectivity is defined as the number of rows returned by the
+* join operator divided by the number of rows that an 
+* unconstrained join would return (nrows1*nrows2).
+* 
+* To get the estimate of join rows, we walk through the cells
+* of one histogram, and multiply the cell value by the
+* proportion of the cells in the other histogram the cell
+* overlaps: val += val1 * ( val2 * overlap_ratio )
+*/
+static float8
+estimate_join_selectivity(const ND_STATS *s1, const ND_STATS *s2)
+{
+       int ncells1, ncells2;
+       int ndims1, ndims2, ndims;
+       double ntuples_max;
+       double ntuples_not_null1, ntuples_not_null2;
+       
+       ND_BOX extent1, extent2;
+       ND_IBOX ibox1, ibox2;
+       int at1[ND_DIMS];
+       int at2[ND_DIMS];
+       double min1[ND_DIMS];
+       double width1[ND_DIMS];
+       double cellsize1[ND_DIMS];
+       int size2[ND_DIMS];
+       double min2[ND_DIMS];
+       double width2[ND_DIMS];
+       double cellsize2[ND_DIMS];
+       int size1[ND_DIMS];
+       int d;
+       double val = 0;
+       float8 selectivity;
+
+       /* Drop out on null inputs */
+       if ( ! ( s1 && s2 ) )
+       {
+               elog(NOTICE, " estimate_join_selectivity called with null inputs");
+               return FALLBACK_ND_SEL;
+       }
+
+       /* We need to know how many cells each side has... */
+       ncells1 = (int)roundf(s1->histogram_cells);
+       ncells2 = (int)roundf(s2->histogram_cells);
+
+       /* ...so that we can drive the summation loop with the smaller histogram. */
+       if ( ncells1 > ncells2 )
+       {
+               const ND_STATS *stats_tmp = s1;
+               s1 = s2;
+               s2 = stats_tmp;
+       }
+       
+       POSTGIS_DEBUGF(3, "s1: %s", nd_stats_to_json(s1));
+       POSTGIS_DEBUGF(3, "s2: %s", nd_stats_to_json(s2));
+       
+       /* Re-read that info after the swap */
+       ncells1 = (int)roundf(s1->histogram_cells);
+       ncells2 = (int)roundf(s2->histogram_cells);
+
+       /* Q: What's the largest possible join size these relations can create? */
+       /* A: The product of the # of non-null rows in each relation. */
+       ntuples_not_null1 = s1->table_features * (s1->not_null_features / s1->sample_features);
+       ntuples_not_null2 = s2->table_features * (s2->not_null_features / s2->sample_features);
+       ntuples_max = ntuples_not_null1 * ntuples_not_null2;
+
+       /* Get the ndims as ints */
+       ndims1 = (int)roundf(s1->ndims);
+       ndims2 = (int)roundf(s2->ndims);
+       ndims = Max(ndims1, ndims2);
+       
+       /* Get the extents */
+       extent1 = s1->extent;
+       extent2 = s2->extent;
+
+       /* If relation stats do not intersect, join is very very selective. */
+       if ( ! nd_box_intersects(&extent1, &extent2, ndims) )
+       {
+               POSTGIS_DEBUG(3, "relation stats do not intersect, returning 0");
+               PG_RETURN_FLOAT8(0.0);
+       }
+       
+       /* 
+        * First find the index range of the part of the smaller 
+        * histogram that overlaps the larger one.
+        */
+       if ( ! nd_box_overlap(s1, &extent2, &ibox1) )
+       {
+               POSTGIS_DEBUG(3, "could not calculate overlap of relations");
+               PG_RETURN_FLOAT8(FALLBACK_ND_JOINSEL);          
+       }
+       
+       /* Initialize counters / constants on s1 */
+       for ( d = 0; d < ndims1; d++ )
+       {
+               at1[d] = ibox1.min[d];
+               min1[d] = s1->extent.min[d];
+               width1[d] = s1->extent.max[d] - s1->extent.min[d];
+               size1[d] = (int)roundf(s1->size[d]);
+               cellsize1[d] = width1[d] / size1[d];
+       }
+
+       /* Initialize counters / constants on s2 */
+       for ( d = 0; d < ndims2; d++ )
+       {
+               min2[d] = s2->extent.min[d];
+               width2[d] = s2->extent.max[d] - s2->extent.min[d];
+               size2[d] = (int)roundf(s2->size[d]);
+               cellsize2[d] = width2[d] / size2[d];
+       }
+
+       /* For each affected cell of s1... */
+       do
+       {
+               double val1;
+               /* Construct the bounds of this cell */
+               ND_BOX nd_cell1;
+               nd_box_init(&nd_cell1);
+               for ( d = 0; d < ndims1; d++ )
+               {
+                       nd_cell1.min[d] = min1[d] + (at1[d]+0) * cellsize1[d];
+                       nd_cell1.max[d] = min1[d] + (at1[d]+1) * cellsize1[d];
+               }
+               
+               /* Find the cells of s2 that cell1 overlaps.. */
+               nd_box_overlap(s2, &nd_cell1, &ibox2);
+               
+               /* Initialize counter */
+               for ( d = 0; d < ndims2; d++ )
+               {
+                       at2[d] = ibox2.min[d];
+               }
+               
+               POSTGIS_DEBUGF(3, "at1 %d,%d  %s", at1[0], at1[1], nd_box_to_json(&nd_cell1, ndims1));
+               
+               /* Get the value at this cell */
+               val1 = s1->value[nd_stats_value_index(s1, at1)];
+               
+               /* For each overlapped cell of s2... */
+               do
+               {
+                       double ratio2;
+                       double val2;
+                       
+                       /* Construct the bounds of this cell */
+                       ND_BOX nd_cell2;
+                       nd_box_init(&nd_cell2);
+                       for ( d = 0; d < ndims2; d++ )
+                       {
+                               nd_cell2.min[d] = min2[d] + (at2[d]+0) * cellsize2[d];
+                               nd_cell2.max[d] = min2[d] + (at2[d]+1) * cellsize2[d];
+                       }
+
+                       POSTGIS_DEBUGF(3, "  at2 %d,%d  %s", at2[0], at2[1], nd_box_to_json(&nd_cell2, ndims2));
+                       
+                       /* Calculate overlap ratio of the cells */
+                       ratio2 = nd_box_ratio(&nd_cell1, &nd_cell2, Max(ndims1, ndims2));
+                       
+                       /* Multiply the cell counts, scaled by overlap ratio */
+                       val2 = s2->value[nd_stats_value_index(s2, at2)];
+                       POSTGIS_DEBUGF(3, "  val1 %.6g  val2 %.6g  ratio %.6g", val1, val2, ratio2);
+                       val += val1 * (val2 * ratio2);
+               }
+               while ( nd_increment(&ibox2, ndims2, at2) );
+               
+       }
+       while( nd_increment(&ibox1, ndims1, at1) );
+       
+       POSTGIS_DEBUGF(3, "val of histogram = %g", val);
+       
+       /*
+        * In order to compare our total cell count "val" to the 
+        * ntuples_max, we need to scale val up to reflect a full
+        * table estimate. So, multiply by ratio of table size to
+        * sample size.
+        */
+       val *= (s1->table_features / s1->sample_features);
+       val *= (s2->table_features / s2->sample_features);
+
+       POSTGIS_DEBUGF(3, "val scaled to full table size = %g", val);
+       
+       /*
+        * Because the cell counts are over-determined due to 
+        * double counting of features that overlap multiple cells
+        * (see the compute_gserialized_stats routine)
+        * we also have to scale our cell count "val" *down*
+        * to adjust for the double counting.
+        */
+//     val /= (s1->cells_covered / s1->histogram_features);
+//     val /= (s2->cells_covered / s2->histogram_features);
+
+       /*
+        * Finally, the selectivity is the estimated number of 
+        * rows to be returned divided by the maximum possible
+        * number of rows that can be returned.
+        */
+       selectivity = val / ntuples_max;
+       
+       return selectivity;
+}
+
+/**
+* For (geometry &&& geometry) and (geography && geography)
+* we call into the N-D mode.
+*/
+PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_nd);
+Datum gserialized_gist_joinsel_nd(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_DATUM(DirectFunctionCall5(
+          gserialized_gist_joinsel,
+          PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
+          PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
+          Int32GetDatum(0) /* ND mode */
+       ));
+}
+
+/**
+* For (geometry && geometry) 
+* we call into the 2-D mode.
+*/
+PG_FUNCTION_INFO_V1(gserialized_gist_joinsel_2d);
+Datum gserialized_gist_joinsel_2d(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_DATUM(DirectFunctionCall5(
+          gserialized_gist_joinsel,
+          PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
+          PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
+          Int32GetDatum(2) /* 2D mode */
+       ));
+}
+
+/**
+* Join selectivity of the && operator. The selectivity
+* is the ratio of the number of rows we think will be 
+* returned divided the maximum number of rows the join
+* could possibly return (the full combinatoric join).
+*
+* joinsel = estimated_nrows / (totalrows1 * totalrows2)
+*/
+PG_FUNCTION_INFO_V1(gserialized_gist_joinsel);
+Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
+{
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+       /* Oid operator = PG_GETARG_OID(1); */
+       List *args = (List *) PG_GETARG_POINTER(2);
+       JoinType jointype = (JoinType) PG_GETARG_INT16(3);
+       int mode = PG_GETARG_INT32(4);
+
+       Node *arg1, *arg2;
+       Var *var1, *var2;
+       Oid relid1, relid2;
+       
+       ND_STATS *stats1, *stats2;
+       float8 selectivity;
+
+       /* Only respond to an inner join/unknown context join */
+       if (jointype != JOIN_INNER)
+       {
+               elog(NOTICE, "gserialized_gist_joinsel: jointype %d not supported", jointype);
+               PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+       }
+
+       /* Find Oids of the geometry columns we are working with */
+       arg1 = (Node*) linitial(args);
+       arg2 = (Node*) lsecond(args);
+       var1 = (Var*) arg1;
+       var2 = (Var*) arg2;
+
+       /* We only do column joins right now, no functional joins */
+       /* TODO: handle g1 && ST_Expand(g2) */
+       if (!IsA(arg1, Var) || !IsA(arg2, Var))
+       {
+               elog(DEBUG1, "gserialized_gist_joinsel called with arguments that are not column references");
+               PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+       }
+
+       /* What are the Oids of our tables/relations? */
+       relid1 = getrelid(var1->varno, root->parse->rtable);
+       relid2 = getrelid(var2->varno, root->parse->rtable);
+
+       POSTGIS_DEBUGF(3, "using relations \"%s\" Oid(%d), \"%s\" Oid(%d)", 
+                         get_rel_name(relid1), relid1, get_rel_name(relid2), relid2);
+
+       /* Pull the stats from the stats system. */
+       stats1 = pg_get_nd_stats(relid1, var1->varattno, mode);
+       stats2 = pg_get_nd_stats(relid2, var2->varattno, mode);
+
+       /* If we can't get stats, we have to stop here! */
+       if ( ! stats1 )
+       {
+               POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid1), relid1);
+               PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+       }
+       else if ( ! stats2 )
+       {
+               POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid2), relid2);
+               PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+       }
+
+       selectivity = estimate_join_selectivity(stats1, stats2);
+       POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
+       
+       pfree(stats1);
+       pfree(stats2);
+       PG_RETURN_FLOAT8(selectivity);
+}
+
+
+
+
+/**
+ * The gserialized_analyze_nd sets this function as a 
+ * callback on the stats object when called by the ANALYZE
+ * command. ANALYZE then gathers the requisite number of 
+ * sample rows and then calls this function.
+ * 
+ * We could also pass stats->extra_data in from 
+ * gserialized_analyze_nd (things like the column type or
+ * other stuff from the system catalogs) but so far we 
+ * don't use that capability.
+ *
+ * Our job is to build some statistics on the sample data
+ * for use by operator estimators.
+ * 
+ * We will populate an n-d histogram using the provided 
+ * sample rows. The selectivity estimators (sel and j_oinsel)
+ * can then use the histogram 
+ */
+static void
+compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
+                          int sample_rows, double total_rows, int mode)
+{
+       MemoryContext old_context;
+       int d, i;                          /* Counters */
+       int notnull_cnt = 0;               /* # not null rows in the sample */
+       int null_cnt = 0;                  /* # null rows in the sample */
+       int histogram_features = 0;        /* # rows that actually got counted in the histogram */
+
+       ND_STATS *nd_stats;                /* Our histogram */
+       size_t    nd_stats_size;           /* Size to allocate */
+       
+       double total_width = 0;            /* # of bytes used by sample */
+       double total_sample_volume = 0;    /* Area/volume coverage of the sample */
+       double total_cell_count = 0;       /* # of cells in histogram affected by sample */
+
+       ND_BOX sum;                        /* Sum of extents of sample boxes */
+       ND_BOX avg;                        /* Avg of extents of sample boxes */
+       ND_BOX stddev;                     /* StdDev of extents of sample boxes */
+       
+       const ND_BOX **sample_boxes;       /* ND_BOXes for each of the sample features */
+       ND_BOX sample_extent;              /* Extent of the raw sample */
+       int    histo_size[ND_DIMS];        /* histogram nrows, ncols, etc */
+       ND_BOX histo_extent;               /* Spatial extent of the histogram */
+       ND_BOX histo_extent_new;           /* Temporary variable */
+       int    histo_cells_target;         /* Number of cells we will shoot for, given the stats target */
+       int    histo_cells;                /* Number of cells in the histogram */
+       int    histo_cells_new = 1;        /* Temporary variable */
+       
+       int   ndims = 2;                    /* Dimensionality of the sample */
+       int   histo_ndims = 0;              /* Dimensionality of the histogram */
+       double sample_distribution[ND_DIMS]; /* How homogeneous is distribution of sample in each axis? */
+       double total_distribution;           /* Total of sample_distribution */
+
+       int stats_slot;                     /* What slot is this data going into? (2D vs ND) */
+       int stats_kind;                     /* And this is what? (2D vs ND) */
+
+       /* Initialize sums */
+       memset(&sum, 0, sizeof(ND_BOX));
+
+       /*
+        * This is where gserialized_analyze_nd
+        * should put its' custom parameters.
+        */
+       /* void *mystats = stats->extra_data; */
+
+       POSTGIS_DEBUG(2, "compute_gserialized_stats called");
+       POSTGIS_DEBUGF(3, " # sample_rows: %d", sample_rows);
+       POSTGIS_DEBUGF(3, " estimate of total_rows: %.6g", total_rows);
+
+       /*
+        * We might need less space, but don't think
+        * its worth saving...
+        */
+       sample_boxes = palloc(sizeof(ND_BOX*) * sample_rows);
+
+       /*
+        * First scan:
+        *  o read boxes
+        *  o find dimensionality of the sample
+        *  o find extent of the sample
+        *  o count null-infinite/not-null values
+        *  o compute total_width
+        *  o compute total features's box area (for avgFeatureArea)
+        *  o sum features box coordinates (for standard deviation)
+        */
+       for ( i = 0; i < sample_rows; i++ )
+       {
+               Datum datum;
+               GSERIALIZED *geom;
+               GBOX gbox;
+               ND_BOX *nd_box;
+               bool is_null;
+
+               datum = fetchfunc(stats, i, &is_null);
+
+               /* Skip all NULLs. */
+               if ( is_null )
+               {
+                       POSTGIS_DEBUGF(4, " skipped null geometry %d", i);
+                       null_cnt++;
+                       continue;
+               }
+                       
+               /* Read the bounds from the gserialized. */
+               geom = (GSERIALIZED *)PG_DETOAST_DATUM(datum);
+               if ( LW_FAILURE == gserialized_get_gbox_p(geom, &gbox) )
+               {
+                       /* Skip empties too. */
+                       POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
+                       continue;
+               }
+               
+               /* Check bounds for validity (finite and not NaN) */
+               if ( ! gbox_is_valid(&gbox) )
+               {
+                       POSTGIS_DEBUGF(3, " skipped infinite/nan geometry %d", i);
+                       continue;
+               }
+
+               /* 
+                * In N-D mode, set the ndims to the maximum dimensionality found
+                * in the sample. Otherwise, leave at ndims == 2.
+                */
+               if ( mode != 2 )
+                       ndims = Max(gbox_ndims(&gbox), ndims);
+               
+               /* Convert gbox to n-d box */           
+               nd_box = palloc(sizeof(ND_BOX));
+               nd_box_from_gbox(&gbox, nd_box);
+               
+               /* Cache n-d bounding box */
+               sample_boxes[notnull_cnt] = nd_box;
+
+               /* Initialize sample extent before merging first entry */
+               if ( ! notnull_cnt )
+                       nd_box_init_bounds(&sample_extent);
+               
+               /* Add current sample to overall sample extent */
+               nd_box_merge(nd_box, &sample_extent);
+               
+               /* How many bytes does this sample use? */
+               total_width += VARSIZE(geom);
+               
+               /* Add bounds coordinates to sums for stddev calculation */
+               for ( d = 0; d < ndims; d++ )
+               {
+                       sum.min[d] += nd_box->min[d];
+                       sum.max[d] += nd_box->max[d];
+               }
+
+               /* Increment our "good feature" count */
+               notnull_cnt++;
+
+               /* Give backend a chance of interrupting us */
+               vacuum_delay_point();
+       }
+
+       /*
+        * We'll build a histogram having stats->attr->attstattarget cells
+        * on each side,  within reason... we'll use ndims*10000 as the 
+        * maximum number of cells. 
+        * Also, if we're sampling a relatively small table, we'll try to ensure that 
+        * we have an average of 5 features for each cell so the histogram isn't
+        * so sparse.
+        */
+       histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
+       histo_cells_target = Min(histo_cells_target, ndims * 10000);
+       histo_cells_target = Min(histo_cells_target, (int)(total_rows/5));
+       POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
+       POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
+
+       /* If there's no useful features, we can't work out stats */
+       if ( ! notnull_cnt )
+       {
+               elog(NOTICE, "no non-null/empty features, unable to compute statistics");
+               stats->stats_valid = false;
+               return;
+       }
+
+       POSTGIS_DEBUGF(3, " sample_extent: %s", nd_box_to_json(&sample_extent, ndims));
+
+       /*
+        * Second scan:
+        *  o compute standard deviation
+        */     
+       for ( d = 0; d < ndims; d++ )
+       {
+               /* Calculate average bounds values */
+               avg.min[d] = sum.min[d] / notnull_cnt;
+               avg.max[d] = sum.max[d] / notnull_cnt;
+               
+               /* Calculate standard deviation for this dimension bounds */
+               for ( i = 0; i < notnull_cnt; i++ )
+               {
+                       const ND_BOX *ndb = sample_boxes[i];
+                       stddev.min[d] += (ndb->min[d] - avg.min[d]) * (ndb->min[d] - avg.min[d]);
+                       stddev.max[d] += (ndb->max[d] - avg.max[d]) * (ndb->max[d] - avg.max[d]);
+               }
+               stddev.min[d] = sqrt(stddev.min[d] / notnull_cnt);
+               stddev.max[d] = sqrt(stddev.max[d] / notnull_cnt);
+               
+               /* Histogram bounds for this dimension bounds is avg +/- SDFACTOR * stdev */
+               histo_extent.min[d] = Max(avg.min[d] - SDFACTOR * stddev.min[d], sample_extent.min[d]);
+               histo_extent.max[d] = Min(avg.max[d] + SDFACTOR * stddev.max[d], sample_extent.max[d]);
+       }
+
+       /*
+        * Third scan:
+        *   o skip hard deviants
+        *   o compute new histogram box
+        */
+       nd_box_init_bounds(&histo_extent_new);
+       for ( i = 0; i < notnull_cnt; i++ )
+       {
+               const ND_BOX *ndb = sample_boxes[i];
+               /* Skip any hard deviants (boxes entirely outside our histo_extent */
+               if ( ! nd_box_intersects(&histo_extent, ndb, ndims) )
+               {
+                       POSTGIS_DEBUGF(4, " feature %d is a hard deviant, skipped", i);
+                       sample_boxes[i] = NULL;
+                       continue;
+               }       
+               /* Expand our new box to fit all the other features. */
+               nd_box_merge(ndb, &histo_extent_new);
+       }
+       /*
+        * Expand the box slightly (1%) to avoid edge effects 
+        * with objects that are on the boundary 
+        */
+       nd_box_expand(&histo_extent_new, 0.01);
+       histo_extent = histo_extent_new;
+       
+       /*
+        * How should we allocate our histogram cells to the 
+        * different dimensions? We can't do it by raw dimensional width,
+        * because in x/y/z space, the z can have different units
+        * from the x/y. Similarly for x/y/t space. 
+        * So, we instead calculate how much features overlap
+        * each other in their dimension to figure out which
+        *  dimensions have useful selectivity characteristics (more 
+        * variability in density) and therefor would find
+        * more cells useful (to distinguish between dense places and
+        * homogeneous places).
+        */ 
+       nd_box_array_distribution(sample_boxes, notnull_cnt, &histo_extent, ndims,
+                                 sample_distribution);
+
+       /* 
+        * The sample_distribution array now tells us how spread out the 
+        * data is in each dimension, so we use that data to allocate
+        * the histogram cells we have available.
+        * At this point, histo_cells_target is the approximate target number
+        * of cells.
+        */
+       
+       /*
+        * Some dimensions have basically a uniform distribution, we want
+        * to allocate no cells to those dimensions, only to dimensions
+        * that have some interesting differences in data distribution.
+        * Here we count up the number of interesting dimensions 
+        */
+       for ( d = 0; d < ndims; d++ )
+       {
+               if ( sample_distribution[d] > 0 )
+                       histo_ndims++;
+       }
+       
+       if ( histo_ndims == 0 )
+       {
+               /* Special case: all our dimensions had low variability! */
+               /* We just divide the cells up evenly */
+               POSTGIS_DEBUG(3, " special case: no axes have variability");
+               histo_cells_new = 1;
+               for ( d = 0; d < ndims; d++ )
+               {
+                       histo_size[d] = (int)pow((double)histo_cells_target, 1/(double)ndims);
+                       POSTGIS_DEBUGF(3, "   histo_size[d]: %d", histo_size[d]);
+                       histo_cells_new *= histo_size[d];
+               }
+               POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
+       }
+       else
+       {
+               /* 
+                * We're going to express the amount of variability in each dimension
+                * as a proportion of the total variability and allocate cells in that
+                * dimension relative to that proportion.
+                */
+               POSTGIS_DEBUG(3, " allocating histogram axes based on axis variability");
+               total_distribution = total_double(sample_distribution, ndims); /* First get the total */
+               POSTGIS_DEBUGF(3, " total_distribution: %.8g", total_distribution);
+               histo_cells_new = 1; /* For the number of cells in the final histogram */
+               for ( d = 0; d < ndims; d++ )
+               {
+                       if ( sample_distribution[d] == 0 ) /* Uninteresting dimensions don't get any room */
+                       {
+                               histo_size[d] = 1;
+                       }
+                       else /* Interesting dimension */
+                       {
+                               /* How does this dims variability compare to the total? */
+                               float edge_ratio = (float)sample_distribution[d] / (float)total_distribution;
+                               /* 
+                                * Scale the target cells number by the # of dims and ratio,
+                                * then take the appropriate root to get the estimated number of cells 
+                                * on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
+                               */
+                               histo_size[d] = (int)pow(histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
+                               /* If something goes awry, just give this dim one slot */
+                               if ( ! histo_size[d] )
+                                       histo_size[d] = 1;
+                       }
+                       histo_cells_new *= histo_size[d];
+               } 
+               POSTGIS_DEBUGF(3, " histo_cells_new: %d", histo_cells_new);
+       }
+       
+       /* Update histo_cells to the actual number of cells we need to allocate */
+       histo_cells = histo_cells_new;
+       POSTGIS_DEBUGF(3, " histo_cells: %d", histo_cells);
+       
+       /*
+        * Create the histogram (ND_STATS) in the stats memory context
+        */
+       old_context = MemoryContextSwitchTo(stats->anl_context);
+       nd_stats_size = sizeof(ND_STATS) + ((histo_cells - 1) * sizeof(float4));
+       nd_stats = palloc(nd_stats_size);
+       memset(nd_stats, 0, nd_stats_size); /* Initialize all values to 0 */
+       MemoryContextSwitchTo(old_context);
+
+       /* Initialize the #ND_STATS objects */
+       nd_stats->ndims = ndims;
+       nd_stats->extent = histo_extent;
+       nd_stats->sample_features = sample_rows;
+       nd_stats->table_features = total_rows;
+       nd_stats->not_null_features = notnull_cnt;
+       /* Copy in the histogram dimensions */
+       for ( d = 0; d < ndims; d++ )
+               nd_stats->size[d] = histo_size[d];
+
+       /*
+        * Fourth scan:
+        *  o fill histogram values with the proportion of
+        *    features' bbox overlaps: a feature's bvol
+        *    can fully overlap (1) or partially overlap
+        *    (fraction of 1) an histogram cell.
+        *
+        * Note that we are filling each cell with the "portion of
+        * the feature's box that overlaps the cell". So, if we sum
+        * up the values in the histogram, we could get the
+        * histogram feature count.
+        *
+        */
+       for ( i = 0; i < notnull_cnt; i++ )
+       {
+               const ND_BOX *nd_box;
+               ND_IBOX nd_ibox;
+               int at[ND_DIMS];
+               int d;
+               double num_cells = 0;
+               double tmp_volume = 1.0;
+               double min[ND_DIMS];
+               double max[ND_DIMS];
+               double cellsize[ND_DIMS];
+
+               nd_box = sample_boxes[i];
+               if ( ! nd_box ) continue; /* Skip Null'ed out hard deviants */
+
+               /* Give backend a chance of interrupting us */
+               vacuum_delay_point();
+               
+               /* Find the cells that overlap with this box and put them into the ND_IBOX */
+               nd_box_overlap(nd_stats, nd_box, &nd_ibox);
+               memset(at, 0, sizeof(int)*ND_DIMS);
+               
+               POSTGIS_DEBUGF(3, " feature %d: ibox (%d, %d, %d, %d) (%d, %d, %d, %d)", i, 
+                 nd_ibox.min[0], nd_ibox.min[1], nd_ibox.min[2], nd_ibox.min[3],
+                 nd_ibox.max[0], nd_ibox.max[1], nd_ibox.max[2], nd_ibox.max[3]);
+
+               for ( d = 0; d < nd_stats->ndims; d++ )
+               {
+                       /* Initialize the starting values */
+                       at[d] = nd_ibox.min[d];
+                       min[d] = nd_stats->extent.min[d];
+                       max[d] = nd_stats->extent.max[d];
+                       cellsize[d] = (max[d] - min[d])/(nd_stats->size[d]);
+                       
+                       /* What's the volume (area) of this feature's box? */
+                       tmp_volume *= (nd_box->max[d] - nd_box->min[d]);
+               }
+
+               /* Add feature volume (area) to our total */
+               total_sample_volume += tmp_volume;
+
+               /* 
+                * Move through all the overlaped histogram cells values and 
+                * add the box overlap proportion to them. 
+                */
+               do 
+               {
+                       ND_BOX nd_cell;
+                       double ratio;
+                       /* Create a box for this histogram cell */
+                       for ( d = 0; d < nd_stats->ndims; d++ )
+                       {
+                               nd_cell.min[d] = min[d] + (at[d]+0) * cellsize[d];
+                               nd_cell.max[d] = min[d] + (at[d]+1) * cellsize[d];
+                       }
+
+                       /* 
+                        * If a feature box is completely inside one cell the ratio will be
+                        * 1.0. If a feature box is 50% in two cells, each cell will get
+                        * 0.5 added on.
+                        */
+                       ratio = nd_box_ratio(&nd_cell, nd_box, nd_stats->ndims);
+                       nd_stats->value[nd_stats_value_index(nd_stats, at)] += ratio;
+                       num_cells += ratio;
+                       POSTGIS_DEBUGF(3, "               ratio (%.8g)  num_cells (%.8g)", ratio, num_cells);
+                       POSTGIS_DEBUGF(3, "               at (%d, %d, %d, %d)", at[0], at[1], at[2], at[3]);
+               } 
+               while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
+               
+               /* Keep track of overall number of overlaps counted */
+               total_cell_count += num_cells;
+               /* How many features have we added to this histogram? */
+               histogram_features++;
+       }
+
+       POSTGIS_DEBUGF(3, " histogram_features: %d", histogram_features);
+       POSTGIS_DEBUGF(3, " sample_rows: %d", sample_rows);
+       POSTGIS_DEBUGF(3, " table_rows: %.6g", total_rows);
+
+       /* Error out if we got no sample information */
+       if ( ! histogram_features )
+       {
+               POSTGIS_DEBUG(3, " no stats have been gathered");
+               elog(NOTICE, " no features lie in the stats histogram, invalid stats");
+               stats->stats_valid = false;
+               return;
+       }
+       
+       nd_stats->histogram_features = histogram_features;
+       nd_stats->histogram_cells = histo_cells;
+       nd_stats->cells_covered = total_cell_count;
+
+       /* Put this histogram data into the right slot/kind */
+       if ( mode == 2 )
+       {
+               stats_slot = STATISTIC_SLOT_2D;
+               stats_kind = STATISTIC_KIND_2D;
+       }
+       else
+       {
+               stats_slot = STATISTIC_SLOT_ND;
+               stats_kind = STATISTIC_KIND_ND;
+       }
+       
+       /* Write the statistics data */
+       stats->stakind[stats_slot] = stats_kind;
+       stats->staop[stats_slot] = InvalidOid;
+       stats->stanumbers[stats_slot] = (float4*)nd_stats;
+       stats->numnumbers[stats_slot] = nd_stats_size/sizeof(float4);
+       stats->stanullfrac = (float4)null_cnt/sample_rows;
+       stats->stawidth = total_width/notnull_cnt;
+       stats->stadistinct = -1.0;
+       stats->stats_valid = true;
+
+       POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_ND)", stats->stakind[0]);
+       POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
+       POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
+       POSTGIS_DEBUGF(3, " out: null fraction: %f=%d/%d", stats->stanullfrac, null_cnt, sample_rows);
+       POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
+       POSTGIS_DEBUG (3, " out: distinct values: all (no check done)");
+       POSTGIS_DEBUGF(3, " out: %s", nd_stats_to_json(nd_stats));
+       POSTGIS_DEBUGF(3, " out histogram:\n%s", nd_stats_to_grid(nd_stats));
+
+       return;
+}
+
+
+/**
+* In order to do useful selectivity calculations in both 2-D and N-D 
+* modes, we actually have to generate two stats objects, one for 2-D
+* and one for N-D. 
+* You would think that an N-D histogram would be sufficient for 2-D 
+* calculations of selectivity, but you'd be wrong. For features that
+* overlap multiple cells, the N-D histogram over-estimates the number
+* of hits, and can't contain the requisite information to correct
+* that over-estimate. 
+* We use the convenient PgSQL facility of stats slots to store 
+* one 2-D and one N-D stats object, and here in the compute function
+* we just call the computation twice, once in each mode.
+* It would be more efficient to have the computation calculate
+* the two histograms simultaneously, but that would also complicate
+* the (already complicated) logic in the function, 
+* so we'll take the CPU hit and do the computation twice.
+*/
+static void
+compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
+                          int sample_rows, double total_rows)
+{
+       /* 2D Mode */
+       compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
+       /* ND Mode */
+       compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 0);   
+}
+
+
+/**
+* This function will be called when the ANALYZE command is run
+* on a column of the "geometry" or "geography" type.
+*
+* It will need to return a stats builder function reference
+* and a "minimum" sample rows to feed it.
+* If we want analisys to be completely skipped we can return
+* FALSE and leave output vals untouched.
+*
+* What we know from this call is:
+*
+*      o The pg_attribute row referring to the specific column.
+*        Could be used to get reltuples from pg_class (which
+*        might quite inexact though...) and use them to set an
+*        appropriate minimum number of sample rows to feed to
+*        the stats builder. The stats builder will also receive
+*        a more accurate "estimation" of the number or rows.
+*
+*      o The pg_type row for the specific column.
+*        Could be used to set stat builder / sample rows
+*        based on domain type (when postgis will be implemented
+*        that way).
+*
+* Being this experimental we'll stick to a static stat_builder/sample_rows
+* value for now.
+*
+*/
+PG_FUNCTION_INFO_V1(gserialized_analyze_nd);
+Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
+{
+       VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
+       Form_pg_attribute attr = stats->attr;
+
+       POSTGIS_DEBUG(2, "gserialized_analyze_nd called");
+
+       /* If the attstattarget column is negative, use the default value */
+       /* NB: it is okay to scribble on stats->attr since it's a copy */
+       if (attr->attstattarget < 0)
+               attr->attstattarget = default_statistics_target;
+
+       POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
+
+       /* Setup the minimum rows and the algorithm function */
+       stats->minrows = 300 * stats->attr->attstattarget;
+       stats->compute_stats = compute_gserialized_stats;
+
+       POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
+
+       /* Indicate we are done successfully */
+       PG_RETURN_BOOL(true);
+}
+
+/**
+* This function returns an estimate of the selectivity
+* of a search GBOX by looking at data in the ND_STATS
+* structure. The selectivity is a float from 0-1, that estimates
+* the proportion of the rows in the table that will be returned
+* as a result of the search box.
+*
+* To get our estimate,
+* we need "only" sum up the values * the proportion of each cell
+* in the histogram that falls within the search box, then 
+* divide by the number of features that generated the histogram.
+*/
+static float8
+estimate_selectivity(const GBOX *box, const ND_STATS *nd_stats, int mode)
+{
+       int d; /* counter */    
+       float8 selectivity;
+       ND_BOX nd_box;
+       ND_IBOX nd_ibox;
+       int at[ND_DIMS];
+       double cell_size[ND_DIMS];
+       double min[ND_DIMS];
+       double max[ND_DIMS];
+       double total_count = 0.0;
+       int ndims_max = Max(nd_stats->ndims, gbox_ndims(box));  
+//     int ndims_min = Min(nd_stats->ndims, gbox_ndims(box));  
+
+       /* Calculate the overlap of the box on the histogram */
+       if ( ! nd_stats )
+       {
+               elog(NOTICE, " estimate_selectivity called with null input");
+               return FALLBACK_ND_SEL;
+       }
+
+       /* Initialize nd_box. */
+       nd_box_from_gbox(box, &nd_box);
+
+       /*
+        * To return 2D stats on an ND sample, we need to make the 
+        * 2D box cover the full range of the other dimensions in the
+        * histogram.
+        */
+       POSTGIS_DEBUGF(3, " mode: %d", mode);
+       if ( mode == 2 )
+       {
+               POSTGIS_DEBUG(3, " in 2d mode, stripping the computation down to 2d");
+               ndims_max = 2;
+       }
+       
+       POSTGIS_DEBUGF(3, " nd_stats->extent: %s", nd_box_to_json(&(nd_stats->extent), nd_stats->ndims));
+       POSTGIS_DEBUGF(3, " nd_box: %s", nd_box_to_json(&(nd_box), gbox_ndims(box)));
+
+       /* 
+        * Search box completely misses histogram extent? 
+        * We have to intersect in all N dimensions or else we have
+        * zero interaction under the &&& operator. It's important
+        * to short circuit in this case, as some of the tests below
+        * will return junk results when run on non-intersecting inputs.
+        */
+       if ( ! nd_box_intersects(&nd_box, &(nd_stats->extent), ndims_max) )
+       {
+               POSTGIS_DEBUG(3, " search box does not overlap histogram, returning 0");
+               return 0.0;
+       }
+
+       /* Search box completely contains histogram extent! */
+       if ( nd_box_contains(&nd_box, &(nd_stats->extent), ndims_max) )
+       {
+               POSTGIS_DEBUG(3, " search box contains histogram, returning 1");
+               return 1.0;
+       }
+
+       /* Calculate the overlap of the box on the histogram */
+       if ( ! nd_box_overlap(nd_stats, &nd_box, &nd_ibox) )
+       {
+               POSTGIS_DEBUG(3, " search box overlap with stats histogram failed");
+               return FALLBACK_ND_SEL;
+       }
+
+       /* Work out some measurements of the histogram */
+       for ( d = 0; d < nd_stats->ndims; d++ )
+       {
+               /* Cell size in each dim */
+               min[d] = nd_stats->extent.min[d];
+               max[d] = nd_stats->extent.max[d];
+               cell_size[d] = (max[d] - min[d]) / nd_stats->size[d];
+               POSTGIS_DEBUGF(3, " cell_size[%d] : %.9g", d, cell_size[d]);
+               
+               /* Initialize the counter */
+               at[d] = nd_ibox.min[d];
+       }
+
+       /* Move through all the overlap values and sum them */
+       do 
+       {
+               float cell_count, ratio;
+               ND_BOX nd_cell;
+               
+               /* We have to pro-rate partially overlapped cells. */
+               for ( d = 0; d < nd_stats->ndims; d++ )
+               {
+                       nd_cell.min[d] = min[d] + (at[d]+0) * cell_size[d];
+                       nd_cell.max[d] = min[d] + (at[d]+1) * cell_size[d];
+               }
+
+               ratio = nd_box_ratio(&nd_box, &nd_cell, nd_stats->ndims);
+               cell_count = nd_stats->value[nd_stats_value_index(nd_stats, at)];
+               
+               /* Add the pro-rated count for this cell to the overall total */
+               total_count += cell_count * ratio;      
+               POSTGIS_DEBUGF(4, " cell (%d,%d), cell value %.6f, ratio %.6f", at[0], at[1], cell_count, ratio);       
+       } 
+       while ( nd_increment(&nd_ibox, nd_stats->ndims, at) );
+
+       /* Scale by the number of features in our histogram to get the proportion */
+       selectivity = total_count / nd_stats->histogram_features;
+
+       POSTGIS_DEBUGF(3, " nd_stats->histogram_features = %f", nd_stats->histogram_features);
+       POSTGIS_DEBUGF(3, " nd_stats->histogram_cells = %f", nd_stats->histogram_cells);
+       POSTGIS_DEBUGF(3, " sum(overlapped histogram cells) = %f", total_count);
+       POSTGIS_DEBUGF(3, " selectivity = %f", selectivity);
+
+       /* Prevent rounding overflows */
+       if (selectivity > 1.0) selectivity = 1.0;
+       else if (selectivity < 0.0) selectivity = 0.0;
+
+       return selectivity;
+}
+
+
+
+/**
+* Utility function to print the statistics information for a 
+* given table/column in JSON. Used for debugging the selectivity code.
+*/
+PG_FUNCTION_INFO_V1(_postgis_gserialized_stats);
+Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS)
+{
+       ND_STATS *nd_stats;
+       char *str;
+       text *json;
+       int mode;
+       
+       /* Check if we've been asked to not use 2d mode */
+       if ( ! PG_ARGISNULL(2) )
+               mode = text_p_get_mode(PG_GETARG_TEXT_P(2));
+
+       /* Retrieve the stats object */
+       nd_stats = pg_get_nd_stats_by_name(PG_GETARG_OID(0), PG_GETARG_TEXT_P(1), mode);
+       str = nd_stats_to_json(nd_stats);
+       json = cstring2text(str);
+       pfree(str);
+       pfree(nd_stats);
+       PG_RETURN_TEXT_P(json);
+}
+
+
+/**
+* Utility function to read the calculated selectivity for a given search
+* box and table/column. Used for debugging the selectivity code.
+*/
+PG_FUNCTION_INFO_V1(_postgis_gserialized_sel);
+Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS)
+{
+       Oid table_oid = PG_GETARG_OID(0);
+       text *att_text = PG_GETARG_TEXT_P(1);
+       Datum geom_datum = PG_GETARG_DATUM(2);
+       GBOX gbox; /* search box read from gserialized datum */
+       float8 selectivity = 0;
+       ND_STATS *nd_stats;
+       int mode = 2; /* 2D mode by default */
+
+       /* Check if we've been asked to not use 2d mode */
+       if ( ! PG_ARGISNULL(3) )
+               mode = text_p_get_mode(PG_GETARG_TEXT_P(3));
+
+       /* Retrieve the stats object */
+       nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode);
+       
+       if ( ! nd_stats )
+               elog(ERROR, "unable to load statistics! analyze?");
+
+       /* Calculate the gbox */
+       if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
+               elog(ERROR, "unable to calculate bounding box from geometry");
+
+       POSTGIS_DEBUGF(3, " %s", gbox_to_string(&gbox));
+       
+       /* Do the estimation */
+       selectivity = estimate_selectivity(&gbox, nd_stats, mode);
+       
+       pfree(nd_stats);
+       PG_RETURN_FLOAT8(selectivity);
+}
+
+
+/**
+* Utility function to read the calculated join selectivity for a 
+* pair of tables. Used for debugging the selectivity code.
+*/
+PG_FUNCTION_INFO_V1(_postgis_gserialized_joinsel);
+Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS)
+{
+       Oid table_oid1 = PG_GETARG_OID(0);
+       text *att_text1 = PG_GETARG_TEXT_P(1);
+       Oid table_oid2 = PG_GETARG_OID(2);
+       text *att_text2 = PG_GETARG_TEXT_P(3);
+       ND_STATS *nd_stats1, *nd_stats2;
+       float8 selectivity = 0;
+       int mode = 2; /* 2D mode by default */
+
+
+       /* Retrieve the stats object */
+       nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode);
+       nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode);
+
+       if ( ! ( nd_stats1 && nd_stats2 ) ) 
+               elog(ERROR, "unable to load statistics! analyze?");
+
+       /* Check if we've been asked to not use 2d mode */
+       if ( ! PG_ARGISNULL(4) )
+       {
+               text *modetxt = PG_GETARG_TEXT_P(4);
+               char *modestr = text2cstring(modetxt);
+               if ( modestr[0] == 'N' )
+                       mode = 0;               
+       }
+
+       /* Do the estimation */
+       selectivity = estimate_join_selectivity(nd_stats1, nd_stats2);
+       
+       pfree(nd_stats1);
+       pfree(nd_stats2);
+       PG_RETURN_FLOAT8(selectivity);
+}
+
+/**
+* For (geometry && geometry) 
+* we call into the 2-D mode.
+*/
+PG_FUNCTION_INFO_V1(gserialized_gist_sel_2d);
+Datum gserialized_gist_sel_2d(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_DATUM(DirectFunctionCall5(
+          gserialized_gist_sel,
+          PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
+          PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
+          Int32GetDatum(2) /* 2-D mode */
+       ));
+}
+
+/**
+* For (geometry &&& geometry) and (geography && geography)
+* we call into the N-D mode.
+*/
+PG_FUNCTION_INFO_V1(gserialized_gist_sel_nd);
+Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_DATUM(DirectFunctionCall5(
+          gserialized_gist_sel,
+          PG_GETARG_DATUM(0), PG_GETARG_DATUM(1),
+          PG_GETARG_DATUM(2), PG_GETARG_DATUM(3),
+          Int32GetDatum(0) /* N-D mode */
+       ));
+}
+
+/**
+ * This function should return an estimation of the number of
+ * rows returned by a query involving an overlap check
+ * ( it's the restrict function for the && operator )
+ *
+ * It can make use (if available) of the statistics collected
+ * by the geometry analyzer function.
+ *
+ * Note that the good work is done by estimate_selectivity() above.
+ * This function just tries to find the search_box, loads the statistics
+ * and invoke the work-horse.
+ *
+ */
+PG_FUNCTION_INFO_V1(gserialized_gist_sel);
+Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
+{
+       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+       /* Oid operator_oid = PG_GETARG_OID(1); */
+       List *args = (List *) PG_GETARG_POINTER(2);
+       /* int varRelid = PG_GETARG_INT32(3); */
+       int mode = PG_GETARG_INT32(4);
+       
+       Oid relid;
+       ND_STATS *nd_stats;
+
+       Node *other;
+       Var *self;
+       GBOX search_box;
+       float8 selectivity = 0;
+       
+       POSTGIS_DEBUG(2, "gserialized_gist_sel called");
+
+       /* 
+        * TODO: This is a big one,
+        * All this statistics code *only* tries to generate a valid
+        * selectivity for && and &&&. That leaves all the other 
+        * geometry operators with bad stats! The selectivity
+        * calculation should take account of the incoming operator
+        * type and do the right thing.
+        */
+       
+       /* Fail if not a binary opclause (probably shouldn't happen) */
+       if (list_length(args) != 2)
+       {
+               POSTGIS_DEBUG(3, "gserialized_gist_sel: not a binary opclause");
+               PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
+       }
+
+       /* Find the constant part */
+       other = (Node *) linitial(args);
+       if ( ! IsA(other, Const) )
+       {
+               self = (Var *)other;
+               other = (Node *) lsecond(args);
+       }
+       else
+       {
+               self = (Var *) lsecond(args);
+       }
+
+       if ( ! IsA(other, Const) )
+       {
+               POSTGIS_DEBUG(3, " no constant arguments - returning a default selectivity");
+               PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
+       }
+
+       /*
+       * We don't have a nice <const> && <var> or <var> && <const> 
+       * situation here. <const> && <const> would probably get evaluated
+       * away by PgSQL earlier on. <func> && <const> is harder, and the
+       * case we get often is <const> && ST_Expand(<var>), which does 
+       * actually have a subtly different selectivity than a bae
+       * <const> && <var> call. It's calculatable though, by expanding
+       * every cell in the histgram appropriately.
+       * 
+       * Discussion: http://trac.osgeo.org/postgis/ticket/1828
+       *
+       * To do? Do variable selectivity based on the <func> node.
+       */
+       if ( ! IsA(self, Var) )
+       {
+               POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity");
+               PG_RETURN_FLOAT8(FALLBACK_ND_SEL);
+       }
+
+       /* Convert the constant to a BOX */
+       if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
+       {
+               POSTGIS_DEBUG(3, "search box is EMPTY");
+               PG_RETURN_FLOAT8(0.0);
+       }
+       POSTGIS_DEBUGF(4, " requested search box is: %s", gbox_to_string(&search_box));
+
+       /* Get pg_statistic row */
+       relid = getrelid(self->varno, root->parse->rtable);
+       nd_stats = pg_get_nd_stats(relid, self->varattno, mode);
+       if ( ! nd_stats )
+       {
+               POSTGIS_DEBUG(3, " unable to load stats from syscache, not analyzed yet?");
+               PG_RETURN_FLOAT8(FALLBACK_ND_SEL);
+       }
+       POSTGIS_DEBUGF(4, " got stats:\n%s", nd_stats_to_json(nd_stats));
+
+       /* Do the estimation! */
+       selectivity = estimate_selectivity(&search_box, nd_stats, mode);
+       POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
+
+       pfree(nd_stats);
+       PG_RETURN_FLOAT8(selectivity);
+}
+
+
+
+/**
+ * Return the estimated extent of the table
+ * looking at gathered statistics (or NULL if
+ * no statistics have been gathered).
+ */
+PG_FUNCTION_INFO_V1(gserialized_estimated_extent);
+Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
+{
+       char *nsp = NULL;
+       char *tbl = NULL;
+       text *col = NULL;
+       char *nsp_tbl = NULL;
+       Oid tbl_oid;
+       ND_STATS *nd_stats;
+       GBOX *gbox;
+
+       if ( PG_NARGS() == 3 )
+       {
+               nsp = text2cstring(PG_GETARG_TEXT_P(0));
+               tbl = text2cstring(PG_GETARG_TEXT_P(1));
+               col = PG_GETARG_TEXT_P(2);
+               nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 2);
+               sprintf(nsp_tbl, "%s.%s", nsp, tbl);
+               tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl)));
+               pfree(nsp_tbl);
+       }
+       else if ( PG_NARGS() == 2 )
+       {
+               tbl = text2cstring(PG_GETARG_TEXT_P(0));
+               col = PG_GETARG_TEXT_P(1);
+               tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(tbl)));
+       }
+       else
+       {
+               elog(ERROR, "estimated_extent() called with wrong number of arguments");
+               PG_RETURN_NULL();
+       }
+
+       /* Estimated extent only returns 2D bounds, so use mode 2 */
+       nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2);
+       
+       /* Error out on no stats */
+       if ( ! nd_stats ) 
+               elog(ERROR, "stats for \"%s.%s\" do not exist", tbl, text2cstring(col));
+
+       /* Construct the box */
+       gbox = palloc(sizeof(GBOX));
+       FLAGS_SET_GEODETIC(gbox->flags, 0);
+       FLAGS_SET_Z(gbox->flags, 0);
+       FLAGS_SET_M(gbox->flags, 0);
+       gbox->xmin = nd_stats->extent.min[0];
+       gbox->xmax = nd_stats->extent.max[0];
+       gbox->ymin = nd_stats->extent.min[1];
+       gbox->ymax = nd_stats->extent.max[1];
+
+       pfree(nd_stats);
+       PG_RETURN_POINTER(gbox);
+}
+
diff --git a/postgis/postgis.sql.in.c b/postgis/postgis.sql.in.c

index 27f96a71daf96a4d30400745cbdd47925b3d3201..86ac21934eb4aacaaae7dabd83855f0d030ad6ae 100644 (file)
--- a/postgis/postgis.sql.in.c
+++ b/postgis/postgis.sql.in.c
@@ -74,7 +74,7 @@ CREATE OR REPLACE FUNCTION geometry_typmod_out(integer)
  
  CREATE OR REPLACE FUNCTION geometry_analyze(internal)
         RETURNS bool
-       AS 'MODULE_PATHNAME', 'geometry_analyze_2d'
+       AS 'MODULE_PATHNAME', 'gserialized_analyze_nd'
         LANGUAGE 'c' VOLATILE STRICT;
  
  CREATE OR REPLACE FUNCTION geometry_recv(internal)
@@ -428,12 +428,50 @@ CREATE OR REPLACE FUNCTION geometry_gist_joinsel_2d(internal, oid, internal, sma
         AS 'MODULE_PATHNAME', 'geometry_gist_joinsel_2d'
         LANGUAGE 'c';
  
+
+-----------------------------------------------------------------------------
+
  -- Availability: 2.1.0
-CREATE OR REPLACE FUNCTION _postgis_geometry_sel(tbl regclass, att_name text, geom geometry)
+CREATE OR REPLACE FUNCTION _postgis_selectivity(tbl regclass, att_name text, geom geometry, mode text default '2')
         RETURNS float8
-       AS 'MODULE_PATHNAME', '_postgis_geometry_sel'
+       AS 'MODULE_PATHNAME', '_postgis_gserialized_sel'
         LANGUAGE 'c' STRICT;
  
+-- Availability: 2.1.0
+CREATE OR REPLACE FUNCTION _postgis_join_selectivity(regclass, text, regclass, text, text default '2')
+       RETURNS float8
+       AS 'MODULE_PATHNAME', '_postgis_gserialized_joinsel'
+       LANGUAGE 'c' STRICT;
+
+-- Availability: 2.1.0
+CREATE OR REPLACE FUNCTION _postgis_stats(tbl regclass, att_name text, text default '2')
+       RETURNS text
+       AS 'MODULE_PATHNAME', '_postgis_gserialized_stats'
+       LANGUAGE 'c' STRICT;
+
+-- Availability: 2.1.0
+CREATE OR REPLACE FUNCTION gserialized_gist_sel_2d (internal, oid, internal, int4)
+       RETURNS float8
+       AS 'MODULE_PATHNAME', 'gserialized_gist_sel_2d'
+       LANGUAGE 'c';
+
+-- Availability: 2.1.0
+CREATE OR REPLACE FUNCTION gserialized_gist_sel_nd (internal, oid, internal, int4)
+       RETURNS float8
+       AS 'MODULE_PATHNAME', 'gserialized_gist_sel_nd'
+       LANGUAGE 'c';
+
+-- Availability: 2.1.0
+CREATE OR REPLACE FUNCTION gserialized_gist_joinsel_2d (internal, oid, internal, smallint)
+       RETURNS float8
+       AS 'MODULE_PATHNAME', 'gserialized_gist_joinsel_2d'
+       LANGUAGE 'c';
+
+-- Availability: 2.1.0
+CREATE OR REPLACE FUNCTION gserialized_gist_joinsel_nd (internal, oid, internal, smallint)
+       RETURNS float8
+       AS 'MODULE_PATHNAME', 'gserialized_gist_joinsel_nd'
+       LANGUAGE 'c';
  
  
  -----------------------------------------------------------------------------
@@ -452,9 +490,9 @@ CREATE OR REPLACE FUNCTION geometry_overlaps(geom1 geometry, geom2 geometry)
  
  CREATE OPERATOR && (
         LEFTARG = geometry, RIGHTARG = geometry, PROCEDURE = geometry_overlaps,
-       COMMUTATOR = '&&'
---     ,RESTRICT = contsel, JOIN = contjoinsel
-       ,RESTRICT = geometry_gist_sel_2d, JOIN = geometry_gist_joinsel_2d       
+       COMMUTATOR = '&&',
+       RESTRICT = gserialized_gist_sel_2d, 
+       JOIN = gserialized_gist_joinsel_2d      
  );
  
  -- Availability: 2.0.0
@@ -692,17 +730,6 @@ CREATE OR REPLACE FUNCTION geometry_gist_decompress_nd(internal)
         AS 'MODULE_PATHNAME' ,'gserialized_gist_decompress'
         LANGUAGE 'c';
  
--- Availability: 2.0.0
---CREATE OR REPLACE FUNCTION geometry_gist_selectivity_nd (internal, oid, internal, int4)
---     RETURNS float8
---     AS 'MODULE_PATHNAME', 'geometry_gist_selectivity_nd'
---     LANGUAGE 'c';
-
--- Availability: 2.0.0
---CREATE OR REPLACE FUNCTION geography_gist_join_selectivity_nd(internal, oid, internal, smallint)
---     RETURNS float8
---     AS 'MODULE_PATHNAME', 'geometry_gist_join_selectivity_nd'
---     LANGUAGE 'c';
  
  -- ---------- ---------- ---------- ---------- ---------- ---------- ----------
  -- N-D GEOMETRY Operators
@@ -717,10 +744,9 @@ CREATE OR REPLACE FUNCTION geometry_overlaps_nd(geometry, geometry)
  -- Availability: 2.0.0
  CREATE OPERATOR &&& (
         LEFTARG = geometry, RIGHTARG = geometry, PROCEDURE = geometry_overlaps_nd,
-       COMMUTATOR = '&&&'
-       ,RESTRICT = contsel, JOIN = contjoinsel
---     ,RESTRICT = geometry_gist_selectivity_nd 
---     ,JOIN = geometry_gist_join_selectivity_nd
+       COMMUTATOR = '&&&',
+       RESTRICT = gserialized_gist_sel_nd,
+       JOIN = gserialized_gist_joinsel_nd      
  );
  
  -- Availability: 2.0.0
@@ -904,12 +930,12 @@ CREATE OR REPLACE FUNCTION ST_Combine_BBox(box2d,geometry)
  -- Availability: 1.2.2
  -- Deprecation in 2.1.0 
  CREATE OR REPLACE FUNCTION ST_estimated_extent(text,text,text) RETURNS box2d AS
-       'MODULE_PATHNAME', 'geometry_estimated_extent'
+       'MODULE_PATHNAME', 'gserialized_estimated_extent'
         LANGUAGE 'c' IMMUTABLE STRICT SECURITY DEFINER;
  
  -- Availability: 2.1.0
  CREATE OR REPLACE FUNCTION ST_EstimatedExtent(text,text,text) RETURNS box2d AS
-       'MODULE_PATHNAME', 'geometry_estimated_extent'
+       'MODULE_PATHNAME', 'gserialized_estimated_extent'
         LANGUAGE 'c' IMMUTABLE STRICT SECURITY DEFINER;
  
  -----------------------------------------------------------------------
@@ -918,12 +944,12 @@ CREATE OR REPLACE FUNCTION ST_EstimatedExtent(text,text,text) RETURNS box2d AS
  -- Availability: 1.2.2
  -- Deprecation in 2.1.0 
  CREATE OR REPLACE FUNCTION ST_estimated_extent(text,text) RETURNS box2d AS
-       'MODULE_PATHNAME', 'geometry_estimated_extent'
+       'MODULE_PATHNAME', 'gserialized_estimated_extent'
         LANGUAGE 'c' IMMUTABLE STRICT SECURITY DEFINER;
  
  -- Availability: 2.1.0
  CREATE OR REPLACE FUNCTION ST_EstimatedExtent(text,text) RETURNS box2d AS
-       'MODULE_PATHNAME', 'geometry_estimated_extent'
+       'MODULE_PATHNAME', 'gserialized_estimated_extent'
         LANGUAGE 'c' IMMUTABLE STRICT SECURITY DEFINER;
  
  -----------------------------------------------------------------------
diff --git a/regress/Makefile.in b/regress/Makefile.in

index 4fd758aaf03add7cd790b0de1720e17682d6699d..63f861df44a06211865a0e151e49095654b49aa3 100644 (file)
--- a/regress/Makefile.in
+++ b/regress/Makefile.in
@@ -69,6 +69,7 @@ TESTS = \
         regress \
         regress_index \
         regress_index_nulls \
+       regress_selectivity \
         lwgeom_regress \
         regress_lrs \
         removepoint \
diff --git a/regress/regress_selectivity.sql b/regress/regress_selectivity.sql

new file mode 100644 (file)

index 0000000..ba3d63c
--- /dev/null
+++ b/regress/regress_selectivity.sql
@@ -0,0 +1,39 @@
+-- Table with uniformly variable density, highest at 1,1, lowest at 10,10
+create table regular_overdots as
+with 
+ij as ( select i, j from generate_series(1, 10) i, generate_series(1, 10) j),
+iijj as (select generate_series(1, i) as a, generate_series(1, j) b from ij)
+select st_makepoint(a, b) as g from iijj;
+
+-- Generate the stats
+analyze regular_overdots;
+
+-- Baseline info
+select 'selectivity_00', count(*) from regular_overdots;
+
+-- First test
+select 'selectivity_01', count(*) from regular_overdots where g && 'LINESTRING(0 0, 11 3.5)';
+select 'selectivity_02', 'actual', round(1068.0/2127.0,3);
+select 'selectivity_03', 'estimated', round(_postgis_selectivity('regular_overdots','g','LINESTRING(0 0, 11 3.5)')::numeric,3);
+
+-- Second test
+select 'selectivity_04', count(*) from regular_overdots where g && 'LINESTRING(5.5 5.5, 11 11)';
+select 'selectivity_05', 'actual', round(161.0/2127.0,3);
+select 'selectivity_06', 'estimated', round(_postgis_selectivity('regular_overdots','g','LINESTRING(5.5 5.5, 11 11)')::numeric,3);
+
+-- Third test
+select 'selectivity_07', count(*) from regular_overdots where g && 'LINESTRING(1.5 1.5, 2.5 2.5)';
+select 'selectivity_08', 'actual', round(81.0/2127.0,3);
+select 'selectivity_09', 'estimated', round(_postgis_selectivity('regular_overdots','g','LINESTRING(1.5 1.5, 2.5 2.5)')::numeric,3);
+
+-- Fourth test
+select 'selectivity_10', 'actual', 0;
+select 'selectivity_09', 'estimated', _postgis_selectivity('regular_overdots','g','LINESTRING(11 11, 12 12)');
+
+-- Fifth test
+select 'selectivity_10', 'actual', 1;
+select 'selectivity_09', 'estimated', _postgis_selectivity('regular_overdots','g','LINESTRING(0 0, 12 12)');
+
+-- Clean
+drop table if exists regular_overdots;
+
diff --git a/regress/regress_selectivity_expected b/regress/regress_selectivity_expected

new file mode 100644 (file)

index 0000000..97acacc
--- /dev/null
+++ b/regress/regress_selectivity_expected
@@ -0,0 +1,14 @@
+selectivity_00|2127
+selectivity_01|1068
+selectivity_02|actual|0.502
+selectivity_03|estimated|0.502
+selectivity_04|161
+selectivity_05|actual|0.076
+selectivity_06|estimated|0.076
+selectivity_07|81
+selectivity_08|actual|0.038
+selectivity_09|estimated|0.038
+selectivity_10|actual|0
+selectivity_09|estimated|0
+selectivity_10|actual|1
+selectivity_09|estimated|1
diff --git a/regress/tickets.sql b/regress/tickets.sql

index d902cf18edbe318648d3a17bb7e7bf61bb462165..8adfd72d97f839983b6469e6db46aa7a101b1f2d 100644 (file)
--- a/regress/tickets.sql
+++ b/regress/tickets.sql
@@ -455,12 +455,23 @@ analyze t;
  select '#877.2', ST_EstimatedExtent('public', 't','g');
  select '#877.2.deprecated', ST_Estimated_Extent('public', 't','g');
  insert into t(g) values ('LINESTRING(-10 -50, 20 30)');
-select '#877.3', ST_EstimatedExtent('t','g');
+
+-- #877.3
+with e as ( select ST_EstimatedExtent('t','g') as e )
+select '#877.3', round(st_xmin(e.e)::numeric, 5), round(st_xmax(e.e)::numeric, 5),
+round(st_ymin(e.e)::numeric, 5), round(st_ymax(e.e)::numeric, 5) from e;
+
+-- #877.4
  analyze t;
-select '#877.4', ST_EstimatedExtent('t','g');
+with e as ( select ST_EstimatedExtent('t','g') as e )
+select '#877.4', round(st_xmin(e.e)::numeric, 5), round(st_xmax(e.e)::numeric, 5),
+round(st_ymin(e.e)::numeric, 5), round(st_ymax(e.e)::numeric, 5) from e;
+
+-- #877.5
  truncate t;
-select '#818.1', ST_EstimatedExtent('t','g');
-select '#818.1.deprecated', ST_Estimated_Extent('t','g');
+with e as ( select ST_EstimatedExtent('t','g') as e )
+select '#877.5', round(st_xmin(e.e)::numeric, 5), round(st_xmax(e.e)::numeric, 5),
+round(st_ymin(e.e)::numeric, 5), round(st_ymax(e.e)::numeric, 5) from e;
  drop table t;
  
  -- #1320
diff --git a/regress/tickets_expected b/regress/tickets_expected

index e7f22f88bed46731944b20da8902f5b0e0d09f79..5c19052cbd9316fffe3315d8f54af19d82ea5037 100644 (file)
--- a/regress/tickets_expected
+++ b/regress/tickets_expected
@@ -143,19 +143,12 @@ ERROR:  First argument must be a LINESTRING
  #1060|FFFFFFFF2
  #1273|t
  #1273.1|t
-NOTICE:  "<current>"."t"."g" is empty or not analyzed
-#877.1|
-NOTICE:  "public"."t"."g" is empty or not analyzed
-#877.2|
-NOTICE:  "public"."t"."g" is empty or not analyzed
-#877.2.deprecated|
-NOTICE:  "<current>"."t"."g" is empty or not analyzed
-#877.3|
-#877.4|BOX(-10 -50,20 30)
-NOTICE:  "<current>"."t"."g" is empty or not analyzed
-#818.1|
-NOTICE:  "<current>"."t"."g" is empty or not analyzed
-#818.1.deprecated|
+ERROR:  stats for "t.g" do not exist
+ERROR:  stats for "t.g" do not exist
+ERROR:  stats for "t.g" do not exist
+ERROR:  stats for "t.g" do not exist
+#877.4|-10.15000|20.15000|-50.40000|30.40000
+#877.5|-10.15000|20.15000|-50.40000|30.40000
  <#1320>
  #1320.geog.1|MULTIPOLYGON|4326
  #1320.geom.1|MULTIPOLYGON|4326
author	Paul Ramsey <pramsey@cleverelephant.ca>
	Tue, 4 Dec 2012 19:54:29 +0000 (19:54 +0000)
committer	Paul Ramsey <pramsey@cleverelephant.ca>
	Tue, 4 Dec 2012 19:54:29 +0000 (19:54 +0000)
NEWS		patch \| blob \| history
liblwgeom/g_box.c		patch \| blob \| history
liblwgeom/liblwgeom.h.in		patch \| blob \| history
libpgcommon/gserialized_gist.c		patch \| blob \| history
postgis/Makefile.in		patch \| blob \| history
postgis/geography.sql.in.c		patch \| blob \| history
postgis/gserialized_estimate.c	[new file with mode: 0644]	patch \| blob
postgis/postgis.sql.in.c		patch \| blob \| history
regress/Makefile.in		patch \| blob \| history
regress/regress_selectivity.sql	[new file with mode: 0644]	patch \| blob
regress/regress_selectivity_expected	[new file with mode: 0644]	patch \| blob
regress/tickets.sql		patch \| blob \| history
regress/tickets_expected		patch \| blob \| history