]> granicus.if.org Git - postgis/commitdiff
#945, remove the old selectivity code, now no longer being called
authorPaul Ramsey <pramsey@cleverelephant.ca>
Fri, 12 Apr 2013 18:33:50 +0000 (18:33 +0000)
committerPaul Ramsey <pramsey@cleverelephant.ca>
Fri, 12 Apr 2013 18:33:50 +0000 (18:33 +0000)
git-svn-id: http://svn.osgeo.org/postgis/trunk@11288 b70326c6-7e19-0410-871a-916f4a2858ee

postgis/Makefile.in
postgis/geography.sql.in
postgis/geography_estimate.c [deleted file]
postgis/geography_inout.c
postgis/geometry_estimate.c [deleted file]
postgis/gserialized_estimate.c
postgis/postgis.sql.in

index b85f92b4065b6efe7adebce18036b39101dcb6d3..bcfc992da3dac4295813ff472908ca95d35d6365 100644 (file)
@@ -58,10 +58,8 @@ PG_OBJS= \
        gserialized_estimate.o \
        geography_inout.o \
        geography_btree.o \
-       geography_estimate.o \
        geography_measurement.o \
        geography_measurement_trees.o \
-       geometry_estimate.o \
        geometry_inout.o
 
 # Objects to build using PGXS
index 77d643d243676c07e2671cf0db75cdba710719fc..b184552c10ef5fff6535248cc0cc64a398711da2 100644 (file)
@@ -233,18 +233,6 @@ CREATE OR REPLACE FUNCTION geography_gist_decompress(internal)
        AS 'MODULE_PATHNAME' ,'gserialized_gist_decompress'
        LANGUAGE 'c';
 
--- Availability: 1.5.0
-CREATE OR REPLACE FUNCTION geography_gist_selectivity (internal, oid, internal, int4)
-       RETURNS float8
-       AS 'MODULE_PATHNAME', 'geography_gist_selectivity'
-       LANGUAGE 'c';
-
--- Availability: 1.5.0
-CREATE OR REPLACE FUNCTION geography_gist_join_selectivity(internal, oid, internal, smallint)
-       RETURNS float8
-       AS 'MODULE_PATHNAME', 'geography_gist_selectivity'
-       LANGUAGE 'c';
-
 -- Availability: 1.5.0
 CREATE OR REPLACE FUNCTION geography_overlaps(geography, geography) 
        RETURNS boolean 
diff --git a/postgis/geography_estimate.c b/postgis/geography_estimate.c
deleted file mode 100644 (file)
index f1fcc20..0000000
+++ /dev/null
@@ -1,1486 +0,0 @@
-/**********************************************************************
- * $Id: geography_estimate.c 4211 2009-06-25 03:32:43Z robe $
- *
- * PostGIS - Spatial Types for PostgreSQL
- * http://postgis.refractions.net
- * Copyright 2001-2006 Refractions Research Inc.
- * Copyright 2009 Mark Cave-Ayland
- *
- * This is free software; you can redistribute and/or modify it under
- * the terms of the GNU General Public Licence. See the COPYING file.
- *
- **********************************************************************/
-
-
-#include "postgres.h"
-#include "commands/vacuum.h"
-#include "nodes/relation.h"
-#include "parser/parsetree.h"
-#include "utils/lsyscache.h"
-#include "utils/syscache.h"
-
-#include "../postgis_config.h"
-#include "liblwgeom.h"
-#include "lwgeom_pg.h"
-
-/* Prototypes */
-Datum geography_gist_selectivity(PG_FUNCTION_ARGS);
-Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS);
-Datum geography_analyze(PG_FUNCTION_ARGS);
-
-
-/**
-* Place holder selectivity calculations to make the index work in
-* the absence of real selectivity calculations.
-*/
-
-#define DEFAULT_GEOGRAPHY_SEL 0.000005
-
-/**
- *     Assign a number to the postgis statistics kind
- *
- *     tgl suggested:
- *
- *     1-100:  reserved for assignment by the core Postgres project
- *     100-199: reserved for assignment by PostGIS
- *     200-9999: reserved for other globally-known stats kinds
- *     10000-32767: reserved for private site-local use
- *
- */
-#define STATISTIC_KIND_GEOGRAPHY 101
-
-/*
- * Define this if you want to use standard deviation based
- * histogram extent computation. If you do, you can also
- * tweak the deviation factor used in computation with
- * SDFACTOR.
- */
-#define SDFACTOR 3.25
-
-
-/* Information about the dimensions stored in the sample */
-struct dimensions
-{
-       char axis;
-       double min;
-       double max;
-};
-
-/* Main geography statistics structure, stored in pg_statistics */
-typedef struct GEOG_STATS_T
-{
-       /* Dimensionality of this column */
-       float4 dims;
-
-       /* x . y . z = total boxes in grid */
-       float4 unitsx;
-       float4 unitsy;
-       float4 unitsz;
-
-       /* average feature coverage of not-null features:
-          this is either an area for the case of a 2D column
-          or a volume in the case of a 3D column */
-       float4 avgFeatureCoverage;
-
-       /*
-        * average number of histogram cells
-        * covered by the sample not-null features
-        */
-       float4 avgFeatureCells;
-
-       /* histogram extent */
-       float4 xmin, ymin, zmin, xmax, ymax, zmax;
-
-       /* No. of geometries within this column (approx) */
-       float4 totalrows;
-
-       /*
-        * variable length # of floats for histogram
-        */
-       float4 value[1];
-}
-GEOG_STATS;
-
-
-/**
- * This function returns an estimate of the selectivity
- * of a search_box looking at data in the GEOG_STATS
- * structure.
- * */
-static float8
-estimate_selectivity(GBOX *box, GEOG_STATS *geogstats)
-{
-       int x, y, z;
-       int x_idx_min, x_idx_max, y_idx_min, y_idx_max, z_idx_min, z_idx_max;
-       double intersect_x, intersect_y, intersect_z;
-       double AOI = 1.0;
-       double cell_coverage = 1.0;
-       double sizex, sizey, sizez;     /* dimensions of histogram */
-       int unitsx, unitsy, unitsz; /* histogram grid size */
-       double value;
-       float overlapping_cells;
-       float avg_feat_cells;
-       double gain;
-       float8 selectivity;
-
-
-       /*
-        * Search box completely miss histogram extent
-        */
-       if ( box->xmax < geogstats->xmin || box->xmin > geogstats->xmax ||
-               box->ymax < geogstats->ymin || box->ymin > geogstats->ymax ||
-               box->zmax < geogstats->zmin || box->zmin > geogstats->zmax)
-       {
-               POSTGIS_DEBUG(3, " search_box does not overlaps histogram, returning 0");
-
-               return 0.0;
-       }
-
-       /*
-        * Search box completely contains histogram extent
-        */
-       if ( box->xmax >= geogstats->xmax && box->xmin <= geogstats->xmin &&
-               box->ymax >= geogstats->ymax && box->ymin <= geogstats->ymin &&
-               box->zmax >= geogstats->zmax && box->zmin <= geogstats->zmin)
-       {
-               POSTGIS_DEBUG(3, " search_box contains histogram, returning 1");
-
-               return 1.0;
-       }
-
-       sizex = geogstats->xmax - geogstats->xmin;
-       sizey = geogstats->ymax - geogstats->ymin;
-       sizez = geogstats->zmax - geogstats->zmin;
-
-       unitsx = geogstats->unitsx;
-       unitsy = geogstats->unitsy;
-       unitsz = geogstats->unitsz;
-
-       POSTGIS_DEBUGF(3, " histogram has %d unitsx, %d unitsy, %d unitsz", unitsx, unitsy, unitsz);
-       POSTGIS_DEBUGF(3, " histogram geosize is %fx%fx%f", sizex, sizey, sizez);
-
-       /* Work out the coverage depending upon the number of dimensions */
-       switch ((int)geogstats->dims)
-       {
-       case 0:
-       case 1:
-               /* Non-existent or multiple single points */
-               cell_coverage = 1;
-               break;
-
-       case 2:
-               /* Work in areas for 2 dimensions: we have to work slightly
-                  harder here to work out which 2 dimensions are valid */
-
-               if (sizez == 0)
-                       /* X and Y */
-                       cell_coverage = (sizex * sizey) / (unitsx * unitsy);
-               else if (sizey == 0)
-                       /* X and Z */
-                       cell_coverage = (sizex * sizez) / (unitsx * unitsz);
-               else if (sizex == 0)
-                       /* Y and Z */
-                       cell_coverage = (sizey * sizez) / (unitsy * unitsz);
-               break;
-
-       case 3:
-               /* Work in volumes for 3 dimensions */
-               cell_coverage = (sizex * sizey * sizey) / (unitsx * unitsy * unitsz);
-               break;
-       }
-
-       value = 0;
-
-       /* Find first overlapping x unit */
-       x_idx_min = (box->xmin-geogstats->xmin) / sizex * unitsx;
-       if (x_idx_min < 0)
-       {
-               POSTGIS_DEBUGF(3, " search_box overlaps %d x units outside (low) of histogram grid", -x_idx_min);
-
-               /* should increment the value somehow */
-               x_idx_min = 0;
-       }
-       if (x_idx_min >= unitsx)
-       {
-               POSTGIS_DEBUGF(3, " search_box overlaps %d x units outside (high) of histogram grid", x_idx_min-unitsx+1);
-
-               /* should increment the value somehow */
-               x_idx_min = unitsx-1;
-       }
-
-       /* Find first overlapping y unit */
-       y_idx_min = (box->ymin-geogstats->ymin) / sizey * unitsy;
-       if (y_idx_min <0)
-       {
-               POSTGIS_DEBUGF(3, " search_box overlaps %d y units outside (low) of histogram grid", -y_idx_min);
-
-               /* should increment the value somehow */
-               y_idx_min = 0;
-       }
-       if (y_idx_min >= unitsy)
-       {
-               POSTGIS_DEBUGF(3, " search_box overlaps %d y units outside (high) of histogram grid", y_idx_min-unitsy+1);
-
-               /* should increment the value somehow */
-               y_idx_min = unitsy-1;
-       }
-
-       /* Find first overlapping z unit */
-       z_idx_min = (box->zmin-geogstats->zmin) / sizez * unitsz;
-       if (z_idx_min <0)
-       {
-               POSTGIS_DEBUGF(3, " search_box overlaps %d z units outside (low) of histogram grid", -z_idx_min);
-
-               /* should increment the value somehow */
-               z_idx_min = 0;
-       }
-       if (z_idx_min >= unitsz)
-       {
-               POSTGIS_DEBUGF(3, " search_box overlaps %d z units outside (high) of histogram grid", z_idx_min-unitsz+1);
-
-               /* should increment the value somehow */
-               z_idx_min = unitsz-1;
-       }
-
-       /* Find last overlapping x unit */
-       x_idx_max = (box->xmax-geogstats->xmin) / sizex * unitsx;
-       if (x_idx_max <0)
-       {
-               /* should increment the value somehow */
-               x_idx_max = 0;
-       }
-       if (x_idx_max >= unitsx )
-       {
-               /* should increment the value somehow */
-               x_idx_max = unitsx-1;
-       }
-
-       /* Find last overlapping y unit */
-       y_idx_max = (box->ymax-geogstats->ymin) / sizey * unitsy;
-       if (y_idx_max <0)
-       {
-               /* should increment the value somehow */
-               y_idx_max = 0;
-       }
-       if (y_idx_max >= unitsy)
-       {
-               /* should increment the value somehow */
-               y_idx_max = unitsy-1;
-       }
-
-       /* Find last overlapping z unit */
-       z_idx_max = (box->zmax-geogstats->zmin) / sizez * unitsz;
-       if (z_idx_max <0)
-       {
-               /* should increment the value somehow */
-               z_idx_max = 0;
-       }
-       if (z_idx_max >= unitsz)
-       {
-               /* should increment the value somehow */
-               z_idx_max = unitsz-1;
-       }
-
-       /*
-        * the {x,y,z}_idx_{min,max}
-        * define the grid squares that the box intersects
-        */
-       for (z=z_idx_min; z<=z_idx_max; z++)
-       {
-               for (y=y_idx_min; y<=y_idx_max; y++)
-               {
-                       for (x=x_idx_min; x<=x_idx_max; x++)
-                       {
-                               double val;
-                               double gain;
-
-                               val = geogstats->value[x + y * unitsx + z * unitsx * unitsy];
-
-                               /*
-                               * Of the cell value we get
-                               * only the overlap fraction.
-                               */
-
-                               intersect_x = Min(box->xmax, geogstats->xmin + (x+1) * sizex / unitsx) - Max(box->xmin, geogstats->xmin + x * sizex / unitsx);
-                               intersect_y = Min(box->ymax, geogstats->ymin + (y+1) * sizey / unitsy) - Max(box->ymin, geogstats->ymin + y * sizey / unitsy);
-                               intersect_z = Min(box->zmax, geogstats->zmin + (z+1) * sizez / unitsz) - Max(box->zmin, geogstats->zmin + z * sizez / unitsz);
-
-
-                               switch ((int)geogstats->dims)
-                               {
-                               case 0:
-                                       AOI = 1;
-                               case 1:
-                                       /* Working in 1 dimension: work out the value we need
-                                          for AOI */
-                                       if (sizex == 0 && sizey == 0)
-                                               AOI = intersect_z;
-                                       else if (sizex == 0 && sizez == 0)
-                                               AOI = intersect_y;
-                                       else if (sizey == 0 && sizez == 0)
-                                               AOI = intersect_x;
-                                       break;
-
-                               case 2:
-                                       /* Working in 2 dimensions: work out which 2 values we
-                                          need for AOI */
-                                       if (sizex == 0)
-                                               AOI = intersect_y * intersect_z;
-                                       else if (sizey == 0)
-                                               AOI = intersect_x * intersect_z;
-                                       else if (sizez == 0)
-                                               AOI = intersect_x * intersect_y;
-                                       break;
-
-                               case 3:
-                                       /* Working in 3 dimensions: use all 3 values */
-                                       AOI = intersect_x * intersect_y * intersect_z;
-                                       break;
-                               }
-
-                               gain = AOI/cell_coverage;
-
-                               POSTGIS_DEBUGF(4, " [%d,%d,%d] cell val %.15f",
-                                              x, y, z, val);
-                               POSTGIS_DEBUGF(4, " [%d,%d,%d] AOI %.15f",
-                                              x, y, z, AOI);
-                               POSTGIS_DEBUGF(4, " [%d,%d,%d] gain %.15f",
-                                              x, y, z, gain);
-
-                               val *= gain;
-
-                               POSTGIS_DEBUGF(4, " [%d,%d,%d] adding %.15f to value",
-                                              x, y, z, val);
-
-                               value += val;
-                       }
-               }
-       }
-
-
-       /*
-        * If the search_box is a point, it will
-        * overlap a single cell and thus get
-        * it's value, which is the fraction of
-        * samples (we can presume of row set also)
-        * which bumped to that cell.
-        *
-        * If the table features are points, each
-        * of them will overlap a single histogram cell.
-        * Our search_box value would then be correctly
-        * computed as the sum of the bumped cells values.
-        *
-        * If both our search_box AND the sample features
-        * overlap more then a single histogram cell we
-        * need to consider the fact that our sum computation
-        * will have many duplicated included. E.g. each
-        * single sample feature would have contributed to
-        * raise the search_box value by as many times as
-        * many cells in the histogram are commonly overlapped
-        * by both searc_box and feature. We should then
-        * divide our value by the number of cells in the virtual
-        * 'intersection' between average feature cell occupation
-        * and occupation of the search_box. This is as
-        * fuzzy as you understand it :)
-        *
-        * Consistency check: whenever the number of cells is
-        * one of whichever part (search_box_occupation,
-        * avg_feature_occupation) the 'intersection' must be 1.
-        * If sounds that our 'intersaction' is actually the
-        * minimun number between search_box_occupation and
-        * avg_feat_occupation.
-        *
-        */
-       overlapping_cells = (x_idx_max-x_idx_min+1) * (y_idx_max-y_idx_min+1) * (z_idx_max-z_idx_min+1);
-       avg_feat_cells = geogstats->avgFeatureCells;
-
-       POSTGIS_DEBUGF(3, " search_box overlaps %f cells", overlapping_cells);
-       POSTGIS_DEBUGF(3, " avg feat overlaps %f cells", avg_feat_cells);
-
-       if ( ! overlapping_cells )
-       {
-               POSTGIS_DEBUG(3, " no overlapping cells, returning 0.0");
-
-               return 0.0;
-       }
-
-       gain = 1 / Min(overlapping_cells, avg_feat_cells);
-       selectivity = value * gain;
-
-       POSTGIS_DEBUGF(3, " SUM(ov_histo_cells)=%f", value);
-       POSTGIS_DEBUGF(3, " gain=%f", gain);
-       POSTGIS_DEBUGF(3, " selectivity=%f", selectivity);
-
-       /* prevent rounding overflows */
-       if (selectivity > 1.0) selectivity = 1.0;
-       else if (selectivity < 0) selectivity = 0.0;
-
-       return selectivity;
-}
-
-
-/**
- * This function should return an estimation of the number of
- * rows returned by a query involving an overlap check
- * ( it's the restrict function for the && operator )
- *
- * It can make use (if available) of the statistics collected
- * by the geometry analyzer function.
- *
- * Note that the good work is done by estimate_selectivity() above.
- * This function just tries to find the search_box, loads the statistics
- * and invoke the work-horse.
- *
- */
-PG_FUNCTION_INFO_V1(geography_gist_selectivity);
-Datum geography_gist_selectivity(PG_FUNCTION_ARGS)
-{
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-
-       /* Oid operator = PG_GETARG_OID(1); */
-       List *args = (List *) PG_GETARG_POINTER(2);
-       /* int varRelid = PG_GETARG_INT32(3); */
-       Oid relid;
-       HeapTuple stats_tuple;
-       GEOG_STATS *geogstats;
-       /*
-        * This is to avoid casting the corresponding
-        * "type-punned" pointer, which would break
-        * "strict-aliasing rules".
-        */
-       GEOG_STATS **gsptr=&geogstats;
-       int geogstats_nvalues = 0;
-       Node *other;
-       Var *self;
-       GBOX search_box;
-       float8 selectivity = 0;
-
-       POSTGIS_DEBUG(2, "geography_gist_selectivity called");
-
-       /* Fail if not a binary opclause (probably shouldn't happen) */
-       if (list_length(args) != 2)
-       {
-               POSTGIS_DEBUG(3, "geography_gist_selectivity: not a binary opclause");
-
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-       /*
-        * This selectivity function is invoked by a clause of the form <arg> && <arg>
-        *
-        * In typical usage, one argument will be a column reference, while the other will
-        * be a geography constant; set self to point to the column argument and other
-        * to point to the constant argument.
-        */
-       other = (Node *) linitial(args);
-       if ( ! IsA(other, Const) )
-       {
-               self = (Var *)other;
-               other = (Node *) lsecond(args);
-       }
-       else
-       {
-               self = (Var *) lsecond(args);
-       }
-
-       if ( ! IsA(other, Const) )
-       {
-               POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity");
-
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-       /*
-       * We don't have a nice <const> && <var> or <var> && <const> 
-       * situation here. <const> && <const> would probably get evaluated
-       * away by PgSQL earlier on. <func> && <const> is harder, and the
-       * case we get often is <const> && ST_Expand(<var>), which does 
-       * actually have a subtly different selectivity than a bae
-       * <const> && <var> call. It's calculatable though, by expanding
-       * every cell in the histgram appropriately.
-       * 
-       * Discussion: http://trac.osgeo.org/postgis/ticket/1828
-       *
-       * To do? Do variable selectivity based on the <func> node.
-       */
-       if ( ! IsA(self, Var) )
-       {
-               POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity");
-//             PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
-               PG_RETURN_FLOAT8(0.33333);
-       }
-       
-       /* Convert coordinates to 3D geodesic */
-       FLAGS_SET_GEODETIC(search_box.flags, 1);
-       if ( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
-       {
-               POSTGIS_DEBUG(3, " search box cannot be calculated");
-               PG_RETURN_FLOAT8(0.0);
-       }
-
-       POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g %.15g, %.15g %.15g %.15g",
-                      search_box.xmin, search_box.ymin, search_box.zmin,
-                      search_box.xmax, search_box.ymax, search_box.zmax);
-
-       /*
-        * Get pg_statistic row
-        */
-       relid = getrelid(self->varno, root->parse->rtable);
-
-       stats_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno));
-       if ( ! stats_tuple )
-       {
-               POSTGIS_DEBUG(3, " No statistics, returning default estimate");
-
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-
-       if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
-#if POSTGIS_PGSQL_VERSION >= 85
-                               NULL,
-#endif
-                               (float4 **)gsptr, &geogstats_nvalues) )
-       {
-               POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geography selectivity");
-
-               ReleaseSysCache(stats_tuple);
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-       POSTGIS_DEBUGF(4, " %d read from stats", geogstats_nvalues);
-
-       POSTGIS_DEBUGF(4, " histo: xmin,ymin,zmin: %f,%f,%f", geogstats->xmin, geogstats->ymin, geogstats->zmin);
-       POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f,%f", geogstats->xmax, geogstats->ymax, geogstats->zmax);
-       POSTGIS_DEBUGF(4, " histo: unitsx: %f", geogstats->unitsx);
-       POSTGIS_DEBUGF(4, " histo: unitsy: %f", geogstats->unitsy);
-       POSTGIS_DEBUGF(4, " histo: unitsz: %f", geogstats->unitsz);
-       POSTGIS_DEBUGF(4, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage);
-       POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells);
-
-       /*
-        * Do the estimation
-        */
-       selectivity = estimate_selectivity(&search_box, geogstats);
-
-       POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
-
-       free_attstatsslot(0, NULL, 0, (float *)geogstats, geogstats_nvalues);
-       ReleaseSysCache(stats_tuple);
-       PG_RETURN_FLOAT8(selectivity);
-}
-
-
-/**
-* JOIN selectivity in the GiST && operator
-* for all PG versions
-*/
-PG_FUNCTION_INFO_V1(geography_gist_join_selectivity);
-Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS)
-{
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-
-       /* Oid operator = PG_GETARG_OID(1); */
-       List *args = (List *) PG_GETARG_POINTER(2);
-       JoinType jointype = (JoinType) PG_GETARG_INT16(3);
-
-       Node *arg1, *arg2;
-       Var *var1, *var2;
-       Oid relid1, relid2;
-
-       HeapTuple stats1_tuple, stats2_tuple;
-       GEOG_STATS *geogstats1, *geogstats2;
-       /*
-       * These are to avoid casting the corresponding
-       * "type-punned" pointers, which would break
-       * "strict-aliasing rules".
-       */
-       GEOG_STATS **gs1ptr=&geogstats1, **gs2ptr=&geogstats2;
-       int geogstats1_nvalues = 0, geogstats2_nvalues = 0;
-       float8 selectivity1 = 0.0, selectivity2 = 0.0;
-       float4 num1_tuples = 0.0, num2_tuples = 0.0;
-       float4 total_tuples = 0.0, rows_returned = 0.0;
-       GBOX search_box;
-
-
-       /**
-       * Join selectivity algorithm. To calculation the selectivity we
-       * calculate the intersection of the two column sample extents,
-       * sum the results, and then multiply by two since for each
-       * geometry in col 1 that intersects a geometry in col 2, the same
-       * will also be true.
-       */
-
-       POSTGIS_DEBUGF(3, "geography_gist_join_selectivity called with jointype %d", jointype);
-
-       /*
-       * We'll only respond to an inner join/unknown context join
-       */
-       if (jointype != JOIN_INNER)
-       {
-               elog(NOTICE, "geography_gist_join_selectivity called with incorrect join type");
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-       /*
-       * Determine the oids of the geometry columns we are working with
-       */
-       arg1 = (Node *) linitial(args);
-       arg2 = (Node *) lsecond(args);
-
-       if (!IsA(arg1, Var) || !IsA(arg2, Var))
-       {
-               elog(DEBUG1, "geography_gist_join_selectivity called with arguments that are not column references");
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-       var1 = (Var *)arg1;
-       var2 = (Var *)arg2;
-
-       relid1 = getrelid(var1->varno, root->parse->rtable);
-       relid2 = getrelid(var2->varno, root->parse->rtable);
-
-       POSTGIS_DEBUGF(3, "Working with relations oids: %d %d", relid1, relid2);
-
-       /* Read the stats tuple from the first column */
-       stats1_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno));
-       if ( ! stats1_tuple )
-       {
-               POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");
-
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-       if ( ! get_attstatsslot(stats1_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
-#if POSTGIS_PGSQL_VERSION >= 85
-                               NULL,
-#endif
-                               (float4 **)gs1ptr, &geogstats1_nvalues) )
-       {
-               POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity");
-
-               ReleaseSysCache(stats1_tuple);
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-
-       /* Read the stats tuple from the second column */
-       stats2_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno));
-       if ( ! stats2_tuple )
-       {
-               POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");
-
-               free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues);
-               ReleaseSysCache(stats1_tuple);
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-       if ( ! get_attstatsslot(stats2_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
-#if POSTGIS_PGSQL_VERSION >= 85
-                               NULL,
-#endif
-                               (float4 **)gs2ptr, &geogstats2_nvalues) )
-       {
-               POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity");
-
-               free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues);
-               ReleaseSysCache(stats2_tuple);
-               ReleaseSysCache(stats1_tuple);
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-
-       /**
-       * Setup the search box - this is the intersection of the two column
-       * extents.
-       */
-       search_box.xmin = Max(geogstats1->xmin, geogstats2->xmin);
-       search_box.ymin = Max(geogstats1->ymin, geogstats2->ymin);
-       search_box.zmin = Max(geogstats1->zmin, geogstats2->zmin);
-       search_box.xmax = Min(geogstats1->xmax, geogstats2->xmax);
-       search_box.ymax = Min(geogstats1->ymax, geogstats2->ymax);
-       search_box.zmax = Min(geogstats1->zmax, geogstats2->zmax);
-
-       /* If the extents of the two columns don't intersect, return zero */
-       if (search_box.xmin > search_box.xmax || search_box.ymin > search_box.ymax ||
-               search_box.zmin > search_box.zmax)
-               PG_RETURN_FLOAT8(0.0);
-
-       POSTGIS_DEBUGF(3, " -- geomstats1 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats1->xmin, geogstats1->ymin, geogstats1->zmin, geogstats1->xmax, geogstats1->ymax, geogstats1->zmax);
-       POSTGIS_DEBUGF(3, " -- geomstats2 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats2->xmin, geogstats2->ymin, geogstats2->zmin, geogstats2->xmax, geogstats2->ymax, geogstats2->zmax);
-       POSTGIS_DEBUGF(3, " -- calculated intersection box is : %.15g %.15g %.15g, %.15g %.15g %.15g", search_box.xmin, search_box.ymin, search_box.zmin, search_box.xmax, search_box.ymax, search_box.zmax);
-
-
-       /* Do the selectivity */
-       selectivity1 = estimate_selectivity(&search_box, geogstats1);
-       selectivity2 = estimate_selectivity(&search_box, geogstats2);
-
-       POSTGIS_DEBUGF(3, "selectivity1: %.15g   selectivity2: %.15g", selectivity1, selectivity2);
-
-       /*
-       * OK, so before we calculate the join selectivity we also need to
-       * know the number of tuples in each of the columns since
-       * estimate_selectivity returns the number of estimated tuples
-       * divided by the total number of tuples.
-       */
-       num1_tuples = geogstats1->totalrows;
-       num2_tuples = geogstats2->totalrows;
-
-       /* Free the statistic tuples */
-       free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues);
-       ReleaseSysCache(stats1_tuple);
-
-       free_attstatsslot(0, NULL, 0, (float *)geogstats2, geogstats2_nvalues);
-       ReleaseSysCache(stats2_tuple);
-
-       /*
-       * Finally calculate the estimate of the number of rows returned
-       *
-       *    = 2 * (nrows from col1 + nrows from col2) /
-       *       total nrows in col1 x total nrows in col2
-       *
-       * The factor of 2 accounts for the fact that for each tuple in
-       * col 1 matching col 2,
-       * there will be another match in col 2 matching col 1
-       */
-       total_tuples = num1_tuples * num2_tuples;
-       rows_returned = 2 * ((num1_tuples * selectivity1) + (num2_tuples * selectivity2));
-
-       POSTGIS_DEBUGF(3, "Rows from rel1: %f", num1_tuples * selectivity1);
-       POSTGIS_DEBUGF(3, "Rows from rel2: %f", num2_tuples * selectivity2);
-       POSTGIS_DEBUGF(3, "Estimated rows returned: %f", rows_returned);
-
-       /*
-       * One (or both) tuple count is zero...
-       * We return default selectivity estimate.
-       * We could probably attempt at an estimate
-       * w/out looking at tables tuple count, with
-       * a function of selectivity1, selectivity2.
-       */
-       if ( ! total_tuples )
-       {
-               POSTGIS_DEBUG(3, "Total tuples == 0, returning default join selectivity");
-
-               PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
-       }
-
-       if ( rows_returned > total_tuples )
-               PG_RETURN_FLOAT8(1.0);
-
-       PG_RETURN_FLOAT8(rows_returned / total_tuples);
-}
-
-
-/**
- * This function is called by the analyze function iff
- * the geography_analyze() function give it its pointer
- * (this is always the case so far).
- * The geography_analyze() function is also responsible
- * of deciding the number of "sample" rows we will receive
- * here. It is able to give use other 'custom' data, but we
- * won't use them so far.
- *
- * Our job is to build some statistics on the sample data
- * for use by operator estimators.
- *
- * Currently we only need statistics to estimate the number of rows
- * overlapping a given extent (estimation function bound
- * to the && operator).
- *
- */
-
-static void
-compute_geography_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
-                        int samplerows, double totalrows)
-{
-       MemoryContext old_context;
-       GEOG_STATS *geogstats;
-
-       GBOX gbox;
-
-       GBOX *sample_extent = NULL;
-       GBOX **sampleboxes;
-       GBOX histobox;
-       int histocells;
-       double sizex, sizey, sizez, edgelength;
-       int unitsx = 0, unitsy = 0, unitsz = 0;
-       int geog_stats_size;
-       struct dimensions histodims[3];
-       int ndims;
-
-       double total_width = 0;
-       int notnull_cnt = 0, examinedsamples = 0, total_count_cells=0, total_cells_coverage = 0;
-
-       /* for standard deviation */
-       double avgLOWx, avgLOWy, avgLOWz, avgHIGx, avgHIGy, avgHIGz;
-       double sumLOWx = 0, sumLOWy = 0, sumLOWz = 0, sumHIGx = 0, sumHIGy = 0, sumHIGz = 0;
-       double sdLOWx = 0, sdLOWy = 0, sdLOWz = 0, sdHIGx = 0, sdHIGy = 0, sdHIGz = 0;
-       GBOX *newhistobox = NULL;
-
-       bool isnull;
-       int i;
-
-       POSTGIS_DEBUG(2, "compute_geography_stats called");
-
-       /*
-        * We'll build an histogram having from 40 to 400 boxesPerSide
-        * Total number of cells is determined by attribute stat
-        * target. It can go from  1600 to 160000 (stat target: 10,1000)
-        */
-       histocells = 160 * stats->attr->attstattarget;
-
-       /*
-        * Memory to store the bounding boxes from all of the sampled rows
-        */
-       sampleboxes = palloc(sizeof(GBOX *) * samplerows);
-
-       /* Mark the GBOX as being geodetic */
-       FLAGS_SET_GEODETIC(gbox.flags, 1);
-
-       /*
-        * First scan:
-        *  o find extent of the sample rows
-        *  o count null-infinite/not-null values
-        *  o compute total_width
-        *  o compute total features's box area (for avgFeatureArea)
-        *  o sum features box coordinates (for standard deviation)
-        */
-       for (i = 0; i < samplerows; i++)
-       {
-               Datum datum;
-               GSERIALIZED *serialized;
-               LWGEOM *geometry;
-               
-               /* Fetch the datum and cast it into a geography */
-               datum = fetchfunc(stats, i, &isnull);
-
-               /* Skip nulls */
-               if (isnull)
-                       continue;
-
-               serialized = (GSERIALIZED*)PG_DETOAST_DATUM(datum);
-               geometry = lwgeom_from_gserialized(serialized);
-               /* Convert coordinates to 3D geodesic */
-               if ( ! lwgeom_calculate_gbox_geodetic(geometry, &gbox) )
-               {
-                       /* Unable to obtain or calculate a bounding box */
-                       POSTGIS_DEBUGF(3, "skipping geometry at position %d", i);
-
-                       continue;
-               }
-
-               /*
-                * Skip infinite geoms
-                */
-               if ( ! finite(gbox.xmin) || ! finite(gbox.xmax) ||
-                       ! finite(gbox.ymin) || ! finite(gbox.ymax) ||
-                       ! finite(gbox.zmin) || ! finite(gbox.zmax) )
-               {
-                       POSTGIS_DEBUGF(3, " skipped infinite geometry at position %d", i);
-
-                       continue;
-               }
-
-               /*
-                * Store bounding box in array
-                */
-               sampleboxes[notnull_cnt] = palloc(sizeof(GBOX));
-               memcpy(sampleboxes[notnull_cnt], &gbox, sizeof(GBOX));
-
-               /*
-                * Add to sample extent union
-                */
-               if ( ! sample_extent )
-               {
-                       sample_extent = palloc(sizeof(GBOX));
-                       memcpy(sample_extent, &gbox, sizeof(GBOX));
-               }
-               else
-               {
-                       sample_extent->xmax = Max(sample_extent->xmax, gbox.xmax);
-                       sample_extent->ymax = Max(sample_extent->ymax, gbox.ymax);
-                       sample_extent->zmax = Max(sample_extent->zmax, gbox.zmax);
-                       sample_extent->xmin = Min(sample_extent->xmin, gbox.xmin);
-                       sample_extent->ymin = Min(sample_extent->ymin, gbox.ymin);
-                       sample_extent->zmin = Min(sample_extent->zmin, gbox.zmin);
-               }
-
-               /** TODO: ask if we need geom or bvol size for stawidth */
-               total_width += VARSIZE(serialized);
-
-               /*
-                * Add bvol coordinates to sum for standard deviation
-                * computation.
-                */
-               sumLOWx += gbox.xmin;
-               sumLOWy += gbox.ymin;
-               sumLOWz += gbox.zmin;
-               sumHIGx += gbox.xmax;
-               sumHIGy += gbox.ymax;
-               sumHIGz += gbox.zmax;
-
-               notnull_cnt++;
-
-               /* give backend a chance of interrupting us */
-               vacuum_delay_point();
-       }
-
-       POSTGIS_DEBUG(3, "End of 1st scan:");
-       POSTGIS_DEBUGF(3, " Sample extent (min, max): (%g %g %g), (%g %g %g)", sample_extent->xmin, sample_extent->ymin,
-                      sample_extent->zmin, sample_extent->xmax, sample_extent->ymax, sample_extent->zmax);
-       POSTGIS_DEBUGF(3, " No. of geometries sampled: %d", samplerows);
-       POSTGIS_DEBUGF(3, " No. of non-null geometries sampled: %d", notnull_cnt);
-
-       if ( ! notnull_cnt )
-       {
-               elog(NOTICE, " no notnull values, invalid stats");
-               stats->stats_valid = false;
-               return;
-       }
-
-       POSTGIS_DEBUG(3, "Standard deviation filter enabled");
-
-       /*
-        * Second scan:
-        *  o compute standard deviation
-        */
-       avgLOWx = sumLOWx / notnull_cnt;
-       avgLOWy = sumLOWy / notnull_cnt;
-       avgLOWz = sumLOWz / notnull_cnt;
-       avgHIGx = sumHIGx / notnull_cnt;
-       avgHIGy = sumHIGy / notnull_cnt;
-       avgHIGz = sumHIGz / notnull_cnt;
-
-       for (i = 0; i < notnull_cnt; i++)
-       {
-               GBOX *box;
-               box = (GBOX *)sampleboxes[i];
-
-               sdLOWx += (box->xmin - avgLOWx) * (box->xmin - avgLOWx);
-               sdLOWy += (box->ymin - avgLOWy) * (box->ymin - avgLOWy);
-               sdLOWz += (box->zmin - avgLOWz) * (box->zmin - avgLOWz);
-               sdHIGx += (box->xmax - avgHIGx) * (box->xmax - avgHIGx);
-               sdHIGy += (box->ymax - avgHIGy) * (box->ymax - avgHIGy);
-               sdHIGz += (box->zmax - avgHIGz) * (box->zmax - avgHIGz);
-       }
-       sdLOWx = sqrt(sdLOWx / notnull_cnt);
-       sdLOWy = sqrt(sdLOWy / notnull_cnt);
-       sdLOWz = sqrt(sdLOWz / notnull_cnt);
-       sdHIGx = sqrt(sdHIGx / notnull_cnt);
-       sdHIGy = sqrt(sdHIGy / notnull_cnt);
-       sdHIGz = sqrt(sdHIGz / notnull_cnt);
-
-       POSTGIS_DEBUG(3, " standard deviations:");
-       POSTGIS_DEBUGF(3, "  LOWx - avg:%f sd:%f", avgLOWx, sdLOWx);
-       POSTGIS_DEBUGF(3, "  LOWy - avg:%f sd:%f", avgLOWy, sdLOWy);
-       POSTGIS_DEBUGF(3, "  LOWz - avg:%f sd:%f", avgLOWz, sdLOWz);
-       POSTGIS_DEBUGF(3, "  HIGx - avg:%f sd:%f", avgHIGx, sdHIGx);
-       POSTGIS_DEBUGF(3, "  HIGy - avg:%f sd:%f", avgHIGy, sdHIGy);
-       POSTGIS_DEBUGF(3, "  HIGz - avg:%f sd:%f", avgHIGz, sdHIGz);
-
-       histobox.xmin = Max((avgLOWx - SDFACTOR * sdLOWx), sample_extent->xmin);
-       histobox.ymin = Max((avgLOWy - SDFACTOR * sdLOWy), sample_extent->ymin);
-       histobox.zmin = Max((avgLOWz - SDFACTOR * sdLOWz), sample_extent->zmin);
-       histobox.xmax = Min((avgHIGx + SDFACTOR * sdHIGx), sample_extent->xmax);
-       histobox.ymax = Min((avgHIGy + SDFACTOR * sdHIGy), sample_extent->ymax);
-       histobox.zmax = Min((avgHIGz + SDFACTOR * sdHIGz), sample_extent->zmax);
-
-       POSTGIS_DEBUGF(3, " sd_extent: xmin, ymin, zmin: %f, %f, %f",
-                      histobox.xmin, histobox.ymin, histobox.zmin);
-       POSTGIS_DEBUGF(3, " sd_extent: xmax, ymax, zmax: %f, %f, %f",
-                      histobox.xmax, histobox.ymax, histobox.zmax);
-
-       /*
-        * Third scan:
-        *   o skip hard deviants
-        *   o compute new histogram box
-        */
-       for (i = 0; i < notnull_cnt; i++)
-       {
-               GBOX *box;
-               box = (GBOX *)sampleboxes[i];
-
-               if ( box->xmin > histobox.xmax || box->xmax < histobox.xmin ||
-                       box->ymin > histobox.ymax || box->ymax < histobox.ymin ||
-                       box->zmin > histobox.zmax || box->zmax < histobox.zmin)
-               {
-                       POSTGIS_DEBUGF(4, " feat %d is an hard deviant, skipped", i);
-
-                       sampleboxes[i] = NULL;
-                       continue;
-               }
-
-               if ( ! newhistobox )
-               {
-                       newhistobox = palloc(sizeof(GBOX));
-                       memcpy(newhistobox, box, sizeof(GBOX));
-               }
-               else
-               {
-                       if ( box->xmin < newhistobox->xmin )
-                               newhistobox->xmin = box->xmin;
-                       if ( box->ymin < newhistobox->ymin )
-                               newhistobox->ymin = box->ymin;
-                       if ( box->zmin < newhistobox->zmin )
-                               newhistobox->zmin = box->zmin;
-                       if ( box->xmax > newhistobox->xmax )
-                               newhistobox->xmax = box->xmax;
-                       if ( box->ymax > newhistobox->ymax )
-                               newhistobox->ymax = box->ymax;
-                       if ( box->zmax > newhistobox->zmax )
-                               newhistobox->zmax = box->zmax;
-               }
-       }
-
-       /* If everything was a deviant, the new histobox is the same as the old histobox */
-       if ( ! newhistobox )
-       {
-               newhistobox = palloc(sizeof(GBOX));
-               memcpy(newhistobox, &histobox, sizeof(GBOX));
-       }
-
-       /*
-        * Set histogram extent as the intersection between
-        * standard deviation based histogram extent
-        * and computed sample extent after removal of
-        * hard deviants (there might be no hard deviants).
-        */
-       if ( histobox.xmin < newhistobox->xmin )
-               histobox.xmin = newhistobox->xmin;
-       if ( histobox.ymin < newhistobox->ymin )
-               histobox.ymin = newhistobox->ymin;
-       if ( histobox.zmin < newhistobox->zmin )
-               histobox.zmin = newhistobox->zmin;
-       if ( histobox.xmax > newhistobox->xmax )
-               histobox.xmax = newhistobox->xmax;
-       if ( histobox.ymax > newhistobox->ymax )
-               histobox.ymax = newhistobox->ymax;
-       if ( histobox.zmax > newhistobox->zmax )
-               histobox.zmax = newhistobox->zmax;
-
-       POSTGIS_DEBUGF(3, " histogram_extent: xmin, ymin, zmin: %f, %f, %f",
-                      histobox.xmin, histobox.ymin, histobox.zmin);
-       POSTGIS_DEBUGF(3, " histogram_extent: xmax, ymax, zmax: %f, %f, %f",
-                      histobox.xmax, histobox.ymax, histobox.zmax);
-
-       /* Calculate the size of each dimension */
-       sizex = histobox.xmax - histobox.xmin;
-       sizey = histobox.ymax - histobox.ymin;
-       sizez = histobox.zmax - histobox.zmin;
-
-       /* In order to calculate a suitable aspect ratio for the histogram, we need
-          to work out how many dimensions exist within our sample data (which we
-          assume is representative of the whole data) */
-       ndims = 0;
-       if (sizex != 0)
-       {
-               histodims[ndims].axis = 'X';
-               histodims[ndims].min = histobox.xmin;
-               histodims[ndims].max = histobox.xmax;
-               ndims++;
-       }
-
-       if (sizey != 0)
-       {
-               histodims[ndims].axis = 'Y';
-               histodims[ndims].min = histobox.ymin;
-               histodims[ndims].max = histobox.ymax;
-
-               ndims++;
-       }
-
-       if (sizez != 0)
-       {
-               histodims[ndims].axis = 'Z';
-               histodims[ndims].min = histobox.zmin;
-               histodims[ndims].max = histobox.zmax;
-
-               ndims++;
-       }
-
-       /* Based upon the number of dimensions, we now work out the number of units in each dimension.
-          The number of units is defined as the number of cell blocks in each dimension which make
-          up the total number of histocells; i.e. unitsx * unitsy * unitsz = histocells */
-
-       /* Note: geodetic data is currently indexed in 3 dimensions; however code for remaining dimensions
-          is also included to allow for indexing 3D cartesian data at a later date */
-
-       POSTGIS_DEBUGF(3, "Number of dimensions in sample set: %d", ndims);
-
-       switch (ndims)
-       {
-       case 0:
-               /* An empty column, or multiple points in exactly the same
-                  position in space */
-               unitsx = 1;
-               unitsy = 1;
-               unitsz = 1;
-               histocells = 1;
-               break;
-
-       case 1:
-               /* Sample data all lies on a single line, so set the correct
-                  units variables depending upon which axis is in use */
-               for (i = 0; i < ndims; i++)
-               {
-                       if ( (histodims[i].max - histodims[i].min) != 0)
-                       {
-                               /* We've found the non-zero dimension, so set the
-                                  units variables accordingly */
-                               switch (histodims[i].axis)
-                               {
-                               case 'X':
-                                       unitsx = histocells;
-                                       unitsy = 1;
-                                       unitsz = 1;
-                                       break;
-
-                               case 'Y':
-                                       unitsx = 1;
-                                       unitsy = histocells;
-                                       unitsz = 1;
-                                       break;
-
-                               case 'Z':
-                                       unitsx = 1;
-                                       unitsy = 1;
-                                       unitsz = histocells;
-                                       break;
-                               }
-                       }
-               }
-               break;
-
-       case 2:
-               /* Sample data lies within 2D space: divide the total area by the total
-                  number of cells, and thus work out the edge size of the unit block */
-               edgelength = sqrt(
-                                Abs(histodims[0].max - histodims[0].min) *
-                                Abs(histodims[1].max - histodims[1].min) / (double)histocells
-                            );
-
-               /* The calculation is easy; the harder part is to work out which dimensions
-                  we actually have to set the units variables appropriately */
-               if (histodims[0].axis == 'X' && histodims[1].axis == 'Y')
-               {
-                       /* X and Y */
-                       unitsx = Abs(histodims[0].max - histodims[0].min) / edgelength;
-                       unitsy = Abs(histodims[1].max - histodims[1].min) / edgelength;
-                       unitsz = 1;
-               }
-               else if (histodims[0].axis == 'Y' && histodims[1].axis == 'X')
-               {
-                       /* Y and X */
-                       unitsx = Abs(histodims[1].max - histodims[1].min) / edgelength;
-                       unitsy = Abs(histodims[0].max - histodims[0].min) / edgelength;
-                       unitsz = 1;
-               }
-               else if (histodims[0].axis == 'X' && histodims[1].axis == 'Z')
-               {
-                       /* X and Z */
-                       unitsx = Abs(histodims[0].max - histodims[0].min) / edgelength;
-                       unitsy = 1;
-                       unitsz = Abs(histodims[1].max - histodims[1].min) / edgelength;
-               }
-               else if (histodims[0].axis == 'Z' && histodims[1].axis == 'X')
-               {
-                       /* Z and X */
-                       unitsx = Abs(histodims[1].max - histodims[1].min) / edgelength;
-                       unitsy = 1;
-                       unitsz = Abs(histodims[0].max - histodims[0].min) / edgelength;
-               }
-               else if (histodims[0].axis == 'Y' && histodims[1].axis == 'Z')
-               {
-                       /* Y and Z */
-                       unitsx = 1;
-                       unitsy = Abs(histodims[0].max - histodims[0].min) / edgelength;
-                       unitsz = Abs(histodims[1].max - histodims[1].min) / edgelength;
-               }
-               else if (histodims[0].axis == 'Z' && histodims[1].axis == 'Y')
-               {
-                       /* Z and Y */
-                       unitsx = 1;
-                       unitsy = Abs(histodims[1].max - histodims[1].min) / edgelength;
-                       unitsz = Abs(histodims[0].max - histodims[0].min) / edgelength;
-               }
-
-               break;
-
-       case 3:
-               /* Sample data lies within 3D space: divide the total volume by the total
-                  number of cells, and thus work out the edge size of the unit block */
-               edgelength = pow(
-                                Abs(histodims[0].max - histodims[0].min) *
-                                Abs(histodims[1].max - histodims[1].min) *
-                                Abs(histodims[2].max - histodims[2].min) / (double)histocells,
-                                (double)1/3);
-
-               /* Units are simple in 3 dimensions */
-               unitsx = Abs(histodims[0].max - histodims[0].min) / edgelength;
-               unitsy = Abs(histodims[1].max - histodims[1].min) / edgelength;
-               unitsz = Abs(histodims[2].max - histodims[2].min) / edgelength;
-
-               break;
-       }
-
-       POSTGIS_DEBUGF(3, " computed histogram grid size (X,Y,Z): %d x %d x %d (%d out of %d cells)", unitsx, unitsy, unitsz, unitsx * unitsy * unitsz, histocells);
-
-       /*
-        * Create the histogram (GEOG_STATS)
-        */
-       old_context = MemoryContextSwitchTo(stats->anl_context);
-       geog_stats_size = sizeof(GEOG_STATS) + (histocells - 1) * sizeof(float4);
-       geogstats = palloc(geog_stats_size);
-       MemoryContextSwitchTo(old_context);
-
-       geogstats->dims = ndims;
-       geogstats->xmin = histobox.xmin;
-       geogstats->ymin = histobox.ymin;
-       geogstats->zmin = histobox.zmin;
-       geogstats->xmax = histobox.xmax;
-       geogstats->ymax = histobox.ymax;
-       geogstats->zmax = histobox.zmax;
-       geogstats->unitsx = unitsx;
-       geogstats->unitsy = unitsy;
-       geogstats->unitsz = unitsz;
-       geogstats->totalrows = totalrows;
-
-       /* Initialize all values to 0 */
-       for (i = 0; i < histocells; i++)
-               geogstats->value[i] = 0;
-
-
-       /*
-        * Fourth scan:
-        *  o fill histogram values with the number of
-        *    features' bbox overlaps: a feature's bvol
-        *    can fully overlap (1) or partially overlap
-        *    (fraction of 1) an histogram cell.
-        *
-        *  o compute total cells occupation
-        *
-        */
-
-       POSTGIS_DEBUG(3, "Beginning histogram intersection calculations");
-
-       for (i = 0; i < notnull_cnt; i++)
-       {
-               GBOX *box;
-
-               /* Note these array index variables are zero-based */
-               int x_idx_min, x_idx_max, x;
-               int y_idx_min, y_idx_max, y;
-               int z_idx_min, z_idx_max, z;
-               int numcells = 0;
-
-               box = (GBOX *)sampleboxes[i];
-               if ( ! box ) continue; /* hard deviant.. */
-
-               /* give backend a chance of interrupting us */
-               vacuum_delay_point();
-
-               POSTGIS_DEBUGF(4, " feat %d box is %f %f %f, %f %f %f",
-                              i, box->xmax, box->ymax, box->zmax, box->xmin, box->ymin, box->zmin);
-
-               /* Find first overlapping unitsx cell */
-               x_idx_min = (box->xmin - geogstats->xmin) / sizex * unitsx;
-               if (x_idx_min <0) x_idx_min = 0;
-               if (x_idx_min >= unitsx) x_idx_min = unitsx - 1;
-
-               /* Find first overlapping unitsy cell */
-               y_idx_min = (box->ymin - geogstats->ymin) / sizey * unitsy;
-               if (y_idx_min <0) y_idx_min = 0;
-               if (y_idx_min >= unitsy) y_idx_min = unitsy - 1;
-
-               /* Find first overlapping unitsz cell */
-               z_idx_min = (box->zmin - geogstats->zmin) / sizez * unitsz;
-               if (z_idx_min <0) z_idx_min = 0;
-               if (z_idx_min >= unitsz) z_idx_min = unitsz - 1;
-
-               /* Find last overlapping unitsx cell */
-               x_idx_max = (box->xmax - geogstats->xmin) / sizex * unitsx;
-               if (x_idx_max <0) x_idx_max = 0;
-               if (x_idx_max >= unitsx ) x_idx_max = unitsx - 1;
-
-               /* Find last overlapping unitsy cell */
-               y_idx_max = (box->ymax - geogstats->ymin) / sizey * unitsy;
-               if (y_idx_max <0) y_idx_max = 0;
-               if (y_idx_max >= unitsy) y_idx_max = unitsy - 1;
-
-               /* Find last overlapping unitsz cell */
-               z_idx_max = (box->zmax - geogstats->zmin) / sizez * unitsz;
-               if (z_idx_max <0) z_idx_max = 0;
-               if (z_idx_max >= unitsz) z_idx_max = unitsz - 1;
-
-               POSTGIS_DEBUGF(4, " feat %d overlaps unitsx %d-%d, unitsy %d-%d, unitsz %d-%d",
-                              i, x_idx_min, x_idx_max, y_idx_min, y_idx_max, z_idx_min, z_idx_max);
-
-               /* Calculate the feature coverage - this of course depends upon the number of dims */
-               switch (ndims)
-               {
-               case 1:
-                       total_cells_coverage++;
-                       break;
-
-               case 2:
-                       total_cells_coverage += (box->xmax - box->xmin) * (box->ymax - box->ymin);
-                       break;
-
-               case 3:
-                       total_cells_coverage += (box->xmax - box->xmin) * (box->ymax - box->ymin) *
-                                               (box->zmax - box->zmin);
-                       break;
-               }
-
-               /*
-                * the {x,y,z}_idx_{min,max}
-                * define the grid squares that the box intersects
-                */
-
-               for (z = z_idx_min; z <= z_idx_max; z++)
-               {
-                       for (y = y_idx_min; y <= y_idx_max; y++)
-                       {
-                               for (x = x_idx_min; x <= x_idx_max; x++)
-                               {
-                                       geogstats->value[x + y * unitsx + z * unitsx * unitsy] += 1;
-                                       numcells++;
-                               }
-                       }
-               }
-
-               /*
-                * before adding to the total cells
-                * we could decide if we really
-                * want this feature to count
-                */
-               total_count_cells += numcells;
-
-               examinedsamples++;
-       }
-
-       POSTGIS_DEBUGF(3, " examined_samples: %d/%d", examinedsamples, samplerows);
-
-       if ( ! examinedsamples )
-       {
-               elog(NOTICE, " no examined values, invalid stats");
-               stats->stats_valid = false;
-
-               POSTGIS_DEBUG(3, " no stats have been gathered");
-
-               return;
-       }
-
-       /** TODO: what about null features (TODO) */
-       geogstats->avgFeatureCells = (float4)total_count_cells / examinedsamples;
-       geogstats->avgFeatureCoverage = total_cells_coverage / examinedsamples;
-
-       POSTGIS_DEBUGF(3, " histo: total_boxes_cells: %d", total_count_cells);
-       POSTGIS_DEBUGF(3, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells);
-       POSTGIS_DEBUGF(3, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage);
-
-       /*
-        * Normalize histogram
-        *
-        * We divide each histogram cell value
-        * by the number of samples examined.
-        *
-        */
-       for (i = 0; i < histocells; i++)
-               geogstats->value[i] /= examinedsamples;
-
-#if POSTGIS_DEBUG_LEVEL >= 4
-       /* Dump the resulting histogram for analysis */
-       {
-               int x, y, z;
-               for (x = 0; x < unitsx; x++)
-               {
-                       for (y = 0; y < unitsy; y++)
-                       {
-                               for (z = 0; z < unitsz; z++)
-                               {
-                                       POSTGIS_DEBUGF(4, " histo[%d,%d,%d] = %.15f", x, y, z,
-                                                      geogstats->value[x + y * unitsx + z * unitsx * unitsy]);
-                               }
-                       }
-               }
-       }
-#endif
-
-       /*
-        * Write the statistics data
-        */
-       stats->stakind[0] = STATISTIC_KIND_GEOGRAPHY;
-       stats->staop[0] = InvalidOid;
-       stats->stanumbers[0] = (float4 *)geogstats;
-       stats->numnumbers[0] = geog_stats_size/sizeof(float4);
-
-       stats->stanullfrac = (float4)(samplerows - notnull_cnt)/samplerows;
-       stats->stawidth = total_width/notnull_cnt;
-       stats->stadistinct = -1.0;
-
-       POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_GEOGRAPHY)",
-                      stats->stakind[0]);
-       POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
-       POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
-       POSTGIS_DEBUGF(3, " out: null fraction: %d/%d=%g", (samplerows - notnull_cnt), samplerows, stats->stanullfrac);
-       POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
-       POSTGIS_DEBUG(3, " out: distinct values: all (no check done)");
-
-       stats->stats_valid = true;
-
-}
-
-
-/**
- * This function will be called when the ANALYZE command is run
- * on a column of the "geography" type.
- *
- * It will need to return a stats builder function reference
- * and a "minimum" sample rows to feed it.
- * If we want analysis to be completely skipped we can return
- * FALSE and leave output vals untouched.
- *
- * What we know from this call is:
- *
- *     o The pg_attribute row referring to the specific column.
- *       Could be used to get reltuples from pg_class (which
- *       might quite inexact though...) and use them to set an
- *       appropriate minimum number of sample rows to feed to
- *       the stats builder. The stats builder will also receive
- *       a more accurate "estimation" of the number or rows.
- *
- *     o The pg_type row for the specific column.
- *       Could be used to set stat builder / sample rows
- *       based on domain type (when postgis will be implemented
- *       that way).
- *
- * Being this experimental we'll stick to a static stat_builder/sample_rows
- * value for now.
- *
- */
-
-PG_FUNCTION_INFO_V1(geography_analyze);
-Datum geography_analyze(PG_FUNCTION_ARGS)
-{
-       VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
-       Form_pg_attribute attr = stats->attr;
-
-       POSTGIS_DEBUG(2, "geography_analyze called");
-
-       /* If the attstattarget column is negative, use the default value */
-       /* NB: it is okay to scribble on stats->attr since it's a copy */
-       if (attr->attstattarget < 0)
-               attr->attstattarget = default_statistics_target;
-
-       POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
-
-       /* Setup the minimum rows and the algorithm function */
-       stats->minrows = 300 * stats->attr->attstattarget;
-       stats->compute_stats = compute_geography_stats;
-
-       POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
-
-       /* Indicate we are done successfully */
-       PG_RETURN_BOOL(true);
-}
index ab73ae55e10c3d271f6be2f2c91a3291d1d86d5b..f77deab4e545b4005de041739aaf4c91c731b995 100644 (file)
@@ -46,9 +46,6 @@ Datum geometry_from_geography(PG_FUNCTION_ARGS);
 Datum geography_send(PG_FUNCTION_ARGS);
 Datum geography_recv(PG_FUNCTION_ARGS);
 
-/* Datum geography_gist_selectivity(PG_FUNCTION_ARGS); TBD */
-/* Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS); TBD */
-
 GSERIALIZED* gserialized_geography_from_lwgeom(LWGEOM *lwgeom, int32 geog_typmod);
 
 /**
diff --git a/postgis/geometry_estimate.c b/postgis/geometry_estimate.c
deleted file mode 100644 (file)
index 94a5a25..0000000
+++ /dev/null
@@ -1,1575 +0,0 @@
-/**********************************************************************
- * $Id: geometry_gist_selectivity.c 6385 2010-12-15 00:57:35Z pramsey $
- *
- * PostGIS - Spatial Types for PostgreSQL
- * http://postgis.refractions.net
- *
- * Copyright 2010 (C) Paul Ramsey <pramsey@cleverelephant.ca>
- * Copyright 2004 (C) Sandro Santilli <strk@keybit.net>
- *
- * This is free software; you can redistribute and/or modify it under
- * the terms of the GNU General Public Licence. See the COPYING file.
- *
- **********************************************************************/
-
-#include "postgres.h"
-#include "executor/spi.h"
-#include "fmgr.h"
-#include "commands/vacuum.h"
-#include "nodes/relation.h"
-#include "parser/parsetree.h"
-#include "utils/array.h"
-#include "utils/lsyscache.h"
-#include "utils/syscache.h"
-#include "utils/rel.h"
-
-#include "../postgis_config.h"
-
-#if POSTGIS_PGSQL_VERSION >= 93
-       #include "access/htup_details.h"
-#endif
-
-#include "liblwgeom.h"
-#include "lwgeom_pg.h"       /* For debugging macros. */
-#include "gserialized_gist.h" /* For index common functions */
-
-
-#include <math.h>
-#if HAVE_IEEEFP_H
-#include <ieeefp.h>
-#endif
-#include <float.h>
-#include <string.h>
-#include <stdio.h>
-#include <errno.h>
-#include <ctype.h>
-
-/**
- *     Assign a number to the postgis statistics kind
- *
- *     tgl suggested:
- *
- *     1-100:  reserved for assignment by the core Postgres project
- *     100-199: reserved for assignment by PostGIS
- *     200-9999: reserved for other globally-known stats kinds
- *     10000-32767: reserved for private site-local use
- *
- */
-#define STATISTIC_KIND_GEOMETRY 100
-
-/*
- * Define this if you want to use standard deviation based
- * histogram extent computation. If you do, you can also
- * tweak the deviation factor used in computation with
- * SDFACTOR.
- */
-#define SDFACTOR 3.25
-
-typedef struct GEOM_STATS_T
-{
-       /* cols * rows = total boxes in grid */
-       float4 cols;
-       float4 rows;
-
-       /* average bounding box area of not-null features */
-       float4 avgFeatureArea;
-
-       /*
-        * average number of histogram cells
-        * covered by the sample not-null features
-        */
-       float4 avgFeatureCells;
-
-       /* BOX of area */
-       float4 xmin,ymin, xmax, ymax;
-
-       /*
-        * variable length # of floats for histogram
-        */
-       float4 value[1];
-}
-GEOM_STATS;
-
-static float8 estimate_selectivity(GBOX *box, GEOM_STATS *geomstats);
-
-
-#define SHOW_DIGS_DOUBLE 15
-#define MAX_DIGS_DOUBLE (SHOW_DIGS_DOUBLE + 6 + 1 + 3 +1)
-
-/**
- * Default geometry selectivity factor
- */
-#define DEFAULT_GEOMETRY_SEL 0.000005
-
-/**
- * Default geometry join selectivity factor
- */
-#define DEFAULT_GEOMETRY_JOINSEL 0.000005
-
-Datum geometry_gist_sel_2d(PG_FUNCTION_ARGS);
-Datum geometry_gist_joinsel_2d(PG_FUNCTION_ARGS);
-Datum geometry_analyze_2d(PG_FUNCTION_ARGS);
-Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
-Datum _postgis_geometry_sel(PG_FUNCTION_ARGS);
-
-
-static int
-calculate_column_intersection(GBOX *search_box, GEOM_STATS *geomstats1, GEOM_STATS *geomstats2)
-{
-       /**
-       * Calculate the intersection of two columns from their geomstats extents - return true
-       * if a valid intersection was found, false if there is no overlap
-       */
-
-       float8 i_xmin = Max(geomstats1->xmin, geomstats2->xmin);
-       float8 i_ymin = Max(geomstats1->ymin, geomstats2->ymin);
-       float8 i_xmax = Min(geomstats1->xmax, geomstats2->xmax);
-       float8 i_ymax = Min(geomstats1->ymax, geomstats2->ymax);
-
-       /* If the rectangles don't intersect, return false */
-       if (i_xmin > i_xmax || i_ymin > i_ymax)
-               return FALSE;
-
-       /* Otherwise return the rectangle in search_box */
-       search_box->xmin = i_xmin;
-       search_box->ymin = i_ymin;
-       search_box->xmax = i_xmax;
-       search_box->ymax = i_ymax;
-
-       return TRUE;
-}
-
-/**
-* Join selectivity of the && operator. The selectivity
-* is the ratio of the number of rows we think will be 
-* returned divided the maximum number of rows the join
-* could possibly return (the full combinatoric join).
-*
-* selectivity = estimated_nrows / (totalrows1 * totalrows2)
-*/
-PG_FUNCTION_INFO_V1(geometry_gist_joinsel_2d);
-Datum geometry_gist_joinsel_2d(PG_FUNCTION_ARGS)
-{
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-
-       /* Oid operator = PG_GETARG_OID(1); */
-       List *args = (List *) PG_GETARG_POINTER(2);
-       JoinType jointype = (JoinType) PG_GETARG_INT16(3);
-
-       Node *arg1, *arg2;
-       Var *var1, *var2;
-       Oid relid1, relid2;
-
-       HeapTuple stats1_tuple, stats2_tuple, class_tuple;
-       GEOM_STATS *geomstats1, *geomstats2;
-       /*
-       * These are to avoid casting the corresponding
-       * "type-punned" pointers, which would break
-       * "strict-aliasing rules".
-       */
-       GEOM_STATS **gs1ptr=&geomstats1, **gs2ptr=&geomstats2;
-       int geomstats1_nvalues = 0, geomstats2_nvalues = 0;
-       float8 selectivity1 = 0.0, selectivity2 = 0.0;
-       float4 num1_tuples = 0.0, num2_tuples = 0.0;
-       float4 total_tuples = 0.0, rows_returned = 0.0;
-       GBOX search_box;
-
-
-       /**
-       * Join selectivity algorithm. To calculation the selectivity we
-       * calculate the intersection of the two column sample extents,
-       * sum the results, and then multiply by two since for each
-       * geometry in col 1 that intersects a geometry in col 2, the same
-       * will also be true.
-       */
-       POSTGIS_DEBUGF(3, "geometry_gist_joinsel called with jointype %d", jointype);
-
-       /*
-       * We'll only respond to an inner join/unknown context join
-       */
-       if (jointype != JOIN_INNER)
-       {
-               elog(NOTICE, "geometry_gist_joinsel called with incorrect join type");
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
-       }
-
-       /*
-       * Determine the oids of the geometry columns we are working with
-       */
-       arg1 = (Node *) linitial(args);
-       arg2 = (Node *) lsecond(args);
-
-       if (!IsA(arg1, Var) || !IsA(arg2, Var))
-       {
-               elog(DEBUG1, "geometry_gist_joinsel called with arguments that are not column references");
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
-       }
-
-       var1 = (Var *)arg1;
-       var2 = (Var *)arg2;
-
-       relid1 = getrelid(var1->varno, root->parse->rtable);
-       relid2 = getrelid(var2->varno, root->parse->rtable);
-
-       POSTGIS_DEBUGF(3, "Working with relations oids: %d %d", relid1, relid2);
-
-       /* Read the stats tuple from the first column */
-       stats1_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno));
-       if ( ! stats1_tuple )
-       {
-               POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");
-
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
-       }
-
-
-
-       if ( ! get_attstatsslot(stats1_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
-                               NULL,
-                               (float4 **)gs1ptr, &geomstats1_nvalues) )
-       {
-               POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry join selectivity");
-
-               ReleaseSysCache(stats1_tuple);
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
-       }
-
-
-       /* Read the stats tuple from the second column */
-       stats2_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno));
-       if ( ! stats2_tuple )
-       {
-               POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");
-
-               free_attstatsslot(0, NULL, 0, (float *)geomstats1,
-                                 geomstats1_nvalues);
-               ReleaseSysCache(stats1_tuple);
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
-       }
-
-
-       if ( ! get_attstatsslot(stats2_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
-                               NULL,
-                               (float4 **)gs2ptr, &geomstats2_nvalues) )
-       {
-               POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry join selectivity");
-
-               free_attstatsslot(0, NULL, 0, (float *)geomstats1,
-                                 geomstats1_nvalues);
-               ReleaseSysCache(stats2_tuple);
-               ReleaseSysCache(stats1_tuple);
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
-       }
-
-
-       /**
-       * Setup the search box - this is the intersection of the two column
-       * extents.
-       */
-       calculate_column_intersection(&search_box, geomstats1, geomstats2);
-
-       POSTGIS_DEBUGF(3, " -- geomstats1 box: %.15g %.15g, %.15g %.15g",geomstats1->xmin,geomstats1->ymin,geomstats1->xmax,geomstats1->ymax);
-       POSTGIS_DEBUGF(3, " -- geomstats2 box: %.15g %.15g, %.15g %.15g",geomstats2->xmin,geomstats2->ymin,geomstats2->xmax,geomstats2->ymax);
-       POSTGIS_DEBUGF(3, " -- calculated intersection box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax);
-
-
-       /* Do the selectivity */
-       selectivity1 = estimate_selectivity(&search_box, geomstats1);
-       selectivity2 = estimate_selectivity(&search_box, geomstats2);
-
-       POSTGIS_DEBUGF(3, "selectivity1: %.15g   selectivity2: %.15g", selectivity1, selectivity2);
-
-       /* Free the statistic tuples */
-       free_attstatsslot(0, NULL, 0, (float *)geomstats1, geomstats1_nvalues);
-       ReleaseSysCache(stats1_tuple);
-
-       free_attstatsslot(0, NULL, 0, (float *)geomstats2, geomstats2_nvalues);
-       ReleaseSysCache(stats2_tuple);
-
-       /*
-       * OK, so before we calculate the join selectivity we also need to
-       * know the number of tuples in each of the columns since
-       * estimate_selectivity returns the number of estimated tuples
-       * divided by the total number of tuples - hence we need to
-       * multiply out the returned selectivity by the total number of rows.
-       */
-       class_tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid1));
-
-       if (HeapTupleIsValid(class_tuple))
-       {
-               Form_pg_class reltup = (Form_pg_class) GETSTRUCT(class_tuple);
-               num1_tuples = reltup->reltuples;
-       }
-
-       ReleaseSysCache(class_tuple);
-
-
-       class_tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid2));
-
-       if (HeapTupleIsValid(class_tuple))
-       {
-               Form_pg_class reltup = (Form_pg_class) GETSTRUCT(class_tuple);
-               num2_tuples = reltup->reltuples;
-       }
-
-       ReleaseSysCache(class_tuple);
-
-
-       /*
-       * Finally calculate the estimate of the number of rows returned
-       *
-       *    = 2 * (nrows from col1 + nrows from col2) /
-       *       total nrows in col1 x total nrows in col2
-       *
-       * The factor of 2 accounts for the fact that for each tuple in
-       * col 1 matching col 2,
-       * there will be another match in col 2 matching col 1
-       */
-
-       total_tuples = num1_tuples * num2_tuples;
-       rows_returned = 2 * ((num1_tuples * selectivity1) +
-                            (num2_tuples * selectivity2));
-
-       POSTGIS_DEBUGF(3, "Rows from rel1: %g", num1_tuples * selectivity1);
-       POSTGIS_DEBUGF(3, "Rows from rel2: %g", num2_tuples * selectivity2);
-       POSTGIS_DEBUGF(3, "Estimated rows returned: %g", rows_returned);
-
-       /*
-       * One (or both) tuple count is zero...
-       * We return default selectivity estimate.
-       * We could probably attempt at an estimate
-       * w/out looking at tables tuple count, with
-       * a function of selectivity1, selectivity2.
-       */
-       if ( ! total_tuples )
-       {
-               POSTGIS_DEBUG(3, "Total tuples == 0, returning default join selectivity");
-
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
-       }
-
-       if ( rows_returned > total_tuples )
-               PG_RETURN_FLOAT8(1.0);
-
-       PG_RETURN_FLOAT8(rows_returned / total_tuples);
-}
-
-
-/**
-* This function returns an estimate of the selectivity
-* of a search GBOX by looking at data in the GEOM_STATS
-* structure. The selectivity is a float from 0-1, that estimates
-* the proportion of the rows in the table that will be returned
-* as a result of the search box.
-*
-* TODO: handle box dimension collapses (probably should be handled
-* by the statistic generator, avoiding GEOM_STATS with collapsed
-* dimensions)
-*/
-static float8
-estimate_selectivity(GBOX *box, GEOM_STATS *geomstats)
-{
-       int x, y;
-       int x_idx_min, x_idx_max, y_idx_min, y_idx_max;
-       double intersect_x, intersect_y, AOI;
-       double cell_area, box_area;
-       double geow, geoh; /* width and height of histogram */
-       int histocols, historows; /* histogram grid size */
-       double value;
-       float overlapping_cells;
-       float avg_feat_cells;
-       double gain;
-       float8 selectivity;
-
-
-       /*
-        * Search box completely miss histogram extent
-        */
-       if ( box->xmax < geomstats->xmin ||
-               box->xmin > geomstats->xmax ||
-               box->ymax < geomstats->ymin ||
-               box->ymin > geomstats->ymax )
-       {
-               POSTGIS_DEBUG(3, " search_box does not overlaps histogram, returning 0");
-
-               return 0.0;
-       }
-
-       /*
-        * Search box completely contains histogram extent
-        */
-       if ( box->xmax >= geomstats->xmax &&
-               box->xmin <= geomstats->xmin &&
-               box->ymax >= geomstats->ymax &&
-               box->ymin <= geomstats->ymin )
-       {
-               POSTGIS_DEBUG(3, " search_box contains histogram, returning 1");
-
-               return 1.0;
-       }
-
-       geow = geomstats->xmax-geomstats->xmin;
-       geoh = geomstats->ymax-geomstats->ymin;
-
-       histocols = geomstats->cols;
-       historows = geomstats->rows;
-
-       POSTGIS_DEBUGF(3, " histogram has %d cols, %d rows", histocols, historows);
-       POSTGIS_DEBUGF(3, " histogram geosize is %fx%f", geow, geoh);
-
-       cell_area = (geow*geoh) / (histocols*historows);
-       box_area = (box->xmax-box->xmin)*(box->ymax-box->ymin);
-       value = 0;
-
-       /* Find first overlapping column */
-       x_idx_min = (box->xmin-geomstats->xmin) / geow * histocols;
-       if (x_idx_min < 0)
-       {
-               POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the left of histogram grid", -x_idx_min);
-
-               /* should increment the value somehow */
-               x_idx_min = 0;
-       }
-       if (x_idx_min >= histocols)
-       {
-               POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the right of histogram grid", x_idx_min-histocols+1);
-
-               /* should increment the value somehow */
-               x_idx_min = histocols-1;
-       }
-
-       /* Find first overlapping row */
-       y_idx_min = (box->ymin-geomstats->ymin) / geoh * historows;
-       if (y_idx_min <0)
-       {
-               POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the bottom of histogram grid", -y_idx_min);
-
-               /* should increment the value somehow */
-               y_idx_min = 0;
-       }
-       if (y_idx_min >= historows)
-       {
-               POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the top of histogram grid", y_idx_min-historows+1);
-
-               /* should increment the value somehow */
-               y_idx_min = historows-1;
-       }
-
-       /* Find last overlapping column */
-       x_idx_max = (box->xmax-geomstats->xmin) / geow * histocols;
-       if (x_idx_max <0)
-       {
-               /* should increment the value somehow */
-               x_idx_max = 0;
-       }
-       if (x_idx_max >= histocols )
-       {
-               /* should increment the value somehow */
-               x_idx_max = histocols-1;
-       }
-
-       /* Find last overlapping row */
-       y_idx_max = (box->ymax-geomstats->ymin) / geoh * historows;
-       if (y_idx_max <0)
-       {
-               /* should increment the value somehow */
-               y_idx_max = 0;
-       }
-       if (y_idx_max >= historows)
-       {
-               /* should increment the value somehow */
-               y_idx_max = historows-1;
-       }
-
-       /*
-        * the {x,y}_idx_{min,max}
-        * define the grid squares that the box intersects
-        */
-       for (y=y_idx_min; y<=y_idx_max; y++)
-       {
-               for (x=x_idx_min; x<=x_idx_max; x++)
-               {
-                       double val;
-                       double gain;
-
-                       val = geomstats->value[x+y*histocols];
-
-                       /*
-                        * Of the cell value we get
-                        * only the overlap fraction.
-                        */
-
-                       intersect_x = Min(box->xmax, geomstats->xmin + (x+1) * geow / histocols) - Max(box->xmin, geomstats->xmin + x * geow / histocols );
-                       intersect_y = Min(box->ymax, geomstats->ymin + (y+1) * geoh / historows) - Max(box->ymin, geomstats->ymin+ y * geoh / historows) ;
-
-                       AOI = intersect_x*intersect_y;
-                       gain = AOI/cell_area;
-
-                       POSTGIS_DEBUGF(4, " [%d,%d] cell val %.15f",
-                                      x, y, val);
-                       POSTGIS_DEBUGF(4, " [%d,%d] AOI %.15f",
-                                      x, y, AOI);
-                       POSTGIS_DEBUGF(4, " [%d,%d] gain %.15f",
-                                      x, y, gain);
-
-                       val *= gain;
-
-                       POSTGIS_DEBUGF(4, " [%d,%d] adding %.15f to value",
-                                      x, y, val);
-
-                       value += val;
-               }
-       }
-
-
-       /*
-        * If the search_box is a point, it will
-        * overlap a single cell and thus get
-        * it's value, which is the fraction of
-        * samples (we can presume of row set also)
-        * which bumped to that cell.
-        *
-        * If the table features are points, each
-        * of them will overlap a single histogram cell.
-        * Our search_box value would then be correctly
-        * computed as the sum of the bumped cells values.
-        *
-        * If both our search_box AND the sample features
-        * overlap more then a single histogram cell we
-        * need to consider the fact that our sum computation
-        * will have many duplicated included. E.g. each
-        * single sample feature would have contributed to
-        * raise the search_box value by as many times as
-        * many cells in the histogram are commonly overlapped
-        * by both search_box and feature. We should then
-        * divide our value by the number of cells in the virtual
-        * 'intersection' between average feature cell occupation
-        * and occupation of the search_box. This is as
-        * fuzzy as you understand it :)
-        *
-        * Consistency check: whenever the number of cells is
-        * one of whichever part (search_box_occupation,
-        * avg_feature_occupation) the 'intersection' must be 1.
-        * If sounds that our 'intersaction' is actually the
-        * minimun number between search_box_occupation and
-        * avg_feat_occupation.
-        *
-        */
-       overlapping_cells = (x_idx_max-x_idx_min+1) *
-                           (y_idx_max-y_idx_min+1);
-       avg_feat_cells = geomstats->avgFeatureCells;
-
-       POSTGIS_DEBUGF(3, " search_box overlaps %f cells", overlapping_cells);
-       POSTGIS_DEBUGF(3, " avg feat overlaps %f cells", avg_feat_cells);
-
-       if ( ! overlapping_cells )
-       {
-               POSTGIS_DEBUG(3, " no overlapping cells, returning 0.0");
-
-               return 0.0;
-       }
-
-       gain = 1/Min(overlapping_cells, avg_feat_cells);
-       selectivity = value*gain;
-
-       POSTGIS_DEBUGF(3, " SUM(ov_histo_cells)=%f", value);
-       POSTGIS_DEBUGF(3, " gain=%f", gain);
-       POSTGIS_DEBUGF(3, " selectivity=%f", selectivity);
-
-       /* prevent rounding overflows */
-       if (selectivity > 1.0) selectivity = 1.0;
-       else if (selectivity < 0) selectivity = 0.0;
-
-       return selectivity;
-}
-
-/**
-* Utility function to read the calculated selectivity for a given search
-* box and table/column. Used for debugging the selectivity code.
-*/
-PG_FUNCTION_INFO_V1(_postgis_geometry_sel);
-Datum _postgis_geometry_sel(PG_FUNCTION_ARGS)
-{
-       HeapTuple stats_tuple;
-       float4 *floatptr;
-       Oid table_oid = PG_GETARG_OID(0);
-       Datum geom_datum = PG_GETARG_DATUM(2);
-       text *att_text = PG_GETARG_TEXT_P(1);
-       const char *att_name = text2cstring(att_text);
-       int rv;
-       GBOX gbox;
-       int32 nvalues = 0;
-       float8 selectivity = 0;
-       AttrNumber att_num;
-
-       /* Calculate the gbox */
-       if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) )
-               elog(ERROR, "unable to calculate bounding box from geometry");
-
-       /* Get the attribute number */
-       att_num = get_attnum(table_oid, att_name);
-       if  ( ! att_num )
-               elog(ERROR, "attribute \"%s\" does not exist", att_name);
-       
-       /* First pull the stats tuple */
-       stats_tuple = SearchSysCache2(STATRELATT, table_oid, att_num);
-       if ( ! stats_tuple )
-               elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), att_name);
-               
-       /* Then read the geom status histogram from that */
-       rv = get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL, NULL, &floatptr, &nvalues);
-       if ( ! rv )
-       {
-               ReleaseSysCache(stats_tuple);
-               elog(ERROR, "unable to retreive geomstats, hmmm");              
-       }
-       
-       /* Do the estimation */
-       selectivity = estimate_selectivity(&gbox, (GEOM_STATS*)floatptr);
-
-       /* Clean up */
-       free_attstatsslot(0, NULL, 0, floatptr, nvalues);
-       ReleaseSysCache(stats_tuple);
-       
-       PG_RETURN_FLOAT8(selectivity);
-}
-
-
-
-/**
- * This function should return an estimation of the number of
- * rows returned by a query involving an overlap check
- * ( it's the restrict function for the && operator )
- *
- * It can make use (if available) of the statistics collected
- * by the geometry analyzer function.
- *
- * Note that the good work is done by estimate_selectivity() above.
- * This function just tries to find the search_box, loads the statistics
- * and invoke the work-horse.
- *
- */
-PG_FUNCTION_INFO_V1(geometry_gist_sel_2d);
-Datum geometry_gist_sel_2d(PG_FUNCTION_ARGS)
-{
-       PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-
-       /* Oid operator = PG_GETARG_OID(1); */
-       List *args = (List *) PG_GETARG_POINTER(2);
-       /* int varRelid = PG_GETARG_INT32(3); */
-       Oid relid;
-       HeapTuple stats_tuple;
-       GEOM_STATS *geomstats;
-       /*
-        * This is to avoid casting the corresponding
-        * "type-punned" pointer, which would break
-        * "strict-aliasing rules".
-        */
-       GEOM_STATS **gsptr=&geomstats;
-       int geomstats_nvalues=0;
-       Node *other;
-       Var *self;
-       GBOX search_box;
-       float8 selectivity=0;
-
-       POSTGIS_DEBUG(2, "geometry_gist_sel called");
-
-       /* Fail if not a binary opclause (probably shouldn't happen) */
-       if (list_length(args) != 2)
-       {
-               POSTGIS_DEBUG(3, "geometry_gist_sel: not a binary opclause");
-
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
-       }
-
-
-       /*
-        * Find the constant part
-        */
-       other = (Node *) linitial(args);
-       if ( ! IsA(other, Const) )
-       {
-               self = (Var *)other;
-               other = (Node *) lsecond(args);
-       }
-       else
-       {
-               self = (Var *) lsecond(args);
-       }
-
-       if ( ! IsA(other, Const) )
-       {
-               POSTGIS_DEBUG(3, " no constant arguments - returning a default selectivity");
-
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
-//             PG_RETURN_FLOAT8(0.33333);
-       }
-
-       /*
-       * We don't have a nice <const> && <var> or <var> && <const> 
-       * situation here. <const> && <const> would probably get evaluated
-       * away by PgSQL earlier on. <func> && <const> is harder, and the
-       * case we get often is <const> && ST_Expand(<var>), which does 
-       * actually have a subtly different selectivity than a bae
-       * <const> && <var> call. It's calculatable though, by expanding
-       * every cell in the histgram appropriately.
-       * 
-       * Discussion: http://trac.osgeo.org/postgis/ticket/1828
-       *
-       * To do? Do variable selectivity based on the <func> node.
-       */
-       if ( ! IsA(self, Var) )
-       {
-               POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity");
-//             PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
-               PG_RETURN_FLOAT8(0.33333);
-       }
-
-       /*
-        * Convert the constant to a BOX
-        */
-
-       if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
-       {
-               POSTGIS_DEBUG(3, "search box is EMPTY");
-               PG_RETURN_FLOAT8(0.0);
-       }
-
-       POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax);
-
-       /*
-        * Get pg_statistic row
-        */
-
-       relid = getrelid(self->varno, root->parse->rtable);
-
-       stats_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno));
-       if ( ! stats_tuple )
-       {
-               POSTGIS_DEBUG(3, " No statistics, returning default estimate");
-
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
-       }
-
-
-       if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
-#if POSTGIS_PGSQL_VERSION >= 85
-                               NULL,
-#endif
-                               (float4 **)gsptr, &geomstats_nvalues) )
-       {
-               POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry selectivity");
-
-               ReleaseSysCache(stats_tuple);
-               PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
-       }
-
-       POSTGIS_DEBUGF(4, " %d read from stats", geomstats_nvalues);
-
-       POSTGIS_DEBUGF(4, " histo: xmin,ymin: %f,%f",
-                      geomstats->xmin, geomstats->ymin);
-       POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f",
-                      geomstats->xmax, geomstats->ymax);
-       POSTGIS_DEBUGF(4, " histo: cols: %f", geomstats->rows);
-       POSTGIS_DEBUGF(4, " histo: rows: %f", geomstats->cols);
-       POSTGIS_DEBUGF(4, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea);
-       POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells);
-
-       /*
-        * Do the estimation
-        */
-       selectivity = estimate_selectivity(&search_box, geomstats);
-
-
-       POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
-
-       free_attstatsslot(0, NULL, 0, (float *)geomstats, geomstats_nvalues);
-       ReleaseSysCache(stats_tuple);
-       PG_RETURN_FLOAT8(selectivity);
-
-}
-
-
-/**
- * This function is called by the analyze function iff
- * the geometry_analyze() function give it its pointer
- * (this is always the case so far).
- * The geometry_analyze() function is also responsible
- * of deciding the number of "sample" rows we will receive
- * here. It is able to give use other 'custom' data, but we
- * won't use them so far.
- *
- * Our job is to build some statistics on the sample data
- * for use by operator estimators.
- *
- * Currently we only need statistics to estimate the number of rows
- * overlapping a given extent (estimation function bound
- * to the && operator).
- *
- */
-static void
-compute_geometry_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
-                       int samplerows, double totalrows)
-{
-       MemoryContext old_context;
-       int i;
-       int geom_stats_size;
-       GBOX **sampleboxes;
-       GEOM_STATS *geomstats;
-       bool isnull;
-       int null_cnt=0, notnull_cnt=0, examinedsamples=0;
-       GBOX *sample_extent=NULL;
-       double total_width=0;
-       double total_boxes_area=0;
-       int total_boxes_cells=0;
-       double cell_area;
-       double cell_width;
-       double cell_height;
-
-       /* for standard deviation */
-       double avgLOWx, avgLOWy, avgHIGx, avgHIGy;
-       double sumLOWx=0, sumLOWy=0, sumHIGx=0, sumHIGy=0;
-       double sdLOWx=0, sdLOWy=0, sdHIGx=0, sdHIGy=0;
-       GBOX *newhistobox=NULL;
-
-       double geow, geoh; /* width and height of histogram */
-       int histocells;
-       int cols, rows; /* histogram grid size */
-       GBOX histobox;
-
-       /*
-        * This is where geometry_analyze
-        * should put its' custom parameters.
-        */
-       /* void *mystats = stats->extra_data; */
-
-       /*
-        * We'll build an histogram having from 40 to 400 boxesPerSide
-        * Total number of cells is determined by attribute stat
-        * target. It can go from  1600 to 160000 (stat target: 10,1000)
-        */
-       histocells = 160*stats->attr->attstattarget;
-
-
-       POSTGIS_DEBUG(2, "compute_geometry_stats called");
-       POSTGIS_DEBUGF(3, " samplerows: %d", samplerows);
-       POSTGIS_DEBUGF(3, " histogram cells: %d", histocells);
-
-       /*
-        * We might need less space, but don't think
-        * its worth saving...
-        */
-       sampleboxes = palloc(sizeof(GBOX *)*samplerows);
-
-       /*
-        * First scan:
-        *  o find extent of the sample rows
-        *  o count null-infinite/not-null values
-        *  o compute total_width
-        *  o compute total features's box area (for avgFeatureArea)
-        *  o sum features box coordinates (for standard deviation)
-        */
-       for (i=0; i<samplerows; i++)
-       {
-               Datum datum;
-               GBOX box;
-
-               datum = fetchfunc(stats, i, &isnull);
-
-               /*
-                * Skip nulls
-                */
-               if ( isnull )
-               {
-                       null_cnt++;
-                       continue;
-               }
-
-               /** NOTE: we use the toasted width for stawidth */
-               total_width += VARSIZE_ANY(DatumGetPointer(datum));
-
-               if ( LW_FAILURE == gserialized_datum_get_gbox_p(datum, &box) )
-               {
-                       /* Skip empty geometry */
-                       POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
-
-                       continue;
-               }
-
-               /*
-                * Skip infinite geoms
-                */
-               if ( ! finite(box.xmin) ||
-                       ! finite(box.xmax) ||
-                       ! finite(box.ymin) ||
-                       ! finite(box.ymax) )
-               {
-                       POSTGIS_DEBUGF(3, " skipped infinite geometry %d", i);
-
-                       continue;
-               }
-
-               /*
-                * Cache bounding box
-                * TODO: reduce BOX2DFLOAT4 copies
-                */
-               sampleboxes[notnull_cnt] = palloc(sizeof(GBOX));
-               memcpy(sampleboxes[notnull_cnt], &box, sizeof(GBOX));
-
-               /*
-                * Add to sample extent union
-                */
-               if ( ! sample_extent )
-               {
-                       sample_extent = palloc(sizeof(GBOX));
-                       memcpy(sample_extent, &box, sizeof(GBOX));
-               }
-               else
-               {
-                       sample_extent->xmax = Max(sample_extent->xmax,
-                                                         box.xmax);
-                       sample_extent->ymax = Max(sample_extent->ymax,
-                                                         box.ymax);
-                       sample_extent->xmin = Min(sample_extent->xmin,
-                                                         box.xmin);
-                       sample_extent->ymin = Min(sample_extent->ymin,
-                                                         box.ymin);
-               }
-
-               total_boxes_area += (box.xmax-box.xmin)*(box.ymax-box.ymin);
-
-               /*
-                * Add bvol coordinates to sum for standard deviation
-                * computation.
-                */
-               sumLOWx += box.xmin;
-               sumLOWy += box.ymin;
-               sumHIGx += box.xmax;
-               sumHIGy += box.ymax;
-
-               notnull_cnt++;
-
-               /* give backend a chance of interrupting us */
-               vacuum_delay_point();
-
-       }
-
-       if ( ! notnull_cnt )
-       {
-               elog(NOTICE, " no notnull values, invalid stats");
-               stats->stats_valid = false;
-               return;
-       }
-
-
-       POSTGIS_DEBUGF(3, " sample_extent: xmin,ymin: %f,%f",
-                      sample_extent->xmin, sample_extent->ymin);
-       POSTGIS_DEBUGF(3, " sample_extent: xmax,ymax: %f,%f",
-                      sample_extent->xmax, sample_extent->ymax);
-
-       /*
-        * Second scan:
-        *  o compute standard deviation
-        */
-       avgLOWx = sumLOWx/notnull_cnt;
-       avgLOWy = sumLOWy/notnull_cnt;
-       avgHIGx = sumHIGx/notnull_cnt;
-       avgHIGy = sumHIGy/notnull_cnt;
-       for (i=0; i<notnull_cnt; i++)
-       {
-               GBOX *box;
-               box = (GBOX *)sampleboxes[i];
-
-               sdLOWx += (box->xmin - avgLOWx) * (box->xmin - avgLOWx);
-               sdLOWy += (box->ymin - avgLOWy) * (box->ymin - avgLOWy);
-               sdHIGx += (box->xmax - avgHIGx) * (box->xmax - avgHIGx);
-               sdHIGy += (box->ymax - avgHIGy) * (box->ymax - avgHIGy);
-       }
-       sdLOWx = sqrt(sdLOWx/notnull_cnt);
-       sdLOWy = sqrt(sdLOWy/notnull_cnt);
-       sdHIGx = sqrt(sdHIGx/notnull_cnt);
-       sdHIGy = sqrt(sdHIGy/notnull_cnt);
-
-       POSTGIS_DEBUG(3, " standard deviations:");
-       POSTGIS_DEBUGF(3, "  LOWx - avg:%f sd:%f", avgLOWx, sdLOWx);
-       POSTGIS_DEBUGF(3, "  LOWy - avg:%f sd:%f", avgLOWy, sdLOWy);
-       POSTGIS_DEBUGF(3, "  HIGx - avg:%f sd:%f", avgHIGx, sdHIGx);
-       POSTGIS_DEBUGF(3, "  HIGy - avg:%f sd:%f", avgHIGy, sdHIGy);
-
-       histobox.xmin = Max((avgLOWx - SDFACTOR * sdLOWx),
-                              sample_extent->xmin);
-       histobox.ymin = Max((avgLOWy - SDFACTOR * sdLOWy),
-                              sample_extent->ymin);
-       histobox.xmax = Min((avgHIGx + SDFACTOR * sdHIGx),
-                              sample_extent->xmax);
-       histobox.ymax = Min((avgHIGy + SDFACTOR * sdHIGy),
-                              sample_extent->ymax);
-
-       POSTGIS_DEBUGF(3, " sd_extent: xmin,ymin: %f,%f",
-                      histobox.xmin, histobox.ymin);
-       POSTGIS_DEBUGF(3, " sd_extent: xmax,ymax: %f,%f",
-                      histobox.xmin, histobox.ymax);
-
-       /*
-        * Third scan:
-        *   o skip hard deviants
-        *   o compute new histogram box
-        */
-       for (i=0; i<notnull_cnt; i++)
-       {
-               GBOX *box;
-               box = (GBOX *)sampleboxes[i];
-
-               if ( box->xmin > histobox.xmax ||
-                       box->xmax < histobox.xmin ||
-                       box->ymin > histobox.ymax ||
-                       box->ymax < histobox.ymin )
-               {
-                       POSTGIS_DEBUGF(4, " feat %d is an hard deviant, skipped", i);
-
-                       sampleboxes[i] = NULL;
-                       continue;
-               }
-               if ( ! newhistobox )
-               {
-                       newhistobox = palloc(sizeof(GBOX));
-                       memcpy(newhistobox, box, sizeof(GBOX));
-               }
-               else
-               {
-                       if ( box->xmin < newhistobox->xmin )
-                               newhistobox->xmin = box->xmin;
-                       if ( box->ymin < newhistobox->ymin )
-                               newhistobox->ymin = box->ymin;
-                       if ( box->xmax > newhistobox->xmax )
-                               newhistobox->xmax = box->xmax;
-                       if ( box->ymax > newhistobox->ymax )
-                               newhistobox->ymax = box->ymax;
-               }
-       }
-
-       /*
-        * Set histogram extent as the intersection between
-        * standard deviation based histogram extent
-        * and computed sample extent after removal of
-        * hard deviants (there might be no hard deviants).
-        */
-       if ( histobox.xmin < newhistobox->xmin )
-               histobox.xmin = newhistobox->xmin;
-       if ( histobox.ymin < newhistobox->ymin )
-               histobox.ymin = newhistobox->ymin;
-       if ( histobox.xmax > newhistobox->xmax )
-               histobox.xmax = newhistobox->xmax;
-       if ( histobox.ymax > newhistobox->ymax )
-               histobox.ymax = newhistobox->ymax;
-
-
-       POSTGIS_DEBUGF(3, " histogram_extent: xmin,ymin: %f,%f",
-                      histobox.xmin, histobox.ymin);
-       POSTGIS_DEBUGF(3, " histogram_extent: xmax,ymax: %f,%f",
-                      histobox.xmax, histobox.ymax);
-
-
-       geow = histobox.xmax - histobox.xmin;
-       geoh = histobox.ymax - histobox.ymin;
-
-       /*
-        * Compute histogram cols and rows based on aspect ratio
-        * of histogram extent
-        */
-       if ( ! geow && ! geoh )
-       {
-               cols = 1;
-               rows = 1;
-               histocells = 1;
-       }
-       else if ( ! geow )
-       {
-               cols = 1;
-               rows = histocells;
-       }
-       else if ( ! geoh )
-       {
-               cols = histocells;
-               rows = 1;
-       }
-       else
-       {
-               if ( geow<geoh)
-               {
-                       cols = ceil(sqrt((double)histocells*(geow/geoh)));
-                       rows = ceil((double)histocells/cols);
-               }
-               else
-               {
-                       rows = ceil(sqrt((double)histocells*(geoh/geow)));
-                       cols = ceil((double)histocells/rows);
-               }
-               histocells = cols*rows;
-       }
-
-       POSTGIS_DEBUGF(3, " computed histogram grid size (CxR): %dx%d (%d cells)", cols, rows, histocells);
-
-
-       /*
-        * Create the histogram (GEOM_STATS)
-        */
-       old_context = MemoryContextSwitchTo(stats->anl_context);
-       geom_stats_size=sizeof(GEOM_STATS)+(histocells-1)*sizeof(float4);
-       geomstats = palloc(geom_stats_size);
-       MemoryContextSwitchTo(old_context);
-
-       geomstats->avgFeatureArea = total_boxes_area/notnull_cnt;
-       geomstats->xmin = histobox.xmin;
-       geomstats->ymin = histobox.ymin;
-       geomstats->xmax = histobox.xmax;
-       geomstats->ymax = histobox.ymax;
-       geomstats->cols = cols;
-       geomstats->rows = rows;
-
-       /* Initialize all values to 0 */
-       for (i=0; i<histocells; i++) geomstats->value[i] = 0;
-
-       cell_width = geow/cols;
-       cell_height = geoh/rows;
-       cell_area = cell_width*cell_height;
-
-       POSTGIS_DEBUGF(4, "cell_width: %f", cell_width);
-       POSTGIS_DEBUGF(4, "cell_height: %f", cell_height);
-
-
-       /*
-        * Fourth scan:
-        *  o fill histogram values with the number of
-        *    features' bbox overlaps: a feature's bvol
-        *    can fully overlap (1) or partially overlap
-        *    (fraction of 1) an histogram cell.
-        *
-        *  o compute total cells occupation
-        *
-        */
-       for (i=0; i<notnull_cnt; i++)
-       {
-               GBOX *box;
-               int x_idx_min, x_idx_max, x;
-               int y_idx_min, y_idx_max, y;
-               int numcells=0;
-
-               box = (GBOX *)sampleboxes[i];
-               if ( ! box ) continue; /* hard deviant.. */
-
-               /* give backend a chance of interrupting us */
-               vacuum_delay_point();
-
-               POSTGIS_DEBUGF(4, " feat %d box is %f %f, %f %f",
-                              i, box->xmax, box->ymax,
-                              box->xmin, box->ymin);
-
-               /* Find first overlapping column */
-               x_idx_min = (box->xmin-geomstats->xmin) / geow * cols;
-               if (x_idx_min <0) x_idx_min = 0;
-               if (x_idx_min >= cols) x_idx_min = cols-1;
-
-               /* Find first overlapping row */
-               y_idx_min = (box->ymin-geomstats->ymin) / geoh * rows;
-               if (y_idx_min <0) y_idx_min = 0;
-               if (y_idx_min >= rows) y_idx_min = rows-1;
-
-               /* Find last overlapping column */
-               x_idx_max = (box->xmax-geomstats->xmin) / geow * cols;
-               if (x_idx_max <0) x_idx_max = 0;
-               if (x_idx_max >= cols ) x_idx_max = cols-1;
-
-               /* Find last overlapping row */
-               y_idx_max = (box->ymax-geomstats->ymin) / geoh * rows;
-               if (y_idx_max <0) y_idx_max = 0;
-               if (y_idx_max >= rows) y_idx_max = rows-1;
-
-               POSTGIS_DEBUGF(4, " feat %d overlaps columns %d-%d, rows %d-%d",
-                              i, x_idx_min, x_idx_max, y_idx_min, y_idx_max);
-
-               /*
-                * the {x,y}_idx_{min,max}
-                * define the grid squares that the box intersects
-                */
-               for (y=y_idx_min; y<=y_idx_max; y++)
-               {
-                       for (x=x_idx_min; x<=x_idx_max; x++)
-                       {
-                               geomstats->value[x+y*cols] += 1;
-                               numcells++;
-                       }
-               }
-
-               /*
-                * before adding to the total cells
-                * we could decide if we really
-                * want this feature to count
-                */
-               total_boxes_cells += numcells;
-
-               examinedsamples++;
-       }
-
-       POSTGIS_DEBUGF(3, " examined_samples: %d/%d", examinedsamples, samplerows);
-
-       if ( ! examinedsamples )
-       {
-               elog(NOTICE, " no examined values, invalid stats");
-               stats->stats_valid = false;
-
-               POSTGIS_DEBUG(3, " no stats have been gathered");
-
-               return;
-       }
-
-       /** TODO: what about null features (TODO) */
-       geomstats->avgFeatureCells = (float4)total_boxes_cells/examinedsamples;
-
-       POSTGIS_DEBUGF(3, " histo: total_boxes_cells: %d", total_boxes_cells);
-       POSTGIS_DEBUGF(3, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea);
-       POSTGIS_DEBUGF(3, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells);
-
-
-       /*
-        * Normalize histogram
-        *
-        * We divide each histogram cell value
-        * by the number of samples examined.
-        *
-        */
-       for (i=0; i<histocells; i++)
-               geomstats->value[i] /= examinedsamples;
-
-       {
-               int x, y;
-               for (x=0; x<cols; x++)
-               {
-                       for (y=0; y<rows; y++)
-                       {
-                               POSTGIS_DEBUGF(4, " histo[%d,%d] = %.15f", x, y, geomstats->value[x+y*cols]);
-                       }
-               }
-       }
-
-
-       /*
-        * Write the statistics data
-        */
-       stats->stakind[0] = STATISTIC_KIND_GEOMETRY;
-       stats->staop[0] = InvalidOid;
-       stats->stanumbers[0] = (float4 *)geomstats;
-       stats->numnumbers[0] = geom_stats_size/sizeof(float4);
-
-       stats->stanullfrac = (float4)null_cnt/samplerows;
-       stats->stawidth = total_width/notnull_cnt;
-       stats->stadistinct = -1.0;
-
-       POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_GEOMETRY)",
-                      stats->stakind[0]);
-       POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
-       POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
-       POSTGIS_DEBUGF(3, " out: null fraction: %d/%d=%g", null_cnt, samplerows, stats->stanullfrac);
-       POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
-       POSTGIS_DEBUG(3, " out: distinct values: all (no check done)");
-
-       stats->stats_valid = true;
-}
-
-/**
- * This function will be called when the ANALYZE command is run
- * on a column of the "geometry" type.
- *
- * It will need to return a stats builder function reference
- * and a "minimum" sample rows to feed it.
- * If we want analisys to be completely skipped we can return
- * FALSE and leave output vals untouched.
- *
- * What we know from this call is:
- *
- *     o The pg_attribute row referring to the specific column.
- *       Could be used to get reltuples from pg_class (which
- *       might quite inexact though...) and use them to set an
- *       appropriate minimum number of sample rows to feed to
- *       the stats builder. The stats builder will also receive
- *       a more accurate "estimation" of the number or rows.
- *
- *     o The pg_type row for the specific column.
- *       Could be used to set stat builder / sample rows
- *       based on domain type (when postgis will be implemented
- *       that way).
- *
- * Being this experimental we'll stick to a static stat_builder/sample_rows
- * value for now.
- *
- */
-PG_FUNCTION_INFO_V1(geometry_analyze_2d);
-Datum geometry_analyze_2d(PG_FUNCTION_ARGS)
-{
-       VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
-       Form_pg_attribute attr = stats->attr;
-
-       POSTGIS_DEBUG(2, "geometry_analyze called");
-
-       /* If the attstattarget column is negative, use the default value */
-       /* NB: it is okay to scribble on stats->attr since it's a copy */
-       if (attr->attstattarget < 0)
-               attr->attstattarget = default_statistics_target;
-
-       POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
-
-       /* Setup the minimum rows and the algorithm function */
-       stats->minrows = 300 * stats->attr->attstattarget;
-       stats->compute_stats = compute_geometry_stats;
-
-       POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
-
-       /* Indicate we are done successfully */
-       PG_RETURN_BOOL(true);
-}
-
-/**
- * Return the estimated extent of the table
- * looking at gathered statistics (or NULL if
- * no statistics have been gathered).
- */
-PG_FUNCTION_INFO_V1(geometry_estimated_extent);
-Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
-{
-       text *txnsp = NULL;
-       text *txtbl = NULL;
-       text *txcol = NULL;
-       char *nsp = NULL;
-       char *tbl = NULL;
-       char *col = NULL;
-       char *query;
-       ArrayType *array = NULL;
-       int SPIcode;
-       SPITupleTable *tuptable;
-       TupleDesc tupdesc ;
-       HeapTuple tuple ;
-       bool isnull;
-       GBOX *box;
-       size_t querysize;
-       GEOM_STATS geomstats;
-       float reltuples;
-       Datum binval;
-
-       if ( PG_NARGS() == 3 )
-       {
-               txnsp = PG_GETARG_TEXT_P(0);
-               txtbl = PG_GETARG_TEXT_P(1);
-               txcol = PG_GETARG_TEXT_P(2);
-       }
-       else if ( PG_NARGS() == 2 )
-       {
-               txtbl = PG_GETARG_TEXT_P(0);
-               txcol = PG_GETARG_TEXT_P(1);
-       }
-       else
-       {
-               elog(ERROR, "estimated_extent() called with wrong number of arguments");
-               PG_RETURN_NULL();
-       }
-
-       POSTGIS_DEBUG(2, "geomtery_estimated_extent called");
-
-       /* Connect to SPI manager */
-       SPIcode = SPI_connect();
-       if (SPIcode != SPI_OK_CONNECT)
-       {
-               elog(ERROR, "geometry_estimated_extent: couldnt open a connection to SPI");
-               PG_RETURN_NULL() ;
-       }
-
-       querysize = VARSIZE(txtbl)+VARSIZE(txcol)+516;
-
-       if ( txnsp )
-       {
-               nsp = text2cstring(txnsp);
-               querysize += VARSIZE(txnsp);
-       }
-       else
-       {
-               querysize += 32; /* current_schema() */
-       }
-
-       tbl = text2cstring(txtbl);
-       col = text2cstring(txcol);
-
-#if POSTGIS_DEBUG_LEVEL > 0
-       if ( txnsp )
-       {
-               POSTGIS_DEBUGF(3, " schema:%s table:%s column:%s", nsp, tbl, col);
-       }
-       else
-       {
-               POSTGIS_DEBUGF(3, " schema:current_schema() table:%s column:%s",
-                              tbl, col);
-       }
-#endif
-
-       query = palloc(querysize);
-
-
-       /* Security check: because we access information in the pg_statistic table, we must run as the database
-       superuser (by marking the function as SECURITY DEFINER) and check permissions ourselves */
-       if ( txnsp )
-       {
-               sprintf(query, "SELECT has_table_privilege((SELECT usesysid FROM pg_user WHERE usename = session_user), '\"%s\".\"%s\"', 'select')", nsp, tbl);
-       }
-       else
-       {
-               sprintf(query, "SELECT has_table_privilege((SELECT usesysid FROM pg_user WHERE usename = session_user), '\"%s\"', 'select')", tbl);
-       }
-
-       POSTGIS_DEBUGF(4, "permission check sql query is: %s", query);
-
-       SPIcode = SPI_exec(query, 1);
-       if (SPIcode != SPI_OK_SELECT)
-       {
-               elog(ERROR, "geometry_estimated_extent: couldn't execute permission check sql via SPI");
-               SPI_finish();
-               PG_RETURN_NULL();
-       }
-
-       tuptable = SPI_tuptable;
-       tupdesc = SPI_tuptable->tupdesc;
-       tuple = tuptable->vals[0];
-
-       if (!DatumGetBool(SPI_getbinval(tuple, tupdesc, 1, &isnull)))
-       {
-               elog(ERROR, "geometry_estimated_extent: permission denied for relation %s", tbl);
-               SPI_finish();
-               PG_RETURN_NULL();
-       }
-
-
-       /* Return the stats data */
-       if ( txnsp )
-       {
-         sprintf(query, 
-           "SELECT s.stanumbers1[5:8], c.reltuples FROM pg_class c"
-           " LEFT OUTER JOIN pg_namespace n ON (n.oid = c.relnamespace)"
-           " LEFT OUTER JOIN pg_attribute a ON (a.attrelid = c.oid )"
-           " LEFT OUTER JOIN pg_statistic s ON (s.starelid = c.oid AND "
-                                               "s.staattnum = a.attnum )"
-           " WHERE c.relname = '%s' AND a.attname = '%s' "
-           " AND n.nspname = '%s';",
-           tbl, col, nsp);
-       }
-       else
-       {
-         sprintf(query, 
-           "SELECT s.stanumbers1[5:8], c.reltuples FROM pg_class c"
-           " LEFT OUTER JOIN pg_namespace n ON (n.oid = c.relnamespace)"
-           " LEFT OUTER JOIN pg_attribute a ON (a.attrelid = c.oid )"
-           " LEFT OUTER JOIN pg_statistic s ON (s.starelid = c.oid AND "
-                                               "s.staattnum = a.attnum )"
-           " WHERE c.relname = '%s' AND a.attname = '%s' "
-           " AND n.nspname = current_schema();",
-           tbl, col);
-       }
-
-       POSTGIS_DEBUGF(4, " query: %s", query);
-
-       SPIcode = SPI_exec(query, 1);
-       if (SPIcode != SPI_OK_SELECT )
-       {
-               elog(ERROR,"geometry_estimated_extent: couldnt execute sql via SPI");
-               SPI_finish();
-               PG_RETURN_NULL();
-       }
-       if (SPI_processed != 1)
-       {
-
-               POSTGIS_DEBUGF(3, " %d stat rows", SPI_processed);
-
-               elog(ERROR, "Unexistent field \"%s\".\"%s\".\"%s\"",
-                       ( nsp ? nsp : "<current>" ), tbl, col);
-
-               SPI_finish();
-               PG_RETURN_NULL() ;
-       }
-
-       tuptable = SPI_tuptable;
-       tupdesc = SPI_tuptable->tupdesc;
-       tuple = tuptable->vals[0];
-
-       /* Check if the table has zero rows first */
-       binval = SPI_getbinval(tuple, tupdesc, 2, &isnull);
-       if (isnull)
-       {
-
-               POSTGIS_DEBUG(3, " reltuples is NULL");
-
-               elog(ERROR, "geometry_estimated_extent: null reltuples for table");
-
-               SPI_finish();
-               PG_RETURN_NULL();
-       }
-       reltuples = DatumGetFloat4(binval);
-       if ( ! reltuples )
-       {
-               POSTGIS_DEBUG(3, "table has estimated zero rows");
-
-               /* 
-                * TODO: distinguish between empty and not analyzed ?
-                */
-               elog(NOTICE, "\"%s\".\"%s\".\"%s\" is empty or not analyzed",
-                       ( nsp ? nsp : "<current>" ), tbl, col);
-
-               SPI_finish();
-               PG_RETURN_NULL();
-       }
-
-       binval = SPI_getbinval(tuple, tupdesc, 1, &isnull);
-       if (isnull)
-       {
-
-               POSTGIS_DEBUG(3, " stats are NULL");
-
-               elog(ERROR, "geometry_estimated_extent: null statistics for table");
-
-               SPI_finish();
-               PG_RETURN_NULL();
-       }
-       array = DatumGetArrayTypeP(binval);
-       if ( ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)) != 4 )
-       {
-               elog(ERROR, " corrupted histogram");
-               PG_RETURN_NULL();
-       }
-
-       POSTGIS_DEBUGF(3, " stats array has %d elems", ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)));
-
-       /*
-        * Construct GBOX.
-        * Must allocate this in upper executor context
-        * to keep it alive after SPI_finish().
-        */
-       box = SPI_palloc(sizeof(GBOX));
-       FLAGS_SET_GEODETIC(box->flags, 0);
-       FLAGS_SET_Z(box->flags, 0);
-       FLAGS_SET_M(box->flags, 0);
-
-       /* Construct the box */
-       memcpy(&(geomstats.xmin), ARR_DATA_PTR(array), sizeof(float)*4);
-       box->xmin = geomstats.xmin;
-       box->xmax = geomstats.xmax;
-       box->ymin = geomstats.ymin;
-       box->ymax = geomstats.ymax;
-
-       POSTGIS_DEBUGF(3, " histogram extent = %g %g, %g %g", box->xmin,
-                      box->ymin, box->xmax, box->ymax);
-
-       SPIcode = SPI_finish();
-       if (SPIcode != SPI_OK_FINISH )
-       {
-               elog(ERROR, "geometry_estimated_extent: couldn't disconnect from SPI");
-       }
-
-       /* TODO: enlarge the box by some factor */
-
-       PG_RETURN_POINTER(box);
-}
index 7b8aa3c29296c0e7d4fcd472c2dda46462fbf00b..9dc5b62d01e234a860d07ac1605e33022eaeae86 100644 (file)
@@ -91,6 +91,9 @@ Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
 
+/* Old Prototype */
+Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
+
 /**
 * Assign a number to the n-dimensional statistics kind
 *
@@ -2182,3 +2185,31 @@ Datum gserialized_estimated_extent(PG_FUNCTION_ARGS)
        PG_RETURN_POINTER(gbox);
 }
 
+/**
+ * Return the estimated extent of the table
+ * looking at gathered statistics (or NULL if
+ * no statistics have been gathered).
+ */
+  
+PG_FUNCTION_INFO_V1(geometry_estimated_extent);
+Datum geometry_estimated_extent(PG_FUNCTION_ARGS)
+{
+       if ( PG_NARGS() == 3 )
+       {
+           DirectFunctionCall3(gserialized_estimated_extent, 
+           PG_GETARG_DATUM(0), 
+           PG_GETARG_DATUM(1), 
+        PG_GETARG_DATUM(2));
+       }
+       else if ( PG_NARGS() == 2 )
+       {
+           DirectFunctionCall2(gserialized_estimated_extent, 
+           PG_GETARG_DATUM(0), 
+           PG_GETARG_DATUM(1));
+       }
+       else
+       {
+               elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
+               PG_RETURN_NULL();
+       }
+}
index a8ff6d78f62a67520df906533439f7d1cb8fb2b3..3e487daba215b776726fd3a89fc88c0aee8a67db 100644 (file)
@@ -427,18 +427,6 @@ CREATE OR REPLACE FUNCTION geometry_gist_decompress_2d(internal)
        AS 'MODULE_PATHNAME' ,'gserialized_gist_decompress_2d'
        LANGUAGE 'c';
 
--- Availability: 2.0.0
-CREATE OR REPLACE FUNCTION geometry_gist_sel_2d (internal, oid, internal, int4)
-       RETURNS float8
-       AS 'MODULE_PATHNAME', 'geometry_gist_sel_2d'
-       LANGUAGE 'c';
-
--- Availability: 2.0.0
-CREATE OR REPLACE FUNCTION geometry_gist_joinsel_2d(internal, oid, internal, smallint)
-       RETURNS float8
-       AS 'MODULE_PATHNAME', 'geometry_gist_joinsel_2d'
-       LANGUAGE 'c';
-
 
 -----------------------------------------------------------------------------