From af683a6a46479e45e17df25634b12035b88ff7c4 Mon Sep 17 00:00:00 2001 From: Paul Ramsey Date: Fri, 12 Apr 2013 18:33:50 +0000 Subject: [PATCH] #945, remove the old selectivity code, now no longer being called git-svn-id: http://svn.osgeo.org/postgis/trunk@11288 b70326c6-7e19-0410-871a-916f4a2858ee --- postgis/Makefile.in | 2 - postgis/geography.sql.in | 12 - postgis/geography_estimate.c | 1486 ------------------------------ postgis/geography_inout.c | 3 - postgis/geometry_estimate.c | 1575 -------------------------------- postgis/gserialized_estimate.c | 31 + postgis/postgis.sql.in | 12 - 7 files changed, 31 insertions(+), 3090 deletions(-) delete mode 100644 postgis/geography_estimate.c delete mode 100644 postgis/geometry_estimate.c diff --git a/postgis/Makefile.in b/postgis/Makefile.in index b85f92b40..bcfc992da 100644 --- a/postgis/Makefile.in +++ b/postgis/Makefile.in @@ -58,10 +58,8 @@ PG_OBJS= \ gserialized_estimate.o \ geography_inout.o \ geography_btree.o \ - geography_estimate.o \ geography_measurement.o \ geography_measurement_trees.o \ - geometry_estimate.o \ geometry_inout.o # Objects to build using PGXS diff --git a/postgis/geography.sql.in b/postgis/geography.sql.in index 77d643d24..b184552c1 100644 --- a/postgis/geography.sql.in +++ b/postgis/geography.sql.in @@ -233,18 +233,6 @@ CREATE OR REPLACE FUNCTION geography_gist_decompress(internal) AS 'MODULE_PATHNAME' ,'gserialized_gist_decompress' LANGUAGE 'c'; --- Availability: 1.5.0 -CREATE OR REPLACE FUNCTION geography_gist_selectivity (internal, oid, internal, int4) - RETURNS float8 - AS 'MODULE_PATHNAME', 'geography_gist_selectivity' - LANGUAGE 'c'; - --- Availability: 1.5.0 -CREATE OR REPLACE FUNCTION geography_gist_join_selectivity(internal, oid, internal, smallint) - RETURNS float8 - AS 'MODULE_PATHNAME', 'geography_gist_selectivity' - LANGUAGE 'c'; - -- Availability: 1.5.0 CREATE OR REPLACE FUNCTION geography_overlaps(geography, geography) RETURNS boolean diff --git a/postgis/geography_estimate.c b/postgis/geography_estimate.c deleted file mode 100644 index f1fcc20fe..000000000 --- a/postgis/geography_estimate.c +++ /dev/null @@ -1,1486 +0,0 @@ -/********************************************************************** - * $Id: geography_estimate.c 4211 2009-06-25 03:32:43Z robe $ - * - * PostGIS - Spatial Types for PostgreSQL - * http://postgis.refractions.net - * Copyright 2001-2006 Refractions Research Inc. - * Copyright 2009 Mark Cave-Ayland - * - * This is free software; you can redistribute and/or modify it under - * the terms of the GNU General Public Licence. See the COPYING file. - * - **********************************************************************/ - - -#include "postgres.h" -#include "commands/vacuum.h" -#include "nodes/relation.h" -#include "parser/parsetree.h" -#include "utils/lsyscache.h" -#include "utils/syscache.h" - -#include "../postgis_config.h" -#include "liblwgeom.h" -#include "lwgeom_pg.h" - -/* Prototypes */ -Datum geography_gist_selectivity(PG_FUNCTION_ARGS); -Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS); -Datum geography_analyze(PG_FUNCTION_ARGS); - - -/** -* Place holder selectivity calculations to make the index work in -* the absence of real selectivity calculations. -*/ - -#define DEFAULT_GEOGRAPHY_SEL 0.000005 - -/** - * Assign a number to the postgis statistics kind - * - * tgl suggested: - * - * 1-100: reserved for assignment by the core Postgres project - * 100-199: reserved for assignment by PostGIS - * 200-9999: reserved for other globally-known stats kinds - * 10000-32767: reserved for private site-local use - * - */ -#define STATISTIC_KIND_GEOGRAPHY 101 - -/* - * Define this if you want to use standard deviation based - * histogram extent computation. If you do, you can also - * tweak the deviation factor used in computation with - * SDFACTOR. - */ -#define SDFACTOR 3.25 - - -/* Information about the dimensions stored in the sample */ -struct dimensions -{ - char axis; - double min; - double max; -}; - -/* Main geography statistics structure, stored in pg_statistics */ -typedef struct GEOG_STATS_T -{ - /* Dimensionality of this column */ - float4 dims; - - /* x . y . z = total boxes in grid */ - float4 unitsx; - float4 unitsy; - float4 unitsz; - - /* average feature coverage of not-null features: - this is either an area for the case of a 2D column - or a volume in the case of a 3D column */ - float4 avgFeatureCoverage; - - /* - * average number of histogram cells - * covered by the sample not-null features - */ - float4 avgFeatureCells; - - /* histogram extent */ - float4 xmin, ymin, zmin, xmax, ymax, zmax; - - /* No. of geometries within this column (approx) */ - float4 totalrows; - - /* - * variable length # of floats for histogram - */ - float4 value[1]; -} -GEOG_STATS; - - -/** - * This function returns an estimate of the selectivity - * of a search_box looking at data in the GEOG_STATS - * structure. - * */ -static float8 -estimate_selectivity(GBOX *box, GEOG_STATS *geogstats) -{ - int x, y, z; - int x_idx_min, x_idx_max, y_idx_min, y_idx_max, z_idx_min, z_idx_max; - double intersect_x, intersect_y, intersect_z; - double AOI = 1.0; - double cell_coverage = 1.0; - double sizex, sizey, sizez; /* dimensions of histogram */ - int unitsx, unitsy, unitsz; /* histogram grid size */ - double value; - float overlapping_cells; - float avg_feat_cells; - double gain; - float8 selectivity; - - - /* - * Search box completely miss histogram extent - */ - if ( box->xmax < geogstats->xmin || box->xmin > geogstats->xmax || - box->ymax < geogstats->ymin || box->ymin > geogstats->ymax || - box->zmax < geogstats->zmin || box->zmin > geogstats->zmax) - { - POSTGIS_DEBUG(3, " search_box does not overlaps histogram, returning 0"); - - return 0.0; - } - - /* - * Search box completely contains histogram extent - */ - if ( box->xmax >= geogstats->xmax && box->xmin <= geogstats->xmin && - box->ymax >= geogstats->ymax && box->ymin <= geogstats->ymin && - box->zmax >= geogstats->zmax && box->zmin <= geogstats->zmin) - { - POSTGIS_DEBUG(3, " search_box contains histogram, returning 1"); - - return 1.0; - } - - sizex = geogstats->xmax - geogstats->xmin; - sizey = geogstats->ymax - geogstats->ymin; - sizez = geogstats->zmax - geogstats->zmin; - - unitsx = geogstats->unitsx; - unitsy = geogstats->unitsy; - unitsz = geogstats->unitsz; - - POSTGIS_DEBUGF(3, " histogram has %d unitsx, %d unitsy, %d unitsz", unitsx, unitsy, unitsz); - POSTGIS_DEBUGF(3, " histogram geosize is %fx%fx%f", sizex, sizey, sizez); - - /* Work out the coverage depending upon the number of dimensions */ - switch ((int)geogstats->dims) - { - case 0: - case 1: - /* Non-existent or multiple single points */ - cell_coverage = 1; - break; - - case 2: - /* Work in areas for 2 dimensions: we have to work slightly - harder here to work out which 2 dimensions are valid */ - - if (sizez == 0) - /* X and Y */ - cell_coverage = (sizex * sizey) / (unitsx * unitsy); - else if (sizey == 0) - /* X and Z */ - cell_coverage = (sizex * sizez) / (unitsx * unitsz); - else if (sizex == 0) - /* Y and Z */ - cell_coverage = (sizey * sizez) / (unitsy * unitsz); - break; - - case 3: - /* Work in volumes for 3 dimensions */ - cell_coverage = (sizex * sizey * sizey) / (unitsx * unitsy * unitsz); - break; - } - - value = 0; - - /* Find first overlapping x unit */ - x_idx_min = (box->xmin-geogstats->xmin) / sizex * unitsx; - if (x_idx_min < 0) - { - POSTGIS_DEBUGF(3, " search_box overlaps %d x units outside (low) of histogram grid", -x_idx_min); - - /* should increment the value somehow */ - x_idx_min = 0; - } - if (x_idx_min >= unitsx) - { - POSTGIS_DEBUGF(3, " search_box overlaps %d x units outside (high) of histogram grid", x_idx_min-unitsx+1); - - /* should increment the value somehow */ - x_idx_min = unitsx-1; - } - - /* Find first overlapping y unit */ - y_idx_min = (box->ymin-geogstats->ymin) / sizey * unitsy; - if (y_idx_min <0) - { - POSTGIS_DEBUGF(3, " search_box overlaps %d y units outside (low) of histogram grid", -y_idx_min); - - /* should increment the value somehow */ - y_idx_min = 0; - } - if (y_idx_min >= unitsy) - { - POSTGIS_DEBUGF(3, " search_box overlaps %d y units outside (high) of histogram grid", y_idx_min-unitsy+1); - - /* should increment the value somehow */ - y_idx_min = unitsy-1; - } - - /* Find first overlapping z unit */ - z_idx_min = (box->zmin-geogstats->zmin) / sizez * unitsz; - if (z_idx_min <0) - { - POSTGIS_DEBUGF(3, " search_box overlaps %d z units outside (low) of histogram grid", -z_idx_min); - - /* should increment the value somehow */ - z_idx_min = 0; - } - if (z_idx_min >= unitsz) - { - POSTGIS_DEBUGF(3, " search_box overlaps %d z units outside (high) of histogram grid", z_idx_min-unitsz+1); - - /* should increment the value somehow */ - z_idx_min = unitsz-1; - } - - /* Find last overlapping x unit */ - x_idx_max = (box->xmax-geogstats->xmin) / sizex * unitsx; - if (x_idx_max <0) - { - /* should increment the value somehow */ - x_idx_max = 0; - } - if (x_idx_max >= unitsx ) - { - /* should increment the value somehow */ - x_idx_max = unitsx-1; - } - - /* Find last overlapping y unit */ - y_idx_max = (box->ymax-geogstats->ymin) / sizey * unitsy; - if (y_idx_max <0) - { - /* should increment the value somehow */ - y_idx_max = 0; - } - if (y_idx_max >= unitsy) - { - /* should increment the value somehow */ - y_idx_max = unitsy-1; - } - - /* Find last overlapping z unit */ - z_idx_max = (box->zmax-geogstats->zmin) / sizez * unitsz; - if (z_idx_max <0) - { - /* should increment the value somehow */ - z_idx_max = 0; - } - if (z_idx_max >= unitsz) - { - /* should increment the value somehow */ - z_idx_max = unitsz-1; - } - - /* - * the {x,y,z}_idx_{min,max} - * define the grid squares that the box intersects - */ - for (z=z_idx_min; z<=z_idx_max; z++) - { - for (y=y_idx_min; y<=y_idx_max; y++) - { - for (x=x_idx_min; x<=x_idx_max; x++) - { - double val; - double gain; - - val = geogstats->value[x + y * unitsx + z * unitsx * unitsy]; - - /* - * Of the cell value we get - * only the overlap fraction. - */ - - intersect_x = Min(box->xmax, geogstats->xmin + (x+1) * sizex / unitsx) - Max(box->xmin, geogstats->xmin + x * sizex / unitsx); - intersect_y = Min(box->ymax, geogstats->ymin + (y+1) * sizey / unitsy) - Max(box->ymin, geogstats->ymin + y * sizey / unitsy); - intersect_z = Min(box->zmax, geogstats->zmin + (z+1) * sizez / unitsz) - Max(box->zmin, geogstats->zmin + z * sizez / unitsz); - - - switch ((int)geogstats->dims) - { - case 0: - AOI = 1; - case 1: - /* Working in 1 dimension: work out the value we need - for AOI */ - if (sizex == 0 && sizey == 0) - AOI = intersect_z; - else if (sizex == 0 && sizez == 0) - AOI = intersect_y; - else if (sizey == 0 && sizez == 0) - AOI = intersect_x; - break; - - case 2: - /* Working in 2 dimensions: work out which 2 values we - need for AOI */ - if (sizex == 0) - AOI = intersect_y * intersect_z; - else if (sizey == 0) - AOI = intersect_x * intersect_z; - else if (sizez == 0) - AOI = intersect_x * intersect_y; - break; - - case 3: - /* Working in 3 dimensions: use all 3 values */ - AOI = intersect_x * intersect_y * intersect_z; - break; - } - - gain = AOI/cell_coverage; - - POSTGIS_DEBUGF(4, " [%d,%d,%d] cell val %.15f", - x, y, z, val); - POSTGIS_DEBUGF(4, " [%d,%d,%d] AOI %.15f", - x, y, z, AOI); - POSTGIS_DEBUGF(4, " [%d,%d,%d] gain %.15f", - x, y, z, gain); - - val *= gain; - - POSTGIS_DEBUGF(4, " [%d,%d,%d] adding %.15f to value", - x, y, z, val); - - value += val; - } - } - } - - - /* - * If the search_box is a point, it will - * overlap a single cell and thus get - * it's value, which is the fraction of - * samples (we can presume of row set also) - * which bumped to that cell. - * - * If the table features are points, each - * of them will overlap a single histogram cell. - * Our search_box value would then be correctly - * computed as the sum of the bumped cells values. - * - * If both our search_box AND the sample features - * overlap more then a single histogram cell we - * need to consider the fact that our sum computation - * will have many duplicated included. E.g. each - * single sample feature would have contributed to - * raise the search_box value by as many times as - * many cells in the histogram are commonly overlapped - * by both searc_box and feature. We should then - * divide our value by the number of cells in the virtual - * 'intersection' between average feature cell occupation - * and occupation of the search_box. This is as - * fuzzy as you understand it :) - * - * Consistency check: whenever the number of cells is - * one of whichever part (search_box_occupation, - * avg_feature_occupation) the 'intersection' must be 1. - * If sounds that our 'intersaction' is actually the - * minimun number between search_box_occupation and - * avg_feat_occupation. - * - */ - overlapping_cells = (x_idx_max-x_idx_min+1) * (y_idx_max-y_idx_min+1) * (z_idx_max-z_idx_min+1); - avg_feat_cells = geogstats->avgFeatureCells; - - POSTGIS_DEBUGF(3, " search_box overlaps %f cells", overlapping_cells); - POSTGIS_DEBUGF(3, " avg feat overlaps %f cells", avg_feat_cells); - - if ( ! overlapping_cells ) - { - POSTGIS_DEBUG(3, " no overlapping cells, returning 0.0"); - - return 0.0; - } - - gain = 1 / Min(overlapping_cells, avg_feat_cells); - selectivity = value * gain; - - POSTGIS_DEBUGF(3, " SUM(ov_histo_cells)=%f", value); - POSTGIS_DEBUGF(3, " gain=%f", gain); - POSTGIS_DEBUGF(3, " selectivity=%f", selectivity); - - /* prevent rounding overflows */ - if (selectivity > 1.0) selectivity = 1.0; - else if (selectivity < 0) selectivity = 0.0; - - return selectivity; -} - - -/** - * This function should return an estimation of the number of - * rows returned by a query involving an overlap check - * ( it's the restrict function for the && operator ) - * - * It can make use (if available) of the statistics collected - * by the geometry analyzer function. - * - * Note that the good work is done by estimate_selectivity() above. - * This function just tries to find the search_box, loads the statistics - * and invoke the work-horse. - * - */ -PG_FUNCTION_INFO_V1(geography_gist_selectivity); -Datum geography_gist_selectivity(PG_FUNCTION_ARGS) -{ - PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); - - /* Oid operator = PG_GETARG_OID(1); */ - List *args = (List *) PG_GETARG_POINTER(2); - /* int varRelid = PG_GETARG_INT32(3); */ - Oid relid; - HeapTuple stats_tuple; - GEOG_STATS *geogstats; - /* - * This is to avoid casting the corresponding - * "type-punned" pointer, which would break - * "strict-aliasing rules". - */ - GEOG_STATS **gsptr=&geogstats; - int geogstats_nvalues = 0; - Node *other; - Var *self; - GBOX search_box; - float8 selectivity = 0; - - POSTGIS_DEBUG(2, "geography_gist_selectivity called"); - - /* Fail if not a binary opclause (probably shouldn't happen) */ - if (list_length(args) != 2) - { - POSTGIS_DEBUG(3, "geography_gist_selectivity: not a binary opclause"); - - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - /* - * This selectivity function is invoked by a clause of the form && - * - * In typical usage, one argument will be a column reference, while the other will - * be a geography constant; set self to point to the column argument and other - * to point to the constant argument. - */ - other = (Node *) linitial(args); - if ( ! IsA(other, Const) ) - { - self = (Var *)other; - other = (Node *) lsecond(args); - } - else - { - self = (Var *) lsecond(args); - } - - if ( ! IsA(other, Const) ) - { - POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity"); - - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - /* - * We don't have a nice && or && - * situation here. && would probably get evaluated - * away by PgSQL earlier on. && is harder, and the - * case we get often is && ST_Expand(), which does - * actually have a subtly different selectivity than a bae - * && call. It's calculatable though, by expanding - * every cell in the histgram appropriately. - * - * Discussion: http://trac.osgeo.org/postgis/ticket/1828 - * - * To do? Do variable selectivity based on the node. - */ - if ( ! IsA(self, Var) ) - { - POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity"); -// PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); - PG_RETURN_FLOAT8(0.33333); - } - - /* Convert coordinates to 3D geodesic */ - FLAGS_SET_GEODETIC(search_box.flags, 1); - if ( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) ) - { - POSTGIS_DEBUG(3, " search box cannot be calculated"); - PG_RETURN_FLOAT8(0.0); - } - - POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g %.15g, %.15g %.15g %.15g", - search_box.xmin, search_box.ymin, search_box.zmin, - search_box.xmax, search_box.ymax, search_box.zmax); - - /* - * Get pg_statistic row - */ - relid = getrelid(self->varno, root->parse->rtable); - - stats_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno)); - if ( ! stats_tuple ) - { - POSTGIS_DEBUG(3, " No statistics, returning default estimate"); - - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - - if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL, -#if POSTGIS_PGSQL_VERSION >= 85 - NULL, -#endif - (float4 **)gsptr, &geogstats_nvalues) ) - { - POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geography selectivity"); - - ReleaseSysCache(stats_tuple); - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - POSTGIS_DEBUGF(4, " %d read from stats", geogstats_nvalues); - - POSTGIS_DEBUGF(4, " histo: xmin,ymin,zmin: %f,%f,%f", geogstats->xmin, geogstats->ymin, geogstats->zmin); - POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f,%f", geogstats->xmax, geogstats->ymax, geogstats->zmax); - POSTGIS_DEBUGF(4, " histo: unitsx: %f", geogstats->unitsx); - POSTGIS_DEBUGF(4, " histo: unitsy: %f", geogstats->unitsy); - POSTGIS_DEBUGF(4, " histo: unitsz: %f", geogstats->unitsz); - POSTGIS_DEBUGF(4, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage); - POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells); - - /* - * Do the estimation - */ - selectivity = estimate_selectivity(&search_box, geogstats); - - POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity); - - free_attstatsslot(0, NULL, 0, (float *)geogstats, geogstats_nvalues); - ReleaseSysCache(stats_tuple); - PG_RETURN_FLOAT8(selectivity); -} - - -/** -* JOIN selectivity in the GiST && operator -* for all PG versions -*/ -PG_FUNCTION_INFO_V1(geography_gist_join_selectivity); -Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS) -{ - PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); - - /* Oid operator = PG_GETARG_OID(1); */ - List *args = (List *) PG_GETARG_POINTER(2); - JoinType jointype = (JoinType) PG_GETARG_INT16(3); - - Node *arg1, *arg2; - Var *var1, *var2; - Oid relid1, relid2; - - HeapTuple stats1_tuple, stats2_tuple; - GEOG_STATS *geogstats1, *geogstats2; - /* - * These are to avoid casting the corresponding - * "type-punned" pointers, which would break - * "strict-aliasing rules". - */ - GEOG_STATS **gs1ptr=&geogstats1, **gs2ptr=&geogstats2; - int geogstats1_nvalues = 0, geogstats2_nvalues = 0; - float8 selectivity1 = 0.0, selectivity2 = 0.0; - float4 num1_tuples = 0.0, num2_tuples = 0.0; - float4 total_tuples = 0.0, rows_returned = 0.0; - GBOX search_box; - - - /** - * Join selectivity algorithm. To calculation the selectivity we - * calculate the intersection of the two column sample extents, - * sum the results, and then multiply by two since for each - * geometry in col 1 that intersects a geometry in col 2, the same - * will also be true. - */ - - POSTGIS_DEBUGF(3, "geography_gist_join_selectivity called with jointype %d", jointype); - - /* - * We'll only respond to an inner join/unknown context join - */ - if (jointype != JOIN_INNER) - { - elog(NOTICE, "geography_gist_join_selectivity called with incorrect join type"); - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - /* - * Determine the oids of the geometry columns we are working with - */ - arg1 = (Node *) linitial(args); - arg2 = (Node *) lsecond(args); - - if (!IsA(arg1, Var) || !IsA(arg2, Var)) - { - elog(DEBUG1, "geography_gist_join_selectivity called with arguments that are not column references"); - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - var1 = (Var *)arg1; - var2 = (Var *)arg2; - - relid1 = getrelid(var1->varno, root->parse->rtable); - relid2 = getrelid(var2->varno, root->parse->rtable); - - POSTGIS_DEBUGF(3, "Working with relations oids: %d %d", relid1, relid2); - - /* Read the stats tuple from the first column */ - stats1_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno)); - if ( ! stats1_tuple ) - { - POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity"); - - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - if ( ! get_attstatsslot(stats1_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL, -#if POSTGIS_PGSQL_VERSION >= 85 - NULL, -#endif - (float4 **)gs1ptr, &geogstats1_nvalues) ) - { - POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity"); - - ReleaseSysCache(stats1_tuple); - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - - /* Read the stats tuple from the second column */ - stats2_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno)); - if ( ! stats2_tuple ) - { - POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity"); - - free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues); - ReleaseSysCache(stats1_tuple); - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - if ( ! get_attstatsslot(stats2_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL, -#if POSTGIS_PGSQL_VERSION >= 85 - NULL, -#endif - (float4 **)gs2ptr, &geogstats2_nvalues) ) - { - POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity"); - - free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues); - ReleaseSysCache(stats2_tuple); - ReleaseSysCache(stats1_tuple); - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - - /** - * Setup the search box - this is the intersection of the two column - * extents. - */ - search_box.xmin = Max(geogstats1->xmin, geogstats2->xmin); - search_box.ymin = Max(geogstats1->ymin, geogstats2->ymin); - search_box.zmin = Max(geogstats1->zmin, geogstats2->zmin); - search_box.xmax = Min(geogstats1->xmax, geogstats2->xmax); - search_box.ymax = Min(geogstats1->ymax, geogstats2->ymax); - search_box.zmax = Min(geogstats1->zmax, geogstats2->zmax); - - /* If the extents of the two columns don't intersect, return zero */ - if (search_box.xmin > search_box.xmax || search_box.ymin > search_box.ymax || - search_box.zmin > search_box.zmax) - PG_RETURN_FLOAT8(0.0); - - POSTGIS_DEBUGF(3, " -- geomstats1 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats1->xmin, geogstats1->ymin, geogstats1->zmin, geogstats1->xmax, geogstats1->ymax, geogstats1->zmax); - POSTGIS_DEBUGF(3, " -- geomstats2 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats2->xmin, geogstats2->ymin, geogstats2->zmin, geogstats2->xmax, geogstats2->ymax, geogstats2->zmax); - POSTGIS_DEBUGF(3, " -- calculated intersection box is : %.15g %.15g %.15g, %.15g %.15g %.15g", search_box.xmin, search_box.ymin, search_box.zmin, search_box.xmax, search_box.ymax, search_box.zmax); - - - /* Do the selectivity */ - selectivity1 = estimate_selectivity(&search_box, geogstats1); - selectivity2 = estimate_selectivity(&search_box, geogstats2); - - POSTGIS_DEBUGF(3, "selectivity1: %.15g selectivity2: %.15g", selectivity1, selectivity2); - - /* - * OK, so before we calculate the join selectivity we also need to - * know the number of tuples in each of the columns since - * estimate_selectivity returns the number of estimated tuples - * divided by the total number of tuples. - */ - num1_tuples = geogstats1->totalrows; - num2_tuples = geogstats2->totalrows; - - /* Free the statistic tuples */ - free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues); - ReleaseSysCache(stats1_tuple); - - free_attstatsslot(0, NULL, 0, (float *)geogstats2, geogstats2_nvalues); - ReleaseSysCache(stats2_tuple); - - /* - * Finally calculate the estimate of the number of rows returned - * - * = 2 * (nrows from col1 + nrows from col2) / - * total nrows in col1 x total nrows in col2 - * - * The factor of 2 accounts for the fact that for each tuple in - * col 1 matching col 2, - * there will be another match in col 2 matching col 1 - */ - total_tuples = num1_tuples * num2_tuples; - rows_returned = 2 * ((num1_tuples * selectivity1) + (num2_tuples * selectivity2)); - - POSTGIS_DEBUGF(3, "Rows from rel1: %f", num1_tuples * selectivity1); - POSTGIS_DEBUGF(3, "Rows from rel2: %f", num2_tuples * selectivity2); - POSTGIS_DEBUGF(3, "Estimated rows returned: %f", rows_returned); - - /* - * One (or both) tuple count is zero... - * We return default selectivity estimate. - * We could probably attempt at an estimate - * w/out looking at tables tuple count, with - * a function of selectivity1, selectivity2. - */ - if ( ! total_tuples ) - { - POSTGIS_DEBUG(3, "Total tuples == 0, returning default join selectivity"); - - PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL); - } - - if ( rows_returned > total_tuples ) - PG_RETURN_FLOAT8(1.0); - - PG_RETURN_FLOAT8(rows_returned / total_tuples); -} - - -/** - * This function is called by the analyze function iff - * the geography_analyze() function give it its pointer - * (this is always the case so far). - * The geography_analyze() function is also responsible - * of deciding the number of "sample" rows we will receive - * here. It is able to give use other 'custom' data, but we - * won't use them so far. - * - * Our job is to build some statistics on the sample data - * for use by operator estimators. - * - * Currently we only need statistics to estimate the number of rows - * overlapping a given extent (estimation function bound - * to the && operator). - * - */ - -static void -compute_geography_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, - int samplerows, double totalrows) -{ - MemoryContext old_context; - GEOG_STATS *geogstats; - - GBOX gbox; - - GBOX *sample_extent = NULL; - GBOX **sampleboxes; - GBOX histobox; - int histocells; - double sizex, sizey, sizez, edgelength; - int unitsx = 0, unitsy = 0, unitsz = 0; - int geog_stats_size; - struct dimensions histodims[3]; - int ndims; - - double total_width = 0; - int notnull_cnt = 0, examinedsamples = 0, total_count_cells=0, total_cells_coverage = 0; - - /* for standard deviation */ - double avgLOWx, avgLOWy, avgLOWz, avgHIGx, avgHIGy, avgHIGz; - double sumLOWx = 0, sumLOWy = 0, sumLOWz = 0, sumHIGx = 0, sumHIGy = 0, sumHIGz = 0; - double sdLOWx = 0, sdLOWy = 0, sdLOWz = 0, sdHIGx = 0, sdHIGy = 0, sdHIGz = 0; - GBOX *newhistobox = NULL; - - bool isnull; - int i; - - POSTGIS_DEBUG(2, "compute_geography_stats called"); - - /* - * We'll build an histogram having from 40 to 400 boxesPerSide - * Total number of cells is determined by attribute stat - * target. It can go from 1600 to 160000 (stat target: 10,1000) - */ - histocells = 160 * stats->attr->attstattarget; - - /* - * Memory to store the bounding boxes from all of the sampled rows - */ - sampleboxes = palloc(sizeof(GBOX *) * samplerows); - - /* Mark the GBOX as being geodetic */ - FLAGS_SET_GEODETIC(gbox.flags, 1); - - /* - * First scan: - * o find extent of the sample rows - * o count null-infinite/not-null values - * o compute total_width - * o compute total features's box area (for avgFeatureArea) - * o sum features box coordinates (for standard deviation) - */ - for (i = 0; i < samplerows; i++) - { - Datum datum; - GSERIALIZED *serialized; - LWGEOM *geometry; - - /* Fetch the datum and cast it into a geography */ - datum = fetchfunc(stats, i, &isnull); - - /* Skip nulls */ - if (isnull) - continue; - - serialized = (GSERIALIZED*)PG_DETOAST_DATUM(datum); - geometry = lwgeom_from_gserialized(serialized); - /* Convert coordinates to 3D geodesic */ - if ( ! lwgeom_calculate_gbox_geodetic(geometry, &gbox) ) - { - /* Unable to obtain or calculate a bounding box */ - POSTGIS_DEBUGF(3, "skipping geometry at position %d", i); - - continue; - } - - /* - * Skip infinite geoms - */ - if ( ! finite(gbox.xmin) || ! finite(gbox.xmax) || - ! finite(gbox.ymin) || ! finite(gbox.ymax) || - ! finite(gbox.zmin) || ! finite(gbox.zmax) ) - { - POSTGIS_DEBUGF(3, " skipped infinite geometry at position %d", i); - - continue; - } - - /* - * Store bounding box in array - */ - sampleboxes[notnull_cnt] = palloc(sizeof(GBOX)); - memcpy(sampleboxes[notnull_cnt], &gbox, sizeof(GBOX)); - - /* - * Add to sample extent union - */ - if ( ! sample_extent ) - { - sample_extent = palloc(sizeof(GBOX)); - memcpy(sample_extent, &gbox, sizeof(GBOX)); - } - else - { - sample_extent->xmax = Max(sample_extent->xmax, gbox.xmax); - sample_extent->ymax = Max(sample_extent->ymax, gbox.ymax); - sample_extent->zmax = Max(sample_extent->zmax, gbox.zmax); - sample_extent->xmin = Min(sample_extent->xmin, gbox.xmin); - sample_extent->ymin = Min(sample_extent->ymin, gbox.ymin); - sample_extent->zmin = Min(sample_extent->zmin, gbox.zmin); - } - - /** TODO: ask if we need geom or bvol size for stawidth */ - total_width += VARSIZE(serialized); - - /* - * Add bvol coordinates to sum for standard deviation - * computation. - */ - sumLOWx += gbox.xmin; - sumLOWy += gbox.ymin; - sumLOWz += gbox.zmin; - sumHIGx += gbox.xmax; - sumHIGy += gbox.ymax; - sumHIGz += gbox.zmax; - - notnull_cnt++; - - /* give backend a chance of interrupting us */ - vacuum_delay_point(); - } - - POSTGIS_DEBUG(3, "End of 1st scan:"); - POSTGIS_DEBUGF(3, " Sample extent (min, max): (%g %g %g), (%g %g %g)", sample_extent->xmin, sample_extent->ymin, - sample_extent->zmin, sample_extent->xmax, sample_extent->ymax, sample_extent->zmax); - POSTGIS_DEBUGF(3, " No. of geometries sampled: %d", samplerows); - POSTGIS_DEBUGF(3, " No. of non-null geometries sampled: %d", notnull_cnt); - - if ( ! notnull_cnt ) - { - elog(NOTICE, " no notnull values, invalid stats"); - stats->stats_valid = false; - return; - } - - POSTGIS_DEBUG(3, "Standard deviation filter enabled"); - - /* - * Second scan: - * o compute standard deviation - */ - avgLOWx = sumLOWx / notnull_cnt; - avgLOWy = sumLOWy / notnull_cnt; - avgLOWz = sumLOWz / notnull_cnt; - avgHIGx = sumHIGx / notnull_cnt; - avgHIGy = sumHIGy / notnull_cnt; - avgHIGz = sumHIGz / notnull_cnt; - - for (i = 0; i < notnull_cnt; i++) - { - GBOX *box; - box = (GBOX *)sampleboxes[i]; - - sdLOWx += (box->xmin - avgLOWx) * (box->xmin - avgLOWx); - sdLOWy += (box->ymin - avgLOWy) * (box->ymin - avgLOWy); - sdLOWz += (box->zmin - avgLOWz) * (box->zmin - avgLOWz); - sdHIGx += (box->xmax - avgHIGx) * (box->xmax - avgHIGx); - sdHIGy += (box->ymax - avgHIGy) * (box->ymax - avgHIGy); - sdHIGz += (box->zmax - avgHIGz) * (box->zmax - avgHIGz); - } - sdLOWx = sqrt(sdLOWx / notnull_cnt); - sdLOWy = sqrt(sdLOWy / notnull_cnt); - sdLOWz = sqrt(sdLOWz / notnull_cnt); - sdHIGx = sqrt(sdHIGx / notnull_cnt); - sdHIGy = sqrt(sdHIGy / notnull_cnt); - sdHIGz = sqrt(sdHIGz / notnull_cnt); - - POSTGIS_DEBUG(3, " standard deviations:"); - POSTGIS_DEBUGF(3, " LOWx - avg:%f sd:%f", avgLOWx, sdLOWx); - POSTGIS_DEBUGF(3, " LOWy - avg:%f sd:%f", avgLOWy, sdLOWy); - POSTGIS_DEBUGF(3, " LOWz - avg:%f sd:%f", avgLOWz, sdLOWz); - POSTGIS_DEBUGF(3, " HIGx - avg:%f sd:%f", avgHIGx, sdHIGx); - POSTGIS_DEBUGF(3, " HIGy - avg:%f sd:%f", avgHIGy, sdHIGy); - POSTGIS_DEBUGF(3, " HIGz - avg:%f sd:%f", avgHIGz, sdHIGz); - - histobox.xmin = Max((avgLOWx - SDFACTOR * sdLOWx), sample_extent->xmin); - histobox.ymin = Max((avgLOWy - SDFACTOR * sdLOWy), sample_extent->ymin); - histobox.zmin = Max((avgLOWz - SDFACTOR * sdLOWz), sample_extent->zmin); - histobox.xmax = Min((avgHIGx + SDFACTOR * sdHIGx), sample_extent->xmax); - histobox.ymax = Min((avgHIGy + SDFACTOR * sdHIGy), sample_extent->ymax); - histobox.zmax = Min((avgHIGz + SDFACTOR * sdHIGz), sample_extent->zmax); - - POSTGIS_DEBUGF(3, " sd_extent: xmin, ymin, zmin: %f, %f, %f", - histobox.xmin, histobox.ymin, histobox.zmin); - POSTGIS_DEBUGF(3, " sd_extent: xmax, ymax, zmax: %f, %f, %f", - histobox.xmax, histobox.ymax, histobox.zmax); - - /* - * Third scan: - * o skip hard deviants - * o compute new histogram box - */ - for (i = 0; i < notnull_cnt; i++) - { - GBOX *box; - box = (GBOX *)sampleboxes[i]; - - if ( box->xmin > histobox.xmax || box->xmax < histobox.xmin || - box->ymin > histobox.ymax || box->ymax < histobox.ymin || - box->zmin > histobox.zmax || box->zmax < histobox.zmin) - { - POSTGIS_DEBUGF(4, " feat %d is an hard deviant, skipped", i); - - sampleboxes[i] = NULL; - continue; - } - - if ( ! newhistobox ) - { - newhistobox = palloc(sizeof(GBOX)); - memcpy(newhistobox, box, sizeof(GBOX)); - } - else - { - if ( box->xmin < newhistobox->xmin ) - newhistobox->xmin = box->xmin; - if ( box->ymin < newhistobox->ymin ) - newhistobox->ymin = box->ymin; - if ( box->zmin < newhistobox->zmin ) - newhistobox->zmin = box->zmin; - if ( box->xmax > newhistobox->xmax ) - newhistobox->xmax = box->xmax; - if ( box->ymax > newhistobox->ymax ) - newhistobox->ymax = box->ymax; - if ( box->zmax > newhistobox->zmax ) - newhistobox->zmax = box->zmax; - } - } - - /* If everything was a deviant, the new histobox is the same as the old histobox */ - if ( ! newhistobox ) - { - newhistobox = palloc(sizeof(GBOX)); - memcpy(newhistobox, &histobox, sizeof(GBOX)); - } - - /* - * Set histogram extent as the intersection between - * standard deviation based histogram extent - * and computed sample extent after removal of - * hard deviants (there might be no hard deviants). - */ - if ( histobox.xmin < newhistobox->xmin ) - histobox.xmin = newhistobox->xmin; - if ( histobox.ymin < newhistobox->ymin ) - histobox.ymin = newhistobox->ymin; - if ( histobox.zmin < newhistobox->zmin ) - histobox.zmin = newhistobox->zmin; - if ( histobox.xmax > newhistobox->xmax ) - histobox.xmax = newhistobox->xmax; - if ( histobox.ymax > newhistobox->ymax ) - histobox.ymax = newhistobox->ymax; - if ( histobox.zmax > newhistobox->zmax ) - histobox.zmax = newhistobox->zmax; - - POSTGIS_DEBUGF(3, " histogram_extent: xmin, ymin, zmin: %f, %f, %f", - histobox.xmin, histobox.ymin, histobox.zmin); - POSTGIS_DEBUGF(3, " histogram_extent: xmax, ymax, zmax: %f, %f, %f", - histobox.xmax, histobox.ymax, histobox.zmax); - - /* Calculate the size of each dimension */ - sizex = histobox.xmax - histobox.xmin; - sizey = histobox.ymax - histobox.ymin; - sizez = histobox.zmax - histobox.zmin; - - /* In order to calculate a suitable aspect ratio for the histogram, we need - to work out how many dimensions exist within our sample data (which we - assume is representative of the whole data) */ - ndims = 0; - if (sizex != 0) - { - histodims[ndims].axis = 'X'; - histodims[ndims].min = histobox.xmin; - histodims[ndims].max = histobox.xmax; - ndims++; - } - - if (sizey != 0) - { - histodims[ndims].axis = 'Y'; - histodims[ndims].min = histobox.ymin; - histodims[ndims].max = histobox.ymax; - - ndims++; - } - - if (sizez != 0) - { - histodims[ndims].axis = 'Z'; - histodims[ndims].min = histobox.zmin; - histodims[ndims].max = histobox.zmax; - - ndims++; - } - - /* Based upon the number of dimensions, we now work out the number of units in each dimension. - The number of units is defined as the number of cell blocks in each dimension which make - up the total number of histocells; i.e. unitsx * unitsy * unitsz = histocells */ - - /* Note: geodetic data is currently indexed in 3 dimensions; however code for remaining dimensions - is also included to allow for indexing 3D cartesian data at a later date */ - - POSTGIS_DEBUGF(3, "Number of dimensions in sample set: %d", ndims); - - switch (ndims) - { - case 0: - /* An empty column, or multiple points in exactly the same - position in space */ - unitsx = 1; - unitsy = 1; - unitsz = 1; - histocells = 1; - break; - - case 1: - /* Sample data all lies on a single line, so set the correct - units variables depending upon which axis is in use */ - for (i = 0; i < ndims; i++) - { - if ( (histodims[i].max - histodims[i].min) != 0) - { - /* We've found the non-zero dimension, so set the - units variables accordingly */ - switch (histodims[i].axis) - { - case 'X': - unitsx = histocells; - unitsy = 1; - unitsz = 1; - break; - - case 'Y': - unitsx = 1; - unitsy = histocells; - unitsz = 1; - break; - - case 'Z': - unitsx = 1; - unitsy = 1; - unitsz = histocells; - break; - } - } - } - break; - - case 2: - /* Sample data lies within 2D space: divide the total area by the total - number of cells, and thus work out the edge size of the unit block */ - edgelength = sqrt( - Abs(histodims[0].max - histodims[0].min) * - Abs(histodims[1].max - histodims[1].min) / (double)histocells - ); - - /* The calculation is easy; the harder part is to work out which dimensions - we actually have to set the units variables appropriately */ - if (histodims[0].axis == 'X' && histodims[1].axis == 'Y') - { - /* X and Y */ - unitsx = Abs(histodims[0].max - histodims[0].min) / edgelength; - unitsy = Abs(histodims[1].max - histodims[1].min) / edgelength; - unitsz = 1; - } - else if (histodims[0].axis == 'Y' && histodims[1].axis == 'X') - { - /* Y and X */ - unitsx = Abs(histodims[1].max - histodims[1].min) / edgelength; - unitsy = Abs(histodims[0].max - histodims[0].min) / edgelength; - unitsz = 1; - } - else if (histodims[0].axis == 'X' && histodims[1].axis == 'Z') - { - /* X and Z */ - unitsx = Abs(histodims[0].max - histodims[0].min) / edgelength; - unitsy = 1; - unitsz = Abs(histodims[1].max - histodims[1].min) / edgelength; - } - else if (histodims[0].axis == 'Z' && histodims[1].axis == 'X') - { - /* Z and X */ - unitsx = Abs(histodims[1].max - histodims[1].min) / edgelength; - unitsy = 1; - unitsz = Abs(histodims[0].max - histodims[0].min) / edgelength; - } - else if (histodims[0].axis == 'Y' && histodims[1].axis == 'Z') - { - /* Y and Z */ - unitsx = 1; - unitsy = Abs(histodims[0].max - histodims[0].min) / edgelength; - unitsz = Abs(histodims[1].max - histodims[1].min) / edgelength; - } - else if (histodims[0].axis == 'Z' && histodims[1].axis == 'Y') - { - /* Z and Y */ - unitsx = 1; - unitsy = Abs(histodims[1].max - histodims[1].min) / edgelength; - unitsz = Abs(histodims[0].max - histodims[0].min) / edgelength; - } - - break; - - case 3: - /* Sample data lies within 3D space: divide the total volume by the total - number of cells, and thus work out the edge size of the unit block */ - edgelength = pow( - Abs(histodims[0].max - histodims[0].min) * - Abs(histodims[1].max - histodims[1].min) * - Abs(histodims[2].max - histodims[2].min) / (double)histocells, - (double)1/3); - - /* Units are simple in 3 dimensions */ - unitsx = Abs(histodims[0].max - histodims[0].min) / edgelength; - unitsy = Abs(histodims[1].max - histodims[1].min) / edgelength; - unitsz = Abs(histodims[2].max - histodims[2].min) / edgelength; - - break; - } - - POSTGIS_DEBUGF(3, " computed histogram grid size (X,Y,Z): %d x %d x %d (%d out of %d cells)", unitsx, unitsy, unitsz, unitsx * unitsy * unitsz, histocells); - - /* - * Create the histogram (GEOG_STATS) - */ - old_context = MemoryContextSwitchTo(stats->anl_context); - geog_stats_size = sizeof(GEOG_STATS) + (histocells - 1) * sizeof(float4); - geogstats = palloc(geog_stats_size); - MemoryContextSwitchTo(old_context); - - geogstats->dims = ndims; - geogstats->xmin = histobox.xmin; - geogstats->ymin = histobox.ymin; - geogstats->zmin = histobox.zmin; - geogstats->xmax = histobox.xmax; - geogstats->ymax = histobox.ymax; - geogstats->zmax = histobox.zmax; - geogstats->unitsx = unitsx; - geogstats->unitsy = unitsy; - geogstats->unitsz = unitsz; - geogstats->totalrows = totalrows; - - /* Initialize all values to 0 */ - for (i = 0; i < histocells; i++) - geogstats->value[i] = 0; - - - /* - * Fourth scan: - * o fill histogram values with the number of - * features' bbox overlaps: a feature's bvol - * can fully overlap (1) or partially overlap - * (fraction of 1) an histogram cell. - * - * o compute total cells occupation - * - */ - - POSTGIS_DEBUG(3, "Beginning histogram intersection calculations"); - - for (i = 0; i < notnull_cnt; i++) - { - GBOX *box; - - /* Note these array index variables are zero-based */ - int x_idx_min, x_idx_max, x; - int y_idx_min, y_idx_max, y; - int z_idx_min, z_idx_max, z; - int numcells = 0; - - box = (GBOX *)sampleboxes[i]; - if ( ! box ) continue; /* hard deviant.. */ - - /* give backend a chance of interrupting us */ - vacuum_delay_point(); - - POSTGIS_DEBUGF(4, " feat %d box is %f %f %f, %f %f %f", - i, box->xmax, box->ymax, box->zmax, box->xmin, box->ymin, box->zmin); - - /* Find first overlapping unitsx cell */ - x_idx_min = (box->xmin - geogstats->xmin) / sizex * unitsx; - if (x_idx_min <0) x_idx_min = 0; - if (x_idx_min >= unitsx) x_idx_min = unitsx - 1; - - /* Find first overlapping unitsy cell */ - y_idx_min = (box->ymin - geogstats->ymin) / sizey * unitsy; - if (y_idx_min <0) y_idx_min = 0; - if (y_idx_min >= unitsy) y_idx_min = unitsy - 1; - - /* Find first overlapping unitsz cell */ - z_idx_min = (box->zmin - geogstats->zmin) / sizez * unitsz; - if (z_idx_min <0) z_idx_min = 0; - if (z_idx_min >= unitsz) z_idx_min = unitsz - 1; - - /* Find last overlapping unitsx cell */ - x_idx_max = (box->xmax - geogstats->xmin) / sizex * unitsx; - if (x_idx_max <0) x_idx_max = 0; - if (x_idx_max >= unitsx ) x_idx_max = unitsx - 1; - - /* Find last overlapping unitsy cell */ - y_idx_max = (box->ymax - geogstats->ymin) / sizey * unitsy; - if (y_idx_max <0) y_idx_max = 0; - if (y_idx_max >= unitsy) y_idx_max = unitsy - 1; - - /* Find last overlapping unitsz cell */ - z_idx_max = (box->zmax - geogstats->zmin) / sizez * unitsz; - if (z_idx_max <0) z_idx_max = 0; - if (z_idx_max >= unitsz) z_idx_max = unitsz - 1; - - POSTGIS_DEBUGF(4, " feat %d overlaps unitsx %d-%d, unitsy %d-%d, unitsz %d-%d", - i, x_idx_min, x_idx_max, y_idx_min, y_idx_max, z_idx_min, z_idx_max); - - /* Calculate the feature coverage - this of course depends upon the number of dims */ - switch (ndims) - { - case 1: - total_cells_coverage++; - break; - - case 2: - total_cells_coverage += (box->xmax - box->xmin) * (box->ymax - box->ymin); - break; - - case 3: - total_cells_coverage += (box->xmax - box->xmin) * (box->ymax - box->ymin) * - (box->zmax - box->zmin); - break; - } - - /* - * the {x,y,z}_idx_{min,max} - * define the grid squares that the box intersects - */ - - for (z = z_idx_min; z <= z_idx_max; z++) - { - for (y = y_idx_min; y <= y_idx_max; y++) - { - for (x = x_idx_min; x <= x_idx_max; x++) - { - geogstats->value[x + y * unitsx + z * unitsx * unitsy] += 1; - numcells++; - } - } - } - - /* - * before adding to the total cells - * we could decide if we really - * want this feature to count - */ - total_count_cells += numcells; - - examinedsamples++; - } - - POSTGIS_DEBUGF(3, " examined_samples: %d/%d", examinedsamples, samplerows); - - if ( ! examinedsamples ) - { - elog(NOTICE, " no examined values, invalid stats"); - stats->stats_valid = false; - - POSTGIS_DEBUG(3, " no stats have been gathered"); - - return; - } - - /** TODO: what about null features (TODO) */ - geogstats->avgFeatureCells = (float4)total_count_cells / examinedsamples; - geogstats->avgFeatureCoverage = total_cells_coverage / examinedsamples; - - POSTGIS_DEBUGF(3, " histo: total_boxes_cells: %d", total_count_cells); - POSTGIS_DEBUGF(3, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells); - POSTGIS_DEBUGF(3, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage); - - /* - * Normalize histogram - * - * We divide each histogram cell value - * by the number of samples examined. - * - */ - for (i = 0; i < histocells; i++) - geogstats->value[i] /= examinedsamples; - -#if POSTGIS_DEBUG_LEVEL >= 4 - /* Dump the resulting histogram for analysis */ - { - int x, y, z; - for (x = 0; x < unitsx; x++) - { - for (y = 0; y < unitsy; y++) - { - for (z = 0; z < unitsz; z++) - { - POSTGIS_DEBUGF(4, " histo[%d,%d,%d] = %.15f", x, y, z, - geogstats->value[x + y * unitsx + z * unitsx * unitsy]); - } - } - } - } -#endif - - /* - * Write the statistics data - */ - stats->stakind[0] = STATISTIC_KIND_GEOGRAPHY; - stats->staop[0] = InvalidOid; - stats->stanumbers[0] = (float4 *)geogstats; - stats->numnumbers[0] = geog_stats_size/sizeof(float4); - - stats->stanullfrac = (float4)(samplerows - notnull_cnt)/samplerows; - stats->stawidth = total_width/notnull_cnt; - stats->stadistinct = -1.0; - - POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_GEOGRAPHY)", - stats->stakind[0]); - POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]); - POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]); - POSTGIS_DEBUGF(3, " out: null fraction: %d/%d=%g", (samplerows - notnull_cnt), samplerows, stats->stanullfrac); - POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth); - POSTGIS_DEBUG(3, " out: distinct values: all (no check done)"); - - stats->stats_valid = true; - -} - - -/** - * This function will be called when the ANALYZE command is run - * on a column of the "geography" type. - * - * It will need to return a stats builder function reference - * and a "minimum" sample rows to feed it. - * If we want analysis to be completely skipped we can return - * FALSE and leave output vals untouched. - * - * What we know from this call is: - * - * o The pg_attribute row referring to the specific column. - * Could be used to get reltuples from pg_class (which - * might quite inexact though...) and use them to set an - * appropriate minimum number of sample rows to feed to - * the stats builder. The stats builder will also receive - * a more accurate "estimation" of the number or rows. - * - * o The pg_type row for the specific column. - * Could be used to set stat builder / sample rows - * based on domain type (when postgis will be implemented - * that way). - * - * Being this experimental we'll stick to a static stat_builder/sample_rows - * value for now. - * - */ - -PG_FUNCTION_INFO_V1(geography_analyze); -Datum geography_analyze(PG_FUNCTION_ARGS) -{ - VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0); - Form_pg_attribute attr = stats->attr; - - POSTGIS_DEBUG(2, "geography_analyze called"); - - /* If the attstattarget column is negative, use the default value */ - /* NB: it is okay to scribble on stats->attr since it's a copy */ - if (attr->attstattarget < 0) - attr->attstattarget = default_statistics_target; - - POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget); - - /* Setup the minimum rows and the algorithm function */ - stats->minrows = 300 * stats->attr->attstattarget; - stats->compute_stats = compute_geography_stats; - - POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows); - - /* Indicate we are done successfully */ - PG_RETURN_BOOL(true); -} diff --git a/postgis/geography_inout.c b/postgis/geography_inout.c index ab73ae55e..f77deab4e 100644 --- a/postgis/geography_inout.c +++ b/postgis/geography_inout.c @@ -46,9 +46,6 @@ Datum geometry_from_geography(PG_FUNCTION_ARGS); Datum geography_send(PG_FUNCTION_ARGS); Datum geography_recv(PG_FUNCTION_ARGS); -/* Datum geography_gist_selectivity(PG_FUNCTION_ARGS); TBD */ -/* Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS); TBD */ - GSERIALIZED* gserialized_geography_from_lwgeom(LWGEOM *lwgeom, int32 geog_typmod); /** diff --git a/postgis/geometry_estimate.c b/postgis/geometry_estimate.c deleted file mode 100644 index 94a5a2593..000000000 --- a/postgis/geometry_estimate.c +++ /dev/null @@ -1,1575 +0,0 @@ -/********************************************************************** - * $Id: geometry_gist_selectivity.c 6385 2010-12-15 00:57:35Z pramsey $ - * - * PostGIS - Spatial Types for PostgreSQL - * http://postgis.refractions.net - * - * Copyright 2010 (C) Paul Ramsey - * Copyright 2004 (C) Sandro Santilli - * - * This is free software; you can redistribute and/or modify it under - * the terms of the GNU General Public Licence. See the COPYING file. - * - **********************************************************************/ - -#include "postgres.h" -#include "executor/spi.h" -#include "fmgr.h" -#include "commands/vacuum.h" -#include "nodes/relation.h" -#include "parser/parsetree.h" -#include "utils/array.h" -#include "utils/lsyscache.h" -#include "utils/syscache.h" -#include "utils/rel.h" - -#include "../postgis_config.h" - -#if POSTGIS_PGSQL_VERSION >= 93 - #include "access/htup_details.h" -#endif - -#include "liblwgeom.h" -#include "lwgeom_pg.h" /* For debugging macros. */ -#include "gserialized_gist.h" /* For index common functions */ - - -#include -#if HAVE_IEEEFP_H -#include -#endif -#include -#include -#include -#include -#include - -/** - * Assign a number to the postgis statistics kind - * - * tgl suggested: - * - * 1-100: reserved for assignment by the core Postgres project - * 100-199: reserved for assignment by PostGIS - * 200-9999: reserved for other globally-known stats kinds - * 10000-32767: reserved for private site-local use - * - */ -#define STATISTIC_KIND_GEOMETRY 100 - -/* - * Define this if you want to use standard deviation based - * histogram extent computation. If you do, you can also - * tweak the deviation factor used in computation with - * SDFACTOR. - */ -#define SDFACTOR 3.25 - -typedef struct GEOM_STATS_T -{ - /* cols * rows = total boxes in grid */ - float4 cols; - float4 rows; - - /* average bounding box area of not-null features */ - float4 avgFeatureArea; - - /* - * average number of histogram cells - * covered by the sample not-null features - */ - float4 avgFeatureCells; - - /* BOX of area */ - float4 xmin,ymin, xmax, ymax; - - /* - * variable length # of floats for histogram - */ - float4 value[1]; -} -GEOM_STATS; - -static float8 estimate_selectivity(GBOX *box, GEOM_STATS *geomstats); - - -#define SHOW_DIGS_DOUBLE 15 -#define MAX_DIGS_DOUBLE (SHOW_DIGS_DOUBLE + 6 + 1 + 3 +1) - -/** - * Default geometry selectivity factor - */ -#define DEFAULT_GEOMETRY_SEL 0.000005 - -/** - * Default geometry join selectivity factor - */ -#define DEFAULT_GEOMETRY_JOINSEL 0.000005 - -Datum geometry_gist_sel_2d(PG_FUNCTION_ARGS); -Datum geometry_gist_joinsel_2d(PG_FUNCTION_ARGS); -Datum geometry_analyze_2d(PG_FUNCTION_ARGS); -Datum geometry_estimated_extent(PG_FUNCTION_ARGS); -Datum _postgis_geometry_sel(PG_FUNCTION_ARGS); - - -static int -calculate_column_intersection(GBOX *search_box, GEOM_STATS *geomstats1, GEOM_STATS *geomstats2) -{ - /** - * Calculate the intersection of two columns from their geomstats extents - return true - * if a valid intersection was found, false if there is no overlap - */ - - float8 i_xmin = Max(geomstats1->xmin, geomstats2->xmin); - float8 i_ymin = Max(geomstats1->ymin, geomstats2->ymin); - float8 i_xmax = Min(geomstats1->xmax, geomstats2->xmax); - float8 i_ymax = Min(geomstats1->ymax, geomstats2->ymax); - - /* If the rectangles don't intersect, return false */ - if (i_xmin > i_xmax || i_ymin > i_ymax) - return FALSE; - - /* Otherwise return the rectangle in search_box */ - search_box->xmin = i_xmin; - search_box->ymin = i_ymin; - search_box->xmax = i_xmax; - search_box->ymax = i_ymax; - - return TRUE; -} - -/** -* Join selectivity of the && operator. The selectivity -* is the ratio of the number of rows we think will be -* returned divided the maximum number of rows the join -* could possibly return (the full combinatoric join). -* -* selectivity = estimated_nrows / (totalrows1 * totalrows2) -*/ -PG_FUNCTION_INFO_V1(geometry_gist_joinsel_2d); -Datum geometry_gist_joinsel_2d(PG_FUNCTION_ARGS) -{ - PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); - - /* Oid operator = PG_GETARG_OID(1); */ - List *args = (List *) PG_GETARG_POINTER(2); - JoinType jointype = (JoinType) PG_GETARG_INT16(3); - - Node *arg1, *arg2; - Var *var1, *var2; - Oid relid1, relid2; - - HeapTuple stats1_tuple, stats2_tuple, class_tuple; - GEOM_STATS *geomstats1, *geomstats2; - /* - * These are to avoid casting the corresponding - * "type-punned" pointers, which would break - * "strict-aliasing rules". - */ - GEOM_STATS **gs1ptr=&geomstats1, **gs2ptr=&geomstats2; - int geomstats1_nvalues = 0, geomstats2_nvalues = 0; - float8 selectivity1 = 0.0, selectivity2 = 0.0; - float4 num1_tuples = 0.0, num2_tuples = 0.0; - float4 total_tuples = 0.0, rows_returned = 0.0; - GBOX search_box; - - - /** - * Join selectivity algorithm. To calculation the selectivity we - * calculate the intersection of the two column sample extents, - * sum the results, and then multiply by two since for each - * geometry in col 1 that intersects a geometry in col 2, the same - * will also be true. - */ - POSTGIS_DEBUGF(3, "geometry_gist_joinsel called with jointype %d", jointype); - - /* - * We'll only respond to an inner join/unknown context join - */ - if (jointype != JOIN_INNER) - { - elog(NOTICE, "geometry_gist_joinsel called with incorrect join type"); - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL); - } - - /* - * Determine the oids of the geometry columns we are working with - */ - arg1 = (Node *) linitial(args); - arg2 = (Node *) lsecond(args); - - if (!IsA(arg1, Var) || !IsA(arg2, Var)) - { - elog(DEBUG1, "geometry_gist_joinsel called with arguments that are not column references"); - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL); - } - - var1 = (Var *)arg1; - var2 = (Var *)arg2; - - relid1 = getrelid(var1->varno, root->parse->rtable); - relid2 = getrelid(var2->varno, root->parse->rtable); - - POSTGIS_DEBUGF(3, "Working with relations oids: %d %d", relid1, relid2); - - /* Read the stats tuple from the first column */ - stats1_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno)); - if ( ! stats1_tuple ) - { - POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity"); - - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL); - } - - - - if ( ! get_attstatsslot(stats1_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL, - NULL, - (float4 **)gs1ptr, &geomstats1_nvalues) ) - { - POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry join selectivity"); - - ReleaseSysCache(stats1_tuple); - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL); - } - - - /* Read the stats tuple from the second column */ - stats2_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno)); - if ( ! stats2_tuple ) - { - POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity"); - - free_attstatsslot(0, NULL, 0, (float *)geomstats1, - geomstats1_nvalues); - ReleaseSysCache(stats1_tuple); - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL); - } - - - if ( ! get_attstatsslot(stats2_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL, - NULL, - (float4 **)gs2ptr, &geomstats2_nvalues) ) - { - POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry join selectivity"); - - free_attstatsslot(0, NULL, 0, (float *)geomstats1, - geomstats1_nvalues); - ReleaseSysCache(stats2_tuple); - ReleaseSysCache(stats1_tuple); - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL); - } - - - /** - * Setup the search box - this is the intersection of the two column - * extents. - */ - calculate_column_intersection(&search_box, geomstats1, geomstats2); - - POSTGIS_DEBUGF(3, " -- geomstats1 box: %.15g %.15g, %.15g %.15g",geomstats1->xmin,geomstats1->ymin,geomstats1->xmax,geomstats1->ymax); - POSTGIS_DEBUGF(3, " -- geomstats2 box: %.15g %.15g, %.15g %.15g",geomstats2->xmin,geomstats2->ymin,geomstats2->xmax,geomstats2->ymax); - POSTGIS_DEBUGF(3, " -- calculated intersection box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax); - - - /* Do the selectivity */ - selectivity1 = estimate_selectivity(&search_box, geomstats1); - selectivity2 = estimate_selectivity(&search_box, geomstats2); - - POSTGIS_DEBUGF(3, "selectivity1: %.15g selectivity2: %.15g", selectivity1, selectivity2); - - /* Free the statistic tuples */ - free_attstatsslot(0, NULL, 0, (float *)geomstats1, geomstats1_nvalues); - ReleaseSysCache(stats1_tuple); - - free_attstatsslot(0, NULL, 0, (float *)geomstats2, geomstats2_nvalues); - ReleaseSysCache(stats2_tuple); - - /* - * OK, so before we calculate the join selectivity we also need to - * know the number of tuples in each of the columns since - * estimate_selectivity returns the number of estimated tuples - * divided by the total number of tuples - hence we need to - * multiply out the returned selectivity by the total number of rows. - */ - class_tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid1)); - - if (HeapTupleIsValid(class_tuple)) - { - Form_pg_class reltup = (Form_pg_class) GETSTRUCT(class_tuple); - num1_tuples = reltup->reltuples; - } - - ReleaseSysCache(class_tuple); - - - class_tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid2)); - - if (HeapTupleIsValid(class_tuple)) - { - Form_pg_class reltup = (Form_pg_class) GETSTRUCT(class_tuple); - num2_tuples = reltup->reltuples; - } - - ReleaseSysCache(class_tuple); - - - /* - * Finally calculate the estimate of the number of rows returned - * - * = 2 * (nrows from col1 + nrows from col2) / - * total nrows in col1 x total nrows in col2 - * - * The factor of 2 accounts for the fact that for each tuple in - * col 1 matching col 2, - * there will be another match in col 2 matching col 1 - */ - - total_tuples = num1_tuples * num2_tuples; - rows_returned = 2 * ((num1_tuples * selectivity1) + - (num2_tuples * selectivity2)); - - POSTGIS_DEBUGF(3, "Rows from rel1: %g", num1_tuples * selectivity1); - POSTGIS_DEBUGF(3, "Rows from rel2: %g", num2_tuples * selectivity2); - POSTGIS_DEBUGF(3, "Estimated rows returned: %g", rows_returned); - - /* - * One (or both) tuple count is zero... - * We return default selectivity estimate. - * We could probably attempt at an estimate - * w/out looking at tables tuple count, with - * a function of selectivity1, selectivity2. - */ - if ( ! total_tuples ) - { - POSTGIS_DEBUG(3, "Total tuples == 0, returning default join selectivity"); - - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL); - } - - if ( rows_returned > total_tuples ) - PG_RETURN_FLOAT8(1.0); - - PG_RETURN_FLOAT8(rows_returned / total_tuples); -} - - -/** -* This function returns an estimate of the selectivity -* of a search GBOX by looking at data in the GEOM_STATS -* structure. The selectivity is a float from 0-1, that estimates -* the proportion of the rows in the table that will be returned -* as a result of the search box. -* -* TODO: handle box dimension collapses (probably should be handled -* by the statistic generator, avoiding GEOM_STATS with collapsed -* dimensions) -*/ -static float8 -estimate_selectivity(GBOX *box, GEOM_STATS *geomstats) -{ - int x, y; - int x_idx_min, x_idx_max, y_idx_min, y_idx_max; - double intersect_x, intersect_y, AOI; - double cell_area, box_area; - double geow, geoh; /* width and height of histogram */ - int histocols, historows; /* histogram grid size */ - double value; - float overlapping_cells; - float avg_feat_cells; - double gain; - float8 selectivity; - - - /* - * Search box completely miss histogram extent - */ - if ( box->xmax < geomstats->xmin || - box->xmin > geomstats->xmax || - box->ymax < geomstats->ymin || - box->ymin > geomstats->ymax ) - { - POSTGIS_DEBUG(3, " search_box does not overlaps histogram, returning 0"); - - return 0.0; - } - - /* - * Search box completely contains histogram extent - */ - if ( box->xmax >= geomstats->xmax && - box->xmin <= geomstats->xmin && - box->ymax >= geomstats->ymax && - box->ymin <= geomstats->ymin ) - { - POSTGIS_DEBUG(3, " search_box contains histogram, returning 1"); - - return 1.0; - } - - geow = geomstats->xmax-geomstats->xmin; - geoh = geomstats->ymax-geomstats->ymin; - - histocols = geomstats->cols; - historows = geomstats->rows; - - POSTGIS_DEBUGF(3, " histogram has %d cols, %d rows", histocols, historows); - POSTGIS_DEBUGF(3, " histogram geosize is %fx%f", geow, geoh); - - cell_area = (geow*geoh) / (histocols*historows); - box_area = (box->xmax-box->xmin)*(box->ymax-box->ymin); - value = 0; - - /* Find first overlapping column */ - x_idx_min = (box->xmin-geomstats->xmin) / geow * histocols; - if (x_idx_min < 0) - { - POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the left of histogram grid", -x_idx_min); - - /* should increment the value somehow */ - x_idx_min = 0; - } - if (x_idx_min >= histocols) - { - POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the right of histogram grid", x_idx_min-histocols+1); - - /* should increment the value somehow */ - x_idx_min = histocols-1; - } - - /* Find first overlapping row */ - y_idx_min = (box->ymin-geomstats->ymin) / geoh * historows; - if (y_idx_min <0) - { - POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the bottom of histogram grid", -y_idx_min); - - /* should increment the value somehow */ - y_idx_min = 0; - } - if (y_idx_min >= historows) - { - POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the top of histogram grid", y_idx_min-historows+1); - - /* should increment the value somehow */ - y_idx_min = historows-1; - } - - /* Find last overlapping column */ - x_idx_max = (box->xmax-geomstats->xmin) / geow * histocols; - if (x_idx_max <0) - { - /* should increment the value somehow */ - x_idx_max = 0; - } - if (x_idx_max >= histocols ) - { - /* should increment the value somehow */ - x_idx_max = histocols-1; - } - - /* Find last overlapping row */ - y_idx_max = (box->ymax-geomstats->ymin) / geoh * historows; - if (y_idx_max <0) - { - /* should increment the value somehow */ - y_idx_max = 0; - } - if (y_idx_max >= historows) - { - /* should increment the value somehow */ - y_idx_max = historows-1; - } - - /* - * the {x,y}_idx_{min,max} - * define the grid squares that the box intersects - */ - for (y=y_idx_min; y<=y_idx_max; y++) - { - for (x=x_idx_min; x<=x_idx_max; x++) - { - double val; - double gain; - - val = geomstats->value[x+y*histocols]; - - /* - * Of the cell value we get - * only the overlap fraction. - */ - - intersect_x = Min(box->xmax, geomstats->xmin + (x+1) * geow / histocols) - Max(box->xmin, geomstats->xmin + x * geow / histocols ); - intersect_y = Min(box->ymax, geomstats->ymin + (y+1) * geoh / historows) - Max(box->ymin, geomstats->ymin+ y * geoh / historows) ; - - AOI = intersect_x*intersect_y; - gain = AOI/cell_area; - - POSTGIS_DEBUGF(4, " [%d,%d] cell val %.15f", - x, y, val); - POSTGIS_DEBUGF(4, " [%d,%d] AOI %.15f", - x, y, AOI); - POSTGIS_DEBUGF(4, " [%d,%d] gain %.15f", - x, y, gain); - - val *= gain; - - POSTGIS_DEBUGF(4, " [%d,%d] adding %.15f to value", - x, y, val); - - value += val; - } - } - - - /* - * If the search_box is a point, it will - * overlap a single cell and thus get - * it's value, which is the fraction of - * samples (we can presume of row set also) - * which bumped to that cell. - * - * If the table features are points, each - * of them will overlap a single histogram cell. - * Our search_box value would then be correctly - * computed as the sum of the bumped cells values. - * - * If both our search_box AND the sample features - * overlap more then a single histogram cell we - * need to consider the fact that our sum computation - * will have many duplicated included. E.g. each - * single sample feature would have contributed to - * raise the search_box value by as many times as - * many cells in the histogram are commonly overlapped - * by both search_box and feature. We should then - * divide our value by the number of cells in the virtual - * 'intersection' between average feature cell occupation - * and occupation of the search_box. This is as - * fuzzy as you understand it :) - * - * Consistency check: whenever the number of cells is - * one of whichever part (search_box_occupation, - * avg_feature_occupation) the 'intersection' must be 1. - * If sounds that our 'intersaction' is actually the - * minimun number between search_box_occupation and - * avg_feat_occupation. - * - */ - overlapping_cells = (x_idx_max-x_idx_min+1) * - (y_idx_max-y_idx_min+1); - avg_feat_cells = geomstats->avgFeatureCells; - - POSTGIS_DEBUGF(3, " search_box overlaps %f cells", overlapping_cells); - POSTGIS_DEBUGF(3, " avg feat overlaps %f cells", avg_feat_cells); - - if ( ! overlapping_cells ) - { - POSTGIS_DEBUG(3, " no overlapping cells, returning 0.0"); - - return 0.0; - } - - gain = 1/Min(overlapping_cells, avg_feat_cells); - selectivity = value*gain; - - POSTGIS_DEBUGF(3, " SUM(ov_histo_cells)=%f", value); - POSTGIS_DEBUGF(3, " gain=%f", gain); - POSTGIS_DEBUGF(3, " selectivity=%f", selectivity); - - /* prevent rounding overflows */ - if (selectivity > 1.0) selectivity = 1.0; - else if (selectivity < 0) selectivity = 0.0; - - return selectivity; -} - -/** -* Utility function to read the calculated selectivity for a given search -* box and table/column. Used for debugging the selectivity code. -*/ -PG_FUNCTION_INFO_V1(_postgis_geometry_sel); -Datum _postgis_geometry_sel(PG_FUNCTION_ARGS) -{ - HeapTuple stats_tuple; - float4 *floatptr; - Oid table_oid = PG_GETARG_OID(0); - Datum geom_datum = PG_GETARG_DATUM(2); - text *att_text = PG_GETARG_TEXT_P(1); - const char *att_name = text2cstring(att_text); - int rv; - GBOX gbox; - int32 nvalues = 0; - float8 selectivity = 0; - AttrNumber att_num; - - /* Calculate the gbox */ - if ( ! gserialized_datum_get_gbox_p(geom_datum, &gbox) ) - elog(ERROR, "unable to calculate bounding box from geometry"); - - /* Get the attribute number */ - att_num = get_attnum(table_oid, att_name); - if ( ! att_num ) - elog(ERROR, "attribute \"%s\" does not exist", att_name); - - /* First pull the stats tuple */ - stats_tuple = SearchSysCache2(STATRELATT, table_oid, att_num); - if ( ! stats_tuple ) - elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), att_name); - - /* Then read the geom status histogram from that */ - rv = get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL, NULL, &floatptr, &nvalues); - if ( ! rv ) - { - ReleaseSysCache(stats_tuple); - elog(ERROR, "unable to retreive geomstats, hmmm"); - } - - /* Do the estimation */ - selectivity = estimate_selectivity(&gbox, (GEOM_STATS*)floatptr); - - /* Clean up */ - free_attstatsslot(0, NULL, 0, floatptr, nvalues); - ReleaseSysCache(stats_tuple); - - PG_RETURN_FLOAT8(selectivity); -} - - - -/** - * This function should return an estimation of the number of - * rows returned by a query involving an overlap check - * ( it's the restrict function for the && operator ) - * - * It can make use (if available) of the statistics collected - * by the geometry analyzer function. - * - * Note that the good work is done by estimate_selectivity() above. - * This function just tries to find the search_box, loads the statistics - * and invoke the work-horse. - * - */ -PG_FUNCTION_INFO_V1(geometry_gist_sel_2d); -Datum geometry_gist_sel_2d(PG_FUNCTION_ARGS) -{ - PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); - - /* Oid operator = PG_GETARG_OID(1); */ - List *args = (List *) PG_GETARG_POINTER(2); - /* int varRelid = PG_GETARG_INT32(3); */ - Oid relid; - HeapTuple stats_tuple; - GEOM_STATS *geomstats; - /* - * This is to avoid casting the corresponding - * "type-punned" pointer, which would break - * "strict-aliasing rules". - */ - GEOM_STATS **gsptr=&geomstats; - int geomstats_nvalues=0; - Node *other; - Var *self; - GBOX search_box; - float8 selectivity=0; - - POSTGIS_DEBUG(2, "geometry_gist_sel called"); - - /* Fail if not a binary opclause (probably shouldn't happen) */ - if (list_length(args) != 2) - { - POSTGIS_DEBUG(3, "geometry_gist_sel: not a binary opclause"); - - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); - } - - - /* - * Find the constant part - */ - other = (Node *) linitial(args); - if ( ! IsA(other, Const) ) - { - self = (Var *)other; - other = (Node *) lsecond(args); - } - else - { - self = (Var *) lsecond(args); - } - - if ( ! IsA(other, Const) ) - { - POSTGIS_DEBUG(3, " no constant arguments - returning a default selectivity"); - - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); -// PG_RETURN_FLOAT8(0.33333); - } - - /* - * We don't have a nice && or && - * situation here. && would probably get evaluated - * away by PgSQL earlier on. && is harder, and the - * case we get often is && ST_Expand(), which does - * actually have a subtly different selectivity than a bae - * && call. It's calculatable though, by expanding - * every cell in the histgram appropriately. - * - * Discussion: http://trac.osgeo.org/postgis/ticket/1828 - * - * To do? Do variable selectivity based on the node. - */ - if ( ! IsA(self, Var) ) - { - POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity"); -// PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); - PG_RETURN_FLOAT8(0.33333); - } - - /* - * Convert the constant to a BOX - */ - - if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) ) - { - POSTGIS_DEBUG(3, "search box is EMPTY"); - PG_RETURN_FLOAT8(0.0); - } - - POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax); - - /* - * Get pg_statistic row - */ - - relid = getrelid(self->varno, root->parse->rtable); - - stats_tuple = SearchSysCache2(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno)); - if ( ! stats_tuple ) - { - POSTGIS_DEBUG(3, " No statistics, returning default estimate"); - - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); - } - - - if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL, -#if POSTGIS_PGSQL_VERSION >= 85 - NULL, -#endif - (float4 **)gsptr, &geomstats_nvalues) ) - { - POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry selectivity"); - - ReleaseSysCache(stats_tuple); - PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL); - } - - POSTGIS_DEBUGF(4, " %d read from stats", geomstats_nvalues); - - POSTGIS_DEBUGF(4, " histo: xmin,ymin: %f,%f", - geomstats->xmin, geomstats->ymin); - POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f", - geomstats->xmax, geomstats->ymax); - POSTGIS_DEBUGF(4, " histo: cols: %f", geomstats->rows); - POSTGIS_DEBUGF(4, " histo: rows: %f", geomstats->cols); - POSTGIS_DEBUGF(4, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea); - POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells); - - /* - * Do the estimation - */ - selectivity = estimate_selectivity(&search_box, geomstats); - - - POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity); - - free_attstatsslot(0, NULL, 0, (float *)geomstats, geomstats_nvalues); - ReleaseSysCache(stats_tuple); - PG_RETURN_FLOAT8(selectivity); - -} - - -/** - * This function is called by the analyze function iff - * the geometry_analyze() function give it its pointer - * (this is always the case so far). - * The geometry_analyze() function is also responsible - * of deciding the number of "sample" rows we will receive - * here. It is able to give use other 'custom' data, but we - * won't use them so far. - * - * Our job is to build some statistics on the sample data - * for use by operator estimators. - * - * Currently we only need statistics to estimate the number of rows - * overlapping a given extent (estimation function bound - * to the && operator). - * - */ -static void -compute_geometry_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, - int samplerows, double totalrows) -{ - MemoryContext old_context; - int i; - int geom_stats_size; - GBOX **sampleboxes; - GEOM_STATS *geomstats; - bool isnull; - int null_cnt=0, notnull_cnt=0, examinedsamples=0; - GBOX *sample_extent=NULL; - double total_width=0; - double total_boxes_area=0; - int total_boxes_cells=0; - double cell_area; - double cell_width; - double cell_height; - - /* for standard deviation */ - double avgLOWx, avgLOWy, avgHIGx, avgHIGy; - double sumLOWx=0, sumLOWy=0, sumHIGx=0, sumHIGy=0; - double sdLOWx=0, sdLOWy=0, sdHIGx=0, sdHIGy=0; - GBOX *newhistobox=NULL; - - double geow, geoh; /* width and height of histogram */ - int histocells; - int cols, rows; /* histogram grid size */ - GBOX histobox; - - /* - * This is where geometry_analyze - * should put its' custom parameters. - */ - /* void *mystats = stats->extra_data; */ - - /* - * We'll build an histogram having from 40 to 400 boxesPerSide - * Total number of cells is determined by attribute stat - * target. It can go from 1600 to 160000 (stat target: 10,1000) - */ - histocells = 160*stats->attr->attstattarget; - - - POSTGIS_DEBUG(2, "compute_geometry_stats called"); - POSTGIS_DEBUGF(3, " samplerows: %d", samplerows); - POSTGIS_DEBUGF(3, " histogram cells: %d", histocells); - - /* - * We might need less space, but don't think - * its worth saving... - */ - sampleboxes = palloc(sizeof(GBOX *)*samplerows); - - /* - * First scan: - * o find extent of the sample rows - * o count null-infinite/not-null values - * o compute total_width - * o compute total features's box area (for avgFeatureArea) - * o sum features box coordinates (for standard deviation) - */ - for (i=0; ixmax = Max(sample_extent->xmax, - box.xmax); - sample_extent->ymax = Max(sample_extent->ymax, - box.ymax); - sample_extent->xmin = Min(sample_extent->xmin, - box.xmin); - sample_extent->ymin = Min(sample_extent->ymin, - box.ymin); - } - - total_boxes_area += (box.xmax-box.xmin)*(box.ymax-box.ymin); - - /* - * Add bvol coordinates to sum for standard deviation - * computation. - */ - sumLOWx += box.xmin; - sumLOWy += box.ymin; - sumHIGx += box.xmax; - sumHIGy += box.ymax; - - notnull_cnt++; - - /* give backend a chance of interrupting us */ - vacuum_delay_point(); - - } - - if ( ! notnull_cnt ) - { - elog(NOTICE, " no notnull values, invalid stats"); - stats->stats_valid = false; - return; - } - - - POSTGIS_DEBUGF(3, " sample_extent: xmin,ymin: %f,%f", - sample_extent->xmin, sample_extent->ymin); - POSTGIS_DEBUGF(3, " sample_extent: xmax,ymax: %f,%f", - sample_extent->xmax, sample_extent->ymax); - - /* - * Second scan: - * o compute standard deviation - */ - avgLOWx = sumLOWx/notnull_cnt; - avgLOWy = sumLOWy/notnull_cnt; - avgHIGx = sumHIGx/notnull_cnt; - avgHIGy = sumHIGy/notnull_cnt; - for (i=0; ixmin - avgLOWx) * (box->xmin - avgLOWx); - sdLOWy += (box->ymin - avgLOWy) * (box->ymin - avgLOWy); - sdHIGx += (box->xmax - avgHIGx) * (box->xmax - avgHIGx); - sdHIGy += (box->ymax - avgHIGy) * (box->ymax - avgHIGy); - } - sdLOWx = sqrt(sdLOWx/notnull_cnt); - sdLOWy = sqrt(sdLOWy/notnull_cnt); - sdHIGx = sqrt(sdHIGx/notnull_cnt); - sdHIGy = sqrt(sdHIGy/notnull_cnt); - - POSTGIS_DEBUG(3, " standard deviations:"); - POSTGIS_DEBUGF(3, " LOWx - avg:%f sd:%f", avgLOWx, sdLOWx); - POSTGIS_DEBUGF(3, " LOWy - avg:%f sd:%f", avgLOWy, sdLOWy); - POSTGIS_DEBUGF(3, " HIGx - avg:%f sd:%f", avgHIGx, sdHIGx); - POSTGIS_DEBUGF(3, " HIGy - avg:%f sd:%f", avgHIGy, sdHIGy); - - histobox.xmin = Max((avgLOWx - SDFACTOR * sdLOWx), - sample_extent->xmin); - histobox.ymin = Max((avgLOWy - SDFACTOR * sdLOWy), - sample_extent->ymin); - histobox.xmax = Min((avgHIGx + SDFACTOR * sdHIGx), - sample_extent->xmax); - histobox.ymax = Min((avgHIGy + SDFACTOR * sdHIGy), - sample_extent->ymax); - - POSTGIS_DEBUGF(3, " sd_extent: xmin,ymin: %f,%f", - histobox.xmin, histobox.ymin); - POSTGIS_DEBUGF(3, " sd_extent: xmax,ymax: %f,%f", - histobox.xmin, histobox.ymax); - - /* - * Third scan: - * o skip hard deviants - * o compute new histogram box - */ - for (i=0; ixmin > histobox.xmax || - box->xmax < histobox.xmin || - box->ymin > histobox.ymax || - box->ymax < histobox.ymin ) - { - POSTGIS_DEBUGF(4, " feat %d is an hard deviant, skipped", i); - - sampleboxes[i] = NULL; - continue; - } - if ( ! newhistobox ) - { - newhistobox = palloc(sizeof(GBOX)); - memcpy(newhistobox, box, sizeof(GBOX)); - } - else - { - if ( box->xmin < newhistobox->xmin ) - newhistobox->xmin = box->xmin; - if ( box->ymin < newhistobox->ymin ) - newhistobox->ymin = box->ymin; - if ( box->xmax > newhistobox->xmax ) - newhistobox->xmax = box->xmax; - if ( box->ymax > newhistobox->ymax ) - newhistobox->ymax = box->ymax; - } - } - - /* - * Set histogram extent as the intersection between - * standard deviation based histogram extent - * and computed sample extent after removal of - * hard deviants (there might be no hard deviants). - */ - if ( histobox.xmin < newhistobox->xmin ) - histobox.xmin = newhistobox->xmin; - if ( histobox.ymin < newhistobox->ymin ) - histobox.ymin = newhistobox->ymin; - if ( histobox.xmax > newhistobox->xmax ) - histobox.xmax = newhistobox->xmax; - if ( histobox.ymax > newhistobox->ymax ) - histobox.ymax = newhistobox->ymax; - - - POSTGIS_DEBUGF(3, " histogram_extent: xmin,ymin: %f,%f", - histobox.xmin, histobox.ymin); - POSTGIS_DEBUGF(3, " histogram_extent: xmax,ymax: %f,%f", - histobox.xmax, histobox.ymax); - - - geow = histobox.xmax - histobox.xmin; - geoh = histobox.ymax - histobox.ymin; - - /* - * Compute histogram cols and rows based on aspect ratio - * of histogram extent - */ - if ( ! geow && ! geoh ) - { - cols = 1; - rows = 1; - histocells = 1; - } - else if ( ! geow ) - { - cols = 1; - rows = histocells; - } - else if ( ! geoh ) - { - cols = histocells; - rows = 1; - } - else - { - if ( geowanl_context); - geom_stats_size=sizeof(GEOM_STATS)+(histocells-1)*sizeof(float4); - geomstats = palloc(geom_stats_size); - MemoryContextSwitchTo(old_context); - - geomstats->avgFeatureArea = total_boxes_area/notnull_cnt; - geomstats->xmin = histobox.xmin; - geomstats->ymin = histobox.ymin; - geomstats->xmax = histobox.xmax; - geomstats->ymax = histobox.ymax; - geomstats->cols = cols; - geomstats->rows = rows; - - /* Initialize all values to 0 */ - for (i=0; ivalue[i] = 0; - - cell_width = geow/cols; - cell_height = geoh/rows; - cell_area = cell_width*cell_height; - - POSTGIS_DEBUGF(4, "cell_width: %f", cell_width); - POSTGIS_DEBUGF(4, "cell_height: %f", cell_height); - - - /* - * Fourth scan: - * o fill histogram values with the number of - * features' bbox overlaps: a feature's bvol - * can fully overlap (1) or partially overlap - * (fraction of 1) an histogram cell. - * - * o compute total cells occupation - * - */ - for (i=0; ixmax, box->ymax, - box->xmin, box->ymin); - - /* Find first overlapping column */ - x_idx_min = (box->xmin-geomstats->xmin) / geow * cols; - if (x_idx_min <0) x_idx_min = 0; - if (x_idx_min >= cols) x_idx_min = cols-1; - - /* Find first overlapping row */ - y_idx_min = (box->ymin-geomstats->ymin) / geoh * rows; - if (y_idx_min <0) y_idx_min = 0; - if (y_idx_min >= rows) y_idx_min = rows-1; - - /* Find last overlapping column */ - x_idx_max = (box->xmax-geomstats->xmin) / geow * cols; - if (x_idx_max <0) x_idx_max = 0; - if (x_idx_max >= cols ) x_idx_max = cols-1; - - /* Find last overlapping row */ - y_idx_max = (box->ymax-geomstats->ymin) / geoh * rows; - if (y_idx_max <0) y_idx_max = 0; - if (y_idx_max >= rows) y_idx_max = rows-1; - - POSTGIS_DEBUGF(4, " feat %d overlaps columns %d-%d, rows %d-%d", - i, x_idx_min, x_idx_max, y_idx_min, y_idx_max); - - /* - * the {x,y}_idx_{min,max} - * define the grid squares that the box intersects - */ - for (y=y_idx_min; y<=y_idx_max; y++) - { - for (x=x_idx_min; x<=x_idx_max; x++) - { - geomstats->value[x+y*cols] += 1; - numcells++; - } - } - - /* - * before adding to the total cells - * we could decide if we really - * want this feature to count - */ - total_boxes_cells += numcells; - - examinedsamples++; - } - - POSTGIS_DEBUGF(3, " examined_samples: %d/%d", examinedsamples, samplerows); - - if ( ! examinedsamples ) - { - elog(NOTICE, " no examined values, invalid stats"); - stats->stats_valid = false; - - POSTGIS_DEBUG(3, " no stats have been gathered"); - - return; - } - - /** TODO: what about null features (TODO) */ - geomstats->avgFeatureCells = (float4)total_boxes_cells/examinedsamples; - - POSTGIS_DEBUGF(3, " histo: total_boxes_cells: %d", total_boxes_cells); - POSTGIS_DEBUGF(3, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea); - POSTGIS_DEBUGF(3, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells); - - - /* - * Normalize histogram - * - * We divide each histogram cell value - * by the number of samples examined. - * - */ - for (i=0; ivalue[i] /= examinedsamples; - - { - int x, y; - for (x=0; xvalue[x+y*cols]); - } - } - } - - - /* - * Write the statistics data - */ - stats->stakind[0] = STATISTIC_KIND_GEOMETRY; - stats->staop[0] = InvalidOid; - stats->stanumbers[0] = (float4 *)geomstats; - stats->numnumbers[0] = geom_stats_size/sizeof(float4); - - stats->stanullfrac = (float4)null_cnt/samplerows; - stats->stawidth = total_width/notnull_cnt; - stats->stadistinct = -1.0; - - POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_GEOMETRY)", - stats->stakind[0]); - POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]); - POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]); - POSTGIS_DEBUGF(3, " out: null fraction: %d/%d=%g", null_cnt, samplerows, stats->stanullfrac); - POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth); - POSTGIS_DEBUG(3, " out: distinct values: all (no check done)"); - - stats->stats_valid = true; -} - -/** - * This function will be called when the ANALYZE command is run - * on a column of the "geometry" type. - * - * It will need to return a stats builder function reference - * and a "minimum" sample rows to feed it. - * If we want analisys to be completely skipped we can return - * FALSE and leave output vals untouched. - * - * What we know from this call is: - * - * o The pg_attribute row referring to the specific column. - * Could be used to get reltuples from pg_class (which - * might quite inexact though...) and use them to set an - * appropriate minimum number of sample rows to feed to - * the stats builder. The stats builder will also receive - * a more accurate "estimation" of the number or rows. - * - * o The pg_type row for the specific column. - * Could be used to set stat builder / sample rows - * based on domain type (when postgis will be implemented - * that way). - * - * Being this experimental we'll stick to a static stat_builder/sample_rows - * value for now. - * - */ -PG_FUNCTION_INFO_V1(geometry_analyze_2d); -Datum geometry_analyze_2d(PG_FUNCTION_ARGS) -{ - VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0); - Form_pg_attribute attr = stats->attr; - - POSTGIS_DEBUG(2, "geometry_analyze called"); - - /* If the attstattarget column is negative, use the default value */ - /* NB: it is okay to scribble on stats->attr since it's a copy */ - if (attr->attstattarget < 0) - attr->attstattarget = default_statistics_target; - - POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget); - - /* Setup the minimum rows and the algorithm function */ - stats->minrows = 300 * stats->attr->attstattarget; - stats->compute_stats = compute_geometry_stats; - - POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows); - - /* Indicate we are done successfully */ - PG_RETURN_BOOL(true); -} - -/** - * Return the estimated extent of the table - * looking at gathered statistics (or NULL if - * no statistics have been gathered). - */ -PG_FUNCTION_INFO_V1(geometry_estimated_extent); -Datum geometry_estimated_extent(PG_FUNCTION_ARGS) -{ - text *txnsp = NULL; - text *txtbl = NULL; - text *txcol = NULL; - char *nsp = NULL; - char *tbl = NULL; - char *col = NULL; - char *query; - ArrayType *array = NULL; - int SPIcode; - SPITupleTable *tuptable; - TupleDesc tupdesc ; - HeapTuple tuple ; - bool isnull; - GBOX *box; - size_t querysize; - GEOM_STATS geomstats; - float reltuples; - Datum binval; - - if ( PG_NARGS() == 3 ) - { - txnsp = PG_GETARG_TEXT_P(0); - txtbl = PG_GETARG_TEXT_P(1); - txcol = PG_GETARG_TEXT_P(2); - } - else if ( PG_NARGS() == 2 ) - { - txtbl = PG_GETARG_TEXT_P(0); - txcol = PG_GETARG_TEXT_P(1); - } - else - { - elog(ERROR, "estimated_extent() called with wrong number of arguments"); - PG_RETURN_NULL(); - } - - POSTGIS_DEBUG(2, "geomtery_estimated_extent called"); - - /* Connect to SPI manager */ - SPIcode = SPI_connect(); - if (SPIcode != SPI_OK_CONNECT) - { - elog(ERROR, "geometry_estimated_extent: couldnt open a connection to SPI"); - PG_RETURN_NULL() ; - } - - querysize = VARSIZE(txtbl)+VARSIZE(txcol)+516; - - if ( txnsp ) - { - nsp = text2cstring(txnsp); - querysize += VARSIZE(txnsp); - } - else - { - querysize += 32; /* current_schema() */ - } - - tbl = text2cstring(txtbl); - col = text2cstring(txcol); - -#if POSTGIS_DEBUG_LEVEL > 0 - if ( txnsp ) - { - POSTGIS_DEBUGF(3, " schema:%s table:%s column:%s", nsp, tbl, col); - } - else - { - POSTGIS_DEBUGF(3, " schema:current_schema() table:%s column:%s", - tbl, col); - } -#endif - - query = palloc(querysize); - - - /* Security check: because we access information in the pg_statistic table, we must run as the database - superuser (by marking the function as SECURITY DEFINER) and check permissions ourselves */ - if ( txnsp ) - { - sprintf(query, "SELECT has_table_privilege((SELECT usesysid FROM pg_user WHERE usename = session_user), '\"%s\".\"%s\"', 'select')", nsp, tbl); - } - else - { - sprintf(query, "SELECT has_table_privilege((SELECT usesysid FROM pg_user WHERE usename = session_user), '\"%s\"', 'select')", tbl); - } - - POSTGIS_DEBUGF(4, "permission check sql query is: %s", query); - - SPIcode = SPI_exec(query, 1); - if (SPIcode != SPI_OK_SELECT) - { - elog(ERROR, "geometry_estimated_extent: couldn't execute permission check sql via SPI"); - SPI_finish(); - PG_RETURN_NULL(); - } - - tuptable = SPI_tuptable; - tupdesc = SPI_tuptable->tupdesc; - tuple = tuptable->vals[0]; - - if (!DatumGetBool(SPI_getbinval(tuple, tupdesc, 1, &isnull))) - { - elog(ERROR, "geometry_estimated_extent: permission denied for relation %s", tbl); - SPI_finish(); - PG_RETURN_NULL(); - } - - - /* Return the stats data */ - if ( txnsp ) - { - sprintf(query, - "SELECT s.stanumbers1[5:8], c.reltuples FROM pg_class c" - " LEFT OUTER JOIN pg_namespace n ON (n.oid = c.relnamespace)" - " LEFT OUTER JOIN pg_attribute a ON (a.attrelid = c.oid )" - " LEFT OUTER JOIN pg_statistic s ON (s.starelid = c.oid AND " - "s.staattnum = a.attnum )" - " WHERE c.relname = '%s' AND a.attname = '%s' " - " AND n.nspname = '%s';", - tbl, col, nsp); - } - else - { - sprintf(query, - "SELECT s.stanumbers1[5:8], c.reltuples FROM pg_class c" - " LEFT OUTER JOIN pg_namespace n ON (n.oid = c.relnamespace)" - " LEFT OUTER JOIN pg_attribute a ON (a.attrelid = c.oid )" - " LEFT OUTER JOIN pg_statistic s ON (s.starelid = c.oid AND " - "s.staattnum = a.attnum )" - " WHERE c.relname = '%s' AND a.attname = '%s' " - " AND n.nspname = current_schema();", - tbl, col); - } - - POSTGIS_DEBUGF(4, " query: %s", query); - - SPIcode = SPI_exec(query, 1); - if (SPIcode != SPI_OK_SELECT ) - { - elog(ERROR,"geometry_estimated_extent: couldnt execute sql via SPI"); - SPI_finish(); - PG_RETURN_NULL(); - } - if (SPI_processed != 1) - { - - POSTGIS_DEBUGF(3, " %d stat rows", SPI_processed); - - elog(ERROR, "Unexistent field \"%s\".\"%s\".\"%s\"", - ( nsp ? nsp : "" ), tbl, col); - - SPI_finish(); - PG_RETURN_NULL() ; - } - - tuptable = SPI_tuptable; - tupdesc = SPI_tuptable->tupdesc; - tuple = tuptable->vals[0]; - - /* Check if the table has zero rows first */ - binval = SPI_getbinval(tuple, tupdesc, 2, &isnull); - if (isnull) - { - - POSTGIS_DEBUG(3, " reltuples is NULL"); - - elog(ERROR, "geometry_estimated_extent: null reltuples for table"); - - SPI_finish(); - PG_RETURN_NULL(); - } - reltuples = DatumGetFloat4(binval); - if ( ! reltuples ) - { - POSTGIS_DEBUG(3, "table has estimated zero rows"); - - /* - * TODO: distinguish between empty and not analyzed ? - */ - elog(NOTICE, "\"%s\".\"%s\".\"%s\" is empty or not analyzed", - ( nsp ? nsp : "" ), tbl, col); - - SPI_finish(); - PG_RETURN_NULL(); - } - - binval = SPI_getbinval(tuple, tupdesc, 1, &isnull); - if (isnull) - { - - POSTGIS_DEBUG(3, " stats are NULL"); - - elog(ERROR, "geometry_estimated_extent: null statistics for table"); - - SPI_finish(); - PG_RETURN_NULL(); - } - array = DatumGetArrayTypeP(binval); - if ( ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)) != 4 ) - { - elog(ERROR, " corrupted histogram"); - PG_RETURN_NULL(); - } - - POSTGIS_DEBUGF(3, " stats array has %d elems", ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array))); - - /* - * Construct GBOX. - * Must allocate this in upper executor context - * to keep it alive after SPI_finish(). - */ - box = SPI_palloc(sizeof(GBOX)); - FLAGS_SET_GEODETIC(box->flags, 0); - FLAGS_SET_Z(box->flags, 0); - FLAGS_SET_M(box->flags, 0); - - /* Construct the box */ - memcpy(&(geomstats.xmin), ARR_DATA_PTR(array), sizeof(float)*4); - box->xmin = geomstats.xmin; - box->xmax = geomstats.xmax; - box->ymin = geomstats.ymin; - box->ymax = geomstats.ymax; - - POSTGIS_DEBUGF(3, " histogram extent = %g %g, %g %g", box->xmin, - box->ymin, box->xmax, box->ymax); - - SPIcode = SPI_finish(); - if (SPIcode != SPI_OK_FINISH ) - { - elog(ERROR, "geometry_estimated_extent: couldn't disconnect from SPI"); - } - - /* TODO: enlarge the box by some factor */ - - PG_RETURN_POINTER(box); -} diff --git a/postgis/gserialized_estimate.c b/postgis/gserialized_estimate.c index 7b8aa3c29..9dc5b62d0 100644 --- a/postgis/gserialized_estimate.c +++ b/postgis/gserialized_estimate.c @@ -91,6 +91,9 @@ Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS); Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS); Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS); +/* Old Prototype */ +Datum geometry_estimated_extent(PG_FUNCTION_ARGS); + /** * Assign a number to the n-dimensional statistics kind * @@ -2182,3 +2185,31 @@ Datum gserialized_estimated_extent(PG_FUNCTION_ARGS) PG_RETURN_POINTER(gbox); } +/** + * Return the estimated extent of the table + * looking at gathered statistics (or NULL if + * no statistics have been gathered). + */ + +PG_FUNCTION_INFO_V1(geometry_estimated_extent); +Datum geometry_estimated_extent(PG_FUNCTION_ARGS) +{ + if ( PG_NARGS() == 3 ) + { + DirectFunctionCall3(gserialized_estimated_extent, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2)); + } + else if ( PG_NARGS() == 2 ) + { + DirectFunctionCall2(gserialized_estimated_extent, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1)); + } + else + { + elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments"); + PG_RETURN_NULL(); + } +} diff --git a/postgis/postgis.sql.in b/postgis/postgis.sql.in index a8ff6d78f..3e487daba 100644 --- a/postgis/postgis.sql.in +++ b/postgis/postgis.sql.in @@ -427,18 +427,6 @@ CREATE OR REPLACE FUNCTION geometry_gist_decompress_2d(internal) AS 'MODULE_PATHNAME' ,'gserialized_gist_decompress_2d' LANGUAGE 'c'; --- Availability: 2.0.0 -CREATE OR REPLACE FUNCTION geometry_gist_sel_2d (internal, oid, internal, int4) - RETURNS float8 - AS 'MODULE_PATHNAME', 'geometry_gist_sel_2d' - LANGUAGE 'c'; - --- Availability: 2.0.0 -CREATE OR REPLACE FUNCTION geometry_gist_joinsel_2d(internal, oid, internal, smallint) - RETURNS float8 - AS 'MODULE_PATHNAME', 'geometry_gist_joinsel_2d' - LANGUAGE 'c'; - ----------------------------------------------------------------------------- -- 2.50.1