From 92865ad6b0b3501a29e1575c9ef9ab89f23013a6 Mon Sep 17 00:00:00 2001 From: Sandro Santilli Date: Tue, 15 Dec 2015 11:19:14 +0000 Subject: [PATCH] Add table inheritance support in ST_EstimatedExtent Ticket: #3391 Patch By: Alessandro Pasotti git-svn-id: http://svn.osgeo.org/postgis/trunk@14490 b70326c6-7e19-0410-871a-916f4a2858ee --- doc/reference_misc.xml | 16 ++- postgis/gserialized_estimate.c | 64 +++++++++--- postgis/postgis.sql.in | 5 + regress/estimatedextent.sql | 161 +++++++++++++++++++++++++++++++ regress/estimatedextent_expected | 21 ++++ 5 files changed, 251 insertions(+), 16 deletions(-) diff --git a/doc/reference_misc.xml b/doc/reference_misc.xml index b7508430a..573b9863f 100644 --- a/doc/reference_misc.xml +++ b/doc/reference_misc.xml @@ -175,6 +175,15 @@ all_em|grabone | grab_rest + + box2d ST_EstimatedExtent + text schema_name + text table_name + text geocolumn_name + boolean parent_ony + + + box2d ST_EstimatedExtent text schema_name @@ -195,7 +204,12 @@ all_em|grabone | grab_rest Return the 'estimated' extent of the given spatial table. The estimated is taken from the geometry column's statistics. The - current schema will be used if not specified. + current schema will be used if not specified. The default behavior + is to also use statistics collected from children tables (tables + with INHERITS) if available. If 'parent_ony' is set to TRUE, only + statistics for the given table are used and children tables are + ignored. + For PostgreSQL>=8.0.0 statistics are gathered by VACUUM ANALYZE and resulting extent will be about 95% of the real diff --git a/postgis/gserialized_estimate.c b/postgis/gserialized_estimate.c index ef5ff1a3e..d580c5b8c 100644 --- a/postgis/gserialized_estimate.c +++ b/postgis/gserialized_estimate.c @@ -849,13 +849,27 @@ pg_nd_stats_from_tuple(HeapTuple stats_tuple, int mode) * by the selectivity functions and the debugging functions. */ static ND_STATS* -pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode) +pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode, bool only_parent) { - HeapTuple stats_tuple; + HeapTuple stats_tuple = NULL; ND_STATS *nd_stats; - /* First pull the stats tuple */ - stats_tuple = SearchSysCache2(STATRELATT, table_oid, att_num); + /* First pull the stats tuple for the whole tree */ + if ( ! only_parent ) + { + POSTGIS_DEBUGF(2, "searching whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL"); + stats_tuple = SearchSysCache3(STATRELATT, table_oid, att_num, TRUE); + if ( stats_tuple ) + POSTGIS_DEBUGF(2, "found whole tree stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL"); + } + /* Fall-back to main table stats only, if not found for whole tree or explicitly ignored */ + if ( only_parent || ! stats_tuple ) + { + POSTGIS_DEBUGF(2, "searching parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL"); + stats_tuple = SearchSysCache2(STATRELATT, table_oid, att_num); + if ( stats_tuple ) + POSTGIS_DEBUGF(2, "found parent table stats for \"%s\"", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL"); + } if ( ! stats_tuple ) { POSTGIS_DEBUGF(2, "stats for \"%s\" do not exist", get_rel_name(table_oid)? get_rel_name(table_oid) : "NULL"); @@ -878,9 +892,12 @@ pg_get_nd_stats(const Oid table_oid, AttrNumber att_num, int mode) * Pull the stats object from the PgSQL system catalogs. The * debugging functions are taking human input (table names) * and columns, so we have to look those up first. +* In case of parent tables whith INHERITS, when "only_parent" +* is TRUE this function only searchs for stats in the parent +* table ignoring any statistic collected from the children. */ static ND_STATS* -pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode) +pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode, bool only_parent) { const char *att_name = text2cstring(att_text); AttrNumber att_num; @@ -901,7 +918,7 @@ pg_get_nd_stats_by_name(const Oid table_oid, const text *att_text, int mode) return NULL; } - return pg_get_nd_stats(table_oid, att_num, mode); + return pg_get_nd_stats(table_oid, att_num, mode, only_parent); } /** @@ -1201,8 +1218,8 @@ Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS) get_rel_name(relid1) ? get_rel_name(relid1) : "NULL", relid1, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2); /* Pull the stats from the stats system. */ - stats1 = pg_get_nd_stats(relid1, var1->varattno, mode); - stats2 = pg_get_nd_stats(relid2, var2->varattno, mode); + stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, FALSE); + stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, FALSE); /* If we can't get stats, we have to stop here! */ if ( ! stats1 ) @@ -1944,13 +1961,18 @@ Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS) char *str; text *json; int mode = 2; /* default to 2D mode */ - + bool only_parent = FALSE; /* default to whole tree stats */ + /* Check if we've been asked to not use 2d mode */ if ( ! PG_ARGISNULL(2) ) mode = text_p_get_mode(PG_GETARG_TEXT_P(2)); + /* Check if we've been asked to only use stats from parent */ + if ( ! PG_ARGISNULL(3) ) + only_parent = PG_GETARG_BOOL(3); + /* Retrieve the stats object */ - nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode); + nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, only_parent); if ( ! nd_stats ) elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text2cstring(att_text)); @@ -1983,7 +2005,7 @@ Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS) mode = text_p_get_mode(PG_GETARG_TEXT_P(3)); /* Retrieve the stats object */ - nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode); + nd_stats = pg_get_nd_stats_by_name(table_oid, att_text, mode, FALSE); if ( ! nd_stats ) elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid), text2cstring(att_text)); @@ -2019,8 +2041,8 @@ Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS) /* Retrieve the stats object */ - nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode); - nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode); + nd_stats1 = pg_get_nd_stats_by_name(table_oid1, att_text1, mode, FALSE); + nd_stats2 = pg_get_nd_stats_by_name(table_oid2, att_text2, mode, FALSE); if ( ! nd_stats1 ) elog(ERROR, "stats for \"%s.%s\" do not exist", get_rel_name(table_oid1), text2cstring(att_text1)); @@ -2189,8 +2211,20 @@ Datum gserialized_estimated_extent(PG_FUNCTION_ARGS) Oid tbl_oid; ND_STATS *nd_stats; GBOX *gbox; + bool only_parent = FALSE; - if ( PG_NARGS() == 3 ) + if ( PG_NARGS() == 4 ) + { + nsp = text2cstring(PG_GETARG_TEXT_P(0)); + tbl = text2cstring(PG_GETARG_TEXT_P(1)); + col = PG_GETARG_TEXT_P(2); + only_parent = PG_GETARG_BOOL(3); + nsp_tbl = palloc(strlen(nsp) + strlen(tbl) + 6); + sprintf(nsp_tbl, "\"%s\".\"%s\"", nsp, tbl); + tbl_oid = DatumGetObjectId(DirectFunctionCall1(regclassin, CStringGetDatum(nsp_tbl))); + pfree(nsp_tbl); + } + else if ( PG_NARGS() == 3 ) { nsp = text2cstring(PG_GETARG_TEXT_P(0)); tbl = text2cstring(PG_GETARG_TEXT_P(1)); @@ -2216,7 +2250,7 @@ Datum gserialized_estimated_extent(PG_FUNCTION_ARGS) } /* Estimated extent only returns 2D bounds, so use mode 2 */ - nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2); + nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent); /* Error out on no stats */ if ( ! nd_stats ) diff --git a/postgis/postgis.sql.in b/postgis/postgis.sql.in index 82f6f2df1..58a7ef20e 100644 --- a/postgis/postgis.sql.in +++ b/postgis/postgis.sql.in @@ -961,6 +961,11 @@ CREATE OR REPLACE FUNCTION ST_MakeBox2d(geom1 geometry, geom2 geometry) -- ST_ESTIMATED_EXTENT( , , ) ----------------------------------------------------------------------- +-- Availability: 2.3.0 +CREATE OR REPLACE FUNCTION ST_EstimatedExtent(text,text,text,boolean) RETURNS box2d AS + 'MODULE_PATHNAME', 'gserialized_estimated_extent' + LANGUAGE 'c' IMMUTABLE STRICT SECURITY DEFINER; + -- Availability: 2.1.0 CREATE OR REPLACE FUNCTION ST_EstimatedExtent(text,text,text) RETURNS box2d AS 'MODULE_PATHNAME', 'gserialized_estimated_extent' diff --git a/regress/estimatedextent.sql b/regress/estimatedextent.sql index b33107882..3d09ab67b 100644 --- a/regress/estimatedextent.sql +++ b/regress/estimatedextent.sql @@ -26,4 +26,165 @@ select '#877.5', round(st_xmin(e.e)::numeric, 5), round(st_xmax(e.e)::numeric, 5 round(st_ymin(e.e)::numeric, 5), round(st_ymax(e.e)::numeric, 5) from e; drop table t; +-- #3391 +-- drop table if exists p cascade; + +create table p(g geometry); +create table c1() inherits (p); +create table c2() inherits (p); + +analyze c1; +analyze c2; +analyze p; + + +-- #3391.1 +with e as ( select ST_EstimatedExtent('c1','g') as e ) +select '#3391.1', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.2 +with e as ( select ST_EstimatedExtent('c2','g') as e ) +select '#3391.2', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.3 +with e as ( select ST_EstimatedExtent('p','g') as e ) +select '#3391.3', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + + +insert into c1 values ('Point(0 0)'::geometry); +insert into c1 values ('Point(1 1)'::geometry); + +analyze c1; +analyze c2; +analyze p; + +-- #3391.4 +with e as ( select ST_EstimatedExtent('c1','g') as e ) +select '#3391.4', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.5 +with e as ( select ST_EstimatedExtent('c2','g') as e ) +select '#3391.5', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.6 +with e as ( select ST_EstimatedExtent('p','g') as e ) +select '#3391.6', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + + +insert into c2 values ('Point(0 0)'::geometry); +insert into c2 values ('Point(-1 -1)'::geometry); + +analyze c1; +analyze c2; +analyze p; + +-- #3391.7 +with e as ( select ST_EstimatedExtent('c1','g') as e ) +select '#3391.7', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.8 +with e as ( select ST_EstimatedExtent('c2','g') as e ) +select '#3391.8', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.9 +with e as ( select ST_EstimatedExtent('p','g') as e ) +select '#3391.9', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + + +insert into p values ('Point(1 1)'::geometry); +insert into p values ('Point(2 2)'::geometry); + +analyze c1; +analyze c2; +analyze p; + +-- #3391.10 +with e as ( select ST_EstimatedExtent('c1','g') as e ) +select '#3391.10', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.11 +with e as ( select ST_EstimatedExtent('c2','g') as e ) +select '#3391.11', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.12 +with e as ( select ST_EstimatedExtent('p','g') as e ) +select '#3391.12', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- test calls with 3th parameter + +delete from p where 't'; +delete from c1 where 't'; +delete from c2 where 't'; + +delete from pg_statistic where starelid = 'p'::regclass; +delete from pg_statistic where starelid = 'c1'::regclass; +delete from pg_statistic where starelid = 'c2'::regclass; + +analyze c1; +analyze c2; +analyze p; + + +-- #3391.13 +with e as ( select ST_EstimatedExtent('public','p','g','t') as e ) +select '#3391.13', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.14 +with e as ( select ST_EstimatedExtent('public','p','g','f') as e ) +select '#3391.14', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.15 +with e as ( select ST_EstimatedExtent('public','c1','g', 't') as e ) +select '#3391.15', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.16 +with e as ( select ST_EstimatedExtent('public','c1','g', 'f') as e ) +select '#3391.16', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + + +insert into c1 values ('Point(0 0)'::geometry); +insert into c1 values ('Point(1 1)'::geometry); + +analyze c1; +analyze c2; +analyze p; + +-- #3391.17 +with e as ( select ST_EstimatedExtent('public','p','g','f') as e ) +select '#3391.17', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.18 +with e as ( select ST_EstimatedExtent('public','p','g','t') as e ) +select '#3391.18', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.19 +with e as ( select ST_EstimatedExtent('public','c1','g', 'f') as e ) +select '#3391.19', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + +-- #3391.20 +with e as ( select ST_EstimatedExtent('public','c1','g', 't') as e ) +select '#3391.20', round(st_xmin(e.e)::numeric, 2), round(st_xmax(e.e)::numeric, 2), +round(st_ymin(e.e)::numeric, 2), round(st_ymax(e.e)::numeric, 2) from e; + + +drop table p cascade; diff --git a/regress/estimatedextent_expected b/regress/estimatedextent_expected index bc65afc96..d3ae7c351 100644 --- a/regress/estimatedextent_expected +++ b/regress/estimatedextent_expected @@ -5,3 +5,24 @@ ERROR: stats for "t.g" do not exist ERROR: stats for "t.g" do not exist #877.4|-10.15000|20.15000|-50.40000|30.40000 #877.5|-10.15000|20.15000|-50.40000|30.40000 +ERROR: stats for "c1.g" do not exist +ERROR: stats for "c2.g" do not exist +ERROR: stats for "p.g" do not exist +#3391.4|0.00|1.00|0.00|1.00 +ERROR: stats for "c2.g" do not exist +#3391.6|0.00|1.00|0.00|1.00 +#3391.7|0.00|1.00|0.00|1.00 +#3391.8|-1.00|0.00|-1.00|0.00 +#3391.9|-1.01|1.01|-1.01|1.01 +#3391.10|0.00|1.00|0.00|1.00 +#3391.11|-1.00|0.00|-1.00|0.00 +#3391.12|-1.01|2.02|-1.01|2.02 +ERROR: stats for "p.g" do not exist +ERROR: stats for "p.g" do not exist +ERROR: stats for "c1.g" do not exist +ERROR: stats for "c1.g" do not exist +#3391.17|0.00|1.00|0.00|1.00 +ERROR: stats for "p.g" do not exist +#3391.19|0.00|1.00|0.00|1.00 +#3391.20|0.00|1.00|0.00|1.00 +NOTICE: drop cascades to 2 other objects -- 2.50.0