From: Darafei Praliaskouski
Date: Thu, 18 Apr 2019 19:56:30 +0000 (+0000)
Subject: ST_Union: allow to input more than 1GB of geometries.
X-Git-Tag: 3.0.0alpha1~44
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=41e46ce7a242e16c9d1320d045d6f3d7dffdbf2c;p=postgis

ST_Union: allow to input more than 1GB of geometries.

Output still has to be below 1GB due to postgres tuple length limit.

Closes #4340
Closes https://github.com/postgis/postgis/pull/394

git-svn-id: http://svn.osgeo.org/postgis/trunk@17398 b70326c6-7e19-0410-871a-916f4a2858ee
---

diff --git a/NEWS b/NEWS
index 5e8daa9d2..9ee99d3d5 100644
--- a/NEWS
+++ b/NEWS
@@ -89,6 +89,8 @@ PostGIS 3.0.0
   - #4295, Allow GEOMETRYCOLLECTION in ST_Overlaps, ST_Contains, ST_ContainsProperly,
            ST_Covers, ST_CoveredBy, ST_Crosses, ST_Touches, ST_Disjoint, ST_Relate,
            ST_Equals (Esteban Zimányi)
+  - #4340, ST_Union aggregate now can handle more than 1 GB of geometries
+           (Darafei Praliaskouski)
   - #4378, Allow passing TINs as input to GEOS-backed functions (Darafei
            Praliaskouski)
 
diff --git a/postgis/lwgeom_accum.c b/postgis/lwgeom_accum.c
index ad28228f8..ddd5ed3f6 100644
--- a/postgis/lwgeom_accum.c
+++ b/postgis/lwgeom_accum.c
@@ -174,31 +174,6 @@ pgis_accum_finalfn(pgis_abs *p, MemoryContext mctx, __attribute__((__unused__))
 	return result;
 }
 
-/**
-* The "union" final function passes the geometry[] to a union
-* conversion before returning the result.
-*/
-PG_FUNCTION_INFO_V1(pgis_geometry_union_finalfn);
-Datum
-pgis_geometry_union_finalfn(PG_FUNCTION_ARGS)
-{
-	pgis_abs *p;
-	Datum result = 0;
-	Datum geometry_array = 0;
-
-	if (PG_ARGISNULL(0))
-		PG_RETURN_NULL(); /* returns null iff no input values */
-
-	p = (pgis_abs*) PG_GETARG_POINTER(0);
-
-	geometry_array = pgis_accum_finalfn(p, CurrentMemoryContext, fcinfo);
-	result = PGISDirectFunctionCall1( pgis_union_geometry_array, geometry_array );
-	if (!result)
-		PG_RETURN_NULL();
-
-	PG_RETURN_DATUM(result);
-}
-
 /**
 * The "collect" final function passes the geometry[] to a geometrycollection
 * conversion before returning the result.
diff --git a/postgis/lwgeom_geos.c b/postgis/lwgeom_geos.c
index e09bf7e70..3718fe92c 100644
--- a/postgis/lwgeom_geos.c
+++ b/postgis/lwgeom_geos.c
@@ -508,6 +508,179 @@ Datum pgis_union_geometry_array(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(gser_out);
 }
 
+/**
+ * State of the ST_Union aggregate: the GEOS geometries collected so far,
+ * plus the SRID, Z flag and largest empty-input type number used to
+ * build the result.
+ */
+typedef struct UnionBuildState
+{
+	MemoryContext mcontext; /* where all the temp stuff is kept */
+	GEOSGeometry **geoms;   /* collected GEOS geometries */
+	int empty_type;
+	uint32_t alen;   /* allocated length of geoms, in elements */
+	uint32_t ngeoms; /* number of valid entries in geoms */
+	int32_t srid;
+	bool is3d;
+} UnionBuildState;
+
+/**
+ * Transition function of the ST_Union aggregate.
+ * Converts every non-NULL, non-empty input geometry to GEOS and appends
+ * it to the state array; for empty inputs only the largest type number
+ * is remembered. The state is allocated in the aggregate memory context.
+ */
+PG_FUNCTION_INFO_V1(pgis_geometry_union_transfn);
+Datum pgis_geometry_union_transfn(PG_FUNCTION_ARGS)
+{
+	MemoryContext aggcontext;
+	UnionBuildState *state;
+	GSERIALIZED *gser_in;
+	uint32_t curgeom;
+	GEOSGeometry *g;
+
+	if (!AggCheckCallContext(fcinfo, &aggcontext))
+	{
+		/* cannot be called directly because of dummy-type argument */
+		elog(ERROR, "%s called in non-aggregate context", __func__);
+		aggcontext = NULL; /* keep compiler quiet */
+	}
+
+	if (!PG_ARGISNULL(0))
+	{
+		state = (UnionBuildState *)PG_GETARG_POINTER(0);
+	}
+	else
+	{
+		MemoryContext old = MemoryContextSwitchTo(aggcontext);
+		state = (UnionBuildState *)palloc(sizeof(UnionBuildState));
+
+		state->mcontext = aggcontext;
+		state->alen = 10;
+		state->ngeoms = 0;
+		state->geoms = palloc(sizeof(GEOSGeometry *) * state->alen);
+		state->is3d = false;
+		state->srid = 0;
+		state->empty_type = 0;
+
+		initGEOS(lwpgnotice, lwgeom_geos_error);
+
+		MemoryContextSwitchTo(old);
+	}
+
+	/* do we have geometry to push? */
+	if (!PG_ARGISNULL(1))
+	{
+		gser_in = PG_GETARG_GSERIALIZED_P(1);
+
+		if (state->ngeoms > 0)
+		{
+			/* destroy the collected GEOS geometries before the mismatch is reported */
+			if (state->srid != gserialized_get_srid(gser_in))
+				for (curgeom = 0; curgeom < state->ngeoms; curgeom++)
+					GEOSGeom_destroy(state->geoms[curgeom]);
+			error_if_srid_mismatch(state->srid, gserialized_get_srid(gser_in));
+		}
+
+		if (!gserialized_is_empty(gser_in))
+		{
+			if (state->ngeoms == 0)
+			{
+				state->srid = gserialized_get_srid(gser_in);
+				state->is3d = gserialized_has_z(gser_in);
+			}
+
+			g = POSTGIS2GEOS(gser_in);
+
+			if (!g)
+			{
+				for (curgeom = 0; curgeom < state->ngeoms; curgeom++)
+					GEOSGeom_destroy(state->geoms[curgeom]);
+				HANDLE_GEOS_ERROR("One of the geometries in the set could not be converted to GEOS");
+			}
+
+			curgeom = state->ngeoms;
+			state->ngeoms++;
+
+			if (state->ngeoms > state->alen)
+			{
+				state->alen *= 2;
+				/* repalloc takes a size in bytes, not an element count */
+				state->geoms = repalloc(state->geoms, sizeof(GEOSGeometry *) * state->alen);
+			}
+
+			state->geoms[curgeom] = g;
+		}
+		else
+		{
+			int gser_type = gserialized_get_type(gser_in);
+			if (gser_type > state->empty_type)
+				state->empty_type = gser_type;
+		}
+	}
+
+	PG_RETURN_POINTER(state);
+}
+
+/**
+ * Final function of the ST_Union aggregate.
+ * Returns the unary union of the collected geometries, an empty geometry
+ * of the largest type seen when only empties were collected, or NULL.
+ */
+PG_FUNCTION_INFO_V1(pgis_geometry_union_finalfn);
+Datum pgis_geometry_union_finalfn(PG_FUNCTION_ARGS)
+{
+	UnionBuildState *state;
+	GSERIALIZED *gser_out = NULL;
+	GEOSGeometry *g = NULL;
+	GEOSGeometry *g_union = NULL;
+
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL(); /* returns null iff no input values */
+
+	state = (UnionBuildState *)PG_GETARG_POINTER(0);
+
+	/*
+	** Take our GEOS geometries and turn them into a GEOS collection,
+	** then pass that into cascaded union.
+	*/
+	if (state->ngeoms > 0)
+	{
+		g = GEOSGeom_createCollection(GEOS_GEOMETRYCOLLECTION, state->geoms, state->ngeoms);
+		if (!g)
+			HANDLE_GEOS_ERROR("Could not create GEOS COLLECTION from geometry array");
+
+		g_union = GEOSUnaryUnion(g);
+		GEOSGeom_destroy(g);
+		if (!g_union)
+			HANDLE_GEOS_ERROR("GEOSUnaryUnion");
+
+		GEOSSetSRID(g_union, state->srid);
+		gser_out = GEOS2POSTGIS(g_union, state->is3d);
+		GEOSGeom_destroy(g_union);
+	}
+	/* No real geometries in our array, any empties? */
+	else
+	{
+		/* If it was only empties, we'll return the largest type number */
+		if (state->empty_type > 0)
+			PG_RETURN_POINTER(
+			    geometry_serialize(lwgeom_construct_empty(state->empty_type, state->srid, state->is3d, 0)));
+
+		/* Nothing but NULL, returns NULL */
+		else
+			PG_RETURN_NULL();
+	}
+
+	if (!gser_out)
+	{
+		/* Union returned a NULL geometry */
+		PG_RETURN_NULL();
+	}
+
+	PG_RETURN_POINTER(gser_out);
+}
+
 /**
  * @example ST_UnaryUnion {@link #geomunion} SELECT ST_UnaryUnion(
  *      'POLYGON((0 0, 10 0, 0 10, 10 10, 0 0))'
diff --git a/postgis/postgis.sql.in b/postgis/postgis.sql.in
index 586036999..2fe9ce60c 100644
--- a/postgis/postgis.sql.in
+++ b/postgis/postgis.sql.in
@@ -3820,6 +3820,12 @@ CREATE OR REPLACE FUNCTION pgis_geometry_accum_transfn(internal, geometry, float
 	AS 'MODULE_PATHNAME'
 	LANGUAGE 'c' _PARALLEL;
 
+-- Availability: 3.0.0
+CREATE OR REPLACE FUNCTION pgis_geometry_union_transfn(internal, geometry)
+	RETURNS internal
+	AS 'MODULE_PATHNAME'
+	LANGUAGE 'c' _PARALLEL;
+
 -- Availability: 1.4.0
 -- Changed: 2.5.0 use 'internal' transfer type
 CREATE OR REPLACE FUNCTION pgis_geometry_union_finalfn(internal)
@@ -3873,8 +3879,9 @@ CREATE OR REPLACE FUNCTION ST_Union (geometry[])
 -- we don't want to force drop of this agg since its often used in views
 -- parallel handling dealt with in postgis_drop_after.sql
 -- Changed: 2.5.0 use 'internal' stype
+-- Changed: 3.0.0 transfn now converts to GEOS
 CREATE AGGREGATE ST_Union (geometry) (
-	sfunc = pgis_geometry_accum_transfn,
+	sfunc = pgis_geometry_union_transfn,
 	stype = internal,
 #if POSTGIS_PGSQL_VERSION >= 96
 	parallel = safe,