From: Daniel Baston Date: Sat, 5 Mar 2016 01:22:48 +0000 (+0000) Subject: #3364, ST_GeometricMedian X-Git-Tag: 2.3.0beta1~187 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=05afd9d9deb774470077b38d0e4dbf83b22c15b8;p=postgis #3364, ST_GeometricMedian git-svn-id: http://svn.osgeo.org/postgis/trunk@14746 b70326c6-7e19-0410-871a-916f4a2858ee --- diff --git a/NEWS b/NEWS index 661084b5a..ed9283ed9 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,7 @@ PostGIS 2.3.0 - #2991 Enable ST_Transform to use PROJ.4 text (Mike Toews) - #3339 ST_GeneratePoints (Paul Ramsey) - #3362 ST_ClusterDBSCAN (Dan Baston) + - #3364 ST_GeometricMedian (Dan Baston) - #3428 ST_Points (Dan Baston) - #3465 ST_ClusterKMeans (Paul Ramsey) - #3469 ST_MakeLine with MULTIPOINTs (Paul Norman) diff --git a/doc/html/image_src/Makefile.in b/doc/html/image_src/Makefile.in index 18ca3a0ac..4339f600a 100644 --- a/doc/html/image_src/Makefile.in +++ b/doc/html/image_src/Makefile.in @@ -69,6 +69,7 @@ IMAGES= \ ../images/st_extrude03.png \ ../images/st_generatepoints01.png \ ../images/st_generatepoints02.png \ + ../images/st_geometricmedian01.png \ ../images/st_issimple01.png \ ../images/st_issimple02.png \ ../images/st_issimple03.png \ diff --git a/doc/reference_measure.xml b/doc/reference_measure.xml index 65a32c824..b817a5dd0 100644 --- a/doc/reference_measure.xml +++ b/doc/reference_measure.xml @@ -2867,6 +2867,128 @@ SELECT ST_Equals(ST_Reverse(ST_GeomFromText('LINESTRING(0 0, 10 10)')), + + + + ST_GeometricMedian + + + + Returns the geometric median of a MultiPoint. + + + + + + + geometry + + ST_GeometricMedian + + + + + + geometry + + + g + + + + + + float8 + + + tolerance + + + + + + int + + + max_iter + + + + + + boolean + + + fail_if_not_converged + + + + + + + + + Description + + + Computes the approximate geometric median of a MultiPoint geometry + using the Weiszfeld algorithm. The geometric median provides a + centrality measure that is less sensitive to outlier points than + the centroid. + + The algorithm will iterate until the distance change between + successive iterations is less than the supplied tolerance + parameter. If this condition has not been met after max_iterations + iterations, the function will produce an error and exit, unless fail_if_not_converged + is set to false. + + If a tolerance value is not provided, a default tolerance value + will be calculated based on the extent of the input geometry. + + + Availability: 2.3.0 + &Z_support; + + + Examples + + + + + + + + + + Comparison of the centroid (turquoise point) and geometric + median (red point) of a four-point MultiPoint (yellow points). + + + + + + +WITH test AS ( +SELECT 'MULTIPOINT((0 0), (1 1), (2 2), (200 200))'::geometry geom) +SELECT + ST_AsText(ST_Centroid(geom)) centroid, + ST_AsText(ST_GeometricMedian(geom)) median +FROM test; + centroid | median +--------------------+---------------------------------------- + POINT(50.75 50.75) | POINT(1.9761550281255 1.9761550281255) +(1 row) + + + + + See Also + + + + + + ST_HasArc diff --git a/liblwgeom/Makefile.in b/liblwgeom/Makefile.in index 5a625fa86..1c76b53dc 100644 --- a/liblwgeom/Makefile.in +++ b/liblwgeom/Makefile.in @@ -79,6 +79,7 @@ SA_OBJS = \ lwin_wkb.o \ lwin_twkb.o \ lwiterator.o \ + lwgeom_median.o \ lwout_wkt.o \ lwout_twkb.o \ lwin_wkt_parse.o \ diff --git a/liblwgeom/cunit/cu_algorithm.c b/liblwgeom/cunit/cu_algorithm.c index 0a5a5d7a0..da44104c1 100644 --- a/liblwgeom/cunit/cu_algorithm.c +++ b/liblwgeom/cunit/cu_algorithm.c @@ -983,6 +983,85 @@ static void test_lwgeom_simplify(void) } +static void do_median_dims_check(char* wkt, int expected_dims) +{ + LWGEOM* g = lwgeom_from_wkt(wkt, LW_PARSER_CHECK_NONE); + LWPOINT* result = lwgeom_median(g, 1e-8, 100, LW_FALSE); + + CU_ASSERT_EQUAL(expected_dims, lwgeom_ndims((LWGEOM*) result)); + + lwgeom_free(g); + lwpoint_free(result); +} + +static void test_median_handles_3d_correctly(void) +{ + do_median_dims_check("MULTIPOINT ((1 3), (4 7), (2 9), (0 4), (2 2))", 2); + do_median_dims_check("MULTIPOINT Z ((1 3 4), (4 7 8), (2 9 1), (0 4 4), (2 2 3))", 3); + do_median_dims_check("MULTIPOINT M ((1 3 4), (4 7 8), (2 9 1), (0 4 4), (2 2 3))", 2); + do_median_dims_check("MULTIPOINT ZM ((1 3 4 5), (4 7 8 6), (2 9 1 7), (0 4 4 8), (2 2 3 9))", 3); +} + +static void do_median_test(char* input, char* expected) +{ + LWGEOM* g = lwgeom_from_wkt(input, LW_PARSER_CHECK_NONE); + LWPOINT* expected_result = lwgeom_as_lwpoint(lwgeom_from_wkt(expected, LW_PARSER_CHECK_NONE)); + POINT3DZ actual_pt; + POINT3DZ expected_pt; + + LWPOINT* result = lwgeom_median(g, FP_TOLERANCE / 10.0, 1000, LW_TRUE); + int passed = LW_TRUE; + + lwpoint_getPoint3dz_p(result, &actual_pt); + lwpoint_getPoint3dz_p(expected_result, &expected_pt); + + passed = passed && lwgeom_is_empty((LWGEOM*) expected_result) == lwgeom_is_empty((LWGEOM*) result); + passed = passed && (lwgeom_has_z((LWGEOM*) expected_result) == lwgeom_has_z((LWGEOM*) result)); + + if (!lwgeom_is_empty((LWGEOM*) result)) + { + passed = passed && FP_EQUALS(actual_pt.x, expected_pt.x); + passed = passed && FP_EQUALS(actual_pt.y, expected_pt.y); + passed = passed && (!lwgeom_has_z((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.z, expected_pt.z)); + } + + if (!passed) + printf("median_test expected %s got %s\n", lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result)); + + CU_ASSERT_TRUE(passed); + + lwgeom_free(g); + lwpoint_free(expected_result); + lwpoint_free(result); +} + +static void test_median_robustness(void) +{ + /* A simple implementation of Weiszfeld's algorithm will fail if the median is equal + * to any one of the inputs, during any iteration of the algorithm. + * + * Because the algorithm uses the centroid as a starting point, this situation will + * occur in the test case below. + */ + do_median_test("MULTIPOINT ((0 -1), (0 0), (0 1))", "POINT (0 0)"); + + /* Same as above but 3D, and shifter */ + do_median_test("MULTIPOINT ((1 -1 3), (1 0 2), (2 1 1))", "POINT (1 0 2)"); + + /* Starting point is duplicated */ + do_median_test("MULTIPOINT ((0 -1), (0 0), (0 0), (0 1))", "POINT (0 0)"); + + /* Cube */ + do_median_test("MULTIPOINT ((10 10 10), (10 20 10), (20 10 10), (20 20 10), (10 10 20), (10 20 20), (20 10 20), (20 20 20))", + "POINT (15 15 15)"); + + /* Some edge cases */ + do_median_test("MULTIPOINT EMPTY", "POINT EMPTY"); + do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY"); + do_median_test("POINT (7 6)", "POINT (7 6)"); + do_median_test("POINT (7 6 2)", "POINT (7 6 2)"); + do_median_test("MULTIPOINT ((7 6 2), EMPTY)", "POINT (7 6 2)"); +} static void test_point_density(void) { @@ -1091,4 +1170,6 @@ void algorithms_suite_setup(void) PG_ADD_TEST(suite,test_lw_arc_center); PG_ADD_TEST(suite,test_point_density); PG_ADD_TEST(suite,test_kmeans); + PG_ADD_TEST(suite,test_median_handles_3d_correctly); + PG_ADD_TEST(suite,test_median_robustness); } diff --git a/liblwgeom/liblwgeom.h.in b/liblwgeom/liblwgeom.h.in index c67818609..79b1ca4fa 100644 --- a/liblwgeom/liblwgeom.h.in +++ b/liblwgeom/liblwgeom.h.in @@ -1448,6 +1448,12 @@ extern LWMPOINT *lwpoly_to_points(const LWPOLY *poly, int npoints); extern LWMPOINT *lwmpoly_to_points(const LWMPOLY *mpoly, int npoints); extern LWMPOINT *lwgeom_to_points(const LWGEOM *lwgeom, int npoints); +/* + * Geometric median + */ +extern LWPOINT* lwgeom_median(const LWGEOM *g, double tol, uint32_t maxiter, char fail_if_not_converged); +extern LWPOINT* lwmpoint_median(const LWMPOINT *g, double tol, uint32_t maxiter, char fail_if_not_converged); + /** * Calculate the GeoHash (http://geohash.org) string for a geometry. Caller must free. */ diff --git a/postgis/lwgeom_functions_analytic.c b/postgis/lwgeom_functions_analytic.c index 10f7574c6..346ef4cec 100644 --- a/postgis/lwgeom_functions_analytic.c +++ b/postgis/lwgeom_functions_analytic.c @@ -53,6 +53,7 @@ Datum LWGEOM_simplify2d(PG_FUNCTION_ARGS); Datum LWGEOM_SetEffectiveArea(PG_FUNCTION_ARGS); Datum ST_LineCrossingDirection(PG_FUNCTION_ARGS); Datum ST_MinimumBoundingRadius(PG_FUNCTION_ARGS); +Datum ST_GeometricMedian(PG_FUNCTION_ARGS); static double determineSide(const POINT2D *seg1, const POINT2D *seg2, const POINT2D *point); @@ -1141,3 +1142,91 @@ Datum ST_MinimumBoundingRadius(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } +/********************************************************************** + * + * ST_GeometricMedian + * + **********************************************************************/ + +PG_FUNCTION_INFO_V1(ST_GeometricMedian); +Datum ST_GeometricMedian(PG_FUNCTION_ARGS) +{ + GSERIALIZED* geom; + GSERIALIZED* result; + LWGEOM* input; + LWPOINT* lwresult; + double tolerance; + bool compute_tolerance_from_box; + bool fail_if_not_converged; + int max_iter; + + /* Read and validate our input arguments */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + compute_tolerance_from_box = PG_ARGISNULL(1); + + if (!compute_tolerance_from_box) + { + tolerance = PG_GETARG_FLOAT8(1); + if (tolerance < 0) + { + lwpgerror("Tolerance must be positive."); + PG_RETURN_NULL(); + } + } + + max_iter = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2); + fail_if_not_converged = PG_ARGISNULL(3) ? LW_FALSE : PG_GETARG_BOOL(3); + + if (max_iter < 0) + { + lwpgerror("Maximum iterations must be positive."); + PG_RETURN_NULL(); + } + + /* OK, inputs are valid. */ + geom = PG_GETARG_GSERIALIZED_P(0); + input = lwgeom_from_gserialized(geom); + + if (compute_tolerance_from_box) + { + /* Compute a default tolerance based on the smallest dimension + * of the geometry's bounding box. + */ + static const double min_default_tolerance = 1e-8; + static const double tolerance_coefficient = 1e-6; + const GBOX* box = lwgeom_get_bbox(input); + + if (!box) + { + tolerance = min_default_tolerance; + } + else + { + double min_dim = FP_MIN(box->xmax - box->xmin, box->ymax - box->ymin); + if (lwgeom_has_z(input)) + min_dim = FP_MIN(min_dim, box->zmax - box->zmin); + + /* Apply a lower bound to the computed default tolerance to + * avoid a tolerance of zero in the case of collinear + * points. + */ + tolerance = FP_MAX(min_default_tolerance, tolerance_coefficient * min_dim); + } + } + + lwresult = lwgeom_median(input, tolerance, max_iter, fail_if_not_converged); + lwgeom_free(input); + + if(!lwresult) + { + lwpgerror("Error computing geometric median."); + PG_RETURN_NULL(); + } + + result = geometry_serialize(lwpoint_as_lwgeom(lwresult)); + + PG_RETURN_POINTER(result); +} + diff --git a/postgis/postgis.sql.in b/postgis/postgis.sql.in index 8af3ad9dd..a01d0bc09 100644 --- a/postgis/postgis.sql.in +++ b/postgis/postgis.sql.in @@ -4007,6 +4007,13 @@ CREATE OR REPLACE FUNCTION ST_Centroid(geometry) AS 'MODULE_PATHNAME', 'centroid' LANGUAGE 'c' IMMUTABLE STRICT; + +-- Availability: 2.3.0 +CREATE OR REPLACE FUNCTION ST_GeometricMedian(g geometry, tolerance float8 DEFAULT NULL, max_iter int DEFAULT 10000, fail_if_not_converged boolean DEFAULT false) + RETURNS geometry + AS 'MODULE_PATHNAME', 'ST_GeometricMedian' + LANGUAGE 'c' IMMUTABLE; + -- PostGIS equivalent function: IsRing(geometry) CREATE OR REPLACE FUNCTION ST_IsRing(geometry) RETURNS boolean diff --git a/regress/Makefile.in b/regress/Makefile.in index 71304ffef..0fb564bbc 100644 --- a/regress/Makefile.in +++ b/regress/Makefile.in @@ -87,6 +87,7 @@ TESTS = \ estimatedextent \ forcecurve \ geography \ + geometric_median \ in_geohash \ in_gml \ in_kml \