]> granicus.if.org Git - postgis/commitdiff
#3364, ST_GeometricMedian
authorDaniel Baston <dbaston@gmail.com>
Sat, 5 Mar 2016 01:22:48 +0000 (01:22 +0000)
committerDaniel Baston <dbaston@gmail.com>
Sat, 5 Mar 2016 01:22:48 +0000 (01:22 +0000)
git-svn-id: http://svn.osgeo.org/postgis/trunk@14746 b70326c6-7e19-0410-871a-916f4a2858ee

NEWS
doc/html/image_src/Makefile.in
doc/reference_measure.xml
liblwgeom/Makefile.in
liblwgeom/cunit/cu_algorithm.c
liblwgeom/liblwgeom.h.in
postgis/lwgeom_functions_analytic.c
postgis/postgis.sql.in
regress/Makefile.in

diff --git a/NEWS b/NEWS
index 661084b5a10f75929e6ae9afefe98ee5b694afdb..ed9283ed95d676e5142a2f7e6a2a3b8e1c58ba5c 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,7 @@ PostGIS 2.3.0
   - #2991 Enable ST_Transform to use PROJ.4 text (Mike Toews)
   - #3339 ST_GeneratePoints (Paul Ramsey)
   - #3362 ST_ClusterDBSCAN (Dan Baston) 
+  - #3364 ST_GeometricMedian (Dan Baston)
   - #3428 ST_Points (Dan Baston)
   - #3465 ST_ClusterKMeans (Paul Ramsey)
   - #3469 ST_MakeLine with MULTIPOINTs (Paul Norman)
index 18ca3a0ac2cae0cb43824f8883b484af16d5b016..4339f600a2dca2d8ebc8d46f4c8c5d90fe4f3aca 100644 (file)
@@ -69,6 +69,7 @@ IMAGES= \
        ../images/st_extrude03.png \
        ../images/st_generatepoints01.png \
        ../images/st_generatepoints02.png \
+       ../images/st_geometricmedian01.png \
        ../images/st_issimple01.png \
        ../images/st_issimple02.png \
        ../images/st_issimple03.png \
index 65a32c824bfae84c44d13ff35fdacdbced63d97a..b817a5dd06ad495cbcde098610cad972f1983a68 100644 (file)
@@ -2867,6 +2867,128 @@ SELECT ST_Equals(ST_Reverse(ST_GeomFromText('LINESTRING(0 0, 10 10)')),
 
        </refentry>
 
+       <refentry id="ST_GeometricMedian">
+         <refnamediv>
+                 <refname>
+                         ST_GeometricMedian
+                 </refname>
+
+               <refpurpose>
+                       Returns the geometric median of a MultiPoint.
+               </refpurpose>
+         </refnamediv>
+
+       <refsynopsisdiv>
+         <funcsynopsis>
+               <funcprototype>
+                       <funcdef>geometry
+                               <function>
+                                       ST_GeometricMedian
+                               </function>
+                       </funcdef>
+
+                       <paramdef>
+                               <type>
+                                       geometry
+                               </type>
+                               <parameter>
+                                       g
+                               </parameter>
+                       </paramdef>
+
+                       <paramdef>
+                               <type>
+                                       float8
+                               </type>
+                               <parameter>
+                                       tolerance
+                               </parameter>
+                       </paramdef>
+
+                       <paramdef>
+                               <type>
+                                       int
+                               </type>
+                               <parameter>
+                                       max_iter
+                               </parameter>
+                       </paramdef>
+
+                       <paramdef>
+                               <type>
+                                       boolean
+                               </type>
+                               <parameter>
+                                       fail_if_not_converged
+                               </parameter>
+                       </paramdef>
+
+               </funcprototype>
+         </funcsynopsis>
+       </refsynopsisdiv>
+
+       <refsection>
+         <title>Description</title>
+
+         <para>
+                 Computes the approximate geometric median of a MultiPoint geometry
+                 using the Weiszfeld algorithm.  The geometric median provides a 
+                 centrality measure that is less sensitive to outlier points than 
+                 the centroid.
+               
+                 The algorithm will iterate until the distance change between 
+                 successive iterations is less than the supplied <varname>tolerance</varname>
+                 parameter.  If this condition has not been met after <varname>max_iterations</varname>
+                 iterations, the function will produce an error and exit, unless <varname>fail_if_not_converged</varname>
+                 is set to false.
+
+                 If a tolerance value is not provided, a default tolerance value
+                 will be calculated based on the extent of the input geometry.
+         </para>
+
+         <para>Availability: 2.3.0</para>
+         <para>&Z_support;</para>
+  </refsection>
+    <refsection>
+      <title>Examples</title>
+         <para>
+                 <informalfigure>
+                         <mediaobject>
+                                       <imageobject>
+                                               <imagedata fileref="images/st_geometricmedian01.png" />
+                                       </imageobject>
+
+                                       <caption>
+                                               <para>
+                                               Comparison of the centroid (turquoise point) and geometric
+                                               median (red point) of a four-point MultiPoint (yellow points).
+                                               </para>
+                                       </caption>
+                         </mediaobject>
+               </informalfigure>
+         </para>
+         <programlisting>
+WITH test AS (
+SELECT 'MULTIPOINT((0 0), (1 1), (2 2), (200 200))'::geometry geom)
+SELECT
+  ST_AsText(ST_Centroid(geom)) centroid, 
+  ST_AsText(ST_GeometricMedian(geom)) median
+FROM test;
+      centroid      |                 median                 
+--------------------+----------------------------------------
+ POINT(50.75 50.75) | POINT(1.9761550281255 1.9761550281255)
+(1 row)
+         </programlisting>
+       </refsection>
+       
+       <refsection>
+         <title>See Also</title>
+
+         <para><xref linkend="ST_Centroid"/></para>
+       </refsection>
+
+       </refentry>
+
        <refentry id="ST_HasArc">
          <refnamediv>
                <refname>ST_HasArc</refname>
index 5a625fa86ba7ba062683562ac80bd977cef35bae..1c76b53dcfafc578cc64d9dcf4500f4cf3b72650 100644 (file)
@@ -79,6 +79,7 @@ SA_OBJS = \
        lwin_wkb.o \
        lwin_twkb.o \
        lwiterator.o \
+       lwgeom_median.o \
        lwout_wkt.o \
        lwout_twkb.o \
        lwin_wkt_parse.o \
index 0a5a5d7a08f2885e6a3efa3e54e844cbfa5836a9..da44104c145b42fd9ae9330d8bb43e5f6ebd8846 100644 (file)
@@ -983,6 +983,85 @@ static void test_lwgeom_simplify(void)
 }
 
 
+static void do_median_dims_check(char* wkt, int expected_dims)
+{
+       LWGEOM* g = lwgeom_from_wkt(wkt, LW_PARSER_CHECK_NONE);
+       LWPOINT* result = lwgeom_median(g, 1e-8, 100, LW_FALSE);
+
+       CU_ASSERT_EQUAL(expected_dims, lwgeom_ndims((LWGEOM*) result));
+
+       lwgeom_free(g);
+       lwpoint_free(result);
+}
+
+static void test_median_handles_3d_correctly(void)
+{
+       do_median_dims_check("MULTIPOINT ((1 3), (4 7), (2 9), (0 4), (2 2))", 2);
+       do_median_dims_check("MULTIPOINT Z ((1 3 4), (4 7 8), (2 9 1), (0 4 4), (2 2 3))", 3);
+       do_median_dims_check("MULTIPOINT M ((1 3 4), (4 7 8), (2 9 1), (0 4 4), (2 2 3))", 2);
+       do_median_dims_check("MULTIPOINT ZM ((1 3 4 5), (4 7 8 6), (2 9 1 7), (0 4 4 8), (2 2 3 9))", 3);
+}
+
+static void do_median_test(char* input, char* expected)
+{
+       LWGEOM* g = lwgeom_from_wkt(input, LW_PARSER_CHECK_NONE);
+       LWPOINT* expected_result = lwgeom_as_lwpoint(lwgeom_from_wkt(expected, LW_PARSER_CHECK_NONE));
+       POINT3DZ actual_pt;
+       POINT3DZ expected_pt;
+
+       LWPOINT* result = lwgeom_median(g, FP_TOLERANCE / 10.0, 1000, LW_TRUE);
+       int passed = LW_TRUE;
+       
+       lwpoint_getPoint3dz_p(result, &actual_pt);
+       lwpoint_getPoint3dz_p(expected_result, &expected_pt);
+
+       passed = passed && lwgeom_is_empty((LWGEOM*) expected_result) == lwgeom_is_empty((LWGEOM*) result);
+       passed = passed && (lwgeom_has_z((LWGEOM*) expected_result) == lwgeom_has_z((LWGEOM*) result));
+
+       if (!lwgeom_is_empty((LWGEOM*) result))
+       {
+               passed = passed && FP_EQUALS(actual_pt.x, expected_pt.x);
+               passed = passed && FP_EQUALS(actual_pt.y, expected_pt.y);
+               passed = passed && (!lwgeom_has_z((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.z, expected_pt.z));
+       }
+
+       if (!passed)
+               printf("median_test expected %s got %s\n", lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result));
+
+       CU_ASSERT_TRUE(passed);
+
+       lwgeom_free(g);
+       lwpoint_free(expected_result);
+       lwpoint_free(result);
+}
+
+static void test_median_robustness(void)
+{
+       /* A simple implementation of Weiszfeld's algorithm will fail if the median is equal
+        * to any one of the inputs, during any iteration of the algorithm.
+        *
+        * Because the algorithm uses the centroid as a starting point, this situation will
+        * occur in the test case below.
+        */
+       do_median_test("MULTIPOINT ((0 -1), (0 0), (0 1))", "POINT (0 0)");
+
+       /* Same as above but 3D, and shifter */
+       do_median_test("MULTIPOINT ((1 -1 3), (1 0 2), (2 1 1))", "POINT (1 0 2)");
+
+       /* Starting point is duplicated */
+       do_median_test("MULTIPOINT ((0 -1), (0 0), (0 0), (0 1))", "POINT (0 0)");
+
+       /* Cube */
+       do_median_test("MULTIPOINT ((10 10 10), (10 20 10), (20 10 10), (20 20 10), (10 10 20), (10 20 20), (20 10 20), (20 20 20))",
+                                  "POINT (15 15 15)");
+
+       /* Some edge cases */
+       do_median_test("MULTIPOINT EMPTY", "POINT EMPTY");
+       do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY");
+       do_median_test("POINT (7 6)", "POINT (7 6)");
+       do_median_test("POINT (7 6 2)", "POINT (7 6 2)");
+       do_median_test("MULTIPOINT ((7 6 2), EMPTY)", "POINT (7 6 2)");
+}
 
 static void test_point_density(void)
 {
@@ -1091,4 +1170,6 @@ void algorithms_suite_setup(void)
        PG_ADD_TEST(suite,test_lw_arc_center);
        PG_ADD_TEST(suite,test_point_density);
        PG_ADD_TEST(suite,test_kmeans);
+       PG_ADD_TEST(suite,test_median_handles_3d_correctly);
+       PG_ADD_TEST(suite,test_median_robustness);
 }
index c67818609729ae216018c0e8f37713764433c111..79b1ca4fab4a0581b2a8143e41bd77de78d2e0c3 100644 (file)
@@ -1448,6 +1448,12 @@ extern LWMPOINT *lwpoly_to_points(const LWPOLY *poly, int npoints);
 extern LWMPOINT *lwmpoly_to_points(const LWMPOLY *mpoly, int npoints);
 extern LWMPOINT *lwgeom_to_points(const LWGEOM *lwgeom, int npoints);
 
+/*
+ * Geometric median
+ */
+extern LWPOINT* lwgeom_median(const LWGEOM *g, double tol, uint32_t maxiter, char fail_if_not_converged);
+extern LWPOINT* lwmpoint_median(const LWMPOINT *g, double tol, uint32_t maxiter, char fail_if_not_converged);
+
 /**
 * Calculate the GeoHash (http://geohash.org) string for a geometry. Caller must free.
 */
index 10f7574c67a7c7085bb8c7afed742f586b70c924..346ef4cece7647d0473455df66357cf85bfceb6f 100644 (file)
@@ -53,6 +53,7 @@ Datum LWGEOM_simplify2d(PG_FUNCTION_ARGS);
 Datum LWGEOM_SetEffectiveArea(PG_FUNCTION_ARGS);
 Datum ST_LineCrossingDirection(PG_FUNCTION_ARGS);
 Datum ST_MinimumBoundingRadius(PG_FUNCTION_ARGS);
+Datum ST_GeometricMedian(PG_FUNCTION_ARGS);
 
 
 static double determineSide(const POINT2D *seg1, const POINT2D *seg2, const POINT2D *point);
@@ -1141,3 +1142,91 @@ Datum ST_MinimumBoundingRadius(PG_FUNCTION_ARGS)
        PG_RETURN_DATUM(result);
 }
 
+/**********************************************************************
+ *
+ * ST_GeometricMedian
+ *
+ **********************************************************************/
+
+PG_FUNCTION_INFO_V1(ST_GeometricMedian);
+Datum ST_GeometricMedian(PG_FUNCTION_ARGS)
+{
+       GSERIALIZED* geom;
+       GSERIALIZED* result;
+       LWGEOM* input;
+       LWPOINT* lwresult;
+       double tolerance;
+       bool compute_tolerance_from_box;
+       bool fail_if_not_converged;
+       int max_iter;
+
+       /* Read and validate our input arguments */
+       if (PG_ARGISNULL(0))
+               PG_RETURN_NULL();
+
+       compute_tolerance_from_box = PG_ARGISNULL(1);
+
+       if (!compute_tolerance_from_box)
+       {
+               tolerance = PG_GETARG_FLOAT8(1);
+               if (tolerance < 0)
+               {
+                       lwpgerror("Tolerance must be positive.");
+                       PG_RETURN_NULL();
+               }
+       }
+
+       max_iter = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
+       fail_if_not_converged = PG_ARGISNULL(3) ? LW_FALSE : PG_GETARG_BOOL(3);
+
+       if (max_iter < 0)
+       {
+               lwpgerror("Maximum iterations must be positive.");
+               PG_RETURN_NULL();
+       }
+
+       /* OK, inputs are valid. */
+       geom = PG_GETARG_GSERIALIZED_P(0);
+       input = lwgeom_from_gserialized(geom);
+
+       if (compute_tolerance_from_box)
+       {
+               /* Compute a default tolerance based on the smallest dimension
+                * of the geometry's bounding box.
+                */
+               static const double min_default_tolerance = 1e-8;
+               static const double tolerance_coefficient = 1e-6;
+               const GBOX* box = lwgeom_get_bbox(input);
+
+               if (!box)
+               {
+                       tolerance = min_default_tolerance;
+               }
+               else
+               {
+                       double min_dim = FP_MIN(box->xmax - box->xmin, box->ymax - box->ymin);
+                       if (lwgeom_has_z(input))
+                               min_dim = FP_MIN(min_dim, box->zmax - box->zmin);
+
+                       /* Apply a lower bound to the computed default tolerance to
+                        * avoid a tolerance of zero in the case of collinear 
+                        * points.
+                        */
+                       tolerance = FP_MAX(min_default_tolerance, tolerance_coefficient * min_dim);
+               }
+       }
+
+       lwresult = lwgeom_median(input, tolerance, max_iter, fail_if_not_converged);
+       lwgeom_free(input);
+
+       if(!lwresult)
+       {
+               lwpgerror("Error computing geometric median.");
+               PG_RETURN_NULL();
+       }
+
+       result = geometry_serialize(lwpoint_as_lwgeom(lwresult));
+       
+       PG_RETURN_POINTER(result);
+}
+
index 8af3ad9ddf1726f5320eeba7663e7babadbc5bb6..a01d0bc09795d9bdae01d139138de221ded23315 100644 (file)
@@ -4007,6 +4007,13 @@ CREATE OR REPLACE FUNCTION ST_Centroid(geometry)
        AS 'MODULE_PATHNAME', 'centroid'
        LANGUAGE 'c' IMMUTABLE STRICT;
        
+
+-- Availability: 2.3.0
+CREATE OR REPLACE FUNCTION ST_GeometricMedian(g geometry, tolerance float8 DEFAULT NULL, max_iter int DEFAULT 10000, fail_if_not_converged boolean DEFAULT false)
+       RETURNS geometry
+       AS 'MODULE_PATHNAME', 'ST_GeometricMedian'
+       LANGUAGE 'c' IMMUTABLE;
+
 -- PostGIS equivalent function: IsRing(geometry)
 CREATE OR REPLACE FUNCTION ST_IsRing(geometry)
        RETURNS boolean
index 71304ffef8de6f806b5a76d250b26a1ca077cbc3..0fb564bbc1f26f0f5391844019a4ee352b7c7a13 100644 (file)
@@ -87,6 +87,7 @@ TESTS = \
        estimatedextent \
        forcecurve \
        geography \
+       geometric_median \
        in_geohash \
        in_gml \
        in_kml \