From fb1cf349f63b5efc2f146c76ab4378295bb94676 Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Tue, 26 Dec 2017 14:07:49 +0000 Subject: [PATCH] Weight-aware ST_GeometricMedian Weight is to be supplied as M ordinate of individual points in MultiPoint. Bring lwgeom_median.c cunit test coverage to 100%. ST_GeometricMedian(fail_if_not_converged=false, max_iter=0) might give you weighted centroid, but that's not a promise. Closes #3954 Closes https://github.com/postgis/postgis/pull/176 git-svn-id: http://svn.osgeo.org/postgis/trunk@16188 b70326c6-7e19-0410-871a-916f4a2858ee --- NEWS | 1 + doc/reference_measure.xml | 50 ++++++----- liblwgeom/cunit/cu_algorithm.c | 102 +++++++++++++++++------ liblwgeom/lwgeom_median.c | 146 ++++++++++++++++++++++----------- 4 files changed, 203 insertions(+), 96 deletions(-) diff --git a/NEWS b/NEWS index c60bcfa20..d6408a749 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,7 @@ PostGIS 2.5.0 - #1014, Hashable geometry, allowing direct use in CTE signatures (Paul Ramsey) - #3097, Really allow MULTILINESTRING blades in ST_Split() (Paul Ramsey) - #3942, geojson: Do not include private header for json-c >= 0.13 (Björn Esser) + - #3954, ST_GeometricMedian now supports point weights (Darafei Praliaskouski) PostGIS 2.4.0 diff --git a/doc/reference_measure.xml b/doc/reference_measure.xml index c376f02ac..4c158c61d 100644 --- a/doc/reference_measure.xml +++ b/doc/reference_measure.xml @@ -820,7 +820,7 @@ SELECT degrees(ST_Azimuth(ST_Point(25, 45), ST_Point(75, 100))) AS degA_B, - + ST_Angle @@ -835,7 +835,7 @@ SELECT degrees(ST_Azimuth(ST_Point(25, 45), ST_Point(75, 100))) AS degA_B, geometry point2 geometry point3 geometry point4 - + float ST_Angle geometry line1 @@ -850,12 +850,12 @@ SELECT degrees(ST_Azimuth(ST_Point(25, 45), ST_Point(75, 100))) AS degA_B, If input are 2 lines, get first and last point of the lines as 4 points. For 4 points,compute the angle measured clockwise of P1P2,P3P4. Results are always positive, between 0 and 2*Pi radians. - + Uses azimuth of pairs or points. - ST_Angle(P1,P2,P3) = ST_Angle(P2,P1,P2,P3) - Result is in radian and can be converted to degrees using a built-in PostgreSQL function degrees(), as shown in the example. - Availability: 2.5.0 + ST_Angle(P1,P2,P3) = ST_Angle(P2,P1,P2,P3) + Result is in radian and can be converted to degrees using a built-in PostgreSQL function degrees(), as shown in the example. + Availability: 2.5.0 @@ -867,24 +867,24 @@ SELECT degrees(ST_Azimuth(ST_Point(25, 45), ST_Point(75, 100))) AS degA_B, , random() * 2 * PI() AS rad2 FROM generate_series(1,2,2) AS s ) - , points AS ( + , points AS ( SELECT s, rad1,rad2, ST_MakePoint(cos1+s,sin1+s) as p1, ST_MakePoint(s,s) AS p2, ST_MakePoint(cos2+s,sin2+s) as p3 - FROM rand - ,cos(rad1) cos1, sin(rad1) sin1 + FROM rand + ,cos(rad1) cos1, sin(rad1) sin1 ,cos(rad2) cos2, sin(rad2) sin2 ) SELECT s, ST_AsText(ST_SnapToGrid(ST_MakeLine(ARRAY[p1,p2,p3]),0.001)) AS line , degrees(ST_Angle(p1,p2,p3)) as computed_angle - , round(degrees(2*PI()-rad2 -2*PI()+rad1+2*PI()))::int%360 AS reference - , round(degrees(2*PI()-rad2 -2*PI()+rad1+2*PI()))::int%360 AS reference + , round(degrees(2*PI()-rad2 -2*PI()+rad1+2*PI()))::int%360 AS reference + , round(degrees(2*PI()-rad2 -2*PI()+rad1+2*PI()))::int%360 AS reference FROM points ; 1 | line | computed_angle | reference ------------------+------------------ 1 | LINESTRING(1.511 1.86,1 1,0.896 0.005) | 155.27033848688 | 155 - - + + @@ -3281,25 +3281,31 @@ SELECT ST_Equals(ST_Reverse(ST_GeomFromText('LINESTRING(0 0, 10 10)')), Description - + Computes the approximate geometric median of a MultiPoint geometry using the Weiszfeld algorithm. The geometric median provides a centrality measure that is less sensitive to outlier points than the centroid. - + + The algorithm will iterate until the distance change between successive iterations is less than the supplied tolerance parameter. If this condition has not been met after max_iterations iterations, the function will produce an error and exit, unless fail_if_not_converged is set to false. - - If a tolerance value is not provided, a default tolerance value + + + If a tolerance value is not provided, a default tolerance value will be calculated based on the extent of the input geometry. - - - Availability: 2.3.0 - &Z_support; - + + + M value of points, if present, is interpreted as their relative weight. + + Availability: 2.3.0 + Enhanced: 2.5.0 Added support for M as weight of points. + &Z_support; + &M_support; + Examples diff --git a/liblwgeom/cunit/cu_algorithm.c b/liblwgeom/cunit/cu_algorithm.c index eca739931..27f121c03 100644 --- a/liblwgeom/cunit/cu_algorithm.c +++ b/liblwgeom/cunit/cu_algorithm.c @@ -1113,31 +1113,55 @@ static void test_median_handles_3d_correctly(void) do_median_dims_check("MULTIPOINT ZM ((1 3 4 5), (4 7 8 6), (2 9 1 7), (0 4 4 8), (2 2 3 9))", 3); } -static void do_median_test(char* input, char* expected) +static void do_median_test(char* input, char* expected, int fail_if_not_converged, int iter_count) { LWGEOM* g = lwgeom_from_wkt(input, LW_PARSER_CHECK_NONE); - LWPOINT* expected_result = lwgeom_as_lwpoint(lwgeom_from_wkt(expected, LW_PARSER_CHECK_NONE)); - POINT3DZ actual_pt; - POINT3DZ expected_pt; + LWPOINT* expected_result = NULL; + POINT4D actual_pt; + POINT4D expected_pt; - LWPOINT* result = lwgeom_median(g, FP_TOLERANCE / 10.0, 1000, LW_TRUE); - int passed = LW_TRUE; + LWPOINT* result = lwgeom_median(g, FP_TOLERANCE / 10.0, iter_count, fail_if_not_converged); + int passed = LW_FALSE; - lwpoint_getPoint3dz_p(result, &actual_pt); - lwpoint_getPoint3dz_p(expected_result, &expected_pt); - - passed = passed && lwgeom_is_empty((LWGEOM*) expected_result) == lwgeom_is_empty((LWGEOM*) result); - passed = passed && (lwgeom_has_z((LWGEOM*) expected_result) == lwgeom_has_z((LWGEOM*) result)); - - if (!lwgeom_is_empty((LWGEOM*) result)) + if (expected != NULL) + { + expected_result = lwgeom_as_lwpoint(lwgeom_from_wkt(expected, LW_PARSER_CHECK_NONE)); + lwpoint_getPoint4d_p(expected_result, &expected_pt); + } + if (result != NULL) { - passed = passed && FP_EQUALS(actual_pt.x, expected_pt.x); - passed = passed && FP_EQUALS(actual_pt.y, expected_pt.y); - passed = passed && (!lwgeom_has_z((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.z, expected_pt.z)); + lwpoint_getPoint4d_p(result, &actual_pt); } - if (!passed) - printf("median_test expected %s got %s\n", lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result)); + if (result != NULL && expected != NULL) /* got something, expecting something */ + { + passed = LW_TRUE; + passed = passed && lwgeom_is_empty((LWGEOM*) expected_result) == lwgeom_is_empty((LWGEOM*) result); + passed = passed && (lwgeom_has_z((LWGEOM*) expected_result) == lwgeom_has_z((LWGEOM*) result)); + if (!lwgeom_is_empty((LWGEOM*) result)) + { + passed = passed && FP_EQUALS(actual_pt.x, expected_pt.x); + passed = passed && FP_EQUALS(actual_pt.y, expected_pt.y); + passed = passed && (!lwgeom_has_z((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.z, expected_pt.z)); + passed = passed && (!lwgeom_has_m((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.m, expected_pt.m)); + } + if (!passed) + printf("median_test expected %s got %s\n", lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result)); + } + else if (result == NULL && expected == NULL) /* got nothing, expecting nothing */ + { + passed = LW_TRUE; + } + else if (result != NULL && expected == NULL) /* got something, expecting nothing */ + { + passed = LW_FALSE; + printf("median_test expected NULL got %s\n", lwgeom_to_ewkt((LWGEOM*) result)); + } + else if (result == NULL && expected != NULL) /* got nothing, expecting something */ + { + passed = LW_FALSE; + printf("median_test expected %s got NULL\n", lwgeom_to_ewkt((LWGEOM*) expected_result)); + } CU_ASSERT_TRUE(passed); @@ -1154,24 +1178,48 @@ static void test_median_robustness(void) * Because the algorithm uses the centroid as a starting point, this situation will * occur in the test case below. */ - do_median_test("MULTIPOINT ((0 -1), (0 0), (0 1))", "POINT (0 0)"); + do_median_test("MULTIPOINT ((0 -1), (0 0), (0 1))", "POINT (0 0)", LW_TRUE, 1000); /* Same as above but 3D, and shifter */ - do_median_test("MULTIPOINT ((1 -1 3), (1 0 2), (2 1 1))", "POINT (1 0 2)"); + do_median_test("MULTIPOINT ((1 -1 3), (1 0 2), (2 1 1))", "POINT (1 0 2)", LW_TRUE, 1000); /* Starting point is duplicated */ - do_median_test("MULTIPOINT ((0 -1), (0 0), (0 0), (0 1))", "POINT (0 0)"); + do_median_test("MULTIPOINT ((0 -1), (0 0), (0 0), (0 1))", "POINT (0 0)", LW_TRUE, 1000); /* Cube */ do_median_test("MULTIPOINT ((10 10 10), (10 20 10), (20 10 10), (20 20 10), (10 10 20), (10 20 20), (20 10 20), (20 20 20))", - "POINT (15 15 15)"); + "POINT (15 15 15)", LW_TRUE, 1000); /* Some edge cases */ - do_median_test("MULTIPOINT EMPTY", "POINT EMPTY"); - do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY"); - do_median_test("POINT (7 6)", "POINT (7 6)"); - do_median_test("POINT (7 6 2)", "POINT (7 6 2)"); - do_median_test("MULTIPOINT ((7 6 2), EMPTY)", "POINT (7 6 2)"); + do_median_test("POINT (7 6)", "POINT (7 6)", LW_TRUE, 1000); + do_median_test("POINT (7 6 2)", "POINT (7 6 2)", LW_TRUE, 1000); + do_median_test("MULTIPOINT ((7 6 2), EMPTY)", "POINT (7 6 2)", LW_TRUE, 1000); + + /* Empty input */ + do_median_test("MULTIPOINT EMPTY", "POINT EMPTY", LW_FALSE, 1000); + do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY", LW_FALSE, 1000); + do_median_test("MULTIPOINT EMPTY", NULL, LW_TRUE, 1000); + do_median_test("MULTIPOINT (EMPTY)", NULL, LW_TRUE, 1000); + + /* Weighted input */ + do_median_test("MULTIPOINT ((1 -1 3 1), (1 0 2 7), (2 1 1 1))", "POINT (1 0 2)", LW_TRUE, 1000); + /* Point that is replaced by two half-weighted */ + do_median_test("MULTIPOINT ((0 -1 0 1), (0 0 0 1), (0 1 0 0.5), (0 1 0 0.5))", "POINT (0 0 0)", LW_TRUE, 1000); + /* Point is doubled and then erased by negative weight */ + do_median_test("MULTIPOINT ((1 -1 3 1), (1 0 2 7), (2 1 1 2), (2 1 1 -1))", "POINT (1 0 2)", LW_TRUE, 1000); + /* Weightless input is empty */ + do_median_test("MULTIPOINT ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", "POINT EMPTY", LW_FALSE, 1000); + do_median_test("MULTIPOINT ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", NULL, LW_TRUE, 1000); + /* Negative weight won't converge */ + do_median_test("MULTIPOINT ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", "POINT EMPTY", LW_FALSE, 1000); + do_median_test("MULTIPOINT ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", NULL, LW_TRUE, 1000); + + /* Bind convergence too tightly */ + do_median_test("MULTIPOINT ((0 0), (1 1), (0 1), (2 2))", "POINT(0.75 1.0)", LW_FALSE, 0); + do_median_test("MULTIPOINT ((0 0), (1 1), (0 1), (2 2))", NULL, LW_TRUE, 1); + /* Unsupported geometry type */ + do_median_test("POLYGON((1 0,0 1,1 2,2 1,1 0))", NULL, LW_TRUE, 1000); + do_median_test("POLYGON((1 0,0 1,1 2,2 1,1 0))", NULL, LW_FALSE, 1000); } static void test_point_density(void) diff --git a/liblwgeom/lwgeom_median.c b/liblwgeom/lwgeom_median.c index fb570072c..bab6d02e2 100644 --- a/liblwgeom/lwgeom_median.c +++ b/liblwgeom/lwgeom_median.c @@ -19,6 +19,7 @@ ********************************************************************** * * Copyright 2015 Daniel Baston + * Copyright 2017 Darafei Praliaskouski * **********************************************************************/ @@ -28,43 +29,51 @@ #include "lwgeom_log.h" static void -calc_distances_3d(const POINT3D* curr, const POINT3D* points, uint32_t npoints, double* distances) +calc_weighted_distances_3d(const POINT3D* curr, const POINT4D* points, size_t npoints, double* distances) { - uint32_t i; + size_t i; for (i = 0; i < npoints; i++) { - distances[i] = distance3d_pt_pt(curr, &points[i]); + distances[i] = distance3d_pt_pt(curr, (POINT3D*)&points[i]) / points[i].m; } } static double -iterate_3d(POINT3D* curr, const POINT3D* points, uint32_t npoints, double* distances) +iterate_4d(POINT4D* curr, const POINT4D* points, size_t npoints, double* distances) { - uint32_t i; - POINT3D next = { 0, 0, 0 }; + size_t i; + POINT4D next = { 0, 0, 0, 1}; double delta; double denom = 0; char hit = LW_FALSE; - calc_distances_3d(curr, points, npoints, distances); + calc_weighted_distances_3d((POINT3D*)curr, points, npoints, distances); for (i = 0; i < npoints; i++) { - if (distances[i] == 0) - hit = LW_TRUE; - else + /* we need to use lower epsilon than in FP_IS_ZERO in the loop for calculation to converge */ + if (fabs(distances[i]) > DBL_EPSILON) + { + next.x += points[i].x / distances[i]; + next.y += points[i].y / distances[i]; + next.z += points[i].z / distances[i]; denom += 1.0 / distances[i]; - } - - for (i = 0; i < npoints; i++) - { - if (distances[i] > 0) + } + else { - next.x += (points[i].x / distances[i]) / denom; - next.y += (points[i].y / distances[i]) / denom; - next.z += (points[i].z / distances[i]) / denom; + hit = LW_TRUE; } } + if (fabs(denom) > DBL_EPSILON) + { + next.x /= denom; + next.y /= denom; + next.z /= denom; + } + else + { + hit = LW_TRUE; + } /* If any of the intermediate points in the calculation is found in the * set of input points, the standard Weiszfeld method gets stuck with a @@ -86,10 +95,9 @@ iterate_3d(POINT3D* curr, const POINT3D* points, uint32_t npoints, double* dista double dy = 0; double dz = 0; double r_inv; - POINT3D alt; for (i = 0; i < npoints; i++) { - if (distances[i] > 0) + if (fabs(distances[i]) > DBL_EPSILON) { dx += (points[i].x - curr->x) / distances[i]; dy += (points[i].y - curr->y) / distances[i]; @@ -99,14 +107,12 @@ iterate_3d(POINT3D* curr, const POINT3D* points, uint32_t npoints, double* dista r_inv = 1.0 / sqrt ( dx*dx + dy*dy + dz*dz ); - alt.x = FP_MAX(0, 1.0 - r_inv)*next.x + FP_MIN(1.0, r_inv)*curr->x; - alt.y = FP_MAX(0, 1.0 - r_inv)*next.y + FP_MIN(1.0, r_inv)*curr->y; - alt.z = FP_MAX(0, 1.0 - r_inv)*next.z + FP_MIN(1.0, r_inv)*curr->z; - - next = alt; + next.x = FP_MAX(0, 1.0 - r_inv)*next.x + FP_MIN(1.0, r_inv)*curr->x; + next.y = FP_MAX(0, 1.0 - r_inv)*next.y + FP_MIN(1.0, r_inv)*curr->y; + next.z = FP_MAX(0, 1.0 - r_inv)*next.z + FP_MIN(1.0, r_inv)*curr->z; } - delta = distance3d_pt_pt(curr, &next); + delta = distance3d_pt_pt((POINT3D*)curr, (POINT3D*)&next); curr->x = next.x; curr->y = next.y; @@ -115,37 +121,59 @@ iterate_3d(POINT3D* curr, const POINT3D* points, uint32_t npoints, double* dista return delta; } -static POINT3D -init_guess(const POINT3D* points, uint32_t npoints) +static POINT4D +init_guess(const POINT4D* points, size_t npoints) { - POINT3D guess = { 0, 0, 0 }; - uint32_t i; + POINT4D guess = { 0, 0, 0, 0 }; + size_t i; for (i = 0; i < npoints; i++) { - guess.x += points[i].x / npoints; - guess.y += points[i].y / npoints; - guess.z += points[i].z / npoints; + guess.x += points[i].x * points[i].m; + guess.y += points[i].y * points[i].m; + guess.z += points[i].z * points[i].m; + guess.m += points[i].m; + } + if (!FP_IS_ZERO(guess.m)) + { + guess.x /= guess.m; + guess.y /= guess.m; + guess.z /= guess.m; } - return guess; } -static POINT3D* -lwmpoint_extract_points_3d(const LWMPOINT* g, uint32_t* ngeoms) +static POINT4D* +lwmpoint_extract_points_4d(const LWMPOINT* g, size_t* ngeoms) { - uint32_t i; - uint32_t n = 0; + size_t i; + size_t n = 0; int is_3d = lwgeom_has_z((LWGEOM*) g); + int has_m = lwgeom_has_m((LWGEOM*) g); - POINT3D* points = lwalloc(g->ngeoms * sizeof(POINT3D)); + POINT4D* points = lwalloc(g->ngeoms * sizeof(POINT4D)); for (i = 0; i < g->ngeoms; i++) { LWGEOM* subg = lwcollection_getsubgeom((LWCOLLECTION*) g, i); if (!lwgeom_is_empty(subg)) { - getPoint3dz_p(((LWPOINT*) subg)->point, 0, (POINT3DZ*) &points[n++]); + getPoint4d_p(((LWPOINT*) subg)->point, 0, (POINT4D*) &points[n]); if (!is_3d) - points[n-1].z = 0.0; /* in case the getPoint functions return NaN in the future for 2d */ + { + points[n].z = 0.0; /* in case the getPoint functions return NaN in the future for 2d */ + } + if (!has_m) + { + points[n].m = 1.0; + n++; + } + else + { + /* points with zero weight are not going to affect calculation, drop them early */ + if (!FP_IS_ZERO(points[n].m)) + { + n++; + } + } } } @@ -155,29 +183,53 @@ lwmpoint_extract_points_3d(const LWMPOINT* g, uint32_t* ngeoms) return points; } + LWPOINT* lwmpoint_median(const LWMPOINT* g, double tol, uint32_t max_iter, char fail_if_not_converged) { - uint32_t npoints; /* we need to count this ourselves so we can exclude empties */ - uint32_t i; + size_t npoints; /* we need to count this ourselves so we can exclude empties and weightless points */ + size_t i; double delta = DBL_MAX; double* distances; - POINT3D* points = lwmpoint_extract_points_3d(g, &npoints); - POINT3D median; + POINT4D* points = lwmpoint_extract_points_4d(g, &npoints); /* m ordinate is considered weight, if defined */ + POINT4D median; if (npoints == 0) { lwfree(points); - return lwpoint_construct_empty(g->srid, 0, 0); + if (fail_if_not_converged) + { + lwerror("Median failed to find input points with weight."); + return NULL; + } + else + { + return lwpoint_construct_empty(g->srid, 0, 0); + } } median = init_guess(points, npoints); + /* negative total weight means median cannot converge */ + if (median.m <= 0.0) + { + lwfree(points); + if (fail_if_not_converged) + { + lwerror("Median cannot converge for input with negative sum of weights."); + return NULL; + } + else + { + return lwpoint_construct_empty(g->srid, 0, 0); + } + } + distances = lwalloc(npoints * sizeof(double)); for (i = 0; i < max_iter && delta > tol; i++) { - delta = iterate_3d(&median, points, npoints, distances); + delta = iterate_4d(&median, points, npoints, distances); } lwfree(points); -- 2.40.0