]> granicus.if.org Git - postgis/commitdiff
#3612, Calling ST_ClusterDBSCAN with fewer than minpoints geometries in window frame...
authorDaniel Baston <dbaston@gmail.com>
Mon, 5 Sep 2016 02:10:12 +0000 (02:10 +0000)
committerDaniel Baston <dbaston@gmail.com>
Mon, 5 Sep 2016 02:10:12 +0000 (02:10 +0000)
git-svn-id: http://svn.osgeo.org/postgis/trunk@15057 b70326c6-7e19-0410-871a-916f4a2858ee

liblwgeom/cunit/cu_geos_cluster.c
liblwgeom/lwgeom_geos_cluster.c
postgis/lwgeom_window.c
regress/cluster.sql
regress/cluster_expected

index 76720b9c4e7ffa6d13b228da9da8d56425fc9f1f..0cd53c403fcf1b7649de469949ccb91df06d2cae 100644 (file)
@@ -255,33 +255,35 @@ static void multipoint_test(void)
        perform_cluster_intersecting_test(wkt_inputs_pt, 2, expected_outputs_pt, 1);
 }
 
-static void dbscan_test(void)
+struct dbscan_test_info {
+       double eps;
+       uint32_t min_points;
+       uint32_t num_geoms;
+       char** wkt_inputs;
+       uint32_t* expected_ids;
+       int* expected_in_cluster;
+};
+
+static void do_dbscan_test(struct dbscan_test_info test)
 {
-       char* wkt_inputs[] = { "POINT (0 0)", "POINT (-1 0)", "POINT (-1 -0.1)", "POINT (-1 0.1)",
-                                  "POINT (1 0)",
-                                                  "POINT (2 0)", "POINT (3  0)", "POINT ( 3 -0.1)", "POINT ( 3 0.1)" };
-       uint32_t num_geoms = sizeof(wkt_inputs) / sizeof(char*);
-       LWGEOM** geoms = WKTARRAY2LWGEOM(wkt_inputs, num_geoms);
-       UNIONFIND* uf = UF_create(num_geoms);
+       LWGEOM** geoms = WKTARRAY2LWGEOM(test.wkt_inputs, test.num_geoms);
+       UNIONFIND* uf = UF_create(test.num_geoms);
        uint32_t* ids;
        char* in_a_cluster;
        uint32_t i;
 
-       /* Although POINT (1 0) and POINT (2 0) are within eps distance of each other,
-        * they do not connect the two clusters because POINT (1 0) is not a core point.
-        * See #3572
-        */
-       double eps = 1.01;
-       uint32_t min_points = 5;
-       uint32_t expected_ids[] = { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 };
-
-       union_dbscan(geoms, num_geoms, uf, eps, min_points, &in_a_cluster);
+       union_dbscan(geoms, test.num_geoms, uf, test.eps, test.min_points, &in_a_cluster);
        ids = UF_get_collapsed_cluster_ids(uf, in_a_cluster);
 
-       ASSERT_INTARRAY_EQUAL(ids, expected_ids, num_geoms);
+       for (i = 0; i < test.num_geoms; i++)
+       {
+               ASSERT_INT_EQUAL(in_a_cluster[i], test.expected_in_cluster[i]);
+               if (in_a_cluster[i])
+                       ASSERT_INT_EQUAL(ids[i], test.expected_ids[i]);
+       }
 
        UF_destroy(uf);
-       for (i = 0; i < num_geoms; i++)
+       for (i = 0; i < test.num_geoms; i++)
        {
                lwgeom_free(geoms[i]);
        }
@@ -290,6 +292,79 @@ static void dbscan_test(void)
        lwfree(ids);
 }
 
+static void dbscan_test(void)
+{
+       struct dbscan_test_info test;
+       char* wkt_inputs[] = { "POINT (0 0)", "POINT (-1 0)", "POINT (-1 -0.1)", "POINT (-1 0.1)",
+                                  "POINT (1 0)",
+                                                  "POINT (2 0)", "POINT (3  0)", "POINT ( 3 -0.1)", "POINT ( 3 0.1)" };
+       /* Although POINT (1 0) and POINT (2 0) are within eps distance of each other,
+        * they do not connect the two clusters because POINT (1 0) is not a core point.
+        * See #3572
+        */
+       test.eps = 1.01;
+       test.min_points = 5;
+       uint32_t expected_ids[] =   { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 };
+       int expected_in_cluster[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+       test.num_geoms = sizeof(wkt_inputs) / sizeof(char*);
+
+       test.expected_ids = expected_ids;
+       test.wkt_inputs = wkt_inputs;
+       test.expected_in_cluster = expected_in_cluster;
+       do_dbscan_test(test);
+}
+
+static void dbscan_test_3612a(void)
+{
+       struct dbscan_test_info test;
+       char* wkt_inputs[] = { "POINT (1 1)" };
+
+       test.eps = 0.0;
+       test.min_points = 5;
+       uint32_t expected_ids[] = { rand() };
+       int expected_in_cluster[] = { 0 };
+       test.num_geoms = sizeof(wkt_inputs) / sizeof(char*);
+
+       test.expected_ids = expected_ids;
+       test.expected_in_cluster = expected_in_cluster;
+       test.wkt_inputs = wkt_inputs;
+       do_dbscan_test(test);
+}
+
+static void dbscan_test_3612b(void)
+{
+       struct dbscan_test_info test;
+       char* wkt_inputs[] = { "POINT (1 1)" };
+
+       test.eps = 0.0;
+       test.min_points = 1;
+       uint32_t expected_ids[]   = { 0 };
+       int expected_in_cluster[] = { 1 };
+       test.num_geoms = sizeof(wkt_inputs) / sizeof(char*);
+
+       test.expected_ids = expected_ids;
+       test.expected_in_cluster = expected_in_cluster;
+       test.wkt_inputs = wkt_inputs;
+       do_dbscan_test(test);
+}
+
+static void dbscan_test_3612c(void)
+{
+       struct dbscan_test_info test;
+       char* wkt_inputs[] = { "POLYGONM((-71.1319 42.2503 1,-71.132 42.2502 3,-71.1323 42.2504 -2,-71.1322 42.2505 1,-71.1319 42.2503 0))",
+                                                  "POLYGONM((-71.1319 42.2512 0,-71.1318 42.2511 20,-71.1317 42.2511 -20,-71.1317 42.251 5,-71.1317 42.2509 4,-71.132 42.2511 6,-71.1319 42.2512 30))" }; 
+       test.eps = 20.1;
+       test.min_points = 5;
+       uint32_t expected_ids[]   = { rand(), rand() };
+       int expected_in_cluster[] = { 0, 0 };
+       test.num_geoms = sizeof(wkt_inputs) / sizeof(char*);
+
+       test.expected_ids = expected_ids;
+       test.expected_in_cluster = expected_in_cluster;
+       test.wkt_inputs = wkt_inputs;
+       do_dbscan_test(test);
+}
+
 void geos_cluster_suite_setup(void);
 void geos_cluster_suite_setup(void)
 {
@@ -301,4 +376,7 @@ void geos_cluster_suite_setup(void)
        PG_ADD_TEST(suite, empty_inputs_test);
        PG_ADD_TEST(suite, multipoint_test);
        PG_ADD_TEST(suite, dbscan_test);
+       PG_ADD_TEST(suite, dbscan_test_3612a);
+       PG_ADD_TEST(suite, dbscan_test_3612b);
+       PG_ADD_TEST(suite, dbscan_test_3612c);
 }
index d45282b8653e9b4eb06a448c4eb8f3f12aa2f864..d2a73e66b7fc56262d52d91f5049eb11c7f55c86 100644 (file)
@@ -323,6 +323,14 @@ union_dbscan_minpoints_1(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, doub
        };
        int success = LW_SUCCESS;
 
+       if (in_a_cluster_ret)
+       {
+               char* in_a_cluster = lwalloc(num_geoms * sizeof(char));
+               for (i = 0; i < num_geoms; i++)
+                       in_a_cluster[i] = LW_TRUE;
+               *in_a_cluster_ret = in_a_cluster;
+       }
+
        if (num_geoms <= 1)
                return LW_SUCCESS;
 
@@ -358,14 +366,6 @@ union_dbscan_minpoints_1(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, doub
                }
        }
 
-       if (in_a_cluster_ret)
-       {
-               char* in_a_cluster = lwalloc(num_geoms * sizeof(char));
-               for (i = 0; i < num_geoms; i++)
-                       in_a_cluster[i] = LW_TRUE;
-               *in_a_cluster_ret = in_a_cluster;
-       }
-
        if (cxt.items_found)
                lwfree(cxt.items_found);
 
@@ -390,9 +390,19 @@ union_dbscan_general(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double e
        char* in_a_cluster;
        char* is_in_core;
 
+       in_a_cluster = lwalloc(num_geoms * sizeof(char));
+       memset(in_a_cluster, 0, num_geoms * sizeof(char));
+
+       if (in_a_cluster_ret)
+               *in_a_cluster_ret = in_a_cluster;
+
        /* Bail if we don't even have enough inputs to make a cluster. */
        if (num_geoms <= min_points)
+       {
+               if (!in_a_cluster_ret)
+                       lwfree(in_a_cluster);
                return LW_SUCCESS;
+       }
 
        tree = make_strtree((void**) geoms, num_geoms, LW_TRUE);
        if (tree.tree == NULL)
@@ -401,8 +411,6 @@ union_dbscan_general(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double e
                return LW_FAILURE;
        }
 
-       in_a_cluster = lwalloc(num_geoms * sizeof(char));
-       memset(in_a_cluster, 0, num_geoms * sizeof(char));
        is_in_core = lwalloc(num_geoms * sizeof(char));
        memset(is_in_core, 0, num_geoms * sizeof(char));
        neighbors = lwalloc(min_points * sizeof(uint32_t));
@@ -488,10 +496,8 @@ union_dbscan_general(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double e
        lwfree(neighbors);
        lwfree(is_in_core);
 
-       /* Either pass in_a_cluster to our caller, or free it. */
-       if (in_a_cluster_ret)
-               *in_a_cluster_ret = in_a_cluster;
-       else
+       /* Free in_a_cluster if we're not giving it to our caller */
+       if (!in_a_cluster_ret)
                lwfree(in_a_cluster);
 
        if (cxt.items_found)
index 09920871b20610bbcf272391d687cef72ecde388..9c197dfeca8a3789d3caa7b26fec30b90cae4c47 100644 (file)
@@ -92,7 +92,7 @@ Datum ST_ClusterDBSCAN(PG_FUNCTION_ARGS)
                uint32_t i;
                uint32_t* result_ids;
                LWGEOM** geoms;
-               char* is_in_cluster;
+               char* is_in_cluster = NULL;
                UNIONFIND* uf;
                bool tolerance_is_null;
                bool minpoints_is_null;
@@ -128,7 +128,7 @@ Datum ST_ClusterDBSCAN(PG_FUNCTION_ARGS)
                        }
                }
 
-               if (union_dbscan(geoms, ngeoms, uf, tolerance, minpoints, &is_in_cluster) == LW_SUCCESS)
+               if (union_dbscan(geoms, ngeoms, uf, tolerance, minpoints, minpoints > 1 ? &is_in_cluster : NULL) == LW_SUCCESS)
                        context->is_error = LW_FALSE;
 
                for (i = 0; i < ngeoms; i++)
@@ -140,7 +140,8 @@ Datum ST_ClusterDBSCAN(PG_FUNCTION_ARGS)
                if (context->is_error)
                {
                        UF_destroy(uf);
-                       lwfree(is_in_cluster);
+                       if (is_in_cluster)
+                               lwfree(is_in_cluster);
                        lwpgerror("Error during clustering");
                        PG_RETURN_NULL();
                }
@@ -148,7 +149,7 @@ Datum ST_ClusterDBSCAN(PG_FUNCTION_ARGS)
                result_ids = UF_get_collapsed_cluster_ids(uf, is_in_cluster);
                for (i = 0; i < ngeoms; i++)
                {
-                       if (!is_in_cluster[i])
+                       if (minpoints > 1 && !is_in_cluster[i])
                        {
                                context->cluster_assignments[i].is_null = LW_TRUE;
                        }
index 4cc6c88a6801c962b7fdde13389165112120afb6..93a85cd6b3ff376b4d0148344402363e8ac45019 100644 (file)
@@ -35,3 +35,9 @@ SELECT 't102', id, ST_ClusterDBSCAN(geom, eps := 0.8, minpoints := 4) OVER () fr
 /* minpoints = 3, but eps too small to form cluster on left */
 SELECT 't103', id, ST_ClusterDBSCAN(geom, eps := 0.6, minpoints := 3) OVER () from dbscan_inputs;
 
+-- #3612
+SELECT 't3612a', ST_ClusterDBSCAN(foo1.the_geom, 20.1, 5)OVER()  As result
+                                                       FROM ((SELECT geom  As the_geom
+                                                                       FROM (VALUES ( ST_GeomFromEWKT('SRID=4326;POLYGONM((-71.1319 42.2503 1,-71.132 42.2502 3,-71.1323 42.2504 -2,-71.1322 42.2505 1,-71.1319 42.2503 0))') ),
+                                                                                       ( ST_GeomFromEWKT('SRID=4326;POLYGONM((-71.1319 42.2512 0,-71.1318 42.2511 20,-71.1317 42.2511 -20,-71.1317 42.251 5,-71.1317 42.2509 4,-71.132 42.2511 6,-71.1319 42.2512 30))') ) ) As g(geom))) As foo1 LIMIT 3;
+SELECT 't3612b', ST_ClusterDBSCAN( ST_Point(1,1), 20.1, 5) OVER();
index 28f498f3c41ba0dd61638d24af8430f3159bd971..0d5603eef36ccf7c49396e66b40fcaa30482f69c 100644 (file)
@@ -27,3 +27,6 @@ t103|3|
 t103|4|0
 t103|5|0
 t103|6|0
+t3612a|
+t3612a|
+t3612b|