]> granicus.if.org Git - postgis/commitdiff
Add ST_QuantizeCoordinates
authorDaniel Baston <dbaston@gmail.com>
Mon, 12 Mar 2018 02:41:56 +0000 (02:41 +0000)
committerDaniel Baston <dbaston@gmail.com>
Mon, 12 Mar 2018 02:41:56 +0000 (02:41 +0000)
Closes #4029
Closes https://github.com/postgis/postgis/pull/225

git-svn-id: http://svn.osgeo.org/postgis/trunk@16455 b70326c6-7e19-0410-871a-916f4a2858ee

NEWS
doc/reference_misc.xml
liblwgeom/cunit/cu_algorithm.c
liblwgeom/liblwgeom.h.in
liblwgeom/lwgeom.c
postgis/lwgeom_functions_basic.c
postgis/postgis.sql.in
regress/Makefile.in
regress/quantize_coordinates.sql [new file with mode: 0644]
regress/quantize_coordinates_expected [new file with mode: 0644]

diff --git a/NEWS b/NEWS
index 7146c9a46970096082e51300f138ec0bb9c57bad..396f982e9d8244f772887e25d58a5899af6aa8b3 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,7 @@ PostGIS 2.5.0
   - #3913, Upgrade when creating extension from unpackaged (Sandro Santilli)
   - #2256, _postgis_index_extent() for extent from index (Paul Ramsey)
   - #3176, Add ST_OrientedEnvelope (Dan Baston)
+  - #4029, Add ST_QuantizeCoordinates (Dan Baston)
 
 * Breaking Changes *
   - #3885, version number removed from address_standardize lib file
index bdad161a73e6635b8e4e044bbf6c2c0d255e27a1..e6233a419e07682562ebb4551e93a5757c821395 100644 (file)
@@ -695,5 +695,177 @@ fulltable_size geomsize  pergeom
          </refsection>
        </refentry>
 
+       <refentry id="ST_QuantizeCoordinates">
+               <refnamediv>
+                       <refname>
+                               ST_QuantizeCoordinates
+                       </refname>
+                       <refpurpose>
+                               Sets least significant bits of coordinates to zero
+                       </refpurpose>
+               </refnamediv>
+
+               <refsynopsisdiv>
+                       <funcsynopsis>
+                               <funcprototype>
+                                       <funcdef>
+                                               geometry
+                                               <function>ST_QuantizeCoordinates</function>
+                                       </funcdef>
+                                       <paramdef>
+                                               <type>geometry</type>
+                                               <parameter>g</parameter>
+                                       </paramdef>
+                                       <paramdef>
+                                               <type>int</type>
+                                               <parameter>prec_x</parameter>
+                                       </paramdef>
+                                       <paramdef>
+                                               <type>int</type>
+                                               <parameter>prec_y</parameter>
+                                       </paramdef>
+                                       <paramdef>
+                                               <type>int</type>
+                                               <parameter>prec_z</parameter>
+                                       </paramdef>
+                                       <paramdef>
+                                               <type>int</type>
+                                               <parameter>prec_m</parameter>
+                                       </paramdef>
+                               </funcprototype>
+                       </funcsynopsis>
+               </refsynopsisdiv>
+
+               <refsection>
+                       <title>Description</title>
+                       <para>
+                               <code>ST_QuantizeCoordinates</code> determines the number of bits
+                               (<code>N</code>) required to represent a coordinate value with a
+                               specified number of digits after the decimal point, and then sets
+                               all but the <code>N</code> most significant bits to zero. The
+                               resulting coordinate value will still round to the original value,
+                               but will have improved compressiblity. This can result in a
+                               significant disk usage reduction provided that the geometry column
+                               is using a <ulink
+                                       url="https://www.postgresql.org/docs/current/static/storage-toast.html#STORAGE-TOAST-ONDISK">
+                                       compressible storage type</ulink>. The function allows
+                               specification of a different number of digits after the decimal
+                               point in each dimension; unspecified dimensions are assumed to have
+                               the precsion of the <code>x</code> dimension. Negative digits are
+                               interpreted to refer digits to the left of the decimal point, (i.e.,
+                               <code>prec_x=-2</code> will preserve coordinate values to the
+                               nearest 100. 
+                       </para>
+                       <para>
+                               The coordinates produced by <code>ST_QuantizeCoordinates</code> are
+                               independent of the geometry that contains those coordinates and the
+                               relative position of those coordinates within the geometry. As a result,
+                               existing topological relationships between geometries are unaffected
+                               by use of this function. The function will not produce invalid geometry
+                               unless it is called with a number of digits lower than the intrinsic
+                               precision of the geometry.
+                       </para>
+                       <para>Availability: 2.5.0</para>
+               </refsection>
+               <refsection>
+                       <title>Technical Background</title>
+                       <para>
+                               PostGIS stores all coordinate values as double-precision floating
+                               point integers, which can reliably represent 15 significant digits.
+                               However, PostGIS may be used to manage data that intrinsically has
+                               fewer than 15 significant digits. An example is TIGER data, which is
+                               provided as geographic coordinates with six digits of precision
+                               after the decimal point (thus requiring only nine significant digits
+                               of longitude and eight significant digits of latitude.)
+                       </para>
+                       <para>
+                               When 15 significant digits are available, there are many possible
+                               representations of a number with 9 significant digits.  A double
+                               precision floating point number uses 52 explicit bits to represent
+                               the significand (mantissa) of the coordinate. Only 30 bits are needed
+                               to represent a mantissa with 9 significant digits, leaving 22
+                               insignificant bits; we can set their value to anything we like and
+                               still end up with a number that rounds to our input value.  For
+                               example, the value 100.123456 can be represented by the floating
+                               point numbers closest to 100.123456000000, 100.123456000001, and
+                               100.123456432199. All are equally valid, in that
+                               <code>ST_AsText(geom, 6)</code> will return the same result with 
+                               any of these inputs. Because we can set these bits to whatever we
+                               like, <code>ST_QuantizeCoordinates</code> sets the 22 insignificant
+                               bits to zero because a block of 22 zeros is easily (and automatically)
+                               compressed by PostgreSQL's built-in compression algorithm.
+                       </para>
+
+                       <note>
+                               <para>
+                                       Only the on-disk size of the geometry is potentially affected by
+                                       <code>ST_QuantizeCoordinates</code>.  <xref linkend="ST_MemSize"></xref>,
+                                       which reports the in-memory usage of the geometry, will return the
+                                       the same value regardless of the disk space used by a geometry.
+                               </para>
+                       </note>
+               </refsection>
+
+               <refsection>
+                       <title>Examples</title>
+
+                       <programlisting>SELECT ST_AsText(ST_QuantizeCoordinates('POINT (100.123456 0)'));
+st_astext
+-------------------------
+POINT(100.123455941677 0)
+                       </programlisting>
+
+                       <programlisting>
+WITH test AS (SELECT 'POINT (123.456789123456 123.456789123456)'::geometry AS geom)
+SELECT
+  digits,
+  encode(ST_QuantizeCoordinates(geom, digits), 'hex'),
+  ST_AsText(ST_QuantizeCoordinates(geom, digits))
+FROM test, generate_series(15, -15, -1) AS digits;
+
+digits  |                   encode                   |                st_astext                 
+--------+--------------------------------------------+------------------------------------------
+15      | 01010000005f9a72083cdd5e405f9a72083cdd5e40 | POINT(123.456789123456 123.456789123456)
+14      | 01010000005f9a72083cdd5e405f9a72083cdd5e40 | POINT(123.456789123456 123.456789123456)
+13      | 01010000005f9a72083cdd5e405f9a72083cdd5e40 | POINT(123.456789123456 123.456789123456)
+12      | 01010000005c9a72083cdd5e405c9a72083cdd5e40 | POINT(123.456789123456 123.456789123456)
+11      | 0101000000409a72083cdd5e40409a72083cdd5e40 | POINT(123.456789123456 123.456789123456)
+10      | 0101000000009a72083cdd5e40009a72083cdd5e40 | POINT(123.456789123455 123.456789123455)
+9       | 0101000000009072083cdd5e40009072083cdd5e40 | POINT(123.456789123418 123.456789123418)
+8       | 0101000000008072083cdd5e40008072083cdd5e40 | POINT(123.45678912336 123.45678912336)
+7       | 0101000000000070083cdd5e40000070083cdd5e40 | POINT(123.456789121032 123.456789121032)
+6       | 0101000000000040083cdd5e40000040083cdd5e40 | POINT(123.456789076328 123.456789076328)
+5       | 0101000000000000083cdd5e40000000083cdd5e40 | POINT(123.456789016724 123.456789016724)
+4       | 0101000000000000003cdd5e40000000003cdd5e40 | POINT(123.456787109375 123.456787109375)
+3       | 0101000000000000003cdd5e40000000003cdd5e40 | POINT(123.456787109375 123.456787109375)
+2       | 01010000000000000038dd5e400000000038dd5e40 | POINT(123.45654296875 123.45654296875)
+1       | 01010000000000000000dd5e400000000000dd5e40 | POINT(123.453125 123.453125)
+0       | 01010000000000000000dc5e400000000000dc5e40 | POINT(123.4375 123.4375)
+-1      | 01010000000000000000c05e400000000000c05e40 | POINT(123 123)
+-2      | 01010000000000000000005e400000000000005e40 | POINT(120 120)
+-3      | 010100000000000000000058400000000000005840 | POINT(96 96)
+-4      | 010100000000000000000058400000000000005840 | POINT(96 96)
+-5      | 010100000000000000000058400000000000005840 | POINT(96 96)
+-6      | 010100000000000000000058400000000000005840 | POINT(96 96)
+-7      | 010100000000000000000058400000000000005840 | POINT(96 96)
+-8      | 010100000000000000000058400000000000005840 | POINT(96 96)
+-9      | 010100000000000000000058400000000000005840 | POINT(96 96)
+-10     | 010100000000000000000058400000000000005840 | POINT(96 96)
+-11     | 010100000000000000000058400000000000005840 | POINT(96 96)
+-12     | 010100000000000000000058400000000000005840 | POINT(96 96)
+-13     | 010100000000000000000058400000000000005840 | POINT(96 96)
+-14     | 010100000000000000000058400000000000005840 | POINT(96 96)
+-15     | 010100000000000000000058400000000000005840 | POINT(96 96)
+</programlisting>
+
+               </refsection>
+
+               <refsection>
+                       <title>See Also</title>
+
+                       <para><xref linkend="ST_SnapToGrid" /></para>
+               </refsection>
+
+       </refentry>
 
  </sect1>
index bd9b5dae762baa891c34299ff5325a42b5e9cf02..3e9741c6ba4a348639ffb511f0da9898db951a71 100644 (file)
@@ -1434,6 +1434,52 @@ static void test_kmeans(void)
        return;
 }
 
+static void test_trim_bits(void)
+{
+       POINTARRAY *pta = ptarray_construct_empty(LW_TRUE, LW_TRUE, 2);
+       POINT4D pt;
+       LWLINE *line;
+       int precision;
+       uint32_t i;
+
+       pt.x = 1.2345678901234;
+       pt.y = 2.3456789012345;
+       pt.z = 3.4567890123456;
+       pt.m = 4.5678901234567;
+
+       ptarray_insert_point(pta, &pt, 0);
+
+       pt.x *= 3;
+       pt.y *= 3;
+       pt.y *= 3;
+       pt.z *= 3;
+
+       ptarray_insert_point(pta, &pt, 1);
+
+       line = lwline_construct(0, NULL, pta);
+
+       for (precision = -15; precision <= 15; precision++)
+       {
+               LWLINE *line2 = (LWLINE*) lwgeom_clone_deep((LWGEOM*) line);
+               lwgeom_trim_bits_in_place((LWGEOM*) line2, precision, precision, precision, precision);
+
+               for (i = 0; i < line->points->npoints; i++)
+               {
+                       POINT4D pt1 = getPoint4d(line->points, i);
+                       POINT4D pt2 = getPoint4d(line2->points, i);
+
+                       CU_ASSERT_DOUBLE_EQUAL(pt1.x, pt2.x, pow(10, -1*precision));
+                       CU_ASSERT_DOUBLE_EQUAL(pt1.y, pt2.y, pow(10, -1*precision));
+                       CU_ASSERT_DOUBLE_EQUAL(pt1.z, pt2.z, pow(10, -1*precision));
+                       CU_ASSERT_DOUBLE_EQUAL(pt1.m, pt2.m, pow(10, -1*precision));
+               }
+
+               lwline_free(line2);
+       }
+
+       lwline_free(line);
+}
+
 /*
 ** Used by test harness to register the tests in this file.
 */
@@ -1465,5 +1511,6 @@ void algorithms_suite_setup(void)
        PG_ADD_TEST(suite,test_median_handles_3d_correctly);
        PG_ADD_TEST(suite,test_median_robustness);
        PG_ADD_TEST(suite,test_lwpoly_construct_circle);
+       PG_ADD_TEST(suite,test_trim_bits);
        PG_ADD_TEST(suite,test_lwgeom_remove_repeated_points);
 }
index 5455a57ff81af995e68fab157b09d3194e91a5c8..290908e2f767fd5318df715fb4a37eb1bf9b383d 100644 (file)
@@ -2170,6 +2170,20 @@ extern uint8_t* lwgeom_to_twkb(const LWGEOM *geom, uint8_t variant, int8_t preci
 
 extern uint8_t* lwgeom_to_twkb_with_idlist(const LWGEOM *geom, int64_t *idlist, uint8_t variant, int8_t precision_xy, int8_t precision_z, int8_t precision_m, size_t *twkb_size);
 
+/**
+ * Trim the bits of an LWGEOM in place, to optimize it for compression.
+ * Sets all bits to zero that are not required to maintain a specified
+ * number of digits after the decimal point. Negative precision values
+ * indicate digits before the decimal point do not need to be preserved.
+ *
+ * @param geom input geometry
+ * @param prec_x precision (digits after decimal point) in x dimension
+ * @param prec_y precision (digits after decimal point) in y dimension
+ * @param prec_z precision (digits after decimal point) in z dimension
+ * @param prec_m precision (digits after decimal point) in m dimension
+ */
+extern void lwgeom_trim_bits_in_place(LWGEOM *geom, int32_t prec_x, int32_t prec_y, int32_t prec_z, int32_t prec_m);
+
 /*******************************************************************************
  * SQLMM internal functions
  ******************************************************************************/
index 9e213ae3497f412b398e068f48f4479238b64284..480d508b78646c7cffe0023658706bb2e924bb44 100644 (file)
@@ -19,6 +19,7 @@
  **********************************************************************
  *
  * Copyright (C) 2001-2006 Refractions Research Inc.
+ * Copyright (C) 2017-2018 Daniel Baston <dbaston@gmail.com>
  *
  **********************************************************************/
 
@@ -2444,3 +2445,63 @@ lwgeom_is_trajectory(const LWGEOM *geom)
        return lwline_is_trajectory((LWLINE*)geom);
 }
 
+static uint8_t
+bits_for_precision(int32_t significant_digits)
+{
+       int32_t bits_needed = ceil(significant_digits / log10(2));
+
+       if (bits_needed > 52)
+       {
+               return 52;
+       }
+       else if (bits_needed < 1)
+       {
+               return 1;
+       }
+
+       return bits_needed;
+}
+
+static inline
+double mask_double(double d, uint64_t mask)
+{
+       uint64_t* double_bits = (uint64_t*) (&d);
+
+       (*double_bits) &= mask;
+
+       return *((double*) double_bits);
+}
+
+static double trim_preserve_decimal_digits(double d, int32_t decimal_digits)
+{
+       if (d==0)
+               return 0;
+
+       int digits_left_of_decimal = (int) (1 + log10(fabs(d)));
+       uint8_t bits_needed = bits_for_precision(decimal_digits + digits_left_of_decimal);
+
+
+       uint64_t mask = 0xffffffffffffffff << (52 - bits_needed);
+
+       return mask_double(d, mask);
+}
+
+void lwgeom_trim_bits_in_place(LWGEOM* geom, int32_t prec_x, int32_t prec_y, int32_t prec_z, int32_t prec_m)
+{
+       LWPOINTITERATOR* it = lwpointiterator_create_rw(geom);
+       POINT4D p;
+
+       while (lwpointiterator_has_next(it))
+       {
+               lwpointiterator_peek(it, &p);
+               p.x = trim_preserve_decimal_digits(p.x, prec_x);
+               p.y = trim_preserve_decimal_digits(p.y, prec_y);
+               if (lwgeom_has_z(geom))
+                       p.z = trim_preserve_decimal_digits(p.z, prec_z);
+               if (lwgeom_has_m(geom))
+                       p.m = trim_preserve_decimal_digits(p.m, prec_m);
+               lwpointiterator_modify_next(it, &p);
+       }
+
+       lwpointiterator_destroy(it);
+}
index 64b1483e32ab9c38b110a6c42add5add513043c2..7398d0621b93e3af7bba701b055cd7d347a31996 100644 (file)
@@ -19,6 +19,7 @@
  **********************************************************************
  *
  * Copyright 2001-2006 Refractions Research Inc.
+ * Copyright 2017-2018 Daniel Baston <dbaston@gmail.com>
  *
  **********************************************************************/
 
@@ -58,6 +59,7 @@ Datum LWGEOM_length2d_linestring(PG_FUNCTION_ARGS);
 Datum LWGEOM_length_linestring(PG_FUNCTION_ARGS);
 Datum LWGEOM_perimeter2d_poly(PG_FUNCTION_ARGS);
 Datum LWGEOM_perimeter_poly(PG_FUNCTION_ARGS);
+Datum postgis_optimize_geometry(PG_FUNCTION_ARGS);
 
 Datum LWGEOM_maxdistance2d_linestring(PG_FUNCTION_ARGS);
 Datum LWGEOM_mindistance2d(PG_FUNCTION_ARGS);
@@ -2983,3 +2985,41 @@ Datum ST_Points(PG_FUNCTION_ARGS)
                PG_RETURN_POINTER(ret);
        }
 }
+
+PG_FUNCTION_INFO_V1(ST_QuantizeCoordinates);
+Datum ST_QuantizeCoordinates(PG_FUNCTION_ARGS)
+{
+       GSERIALIZED* input;
+       GSERIALIZED* result;
+       LWGEOM* g;
+       int32_t prec_x;
+       int32_t prec_y;
+       int32_t prec_z;
+       int32_t prec_m;
+
+       if (PG_ARGISNULL(0))
+               PG_RETURN_NULL();
+       if (PG_ARGISNULL(1))
+       {
+               lwpgerror("Must specify precision");
+               PG_RETURN_NULL();
+       }
+       else
+       {
+               prec_x = PG_GETARG_INT32(1);
+       }
+       prec_y = PG_ARGISNULL(2) ? prec_x : PG_GETARG_INT32(2);
+       prec_z = PG_ARGISNULL(3) ? prec_x : PG_GETARG_INT32(3);
+       prec_m = PG_ARGISNULL(4) ? prec_x : PG_GETARG_INT32(4);
+
+       input = PG_GETARG_GSERIALIZED_P_COPY(0);
+
+       g = lwgeom_from_gserialized(input);
+
+       lwgeom_trim_bits_in_place(g, prec_x, prec_y, prec_z, prec_m);
+
+       result = geometry_serialize(g);
+       lwgeom_free(g);
+       PG_FREE_IF_COPY(input, 0);
+       PG_RETURN_POINTER(result);
+}
index 0ba0628c5098f189335b550a7ce7781cf1c3275c..ec456ff26b0de5ab7367d0251b3e7c320bfff646 100644 (file)
@@ -1099,6 +1099,13 @@ CREATE OR REPLACE FUNCTION postgis_hasbbox(geometry)
        AS 'MODULE_PATHNAME', 'LWGEOM_hasBBOX'
        LANGUAGE 'c' IMMUTABLE STRICT _PARALLEL;
 
+-- Availability: 2.5.0
+CREATE OR REPLACE FUNCTION ST_QuantizeCoordinates(g geometry, prec_x int, prec_y int DEFAULT NULL, prec_z int DEFAULT NULL, prec_m int DEFAULT NULL)
+       RETURNS geometry
+       AS 'MODULE_PATHNAME', 'ST_QuantizeCoordinates'
+       LANGUAGE 'c' IMMUTABLE _PARALLEL
+       COST 10;
+
 ------------------------------------------------------------------------
 -- DEBUG
 ------------------------------------------------------------------------
index 516a7015c3a016f149ec241745e9f0e08e082f9d..0e76194e16165c3df5d4b527fd330baffc854b71 100644 (file)
@@ -109,6 +109,7 @@ TESTS = \
        polygonize \
        polyhedralsurface \
        postgis_type_name \
+       quantize_coordinates \
        regress \
        regress_bdpoly \
        regress_index \
diff --git a/regress/quantize_coordinates.sql b/regress/quantize_coordinates.sql
new file mode 100644 (file)
index 0000000..02f21d7
--- /dev/null
@@ -0,0 +1,14 @@
+-- Returns NULL for NULL geometry
+SELECT 't1', ST_QuantizeCoordinates(NULL::geometry, 6) IS NULL;
+-- Throws error if no precision specified
+SELECT 't2', ST_QuantizeCoordinates('POINT (3 7)', NULL);
+-- Preserves SRID
+SELECT 't3', ST_SRID(ST_QuantizeCoordinates('SRID=32611;POINT (3 7)', 5)) = 32611;
+-- Carries X precision through to other dimensions if not specified
+SELECT 't4', ST_X(ST_QuantizeCoordinates('POINT (1.2345678 1.2345678)', 3)) = ST_Y(ST_QuantizeCoordinates('POINT (1.2345678 1.2345678)', 3));
+-- Or they can be specified independently
+SELECT 't5', ST_X(ST_QuantizeCoordinates('POINT (1.2345678 1.2345678)', 7, 4)) != ST_Y(ST_QuantizeCoordinates('POINT (1.2345678 1.2345678)', 7, 4));
+-- Significant digits are preserved
+WITH input AS (SELECT ST_MakePoint(1.23456789012345, 0) AS geom)
+SELECT 't6', bool_and(abs(ST_X(geom)-ST_X(ST_QuantizeCoordinates(geom, i))) < pow(10, -i))
+FROM input, generate_series(1,15) AS i
diff --git a/regress/quantize_coordinates_expected b/regress/quantize_coordinates_expected
new file mode 100644 (file)
index 0000000..7c37d83
--- /dev/null
@@ -0,0 +1,6 @@
+t1|t
+ERROR:  Must specify precision
+t3|t
+t4|t
+t5|t
+t6|t