From: Daniel Baston Date: Mon, 12 Mar 2018 02:41:56 +0000 (+0000) Subject: Add ST_QuantizeCoordinates X-Git-Tag: 2.5.0alpha~72 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c54ccbb87ade2af8aebd0dad373be94f9126506b;p=postgis Add ST_QuantizeCoordinates Closes #4029 Closes https://github.com/postgis/postgis/pull/225 git-svn-id: http://svn.osgeo.org/postgis/trunk@16455 b70326c6-7e19-0410-871a-916f4a2858ee --- diff --git a/NEWS b/NEWS index 7146c9a46..396f982e9 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,7 @@ PostGIS 2.5.0 - #3913, Upgrade when creating extension from unpackaged (Sandro Santilli) - #2256, _postgis_index_extent() for extent from index (Paul Ramsey) - #3176, Add ST_OrientedEnvelope (Dan Baston) + - #4029, Add ST_QuantizeCoordinates (Dan Baston) * Breaking Changes * - #3885, version number removed from address_standardize lib file diff --git a/doc/reference_misc.xml b/doc/reference_misc.xml index bdad161a7..e6233a419 100644 --- a/doc/reference_misc.xml +++ b/doc/reference_misc.xml @@ -695,5 +695,177 @@ fulltable_size geomsize pergeom + + + + ST_QuantizeCoordinates + + + Sets least significant bits of coordinates to zero + + + + + + + + geometry + ST_QuantizeCoordinates + + + geometry + g + + + int + prec_x + + + int + prec_y + + + int + prec_z + + + int + prec_m + + + + + + + Description + + ST_QuantizeCoordinates determines the number of bits + (N) required to represent a coordinate value with a + specified number of digits after the decimal point, and then sets + all but the N most significant bits to zero. The + resulting coordinate value will still round to the original value, + but will have improved compressiblity. This can result in a + significant disk usage reduction provided that the geometry column + is using a + compressible storage type. The function allows + specification of a different number of digits after the decimal + point in each dimension; unspecified dimensions are assumed to have + the precsion of the x dimension. Negative digits are + interpreted to refer digits to the left of the decimal point, (i.e., + prec_x=-2 will preserve coordinate values to the + nearest 100. + + + The coordinates produced by ST_QuantizeCoordinates are + independent of the geometry that contains those coordinates and the + relative position of those coordinates within the geometry. As a result, + existing topological relationships between geometries are unaffected + by use of this function. The function will not produce invalid geometry + unless it is called with a number of digits lower than the intrinsic + precision of the geometry. + + Availability: 2.5.0 + + + Technical Background + + PostGIS stores all coordinate values as double-precision floating + point integers, which can reliably represent 15 significant digits. + However, PostGIS may be used to manage data that intrinsically has + fewer than 15 significant digits. An example is TIGER data, which is + provided as geographic coordinates with six digits of precision + after the decimal point (thus requiring only nine significant digits + of longitude and eight significant digits of latitude.) + + + When 15 significant digits are available, there are many possible + representations of a number with 9 significant digits. A double + precision floating point number uses 52 explicit bits to represent + the significand (mantissa) of the coordinate. Only 30 bits are needed + to represent a mantissa with 9 significant digits, leaving 22 + insignificant bits; we can set their value to anything we like and + still end up with a number that rounds to our input value. For + example, the value 100.123456 can be represented by the floating + point numbers closest to 100.123456000000, 100.123456000001, and + 100.123456432199. All are equally valid, in that + ST_AsText(geom, 6) will return the same result with + any of these inputs. Because we can set these bits to whatever we + like, ST_QuantizeCoordinates sets the 22 insignificant + bits to zero because a block of 22 zeros is easily (and automatically) + compressed by PostgreSQL's built-in compression algorithm. + + + + + Only the on-disk size of the geometry is potentially affected by + ST_QuantizeCoordinates. , + which reports the in-memory usage of the geometry, will return the + the same value regardless of the disk space used by a geometry. + + + + + + Examples + + SELECT ST_AsText(ST_QuantizeCoordinates('POINT (100.123456 0)')); +st_astext +------------------------- +POINT(100.123455941677 0) + + + +WITH test AS (SELECT 'POINT (123.456789123456 123.456789123456)'::geometry AS geom) +SELECT + digits, + encode(ST_QuantizeCoordinates(geom, digits), 'hex'), + ST_AsText(ST_QuantizeCoordinates(geom, digits)) +FROM test, generate_series(15, -15, -1) AS digits; + +digits | encode | st_astext +--------+--------------------------------------------+------------------------------------------ +15 | 01010000005f9a72083cdd5e405f9a72083cdd5e40 | POINT(123.456789123456 123.456789123456) +14 | 01010000005f9a72083cdd5e405f9a72083cdd5e40 | POINT(123.456789123456 123.456789123456) +13 | 01010000005f9a72083cdd5e405f9a72083cdd5e40 | POINT(123.456789123456 123.456789123456) +12 | 01010000005c9a72083cdd5e405c9a72083cdd5e40 | POINT(123.456789123456 123.456789123456) +11 | 0101000000409a72083cdd5e40409a72083cdd5e40 | POINT(123.456789123456 123.456789123456) +10 | 0101000000009a72083cdd5e40009a72083cdd5e40 | POINT(123.456789123455 123.456789123455) +9 | 0101000000009072083cdd5e40009072083cdd5e40 | POINT(123.456789123418 123.456789123418) +8 | 0101000000008072083cdd5e40008072083cdd5e40 | POINT(123.45678912336 123.45678912336) +7 | 0101000000000070083cdd5e40000070083cdd5e40 | POINT(123.456789121032 123.456789121032) +6 | 0101000000000040083cdd5e40000040083cdd5e40 | POINT(123.456789076328 123.456789076328) +5 | 0101000000000000083cdd5e40000000083cdd5e40 | POINT(123.456789016724 123.456789016724) +4 | 0101000000000000003cdd5e40000000003cdd5e40 | POINT(123.456787109375 123.456787109375) +3 | 0101000000000000003cdd5e40000000003cdd5e40 | POINT(123.456787109375 123.456787109375) +2 | 01010000000000000038dd5e400000000038dd5e40 | POINT(123.45654296875 123.45654296875) +1 | 01010000000000000000dd5e400000000000dd5e40 | POINT(123.453125 123.453125) +0 | 01010000000000000000dc5e400000000000dc5e40 | POINT(123.4375 123.4375) +-1 | 01010000000000000000c05e400000000000c05e40 | POINT(123 123) +-2 | 01010000000000000000005e400000000000005e40 | POINT(120 120) +-3 | 010100000000000000000058400000000000005840 | POINT(96 96) +-4 | 010100000000000000000058400000000000005840 | POINT(96 96) +-5 | 010100000000000000000058400000000000005840 | POINT(96 96) +-6 | 010100000000000000000058400000000000005840 | POINT(96 96) +-7 | 010100000000000000000058400000000000005840 | POINT(96 96) +-8 | 010100000000000000000058400000000000005840 | POINT(96 96) +-9 | 010100000000000000000058400000000000005840 | POINT(96 96) +-10 | 010100000000000000000058400000000000005840 | POINT(96 96) +-11 | 010100000000000000000058400000000000005840 | POINT(96 96) +-12 | 010100000000000000000058400000000000005840 | POINT(96 96) +-13 | 010100000000000000000058400000000000005840 | POINT(96 96) +-14 | 010100000000000000000058400000000000005840 | POINT(96 96) +-15 | 010100000000000000000058400000000000005840 | POINT(96 96) + + + + + + See Also + + + + + diff --git a/liblwgeom/cunit/cu_algorithm.c b/liblwgeom/cunit/cu_algorithm.c index bd9b5dae7..3e9741c6b 100644 --- a/liblwgeom/cunit/cu_algorithm.c +++ b/liblwgeom/cunit/cu_algorithm.c @@ -1434,6 +1434,52 @@ static void test_kmeans(void) return; } +static void test_trim_bits(void) +{ + POINTARRAY *pta = ptarray_construct_empty(LW_TRUE, LW_TRUE, 2); + POINT4D pt; + LWLINE *line; + int precision; + uint32_t i; + + pt.x = 1.2345678901234; + pt.y = 2.3456789012345; + pt.z = 3.4567890123456; + pt.m = 4.5678901234567; + + ptarray_insert_point(pta, &pt, 0); + + pt.x *= 3; + pt.y *= 3; + pt.y *= 3; + pt.z *= 3; + + ptarray_insert_point(pta, &pt, 1); + + line = lwline_construct(0, NULL, pta); + + for (precision = -15; precision <= 15; precision++) + { + LWLINE *line2 = (LWLINE*) lwgeom_clone_deep((LWGEOM*) line); + lwgeom_trim_bits_in_place((LWGEOM*) line2, precision, precision, precision, precision); + + for (i = 0; i < line->points->npoints; i++) + { + POINT4D pt1 = getPoint4d(line->points, i); + POINT4D pt2 = getPoint4d(line2->points, i); + + CU_ASSERT_DOUBLE_EQUAL(pt1.x, pt2.x, pow(10, -1*precision)); + CU_ASSERT_DOUBLE_EQUAL(pt1.y, pt2.y, pow(10, -1*precision)); + CU_ASSERT_DOUBLE_EQUAL(pt1.z, pt2.z, pow(10, -1*precision)); + CU_ASSERT_DOUBLE_EQUAL(pt1.m, pt2.m, pow(10, -1*precision)); + } + + lwline_free(line2); + } + + lwline_free(line); +} + /* ** Used by test harness to register the tests in this file. */ @@ -1465,5 +1511,6 @@ void algorithms_suite_setup(void) PG_ADD_TEST(suite,test_median_handles_3d_correctly); PG_ADD_TEST(suite,test_median_robustness); PG_ADD_TEST(suite,test_lwpoly_construct_circle); + PG_ADD_TEST(suite,test_trim_bits); PG_ADD_TEST(suite,test_lwgeom_remove_repeated_points); } diff --git a/liblwgeom/liblwgeom.h.in b/liblwgeom/liblwgeom.h.in index 5455a57ff..290908e2f 100644 --- a/liblwgeom/liblwgeom.h.in +++ b/liblwgeom/liblwgeom.h.in @@ -2170,6 +2170,20 @@ extern uint8_t* lwgeom_to_twkb(const LWGEOM *geom, uint8_t variant, int8_t preci extern uint8_t* lwgeom_to_twkb_with_idlist(const LWGEOM *geom, int64_t *idlist, uint8_t variant, int8_t precision_xy, int8_t precision_z, int8_t precision_m, size_t *twkb_size); +/** + * Trim the bits of an LWGEOM in place, to optimize it for compression. + * Sets all bits to zero that are not required to maintain a specified + * number of digits after the decimal point. Negative precision values + * indicate digits before the decimal point do not need to be preserved. + * + * @param geom input geometry + * @param prec_x precision (digits after decimal point) in x dimension + * @param prec_y precision (digits after decimal point) in y dimension + * @param prec_z precision (digits after decimal point) in z dimension + * @param prec_m precision (digits after decimal point) in m dimension + */ +extern void lwgeom_trim_bits_in_place(LWGEOM *geom, int32_t prec_x, int32_t prec_y, int32_t prec_z, int32_t prec_m); + /******************************************************************************* * SQLMM internal functions ******************************************************************************/ diff --git a/liblwgeom/lwgeom.c b/liblwgeom/lwgeom.c index 9e213ae34..480d508b7 100644 --- a/liblwgeom/lwgeom.c +++ b/liblwgeom/lwgeom.c @@ -19,6 +19,7 @@ ********************************************************************** * * Copyright (C) 2001-2006 Refractions Research Inc. + * Copyright (C) 2017-2018 Daniel Baston * **********************************************************************/ @@ -2444,3 +2445,63 @@ lwgeom_is_trajectory(const LWGEOM *geom) return lwline_is_trajectory((LWLINE*)geom); } +static uint8_t +bits_for_precision(int32_t significant_digits) +{ + int32_t bits_needed = ceil(significant_digits / log10(2)); + + if (bits_needed > 52) + { + return 52; + } + else if (bits_needed < 1) + { + return 1; + } + + return bits_needed; +} + +static inline +double mask_double(double d, uint64_t mask) +{ + uint64_t* double_bits = (uint64_t*) (&d); + + (*double_bits) &= mask; + + return *((double*) double_bits); +} + +static double trim_preserve_decimal_digits(double d, int32_t decimal_digits) +{ + if (d==0) + return 0; + + int digits_left_of_decimal = (int) (1 + log10(fabs(d))); + uint8_t bits_needed = bits_for_precision(decimal_digits + digits_left_of_decimal); + + + uint64_t mask = 0xffffffffffffffff << (52 - bits_needed); + + return mask_double(d, mask); +} + +void lwgeom_trim_bits_in_place(LWGEOM* geom, int32_t prec_x, int32_t prec_y, int32_t prec_z, int32_t prec_m) +{ + LWPOINTITERATOR* it = lwpointiterator_create_rw(geom); + POINT4D p; + + while (lwpointiterator_has_next(it)) + { + lwpointiterator_peek(it, &p); + p.x = trim_preserve_decimal_digits(p.x, prec_x); + p.y = trim_preserve_decimal_digits(p.y, prec_y); + if (lwgeom_has_z(geom)) + p.z = trim_preserve_decimal_digits(p.z, prec_z); + if (lwgeom_has_m(geom)) + p.m = trim_preserve_decimal_digits(p.m, prec_m); + lwpointiterator_modify_next(it, &p); + } + + lwpointiterator_destroy(it); +} diff --git a/postgis/lwgeom_functions_basic.c b/postgis/lwgeom_functions_basic.c index 64b1483e3..7398d0621 100644 --- a/postgis/lwgeom_functions_basic.c +++ b/postgis/lwgeom_functions_basic.c @@ -19,6 +19,7 @@ ********************************************************************** * * Copyright 2001-2006 Refractions Research Inc. + * Copyright 2017-2018 Daniel Baston * **********************************************************************/ @@ -58,6 +59,7 @@ Datum LWGEOM_length2d_linestring(PG_FUNCTION_ARGS); Datum LWGEOM_length_linestring(PG_FUNCTION_ARGS); Datum LWGEOM_perimeter2d_poly(PG_FUNCTION_ARGS); Datum LWGEOM_perimeter_poly(PG_FUNCTION_ARGS); +Datum postgis_optimize_geometry(PG_FUNCTION_ARGS); Datum LWGEOM_maxdistance2d_linestring(PG_FUNCTION_ARGS); Datum LWGEOM_mindistance2d(PG_FUNCTION_ARGS); @@ -2983,3 +2985,41 @@ Datum ST_Points(PG_FUNCTION_ARGS) PG_RETURN_POINTER(ret); } } + +PG_FUNCTION_INFO_V1(ST_QuantizeCoordinates); +Datum ST_QuantizeCoordinates(PG_FUNCTION_ARGS) +{ + GSERIALIZED* input; + GSERIALIZED* result; + LWGEOM* g; + int32_t prec_x; + int32_t prec_y; + int32_t prec_z; + int32_t prec_m; + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + if (PG_ARGISNULL(1)) + { + lwpgerror("Must specify precision"); + PG_RETURN_NULL(); + } + else + { + prec_x = PG_GETARG_INT32(1); + } + prec_y = PG_ARGISNULL(2) ? prec_x : PG_GETARG_INT32(2); + prec_z = PG_ARGISNULL(3) ? prec_x : PG_GETARG_INT32(3); + prec_m = PG_ARGISNULL(4) ? prec_x : PG_GETARG_INT32(4); + + input = PG_GETARG_GSERIALIZED_P_COPY(0); + + g = lwgeom_from_gserialized(input); + + lwgeom_trim_bits_in_place(g, prec_x, prec_y, prec_z, prec_m); + + result = geometry_serialize(g); + lwgeom_free(g); + PG_FREE_IF_COPY(input, 0); + PG_RETURN_POINTER(result); +} diff --git a/postgis/postgis.sql.in b/postgis/postgis.sql.in index 0ba0628c5..ec456ff26 100644 --- a/postgis/postgis.sql.in +++ b/postgis/postgis.sql.in @@ -1099,6 +1099,13 @@ CREATE OR REPLACE FUNCTION postgis_hasbbox(geometry) AS 'MODULE_PATHNAME', 'LWGEOM_hasBBOX' LANGUAGE 'c' IMMUTABLE STRICT _PARALLEL; +-- Availability: 2.5.0 +CREATE OR REPLACE FUNCTION ST_QuantizeCoordinates(g geometry, prec_x int, prec_y int DEFAULT NULL, prec_z int DEFAULT NULL, prec_m int DEFAULT NULL) + RETURNS geometry + AS 'MODULE_PATHNAME', 'ST_QuantizeCoordinates' + LANGUAGE 'c' IMMUTABLE _PARALLEL + COST 10; + ------------------------------------------------------------------------ -- DEBUG ------------------------------------------------------------------------ diff --git a/regress/Makefile.in b/regress/Makefile.in index 516a7015c..0e76194e1 100644 --- a/regress/Makefile.in +++ b/regress/Makefile.in @@ -109,6 +109,7 @@ TESTS = \ polygonize \ polyhedralsurface \ postgis_type_name \ + quantize_coordinates \ regress \ regress_bdpoly \ regress_index \ diff --git a/regress/quantize_coordinates.sql b/regress/quantize_coordinates.sql new file mode 100644 index 000000000..02f21d742 --- /dev/null +++ b/regress/quantize_coordinates.sql @@ -0,0 +1,14 @@ +-- Returns NULL for NULL geometry +SELECT 't1', ST_QuantizeCoordinates(NULL::geometry, 6) IS NULL; +-- Throws error if no precision specified +SELECT 't2', ST_QuantizeCoordinates('POINT (3 7)', NULL); +-- Preserves SRID +SELECT 't3', ST_SRID(ST_QuantizeCoordinates('SRID=32611;POINT (3 7)', 5)) = 32611; +-- Carries X precision through to other dimensions if not specified +SELECT 't4', ST_X(ST_QuantizeCoordinates('POINT (1.2345678 1.2345678)', 3)) = ST_Y(ST_QuantizeCoordinates('POINT (1.2345678 1.2345678)', 3)); +-- Or they can be specified independently +SELECT 't5', ST_X(ST_QuantizeCoordinates('POINT (1.2345678 1.2345678)', 7, 4)) != ST_Y(ST_QuantizeCoordinates('POINT (1.2345678 1.2345678)', 7, 4)); +-- Significant digits are preserved +WITH input AS (SELECT ST_MakePoint(1.23456789012345, 0) AS geom) +SELECT 't6', bool_and(abs(ST_X(geom)-ST_X(ST_QuantizeCoordinates(geom, i))) < pow(10, -i)) +FROM input, generate_series(1,15) AS i diff --git a/regress/quantize_coordinates_expected b/regress/quantize_coordinates_expected new file mode 100644 index 000000000..7c37d832b --- /dev/null +++ b/regress/quantize_coordinates_expected @@ -0,0 +1,6 @@ +t1|t +ERROR: Must specify precision +t3|t +t4|t +t5|t +t6|t