From: Regina Obe Date: Thu, 11 May 2017 23:44:39 +0000 (+0000) Subject: Gist penalty speed improvement for 2d and nd points X-Git-Tag: 2.4.0alpha~102 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0706464c2e384bb05be3eb4af5bd909d1dcb7843;p=postgis Gist penalty speed improvement for 2d and nd points References #3753 for PostGIS 2.4.0 git-svn-id: http://svn.osgeo.org/postgis/trunk@15392 b70326c6-7e19-0410-871a-916f4a2858ee --- diff --git a/NEWS b/NEWS index cbda3e760..fe9496869 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,8 @@ PostGIS 2.4.0 - #3599, Geobuf output support via ST_AsGeobuf (Björn Harrtell) - #3661, Mapbox vector tile output support via ST_AsMVT (Björn Harrtell / CartoDB) - #3689, Add orientation checking and forcing functions (Dan Baston) + - #3753, Gist penalty speed improvements for 2d and nd points + (Darafei Praliaskouski) * Bug Fixes * - #3682, Boolean (FTLogical) should be 1 byte not 2 bytes diff --git a/postgis/gserialized_gist_2d.c b/postgis/gserialized_gist_2d.c index d83bfc8c6..f4061986f 100644 --- a/postgis/gserialized_gist_2d.c +++ b/postgis/gserialized_gist_2d.c @@ -19,6 +19,7 @@ ********************************************************************** * * Copyright 2009 Paul Ramsey + * Copyright 2017 Darafei Praliaskouski * **********************************************************************/ @@ -216,6 +217,14 @@ static float box2df_size(const BOX2DF *a) return result; } +static float box2df_edge(const BOX2DF *a) +{ + if ( a == NULL ) + return (float)0.0; + + return ((a->xmax) - (a->xmin)) + ((a->ymax) - (a->ymin)); +} + static float box2df_union_size(const BOX2DF *a, const BOX2DF *b) { float result; @@ -243,6 +252,32 @@ static float box2df_union_size(const BOX2DF *a, const BOX2DF *b) } +static float box2df_union_edge(const BOX2DF *a, const BOX2DF *b) +{ + float result; + + POSTGIS_DEBUG(5,"entered function"); + + if ( a == NULL && b == NULL ) + { + elog(ERROR, "box2df_union_edge received two null arguments"); + return 0.0; + } + + if ( a == NULL ) + return box2df_edge(b); + + if ( b == NULL ) + return box2df_edge(a); + + result = (Max(a->xmax,b->xmax) - Min(a->xmin,b->xmin)) + + (Max(a->ymax,b->ymax) - Min(a->ymin,b->ymin)); + + POSTGIS_DEBUGF(5, "union edge of %s and %s is %.8g", box2df_to_string(a), box2df_to_string(b), result); + + return result; +} + /* Convert a double-based GBOX into a float-based BOX2DF, ensuring the float box is larger than the double box */ static inline int box2df_from_gbox_p(GBOX *box, BOX2DF *a) @@ -1242,10 +1277,37 @@ Datum gserialized_gist_distance_2d(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(distance); } +/* +** Function to pack floats of different realms +** This function serves to pack bit flags inside float type +** Resulted value represent can be from four different "realms" +** Every value from realm 3 is greater than any value from realms 2, 1 and 0. +** Every value from realm 2 is less than every value from realm 3 and greater +** than any value from realm 1 and 0, and so on. Values from the same realm +** loose two bits of precision. This technique is possible due to floating +** point numbers specification according to IEEE 754: exponent bits are highest +** (excluding sign bits, but here penalty is always positive). If float a is +** greater than float b, integer A with same bit representation as a is greater +** than integer B with same bits as b. +*/ +static float pack_float(const float value, const int realm) +{ + union { + float f; + struct { unsigned value:31, sign:1; } vbits; + struct { unsigned value:29, realm:2, sign:1; } rbits; + } a; + + a.f = value; + a.rbits.value = a.vbits.value >> 2; + a.rbits.realm = realm; + + return a.f; +} + /* ** GiST support function. Calculate the "penalty" cost of adding this entry into an existing entry. ** Calculate the change in volume of the old entry once the new entry is added. -** TODO: Re-evaluate this in light of R*Tree penalty approaches. */ PG_FUNCTION_INFO_V1(gserialized_gist_penalty_2d); Datum gserialized_gist_penalty_2d(PG_FUNCTION_ARGS) @@ -1254,7 +1316,7 @@ Datum gserialized_gist_penalty_2d(PG_FUNCTION_ARGS) GISTENTRY *newentry = (GISTENTRY*) PG_GETARG_POINTER(1); float *result = (float*) PG_GETARG_POINTER(2); BOX2DF *gbox_index_orig, *gbox_index_new; - float size_union, size_orig; + float size_union, size_orig, edge_union, edge_orig; POSTGIS_DEBUG(4, "[GIST] 'penalty' function called"); @@ -1273,6 +1335,37 @@ Datum gserialized_gist_penalty_2d(PG_FUNCTION_ARGS) size_union = box2df_union_size(gbox_index_orig, gbox_index_new); size_orig = box2df_size(gbox_index_orig); *result = size_union - size_orig; + + /* REALM 0: No extension is required, volume is zero, return edge */ + /* REALM 1: No extension is required, return nonzero area */ + /* REALM 2: Area extension is zero, return nonzero edge extension */ + /* REALM 3: Area extension is nonzero, return it */ + + if( *result == 0 ) + { + if (size_orig > 0) + { + *result = pack_float(size_orig, 1); /* REALM 1 */ + } + else + { + edge_union = box2df_union_edge(gbox_index_orig, gbox_index_new); + edge_orig = box2df_edge(gbox_index_orig); + *result = edge_union - edge_orig; + if( *result == 0 ) + { + *result = pack_float(edge_orig, 0); /* REALM 0 */ + } + else + { + *result = pack_float(*result, 2); /* REALM 2 */ + } + } + } + else + { + *result = pack_float(*result, 3); /* REALM 3 */ + } POSTGIS_DEBUGF(4, "[GIST] 'penalty', union size (%.12f), original size (%.12f), penalty (%.12f)", size_union, size_orig, *result); diff --git a/postgis/gserialized_gist_nd.c b/postgis/gserialized_gist_nd.c index 4fc5fa476..57ab1c53f 100644 --- a/postgis/gserialized_gist_nd.c +++ b/postgis/gserialized_gist_nd.c @@ -19,6 +19,7 @@ ********************************************************************** * * Copyright 2009 Paul Ramsey + * Copyright 2017 Darafei Praliaskouski * **********************************************************************/ @@ -229,6 +230,22 @@ static float gidx_volume(GIDX *a) return result; } +/* Calculate the edge of the GIDX */ +static float gidx_edge(GIDX *a) +{ + float result; + int i; + if ( a == NULL || gidx_is_unknown(a) ) + { + return 0.0; + } + result = GIDX_GET_MAX(a,0) - GIDX_GET_MIN(a,0); + for ( i = 1; i < GIDX_NDIMS(a); i++ ) + result += (GIDX_GET_MAX(a,i) - GIDX_GET_MIN(a,i)); + POSTGIS_DEBUGF(5, "calculated edge of %s as %.8g", gidx_to_string(a), result); + return result; +} + /* Ensure the first argument has the higher dimensionality. */ static void gidx_dimensionality_check(GIDX **a, GIDX **b) { @@ -292,6 +309,58 @@ static float gidx_union_volume(GIDX *a, GIDX *b) return result; } +/* Calculate the edge of the union of the boxes. Avoids creating an intermediate box. */ +static float gidx_union_edge(GIDX *a, GIDX *b) +{ + float result; + int i; + int ndims_a, ndims_b; + + POSTGIS_DEBUG(5,"entered function"); + + if ( a == NULL && b == NULL ) + { + elog(ERROR, "gidx_union_edge received two null arguments"); + return 0.0; + } + + if ( a == NULL || gidx_is_unknown(a) ) + return gidx_volume(b); + + if ( b == NULL || gidx_is_unknown(b) ) + return gidx_volume(a); + + if ( gidx_is_unknown(a) && gidx_is_unknown(b) ) + { + return 0.0; + } + + /* Ensure 'a' has the most dimensions. */ + gidx_dimensionality_check(&a, &b); + + ndims_a = GIDX_NDIMS(a); + ndims_b = GIDX_NDIMS(b); + + /* Initialize with maximal length of first dimension. */ + result = Max(GIDX_GET_MAX(a,0),GIDX_GET_MAX(b,0)) - Min(GIDX_GET_MIN(a,0),GIDX_GET_MIN(b,0)); + + /* Add maximal length of remaining dimensions. */ + for ( i = 1; i < ndims_b; i++ ) + { + result += (Max(GIDX_GET_MAX(a,i),GIDX_GET_MAX(b,i)) - Min(GIDX_GET_MIN(a,i),GIDX_GET_MIN(b,i))); + } + + /* Add in dimensions of higher dimensional box. */ + for ( i = ndims_b; i < ndims_a; i++ ) + { + result += (GIDX_GET_MAX(a,i) - GIDX_GET_MIN(a,i)); + } + + POSTGIS_DEBUGF(5, "edge( %s union %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result); + + return result; +} + /* Calculate the volume of the intersection of the boxes. */ static float gidx_inter_volume(GIDX *a, GIDX *b) { @@ -1238,11 +1307,37 @@ Datum gserialized_gist_consistent(PG_FUNCTION_ARGS) PG_RETURN_BOOL(result); } +/* +** Function to pack floats of different realms +** This function serves to pack bit flags inside float type +** Resulted value represent can be from four different "realms" +** Every value from realm 3 is greater than any value from realms 2, 1 and 0. +** Every value from realm 2 is less than every value from realm 3 and greater +** than any value from realm 1 and 0, and so on. Values from the same realm +** loose two bits of precision. This technique is possible due to floating +** point numbers specification according to IEEE 754: exponent bits are highest +** (excluding sign bits, but here penalty is always positive). If float a is +** greater than float b, integer A with same bit representation as a is greater +** than integer B with same bits as b. +*/ +static float pack_float(const float value, const int realm) +{ + union { + float f; + struct { unsigned value:31, sign:1; } vbits; + struct { unsigned value:29, realm:2, sign:1; } rbits; + } a; + + a.f = value; + a.rbits.value = a.vbits.value >> 2; + a.rbits.realm = realm; + + return a.f; +} /* ** GiST support function. Calculate the "penalty" cost of adding this entry into an existing entry. ** Calculate the change in volume of the old entry once the new entry is added. -** TODO: Re-evaluate this in light of R*Tree penalty approaches. */ PG_FUNCTION_INFO_V1(gserialized_gist_penalty); Datum gserialized_gist_penalty(PG_FUNCTION_ARGS) @@ -1251,7 +1346,7 @@ Datum gserialized_gist_penalty(PG_FUNCTION_ARGS) GISTENTRY *newentry = (GISTENTRY*) PG_GETARG_POINTER(1); float *result = (float*) PG_GETARG_POINTER(2); GIDX *gbox_index_orig, *gbox_index_new; - float size_union, size_orig; + float size_union, size_orig, edge_union, edge_orig; POSTGIS_DEBUG(4, "[GIST] 'penalty' function called"); @@ -1271,19 +1366,37 @@ Datum gserialized_gist_penalty(PG_FUNCTION_ARGS) size_orig = gidx_volume(gbox_index_orig); *result = size_union - size_orig; - /* All things being equal, we prefer to expand small boxes rather than large boxes. - NOTE: This code seemed to be causing badly balanced trees to be built - and therefore has been commented out. Not sure why it didn't work, - but it didn't. - if( FP_IS_ZERO(*result) ) - if( FP_IS_ZERO(size_orig) ) - *result = 0.0; - else - *result = 1.0 - (1.0/(1.0 + size_orig)); - else - *result += 1.0; - */ - + /* REALM 0: No extension is required, volume is zero, return edge */ + /* REALM 1: No extension is required, return nonzero area */ + /* REALM 2: Area extension is zero, return nonzero edge extension */ + /* REALM 3: Area extension is nonzero, return it */ + + if( *result == 0 ) + { + if (size_orig > 0) + { + *result = pack_float(size_orig, 1); /* REALM 1 */ + } + else + { + edge_union = gidx_union_edge(gbox_index_orig, gbox_index_new); + edge_orig = gidx_edge(gbox_index_orig); + *result = edge_union - edge_orig; + if( *result == 0 ) + { + *result = pack_float(edge_orig, 0); /* REALM 0 */ + } + else + { + *result = pack_float(*result, 2); /* REALM 2 */ + } + } + } + else + { + *result = pack_float(*result, 3); /* REALM 3 */ + } + POSTGIS_DEBUGF(4, "[GIST] union size (%.12f), original size (%.12f), penalty (%.12f)", size_union, size_orig, *result); PG_RETURN_POINTER(result);