From 52f06ce22c56c6aef1ae512d68d9c7ed97ec713f Mon Sep 17 00:00:00 2001 From: Darafei Praliaskouski Date: Fri, 28 Jun 2019 10:00:18 +0000 Subject: [PATCH] Rewrite GiST penalty. Use value of 0 as often as possible, as it's special code path in Postgres. Fix picksplit to use correct penalty function. Drop dead code. Closes #4441 Closes https://github.com/postgis/postgis/pull/425 git-svn-id: http://svn.osgeo.org/postgis/trunk@17565 b70326c6-7e19-0410-871a-916f4a2858ee --- .clang-format | 2 +- NEWS | 2 + postgis/gserialized_gist_2d.c | 534 +++++----------------------------- postgis/gserialized_gist_nd.c | 89 +++--- 4 files changed, 108 insertions(+), 519 deletions(-) diff --git a/.clang-format b/.clang-format index 4f42864bc..a69316f0c 100644 --- a/.clang-format +++ b/.clang-format @@ -26,7 +26,7 @@ BraceWrapping: AfterFunction: true AfterNamespace: false AfterObjCDeclaration: false - AfterStruct: true + AfterStruct: false AfterUnion: false AfterExternBlock: false BeforeCatch: true diff --git a/NEWS b/NEWS index 4a02ffc9d..34e1665e7 100644 --- a/NEWS +++ b/NEWS @@ -183,6 +183,8 @@ PostGIS 3.0.0 - #4327, Avoid pfree'ing the result of getenv (Raúl Marín) - #4406, Throw on invalid characters when decoding geohash (Raúl Marín) - #4394, Allow FULL OUTER JOIN on geometry (Darafei Praliaskouski) + - #4441, Make GiST penalty friendly to multi-column indexes and build single-column + ones faster. (Darafei Praliaskouski) PostGIS 2.5.0 diff --git a/postgis/gserialized_gist_2d.c b/postgis/gserialized_gist_2d.c index 3ca3cbee7..086aa22b8 100644 --- a/postgis/gserialized_gist_2d.c +++ b/postgis/gserialized_gist_2d.c @@ -19,11 +19,10 @@ ********************************************************************** * * Copyright 2009 Paul Ramsey - * Copyright 2017 Darafei Praliaskouski + * Copyright 2017-2019 Darafei Praliaskouski * **********************************************************************/ - /* ** R-Tree Bibliography ** @@ -50,7 +49,6 @@ #include "lwgeom_pg.h" /* For debugging macros. */ #include "gserialized_gist.h" /* For utility functions. */ - #include /* For FLT_MAX */ #include @@ -60,12 +58,6 @@ */ #define LIMIT_RATIO 0.1 -/* -** 0 == don't use it -** 1 == use it -*/ -#define KOROTKOV_SPLIT 1 - /* ** For debugging */ @@ -170,8 +162,7 @@ inline void box2df_set_finite(BOX2DF *a) return; } -/* Enlarge b_union to contain b_new. If b_new contains more - dimensions than b_union, expand b_union to contain those dimensions. */ +/* Enlarge b_union to contain b_new. */ void box2df_merge(BOX2DF *b_union, BOX2DF *b_new) { @@ -191,107 +182,6 @@ void box2df_merge(BOX2DF *b_union, BOX2DF *b_new) return; } -#if KOROTKOV_SPLIT < 1 -static bool box2df_intersection(const BOX2DF *a, const BOX2DF *b, BOX2DF *n) -{ - POSTGIS_DEBUGF(5, "calculating intersection of %s with %s", box2df_to_string(a), box2df_to_string(b)); - - if( a == NULL || b == NULL || n == NULL ) - return false; - - n->xmax = Min(a->xmax, b->xmax); - n->ymax = Min(a->ymax, b->ymax); - n->xmin = Max(a->xmin, b->xmin); - n->ymin = Max(a->ymin, b->ymin); - - POSTGIS_DEBUGF(5, "intersection is %s", box2df_to_string(n)); - - if ( (n->xmax < n->xmin) || (n->ymax < n->ymin) ) - return false; - - return true; -} -#endif - -static float box2df_size(const BOX2DF *a) -{ - float result; - - if ( a == NULL || box2df_is_empty(a) ) - return (float)0.0; - - if ( (a->xmax <= a->xmin) || (a->ymax <= a->ymin) ) - { - result = (float) 0.0; - } - else - { - result = (((double) a->xmax)-((double) a->xmin)) * (((double) a->ymax)-((double) a->ymin)); - } - - return result; -} - -static float box2df_edge(const BOX2DF *a) -{ - if ( a == NULL || box2df_is_empty(a) ) - return (float)0.0; - - return ((a->xmax) - (a->xmin)) + ((a->ymax) - (a->ymin)); -} - -static float box2df_union_size(const BOX2DF *a, const BOX2DF *b) -{ - float result; - - POSTGIS_DEBUG(5,"entered function"); - - if ( a == NULL && b == NULL ) - { - elog(ERROR, "box2df_union_size received two null arguments"); - return 0.0; - } - - if ( a == NULL || box2df_is_empty(a) ) - return box2df_size(b); - - if ( b == NULL || box2df_is_empty(b) ) - return box2df_size(a); - - result = ((double)Max(a->xmax,b->xmax) - (double)Min(a->xmin,b->xmin)) * - ((double)Max(a->ymax,b->ymax) - (double)Min(a->ymin,b->ymin)); - - POSTGIS_DEBUGF(5, "union size of %s and %s is %.8g", box2df_to_string(a), box2df_to_string(b), result); - - return result; -} - - -static float box2df_union_edge(const BOX2DF *a, const BOX2DF *b) -{ - float result; - - POSTGIS_DEBUG(5,"entered function"); - - if ( a == NULL && b == NULL ) - { - elog(ERROR, "box2df_union_edge received two null arguments"); - return 0.0; - } - - if ( a == NULL || box2df_is_empty(a) ) - return box2df_edge(b); - - if ( b == NULL || box2df_is_empty(b) ) - return box2df_edge(a); - - result = (Max(a->xmax,b->xmax) - Min(a->xmin,b->xmin)) + - (Max(a->ymax,b->ymax) - Min(a->ymin,b->ymin)); - - POSTGIS_DEBUGF(5, "union edge of %s and %s is %.8g", box2df_to_string(a), box2df_to_string(b), result); - - return result; -} /* Convert a double-based GBOX into a float-based BOX2DF, ensuring the float box is larger than the double box */ @@ -315,14 +205,13 @@ int box2df_to_gbox_p(BOX2DF *a, GBOX *box) } /*********************************************************************** -** BOX3DF tests for 2D index operators. +** BOX2DF tests for 2D index operators. */ /* Ensure all minimums are below maximums. */ inline void box2df_validate(BOX2DF *b) { float tmp; - POSTGIS_DEBUGF(5,"validating box2df (%s)", box2df_to_string(b)); if ( box2df_is_empty(b) ) return; @@ -1227,31 +1116,66 @@ Datum gserialized_gist_distance_2d(PG_FUNCTION_ARGS) } /* -** Function to pack floats of different realms -** This function serves to pack bit flags inside float type -** Resulted value represent can be from four different "realms" -** Every value from realm 3 is greater than any value from realms 2, 1 and 0. -** Every value from realm 2 is less than every value from realm 3 and greater -** than any value from realm 1 and 0, and so on. Values from the same realm -** loose two bits of precision. This technique is possible due to floating -** point numbers specification according to IEEE 754: exponent bits are highest +** Function to pack floats of different realms. +** This function serves to pack bit flags inside float type. +** Result value represent can be from two different "realms". +** Every value from realm 1 is greater than any value from realm 0. +** Values from the same realm loose one bit of precision. +** +** This technique is possible due to floating point numbers specification +** according to IEEE 754: exponent bits are highest ** (excluding sign bits, but here penalty is always positive). If float a is ** greater than float b, integer A with same bit representation as a is greater ** than integer B with same bits as b. */ -static float pack_float(const float value, const int realm) +static inline float +pack_float(const float value, const uint8_t realm) { - union { - float f; - struct { unsigned value:31, sign:1; } vbits; - struct { unsigned value:29, realm:2, sign:1; } rbits; - } a; + union { + float f; + struct { + unsigned value : 31, sign : 1; + } vbits; + struct { + unsigned value : 30, realm : 1, sign : 1; + } rbits; + } a; - a.f = value; - a.rbits.value = a.vbits.value >> 2; - a.rbits.realm = realm; + a.f = value; + a.rbits.value = a.vbits.value >> 1; + a.rbits.realm = realm; - return a.f; + return a.f; +} + +static inline float +box2df_penalty(const BOX2DF *b1, const BOX2DF *b2) +{ + float b1xmin = b1->xmin, b1xmax = b1->xmax; + float b1ymin = b1->ymin, b1ymax = b1->ymax; + float b2xmin = b2->xmin, b2xmax = b2->xmax; + float b2ymin = b2->ymin, b2ymax = b2->ymax; + + float box_union_xmin = Min(b1xmin, b2xmin), box_union_xmax = Max(b1xmax, b2xmax); + float box_union_ymin = Min(b1ymin, b2ymin), box_union_ymax = Max(b1ymax, b2ymax); + + float b1dx = b1xmax - b1xmin, b1dy = b1ymax - b1ymin; + float box_union_dx = box_union_xmax - box_union_xmin, box_union_dy = box_union_ymax - box_union_ymin; + + float box_union_area = box_union_dx * box_union_dy, box1area = b1dx * b1dy; + float box_union_edge = box_union_dx + box_union_dy, box1edge = b1dx + b1dy; + + float area_extension = box_union_area - box1area; + float edge_extension = box_union_edge - box1edge; + + /* REALM 1: Area extension is nonzero, return it */ + if (area_extension > FLT_EPSILON) + return pack_float(area_extension, 1); + /* REALM 0: Area extension is zero, return nonzero edge extension */ + else if (edge_extension > FLT_EPSILON) + return pack_float(edge_extension, 0); + + return 0; } /* @@ -1264,59 +1188,19 @@ Datum gserialized_gist_penalty_2d(PG_FUNCTION_ARGS) GISTENTRY *origentry = (GISTENTRY*) PG_GETARG_POINTER(0); GISTENTRY *newentry = (GISTENTRY*) PG_GETARG_POINTER(1); float *result = (float*) PG_GETARG_POINTER(2); - BOX2DF *gbox_index_orig, *gbox_index_new; - float size_union, size_orig, edge_union, edge_orig; - - POSTGIS_DEBUG(4, "[GIST] 'penalty' function called"); - - gbox_index_orig = (BOX2DF*)DatumGetPointer(origentry->key); - gbox_index_new = (BOX2DF*)DatumGetPointer(newentry->key); - - /* Drop out if we're dealing with null inputs. Shouldn't happen. */ - if ( (gbox_index_orig == NULL) && (gbox_index_new == NULL) ) - { - POSTGIS_DEBUG(4, "[GIST] both inputs NULL! returning penalty of zero"); - *result = 0.0; - PG_RETURN_FLOAT8(*result); - } - - /* Calculate the size difference of the boxes. */ - size_union = box2df_union_size(gbox_index_orig, gbox_index_new); - size_orig = box2df_size(gbox_index_orig); - *result = size_union - size_orig; + BOX2DF *b1, *b2; - /* REALM 0: No extension is required, volume is zero, return edge */ - /* REALM 1: No extension is required, return nonzero area */ - /* REALM 2: Area extension is zero, return nonzero edge extension */ - /* REALM 3: Area extension is nonzero, return it */ + b1 = (BOX2DF *)DatumGetPointer(origentry->key); + b2 = (BOX2DF *)DatumGetPointer(newentry->key); - if( *result == 0 ) - { - if (size_orig > 0) - { - *result = pack_float(size_orig, 1); /* REALM 1 */ - } - else - { - edge_union = box2df_union_edge(gbox_index_orig, gbox_index_new); - edge_orig = box2df_edge(gbox_index_orig); - *result = edge_union - edge_orig; - if( *result == 0 ) - { - *result = pack_float(edge_orig, 0); /* REALM 0 */ - } - else - { - *result = pack_float(*result, 2); /* REALM 2 */ - } - } - } - else - { - *result = pack_float(*result, 3); /* REALM 3 */ - } + /* Penalty value of 0 has special code path in Postgres's gistchoose. + * It is used as an early exit condition in subtree loop, allowing faster tree descend. + * For multi-column index, it lets next column break the tie, possibly more confidently. + */ + *result = 0; - POSTGIS_DEBUGF(4, "[GIST] 'penalty', union size (%.12f), original size (%.12f), penalty (%.12f)", size_union, size_orig, *result); + if (b1 && b2 && !box2df_is_empty(b1) && !box2df_is_empty(b2)) + *result = box2df_penalty(b1, b2); PG_RETURN_POINTER(result); } @@ -1371,7 +1255,6 @@ Datum gserialized_gist_same_2d(PG_FUNCTION_ARGS) PG_RETURN_POINTER(result); } -#if KOROTKOV_SPLIT > 0 /* * Adjust BOX2DF b boundaries with insertion of addon. */ @@ -1683,23 +1566,6 @@ g_box_consider_split(ConsiderSplitContext *context, int dimNum, } } -/* - * Return increase of original BOX2DF area by new BOX2DF area insertion. - */ -static float -box_penalty(BOX2DF *original, BOX2DF *new) -{ - float union_width, - union_height; - - union_width = Max(original->xmax, new->xmax) - - Min(original->xmin, new->xmin); - union_height = Max(original->ymax, new->ymax) - - Min(original->ymin, new->ymin); - return union_width * union_height - (original->xmax - original->xmin) * - (original->ymax - original->ymin); -} - /* * Compare common entries by their deltas. */ @@ -2059,8 +1925,7 @@ Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS) { box = (BOX2DF *) DatumGetPointer(entryvec->vector[ commonEntries[i].index].key); - commonEntries[i].delta = Abs(box_penalty(leftBox, box) - - box_penalty(rightBox, box)); + commonEntries[i].delta = Abs(box2df_penalty(leftBox, box) - box2df_penalty(rightBox, box)); } /* @@ -2088,7 +1953,7 @@ Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS) else { /* Otherwise select the group by minimal penalty */ - if (box_penalty(leftBox, box) < box_penalty(rightBox, box)) + if (box2df_penalty(leftBox, box) < box2df_penalty(rightBox, box)) PLACE_LEFT(box, commonEntries[i].index); else PLACE_RIGHT(box, commonEntries[i].index); @@ -2103,269 +1968,8 @@ Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS) PG_RETURN_POINTER(v); } -#else /* !KOROTOV_SPLIT */ - -typedef struct -{ - BOX2DF *key; - int pos; -} -KBsort; - -static int -compare_KB(const void* a, const void* b) -{ - BOX2DF *abox = ((KBsort*)a)->key; - BOX2DF *bbox = ((KBsort*)b)->key; - float sa = (abox->xmax - abox->xmin) * (abox->ymax - abox->ymin); - float sb = (bbox->xmax - bbox->xmin) * (bbox->ymax - bbox->ymin); - - if ( sa==sb ) return 0; - return ( sa>sb ) ? 1 : -1; -} - -/** -** The GiST PickSplit method -** New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree', -** C.H.Ang and T.C.Tan -*/ -PG_FUNCTION_INFO_V1(gserialized_gist_picksplit_2d); -Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS) -{ - GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); - - GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); - OffsetNumber i; - OffsetNumber *listL, *listR, *listB, *listT; - BOX2DF *unionL, *unionR, *unionB, *unionT; - int posL, posR, posB, posT; - BOX2DF pageunion; - BOX2DF *cur; - char direction = ' '; - bool allisequal = true; - OffsetNumber maxoff; - int nbytes; - - POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered"); - - posL = posR = posB = posT = 0; - - maxoff = entryvec->n - 1; - cur = (BOX2DF*) DatumGetPointer(entryvec->vector[FirstOffsetNumber].key); - - memcpy((void *) &pageunion, (void *) cur, sizeof(BOX2DF)); - - /* find MBR */ - for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i)) - { - cur = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key); - - if ( allisequal == true && ( - pageunion.xmax != cur->xmax || - pageunion.ymax != cur->ymax || - pageunion.xmin != cur->xmin || - pageunion.ymin != cur->ymin - ) ) - allisequal = false; - - if (pageunion.xmax < cur->xmax) - pageunion.xmax = cur->xmax; - if (pageunion.xmin > cur->xmin) - pageunion.xmin = cur->xmin; - if (pageunion.ymax < cur->ymax) - pageunion.ymax = cur->ymax; - if (pageunion.ymin > cur->ymin) - pageunion.ymin = cur->ymin; - } - - POSTGIS_DEBUGF(4, "pageunion is %s", box2df_to_string(&pageunion)); - - nbytes = (maxoff + 2) * sizeof(OffsetNumber); - listL = (OffsetNumber *) palloc(nbytes); - listR = (OffsetNumber *) palloc(nbytes); - unionL = (BOX2DF *) palloc(sizeof(BOX2DF)); - unionR = (BOX2DF *) palloc(sizeof(BOX2DF)); - - if (allisequal) - { - POSTGIS_DEBUG(4, " AllIsEqual!"); - - cur = (BOX2DF*) DatumGetPointer(entryvec->vector[OffsetNumberNext(FirstOffsetNumber)].key); - - - if (memcmp((void *) cur, (void *) &pageunion, sizeof(BOX2DF)) == 0) - { - v->spl_left = listL; - v->spl_right = listR; - v->spl_nleft = v->spl_nright = 0; - memcpy((void *) unionL, (void *) &pageunion, sizeof(BOX2DF)); - memcpy((void *) unionR, (void *) &pageunion, sizeof(BOX2DF)); - - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) - { - if (i <= (maxoff - FirstOffsetNumber + 1) / 2) - { - v->spl_left[v->spl_nleft] = i; - v->spl_nleft++; - } - else - { - v->spl_right[v->spl_nright] = i; - v->spl_nright++; - } - } - v->spl_ldatum = PointerGetDatum(unionL); - v->spl_rdatum = PointerGetDatum(unionR); - - PG_RETURN_POINTER(v); - } - } - - listB = (OffsetNumber *) palloc(nbytes); - listT = (OffsetNumber *) palloc(nbytes); - unionB = (BOX2DF *) palloc(sizeof(BOX2DF)); - unionT = (BOX2DF *) palloc(sizeof(BOX2DF)); - -#define ADDLIST( list, unionD, pos, num ) do { \ - if ( pos ) { \ - if ( unionD->xmax < cur->xmax ) unionD->xmax = cur->xmax; \ - if ( unionD->xmin > cur->xmin ) unionD->xmin = cur->xmin; \ - if ( unionD->ymax < cur->ymax ) unionD->ymax = cur->ymax; \ - if ( unionD->ymin > cur->ymin ) unionD->ymin = cur->ymin; \ - } else { \ - memcpy( (void*)unionD, (void*) cur, sizeof( BOX2DF ) ); \ - } \ - list[pos] = num; \ - (pos)++; \ -} while(0) - - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) - { - cur = (BOX2DF*) DatumGetPointer(entryvec->vector[i].key); - - if (cur->xmin - pageunion.xmin < pageunion.xmax - cur->xmax) - ADDLIST(listL, unionL, posL,i); - else - ADDLIST(listR, unionR, posR,i); - if (cur->ymin - pageunion.ymin < pageunion.ymax - cur->ymax) - ADDLIST(listB, unionB, posB,i); - else - ADDLIST(listT, unionT, posT,i); - } - - POSTGIS_DEBUGF(4, "unionL is %s", box2df_to_string(unionL)); - POSTGIS_DEBUGF(4, "unionR is %s", box2df_to_string(unionR)); - POSTGIS_DEBUGF(4, "unionT is %s", box2df_to_string(unionT)); - POSTGIS_DEBUGF(4, "unionB is %s", box2df_to_string(unionB)); - - /* bad disposition, sort by ascending and resplit */ - if ( (posR==0 || posL==0) && (posT==0 || posB==0) ) - { - KBsort *arr = (KBsort*)palloc( sizeof(KBsort) * maxoff ); - posL = posR = posB = posT = 0; - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) - { - arr[i-1].key = (BOX2DF*) DatumGetPointer(entryvec->vector[i].key); - arr[i-1].pos = i; - } - qsort( arr, maxoff, sizeof(KBsort), compare_KB ); - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) - { - cur = arr[i-1].key; - if (cur->xmin - pageunion.xmin < pageunion.xmax - cur->xmax) - ADDLIST(listL, unionL, posL,arr[i-1].pos); - else if ( cur->xmin - pageunion.xmin == pageunion.xmax - cur->xmax ) - { - if ( posL>posR ) - ADDLIST(listR, unionR, posR,arr[i-1].pos); - else - ADDLIST(listL, unionL, posL,arr[i-1].pos); - } - else - ADDLIST(listR, unionR, posR,arr[i-1].pos); - - if (cur->ymin - pageunion.ymin < pageunion.ymax - cur->ymax) - ADDLIST(listB, unionB, posB,arr[i-1].pos); - else if ( cur->ymin - pageunion.ymin == pageunion.ymax - cur->ymax ) - { - if ( posB>posT ) - ADDLIST(listT, unionT, posT,arr[i-1].pos); - else - ADDLIST(listB, unionB, posB,arr[i-1].pos); - } - else - ADDLIST(listT, unionT, posT,arr[i-1].pos); - } - pfree(arr); - } - - /* which split more optimal? */ - if (Max(posL, posR) < Max(posB, posT)) - direction = 'x'; - else if (Max(posL, posR) > Max(posB, posT)) - direction = 'y'; - else - { - float sizeLR, sizeBT; - BOX2DF interLR, interBT; - - if ( box2df_intersection(unionL, unionR, &interLR) == false ) - sizeLR = 0.0; - else - sizeLR = box2df_size(&interLR); - - if ( box2df_intersection(unionB, unionT, &interBT) == false ) - sizeBT = 0.0; - else - sizeBT = box2df_size(&interBT); - - if (sizeLR < sizeBT) - direction = 'x'; - else - direction = 'y'; - } - - POSTGIS_DEBUGF(4, "split direction '%c'", direction); - - if (direction == 'x') - { - pfree(unionB); - pfree(listB); - pfree(unionT); - pfree(listT); - - v->spl_left = listL; - v->spl_right = listR; - v->spl_nleft = posL; - v->spl_nright = posR; - v->spl_ldatum = PointerGetDatum(unionL); - v->spl_rdatum = PointerGetDatum(unionR); - } - else - { - pfree(unionR); - pfree(listR); - pfree(unionL); - pfree(listL); - - v->spl_left = listB; - v->spl_right = listT; - v->spl_nleft = posB; - v->spl_nright = posT; - v->spl_ldatum = PointerGetDatum(unionB); - v->spl_rdatum = PointerGetDatum(unionT); - } - - POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed"); - - PG_RETURN_POINTER(v); -} - -#endif - - /* -** The BOX32DF key must be defined as a PostgreSQL type, even though it is only +** The BOX2DF key must be defined as a PostgreSQL type, even though it is only ** ever used internally. These no-op stubs are used to bind the type. */ PG_FUNCTION_INFO_V1(box2df_in); diff --git a/postgis/gserialized_gist_nd.c b/postgis/gserialized_gist_nd.c index b9a2417af..8a5b605b2 100644 --- a/postgis/gserialized_gist_nd.c +++ b/postgis/gserialized_gist_nd.c @@ -19,7 +19,7 @@ ********************************************************************** * * Copyright 2009 Paul Ramsey - * Copyright 2017 Darafei Praliaskouski + * Copyright 2017-2019 Darafei Praliaskouski * **********************************************************************/ @@ -1134,35 +1134,33 @@ Datum gserialized_gist_consistent(PG_FUNCTION_ARGS) } /* -** Function to pack floats of different realms -** This function serves to pack bit flags inside float type -** Resulted value represent can be from four different "realms" -** Every value from realm 3 is greater than any value from realms 2, 1 and 0. -** Every value from realm 2 is less than every value from realm 3 and greater -** than any value from realm 1 and 0, and so on. Values from the same realm -** loose two bits of precision. This technique is possible due to floating -** point numbers specification according to IEEE 754: exponent bits are highest +** Function to pack floats of different realms. +** This function serves to pack bit flags inside float type. +** Result value represent can be from two different "realms". +** Every value from realm 1 is greater than any value from realm 0. +** Values from the same realm loose one bit of precision. +** +** This technique is possible due to floating point numbers specification +** according to IEEE 754: exponent bits are highest ** (excluding sign bits, but here penalty is always positive). If float a is ** greater than float b, integer A with same bit representation as a is greater ** than integer B with same bits as b. */ -static float -pack_float(const float value, const int realm) +static inline float +pack_float(const float value, const uint8_t realm) { union { float f; - struct - { + struct { unsigned value : 31, sign : 1; } vbits; - struct - { - unsigned value : 29, realm : 2, sign : 1; + struct { + unsigned value : 30, realm : 1, sign : 1; } rbits; } a; a.f = value; - a.rbits.value = a.vbits.value >> 2; + a.rbits.value = a.vbits.value >> 1; a.rbits.realm = realm; return a.f; @@ -1179,52 +1177,37 @@ Datum gserialized_gist_penalty(PG_FUNCTION_ARGS) GISTENTRY *newentry = (GISTENTRY *)PG_GETARG_POINTER(1); float *result = (float *)PG_GETARG_POINTER(2); GIDX *gbox_index_orig, *gbox_index_new; - float size_union, size_orig, edge_union, edge_orig; - - POSTGIS_DEBUG(4, "[GIST] 'penalty' function called"); gbox_index_orig = (GIDX *)DatumGetPointer(origentry->key); gbox_index_new = (GIDX *)DatumGetPointer(newentry->key); - /* Drop out if we're dealing with null inputs. Shouldn't happen. */ - if (!gbox_index_orig && !gbox_index_new) - { - POSTGIS_DEBUG(4, "[GIST] both inputs NULL! returning penalty of zero"); - *result = 0.0; - PG_RETURN_FLOAT8(*result); - } - - /* Calculate the size difference of the boxes (volume difference in this case). */ - size_union = gidx_union_volume(gbox_index_orig, gbox_index_new); - size_orig = gidx_volume(gbox_index_orig); - *result = size_union - size_orig; - - /* REALM 0: No extension is required, volume is zero, return edge */ - /* REALM 1: No extension is required, return nonzero area */ - /* REALM 2: Area extension is zero, return nonzero edge extension */ - /* REALM 3: Area extension is nonzero, return it */ + /* Penalty value of 0 has special code path in Postgres's gistchoose. + * It is used as an early exit condition in subtree loop, allowing faster tree descend. + * For multi-column index, it lets next column break the tie, possibly more confidently. + */ + *result = 0.0; - if (*result == 0) + /* Drop out if we're dealing with null inputs. Shouldn't happen. */ + if (gbox_index_orig && gbox_index_new) { - if (size_orig > 0) - *result = pack_float(size_orig, 1); /* REALM 1 */ + /* Calculate the size difference of the boxes (volume difference in this case). */ + float size_union = gidx_union_volume(gbox_index_orig, gbox_index_new); + float size_orig = gidx_volume(gbox_index_orig); + float volume_extension = size_union - size_orig; + + /* REALM 1: Area extension is nonzero, return it */ + if (volume_extension > FLT_EPSILON) + *result = pack_float(volume_extension, 1); else { - edge_union = gidx_union_edge(gbox_index_orig, gbox_index_new); - edge_orig = gidx_edge(gbox_index_orig); - *result = edge_union - edge_orig; - if (*result == 0) - *result = pack_float(edge_orig, 0); /* REALM 0 */ - else - *result = pack_float(*result, 2); /* REALM 2 */ + /* REALM 0: Area extension is zero, return nonzero edge extension */ + float edge_union = gidx_union_edge(gbox_index_orig, gbox_index_new); + float edge_orig = gidx_edge(gbox_index_orig); + float edge_extension = edge_union - edge_orig; + if (edge_extension > FLT_EPSILON) + *result = pack_float(edge_extension, 0); } } - else - *result = pack_float(*result, 3); /* REALM 3 */ - - POSTGIS_DEBUGF( - 4, "[GIST] union size (%.12f), original size (%.12f), penalty (%.12f)", size_union, size_orig, *result); - PG_RETURN_POINTER(result); } -- 2.49.0