#1895, New node splitting algorithm for GiST

author Paul Ramsey <pramsey@cleverelephant.ca>

Tue, 13 Nov 2012 22:10:35 +0000 (22:10 +0000)

committer Paul Ramsey <pramsey@cleverelephant.ca>

Tue, 13 Nov 2012 22:10:35 +0000 (22:10 +0000)
author Paul Ramsey <pramsey@cleverelephant.ca>
Tue, 13 Nov 2012 22:10:35 +0000 (22:10 +0000)
committer Paul Ramsey <pramsey@cleverelephant.ca>
Tue, 13 Nov 2012 22:10:35 +0000 (22:10 +0000)
diff --git a/NEWS b/NEWS

index bae94c8b4fe7faec4dfebce99e008cdbedaa3458..ebff2595ebbf9faa869f73a76817a7b2e167471d 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -47,6 +47,7 @@ PostGIS 2.1.0
      (Bborie Park / UC Davis)
    - ST_Tile(raster) to break up a raster into tiles (Bborie Park / UC Davis)
    - #739, UpdateRasterSRID()
+  - #1895, new r-tree node splitting algorithm (Alex Korotkov)
  
  * Enhancements *
    - #823,  tiger geocoder: Make loader_generate_script download portion 
diff --git a/postgis/gserialized_gist_2d.c b/postgis/gserialized_gist_2d.c

index 39e20a6ba97b42e6e72eb83bbb809ea021046ec9..1c6213f7a1c8138f8527632b3bbab35da36a7886 100644 (file)
--- a/postgis/gserialized_gist_2d.c
+++ b/postgis/gserialized_gist_2d.c
@@ -20,6 +20,8 @@
  ** [3] N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger. The R*tree: an
  **     efficient and robust access method for points and rectangles.
  **     Proceedings of the ACM SIGMOD Conference. June 1990.
+** [4] A. Korotkov, "A new double sorting-based node splitting algorithm for R-tree", 
+**     http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
  */
  
  #include "postgres.h"
@@ -40,6 +42,12 @@
  */
  #define LIMIT_RATIO 0.1
  
+/*
+** 0 == don't use it
+** 1 == use it
+*/
+#define KOROTKOV_SPLIT 0
+
  /*
  ** For debugging
  */
@@ -133,8 +141,7 @@ static void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
         return;
  }
  
-
-
+#if KOROTKOV_SPLIT < 1
  static bool box2df_intersection(const BOX2DF *a, const BOX2DF *b, BOX2DF *n)
  {
         POSTGIS_DEBUGF(5, "calculating intersection of %s with %s", box2df_to_string(a), box2df_to_string(b));
@@ -154,6 +161,7 @@ static bool box2df_intersection(const BOX2DF *a, const BOX2DF *b, BOX2DF *n)
  
         return TRUE;
  }
+#endif
  
  static float box2df_size(const BOX2DF *a)
  {
@@ -1107,6 +1115,709 @@ Datum gserialized_gist_same_2d(PG_FUNCTION_ARGS)
         PG_RETURN_POINTER(result);
  }
  
+#if KOROTKOV_SPLIT > 0
+/*
+ * Adjust BOX2DF b boundaries with insertion of addon.
+ */
+static void
+adjustBox(BOX2DF *b, BOX2DF *addon)
+{
+       if (b->xmax < addon->xmax)
+               b->xmax = addon->xmax;
+       if (b->xmin > addon->xmin)
+               b->xmin = addon->xmin;
+       if (b->ymax < addon->ymax)
+               b->ymax = addon->ymax;
+       if (b->ymin > addon->ymin)
+               b->ymin = addon->ymin;
+}
+
+/*
+ * Trivial split: half of entries will be placed on one page
+ * and another half - to another
+ */
+static void
+fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
+{
+       OffsetNumber i,
+                               maxoff;
+       BOX2DF     *unionL = NULL,
+                          *unionR = NULL;
+       int                     nbytes;
+
+       maxoff = entryvec->n - 1;
+
+       nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+       v->spl_left = (OffsetNumber *) palloc(nbytes);
+       v->spl_right = (OffsetNumber *) palloc(nbytes);
+       v->spl_nleft = v->spl_nright = 0;
+
+       for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+       {
+               BOX2DF     *cur = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
+
+               if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
+               {
+                       v->spl_left[v->spl_nleft] = i;
+                       if (unionL == NULL)
+                       {
+                               unionL = (BOX2DF *) palloc(sizeof(BOX2DF));
+                               *unionL = *cur;
+                       }
+                       else
+                               adjustBox(unionL, cur);
+
+                       v->spl_nleft++;
+               }
+               else
+               {
+                       v->spl_right[v->spl_nright] = i;
+                       if (unionR == NULL)
+                       {
+                               unionR = (BOX2DF *) palloc(sizeof(BOX2DF));
+                               *unionR = *cur;
+                       }
+                       else
+                               adjustBox(unionR, cur);
+
+                       v->spl_nright++;
+               }
+       }
+
+       if (v->spl_ldatum_exists)
+               adjustBox(unionL, (BOX2DF *) DatumGetPointer(v->spl_ldatum));
+       v->spl_ldatum = BoxPGetDatum(unionL);
+
+       if (v->spl_rdatum_exists)
+               adjustBox(unionR, (BOX2DF *) DatumGetPointer(v->spl_rdatum));
+       v->spl_rdatum = BoxPGetDatum(unionR);
+
+       v->spl_ldatum_exists = v->spl_rdatum_exists = false;
+}
+
+/*
+ * Represents information about an entry that can be placed to either group
+ * without affecting overlap over selected axis ("common entry").
+ */
+typedef struct
+{
+       /* Index of entry in the initial array */
+       int                     index;
+       /* Delta between penalties of entry insertion into different groups */
+       float           delta;
+} CommonEntry;
+
+/*
+ * Context for g_box_consider_split. Contains information about currently
+ * selected split and some general information.
+ */
+typedef struct
+{
+       int                     entriesCount;   /* total number of entries being split */
+       BOX2DF          boundingBox;    /* minimum bounding box across all entries */
+
+       /* Information about currently selected split follows */
+
+       bool            first;                  /* true if no split was selected yet */
+
+       float           leftUpper;              /* upper bound of left interval */
+       float           rightLower;             /* lower bound of right interval */
+
+       float4          ratio;
+       float4          overlap;
+       int                     dim;                    /* axis of this split */
+       float           range;                  /* width of general MBR projection to the
+                                                                * selected axis */
+} ConsiderSplitContext;
+
+/*
+ * Interval represents projection of box to axis.
+ */
+typedef struct
+{
+       float           lower,
+                               upper;
+} SplitInterval;
+
+/*
+ * Interval comparison function by lower bound of the interval;
+ */
+static int
+interval_cmp_lower(const void *i1, const void *i2)
+{
+       float           lower1 = ((const SplitInterval *) i1)->lower,
+                               lower2 = ((const SplitInterval *) i2)->lower;
+
+       if (lower1 < lower2)
+               return -1;
+       else if (lower1 > lower2)
+               return 1;
+       else
+               return 0;
+}
+
+/*
+ * Interval comparison function by upper bound of the interval;
+ */
+static int
+interval_cmp_upper(const void *i1, const void *i2)
+{
+       float           upper1 = ((const SplitInterval *) i1)->upper,
+                               upper2 = ((const SplitInterval *) i2)->upper;
+
+       if (upper1 < upper2)
+               return -1;
+       else if (upper1 > upper2)
+               return 1;
+       else
+               return 0;
+}
+
+/*
+ * Replace negative value with zero.
+ */
+static inline float
+non_negative(float val)
+{
+       if (val >= 0.0f)
+               return val;
+       else
+               return 0.0f;
+}
+
+/*
+ * Consider replacement of currently selected split with the better one.
+ */
+static inline void
+g_box_consider_split(ConsiderSplitContext *context, int dimNum,
+                                        float rightLower, int minLeftCount,
+                                        float leftUpper, int maxLeftCount)
+{
+       int                     leftCount,
+                               rightCount;
+       float4          ratio,
+                               overlap;
+       float           range;
+
+       POSTGIS_DEBUGF(5, "consider split: dimNum = %d, rightLower = %f, "
+               "minLeftCount = %d, leftUpper = %f, maxLeftCount = %d ",
+               dimNum, rightLower, minLeftCount, leftUpper, maxLeftCount);
+       
+       /*
+        * Calculate entries distribution ratio assuming most uniform distribution
+        * of common entries.
+        */
+       if (minLeftCount >= (context->entriesCount + 1) / 2)
+       {
+               leftCount = minLeftCount;
+       }
+       else
+       {
+               if (maxLeftCount <= context->entriesCount / 2)
+                       leftCount = maxLeftCount;
+               else
+                       leftCount = context->entriesCount / 2;
+       }
+       rightCount = context->entriesCount - leftCount;
+
+       /*
+        * Ratio of split - quotient between size of lesser group and total
+        * entries count.
+        */
+       ratio = ((float4) Min(leftCount, rightCount)) /
+               ((float4) context->entriesCount);
+
+       if (ratio > LIMIT_RATIO)
+       {
+               bool            selectthis = false;
+
+               /*
+                * The ratio is acceptable, so compare current split with previously
+                * selected one. Between splits of one dimension we search for minimal
+                * overlap (allowing negative values) and minimal ration (between same
+                * overlaps. We switch dimension if find less overlap (non-negative)
+                * or less range with same overlap.
+                */
+               if (dimNum == 0)
+                       range = context->boundingBox.xmax - context->boundingBox.xmin;
+               else
+                       range = context->boundingBox.ymax - context->boundingBox.ymin;
+
+               overlap = (leftUpper - rightLower) / range;
+
+               /* If there is no previous selection, select this */
+               if (context->first)
+                       selectthis = true;
+               else if (context->dim == dimNum)
+               {
+                       /*
+                        * Within the same dimension, choose the new split if it has a
+                        * smaller overlap, or same overlap but better ratio.
+                        */
+                       if (overlap < context->overlap ||
+                               (overlap == context->overlap && ratio > context->ratio))
+                               selectthis = true;
+               }
+               else
+               {
+                       /*
+                        * Across dimensions, choose the new split if it has a smaller
+                        * *non-negative* overlap, or same *non-negative* overlap but
+                        * bigger range. This condition differs from the one described in
+                        * the article. On the datasets where leaf MBRs don't overlap
+                        * themselves, non-overlapping splits (i.e. splits which have zero
+                        * *non-negative* overlap) are frequently possible. In this case
+                        * splits tends to be along one dimension, because most distant
+                        * non-overlapping splits (i.e. having lowest negative overlap)
+                        * appears to be in the same dimension as in the previous split.
+                        * Therefore MBRs appear to be very prolonged along another
+                        * dimension, which leads to bad search performance. Using range
+                        * as the second split criteria makes MBRs more quadratic. Using
+                        * *non-negative* overlap instead of overlap as the first split
+                        * criteria gives to range criteria a chance to matter, because
+                        * non-overlapping splits are equivalent in this criteria.
+                        */
+                       if (non_negative(overlap) < non_negative(context->overlap) ||
+                               (range > context->range &&
+                                non_negative(overlap) <= non_negative(context->overlap)))
+                               selectthis = true;
+               }
+
+               if (selectthis)
+               {
+                       /* save information about selected split */
+                       context->first = false;
+                       context->ratio = ratio;
+                       context->range = range;
+                       context->overlap = overlap;
+                       context->rightLower = rightLower;
+                       context->leftUpper = leftUpper;
+                       context->dim = dimNum;
+                       POSTGIS_DEBUG(5, "split selected");
+               }
+       }
+}
+
+/*
+ * Return increase of original BOX2DF area by new BOX2DF area insertion.
+ */
+static float
+box_penalty(BOX2DF *original, BOX2DF *new)
+{
+       float           union_width,
+                               union_height;
+
+       union_width = Max(original->xmax, new->xmax) -
+               Min(original->xmin, new->xmin);
+       union_height = Max(original->ymax, new->ymax) -
+               Min(original->ymin, new->ymin);
+       return union_width * union_height - (original->xmax - original->xmin) *
+               (original->ymax - original->ymin);
+}
+
+/*
+ * Compare common entries by their deltas.
+ */
+static int
+common_entry_cmp(const void *i1, const void *i2)
+{
+       float           delta1 = ((const CommonEntry *) i1)->delta,
+                               delta2 = ((const CommonEntry *) i2)->delta;
+
+       if (delta1 < delta2)
+               return -1;
+       else if (delta1 > delta2)
+               return 1;
+       else
+               return 0;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Double sorting split algorithm. This is used for both boxes and points.
+ *
+ * The algorithm finds split of boxes by considering splits along each axis.
+ * Each entry is first projected as an interval on the X-axis, and different
+ * ways to split the intervals into two groups are considered, trying to
+ * minimize the overlap of the groups. Then the same is repeated for the
+ * Y-axis, and the overall best split is chosen. The quality of a split is
+ * determined by overlap along that axis and some other criteria (see
+ * g_box_consider_split).
+ *
+ * After that, all the entries are divided into three groups:
+ *
+ * 1) Entries which should be placed to the left group
+ * 2) Entries which should be placed to the right group
+ * 3) "Common entries" which can be placed to any of groups without affecting
+ *       of overlap along selected axis.
+ *
+ * The common entries are distributed by minimizing penalty.
+ *
+ * For details see:
+ * "A new double sorting-based node splitting algorithm for R-tree", A. Korotkov
+ * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
+ * --------------------------------------------------------------------------
+ */
+PG_FUNCTION_INFO_V1(gserialized_gist_picksplit_2d);
+Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS)
+{
+       GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+       GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+       OffsetNumber i,
+                               maxoff;
+       ConsiderSplitContext context;
+       BOX2DF     *box,
+                          *leftBox,
+                          *rightBox;
+       int                     dim,
+                               commonEntriesCount;
+       SplitInterval *intervalsLower,
+                          *intervalsUpper;
+       CommonEntry *commonEntries;
+       int                     nentries;
+       
+       POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
+
+       memset(&context, 0, sizeof(ConsiderSplitContext));
+
+       maxoff = entryvec->n - 1;
+       nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
+
+       /* Allocate arrays for intervals along axes */
+       intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
+       intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
+
+       /*
+        * Calculate the overall minimum bounding box over all the entries.
+        */
+       for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+       {
+               box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
+               if (i == FirstOffsetNumber)
+                       context.boundingBox = *box;
+               else
+                       adjustBox(&context.boundingBox, box);
+       }
+       
+       POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
+                                                                                                               &context.boundingBox));
+
+       /*
+        * Iterate over axes for optimal split searching.
+        */
+       context.first = true;           /* nothing selected yet */
+       for (dim = 0; dim < 2; dim++)
+       {
+               float           leftUpper,
+                                       rightLower;
+               int                     i1,
+                                       i2;
+
+               /* Project each entry as an interval on the selected axis. */
+               for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+               {
+                       box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
+                       if (dim == 0)
+                       {
+                               intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
+                               intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
+                       }
+                       else
+                       {
+                               intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
+                               intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
+                       }
+               }
+
+               /*
+                * Make two arrays of intervals: one sorted by lower bound and another
+                * sorted by upper bound.
+                */
+               memcpy(intervalsUpper, intervalsLower,
+                          sizeof(SplitInterval) * nentries);
+               qsort(intervalsLower, nentries, sizeof(SplitInterval),
+                         interval_cmp_lower);
+               qsort(intervalsUpper, nentries, sizeof(SplitInterval),
+                         interval_cmp_upper);
+
+               /*----
+                * The goal is to form a left and right interval, so that every entry
+                * interval is contained by either left or right interval (or both).
+                *
+                * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
+                *
+                * 0 1 2 3 4
+                * +-+
+                *       +---+
+                *         +-+
+                *         +---+
+                *
+                * The left and right intervals are of the form (0,a) and (b,4).
+                * We first consider splits where b is the lower bound of an entry.
+                * We iterate through all entries, and for each b, calculate the
+                * smallest possible a. Then we consider splits where a is the
+                * uppper bound of an entry, and for each a, calculate the greatest
+                * possible b.
+                *
+                * In the above example, the first loop would consider splits:
+                * b=0: (0,1)-(0,4)
+                * b=1: (0,1)-(1,4)
+                * b=2: (0,3)-(2,4)
+                *
+                * And the second loop:
+                * a=1: (0,1)-(1,4)
+                * a=3: (0,3)-(2,4)
+                * a=4: (0,4)-(2,4)
+                */
+
+               /*
+                * Iterate over lower bound of right group, finding smallest possible
+                * upper bound of left group.
+                */
+               i1 = 0;
+               i2 = 0;
+               rightLower = intervalsLower[i1].lower;
+               leftUpper = intervalsUpper[i2].lower;
+               while (true)
+               {
+                       /*
+                        * Find next lower bound of right group.
+                        */
+                       while (i1 < nentries && rightLower == intervalsLower[i1].lower)
+                       {
+                               leftUpper = Max(leftUpper, intervalsLower[i1].upper);
+                               i1++;
+                       }
+                       if (i1 >= nentries)
+                               break;
+                       rightLower = intervalsLower[i1].lower;
+
+                       /*
+                        * Find count of intervals which anyway should be placed to the
+                        * left group.
+                        */
+                       while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
+                               i2++;
+
+                       /*
+                        * Consider found split.
+                        */
+                       g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
+               }
+
+               /*
+                * Iterate over upper bound of left group finding greates possible
+                * lower bound of right group.
+                */
+               i1 = nentries - 1;
+               i2 = nentries - 1;
+               rightLower = intervalsLower[i1].upper;
+               leftUpper = intervalsUpper[i2].upper;
+               while (true)
+               {
+                       /*
+                        * Find next upper bound of left group.
+                        */
+                       while (i2 >= 0 && leftUpper == intervalsUpper[i2].upper)
+                       {
+                               rightLower = Min(rightLower, intervalsUpper[i2].lower);
+                               i2--;
+                       }
+                       if (i2 < 0)
+                               break;
+                       leftUpper = intervalsUpper[i2].upper;
+
+                       /*
+                        * Find count of intervals which anyway should be placed to the
+                        * right group.
+                        */
+                       while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
+                               i1--;
+
+                       /*
+                        * Consider found split.
+                        */
+                       g_box_consider_split(&context, dim,
+                                                                rightLower, i1 + 1, leftUpper, i2 + 1);
+               }
+       }
+
+       /*
+        * If we failed to find any acceptable splits, use trivial split.
+        */
+       if (context.first)
+       {
+               POSTGIS_DEBUG(4, "no acceptable splits,  trivial split");
+               fallbackSplit(entryvec, v);
+               PG_RETURN_POINTER(v);
+       }
+
+       /*
+        * Ok, we have now selected the split across one axis.
+        *
+        * While considering the splits, we already determined that there will be
+        * enough entries in both groups to reach the desired ratio, but we did
+        * not memorize which entries go to which group. So determine that now.
+        */
+
+       POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
+       
+       /* Allocate vectors for results */
+       v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
+       v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
+       v->spl_nleft = 0;
+       v->spl_nright = 0;
+
+       /* Allocate bounding boxes of left and right groups */
+       leftBox = palloc0(sizeof(BOX2DF));
+       rightBox = palloc0(sizeof(BOX2DF));
+
+       /*
+        * Allocate an array for "common entries" - entries which can be placed to
+        * either group without affecting overlap along selected axis.
+        */
+       commonEntriesCount = 0;
+       commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
+
+       /* Helper macros to place an entry in the left or right group */
+#define PLACE_LEFT(box, off)                                   \
+       do {                                                                            \
+               if (v->spl_nleft > 0)                                   \
+                       adjustBox(leftBox, box);                        \
+               else                                                                    \
+                       *leftBox = *(box);                                      \
+               v->spl_left[v->spl_nleft++] = off;              \
+       } while(0)
+
+#define PLACE_RIGHT(box, off)                                  \
+       do {                                                                            \
+               if (v->spl_nright > 0)                                  \
+                       adjustBox(rightBox, box);                       \
+               else                                                                    \
+                       *rightBox = *(box);                                     \
+               v->spl_right[v->spl_nright++] = off;    \
+       } while(0)
+
+       /*
+        * Distribute entries which can be distributed unambiguously, and collect
+        * common entries.
+        */
+       for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+       {
+               float           lower,
+                                       upper;
+
+               /*
+                * Get upper and lower bounds along selected axis.
+                */
+               box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
+               if (context.dim == 0)
+               {
+                       lower = box->xmin;
+                       upper = box->xmax;
+               }
+               else
+               {
+                       lower = box->ymin;
+                       upper = box->ymax;
+               }
+
+               if (upper <= context.leftUpper)
+               {
+                       /* Fits to the left group */
+                       if (lower >= context.rightLower)
+                       {
+                               /* Fits also to the right group, so "common entry" */
+                               commonEntries[commonEntriesCount++].index = i;
+                       }
+                       else
+                       {
+                               /* Doesn't fit to the right group, so join to the left group */
+                               PLACE_LEFT(box, i);
+                       }
+               }
+               else
+               {
+                       /*
+                        * Each entry should fit on either left or right group. Since this
+                        * entry didn't fit on the left group, it better fit in the right
+                        * group.
+                        */
+                       Assert(lower >= context.rightLower);
+
+                       /* Doesn't fit to the left group, so join to the right group */
+                       PLACE_RIGHT(box, i);
+               }
+       }
+       
+       POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
+       POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
+
+       /*
+        * Distribute "common entries", if any.
+        */
+       if (commonEntriesCount > 0)
+       {
+               /*
+                * Calculate minimum number of entries that must be placed in both
+                * groups, to reach LIMIT_RATIO.
+                */
+               int                     m = ceil(LIMIT_RATIO * (double) nentries);
+
+               /*
+                * Calculate delta between penalties of join "common entries" to
+                * different groups.
+                */
+               for (i = 0; i < commonEntriesCount; i++)
+               {
+                       box = (BOX2DF *) DatumGetPointer(entryvec->vector[
+                                                                                               commonEntries[i].index].key);
+                       commonEntries[i].delta = Abs(box_penalty(leftBox, box) -
+                                                                                box_penalty(rightBox, box));
+               }
+
+               /*
+                * Sort "common entries" by calculated deltas in order to distribute
+                * the most ambiguous entries first.
+                */
+               qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
+
+               /*
+                * Distribute "common entries" between groups.
+                */
+               for (i = 0; i < commonEntriesCount; i++)
+               {
+                       box = (BOX2DF *) DatumGetPointer(entryvec->vector[
+                                                                                               commonEntries[i].index].key);
+
+                       /*
+                        * Check if we have to place this entry in either group to achieve
+                        * LIMIT_RATIO.
+                        */
+                       if (v->spl_nleft + (commonEntriesCount - i) <= m)
+                               PLACE_LEFT(box, commonEntries[i].index);
+                       else if (v->spl_nright + (commonEntriesCount - i) <= m)
+                               PLACE_RIGHT(box, commonEntries[i].index);
+                       else
+                       {
+                               /* Otherwise select the group by minimal penalty */
+                               if (box_penalty(leftBox, box) < box_penalty(rightBox, box))
+                                       PLACE_LEFT(box, commonEntries[i].index);
+                               else
+                                       PLACE_RIGHT(box, commonEntries[i].index);
+                       }
+               }
+       }
+       v->spl_ldatum = PointerGetDatum(leftBox);
+       v->spl_rdatum = PointerGetDatum(rightBox);
+       
+       POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
+       
+       PG_RETURN_POINTER(v);
+}
+
+#else /* !KOROTOV_SPLIT */
+
  typedef struct
  {
         BOX2DF *key;
@@ -1363,6 +2074,8 @@ Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS)
         PG_RETURN_POINTER(v);
  }
  
+#endif
+
  
  /*
  ** The BOX32DF key must be defined as a PostgreSQL type, even though it is only
author	Paul Ramsey <pramsey@cleverelephant.ca>
	Tue, 13 Nov 2012 22:10:35 +0000 (22:10 +0000)
committer	Paul Ramsey <pramsey@cleverelephant.ca>
	Tue, 13 Nov 2012 22:10:35 +0000 (22:10 +0000)
NEWS		patch \| blob \| history
postgis/gserialized_gist_2d.c		patch \| blob \| history