From ae99a524037e76c041d18f410cd3fb037d81e33b Mon Sep 17 00:00:00 2001
From: Paul Ramsey <pramsey@cleverelephant.ca>
Date: Tue, 13 Nov 2012 22:10:35 +0000
Subject: [PATCH] #1895, New node splitting algorithm for GiST Set the
 KOROTKOV_SPLIT define to 1 to use the new approach, to 0 to use the old
 approach. After testing is complete, we can set the new split as the default.

git-svn-id: http://svn.osgeo.org/postgis/trunk@10665 b70326c6-7e19-0410-871a-916f4a2858ee
---
 NEWS                          |   1 +
 postgis/gserialized_gist_2d.c | 717 +++++++++++++++++++++++++++++++++-
 2 files changed, 716 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index bae94c8b4..ebff2595e 100644
--- a/NEWS
+++ b/NEWS
@@ -47,6 +47,7 @@ PostGIS 2.1.0
     (Bborie Park / UC Davis)
   - ST_Tile(raster) to break up a raster into tiles (Bborie Park / UC Davis)
   - #739, UpdateRasterSRID()
+  - #1895, new r-tree node splitting algorithm (Alex Korotkov)
 
 * Enhancements *
   - #823,  tiger geocoder: Make loader_generate_script download portion 
diff --git a/postgis/gserialized_gist_2d.c b/postgis/gserialized_gist_2d.c
index 39e20a6ba..1c6213f7a 100644
--- a/postgis/gserialized_gist_2d.c
+++ b/postgis/gserialized_gist_2d.c
@@ -20,6 +20,8 @@
 ** [3] N. Beckmann, H.-P. Kriegel, R. Schneider, B. Seeger. The R*tree: an
 **     efficient and robust access method for points and rectangles.
 **     Proceedings of the ACM SIGMOD Conference. June 1990.
+** [4] A. Korotkov, "A new double sorting-based node splitting algorithm for R-tree", 
+**     http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
 */
 
 #include "postgres.h"
@@ -40,6 +42,12 @@
 */
 #define LIMIT_RATIO 0.1
 
+/*
+** 0 == don't use it
+** 1 == use it
+*/
+#define KOROTKOV_SPLIT 0
+
 /*
 ** For debugging
 */
@@ -133,8 +141,7 @@ static void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
 	return;
 }
 
-
-
+#if KOROTKOV_SPLIT < 1
 static bool box2df_intersection(const BOX2DF *a, const BOX2DF *b, BOX2DF *n)
 {
 	POSTGIS_DEBUGF(5, "calculating intersection of %s with %s", box2df_to_string(a), box2df_to_string(b));
@@ -154,6 +161,7 @@ static bool box2df_intersection(const BOX2DF *a, const BOX2DF *b, BOX2DF *n)
 
 	return TRUE;
 }
+#endif
 
 static float box2df_size(const BOX2DF *a)
 {
@@ -1107,6 +1115,709 @@ Datum gserialized_gist_same_2d(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(result);
 }
 
+#if KOROTKOV_SPLIT > 0
+/*
+ * Adjust BOX2DF b boundaries with insertion of addon.
+ */
+static void
+adjustBox(BOX2DF *b, BOX2DF *addon)
+{
+	if (b->xmax < addon->xmax)
+		b->xmax = addon->xmax;
+	if (b->xmin > addon->xmin)
+		b->xmin = addon->xmin;
+	if (b->ymax < addon->ymax)
+		b->ymax = addon->ymax;
+	if (b->ymin > addon->ymin)
+		b->ymin = addon->ymin;
+}
+
+/*
+ * Trivial split: half of entries will be placed on one page
+ * and another half - to another
+ */
+static void
+fallbackSplit(GistEntryVector *entryvec, GIST_SPLITVEC *v)
+{
+	OffsetNumber i,
+				maxoff;
+	BOX2DF	   *unionL = NULL,
+			   *unionR = NULL;
+	int			nbytes;
+
+	maxoff = entryvec->n - 1;
+
+	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+	v->spl_left = (OffsetNumber *) palloc(nbytes);
+	v->spl_right = (OffsetNumber *) palloc(nbytes);
+	v->spl_nleft = v->spl_nright = 0;
+
+	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+	{
+		BOX2DF	   *cur = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
+
+		if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
+		{
+			v->spl_left[v->spl_nleft] = i;
+			if (unionL == NULL)
+			{
+				unionL = (BOX2DF *) palloc(sizeof(BOX2DF));
+				*unionL = *cur;
+			}
+			else
+				adjustBox(unionL, cur);
+
+			v->spl_nleft++;
+		}
+		else
+		{
+			v->spl_right[v->spl_nright] = i;
+			if (unionR == NULL)
+			{
+				unionR = (BOX2DF *) palloc(sizeof(BOX2DF));
+				*unionR = *cur;
+			}
+			else
+				adjustBox(unionR, cur);
+
+			v->spl_nright++;
+		}
+	}
+
+	if (v->spl_ldatum_exists)
+		adjustBox(unionL, (BOX2DF *) DatumGetPointer(v->spl_ldatum));
+	v->spl_ldatum = BoxPGetDatum(unionL);
+
+	if (v->spl_rdatum_exists)
+		adjustBox(unionR, (BOX2DF *) DatumGetPointer(v->spl_rdatum));
+	v->spl_rdatum = BoxPGetDatum(unionR);
+
+	v->spl_ldatum_exists = v->spl_rdatum_exists = false;
+}
+
+/*
+ * Represents information about an entry that can be placed to either group
+ * without affecting overlap over selected axis ("common entry").
+ */
+typedef struct
+{
+	/* Index of entry in the initial array */
+	int			index;
+	/* Delta between penalties of entry insertion into different groups */
+	float		delta;
+} CommonEntry;
+
+/*
+ * Context for g_box_consider_split. Contains information about currently
+ * selected split and some general information.
+ */
+typedef struct
+{
+	int			entriesCount;	/* total number of entries being split */
+	BOX2DF		boundingBox;	/* minimum bounding box across all entries */
+
+	/* Information about currently selected split follows */
+
+	bool		first;			/* true if no split was selected yet */
+
+	float		leftUpper;		/* upper bound of left interval */
+	float		rightLower;		/* lower bound of right interval */
+
+	float4		ratio;
+	float4		overlap;
+	int			dim;			/* axis of this split */
+	float		range;			/* width of general MBR projection to the
+								 * selected axis */
+} ConsiderSplitContext;
+
+/*
+ * Interval represents projection of box to axis.
+ */
+typedef struct
+{
+	float		lower,
+				upper;
+} SplitInterval;
+
+/*
+ * Interval comparison function by lower bound of the interval;
+ */
+static int
+interval_cmp_lower(const void *i1, const void *i2)
+{
+	float		lower1 = ((const SplitInterval *) i1)->lower,
+				lower2 = ((const SplitInterval *) i2)->lower;
+
+	if (lower1 < lower2)
+		return -1;
+	else if (lower1 > lower2)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Interval comparison function by upper bound of the interval;
+ */
+static int
+interval_cmp_upper(const void *i1, const void *i2)
+{
+	float		upper1 = ((const SplitInterval *) i1)->upper,
+				upper2 = ((const SplitInterval *) i2)->upper;
+
+	if (upper1 < upper2)
+		return -1;
+	else if (upper1 > upper2)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Replace negative value with zero.
+ */
+static inline float
+non_negative(float val)
+{
+	if (val >= 0.0f)
+		return val;
+	else
+		return 0.0f;
+}
+
+/*
+ * Consider replacement of currently selected split with the better one.
+ */
+static inline void
+g_box_consider_split(ConsiderSplitContext *context, int dimNum,
+					 float rightLower, int minLeftCount,
+					 float leftUpper, int maxLeftCount)
+{
+	int			leftCount,
+				rightCount;
+	float4		ratio,
+				overlap;
+	float		range;
+
+	POSTGIS_DEBUGF(5, "consider split: dimNum = %d, rightLower = %f, "
+		"minLeftCount = %d, leftUpper = %f, maxLeftCount = %d ",
+		dimNum, rightLower, minLeftCount, leftUpper, maxLeftCount);
+	
+	/*
+	 * Calculate entries distribution ratio assuming most uniform distribution
+	 * of common entries.
+	 */
+	if (minLeftCount >= (context->entriesCount + 1) / 2)
+	{
+		leftCount = minLeftCount;
+	}
+	else
+	{
+		if (maxLeftCount <= context->entriesCount / 2)
+			leftCount = maxLeftCount;
+		else
+			leftCount = context->entriesCount / 2;
+	}
+	rightCount = context->entriesCount - leftCount;
+
+	/*
+	 * Ratio of split - quotient between size of lesser group and total
+	 * entries count.
+	 */
+	ratio = ((float4) Min(leftCount, rightCount)) /
+		((float4) context->entriesCount);
+
+	if (ratio > LIMIT_RATIO)
+	{
+		bool		selectthis = false;
+
+		/*
+		 * The ratio is acceptable, so compare current split with previously
+		 * selected one. Between splits of one dimension we search for minimal
+		 * overlap (allowing negative values) and minimal ration (between same
+		 * overlaps. We switch dimension if find less overlap (non-negative)
+		 * or less range with same overlap.
+		 */
+		if (dimNum == 0)
+			range = context->boundingBox.xmax - context->boundingBox.xmin;
+		else
+			range = context->boundingBox.ymax - context->boundingBox.ymin;
+
+		overlap = (leftUpper - rightLower) / range;
+
+		/* If there is no previous selection, select this */
+		if (context->first)
+			selectthis = true;
+		else if (context->dim == dimNum)
+		{
+			/*
+			 * Within the same dimension, choose the new split if it has a
+			 * smaller overlap, or same overlap but better ratio.
+			 */
+			if (overlap < context->overlap ||
+				(overlap == context->overlap && ratio > context->ratio))
+				selectthis = true;
+		}
+		else
+		{
+			/*
+			 * Across dimensions, choose the new split if it has a smaller
+			 * *non-negative* overlap, or same *non-negative* overlap but
+			 * bigger range. This condition differs from the one described in
+			 * the article. On the datasets where leaf MBRs don't overlap
+			 * themselves, non-overlapping splits (i.e. splits which have zero
+			 * *non-negative* overlap) are frequently possible. In this case
+			 * splits tends to be along one dimension, because most distant
+			 * non-overlapping splits (i.e. having lowest negative overlap)
+			 * appears to be in the same dimension as in the previous split.
+			 * Therefore MBRs appear to be very prolonged along another
+			 * dimension, which leads to bad search performance. Using range
+			 * as the second split criteria makes MBRs more quadratic. Using
+			 * *non-negative* overlap instead of overlap as the first split
+			 * criteria gives to range criteria a chance to matter, because
+			 * non-overlapping splits are equivalent in this criteria.
+			 */
+			if (non_negative(overlap) < non_negative(context->overlap) ||
+				(range > context->range &&
+				 non_negative(overlap) <= non_negative(context->overlap)))
+				selectthis = true;
+		}
+
+		if (selectthis)
+		{
+			/* save information about selected split */
+			context->first = false;
+			context->ratio = ratio;
+			context->range = range;
+			context->overlap = overlap;
+			context->rightLower = rightLower;
+			context->leftUpper = leftUpper;
+			context->dim = dimNum;
+			POSTGIS_DEBUG(5, "split selected");
+		}
+	}
+}
+
+/*
+ * Return increase of original BOX2DF area by new BOX2DF area insertion.
+ */
+static float
+box_penalty(BOX2DF *original, BOX2DF *new)
+{
+	float		union_width,
+				union_height;
+
+	union_width = Max(original->xmax, new->xmax) -
+		Min(original->xmin, new->xmin);
+	union_height = Max(original->ymax, new->ymax) -
+		Min(original->ymin, new->ymin);
+	return union_width * union_height - (original->xmax - original->xmin) *
+		(original->ymax - original->ymin);
+}
+
+/*
+ * Compare common entries by their deltas.
+ */
+static int
+common_entry_cmp(const void *i1, const void *i2)
+{
+	float		delta1 = ((const CommonEntry *) i1)->delta,
+				delta2 = ((const CommonEntry *) i2)->delta;
+
+	if (delta1 < delta2)
+		return -1;
+	else if (delta1 > delta2)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Double sorting split algorithm. This is used for both boxes and points.
+ *
+ * The algorithm finds split of boxes by considering splits along each axis.
+ * Each entry is first projected as an interval on the X-axis, and different
+ * ways to split the intervals into two groups are considered, trying to
+ * minimize the overlap of the groups. Then the same is repeated for the
+ * Y-axis, and the overall best split is chosen. The quality of a split is
+ * determined by overlap along that axis and some other criteria (see
+ * g_box_consider_split).
+ *
+ * After that, all the entries are divided into three groups:
+ *
+ * 1) Entries which should be placed to the left group
+ * 2) Entries which should be placed to the right group
+ * 3) "Common entries" which can be placed to any of groups without affecting
+ *	  of overlap along selected axis.
+ *
+ * The common entries are distributed by minimizing penalty.
+ *
+ * For details see:
+ * "A new double sorting-based node splitting algorithm for R-tree", A. Korotkov
+ * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
+ * --------------------------------------------------------------------------
+ */
+PG_FUNCTION_INFO_V1(gserialized_gist_picksplit_2d);
+Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS)
+{
+	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+	OffsetNumber i,
+				maxoff;
+	ConsiderSplitContext context;
+	BOX2DF	   *box,
+			   *leftBox,
+			   *rightBox;
+	int			dim,
+				commonEntriesCount;
+	SplitInterval *intervalsLower,
+			   *intervalsUpper;
+	CommonEntry *commonEntries;
+	int			nentries;
+	
+	POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
+
+	memset(&context, 0, sizeof(ConsiderSplitContext));
+
+	maxoff = entryvec->n - 1;
+	nentries = context.entriesCount = maxoff - FirstOffsetNumber + 1;
+
+	/* Allocate arrays for intervals along axes */
+	intervalsLower = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
+	intervalsUpper = (SplitInterval *) palloc(nentries * sizeof(SplitInterval));
+
+	/*
+	 * Calculate the overall minimum bounding box over all the entries.
+	 */
+	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+	{
+		box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
+		if (i == FirstOffsetNumber)
+			context.boundingBox = *box;
+		else
+			adjustBox(&context.boundingBox, box);
+	}
+	
+	POSTGIS_DEBUGF(4, "boundingBox is %s", box2df_to_string(
+														&context.boundingBox));
+
+	/*
+	 * Iterate over axes for optimal split searching.
+	 */
+	context.first = true;		/* nothing selected yet */
+	for (dim = 0; dim < 2; dim++)
+	{
+		float		leftUpper,
+					rightLower;
+		int			i1,
+					i2;
+
+		/* Project each entry as an interval on the selected axis. */
+		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+		{
+			box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
+			if (dim == 0)
+			{
+				intervalsLower[i - FirstOffsetNumber].lower = box->xmin;
+				intervalsLower[i - FirstOffsetNumber].upper = box->xmax;
+			}
+			else
+			{
+				intervalsLower[i - FirstOffsetNumber].lower = box->ymin;
+				intervalsLower[i - FirstOffsetNumber].upper = box->ymax;
+			}
+		}
+
+		/*
+		 * Make two arrays of intervals: one sorted by lower bound and another
+		 * sorted by upper bound.
+		 */
+		memcpy(intervalsUpper, intervalsLower,
+			   sizeof(SplitInterval) * nentries);
+		qsort(intervalsLower, nentries, sizeof(SplitInterval),
+			  interval_cmp_lower);
+		qsort(intervalsUpper, nentries, sizeof(SplitInterval),
+			  interval_cmp_upper);
+
+		/*----
+		 * The goal is to form a left and right interval, so that every entry
+		 * interval is contained by either left or right interval (or both).
+		 *
+		 * For example, with the intervals (0,1), (1,3), (2,3), (2,4):
+		 *
+		 * 0 1 2 3 4
+		 * +-+
+		 *	 +---+
+		 *	   +-+
+		 *	   +---+
+		 *
+		 * The left and right intervals are of the form (0,a) and (b,4).
+		 * We first consider splits where b is the lower bound of an entry.
+		 * We iterate through all entries, and for each b, calculate the
+		 * smallest possible a. Then we consider splits where a is the
+		 * uppper bound of an entry, and for each a, calculate the greatest
+		 * possible b.
+		 *
+		 * In the above example, the first loop would consider splits:
+		 * b=0: (0,1)-(0,4)
+		 * b=1: (0,1)-(1,4)
+		 * b=2: (0,3)-(2,4)
+		 *
+		 * And the second loop:
+		 * a=1: (0,1)-(1,4)
+		 * a=3: (0,3)-(2,4)
+		 * a=4: (0,4)-(2,4)
+		 */
+
+		/*
+		 * Iterate over lower bound of right group, finding smallest possible
+		 * upper bound of left group.
+		 */
+		i1 = 0;
+		i2 = 0;
+		rightLower = intervalsLower[i1].lower;
+		leftUpper = intervalsUpper[i2].lower;
+		while (true)
+		{
+			/*
+			 * Find next lower bound of right group.
+			 */
+			while (i1 < nentries && rightLower == intervalsLower[i1].lower)
+			{
+				leftUpper = Max(leftUpper, intervalsLower[i1].upper);
+				i1++;
+			}
+			if (i1 >= nentries)
+				break;
+			rightLower = intervalsLower[i1].lower;
+
+			/*
+			 * Find count of intervals which anyway should be placed to the
+			 * left group.
+			 */
+			while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
+				i2++;
+
+			/*
+			 * Consider found split.
+			 */
+			g_box_consider_split(&context, dim, rightLower, i1, leftUpper, i2);
+		}
+
+		/*
+		 * Iterate over upper bound of left group finding greates possible
+		 * lower bound of right group.
+		 */
+		i1 = nentries - 1;
+		i2 = nentries - 1;
+		rightLower = intervalsLower[i1].upper;
+		leftUpper = intervalsUpper[i2].upper;
+		while (true)
+		{
+			/*
+			 * Find next upper bound of left group.
+			 */
+			while (i2 >= 0 && leftUpper == intervalsUpper[i2].upper)
+			{
+				rightLower = Min(rightLower, intervalsUpper[i2].lower);
+				i2--;
+			}
+			if (i2 < 0)
+				break;
+			leftUpper = intervalsUpper[i2].upper;
+
+			/*
+			 * Find count of intervals which anyway should be placed to the
+			 * right group.
+			 */
+			while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
+				i1--;
+
+			/*
+			 * Consider found split.
+			 */
+			g_box_consider_split(&context, dim,
+								 rightLower, i1 + 1, leftUpper, i2 + 1);
+		}
+	}
+
+	/*
+	 * If we failed to find any acceptable splits, use trivial split.
+	 */
+	if (context.first)
+	{
+		POSTGIS_DEBUG(4, "no acceptable splits,  trivial split");
+		fallbackSplit(entryvec, v);
+		PG_RETURN_POINTER(v);
+	}
+
+	/*
+	 * Ok, we have now selected the split across one axis.
+	 *
+	 * While considering the splits, we already determined that there will be
+	 * enough entries in both groups to reach the desired ratio, but we did
+	 * not memorize which entries go to which group. So determine that now.
+	 */
+
+	POSTGIS_DEBUGF(4, "split direction: %d", context.dim);
+	
+	/* Allocate vectors for results */
+	v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
+	v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
+	v->spl_nleft = 0;
+	v->spl_nright = 0;
+
+	/* Allocate bounding boxes of left and right groups */
+	leftBox = palloc0(sizeof(BOX2DF));
+	rightBox = palloc0(sizeof(BOX2DF));
+
+	/*
+	 * Allocate an array for "common entries" - entries which can be placed to
+	 * either group without affecting overlap along selected axis.
+	 */
+	commonEntriesCount = 0;
+	commonEntries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
+
+	/* Helper macros to place an entry in the left or right group */
+#define PLACE_LEFT(box, off)					\
+	do {										\
+		if (v->spl_nleft > 0)					\
+			adjustBox(leftBox, box);			\
+		else									\
+			*leftBox = *(box);					\
+		v->spl_left[v->spl_nleft++] = off;		\
+	} while(0)
+
+#define PLACE_RIGHT(box, off)					\
+	do {										\
+		if (v->spl_nright > 0)					\
+			adjustBox(rightBox, box);			\
+		else									\
+			*rightBox = *(box);					\
+		v->spl_right[v->spl_nright++] = off;	\
+	} while(0)
+
+	/*
+	 * Distribute entries which can be distributed unambiguously, and collect
+	 * common entries.
+	 */
+	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+	{
+		float		lower,
+					upper;
+
+		/*
+		 * Get upper and lower bounds along selected axis.
+		 */
+		box = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
+		if (context.dim == 0)
+		{
+			lower = box->xmin;
+			upper = box->xmax;
+		}
+		else
+		{
+			lower = box->ymin;
+			upper = box->ymax;
+		}
+
+		if (upper <= context.leftUpper)
+		{
+			/* Fits to the left group */
+			if (lower >= context.rightLower)
+			{
+				/* Fits also to the right group, so "common entry" */
+				commonEntries[commonEntriesCount++].index = i;
+			}
+			else
+			{
+				/* Doesn't fit to the right group, so join to the left group */
+				PLACE_LEFT(box, i);
+			}
+		}
+		else
+		{
+			/*
+			 * Each entry should fit on either left or right group. Since this
+			 * entry didn't fit on the left group, it better fit in the right
+			 * group.
+			 */
+			Assert(lower >= context.rightLower);
+
+			/* Doesn't fit to the left group, so join to the right group */
+			PLACE_RIGHT(box, i);
+		}
+	}
+	
+	POSTGIS_DEBUGF(4, "leftBox is %s", box2df_to_string(leftBox));
+	POSTGIS_DEBUGF(4, "rightBox is %s", box2df_to_string(rightBox));
+
+	/*
+	 * Distribute "common entries", if any.
+	 */
+	if (commonEntriesCount > 0)
+	{
+		/*
+		 * Calculate minimum number of entries that must be placed in both
+		 * groups, to reach LIMIT_RATIO.
+		 */
+		int			m = ceil(LIMIT_RATIO * (double) nentries);
+
+		/*
+		 * Calculate delta between penalties of join "common entries" to
+		 * different groups.
+		 */
+		for (i = 0; i < commonEntriesCount; i++)
+		{
+			box = (BOX2DF *) DatumGetPointer(entryvec->vector[
+												commonEntries[i].index].key);
+			commonEntries[i].delta = Abs(box_penalty(leftBox, box) -
+										 box_penalty(rightBox, box));
+		}
+
+		/*
+		 * Sort "common entries" by calculated deltas in order to distribute
+		 * the most ambiguous entries first.
+		 */
+		qsort(commonEntries, commonEntriesCount, sizeof(CommonEntry), common_entry_cmp);
+
+		/*
+		 * Distribute "common entries" between groups.
+		 */
+		for (i = 0; i < commonEntriesCount; i++)
+		{
+			box = (BOX2DF *) DatumGetPointer(entryvec->vector[
+												commonEntries[i].index].key);
+
+			/*
+			 * Check if we have to place this entry in either group to achieve
+			 * LIMIT_RATIO.
+			 */
+			if (v->spl_nleft + (commonEntriesCount - i) <= m)
+				PLACE_LEFT(box, commonEntries[i].index);
+			else if (v->spl_nright + (commonEntriesCount - i) <= m)
+				PLACE_RIGHT(box, commonEntries[i].index);
+			else
+			{
+				/* Otherwise select the group by minimal penalty */
+				if (box_penalty(leftBox, box) < box_penalty(rightBox, box))
+					PLACE_LEFT(box, commonEntries[i].index);
+				else
+					PLACE_RIGHT(box, commonEntries[i].index);
+			}
+		}
+	}
+	v->spl_ldatum = PointerGetDatum(leftBox);
+	v->spl_rdatum = PointerGetDatum(rightBox);
+	
+	POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
+	
+	PG_RETURN_POINTER(v);
+}
+
+#else /* !KOROTOV_SPLIT */
+
 typedef struct
 {
 	BOX2DF *key;
@@ -1363,6 +2074,8 @@ Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(v);
 }
 
+#endif
+
 
 /*
 ** The BOX32DF key must be defined as a PostgreSQL type, even though it is only
-- 
2.50.1