]> granicus.if.org Git - postgresql/blob - src/backend/access/gist/gistsplit.c
Update copyright for 2018
[postgresql] / src / backend / access / gist / gistsplit.c
1 /*-------------------------------------------------------------------------
2  *
3  * gistsplit.c
4  *        Multi-column page splitting algorithm
5  *
6  * This file is concerned with making good page-split decisions in multi-column
7  * GiST indexes.  The opclass-specific picksplit functions can only be expected
8  * to produce answers based on a single column.  We first run the picksplit
9  * function for column 1; then, if there are more columns, we check if any of
10  * the tuples are "don't cares" so far as the column 1 split is concerned
11  * (that is, they could go to either side for no additional penalty).  If so,
12  * we try to redistribute those tuples on the basis of the next column.
13  * Repeat till we're out of columns.
14  *
15  * gistSplitByKey() is the entry point to this file.
16  *
17  *
18  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
19  * Portions Copyright (c) 1994, Regents of the University of California
20  *
21  * IDENTIFICATION
22  *        src/backend/access/gist/gistsplit.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 #include "postgres.h"
27
28 #include "access/gist_private.h"
29 #include "utils/rel.h"
30
31 typedef struct
32 {
33         OffsetNumber *entries;
34         int                     len;
35         Datum      *attr;
36         bool       *isnull;
37         bool       *dontcare;
38 } GistSplitUnion;
39
40
41 /*
42  * Form unions of subkeys in itvec[] entries listed in gsvp->entries[],
43  * ignoring any tuples that are marked in gsvp->dontcare[].  Subroutine for
44  * gistunionsubkey.
45  */
46 static void
47 gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
48                                    GistSplitUnion *gsvp)
49 {
50         IndexTuple *cleanedItVec;
51         int                     i,
52                                 cleanedLen = 0;
53
54         cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
55
56         for (i = 0; i < gsvp->len; i++)
57         {
58                 if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]])
59                         continue;
60
61                 cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
62         }
63
64         gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen,
65                                            gsvp->attr, gsvp->isnull);
66
67         pfree(cleanedItVec);
68 }
69
70 /*
71  * Recompute unions of left- and right-side subkeys after a page split,
72  * ignoring any tuples that are marked in spl->spl_dontcare[].
73  *
74  * Note: we always recompute union keys for all index columns.  In some cases
75  * this might represent duplicate work for the leftmost column(s), but it's
76  * not safe to assume that "zero penalty to move a tuple" means "the union
77  * key doesn't change at all".  Penalty functions aren't 100% accurate.
78  */
79 static void
80 gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
81 {
82         GistSplitUnion gsvp;
83
84         gsvp.dontcare = spl->spl_dontcare;
85
86         gsvp.entries = spl->splitVector.spl_left;
87         gsvp.len = spl->splitVector.spl_nleft;
88         gsvp.attr = spl->spl_lattr;
89         gsvp.isnull = spl->spl_lisnull;
90
91         gistunionsubkeyvec(giststate, itvec, &gsvp);
92
93         gsvp.entries = spl->splitVector.spl_right;
94         gsvp.len = spl->splitVector.spl_nright;
95         gsvp.attr = spl->spl_rattr;
96         gsvp.isnull = spl->spl_risnull;
97
98         gistunionsubkeyvec(giststate, itvec, &gsvp);
99 }
100
101 /*
102  * Find tuples that are "don't cares", that is could be moved to the other
103  * side of the split with zero penalty, so far as the attno column is
104  * concerned.
105  *
106  * Don't-care tuples are marked by setting the corresponding entry in
107  * spl->spl_dontcare[] to "true".  Caller must have initialized that array
108  * to zeroes.
109  *
110  * Returns number of don't-cares found.
111  */
112 static int
113 findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
114                           GistSplitVector *spl, int attno)
115 {
116         int                     i;
117         GISTENTRY       entry;
118         int                     NumDontCare = 0;
119
120         /*
121          * First, search the left-side tuples to see if any have zero penalty to
122          * be added to the right-side union key.
123          *
124          * attno column is known all-not-null (see gistSplitByKey), so we need not
125          * check for nulls
126          */
127         gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL,
128                                   (OffsetNumber) 0, false);
129         for (i = 0; i < spl->splitVector.spl_nleft; i++)
130         {
131                 int                     j = spl->splitVector.spl_left[i];
132                 float           penalty = gistpenalty(giststate, attno, &entry, false,
133                                                                                   &valvec[j], false);
134
135                 if (penalty == 0.0)
136                 {
137                         spl->spl_dontcare[j] = true;
138                         NumDontCare++;
139                 }
140         }
141
142         /* And conversely for the right-side tuples */
143         gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL,
144                                   (OffsetNumber) 0, false);
145         for (i = 0; i < spl->splitVector.spl_nright; i++)
146         {
147                 int                     j = spl->splitVector.spl_right[i];
148                 float           penalty = gistpenalty(giststate, attno, &entry, false,
149                                                                                   &valvec[j], false);
150
151                 if (penalty == 0.0)
152                 {
153                         spl->spl_dontcare[j] = true;
154                         NumDontCare++;
155                 }
156         }
157
158         return NumDontCare;
159 }
160
161 /*
162  * Remove tuples that are marked don't-cares from the tuple index array a[]
163  * of length *len.  This is applied separately to the spl_left and spl_right
164  * arrays.
165  */
166 static void
167 removeDontCares(OffsetNumber *a, int *len, const bool *dontcare)
168 {
169         int                     origlen,
170                                 newlen,
171                                 i;
172         OffsetNumber *curwpos;
173
174         origlen = newlen = *len;
175         curwpos = a;
176         for (i = 0; i < origlen; i++)
177         {
178                 OffsetNumber ai = a[i];
179
180                 if (dontcare[ai] == false)
181                 {
182                         /* re-emit item into a[] */
183                         *curwpos = ai;
184                         curwpos++;
185                 }
186                 else
187                         newlen--;
188         }
189
190         *len = newlen;
191 }
192
193 /*
194  * Place a single don't-care tuple into either the left or right side of the
195  * split, according to which has least penalty for merging the tuple into
196  * the previously-computed union keys.  We need consider only columns starting
197  * at attno.
198  */
199 static void
200 placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
201                  IndexTuple itup, OffsetNumber off, int attno)
202 {
203         GISTENTRY       identry[INDEX_MAX_KEYS];
204         bool            isnull[INDEX_MAX_KEYS];
205         bool            toLeft = true;
206
207         gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
208                                           identry, isnull);
209
210         for (; attno < giststate->tupdesc->natts; attno++)
211         {
212                 float           lpenalty,
213                                         rpenalty;
214                 GISTENTRY       entry;
215
216                 gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, false);
217                 lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
218                                                            identry + attno, isnull[attno]);
219                 gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, false);
220                 rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
221                                                            identry + attno, isnull[attno]);
222
223                 if (lpenalty != rpenalty)
224                 {
225                         if (lpenalty > rpenalty)
226                                 toLeft = false;
227                         break;
228                 }
229         }
230
231         if (toLeft)
232                 v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
233         else
234                 v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
235 }
236
237 #define SWAPVAR( s, d, t ) \
238 do {    \
239         (t) = (s); \
240         (s) = (d); \
241         (d) = (t); \
242 } while(0)
243
244 /*
245  * Clean up when we did a secondary split but the user-defined PickSplit
246  * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists
247  * true).
248  *
249  * We consider whether to swap the left and right outputs of the secondary
250  * split; this can be worthwhile if the penalty for merging those tuples into
251  * the previously chosen sets is less that way.
252  *
253  * In any case we must update the union datums for the current column by
254  * adding in the previous union keys (oldL/oldR), since the user-defined
255  * PickSplit method didn't do so.
256  */
257 static void
258 supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
259                                           GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
260 {
261         bool            leaveOnLeft = true,
262                                 tmpBool;
263         GISTENTRY       entryL,
264                                 entryR,
265                                 entrySL,
266                                 entrySR;
267
268         gistentryinit(entryL, oldL, r, NULL, 0, false);
269         gistentryinit(entryR, oldR, r, NULL, 0, false);
270         gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
271         gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
272
273         if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
274         {
275                 float           penalty1,
276                                         penalty2;
277
278                 penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
279                         gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
280                 penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
281                         gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
282
283                 if (penalty1 > penalty2)
284                         leaveOnLeft = false;
285         }
286         else
287         {
288                 GISTENTRY  *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
289                 float           penalty1,
290                                         penalty2;
291
292                 /*
293                  * There is only one previously defined union, so we just choose swap
294                  * or not by lowest penalty for that side.  We can only get here if a
295                  * secondary split happened to have all NULLs in its column in the
296                  * tuples that the outer recursion level had assigned to one side.
297                  * (Note that the null checks in gistSplitByKey don't prevent the
298                  * case, because they'll only be checking tuples that were considered
299                  * don't-cares at the outer recursion level, not the tuples that went
300                  * into determining the passed-down left and right union keys.)
301                  */
302                 penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
303                 penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
304
305                 if (penalty1 < penalty2)
306                         leaveOnLeft = (sv->spl_ldatum_exists) ? true : false;
307                 else
308                         leaveOnLeft = (sv->spl_rdatum_exists) ? true : false;
309         }
310
311         if (leaveOnLeft == false)
312         {
313                 /*
314                  * swap left and right
315                  */
316                 OffsetNumber *off,
317                                         noff;
318                 Datum           datum;
319
320                 SWAPVAR(sv->spl_left, sv->spl_right, off);
321                 SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
322                 SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
323                 gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
324                 gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
325         }
326
327         if (sv->spl_ldatum_exists)
328                 gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
329                                                  &sv->spl_ldatum, &tmpBool);
330
331         if (sv->spl_rdatum_exists)
332                 gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
333                                                  &sv->spl_rdatum, &tmpBool);
334
335         sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
336 }
337
338 /*
339  * Trivial picksplit implementation. Function called only
340  * if user-defined picksplit puts all keys on the same side of the split.
341  * That is a bug of user-defined picksplit but we don't want to fail.
342  */
343 static void
344 genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
345 {
346         OffsetNumber i,
347                                 maxoff;
348         int                     nbytes;
349         GistEntryVector *evec;
350
351         maxoff = entryvec->n - 1;
352
353         nbytes = (maxoff + 2) * sizeof(OffsetNumber);
354
355         v->spl_left = (OffsetNumber *) palloc(nbytes);
356         v->spl_right = (OffsetNumber *) palloc(nbytes);
357         v->spl_nleft = v->spl_nright = 0;
358
359         for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
360         {
361                 if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
362                 {
363                         v->spl_left[v->spl_nleft] = i;
364                         v->spl_nleft++;
365                 }
366                 else
367                 {
368                         v->spl_right[v->spl_nright] = i;
369                         v->spl_nright++;
370                 }
371         }
372
373         /*
374          * Form union datums for each side
375          */
376         evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ);
377
378         evec->n = v->spl_nleft;
379         memcpy(evec->vector, entryvec->vector + FirstOffsetNumber,
380                    sizeof(GISTENTRY) * evec->n);
381         v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno],
382                                                                           giststate->supportCollation[attno],
383                                                                           PointerGetDatum(evec),
384                                                                           PointerGetDatum(&nbytes));
385
386         evec->n = v->spl_nright;
387         memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft,
388                    sizeof(GISTENTRY) * evec->n);
389         v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno],
390                                                                           giststate->supportCollation[attno],
391                                                                           PointerGetDatum(evec),
392                                                                           PointerGetDatum(&nbytes));
393 }
394
395 /*
396  * Calls user picksplit method for attno column to split tuples into
397  * two vectors.
398  *
399  * Returns false if split is complete (there are no more index columns, or
400  * there is no need to consider them because split is optimal already).
401  *
402  * Returns true and v->spl_dontcare = NULL if the picksplit result is
403  * degenerate (all tuples seem to be don't-cares), so we should just
404  * disregard this column and split on the next column(s) instead.
405  *
406  * Returns true and v->spl_dontcare != NULL if there are don't-care tuples
407  * that could be relocated based on the next column(s).  The don't-care
408  * tuples have been removed from the split and must be reinserted by caller.
409  * There is at least one non-don't-care tuple on each side of the split,
410  * and union keys for all columns are updated to include just those tuples.
411  *
412  * A true result implies there is at least one more index column.
413  */
414 static bool
415 gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
416                                   IndexTuple *itup, int len, GISTSTATE *giststate)
417 {
418         GIST_SPLITVEC *sv = &v->splitVector;
419
420         /*
421          * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in
422          * case we are doing a secondary split (see comments in gist.h).
423          */
424         sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
425         sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
426         sv->spl_ldatum = v->spl_lattr[attno];
427         sv->spl_rdatum = v->spl_rattr[attno];
428
429         /*
430          * Let the opclass-specific PickSplit method do its thing.  Note that at
431          * this point we know there are no null keys in the entryvec.
432          */
433         FunctionCall2Coll(&giststate->picksplitFn[attno],
434                                           giststate->supportCollation[attno],
435                                           PointerGetDatum(entryvec),
436                                           PointerGetDatum(sv));
437
438         if (sv->spl_nleft == 0 || sv->spl_nright == 0)
439         {
440                 /*
441                  * User-defined picksplit failed to create an actual split, ie it put
442                  * everything on the same side.  Complain but cope.
443                  */
444                 ereport(DEBUG1,
445                                 (errcode(ERRCODE_INTERNAL_ERROR),
446                                  errmsg("picksplit method for column %d of index \"%s\" failed",
447                                                 attno + 1, RelationGetRelationName(r)),
448                                  errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command.")));
449
450                 /*
451                  * Reinit GIST_SPLITVEC. Although these fields are not used by
452                  * genericPickSplit(), set them up for further processing
453                  */
454                 sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
455                 sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
456                 sv->spl_ldatum = v->spl_lattr[attno];
457                 sv->spl_rdatum = v->spl_rattr[attno];
458
459                 /* Do a generic split */
460                 genericPickSplit(giststate, entryvec, sv, attno);
461         }
462         else
463         {
464                 /* hack for compatibility with old picksplit API */
465                 if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
466                         sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
467                 if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
468                         sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
469         }
470
471         /* Clean up if PickSplit didn't take care of a secondary split */
472         if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
473                 supportSecondarySplit(r, giststate, attno, sv,
474                                                           v->spl_lattr[attno], v->spl_rattr[attno]);
475
476         /* emit union datums computed by PickSplit back to v arrays */
477         v->spl_lattr[attno] = sv->spl_ldatum;
478         v->spl_rattr[attno] = sv->spl_rdatum;
479         v->spl_lisnull[attno] = false;
480         v->spl_risnull[attno] = false;
481
482         /*
483          * If index columns remain, then consider whether we can improve the split
484          * by using them.
485          */
486         v->spl_dontcare = NULL;
487
488         if (attno + 1 < giststate->tupdesc->natts)
489         {
490                 int                     NumDontCare;
491
492                 /*
493                  * Make a quick check to see if left and right union keys are equal;
494                  * if so, the split is certainly degenerate, so tell caller to
495                  * re-split with the next column.
496                  */
497                 if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
498                         return true;
499
500                 /*
501                  * Locate don't-care tuples, if any.  If there are none, the split is
502                  * optimal, so just fall out and return false.
503                  */
504                 v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
505
506                 NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
507
508                 if (NumDontCare > 0)
509                 {
510                         /*
511                          * Remove don't-cares from spl_left[] and spl_right[].
512                          */
513                         removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare);
514                         removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare);
515
516                         /*
517                          * If all tuples on either side were don't-cares, the split is
518                          * degenerate, and we're best off to ignore it and split on the
519                          * next column.  (We used to try to press on with a secondary
520                          * split by forcing a random tuple on each side to be treated as
521                          * non-don't-care, but it seems unlikely that that technique
522                          * really gives a better result.  Note that we don't want to try a
523                          * secondary split with empty left or right primary split sides,
524                          * because then there is no union key on that side for the
525                          * PickSplit function to try to expand, so it can have no good
526                          * figure of merit for what it's doing.  Also note that this check
527                          * ensures we can't produce a bogus one-side-only split in the
528                          * NumDontCare == 1 special case below.)
529                          */
530                         if (sv->spl_nleft == 0 || sv->spl_nright == 0)
531                         {
532                                 v->spl_dontcare = NULL;
533                                 return true;
534                         }
535
536                         /*
537                          * Recompute union keys, considering only non-don't-care tuples.
538                          * NOTE: this will set union keys for remaining index columns,
539                          * which will cause later calls of gistUserPicksplit to pass those
540                          * values down to user-defined PickSplit methods with
541                          * spl_ldatum_exists/spl_rdatum_exists set true.
542                          */
543                         gistunionsubkey(giststate, itup, v);
544
545                         if (NumDontCare == 1)
546                         {
547                                 /*
548                                  * If there's only one don't-care tuple then we can't do a
549                                  * PickSplit on it, so just choose whether to send it left or
550                                  * right by comparing penalties.  We needed the
551                                  * gistunionsubkey step anyway so that we have appropriate
552                                  * union keys for figuring the penalties.
553                                  */
554                                 OffsetNumber toMove;
555
556                                 /* find it ... */
557                                 for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
558                                 {
559                                         if (v->spl_dontcare[toMove])
560                                                 break;
561                                 }
562                                 Assert(toMove < entryvec->n);
563
564                                 /* ... and assign it to cheaper side */
565                                 placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
566
567                                 /*
568                                  * At this point the union keys are wrong, but we don't care
569                                  * because we're done splitting.  The outermost recursion
570                                  * level of gistSplitByKey will fix things before returning.
571                                  */
572                         }
573                         else
574                                 return true;
575                 }
576         }
577
578         return false;
579 }
580
581 /*
582  * simply split page in half
583  */
584 static void
585 gistSplitHalf(GIST_SPLITVEC *v, int len)
586 {
587         int                     i;
588
589         v->spl_nright = v->spl_nleft = 0;
590         v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
591         v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
592         for (i = 1; i <= len; i++)
593                 if (i < len / 2)
594                         v->spl_right[v->spl_nright++] = i;
595                 else
596                         v->spl_left[v->spl_nleft++] = i;
597
598         /* we need not compute union keys, caller took care of it */
599 }
600
601 /*
602  * gistSplitByKey: main entry point for page-splitting algorithm
603  *
604  * r: index relation
605  * page: page being split
606  * itup: array of IndexTuples to be processed
607  * len: number of IndexTuples to be processed (must be at least 2)
608  * giststate: additional info about index
609  * v: working state and output area
610  * attno: column we are working on (zero-based index)
611  *
612  * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays
613  * to all-true.  On return, spl_left/spl_nleft contain indexes of tuples
614  * to go left, spl_right/spl_nright contain indexes of tuples to go right,
615  * spl_lattr/spl_lisnull contain left-side union key values, and
616  * spl_rattr/spl_risnull contain right-side union key values.  Other fields
617  * in this struct are workspace for this file.
618  *
619  * Outside caller must pass zero for attno.  The function may internally
620  * recurse to the next column by passing attno+1.
621  */
622 void
623 gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
624                            GISTSTATE *giststate, GistSplitVector *v, int attno)
625 {
626         GistEntryVector *entryvec;
627         OffsetNumber *offNullTuples;
628         int                     nOffNullTuples = 0;
629         int                     i;
630
631         /* generate the item array, and identify tuples with null keys */
632         /* note that entryvec->vector[0] goes unused in this code */
633         entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
634         entryvec->n = len + 1;
635         offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
636
637         for (i = 1; i <= len; i++)
638         {
639                 Datum           datum;
640                 bool            IsNull;
641
642                 datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc,
643                                                           &IsNull);
644                 gistdentryinit(giststate, attno, &(entryvec->vector[i]),
645                                            datum, r, page, i,
646                                            false, IsNull);
647                 if (IsNull)
648                         offNullTuples[nOffNullTuples++] = i;
649         }
650
651         if (nOffNullTuples == len)
652         {
653                 /*
654                  * Corner case: All keys in attno column are null, so just transfer
655                  * our attention to the next column.  If there's no next column, just
656                  * split page in half.
657                  */
658                 v->spl_risnull[attno] = v->spl_lisnull[attno] = true;
659
660                 if (attno + 1 < giststate->tupdesc->natts)
661                         gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
662                 else
663                         gistSplitHalf(&v->splitVector, len);
664         }
665         else if (nOffNullTuples > 0)
666         {
667                 int                     j = 0;
668
669                 /*
670                  * We don't want to mix NULL and not-NULL keys on one page, so split
671                  * nulls to right page and not-nulls to left.
672                  */
673                 v->splitVector.spl_right = offNullTuples;
674                 v->splitVector.spl_nright = nOffNullTuples;
675                 v->spl_risnull[attno] = true;
676
677                 v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
678                 v->splitVector.spl_nleft = 0;
679                 for (i = 1; i <= len; i++)
680                         if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
681                                 j++;
682                         else
683                                 v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
684
685                 /* Compute union keys, unless outer recursion level will handle it */
686                 if (attno == 0 && giststate->tupdesc->natts == 1)
687                 {
688                         v->spl_dontcare = NULL;
689                         gistunionsubkey(giststate, itup, v);
690                 }
691         }
692         else
693         {
694                 /*
695                  * All keys are not-null, so apply user-defined PickSplit method
696                  */
697                 if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
698                 {
699                         /*
700                          * Splitting on attno column is not optimal, so consider
701                          * redistributing don't-care tuples according to the next column
702                          */
703                         Assert(attno + 1 < giststate->tupdesc->natts);
704
705                         if (v->spl_dontcare == NULL)
706                         {
707                                 /*
708                                  * This split was actually degenerate, so ignore it altogether
709                                  * and just split according to the next column.
710                                  */
711                                 gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
712                         }
713                         else
714                         {
715                                 /*
716                                  * Form an array of just the don't-care tuples to pass to a
717                                  * recursive invocation of this function for the next column.
718                                  */
719                                 IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
720                                 OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
721                                 int                     newlen = 0;
722                                 GIST_SPLITVEC backupSplit;
723
724                                 for (i = 0; i < len; i++)
725                                 {
726                                         if (v->spl_dontcare[i + 1])
727                                         {
728                                                 newitup[newlen] = itup[i];
729                                                 map[newlen] = i + 1;
730                                                 newlen++;
731                                         }
732                                 }
733
734                                 Assert(newlen > 0);
735
736                                 /*
737                                  * Make a backup copy of v->splitVector, since the recursive
738                                  * call will overwrite that with its own result.
739                                  */
740                                 backupSplit = v->splitVector;
741                                 backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
742                                 memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
743                                 backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
744                                 memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
745
746                                 /* Recursively decide how to split the don't-care tuples */
747                                 gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);
748
749                                 /* Merge result of subsplit with non-don't-care tuples */
750                                 for (i = 0; i < v->splitVector.spl_nleft; i++)
751                                         backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
752                                 for (i = 0; i < v->splitVector.spl_nright; i++)
753                                         backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
754
755                                 v->splitVector = backupSplit;
756                         }
757                 }
758         }
759
760         /*
761          * If we're handling a multicolumn index, at the end of the recursion
762          * recompute the left and right union datums for all index columns.  This
763          * makes sure we hand back correct union datums in all corner cases,
764          * including when we haven't processed all columns to start with, or when
765          * a secondary split moved "don't care" tuples from one side to the other
766          * (we really shouldn't assume that that didn't change the union datums).
767          *
768          * Note: when we're in an internal recursion (attno > 0), we do not worry
769          * about whether the union datums we return with are sensible, since
770          * calling levels won't care.  Also, in a single-column index, we expect
771          * that PickSplit (or the special cases above) produced correct union
772          * datums.
773          */
774         if (attno == 0 && giststate->tupdesc->natts > 1)
775         {
776                 v->spl_dontcare = NULL;
777                 gistunionsubkey(giststate, itup, v);
778         }
779 }