]> granicus.if.org Git - postgresql/blob - contrib/intarray/_int_gist.c
Fix longstanding error in contrib/intarray's int[] & int[] operator.
[postgresql] / contrib / intarray / _int_gist.c
1 /*
2  * contrib/intarray/_int_gist.c
3  */
4 #include "postgres.h"
5
6 #include "access/gist.h"
7 #include "access/skey.h"
8
9 #include "_int.h"
10
11 #define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
12
13 /*
14 ** GiST support methods
15 */
16 PG_FUNCTION_INFO_V1(g_int_consistent);
17 PG_FUNCTION_INFO_V1(g_int_compress);
18 PG_FUNCTION_INFO_V1(g_int_decompress);
19 PG_FUNCTION_INFO_V1(g_int_penalty);
20 PG_FUNCTION_INFO_V1(g_int_picksplit);
21 PG_FUNCTION_INFO_V1(g_int_union);
22 PG_FUNCTION_INFO_V1(g_int_same);
23
24 Datum           g_int_consistent(PG_FUNCTION_ARGS);
25 Datum           g_int_compress(PG_FUNCTION_ARGS);
26 Datum           g_int_decompress(PG_FUNCTION_ARGS);
27 Datum           g_int_penalty(PG_FUNCTION_ARGS);
28 Datum           g_int_picksplit(PG_FUNCTION_ARGS);
29 Datum           g_int_union(PG_FUNCTION_ARGS);
30 Datum           g_int_same(PG_FUNCTION_ARGS);
31
32
33 /*
34 ** The GiST Consistent method for _intments
35 ** Should return false if for all data items x below entry,
36 ** the predicate x op query == FALSE, where op is the oper
37 ** corresponding to strategy in the pg_amop table.
38 */
39 Datum
40 g_int_consistent(PG_FUNCTION_ARGS)
41 {
42         GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
43         ArrayType  *query = PG_GETARG_ARRAYTYPE_P_COPY(1);
44         StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
45
46         /* Oid          subtype = PG_GETARG_OID(3); */
47         bool       *recheck = (bool *) PG_GETARG_POINTER(4);
48         bool            retval;
49
50         /* this is exact except for RTSameStrategyNumber */
51         *recheck = (strategy == RTSameStrategyNumber);
52
53         if (strategy == BooleanSearchStrategy)
54         {
55                 retval = execconsistent((QUERYTYPE *) query,
56                                                                 (ArrayType *) DatumGetPointer(entry->key),
57                                                                 GIST_LEAF(entry));
58
59                 pfree(query);
60                 PG_RETURN_BOOL(retval);
61         }
62
63         /* sort query for fast search, key is already sorted */
64         CHECKARRVALID(query);
65         PREPAREARR(query);
66
67         switch (strategy)
68         {
69                 case RTOverlapStrategyNumber:
70                         retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
71                                                                            query);
72                         break;
73                 case RTSameStrategyNumber:
74                         if (GIST_LEAF(entry))
75                                 DirectFunctionCall3(g_int_same,
76                                                                         entry->key,
77                                                                         PointerGetDatum(query),
78                                                                         PointerGetDatum(&retval));
79                         else
80                                 retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
81                                                                                         query);
82                         break;
83                 case RTContainsStrategyNumber:
84                 case RTOldContainsStrategyNumber:
85                         retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
86                                                                                 query);
87                         break;
88                 case RTContainedByStrategyNumber:
89                 case RTOldContainedByStrategyNumber:
90                         if (GIST_LEAF(entry))
91                                 retval = inner_int_contains(query,
92                                                                   (ArrayType *) DatumGetPointer(entry->key));
93                         else
94                                 retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
95                                                                                    query);
96                         break;
97                 default:
98                         retval = FALSE;
99         }
100         pfree(query);
101         PG_RETURN_BOOL(retval);
102 }
103
104 Datum
105 g_int_union(PG_FUNCTION_ARGS)
106 {
107         GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
108         int                *size = (int *) PG_GETARG_POINTER(1);
109         int4            i,
110                            *ptr;
111         ArrayType  *res;
112         int                     totlen = 0;
113
114         for (i = 0; i < entryvec->n; i++)
115         {
116                 ArrayType  *ent = GETENTRY(entryvec, i);
117
118                 CHECKARRVALID(ent);
119                 totlen += ARRNELEMS(ent);
120         }
121
122         res = new_intArrayType(totlen);
123         ptr = ARRPTR(res);
124
125         for (i = 0; i < entryvec->n; i++)
126         {
127                 ArrayType  *ent = GETENTRY(entryvec, i);
128                 int                     nel;
129
130                 nel = ARRNELEMS(ent);
131                 memcpy(ptr, ARRPTR(ent), nel * sizeof(int4));
132                 ptr += nel;
133         }
134
135         QSORT(res, 1);
136         res = _int_unique(res);
137         *size = VARSIZE(res);
138         PG_RETURN_POINTER(res);
139 }
140
141 /*
142 ** GiST Compress and Decompress methods
143 */
144 Datum
145 g_int_compress(PG_FUNCTION_ARGS)
146 {
147         GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
148         GISTENTRY  *retval;
149         ArrayType  *r;
150         int                     len;
151         int                *dr;
152         int                     i,
153                                 min,
154                                 cand;
155
156         if (entry->leafkey)
157         {
158                 r = DatumGetArrayTypePCopy(entry->key);
159                 CHECKARRVALID(r);
160                 PREPAREARR(r);
161
162                 if (ARRNELEMS(r) >= 2 * MAXNUMRANGE)
163                         elog(NOTICE, "input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",
164                                  2 * MAXNUMRANGE - 1, ARRNELEMS(r));
165
166                 retval = palloc(sizeof(GISTENTRY));
167                 gistentryinit(*retval, PointerGetDatum(r),
168                                           entry->rel, entry->page, entry->offset, FALSE);
169
170                 PG_RETURN_POINTER(retval);
171         }
172
173         /*
174          * leaf entries never compress one more time, only when entry->leafkey
175          * ==true, so now we work only with internal keys
176          */
177
178         r = DatumGetArrayTypeP(entry->key);
179         CHECKARRVALID(r);
180         if (ARRISEMPTY(r))
181         {
182                 if (r != (ArrayType *) DatumGetPointer(entry->key))
183                         pfree(r);
184                 PG_RETURN_POINTER(entry);
185         }
186
187         if ((len = ARRNELEMS(r)) >= 2 * MAXNUMRANGE)
188         {                                                       /* compress */
189                 if (r == (ArrayType *) DatumGetPointer(entry->key))
190                         r = DatumGetArrayTypePCopy(entry->key);
191                 r = resize_intArrayType(r, 2 * (len));
192
193                 dr = ARRPTR(r);
194
195                 for (i = len - 1; i >= 0; i--)
196                         dr[2 * i] = dr[2 * i + 1] = dr[i];
197
198                 len *= 2;
199                 cand = 1;
200                 while (len > MAXNUMRANGE * 2)
201                 {
202                         min = 0x7fffffff;
203                         for (i = 2; i < len; i += 2)
204                                 if (min > (dr[i] - dr[i - 1]))
205                                 {
206                                         min = (dr[i] - dr[i - 1]);
207                                         cand = i;
208                                 }
209                         memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * sizeof(int32));
210                         len -= 2;
211                 }
212                 r = resize_intArrayType(r, len);
213                 retval = palloc(sizeof(GISTENTRY));
214                 gistentryinit(*retval, PointerGetDatum(r),
215                                           entry->rel, entry->page, entry->offset, FALSE);
216                 PG_RETURN_POINTER(retval);
217         }
218         else
219                 PG_RETURN_POINTER(entry);
220
221         PG_RETURN_POINTER(entry);
222 }
223
224 Datum
225 g_int_decompress(PG_FUNCTION_ARGS)
226 {
227         GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
228         GISTENTRY  *retval;
229         ArrayType  *r;
230         int                *dr,
231                                 lenr;
232         ArrayType  *in;
233         int                     lenin;
234         int                *din;
235         int                     i,
236                                 j;
237
238         in = DatumGetArrayTypeP(entry->key);
239
240         CHECKARRVALID(in);
241         if (ARRISEMPTY(in))
242         {
243                 if (in != (ArrayType *) DatumGetPointer(entry->key))
244                 {
245                         retval = palloc(sizeof(GISTENTRY));
246                         gistentryinit(*retval, PointerGetDatum(in),
247                                                   entry->rel, entry->page, entry->offset, FALSE);
248                         PG_RETURN_POINTER(retval);
249                 }
250
251                 PG_RETURN_POINTER(entry);
252         }
253
254         lenin = ARRNELEMS(in);
255
256         if (lenin < 2 * MAXNUMRANGE)
257         {                                                       /* not compressed value */
258                 if (in != (ArrayType *) DatumGetPointer(entry->key))
259                 {
260                         retval = palloc(sizeof(GISTENTRY));
261                         gistentryinit(*retval, PointerGetDatum(in),
262                                                   entry->rel, entry->page, entry->offset, FALSE);
263
264                         PG_RETURN_POINTER(retval);
265                 }
266                 PG_RETURN_POINTER(entry);
267         }
268
269         din = ARRPTR(in);
270         lenr = internal_size(din, lenin);
271
272         r = new_intArrayType(lenr);
273         dr = ARRPTR(r);
274
275         for (i = 0; i < lenin; i += 2)
276                 for (j = din[i]; j <= din[i + 1]; j++)
277                         if ((!i) || *(dr - 1) != j)
278                                 *dr++ = j;
279
280         if (in != (ArrayType *) DatumGetPointer(entry->key))
281                 pfree(in);
282         retval = palloc(sizeof(GISTENTRY));
283         gistentryinit(*retval, PointerGetDatum(r),
284                                   entry->rel, entry->page, entry->offset, FALSE);
285
286         PG_RETURN_POINTER(retval);
287 }
288
289 /*
290 ** The GiST Penalty method for _intments
291 */
292 Datum
293 g_int_penalty(PG_FUNCTION_ARGS)
294 {
295         GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
296         GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
297         float      *result = (float *) PG_GETARG_POINTER(2);
298         ArrayType  *ud;
299         float           tmp1,
300                                 tmp2;
301
302         ud = inner_int_union((ArrayType *) DatumGetPointer(origentry->key),
303                                                  (ArrayType *) DatumGetPointer(newentry->key));
304         rt__int_size(ud, &tmp1);
305         rt__int_size((ArrayType *) DatumGetPointer(origentry->key), &tmp2);
306         *result = tmp1 - tmp2;
307         pfree(ud);
308
309         PG_RETURN_POINTER(result);
310 }
311
312
313
314 Datum
315 g_int_same(PG_FUNCTION_ARGS)
316 {
317         ArrayType  *a = PG_GETARG_ARRAYTYPE_P(0);
318         ArrayType  *b = PG_GETARG_ARRAYTYPE_P(1);
319         bool       *result = (bool *) PG_GETARG_POINTER(2);
320         int4            n = ARRNELEMS(a);
321         int4       *da,
322                            *db;
323
324         CHECKARRVALID(a);
325         CHECKARRVALID(b);
326
327         if (n != ARRNELEMS(b))
328         {
329                 *result = false;
330                 PG_RETURN_POINTER(result);
331         }
332         *result = TRUE;
333         da = ARRPTR(a);
334         db = ARRPTR(b);
335         while (n--)
336         {
337                 if (*da++ != *db++)
338                 {
339                         *result = FALSE;
340                         break;
341                 }
342         }
343
344         PG_RETURN_POINTER(result);
345 }
346
347 /*****************************************************************
348 ** Common GiST Method
349 *****************************************************************/
350
351 typedef struct
352 {
353         OffsetNumber pos;
354         float           cost;
355 } SPLITCOST;
356
357 static int
358 comparecost(const void *a, const void *b)
359 {
360         if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
361                 return 0;
362         else
363                 return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
364 }
365
366 /*
367 ** The GiST PickSplit method for _intments
368 ** We use Guttman's poly time split algorithm
369 */
370 Datum
371 g_int_picksplit(PG_FUNCTION_ARGS)
372 {
373         GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
374         GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
375         OffsetNumber i,
376                                 j;
377         ArrayType  *datum_alpha,
378                            *datum_beta;
379         ArrayType  *datum_l,
380                            *datum_r;
381         ArrayType  *union_d,
382                            *union_dl,
383                            *union_dr;
384         ArrayType  *inter_d;
385         bool            firsttime;
386         float           size_alpha,
387                                 size_beta,
388                                 size_union,
389                                 size_inter;
390         float           size_waste,
391                                 waste;
392         float           size_l,
393                                 size_r;
394         int                     nbytes;
395         OffsetNumber seed_1 = 0,
396                                 seed_2 = 0;
397         OffsetNumber *left,
398                            *right;
399         OffsetNumber maxoff;
400         SPLITCOST  *costvector;
401
402 #ifdef GIST_DEBUG
403         elog(DEBUG3, "--------picksplit %d", entryvec->n);
404 #endif
405
406         maxoff = entryvec->n - 2;
407         nbytes = (maxoff + 2) * sizeof(OffsetNumber);
408         v->spl_left = (OffsetNumber *) palloc(nbytes);
409         v->spl_right = (OffsetNumber *) palloc(nbytes);
410
411         firsttime = true;
412         waste = 0.0;
413         for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
414         {
415                 datum_alpha = GETENTRY(entryvec, i);
416                 for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
417                 {
418                         datum_beta = GETENTRY(entryvec, j);
419
420                         /* compute the wasted space by unioning these guys */
421                         /* size_waste = size_union - size_inter; */
422                         union_d = inner_int_union(datum_alpha, datum_beta);
423                         rt__int_size(union_d, &size_union);
424                         inter_d = inner_int_inter(datum_alpha, datum_beta);
425                         rt__int_size(inter_d, &size_inter);
426                         size_waste = size_union - size_inter;
427
428                         pfree(union_d);
429
430                         if (inter_d != (ArrayType *) NULL)
431                                 pfree(inter_d);
432
433                         /*
434                          * are these a more promising split that what we've already seen?
435                          */
436
437                         if (size_waste > waste || firsttime)
438                         {
439                                 waste = size_waste;
440                                 seed_1 = i;
441                                 seed_2 = j;
442                                 firsttime = false;
443                         }
444                 }
445         }
446
447         left = v->spl_left;
448         v->spl_nleft = 0;
449         right = v->spl_right;
450         v->spl_nright = 0;
451         if (seed_1 == 0 || seed_2 == 0)
452         {
453                 seed_1 = 1;
454                 seed_2 = 2;
455         }
456
457         datum_alpha = GETENTRY(entryvec, seed_1);
458         datum_l = copy_intArrayType(datum_alpha);
459         rt__int_size(datum_l, &size_l);
460         datum_beta = GETENTRY(entryvec, seed_2);
461         datum_r = copy_intArrayType(datum_beta);
462         rt__int_size(datum_r, &size_r);
463
464         maxoff = OffsetNumberNext(maxoff);
465
466         /*
467          * sort entries
468          */
469         costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
470         for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
471         {
472                 costvector[i - 1].pos = i;
473                 datum_alpha = GETENTRY(entryvec, i);
474                 union_d = inner_int_union(datum_l, datum_alpha);
475                 rt__int_size(union_d, &size_alpha);
476                 pfree(union_d);
477                 union_d = inner_int_union(datum_r, datum_alpha);
478                 rt__int_size(union_d, &size_beta);
479                 pfree(union_d);
480                 costvector[i - 1].cost = Abs((size_alpha - size_l) - (size_beta - size_r));
481         }
482         qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
483
484         /*
485          * Now split up the regions between the two seeds.      An important property
486          * of this split algorithm is that the split vector v has the indices of
487          * items to be split in order in its left and right vectors.  We exploit
488          * this property by doing a merge in the code that actually splits the
489          * page.
490          *
491          * For efficiency, we also place the new index tuple in this loop. This is
492          * handled at the very end, when we have placed all the existing tuples
493          * and i == maxoff + 1.
494          */
495
496
497         for (j = 0; j < maxoff; j++)
498         {
499                 i = costvector[j].pos;
500
501                 /*
502                  * If we've already decided where to place this item, just put it on
503                  * the right list.      Otherwise, we need to figure out which page needs
504                  * the least enlargement in order to store the item.
505                  */
506
507                 if (i == seed_1)
508                 {
509                         *left++ = i;
510                         v->spl_nleft++;
511                         continue;
512                 }
513                 else if (i == seed_2)
514                 {
515                         *right++ = i;
516                         v->spl_nright++;
517                         continue;
518                 }
519
520                 /* okay, which page needs least enlargement? */
521                 datum_alpha = GETENTRY(entryvec, i);
522                 union_dl = inner_int_union(datum_l, datum_alpha);
523                 union_dr = inner_int_union(datum_r, datum_alpha);
524                 rt__int_size(union_dl, &size_alpha);
525                 rt__int_size(union_dr, &size_beta);
526
527                 /* pick which page to add it to */
528                 if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
529                 {
530                         if (datum_l)
531                                 pfree(datum_l);
532                         if (union_dr)
533                                 pfree(union_dr);
534                         datum_l = union_dl;
535                         size_l = size_alpha;
536                         *left++ = i;
537                         v->spl_nleft++;
538                 }
539                 else
540                 {
541                         if (datum_r)
542                                 pfree(datum_r);
543                         if (union_dl)
544                                 pfree(union_dl);
545                         datum_r = union_dr;
546                         size_r = size_beta;
547                         *right++ = i;
548                         v->spl_nright++;
549                 }
550         }
551         pfree(costvector);
552         *right = *left = FirstOffsetNumber;
553
554         v->spl_ldatum = PointerGetDatum(datum_l);
555         v->spl_rdatum = PointerGetDatum(datum_r);
556
557         PG_RETURN_POINTER(v);
558 }