]> granicus.if.org Git - postgresql/blob - contrib/hstore/hstore_gist.c
db58fb62ddfd768b8ebb2850553564e77cc757d8
[postgresql] / contrib / hstore / hstore_gist.c
1 /*
2  * $PostgreSQL: pgsql/contrib/hstore/hstore_gist.c,v 1.12 2010/02/26 02:00:32 momjian Exp $
3  */
4 #include "postgres.h"
5
6 #include "access/gist.h"
7 #include "access/itup.h"
8 #include "access/skey.h"
9 #include "catalog/pg_type.h"
10
11 #include "crc32.h"
12 #include "hstore.h"
13
14 /* bigint defines */
15 #define BITBYTE 8
16 #define SIGLENINT  4                    /* >122 => key will toast, so very slow!!! */
17 #define SIGLEN  ( sizeof(int)*SIGLENINT )
18 #define SIGLENBIT (SIGLEN*BITBYTE)
19
20 typedef char BITVEC[SIGLEN];
21 typedef char *BITVECP;
22
23 #define SIGPTR(x)  ( (BITVECP) ARR_DATA_PTR(x) )
24
25
26 #define LOOPBYTE \
27                         for(i=0;i<SIGLEN;i++)
28
29 #define LOOPBIT \
30                         for(i=0;i<SIGLENBIT;i++)
31
32 /* beware of multiple evaluation of arguments to these macros! */
33 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
34 #define GETBITBYTE(x,i) ( (*((char*)(x)) >> (i)) & 0x01 )
35 #define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
36 #define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
37 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
38 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
39 #define HASH(sign, val) SETBIT((sign), HASHVAL(val))
40
41 typedef struct
42 {
43         int32           vl_len_;                /* varlena header (do not touch directly!) */
44         int4            flag;
45         char            data[1];
46 } GISTTYPE;
47
48 #define ALLISTRUE               0x04
49
50 #define ISALLTRUE(x)    ( ((GISTTYPE*)x)->flag & ALLISTRUE )
51
52 #define GTHDRSIZE               (VARHDRSZ + sizeof(int4))
53 #define CALCGTSIZE(flag) ( GTHDRSIZE+(((flag) & ALLISTRUE) ? 0 : SIGLEN) )
54
55 #define GETSIGN(x)              ( (BITVECP)( (char*)x+GTHDRSIZE ) )
56
57 #define SUMBIT(val) (           \
58         GETBITBYTE((val),0) + \
59         GETBITBYTE((val),1) + \
60         GETBITBYTE((val),2) + \
61         GETBITBYTE((val),3) + \
62         GETBITBYTE((val),4) + \
63         GETBITBYTE((val),5) + \
64         GETBITBYTE((val),6) + \
65         GETBITBYTE((val),7)   \
66 )
67
68 #define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer((vec)->vector[(pos)].key))
69
70 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
71
72 PG_FUNCTION_INFO_V1(ghstore_in);
73 Datum           ghstore_in(PG_FUNCTION_ARGS);
74
75 PG_FUNCTION_INFO_V1(ghstore_out);
76 Datum           ghstore_out(PG_FUNCTION_ARGS);
77
78
79 Datum
80 ghstore_in(PG_FUNCTION_ARGS)
81 {
82         elog(ERROR, "Not implemented");
83         PG_RETURN_DATUM(0);
84 }
85
86 Datum
87 ghstore_out(PG_FUNCTION_ARGS)
88 {
89         elog(ERROR, "Not implemented");
90         PG_RETURN_DATUM(0);
91 }
92
93 PG_FUNCTION_INFO_V1(ghstore_consistent);
94 PG_FUNCTION_INFO_V1(ghstore_compress);
95 PG_FUNCTION_INFO_V1(ghstore_decompress);
96 PG_FUNCTION_INFO_V1(ghstore_penalty);
97 PG_FUNCTION_INFO_V1(ghstore_picksplit);
98 PG_FUNCTION_INFO_V1(ghstore_union);
99 PG_FUNCTION_INFO_V1(ghstore_same);
100
101 Datum           ghstore_consistent(PG_FUNCTION_ARGS);
102 Datum           ghstore_compress(PG_FUNCTION_ARGS);
103 Datum           ghstore_decompress(PG_FUNCTION_ARGS);
104 Datum           ghstore_penalty(PG_FUNCTION_ARGS);
105 Datum           ghstore_picksplit(PG_FUNCTION_ARGS);
106 Datum           ghstore_union(PG_FUNCTION_ARGS);
107 Datum           ghstore_same(PG_FUNCTION_ARGS);
108
109 Datum
110 ghstore_compress(PG_FUNCTION_ARGS)
111 {
112         GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
113         GISTENTRY  *retval = entry;
114
115         if (entry->leafkey)
116         {
117                 GISTTYPE   *res = (GISTTYPE *) palloc0(CALCGTSIZE(0));
118                 HStore     *val = DatumGetHStoreP(entry->key);
119                 HEntry     *hsent = ARRPTR(val);
120                 char       *ptr = STRPTR(val);
121                 int                     count = HS_COUNT(val);
122                 int                     i;
123
124                 SET_VARSIZE(res, CALCGTSIZE(0));
125
126                 for (i = 0; i < count; ++i)
127                 {
128                         int                     h;
129
130                         h = crc32_sz((char *) HS_KEY(hsent, ptr, i), HS_KEYLEN(hsent, i));
131                         HASH(GETSIGN(res), h);
132                         if (!HS_VALISNULL(hsent, i))
133                         {
134                                 h = crc32_sz((char *) HS_VAL(hsent, ptr, i), HS_VALLEN(hsent, i));
135                                 HASH(GETSIGN(res), h);
136                         }
137                 }
138
139                 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
140                 gistentryinit(*retval, PointerGetDatum(res),
141                                           entry->rel, entry->page,
142                                           entry->offset,
143                                           FALSE);
144         }
145         else if (!ISALLTRUE(DatumGetPointer(entry->key)))
146         {
147                 int4            i;
148                 GISTTYPE   *res;
149                 BITVECP         sign = GETSIGN(DatumGetPointer(entry->key));
150
151                 LOOPBYTE
152                 {
153                         if ((sign[i] & 0xff) != 0xff)
154                                 PG_RETURN_POINTER(retval);
155                 }
156
157                 res = (GISTTYPE *) palloc(CALCGTSIZE(ALLISTRUE));
158                 SET_VARSIZE(res, CALCGTSIZE(ALLISTRUE));
159                 res->flag = ALLISTRUE;
160
161                 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
162                 gistentryinit(*retval, PointerGetDatum(res),
163                                           entry->rel, entry->page,
164                                           entry->offset,
165                                           FALSE);
166         }
167
168         PG_RETURN_POINTER(retval);
169 }
170
171 Datum
172 ghstore_decompress(PG_FUNCTION_ARGS)
173 {
174         GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
175         GISTENTRY  *retval;
176         HStore     *key;
177
178         key = DatumGetHStoreP(entry->key);
179
180         if (key != (HStore *) DatumGetPointer(entry->key))
181         {
182                 /* need to pass back the decompressed item */
183                 retval = palloc(sizeof(GISTENTRY));
184                 gistentryinit(*retval, PointerGetDatum(key),
185                                           entry->rel, entry->page, entry->offset, entry->leafkey);
186                 PG_RETURN_POINTER(retval);
187         }
188         else
189         {
190                 /* we can return the entry as-is */
191                 PG_RETURN_POINTER(entry);
192         }
193 }
194
195 Datum
196 ghstore_same(PG_FUNCTION_ARGS)
197 {
198         GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
199         GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
200         bool       *result = (bool *) PG_GETARG_POINTER(2);
201
202         if (ISALLTRUE(a) && ISALLTRUE(b))
203                 *result = true;
204         else if (ISALLTRUE(a))
205                 *result = false;
206         else if (ISALLTRUE(b))
207                 *result = false;
208         else
209         {
210                 int4            i;
211                 BITVECP         sa = GETSIGN(a),
212                                         sb = GETSIGN(b);
213
214                 *result = true;
215                 LOOPBYTE
216                 {
217                         if (sa[i] != sb[i])
218                         {
219                                 *result = false;
220                                 break;
221                         }
222                 }
223         }
224         PG_RETURN_POINTER(result);
225 }
226
227 static int4
228 sizebitvec(BITVECP sign)
229 {
230         int4            size = 0,
231                                 i;
232
233         LOOPBYTE
234         {
235                 size += SUMBIT(sign);
236                 sign = (BITVECP) (((char *) sign) + 1);
237         }
238         return size;
239 }
240
241 static int
242 hemdistsign(BITVECP a, BITVECP b)
243 {
244         int                     i,
245                                 dist = 0;
246
247         LOOPBIT
248         {
249                 if (GETBIT(a, i) != GETBIT(b, i))
250                         dist++;
251         }
252         return dist;
253 }
254
255 static int
256 hemdist(GISTTYPE *a, GISTTYPE *b)
257 {
258         if (ISALLTRUE(a))
259         {
260                 if (ISALLTRUE(b))
261                         return 0;
262                 else
263                         return SIGLENBIT - sizebitvec(GETSIGN(b));
264         }
265         else if (ISALLTRUE(b))
266                 return SIGLENBIT - sizebitvec(GETSIGN(a));
267
268         return hemdistsign(GETSIGN(a), GETSIGN(b));
269 }
270
271 static int4
272 unionkey(BITVECP sbase, GISTTYPE *add)
273 {
274         int4            i;
275         BITVECP         sadd = GETSIGN(add);
276
277         if (ISALLTRUE(add))
278                 return 1;
279         LOOPBYTE
280                 sbase[i] |= sadd[i];
281         return 0;
282 }
283
284 Datum
285 ghstore_union(PG_FUNCTION_ARGS)
286 {
287         GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
288         int4            len = entryvec->n;
289
290         int                *size = (int *) PG_GETARG_POINTER(1);
291         BITVEC          base;
292         int4            i;
293         int4            flag = 0;
294         GISTTYPE   *result;
295
296         MemSet((void *) base, 0, sizeof(BITVEC));
297         for (i = 0; i < len; i++)
298         {
299                 if (unionkey(base, GETENTRY(entryvec, i)))
300                 {
301                         flag = ALLISTRUE;
302                         break;
303                 }
304         }
305
306         len = CALCGTSIZE(flag);
307         result = (GISTTYPE *) palloc(len);
308         SET_VARSIZE(result, len);
309         result->flag = flag;
310         if (!ISALLTRUE(result))
311                 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
312         *size = len;
313
314         PG_RETURN_POINTER(result);
315 }
316
317 Datum
318 ghstore_penalty(PG_FUNCTION_ARGS)
319 {
320         GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
321         GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
322         float      *penalty = (float *) PG_GETARG_POINTER(2);
323         GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
324         GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);
325
326         *penalty = hemdist(origval, newval);
327         PG_RETURN_POINTER(penalty);
328 }
329
330
331 typedef struct
332 {
333         OffsetNumber pos;
334         int4            cost;
335 } SPLITCOST;
336
337 static int
338 comparecost(const void *a, const void *b)
339 {
340         return ((SPLITCOST *) a)->cost - ((SPLITCOST *) b)->cost;
341 }
342
343
344 Datum
345 ghstore_picksplit(PG_FUNCTION_ARGS)
346 {
347         GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
348         OffsetNumber maxoff = entryvec->n - 2;
349
350         GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
351         OffsetNumber k,
352                                 j;
353         GISTTYPE   *datum_l,
354                            *datum_r;
355         BITVECP         union_l,
356                                 union_r;
357         int4            size_alpha,
358                                 size_beta;
359         int4            size_waste,
360                                 waste = -1;
361         int4            nbytes;
362         OffsetNumber seed_1 = 0,
363                                 seed_2 = 0;
364         OffsetNumber *left,
365                            *right;
366         BITVECP         ptr;
367         int                     i;
368         SPLITCOST  *costvector;
369         GISTTYPE   *_k,
370                            *_j;
371
372         nbytes = (maxoff + 2) * sizeof(OffsetNumber);
373         v->spl_left = (OffsetNumber *) palloc(nbytes);
374         v->spl_right = (OffsetNumber *) palloc(nbytes);
375
376         for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
377         {
378                 _k = GETENTRY(entryvec, k);
379                 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
380                 {
381                         size_waste = hemdist(_k, GETENTRY(entryvec, j));
382                         if (size_waste > waste)
383                         {
384                                 waste = size_waste;
385                                 seed_1 = k;
386                                 seed_2 = j;
387                         }
388                 }
389         }
390
391         left = v->spl_left;
392         v->spl_nleft = 0;
393         right = v->spl_right;
394         v->spl_nright = 0;
395
396         if (seed_1 == 0 || seed_2 == 0)
397         {
398                 seed_1 = 1;
399                 seed_2 = 2;
400         }
401
402         /* form initial .. */
403         if (ISALLTRUE(GETENTRY(entryvec, seed_1)))
404         {
405                 datum_l = (GISTTYPE *) palloc(GTHDRSIZE);
406                 SET_VARSIZE(datum_l, GTHDRSIZE);
407                 datum_l->flag = ALLISTRUE;
408         }
409         else
410         {
411                 datum_l = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
412                 SET_VARSIZE(datum_l, GTHDRSIZE + SIGLEN);
413                 datum_l->flag = 0;
414                 memcpy((void *) GETSIGN(datum_l), (void *) GETSIGN(GETENTRY(entryvec, seed_1)), sizeof(BITVEC))
415                         ;
416         }
417         if (ISALLTRUE(GETENTRY(entryvec, seed_2)))
418         {
419                 datum_r = (GISTTYPE *) palloc(GTHDRSIZE);
420                 SET_VARSIZE(datum_r, GTHDRSIZE);
421                 datum_r->flag = ALLISTRUE;
422         }
423         else
424         {
425                 datum_r = (GISTTYPE *) palloc(GTHDRSIZE + SIGLEN);
426                 SET_VARSIZE(datum_r, GTHDRSIZE + SIGLEN);
427                 datum_r->flag = 0;
428                 memcpy((void *) GETSIGN(datum_r), (void *) GETSIGN(GETENTRY(entryvec, seed_2)), sizeof(BITVEC));
429         }
430
431         maxoff = OffsetNumberNext(maxoff);
432         /* sort before ... */
433         costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
434         for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
435         {
436                 costvector[j - 1].pos = j;
437                 _j = GETENTRY(entryvec, j);
438                 size_alpha = hemdist(datum_l, _j);
439                 size_beta = hemdist(datum_r, _j);
440                 costvector[j - 1].cost = abs(size_alpha - size_beta);
441         }
442         qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
443
444         union_l = GETSIGN(datum_l);
445         union_r = GETSIGN(datum_r);
446
447         for (k = 0; k < maxoff; k++)
448         {
449                 j = costvector[k].pos;
450                 if (j == seed_1)
451                 {
452                         *left++ = j;
453                         v->spl_nleft++;
454                         continue;
455                 }
456                 else if (j == seed_2)
457                 {
458                         *right++ = j;
459                         v->spl_nright++;
460                         continue;
461                 }
462                 _j = GETENTRY(entryvec, j);
463                 size_alpha = hemdist(datum_l, _j);
464                 size_beta = hemdist(datum_r, _j);
465
466                 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.0001))
467                 {
468                         if (ISALLTRUE(datum_l) || ISALLTRUE(_j))
469                         {
470                                 if (!ISALLTRUE(datum_l))
471                                         MemSet((void *) union_l, 0xff, sizeof(BITVEC));
472                         }
473                         else
474                         {
475                                 ptr = GETSIGN(_j);
476                                 LOOPBYTE
477                                         union_l[i] |= ptr[i];
478                         }
479                         *left++ = j;
480                         v->spl_nleft++;
481                 }
482                 else
483                 {
484                         if (ISALLTRUE(datum_r) || ISALLTRUE(_j))
485                         {
486                                 if (!ISALLTRUE(datum_r))
487                                         MemSet((void *) union_r, 0xff, sizeof(BITVEC));
488                         }
489                         else
490                         {
491                                 ptr = GETSIGN(_j);
492                                 LOOPBYTE
493                                         union_r[i] |= ptr[i];
494                         }
495                         *right++ = j;
496                         v->spl_nright++;
497                 }
498         }
499
500         *right = *left = FirstOffsetNumber;
501
502         v->spl_ldatum = PointerGetDatum(datum_l);
503         v->spl_rdatum = PointerGetDatum(datum_r);
504
505         PG_RETURN_POINTER(v);
506 }
507
508
509 Datum
510 ghstore_consistent(PG_FUNCTION_ARGS)
511 {
512         GISTTYPE   *entry = (GISTTYPE *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
513         StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
514
515         /* Oid          subtype = PG_GETARG_OID(3); */
516         bool       *recheck = (bool *) PG_GETARG_POINTER(4);
517         bool            res = true;
518         BITVECP         sign;
519
520         /* All cases served by this function are inexact */
521         *recheck = true;
522
523         if (ISALLTRUE(entry))
524                 PG_RETURN_BOOL(true);
525
526         sign = GETSIGN(entry);
527
528         if (strategy == HStoreContainsStrategyNumber ||
529                 strategy == HStoreOldContainsStrategyNumber)
530         {
531                 HStore     *query = PG_GETARG_HS(1);
532                 HEntry     *qe = ARRPTR(query);
533                 char       *qv = STRPTR(query);
534                 int                     count = HS_COUNT(query);
535                 int                     i;
536
537                 for (i = 0; res && i < count; ++i)
538                 {
539                         int                     crc = crc32_sz((char *) HS_KEY(qe, qv, i), HS_KEYLEN(qe, i));
540
541                         if (GETBIT(sign, HASHVAL(crc)))
542                         {
543                                 if (!HS_VALISNULL(qe, i))
544                                 {
545                                         crc = crc32_sz((char *) HS_VAL(qe, qv, i), HS_VALLEN(qe, i));
546                                         if (!GETBIT(sign, HASHVAL(crc)))
547                                                 res = false;
548                                 }
549                         }
550                         else
551                                 res = false;
552                 }
553         }
554         else if (strategy == HStoreExistsStrategyNumber)
555         {
556                 text       *query = PG_GETARG_TEXT_PP(1);
557                 int                     crc = crc32_sz(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query));
558
559                 res = (GETBIT(sign, HASHVAL(crc))) ? true : false;
560         }
561         else if (strategy == HStoreExistsAllStrategyNumber)
562         {
563                 ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
564                 Datum      *key_datums;
565                 bool       *key_nulls;
566                 int                     key_count;
567                 int                     i;
568
569                 deconstruct_array(query,
570                                                   TEXTOID, -1, false, 'i',
571                                                   &key_datums, &key_nulls, &key_count);
572
573                 for (i = 0; res && i < key_count; ++i)
574                 {
575                         int                     crc;
576
577                         if (key_nulls[i])
578                                 continue;
579                         crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
580                         if (!(GETBIT(sign, HASHVAL(crc))))
581                                 res = FALSE;
582                 }
583         }
584         else if (strategy == HStoreExistsAnyStrategyNumber)
585         {
586                 ArrayType  *query = PG_GETARG_ARRAYTYPE_P(1);
587                 Datum      *key_datums;
588                 bool       *key_nulls;
589                 int                     key_count;
590                 int                     i;
591
592                 deconstruct_array(query,
593                                                   TEXTOID, -1, false, 'i',
594                                                   &key_datums, &key_nulls, &key_count);
595
596                 res = FALSE;
597
598                 for (i = 0; !res && i < key_count; ++i)
599                 {
600                         int                     crc;
601
602                         if (key_nulls[i])
603                                 continue;
604                         crc = crc32_sz(VARDATA(key_datums[i]), VARSIZE(key_datums[i]) - VARHDRSZ);
605                         if (GETBIT(sign, HASHVAL(crc)))
606                                 res = TRUE;
607                 }
608         }
609         else
610                 elog(ERROR, "Unsupported strategy number: %d", strategy);
611
612         PG_RETURN_BOOL(res);
613 }