1 /******************************************************************************
2 This file contains routines that can be bound to a Postgres backend and
3 called by the backend in the process of processing queries. The calling
4 format for these routines is dictated by Postgres architecture.
5 ******************************************************************************/
11 #include "access/gist.h"
12 #include "access/rtree.h"
13 #include "utils/builtins.h"
19 #define GIST_QUERY_DEBUG
22 extern int seg_yyparse();
23 extern void seg_yyerror(const char *message);
24 extern void seg_scanner_init(const char *str);
25 extern void seg_scanner_finish(void);
28 extern int seg_yydebug;
32 ** Input/Output routines
34 SEG *seg_in(char *str);
35 char *seg_out(SEG * seg);
36 float32 seg_lower(SEG * seg);
37 float32 seg_upper(SEG * seg);
38 float32 seg_center(SEG * seg);
41 ** GiST support methods
43 bool gseg_consistent(GISTENTRY *entry, SEG * query, StrategyNumber strategy);
44 GISTENTRY *gseg_compress(GISTENTRY *entry);
45 GISTENTRY *gseg_decompress(GISTENTRY *entry);
46 float *gseg_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result);
47 GIST_SPLITVEC *gseg_picksplit(GistEntryVector *entryvec, GIST_SPLITVEC *v);
48 bool gseg_leaf_consistent(SEG * key, SEG * query, StrategyNumber strategy);
49 bool gseg_internal_consistent(SEG * key, SEG * query, StrategyNumber strategy);
50 SEG *gseg_union(GistEntryVector *entryvec, int *sizep);
51 SEG *gseg_binary_union(SEG * r1, SEG * r2, int *sizep);
52 bool *gseg_same(SEG * b1, SEG * b2, bool *result);
56 ** R-tree suport functions
58 bool seg_same(SEG * a, SEG * b);
59 bool seg_contains_int(SEG * a, int *b);
60 bool seg_contains_float4(SEG * a, float4 *b);
61 bool seg_contains_float8(SEG * a, float8 *b);
62 bool seg_contains(SEG * a, SEG * b);
63 bool seg_contained(SEG * a, SEG * b);
64 bool seg_overlap(SEG * a, SEG * b);
65 bool seg_left(SEG * a, SEG * b);
66 bool seg_over_left(SEG * a, SEG * b);
67 bool seg_right(SEG * a, SEG * b);
68 bool seg_over_right(SEG * a, SEG * b);
69 SEG *seg_union(SEG * a, SEG * b);
70 SEG *seg_inter(SEG * a, SEG * b);
71 void rt_seg_size(SEG * a, float *sz);
72 float *seg_size(SEG * a);
77 int32 seg_cmp(SEG * a, SEG * b);
78 bool seg_lt(SEG * a, SEG * b);
79 bool seg_le(SEG * a, SEG * b);
80 bool seg_gt(SEG * a, SEG * b);
81 bool seg_ge(SEG * a, SEG * b);
82 bool seg_different(SEG * a, SEG * b);
85 ** Auxiliary funxtions
87 static int restore(char *s, float val, int n);
88 int significant_digits(char *s);
91 /*****************************************************************************
92 * Input/Output functions
93 *****************************************************************************/
98 SEG *result = palloc(sizeof(SEG));
100 seg_scanner_init(str);
102 if (seg_yyparse(result) != 0)
103 seg_yyerror("bogus input");
105 seg_scanner_finish();
119 p = result = (char *) palloc(40);
121 if (seg->l_ext == '>' || seg->l_ext == '<' || seg->l_ext == '~')
122 p += sprintf(p, "%c", seg->l_ext);
124 if (seg->lower == seg->upper && seg->l_ext == seg->u_ext)
127 * indicates that this interval was built by seg_in off a single
130 p += restore(p, seg->lower, seg->l_sigd);
134 if (seg->l_ext != '-')
136 /* print the lower boudary if exists */
137 p += restore(p, seg->lower, seg->l_sigd);
138 p += sprintf(p, " ");
140 p += sprintf(p, "..");
141 if (seg->u_ext != '-')
143 /* print the upper boudary if exists */
144 p += sprintf(p, " ");
145 if (seg->u_ext == '>' || seg->u_ext == '<' || seg->l_ext == '~')
146 p += sprintf(p, "%c", seg->u_ext);
147 p += restore(p, seg->upper, seg->u_sigd);
155 seg_center(SEG * seg)
157 float32 result = (float32) palloc(sizeof(float32data));
160 return (float32) NULL;
162 *result = ((float) seg->lower + (float) seg->upper) / 2.0;
169 float32 result = (float32) palloc(sizeof(float32data));
172 return (float32) NULL;
174 *result = (float) seg->lower;
181 float32 result = (float32) palloc(sizeof(float32data));
184 return (float32) NULL;
186 *result = (float) seg->upper;
191 /*****************************************************************************
193 *****************************************************************************/
196 ** The GiST Consistent method for segments
197 ** Should return false if for all data items x below entry,
198 ** the predicate x op query == FALSE, where op is the oper
199 ** corresponding to strategy in the pg_amop table.
202 gseg_consistent(GISTENTRY *entry,
204 StrategyNumber strategy)
207 * * if entry is not leaf, use gseg_internal_consistent, * else use
208 * gseg_leaf_consistent
210 if (GIST_LEAF(entry))
211 return (gseg_leaf_consistent((SEG *) DatumGetPointer(entry->key), query, strategy));
213 return (gseg_internal_consistent((SEG *) DatumGetPointer(entry->key), query, strategy));
217 ** The GiST Union method for segments
218 ** returns the minimal bounding seg that encloses all the entries in entryvec
221 gseg_union(GistEntryVector *entryvec, int *sizep)
225 SEG *out = (SEG *) NULL;
229 fprintf(stderr, "union\n");
232 numranges = entryvec->n;
233 tmp = (SEG *) DatumGetPointer(entryvec->vector[0].key);
234 *sizep = sizeof(SEG);
236 for (i = 1; i < numranges; i++)
238 out = gseg_binary_union(tmp, (SEG *)
239 DatumGetPointer(entryvec->vector[i].key),
250 ** GiST Compress and Decompress methods for segments
251 ** do not do anything.
254 gseg_compress(GISTENTRY *entry)
260 gseg_decompress(GISTENTRY *entry)
266 ** The GiST Penalty method for segments
267 ** As in the R-tree paper, we use change in area as our penalty metric
270 gseg_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result)
276 ud = seg_union((SEG *) DatumGetPointer(origentry->key),
277 (SEG *) DatumGetPointer(newentry->key));
278 rt_seg_size(ud, &tmp1);
279 rt_seg_size((SEG *) DatumGetPointer(origentry->key), &tmp2);
280 *result = tmp1 - tmp2;
284 fprintf(stderr, "penalty\n");
285 fprintf(stderr, "\t%g\n", *result);
294 ** The GiST PickSplit method for segments
295 ** We use Guttman's poly time split algorithm
298 gseg_picksplit(GistEntryVector *entryvec,
321 OffsetNumber seed_1 = 0,
328 fprintf(stderr, "picksplit\n");
331 maxoff = entryvec->n - 2;
332 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
333 v->spl_left = (OffsetNumber *) palloc(nbytes);
334 v->spl_right = (OffsetNumber *) palloc(nbytes);
339 for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
341 datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
342 for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
344 datum_beta = (SEG *) DatumGetPointer(entryvec->vector[j].key);
346 /* compute the wasted space by unioning these guys */
347 /* size_waste = size_union - size_inter; */
348 union_d = seg_union(datum_alpha, datum_beta);
349 rt_seg_size(union_d, &size_union);
350 inter_d = seg_inter(datum_alpha, datum_beta);
351 rt_seg_size(inter_d, &size_inter);
352 size_waste = size_union - size_inter;
356 if (inter_d != (SEG *) NULL)
360 * are these a more promising split that what we've already
364 if (size_waste > waste || firsttime)
376 right = v->spl_right;
379 datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[seed_1].key);
380 datum_l = seg_union(datum_alpha, datum_alpha);
381 rt_seg_size(datum_l, &size_l);
382 datum_beta = (SEG *) DatumGetPointer(entryvec->vector[seed_2].key);
383 datum_r = seg_union(datum_beta, datum_beta);
384 rt_seg_size(datum_r, &size_r);
387 * Now split up the regions between the two seeds. An important
388 * property of this split algorithm is that the split vector v has the
389 * indices of items to be split in order in its left and right
390 * vectors. We exploit this property by doing a merge in the code
391 * that actually splits the page.
393 * For efficiency, we also place the new index tuple in this loop. This
394 * is handled at the very end, when we have placed all the existing
395 * tuples and i == maxoff + 1.
398 maxoff = OffsetNumberNext(maxoff);
399 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
402 * If we've already decided where to place this item, just put it
403 * on the right list. Otherwise, we need to figure out which page
404 * needs the least enlargement in order to store the item.
413 else if (i == seed_2)
420 /* okay, which page needs least enlargement? */
421 datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
422 union_dl = seg_union(datum_l, datum_alpha);
423 union_dr = seg_union(datum_r, datum_alpha);
424 rt_seg_size(union_dl, &size_alpha);
425 rt_seg_size(union_dr, &size_beta);
427 /* pick which page to add it to */
428 if (size_alpha - size_l < size_beta - size_r)
447 *left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */
449 v->spl_ldatum = PointerGetDatum(datum_l);
450 v->spl_rdatum = PointerGetDatum(datum_r);
459 gseg_same(SEG * b1, SEG * b2, bool *result)
461 if (seg_same(b1, b2))
467 fprintf(stderr, "same: %s\n", (*result ? "TRUE" : "FALSE"));
477 gseg_leaf_consistent(SEG * key,
479 StrategyNumber strategy)
483 #ifdef GIST_QUERY_DEBUG
484 fprintf(stderr, "leaf_consistent, %d\n", strategy);
489 case RTLeftStrategyNumber:
490 retval = (bool) seg_left(key, query);
492 case RTOverLeftStrategyNumber:
493 retval = (bool) seg_over_left(key, query);
495 case RTOverlapStrategyNumber:
496 retval = (bool) seg_overlap(key, query);
498 case RTOverRightStrategyNumber:
499 retval = (bool) seg_over_right(key, query);
501 case RTRightStrategyNumber:
502 retval = (bool) seg_right(key, query);
504 case RTSameStrategyNumber:
505 retval = (bool) seg_same(key, query);
507 case RTContainsStrategyNumber:
508 retval = (bool) seg_contains(key, query);
510 case RTContainedByStrategyNumber:
511 retval = (bool) seg_contained(key, query);
520 gseg_internal_consistent(SEG * key,
522 StrategyNumber strategy)
526 #ifdef GIST_QUERY_DEBUG
527 fprintf(stderr, "internal_consistent, %d\n", strategy);
532 case RTLeftStrategyNumber:
533 case RTOverLeftStrategyNumber:
534 retval = (bool) seg_over_left(key, query);
536 case RTOverlapStrategyNumber:
537 retval = (bool) seg_overlap(key, query);
539 case RTOverRightStrategyNumber:
540 case RTRightStrategyNumber:
541 retval = (bool) seg_right(key, query);
543 case RTSameStrategyNumber:
544 case RTContainsStrategyNumber:
545 retval = (bool) seg_contains(key, query);
547 case RTContainedByStrategyNumber:
548 retval = (bool) seg_overlap(key, query);
557 gseg_binary_union(SEG * r1, SEG * r2, int *sizep)
561 retval = seg_union(r1, r2);
562 *sizep = sizeof(SEG);
569 seg_contains(SEG * a, SEG * b)
571 return ((a->lower <= b->lower) && (a->upper >= b->upper));
575 seg_contained(SEG * a, SEG * b)
577 return (seg_contains(b, a));
580 /*****************************************************************************
581 * Operator class for R-tree indexing
582 *****************************************************************************/
585 seg_same(SEG * a, SEG * b)
587 return seg_cmp(a, b) == 0;
590 /* seg_overlap -- does a overlap b?
593 seg_overlap(SEG * a, SEG * b)
596 ((a->upper >= b->upper) && (a->lower <= b->upper))
598 ((b->upper >= a->upper) && (b->lower <= a->upper))
602 /* seg_overleft -- is the right edge of (a) located to the left of the right edge of (b)?
605 seg_over_left(SEG * a, SEG * b)
607 return (a->upper <= b->upper && !seg_left(a, b) && !seg_right(a, b));
610 /* seg_left -- is (a) entirely on the left of (b)?
613 seg_left(SEG * a, SEG * b)
615 return (a->upper < b->lower);
618 /* seg_right -- is (a) entirely on the right of (b)?
621 seg_right(SEG * a, SEG * b)
623 return (a->lower > b->upper);
626 /* seg_overright -- is the left edge of (a) located to the right of the left edge of (b)?
629 seg_over_right(SEG * a, SEG * b)
631 return (a->lower >= b->lower && !seg_left(a, b) && !seg_right(a, b));
636 seg_union(SEG * a, SEG * b)
640 n = (SEG *) palloc(sizeof(*n));
642 /* take max of upper endpoints */
643 if (a->upper > b->upper)
646 n->u_sigd = a->u_sigd;
652 n->u_sigd = b->u_sigd;
656 /* take min of lower endpoints */
657 if (a->lower < b->lower)
660 n->l_sigd = a->l_sigd;
666 n->l_sigd = b->l_sigd;
675 seg_inter(SEG * a, SEG * b)
679 n = (SEG *) palloc(sizeof(*n));
681 /* take min of upper endpoints */
682 if (a->upper < b->upper)
685 n->u_sigd = a->u_sigd;
691 n->u_sigd = b->u_sigd;
695 /* take max of lower endpoints */
696 if (a->lower > b->lower)
699 n->l_sigd = a->l_sigd;
705 n->l_sigd = b->l_sigd;
713 rt_seg_size(SEG * a, float *size)
715 if (a == (SEG *) NULL || a->upper <= a->lower)
718 *size = (float) Abs(a->upper - a->lower);
728 result = (float *) palloc(sizeof(float));
730 *result = (float) Abs(a->upper - a->lower);
736 /*****************************************************************************
737 * Miscellaneous operators
738 *****************************************************************************/
740 seg_cmp(SEG * a, SEG * b)
743 * First compare on lower boundary position
745 if (a->lower < b->lower)
747 if (a->lower > b->lower)
751 * a->lower == b->lower, so consider type of boundary.
753 * A '-' lower bound is < any other kind (this could only be relevant if
754 * -HUGE_VAL is used as a regular data value). A '<' lower bound is <
755 * any other kind except '-'. A '>' lower bound is > any other kind.
757 if (a->l_ext != b->l_ext)
774 * For other boundary types, consider # of significant digits first.
776 if (a->l_sigd < b->l_sigd) /* (a) is blurred and is likely to include
779 if (a->l_sigd > b->l_sigd) /* (a) is less blurred and is likely to be
784 * For same # of digits, an approximate boundary is more blurred than
787 if (a->l_ext != b->l_ext)
789 if (a->l_ext == '~') /* (a) is approximate, while (b) is exact */
793 /* can't get here unless data is corrupt */
794 elog(ERROR, "bogus lower boundary types %d %d",
795 (int) a->l_ext, (int) b->l_ext);
798 /* at this point, the lower boundaries are identical */
801 * First compare on upper boundary position
803 if (a->upper < b->upper)
805 if (a->upper > b->upper)
809 * a->upper == b->upper, so consider type of boundary.
811 * A '-' upper bound is > any other kind (this could only be relevant if
812 * HUGE_VAL is used as a regular data value). A '<' upper bound is <
813 * any other kind. A '>' upper bound is > any other kind except '-'.
815 if (a->u_ext != b->u_ext)
832 * For other boundary types, consider # of significant digits first.
833 * Note result here is converse of the lower-boundary case.
835 if (a->u_sigd < b->u_sigd) /* (a) is blurred and is likely to include
838 if (a->u_sigd > b->u_sigd) /* (a) is less blurred and is likely to be
843 * For same # of digits, an approximate boundary is more blurred than
844 * exact. Again, result is converse of lower-boundary case.
846 if (a->u_ext != b->u_ext)
848 if (a->u_ext == '~') /* (a) is approximate, while (b) is exact */
852 /* can't get here unless data is corrupt */
853 elog(ERROR, "bogus upper boundary types %d %d",
854 (int) a->u_ext, (int) b->u_ext);
861 seg_lt(SEG * a, SEG * b)
863 return seg_cmp(a, b) < 0;
867 seg_le(SEG * a, SEG * b)
869 return seg_cmp(a, b) <= 0;
873 seg_gt(SEG * a, SEG * b)
875 return seg_cmp(a, b) > 0;
879 seg_ge(SEG * a, SEG * b)
881 return seg_cmp(a, b) >= 0;
885 seg_different(SEG * a, SEG * b)
887 return seg_cmp(a, b) != 0;
892 /*****************************************************************************
893 * Auxiliary functions
894 *****************************************************************************/
896 /* The purpose of this routine is to print the floating point
897 * value with exact number of significant digits. Its behaviour
898 * is similar to %.ng except it prints 8.00 where %.ng would
902 restore(char *result, float val, int n)
904 static char efmt[8] = {'%', '-', '1', '5', '.', '#', 'e', 0};
906 '0', '0', '0', '0', '0',
907 '0', '0', '0', '0', '0',
908 '0', '0', '0', '0', '0',
909 '0', '0', '0', '0', '0',
910 '0', '0', '0', '0', '\0'
920 * put a cap on the number of siugnificant digits to avoid nonsense in
925 /* remember the sign */
926 sign = (val < 0 ? 1 : 0);
928 efmt[5] = '0' + (n - 1) % 10; /* makes %-15.(n-1)e -- this
929 * format guarantees that the
930 * exponent is always present */
932 sprintf(result, efmt, val);
934 /* trim the spaces left by the %e */
935 for (p = result; *p != ' '; p++);
938 /* get the exponent */
939 mant = (char *) strtok(strdup(result), "e");
940 exp = atoi(strtok(NULL, "e"));
944 /* use the supplied mantyssa with sign */
945 strcpy((char *) strchr(result, 'e'), "");
952 * remove the decimal point from the mantyssa and write the
953 * digits to the buf array
955 for (p = result + sign, i = 10, dp = 0; *p != 'e'; p++, i++)
960 dp = i--; /* skip the decimal point */
964 dp = i--; /* no decimal point was found in the above
969 if (dp - 10 + exp >= n)
972 * the decimal point is behind the last significant
973 * digit; the digits in between must be converted to
974 * the exponent and the decimal point placed after the
977 exp = dp - 10 + exp - n;
980 /* insert the decimal point */
984 for (i = 23; i > dp; i--)
990 * adjust the exponent by the number of digits after
994 sprintf(&buf[11 + n], "e%d", exp + n - 1);
996 sprintf(&buf[11], "e%d", exp + n - 1);
1001 strcpy(result, &buf[9]);
1004 strcpy(result, &buf[10]);
1007 { /* insert the decimal point */
1009 for (i = 23; i > dp; i--)
1010 buf[i] = buf[i - 1];
1016 strcpy(result, &buf[9]);
1019 strcpy(result, &buf[10]);
1030 strcpy(result, &buf[dp - 2]);
1033 strcpy(result, &buf[dp - 1]);
1037 /* do nothing for Abs(exp) > 4; %e must be OK */
1038 /* just get rid of zeroes after [eE]- and +zeroes after [Ee]. */
1040 /* ... this is not done yet. */
1042 return (strlen(result));
1051 seg_contains_int(SEG * a, int *b)
1053 return ((a->lower <= *b) && (a->upper >= *b));
1057 seg_contains_float4(SEG * a, float4 *b)
1059 return ((a->lower <= *b) && (a->upper >= *b));
1063 seg_contains_float8(SEG * a, float8 *b)
1065 return ((a->lower <= *b) && (a->upper >= *b));
1068 /* find out the number of significant digits in a string representing
1069 * a floating point number
1072 significant_digits(char *s)
1080 /* skip leading zeroes and sign */
1081 for (c = *p; (c == '0' || c == '+' || c == '-') && c != 0; c = *(++p));
1083 /* skip decimal point and following zeroes */
1084 for (c = *p; (c == '0' || c == '.') && c != 0; c = *(++p))
1090 /* count significant digits (n) */
1091 for (c = *p, n = 0; c != 0; c = *(++p))
1093 if (!((c >= '0' && c <= '9') || (c == '.')))