4 ******************************************************************************
5 This file contains routines that can be bound to a Postgres backend and
6 called by the backend in the process of processing queries. The calling
7 format for these routines is dictated by Postgres architecture.
8 ******************************************************************************/
14 #include "access/gist.h"
15 #include "access/stratnum.h"
22 #define GIST_QUERY_DEBUG
28 * Auxiliary data structure for picksplit method.
35 } gseg_picksplit_item;
38 ** Input/Output routines
40 PG_FUNCTION_INFO_V1(seg_in);
41 PG_FUNCTION_INFO_V1(seg_out);
42 PG_FUNCTION_INFO_V1(seg_size);
43 PG_FUNCTION_INFO_V1(seg_lower);
44 PG_FUNCTION_INFO_V1(seg_upper);
45 PG_FUNCTION_INFO_V1(seg_center);
48 ** GiST support methods
50 bool gseg_consistent(GISTENTRY *entry,
52 StrategyNumber strategy,
55 GISTENTRY *gseg_compress(GISTENTRY *entry);
56 GISTENTRY *gseg_decompress(GISTENTRY *entry);
57 float *gseg_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result);
58 GIST_SPLITVEC *gseg_picksplit(GistEntryVector *entryvec, GIST_SPLITVEC *v);
59 bool gseg_leaf_consistent(SEG *key, SEG *query, StrategyNumber strategy);
60 bool gseg_internal_consistent(SEG *key, SEG *query, StrategyNumber strategy);
61 SEG *gseg_union(GistEntryVector *entryvec, int *sizep);
62 SEG *gseg_binary_union(SEG *r1, SEG *r2, int *sizep);
63 bool *gseg_same(SEG *b1, SEG *b2, bool *result);
67 ** R-tree support functions
69 bool seg_same(SEG *a, SEG *b);
70 bool seg_contains_int(SEG *a, int *b);
71 bool seg_contains_float4(SEG *a, float4 *b);
72 bool seg_contains_float8(SEG *a, float8 *b);
73 bool seg_contains(SEG *a, SEG *b);
74 bool seg_contained(SEG *a, SEG *b);
75 bool seg_overlap(SEG *a, SEG *b);
76 bool seg_left(SEG *a, SEG *b);
77 bool seg_over_left(SEG *a, SEG *b);
78 bool seg_right(SEG *a, SEG *b);
79 bool seg_over_right(SEG *a, SEG *b);
80 SEG *seg_union(SEG *a, SEG *b);
81 SEG *seg_inter(SEG *a, SEG *b);
82 void rt_seg_size(SEG *a, float *sz);
87 int32 seg_cmp(SEG *a, SEG *b);
88 bool seg_lt(SEG *a, SEG *b);
89 bool seg_le(SEG *a, SEG *b);
90 bool seg_gt(SEG *a, SEG *b);
91 bool seg_ge(SEG *a, SEG *b);
92 bool seg_different(SEG *a, SEG *b);
95 ** Auxiliary funxtions
97 static int restore(char *s, float val, int n);
100 /*****************************************************************************
101 * Input/Output functions
102 *****************************************************************************/
105 seg_in(PG_FUNCTION_ARGS)
107 char *str = PG_GETARG_CSTRING(0);
108 SEG *result = palloc(sizeof(SEG));
110 seg_scanner_init(str);
112 if (seg_yyparse(result) != 0)
113 seg_yyerror(result, "bogus input");
115 seg_scanner_finish();
117 PG_RETURN_POINTER(result);
121 seg_out(PG_FUNCTION_ARGS)
123 SEG *seg = (SEG *) PG_GETARG_POINTER(0);
127 p = result = (char *) palloc(40);
129 if (seg->l_ext == '>' || seg->l_ext == '<' || seg->l_ext == '~')
130 p += sprintf(p, "%c", seg->l_ext);
132 if (seg->lower == seg->upper && seg->l_ext == seg->u_ext)
135 * indicates that this interval was built by seg_in off a single point
137 p += restore(p, seg->lower, seg->l_sigd);
141 if (seg->l_ext != '-')
143 /* print the lower boundary if exists */
144 p += restore(p, seg->lower, seg->l_sigd);
145 p += sprintf(p, " ");
147 p += sprintf(p, "..");
148 if (seg->u_ext != '-')
150 /* print the upper boundary if exists */
151 p += sprintf(p, " ");
152 if (seg->u_ext == '>' || seg->u_ext == '<' || seg->l_ext == '~')
153 p += sprintf(p, "%c", seg->u_ext);
154 p += restore(p, seg->upper, seg->u_sigd);
158 PG_RETURN_CSTRING(result);
162 seg_center(PG_FUNCTION_ARGS)
164 SEG *seg = (SEG *) PG_GETARG_POINTER(0);
166 PG_RETURN_FLOAT4(((float) seg->lower + (float) seg->upper) / 2.0);
170 seg_lower(PG_FUNCTION_ARGS)
172 SEG *seg = (SEG *) PG_GETARG_POINTER(0);
174 PG_RETURN_FLOAT4(seg->lower);
178 seg_upper(PG_FUNCTION_ARGS)
180 SEG *seg = (SEG *) PG_GETARG_POINTER(0);
182 PG_RETURN_FLOAT4(seg->upper);
186 /*****************************************************************************
188 *****************************************************************************/
191 ** The GiST Consistent method for segments
192 ** Should return false if for all data items x below entry,
193 ** the predicate x op query == FALSE, where op is the oper
194 ** corresponding to strategy in the pg_amop table.
197 gseg_consistent(GISTENTRY *entry,
199 StrategyNumber strategy,
203 /* All cases served by this function are exact */
207 * if entry is not leaf, use gseg_internal_consistent, else use
208 * gseg_leaf_consistent
210 if (GIST_LEAF(entry))
211 return (gseg_leaf_consistent((SEG *) DatumGetPointer(entry->key), query, strategy));
213 return (gseg_internal_consistent((SEG *) DatumGetPointer(entry->key), query, strategy));
217 ** The GiST Union method for segments
218 ** returns the minimal bounding seg that encloses all the entries in entryvec
221 gseg_union(GistEntryVector *entryvec, int *sizep)
225 SEG *out = (SEG *) NULL;
229 fprintf(stderr, "union\n");
232 numranges = entryvec->n;
233 tmp = (SEG *) DatumGetPointer(entryvec->vector[0].key);
234 *sizep = sizeof(SEG);
236 for (i = 1; i < numranges; i++)
238 out = gseg_binary_union(tmp, (SEG *)
239 DatumGetPointer(entryvec->vector[i].key),
248 ** GiST Compress and Decompress methods for segments
249 ** do not do anything.
252 gseg_compress(GISTENTRY *entry)
258 gseg_decompress(GISTENTRY *entry)
264 ** The GiST Penalty method for segments
265 ** As in the R-tree paper, we use change in area as our penalty metric
268 gseg_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result)
274 ud = seg_union((SEG *) DatumGetPointer(origentry->key),
275 (SEG *) DatumGetPointer(newentry->key));
276 rt_seg_size(ud, &tmp1);
277 rt_seg_size((SEG *) DatumGetPointer(origentry->key), &tmp2);
278 *result = tmp1 - tmp2;
281 fprintf(stderr, "penalty\n");
282 fprintf(stderr, "\t%g\n", *result);
289 * Compare function for gseg_picksplit_item: sort by center.
292 gseg_picksplit_item_cmp(const void *a, const void *b)
294 const gseg_picksplit_item *i1 = (const gseg_picksplit_item *) a;
295 const gseg_picksplit_item *i2 = (const gseg_picksplit_item *) b;
297 if (i1->center < i2->center)
299 else if (i1->center == i2->center)
306 * The GiST PickSplit method for segments
308 * We used to use Guttman's split algorithm here, but since the data is 1-D
309 * it's easier and more robust to just sort the segments by center-point and
310 * split at the middle.
313 gseg_picksplit(GistEntryVector *entryvec,
320 gseg_picksplit_item *sort_items;
324 OffsetNumber firstright;
327 fprintf(stderr, "picksplit\n");
330 /* Valid items in entryvec->vector[] are indexed 1..maxoff */
331 maxoff = entryvec->n - 1;
334 * Prepare the auxiliary array and sort it.
336 sort_items = (gseg_picksplit_item *)
337 palloc(maxoff * sizeof(gseg_picksplit_item));
338 for (i = 1; i <= maxoff; i++)
340 seg = (SEG *) DatumGetPointer(entryvec->vector[i].key);
341 /* center calculation is done this way to avoid possible overflow */
342 sort_items[i - 1].center = seg->lower * 0.5f + seg->upper * 0.5f;
343 sort_items[i - 1].index = i;
344 sort_items[i - 1].data = seg;
346 qsort(sort_items, maxoff, sizeof(gseg_picksplit_item),
347 gseg_picksplit_item_cmp);
349 /* sort items below "firstright" will go into the left side */
350 firstright = maxoff / 2;
352 v->spl_left = (OffsetNumber *) palloc(maxoff * sizeof(OffsetNumber));
353 v->spl_right = (OffsetNumber *) palloc(maxoff * sizeof(OffsetNumber));
356 right = v->spl_right;
360 * Emit segments to the left output page, and compute its bounding box.
362 datum_l = (SEG *) palloc(sizeof(SEG));
363 memcpy(datum_l, sort_items[0].data, sizeof(SEG));
364 *left++ = sort_items[0].index;
366 for (i = 1; i < firstright; i++)
368 datum_l = seg_union(datum_l, sort_items[i].data);
369 *left++ = sort_items[i].index;
374 * Likewise for the right page.
376 datum_r = (SEG *) palloc(sizeof(SEG));
377 memcpy(datum_r, sort_items[firstright].data, sizeof(SEG));
378 *right++ = sort_items[firstright].index;
380 for (i = firstright + 1; i < maxoff; i++)
382 datum_r = seg_union(datum_r, sort_items[i].data);
383 *right++ = sort_items[i].index;
387 v->spl_ldatum = PointerGetDatum(datum_l);
388 v->spl_rdatum = PointerGetDatum(datum_r);
397 gseg_same(SEG *b1, SEG *b2, bool *result)
399 if (seg_same(b1, b2))
405 fprintf(stderr, "same: %s\n", (*result ? "TRUE" : "FALSE"));
415 gseg_leaf_consistent(SEG *key,
417 StrategyNumber strategy)
421 #ifdef GIST_QUERY_DEBUG
422 fprintf(stderr, "leaf_consistent, %d\n", strategy);
427 case RTLeftStrategyNumber:
428 retval = (bool) seg_left(key, query);
430 case RTOverLeftStrategyNumber:
431 retval = (bool) seg_over_left(key, query);
433 case RTOverlapStrategyNumber:
434 retval = (bool) seg_overlap(key, query);
436 case RTOverRightStrategyNumber:
437 retval = (bool) seg_over_right(key, query);
439 case RTRightStrategyNumber:
440 retval = (bool) seg_right(key, query);
442 case RTSameStrategyNumber:
443 retval = (bool) seg_same(key, query);
445 case RTContainsStrategyNumber:
446 case RTOldContainsStrategyNumber:
447 retval = (bool) seg_contains(key, query);
449 case RTContainedByStrategyNumber:
450 case RTOldContainedByStrategyNumber:
451 retval = (bool) seg_contained(key, query);
460 gseg_internal_consistent(SEG *key,
462 StrategyNumber strategy)
466 #ifdef GIST_QUERY_DEBUG
467 fprintf(stderr, "internal_consistent, %d\n", strategy);
472 case RTLeftStrategyNumber:
473 retval = (bool) !seg_over_right(key, query);
475 case RTOverLeftStrategyNumber:
476 retval = (bool) !seg_right(key, query);
478 case RTOverlapStrategyNumber:
479 retval = (bool) seg_overlap(key, query);
481 case RTOverRightStrategyNumber:
482 retval = (bool) !seg_left(key, query);
484 case RTRightStrategyNumber:
485 retval = (bool) !seg_over_left(key, query);
487 case RTSameStrategyNumber:
488 case RTContainsStrategyNumber:
489 case RTOldContainsStrategyNumber:
490 retval = (bool) seg_contains(key, query);
492 case RTContainedByStrategyNumber:
493 case RTOldContainedByStrategyNumber:
494 retval = (bool) seg_overlap(key, query);
503 gseg_binary_union(SEG *r1, SEG *r2, int *sizep)
507 retval = seg_union(r1, r2);
508 *sizep = sizeof(SEG);
515 seg_contains(SEG *a, SEG *b)
517 return ((a->lower <= b->lower) && (a->upper >= b->upper));
521 seg_contained(SEG *a, SEG *b)
523 return (seg_contains(b, a));
526 /*****************************************************************************
527 * Operator class for R-tree indexing
528 *****************************************************************************/
531 seg_same(SEG *a, SEG *b)
533 return seg_cmp(a, b) == 0;
536 /* seg_overlap -- does a overlap b?
539 seg_overlap(SEG *a, SEG *b)
542 ((a->upper >= b->upper) && (a->lower <= b->upper))
544 ((b->upper >= a->upper) && (b->lower <= a->upper))
548 /* seg_overleft -- is the right edge of (a) located at or left of the right edge of (b)?
551 seg_over_left(SEG *a, SEG *b)
553 return (a->upper <= b->upper);
556 /* seg_left -- is (a) entirely on the left of (b)?
559 seg_left(SEG *a, SEG *b)
561 return (a->upper < b->lower);
564 /* seg_right -- is (a) entirely on the right of (b)?
567 seg_right(SEG *a, SEG *b)
569 return (a->lower > b->upper);
572 /* seg_overright -- is the left edge of (a) located at or right of the left edge of (b)?
575 seg_over_right(SEG *a, SEG *b)
577 return (a->lower >= b->lower);
582 seg_union(SEG *a, SEG *b)
586 n = (SEG *) palloc(sizeof(*n));
588 /* take max of upper endpoints */
589 if (a->upper > b->upper)
592 n->u_sigd = a->u_sigd;
598 n->u_sigd = b->u_sigd;
602 /* take min of lower endpoints */
603 if (a->lower < b->lower)
606 n->l_sigd = a->l_sigd;
612 n->l_sigd = b->l_sigd;
621 seg_inter(SEG *a, SEG *b)
625 n = (SEG *) palloc(sizeof(*n));
627 /* take min of upper endpoints */
628 if (a->upper < b->upper)
631 n->u_sigd = a->u_sigd;
637 n->u_sigd = b->u_sigd;
641 /* take max of lower endpoints */
642 if (a->lower > b->lower)
645 n->l_sigd = a->l_sigd;
651 n->l_sigd = b->l_sigd;
659 rt_seg_size(SEG *a, float *size)
661 if (a == (SEG *) NULL || a->upper <= a->lower)
664 *size = (float) Abs(a->upper - a->lower);
670 seg_size(PG_FUNCTION_ARGS)
672 SEG *seg = (SEG *) PG_GETARG_POINTER(0);
674 PG_RETURN_FLOAT4((float) Abs(seg->upper - seg->lower));
678 /*****************************************************************************
679 * Miscellaneous operators
680 *****************************************************************************/
682 seg_cmp(SEG *a, SEG *b)
685 * First compare on lower boundary position
687 if (a->lower < b->lower)
689 if (a->lower > b->lower)
693 * a->lower == b->lower, so consider type of boundary.
695 * A '-' lower bound is < any other kind (this could only be relevant if
696 * -HUGE_VAL is used as a regular data value). A '<' lower bound is < any
697 * other kind except '-'. A '>' lower bound is > any other kind.
699 if (a->l_ext != b->l_ext)
716 * For other boundary types, consider # of significant digits first.
718 if (a->l_sigd < b->l_sigd) /* (a) is blurred and is likely to include (b) */
720 if (a->l_sigd > b->l_sigd) /* (a) is less blurred and is likely to be
725 * For same # of digits, an approximate boundary is more blurred than
728 if (a->l_ext != b->l_ext)
730 if (a->l_ext == '~') /* (a) is approximate, while (b) is exact */
734 /* can't get here unless data is corrupt */
735 elog(ERROR, "bogus lower boundary types %d %d",
736 (int) a->l_ext, (int) b->l_ext);
739 /* at this point, the lower boundaries are identical */
742 * First compare on upper boundary position
744 if (a->upper < b->upper)
746 if (a->upper > b->upper)
750 * a->upper == b->upper, so consider type of boundary.
752 * A '-' upper bound is > any other kind (this could only be relevant if
753 * HUGE_VAL is used as a regular data value). A '<' upper bound is < any
754 * other kind. A '>' upper bound is > any other kind except '-'.
756 if (a->u_ext != b->u_ext)
773 * For other boundary types, consider # of significant digits first. Note
774 * result here is converse of the lower-boundary case.
776 if (a->u_sigd < b->u_sigd) /* (a) is blurred and is likely to include (b) */
778 if (a->u_sigd > b->u_sigd) /* (a) is less blurred and is likely to be
783 * For same # of digits, an approximate boundary is more blurred than
784 * exact. Again, result is converse of lower-boundary case.
786 if (a->u_ext != b->u_ext)
788 if (a->u_ext == '~') /* (a) is approximate, while (b) is exact */
792 /* can't get here unless data is corrupt */
793 elog(ERROR, "bogus upper boundary types %d %d",
794 (int) a->u_ext, (int) b->u_ext);
801 seg_lt(SEG *a, SEG *b)
803 return seg_cmp(a, b) < 0;
807 seg_le(SEG *a, SEG *b)
809 return seg_cmp(a, b) <= 0;
813 seg_gt(SEG *a, SEG *b)
815 return seg_cmp(a, b) > 0;
819 seg_ge(SEG *a, SEG *b)
821 return seg_cmp(a, b) >= 0;
825 seg_different(SEG *a, SEG *b)
827 return seg_cmp(a, b) != 0;
832 /*****************************************************************************
833 * Auxiliary functions
834 *****************************************************************************/
836 /* The purpose of this routine is to print the floating point
837 * value with exact number of significant digits. Its behaviour
838 * is similar to %.ng except it prints 8.00 where %.ng would
842 restore(char *result, float val, int n)
844 static char efmt[8] = {'%', '-', '1', '5', '.', '#', 'e', 0};
846 '0', '0', '0', '0', '0',
847 '0', '0', '0', '0', '0',
848 '0', '0', '0', '0', '0',
849 '0', '0', '0', '0', '0',
850 '0', '0', '0', '0', '\0'
859 * put a cap on the number of siugnificant digits to avoid nonsense in the
864 /* remember the sign */
865 sign = (val < 0 ? 1 : 0);
867 efmt[5] = '0' + (n - 1) % 10; /* makes %-15.(n-1)e -- this format
868 * guarantees that the exponent is
871 sprintf(result, efmt, val);
873 /* trim the spaces left by the %e */
874 for (p = result; *p != ' '; p++);
877 /* get the exponent */
878 strtok(pstrdup(result), "e");
879 exp = atoi(strtok(NULL, "e"));
883 /* use the supplied mantyssa with sign */
884 strcpy((char *) strchr(result, 'e'), "");
891 * remove the decimal point from the mantyssa and write the digits
894 for (p = result + sign, i = 10, dp = 0; *p != 'e'; p++, i++)
899 dp = i--; /* skip the decimal point */
903 dp = i--; /* no decimal point was found in the above
908 if (dp - 10 + exp >= n)
911 * the decimal point is behind the last significant digit;
912 * the digits in between must be converted to the exponent
913 * and the decimal point placed after the first digit
915 exp = dp - 10 + exp - n;
918 /* insert the decimal point */
922 for (i = 23; i > dp; i--)
928 * adjust the exponent by the number of digits after the
932 sprintf(&buf[11 + n], "e%d", exp + n - 1);
934 sprintf(&buf[11], "e%d", exp + n - 1);
939 strcpy(result, &buf[9]);
942 strcpy(result, &buf[10]);
945 { /* insert the decimal point */
947 for (i = 23; i > dp; i--)
954 strcpy(result, &buf[9]);
957 strcpy(result, &buf[10]);
968 strcpy(result, &buf[dp - 2]);
971 strcpy(result, &buf[dp - 1]);
975 /* do nothing for Abs(exp) > 4; %e must be OK */
976 /* just get rid of zeroes after [eE]- and +zeroes after [Ee]. */
978 /* ... this is not done yet. */
980 return (strlen(result));
989 seg_contains_int(SEG *a, int *b)
991 return ((a->lower <= *b) && (a->upper >= *b));
995 seg_contains_float4(SEG *a, float4 *b)
997 return ((a->lower <= *b) && (a->upper >= *b));
1001 seg_contains_float8(SEG *a, float8 *b)
1003 return ((a->lower <= *b) && (a->upper >= *b));
1006 /* find out the number of significant digits in a string representing
1007 * a floating point number
1010 significant_digits(char *s)
1018 /* skip leading zeroes and sign */
1019 for (c = *p; (c == '0' || c == '+' || c == '-') && c != 0; c = *(++p));
1021 /* skip decimal point and following zeroes */
1022 for (c = *p; (c == '0' || c == '.') && c != 0; c = *(++p))
1028 /* count significant digits (n) */
1029 for (c = *p, n = 0; c != 0; c = *(++p))
1031 if (!((c >= '0' && c <= '9') || (c == '.')))