1 /******************************************************************************
2 This file contains routines that can be bound to a Postgres backend and
3 called by the backend in the process of processing queries. The calling
4 format for these routines is dictated by Postgres architecture.
5 ******************************************************************************/
11 #include "access/gist.h"
12 #include "access/rtree.h"
13 #include "utils/elog.h"
14 #include "utils/palloc.h"
15 #include "utils/builtins.h"
19 #define max(a,b) ((a) > (b) ? (a) : (b))
20 #define min(a,b) ((a) <= (b) ? (a) : (b))
21 #define abs(a) ((a) < (0) ? (-a) : (a))
23 extern void set_parse_buffer(char *str);
24 extern int cube_yyparse();
27 ** Input/Output routines
29 NDBOX *cube_in(char *str);
30 char *cube_out(NDBOX * cube);
34 ** GiST support methods
36 bool g_cube_consistent(GISTENTRY *entry, NDBOX * query, StrategyNumber strategy);
37 GISTENTRY *g_cube_compress(GISTENTRY *entry);
38 GISTENTRY *g_cube_decompress(GISTENTRY *entry);
39 float *g_cube_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result);
40 GIST_SPLITVEC *g_cube_picksplit(bytea *entryvec, GIST_SPLITVEC *v);
41 bool g_cube_leaf_consistent(NDBOX * key, NDBOX * query, StrategyNumber strategy);
42 bool g_cube_internal_consistent(NDBOX * key, NDBOX * query, StrategyNumber strategy);
43 NDBOX *g_cube_union(bytea *entryvec, int *sizep);
44 NDBOX *g_cube_binary_union(NDBOX * r1, NDBOX * r2, int *sizep);
45 bool *g_cube_same(NDBOX * b1, NDBOX * b2, bool *result);
48 ** R-tree support functions
50 bool cube_same(NDBOX * a, NDBOX * b);
51 bool cube_different(NDBOX * a, NDBOX * b);
52 bool cube_contains(NDBOX * a, NDBOX * b);
53 bool cube_contained(NDBOX * a, NDBOX * b);
54 bool cube_overlap(NDBOX * a, NDBOX * b);
55 NDBOX *cube_union(NDBOX * a, NDBOX * b);
56 NDBOX *cube_inter(NDBOX * a, NDBOX * b);
57 float *cube_size(NDBOX * a);
58 void rt_cube_size(NDBOX * a, float *sz);
61 ** These make no sense for this type, but R-tree wants them
63 bool cube_over_left(NDBOX * a, NDBOX * b);
64 bool cube_over_right(NDBOX * a, NDBOX * b);
65 bool cube_left(NDBOX * a, NDBOX * b);
66 bool cube_right(NDBOX * a, NDBOX * b);
71 bool cube_lt(NDBOX * a, NDBOX * b);
72 bool cube_gt(NDBOX * a, NDBOX * b);
73 float *cube_distance(NDBOX * a, NDBOX * b);
76 ** Auxiliary funxtions
78 static float distance_1D(float a1, float a2, float b1, float b2);
79 static NDBOX *swap_corners(NDBOX * a);
82 /*****************************************************************************
83 * Input/Output functions
84 *****************************************************************************/
86 /* NdBox = [(lowerleft),(upperright)] */
87 /* [(xLL(1)...xLL(N)),(xUR(1)...xUR(n))] */
93 set_parse_buffer(str);
95 if (cube_yyparse(&result) != 0)
98 return ((NDBOX *) result);
102 * You might have noticed a slight inconsistency between the following
103 * declaration and the SQL definition:
104 * CREATE FUNCTION cube_out(opaque) RETURNS opaque ...
105 * The reason is that the argument pass into cube_out is really just a
106 * pointer. POSTGRES thinks all output functions are:
107 * char *out_func(char *);
110 cube_out(NDBOX * cube)
121 p = result = (char *) palloc(100);
124 * while printing the first (LL) corner, check if it is equal to the
127 p += sprintf(p, "(");
128 for (i = 0; i < dim; i++)
130 p += sprintf(p, "%g", cube->x[i]);
131 p += sprintf(p, ", ");
132 if (cube->x[i] != cube->x[i + dim])
135 p -= 2; /* get rid of the last ", " */
136 p += sprintf(p, ")");
140 p += sprintf(p, ",(");
141 for (i = dim; i < dim * 2; i++)
143 p += sprintf(p, "%g", cube->x[i]);
144 p += sprintf(p, ", ");
147 p += sprintf(p, ")");
154 /*****************************************************************************
156 *****************************************************************************/
159 ** The GiST Consistent method for boxes
160 ** Should return false if for all data items x below entry,
161 ** the predicate x op query == FALSE, where op is the oper
162 ** corresponding to strategy in the pg_amop table.
165 g_cube_consistent(GISTENTRY *entry,
167 StrategyNumber strategy)
171 * if entry is not leaf, use g_cube_internal_consistent, else use
172 * g_cube_leaf_consistent
174 if (GIST_LEAF(entry))
175 return g_cube_leaf_consistent((NDBOX *) DatumGetPointer(entry->key),
178 return g_cube_internal_consistent((NDBOX *) DatumGetPointer(entry->key),
184 ** The GiST Union method for boxes
185 ** returns the minimal bounding box that encloses all the entries in entryvec
188 g_cube_union(bytea *entryvec, int *sizep)
192 NDBOX *out = (NDBOX *) NULL;
196 * fprintf(stderr, "union\n");
198 numranges = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
199 tmp = (NDBOX *) DatumGetPointer((((GISTENTRY *) (VARDATA(entryvec)))[0]).key);
202 * sizep = sizeof(NDBOX); -- NDBOX has variable size
206 for (i = 1; i < numranges; i++)
208 out = g_cube_binary_union(tmp, (NDBOX *)
209 DatumGetPointer((((GISTENTRY *) (VARDATA(entryvec)))[i]).key),
220 ** GiST Compress and Decompress methods for boxes
221 ** do not do anything.
224 g_cube_compress(GISTENTRY *entry)
230 g_cube_decompress(GISTENTRY *entry)
236 ** The GiST Penalty method for boxes
237 ** As in the R-tree paper, we use change in area as our penalty metric
240 g_cube_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result)
246 ud = cube_union((NDBOX *) DatumGetPointer(origentry->key),
247 (NDBOX *) DatumGetPointer(newentry->key));
248 rt_cube_size(ud, &tmp1);
249 rt_cube_size((NDBOX *) DatumGetPointer(origentry->key), &tmp2);
250 *result = tmp1 - tmp2;
254 * fprintf(stderr, "penalty\n"); fprintf(stderr, "\t%g\n", *result);
262 ** The GiST PickSplit method for boxes
263 ** We use Guttman's poly time split algorithm
266 g_cube_picksplit(bytea *entryvec,
289 OffsetNumber seed_1 = 0,
296 * fprintf(stderr, "picksplit\n");
298 maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
299 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
300 v->spl_left = (OffsetNumber *) palloc(nbytes);
301 v->spl_right = (OffsetNumber *) palloc(nbytes);
306 for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
308 datum_alpha = (NDBOX *) DatumGetPointer(((GISTENTRY *) (VARDATA(entryvec)))[i].key);
309 for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
311 datum_beta = (NDBOX *) DatumGetPointer(((GISTENTRY *) (VARDATA(entryvec)))[j].key);
313 /* compute the wasted space by unioning these guys */
314 /* size_waste = size_union - size_inter; */
315 union_d = cube_union(datum_alpha, datum_beta);
316 rt_cube_size(union_d, &size_union);
317 inter_d = cube_inter(datum_alpha, datum_beta);
318 rt_cube_size(inter_d, &size_inter);
319 size_waste = size_union - size_inter;
323 if (inter_d != (NDBOX *) NULL)
327 * are these a more promising split than what we've already
331 if (size_waste > waste || firsttime)
343 right = v->spl_right;
346 datum_alpha = (NDBOX *) DatumGetPointer(((GISTENTRY *) (VARDATA(entryvec)))[seed_1].key);
347 datum_l = cube_union(datum_alpha, datum_alpha);
348 rt_cube_size(datum_l, &size_l);
349 datum_beta = (NDBOX *) DatumGetPointer(((GISTENTRY *) (VARDATA(entryvec)))[seed_2].key);
350 datum_r = cube_union(datum_beta, datum_beta);
351 rt_cube_size(datum_r, &size_r);
354 * Now split up the regions between the two seeds. An important
355 * property of this split algorithm is that the split vector v has the
356 * indices of items to be split in order in its left and right
357 * vectors. We exploit this property by doing a merge in the code
358 * that actually splits the page.
360 * For efficiency, we also place the new index tuple in this loop. This
361 * is handled at the very end, when we have placed all the existing
362 * tuples and i == maxoff + 1.
365 maxoff = OffsetNumberNext(maxoff);
366 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
370 * If we've already decided where to place this item, just put it
371 * on the right list. Otherwise, we need to figure out which page
372 * needs the least enlargement in order to store the item.
381 else if (i == seed_2)
388 /* okay, which page needs least enlargement? */
389 datum_alpha = (NDBOX *) DatumGetPointer(((GISTENTRY *) (VARDATA(entryvec)))[i].key);
390 union_dl = cube_union(datum_l, datum_alpha);
391 union_dr = cube_union(datum_r, datum_alpha);
392 rt_cube_size(union_dl, &size_alpha);
393 rt_cube_size(union_dr, &size_beta);
395 /* pick which page to add it to */
396 if (size_alpha - size_l < size_beta - size_r)
415 *left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */
417 v->spl_ldatum = PointerGetDatum(datum_l);
418 v->spl_rdatum = PointerGetDatum(datum_r);
427 g_cube_same(NDBOX * b1, NDBOX * b2, bool *result)
429 if (cube_same(b1, b2))
435 * fprintf(stderr, "same: %s\n", (*result ? "TRUE" : "FALSE" ));
444 g_cube_leaf_consistent(NDBOX * key,
446 StrategyNumber strategy)
451 * fprintf(stderr, "leaf_consistent, %d\n", strategy);
455 case RTLeftStrategyNumber:
456 retval = (bool) cube_left(key, query);
458 case RTOverLeftStrategyNumber:
459 retval = (bool) cube_over_left(key, query);
461 case RTOverlapStrategyNumber:
462 retval = (bool) cube_overlap(key, query);
464 case RTOverRightStrategyNumber:
465 retval = (bool) cube_over_right(key, query);
467 case RTRightStrategyNumber:
468 retval = (bool) cube_right(key, query);
470 case RTSameStrategyNumber:
471 retval = (bool) cube_same(key, query);
473 case RTContainsStrategyNumber:
474 retval = (bool) cube_contains(key, query);
476 case RTContainedByStrategyNumber:
477 retval = (bool) cube_contained(key, query);
486 g_cube_internal_consistent(NDBOX * key,
488 StrategyNumber strategy)
493 * fprintf(stderr, "internal_consistent, %d\n", strategy);
497 case RTLeftStrategyNumber:
498 case RTOverLeftStrategyNumber:
499 retval = (bool) cube_over_left(key, query);
501 case RTOverlapStrategyNumber:
502 retval = (bool) cube_overlap(key, query);
504 case RTOverRightStrategyNumber:
505 case RTRightStrategyNumber:
506 retval = (bool) cube_right(key, query);
508 case RTSameStrategyNumber:
509 case RTContainsStrategyNumber:
510 retval = (bool) cube_contains(key, query);
512 case RTContainedByStrategyNumber:
513 retval = (bool) cube_overlap(key, query);
522 g_cube_binary_union(NDBOX * r1, NDBOX * r2, int *sizep)
526 retval = cube_union(r1, r2);
527 *sizep = retval->size;
535 cube_union(NDBOX * box_a, NDBOX * box_b)
539 NDBOX *a = swap_corners(box_a);
540 NDBOX *b = swap_corners(box_b);
542 if (a->dim >= b->dim)
544 result = palloc(a->size);
545 result->size = a->size;
546 result->dim = a->dim;
550 result = palloc(b->size);
551 result->size = b->size;
552 result->dim = b->dim;
555 /* swap the box pointers if needed */
565 * use the potentially smaller of the two boxes (b) to fill in the
566 * result, padding absent dimensions with zeroes
568 for (i = 0; i < b->dim; i++)
570 result->x[i] = b->x[i];
571 result->x[i + a->dim] = b->x[i + b->dim];
573 for (i = b->dim; i < a->dim; i++)
576 result->x[i + a->dim] = 0;
579 /* compute the union */
580 for (i = 0; i < a->dim; i++)
581 result->x[i] = min(a->x[i], result->x[i]);
582 for (i = a->dim; i < a->dim * 2; i++)
583 result->x[i] = max(a->x[i], result->x[i]);
593 cube_inter(NDBOX * box_a, NDBOX * box_b)
597 NDBOX *a = swap_corners(box_a);
598 NDBOX *b = swap_corners(box_b);
600 if (a->dim >= b->dim)
602 result = palloc(a->size);
603 result->size = a->size;
604 result->dim = a->dim;
608 result = palloc(b->size);
609 result->size = b->size;
610 result->dim = b->dim;
613 /* swap the box pointers if needed */
623 * use the potentially smaller of the two boxes (b) to fill in the
624 * result, padding absent dimensions with zeroes
626 for (i = 0; i < b->dim; i++)
628 result->x[i] = b->x[i];
629 result->x[i + a->dim] = b->x[i + b->dim];
631 for (i = b->dim; i < a->dim; i++)
634 result->x[i + a->dim] = 0;
637 /* compute the intersection */
638 for (i = 0; i < a->dim; i++)
639 result->x[i] = max(a->x[i], result->x[i]);
640 for (i = a->dim; i < a->dim * 2; i++)
641 result->x[i] = min(a->x[i], result->x[i]);
647 * Is it OK to return a non-null intersection for non-overlapping
661 result = (float *) palloc(sizeof(float));
664 for (i = 0, j = a->dim; i < a->dim; i++, j++)
665 *result = (*result) * abs((a->x[j] - a->x[i]));
671 rt_cube_size(NDBOX * a, float *size)
676 if (a == (NDBOX *) NULL)
681 for (i = 0, j = a->dim; i < a->dim; i++, j++)
682 *size = (*size) * abs((a->x[j] - a->x[i]));
687 /* The following four methods compare the projections of the boxes
688 onto the 0-th coordinate axis. These methods are useless for dimensions
689 larger than 2, but it seems that R-tree requires all its strategies
690 map to real functions that return something */
692 /* is the right edge of (a) located to the left of
693 the right edge of (b)? */
695 cube_over_left(NDBOX * box_a, NDBOX * box_b)
700 if ((box_a == NULL) || (box_b == NULL))
703 a = swap_corners(box_a);
704 b = swap_corners(box_b);
706 return (a->x[a->dim - 1] <= b->x[b->dim - 1] && !cube_left(a, b) && !cube_right(a, b));
709 /* is the left edge of (a) located to the right of
710 the left edge of (b)? */
712 cube_over_right(NDBOX * box_a, NDBOX * box_b)
717 if ((box_a == NULL) || (box_b == NULL))
720 a = swap_corners(box_a);
721 b = swap_corners(box_b);
723 return (a->x[a->dim - 1] >= b->x[b->dim - 1] && !cube_left(a, b) && !cube_right(a, b));
727 /* return 'true' if the projection of 'a' is
728 entirely on the left of the projection of 'b' */
730 cube_left(NDBOX * box_a, NDBOX * box_b)
735 if ((box_a == NULL) || (box_b == NULL))
738 a = swap_corners(box_a);
739 b = swap_corners(box_b);
741 return (a->x[a->dim - 1] < b->x[0]);
744 /* return 'true' if the projection of 'a' is
745 entirely on the right of the projection of 'b' */
747 cube_right(NDBOX * box_a, NDBOX * box_b)
752 if ((box_a == NULL) || (box_b == NULL))
755 a = swap_corners(box_a);
756 b = swap_corners(box_b);
758 return (a->x[0] > b->x[b->dim - 1]);
761 /* make up a metric in which one box will be 'lower' than the other
762 -- this can be useful for srting and to determine uniqueness */
764 cube_lt(NDBOX * box_a, NDBOX * box_b)
771 if ((box_a == NULL) || (box_b == NULL))
774 a = swap_corners(box_a);
775 b = swap_corners(box_b);
776 dim = min(a->dim, b->dim);
779 * if all common dimensions are equal, the cube with more dimensions
790 /* compare the common dimensions */
791 for (i = 0; i < dim; i++)
793 if (a->x[i] > b->x[i])
795 if (a->x[i] < b->x[i])
798 for (i = 0; i < dim; i++)
800 if (a->x[i + a->dim] > b->x[i + b->dim])
802 if (a->x[i + a->dim] < b->x[i + b->dim])
806 /* compare extra dimensions to zero */
809 for (i = dim; i < a->dim; i++)
816 for (i = 0; i < dim; i++)
818 if (a->x[i + a->dim] > 0)
820 if (a->x[i + a->dim] < 0)
826 for (i = dim; i < b->dim; i++)
833 for (i = 0; i < dim; i++)
835 if (b->x[i + b->dim] > 0)
837 if (b->x[i + b->dim] < 0)
847 cube_gt(NDBOX * box_a, NDBOX * box_b)
854 if ((box_a == NULL) || (box_b == NULL))
857 a = swap_corners(box_a);
858 b = swap_corners(box_b);
859 dim = min(a->dim, b->dim);
862 * if all common dimensions are equal, the cube with more dimensions
873 /* compare the common dimensions */
874 for (i = 0; i < dim; i++)
876 if (a->x[i] < b->x[i])
878 if (a->x[i] > b->x[i])
881 for (i = 0; i < dim; i++)
883 if (a->x[i + a->dim] < b->x[i + b->dim])
885 if (a->x[i + a->dim] > b->x[i + b->dim])
890 /* compare extra dimensions to zero */
893 for (i = dim; i < a->dim; i++)
900 for (i = 0; i < dim; i++)
902 if (a->x[i + a->dim] < 0)
904 if (a->x[i + a->dim] > 0)
910 for (i = dim; i < b->dim; i++)
917 for (i = 0; i < dim; i++)
919 if (b->x[i + b->dim] < 0)
921 if (b->x[i + b->dim] > 0)
932 cube_same(NDBOX * box_a, NDBOX * box_b)
938 if ((box_a == NULL) || (box_b == NULL))
941 a = swap_corners(box_a);
942 b = swap_corners(box_b);
944 /* swap the box pointers if necessary */
953 for (i = 0; i < b->dim; i++)
955 if (a->x[i] != b->x[i])
957 if (a->x[i + a->dim] != b->x[i + b->dim])
962 * all dimensions of (b) are compared to those of (a); instead of
963 * those in (a) absent in (b), compare (a) to zero
965 for (i = b->dim; i < a->dim; i++)
969 if (a->x[i + a->dim] != 0)
981 cube_different(NDBOX * box_a, NDBOX * box_b)
983 return (!cube_same(box_a, box_b));
988 /* Box(A) CONTAINS Box(B) IFF pt(A) < pt(B) */
990 cube_contains(NDBOX * box_a, NDBOX * box_b)
996 if ((box_a == NULL) || (box_b == NULL))
999 a = swap_corners(box_a);
1000 b = swap_corners(box_b);
1002 if (a->dim < b->dim)
1006 * the further comparisons will make sense if the excess
1007 * dimensions of (b) were zeroes
1009 for (i = a->dim; i < b->dim; i++)
1013 if (b->x[i + b->dim] != 0)
1018 /* Can't care less about the excess dimensions of (a), if any */
1019 for (i = 0; i < min(a->dim, b->dim); i++)
1021 if (a->x[i] > b->x[i])
1023 if (a->x[i + a->dim] < b->x[i + b->dim])
1034 /* Box(A) Contained by Box(B) IFF Box(B) Contains Box(A) */
1036 cube_contained(NDBOX * a, NDBOX * b)
1038 if (cube_contains(b, a) == TRUE)
1045 /* Box(A) Overlap Box(B) IFF (pt(a)LL < pt(B)UR) && (pt(b)LL < pt(a)UR) */
1047 cube_overlap(NDBOX * box_a, NDBOX * box_b)
1054 * This *very bad* error was found in the source: if ( (a==NULL) ||
1055 * (b=NULL) ) return(FALSE);
1057 if ((box_a == NULL) || (box_b == NULL))
1060 a = swap_corners(box_a);
1061 b = swap_corners(box_b);
1063 /* swap the box pointers if needed */
1064 if (a->dim < b->dim)
1072 /* compare within the dimensions of (b) */
1073 for (i = 0; i < b->dim; i++)
1075 if (a->x[i] > b->x[i + b->dim])
1077 if (a->x[i + a->dim] < b->x[i])
1081 /* compare to zero those dimensions in (a) absent in (b) */
1082 for (i = b->dim; i < a->dim; i++)
1086 if (a->x[i + a->dim] < 0)
1098 /* The distance is computed as a per axis sum of the squared distances
1099 between 1D projections of the boxes onto Cartesian axes. Assuming zero
1100 distance between overlapping projections, this metric coincides with the
1101 "common sense" geometric distance */
1103 cube_distance(NDBOX * a, NDBOX * b)
1110 result = (float *) palloc(sizeof(float));
1112 /* swap the box pointers if needed */
1113 if (a->dim < b->dim)
1122 /* compute within the dimensions of (b) */
1123 for (i = 0; i < b->dim; i++)
1125 d = distance_1D(a->x[i], a->x[i + a->dim], b->x[i], b->x[i + b->dim]);
1129 /* compute distance to zero for those dimensions in (a) absent in (b) */
1130 for (i = b->dim; i < a->dim; i++)
1132 d = distance_1D(a->x[i], a->x[i + a->dim], 0.0, 0.0);
1136 *result = (float) sqrt(distance);
1142 distance_1D(float a1, float a2, float b1, float b2)
1144 /* interval (a) is entirely on the left of (b) */
1145 if ((a1 <= b1) && (a2 <= b1) && (a1 <= b2) && (a2 <= b2))
1146 return (min(b1, b2) - max(a1, a2));
1148 /* interval (a) is entirely on the right of (b) */
1149 if ((a1 > b1) && (a2 > b1) && (a1 > b2) && (a2 > b2))
1150 return (min(a1, a2) - max(b1, b2));
1152 /* the rest are all sorts of intersections */
1156 /* normalize the box's co-ordinates by placing min(xLL,xUR) to LL
1157 and max(xLL,xUR) to UR
1160 swap_corners(NDBOX * a)
1166 result = palloc(a->size);
1167 result->size = a->size;
1168 result->dim = a->dim;
1170 for (i = 0, j = a->dim; i < a->dim; i++, j++)
1172 result->x[i] = min(a->x[i], a->x[j]);
1173 result->x[j] = max(a->x[i], a->x[j]);