1 /*-------------------------------------------------------------------------
4 * GiST support for range types.
6 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/utils/adt/rangetypes_gist.c
13 *-------------------------------------------------------------------------
17 #include "access/gist.h"
18 #include "access/skey.h"
19 #include "utils/builtins.h"
20 #include "utils/rangetypes.h"
23 /* Operator strategy numbers used in the GiST range opclass */
24 /* Numbers are chosen to match up operator names with existing usages */
25 #define RANGESTRAT_BEFORE 1
26 #define RANGESTRAT_OVERLEFT 2
27 #define RANGESTRAT_OVERLAPS 3
28 #define RANGESTRAT_OVERRIGHT 4
29 #define RANGESTRAT_AFTER 5
30 #define RANGESTRAT_ADJACENT 6
31 #define RANGESTRAT_CONTAINS 7
32 #define RANGESTRAT_CONTAINED_BY 8
33 #define RANGESTRAT_CONTAINS_ELEM 16
34 #define RANGESTRAT_ELEM_CONTAINED_BY 17
35 #define RANGESTRAT_EQ 18
36 #define RANGESTRAT_NE 19
39 * Auxiliary structure for picksplit method.
43 int index; /* original index in entryvec->vector[] */
44 RangeType *data; /* range value to sort */
45 TypeCacheEntry *typcache; /* range type's info */
48 static RangeType *range_super_union(TypeCacheEntry *typcache, RangeType * r1,
50 static bool range_gist_consistent_int(FmgrInfo *flinfo,
51 StrategyNumber strategy, RangeType * key,
53 static bool range_gist_consistent_leaf(FmgrInfo *flinfo,
54 StrategyNumber strategy, RangeType * key,
56 static int sort_item_cmp(const void *a, const void *b);
59 /* GiST query consistency check */
61 range_gist_consistent(PG_FUNCTION_ARGS)
63 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
64 Datum dquery = PG_GETARG_DATUM(1);
65 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
66 /* Oid subtype = PG_GETARG_OID(3); */
67 bool *recheck = (bool *) PG_GETARG_POINTER(4);
68 RangeType *key = DatumGetRangeType(entry->key);
69 TypeCacheEntry *typcache;
72 /* All operators served by this function are exact */
78 * For element contains and contained by operators, the other operand
79 * is a "point" of the subtype. Construct a singleton range
80 * containing just that value. (Since range_contains_elem and
81 * elem_contained_by_range would do that anyway, it's actually more
82 * efficient not less so to merge these cases into range containment
83 * at this step. But revisit this if we ever change the implementation
84 * of those functions.)
86 case RANGESTRAT_CONTAINS_ELEM:
87 case RANGESTRAT_ELEM_CONTAINED_BY:
88 typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
89 query = make_singleton_range(typcache, dquery);
93 query = DatumGetRangeType(dquery);
98 PG_RETURN_BOOL(range_gist_consistent_leaf(fcinfo->flinfo, strategy,
101 PG_RETURN_BOOL(range_gist_consistent_int(fcinfo->flinfo, strategy,
105 /* form union range */
107 range_gist_union(PG_FUNCTION_ARGS)
109 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
110 GISTENTRY *ent = entryvec->vector;
111 RangeType *result_range;
112 TypeCacheEntry *typcache;
115 result_range = DatumGetRangeType(ent[0].key);
117 typcache = range_get_typcache(fcinfo, RangeTypeGetOid(result_range));
119 for (i = 1; i < entryvec->n; i++)
121 result_range = range_super_union(typcache, result_range,
122 DatumGetRangeType(ent[i].key));
125 PG_RETURN_RANGE(result_range);
128 /* compress, decompress are no-ops */
130 range_gist_compress(PG_FUNCTION_ARGS)
132 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
134 PG_RETURN_POINTER(entry);
138 range_gist_decompress(PG_FUNCTION_ARGS)
140 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
142 PG_RETURN_POINTER(entry);
145 /* page split penalty function */
147 range_gist_penalty(PG_FUNCTION_ARGS)
149 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
150 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
151 float *penalty = (float *) PG_GETARG_POINTER(2);
152 RangeType *orig = DatumGetRangeType(origentry->key);
153 RangeType *new = DatumGetRangeType(newentry->key);
154 TypeCacheEntry *typcache;
156 FmgrInfo *subtype_diff;
166 if (RangeTypeGetOid(orig) != RangeTypeGetOid(new))
167 elog(ERROR, "range types do not match");
169 typcache = range_get_typcache(fcinfo, RangeTypeGetOid(orig));
171 subtype_diff = &typcache->rng_subdiff_finfo;
173 /* we want to compare the size of "orig" to size of "orig union new" */
174 s_union = range_super_union(typcache, orig, new);
176 range_deserialize(typcache, orig, &lower1, &upper1, &empty1);
177 range_deserialize(typcache, s_union, &lower2, &upper2, &empty2);
179 /* if orig isn't empty, s_union can't be either */
180 Assert(empty1 || !empty2);
182 if (empty1 && empty2)
185 PG_RETURN_POINTER(penalty);
187 else if (empty1 && !empty2)
189 if (lower2.infinite || upper2.infinite)
191 /* from empty to infinite */
192 *penalty = get_float4_infinity();
193 PG_RETURN_POINTER(penalty);
195 else if (OidIsValid(subtype_diff->fn_oid))
197 /* from empty to upper2-lower2 */
198 *penalty = DatumGetFloat8(FunctionCall2Coll(subtype_diff,
199 typcache->rng_collation,
203 *penalty = 0; /* subtype_diff is broken */
204 PG_RETURN_POINTER(penalty);
210 PG_RETURN_POINTER(penalty);
214 Assert(lower2.infinite || !lower1.infinite);
216 if (lower2.infinite && !lower1.infinite)
217 lower_diff = get_float8_infinity();
218 else if (lower2.infinite && lower1.infinite)
220 else if (OidIsValid(subtype_diff->fn_oid))
222 lower_diff = DatumGetFloat8(FunctionCall2Coll(subtype_diff,
223 typcache->rng_collation,
227 lower_diff = 0; /* subtype_diff is broken */
231 /* only know whether there is a difference or not */
232 lower_diff = (float) range_cmp_bounds(typcache, &lower1, &lower2);
235 Assert(upper2.infinite || !upper1.infinite);
237 if (upper2.infinite && !upper1.infinite)
238 upper_diff = get_float8_infinity();
239 else if (upper2.infinite && upper1.infinite)
241 else if (OidIsValid(subtype_diff->fn_oid))
243 upper_diff = DatumGetFloat8(FunctionCall2Coll(subtype_diff,
244 typcache->rng_collation,
248 upper_diff = 0; /* subtype_diff is broken */
252 /* only know whether there is a difference or not */
253 upper_diff = (float) range_cmp_bounds(typcache, &upper2, &upper1);
256 Assert(lower_diff >= 0 && upper_diff >= 0);
258 *penalty = (float) (lower_diff + upper_diff);
259 PG_RETURN_POINTER(penalty);
263 * The GiST PickSplit method for ranges
265 * Algorithm based on sorting. Incoming array of ranges is sorted using
266 * sort_item_cmp function. After that first half of ranges goes to the left
267 * output, and the second half of ranges goes to the right output.
270 range_gist_picksplit(PG_FUNCTION_ARGS)
272 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
273 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
274 TypeCacheEntry *typcache;
276 RangeType *pred_left;
277 RangeType *pred_right;
278 PickSplitSortItem *sortItems;
280 OffsetNumber split_idx;
285 /* use first item to look up range type's info */
286 pred_left = DatumGetRangeType(entryvec->vector[FirstOffsetNumber].key);
287 typcache = range_get_typcache(fcinfo, RangeTypeGetOid(pred_left));
289 /* allocate result and work arrays */
290 maxoff = entryvec->n - 1;
291 nbytes = (maxoff + 1) * sizeof(OffsetNumber);
292 v->spl_left = (OffsetNumber *) palloc(nbytes);
293 v->spl_right = (OffsetNumber *) palloc(nbytes);
294 sortItems = (PickSplitSortItem *) palloc(maxoff * sizeof(PickSplitSortItem));
297 * Prepare auxiliary array and sort the values.
299 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
301 sortItems[i - 1].index = i;
302 sortItems[i - 1].data = DatumGetRangeType(entryvec->vector[i].key);
303 sortItems[i - 1].typcache = typcache;
305 qsort(sortItems, maxoff, sizeof(PickSplitSortItem), sort_item_cmp);
307 split_idx = maxoff / 2;
311 right = v->spl_right;
315 * First half of items goes to the left output.
317 pred_left = sortItems[0].data;
318 *left++ = sortItems[0].index;
320 for (i = 1; i < split_idx; i++)
322 pred_left = range_super_union(typcache, pred_left, sortItems[i].data);
323 *left++ = sortItems[i].index;
328 * Second half of items goes to the right output.
330 pred_right = sortItems[split_idx].data;
331 *right++ = sortItems[split_idx].index;
333 for (i = split_idx + 1; i < maxoff; i++)
335 pred_right = range_super_union(typcache, pred_right, sortItems[i].data);
336 *right++ = sortItems[i].index;
340 *left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */
342 v->spl_ldatum = RangeTypeGetDatum(pred_left);
343 v->spl_rdatum = RangeTypeGetDatum(pred_right);
345 PG_RETURN_POINTER(v);
348 /* equality comparator for GiST */
350 range_gist_same(PG_FUNCTION_ARGS)
352 /* Datum r1 = PG_GETARG_DATUM(0); */
353 /* Datum r2 = PG_GETARG_DATUM(1); */
354 bool *result = (bool *) PG_GETARG_POINTER(2);
357 * We can safely call range_eq using our fcinfo directly; it won't notice
358 * the third argument. This allows it to use fn_extra for caching.
360 *result = DatumGetBool(range_eq(fcinfo));
362 PG_RETURN_POINTER(result);
366 *----------------------------------------------------------
368 *----------------------------------------------------------
372 * Return the smallest range that contains r1 and r2
374 * XXX would it be better to redefine range_union as working this way?
377 range_super_union(TypeCacheEntry *typcache, RangeType * r1, RangeType * r2)
385 RangeBound *result_lower;
386 RangeBound *result_upper;
388 range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
389 range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
396 if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0)
397 result_lower = &lower1;
399 result_lower = &lower2;
401 if (range_cmp_bounds(typcache, &upper1, &upper2) >= 0)
402 result_upper = &upper1;
404 result_upper = &upper2;
406 /* optimization to avoid constructing a new range */
407 if (result_lower == &lower1 && result_upper == &upper1)
409 if (result_lower == &lower2 && result_upper == &upper2)
412 return make_range(typcache, result_lower, result_upper, false);
416 * trick function call: call the given function with given FmgrInfo
418 * To allow the various functions called here to cache lookups of range
419 * datatype information, we use a trick: we pass them the FmgrInfo struct
420 * for the GiST consistent function. This relies on the knowledge that
421 * none of them consult FmgrInfo for anything but fn_extra, and that they
422 * all use fn_extra the same way, i.e. as a pointer to the typcache entry
423 * for the range data type. Since the FmgrInfo is long-lived (it's actually
424 * part of the relcache entry for the index, typically) this essentially
425 * eliminates lookup overhead during operations on a GiST range index.
428 TrickFunctionCall2(PGFunction proc, FmgrInfo *flinfo, Datum arg1, Datum arg2)
430 FunctionCallInfoData fcinfo;
433 InitFunctionCallInfoData(fcinfo, flinfo, 2, InvalidOid, NULL, NULL);
435 fcinfo.arg[0] = arg1;
436 fcinfo.arg[1] = arg2;
437 fcinfo.argnull[0] = false;
438 fcinfo.argnull[1] = false;
440 result = (*proc) (&fcinfo);
443 elog(ERROR, "function %p returned NULL", proc);
449 * GiST consistent test on an index internal page
452 range_gist_consistent_int(FmgrInfo *flinfo, StrategyNumber strategy,
453 RangeType * key, RangeType * query)
461 case RANGESTRAT_BEFORE:
462 if (RangeIsEmpty(key))
464 proc = range_overright;
467 case RANGESTRAT_OVERLEFT:
468 if (RangeIsEmpty(key))
473 case RANGESTRAT_OVERLAPS:
474 proc = range_overlaps;
476 case RANGESTRAT_OVERRIGHT:
477 if (RangeIsEmpty(key))
482 case RANGESTRAT_AFTER:
483 if (RangeIsEmpty(key))
485 proc = range_overleft;
488 case RANGESTRAT_ADJACENT:
489 if (RangeIsEmpty(key) || RangeIsEmpty(query))
491 if (DatumGetBool(TrickFunctionCall2(range_adjacent, flinfo,
492 RangeTypeGetDatum(key),
493 RangeTypeGetDatum(query))))
495 proc = range_overlaps;
497 case RANGESTRAT_CONTAINS:
498 case RANGESTRAT_CONTAINS_ELEM:
499 proc = range_contains;
501 case RANGESTRAT_CONTAINED_BY:
502 case RANGESTRAT_ELEM_CONTAINED_BY:
506 proc = range_contains;
512 elog(ERROR, "unrecognized range strategy: %d", strategy);
513 proc = NULL; /* keep compiler quiet */
517 retval = DatumGetBool(TrickFunctionCall2(proc, flinfo,
518 RangeTypeGetDatum(key),
519 RangeTypeGetDatum(query)));
527 * GiST consistent test on an index leaf page
530 range_gist_consistent_leaf(FmgrInfo *flinfo, StrategyNumber strategy,
531 RangeType * key, RangeType * query)
537 case RANGESTRAT_BEFORE:
538 if (RangeIsEmpty(key) || RangeIsEmpty(query))
542 case RANGESTRAT_OVERLEFT:
543 if (RangeIsEmpty(key) || RangeIsEmpty(query))
545 proc = range_overleft;
547 case RANGESTRAT_OVERLAPS:
548 proc = range_overlaps;
550 case RANGESTRAT_OVERRIGHT:
551 if (RangeIsEmpty(key) || RangeIsEmpty(query))
553 proc = range_overright;
555 case RANGESTRAT_AFTER:
556 if (RangeIsEmpty(key) || RangeIsEmpty(query))
560 case RANGESTRAT_ADJACENT:
561 if (RangeIsEmpty(key) || RangeIsEmpty(query))
563 proc = range_adjacent;
565 case RANGESTRAT_CONTAINS:
566 case RANGESTRAT_CONTAINS_ELEM:
567 proc = range_contains;
569 case RANGESTRAT_CONTAINED_BY:
570 case RANGESTRAT_ELEM_CONTAINED_BY:
571 proc = range_contained_by;
580 elog(ERROR, "unrecognized range strategy: %d", strategy);
581 proc = NULL; /* keep compiler quiet */
585 return DatumGetBool(TrickFunctionCall2(proc, flinfo,
586 RangeTypeGetDatum(key),
587 RangeTypeGetDatum(query)));
591 * Compare function for PickSplitSortItem. This is actually the
592 * interesting part of the picksplit algorithm.
594 * We want to separate out empty ranges, bounded ranges, and unbounded
595 * ranges. We assume that "contains" and "overlaps" are the most
596 * important queries, so empty ranges will rarely match and unbounded
597 * ranges frequently will. Bounded ranges should be in the middle.
599 * Empty ranges we push all the way to the left, then bounded ranges
600 * (sorted on lower bound, then upper), then ranges with no lower
601 * bound, then ranges with no upper bound; and finally, ranges with no
602 * upper or lower bound all the way to the right.
605 sort_item_cmp(const void *a, const void *b)
607 PickSplitSortItem *i1 = (PickSplitSortItem *) a;
608 PickSplitSortItem *i2 = (PickSplitSortItem *) b;
609 RangeType *r1 = i1->data;
610 RangeType *r2 = i2->data;
611 TypeCacheEntry *typcache = i1->typcache;
620 range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
621 range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
623 if (empty1 || empty2)
625 if (empty1 && empty2)
636 * If both lower or both upper bounds are infinite, we sort by ascending
637 * range size. That means that if both upper bounds are infinite, we sort
638 * by the lower bound _descending_. That creates a slightly odd total
639 * order, but keeps the pages with very unselective predicates grouped
640 * more closely together on the right.
642 if (lower1.infinite || upper1.infinite ||
643 lower2.infinite || upper2.infinite)
645 if (lower1.infinite && lower2.infinite)
646 return range_cmp_bounds(typcache, &upper1, &upper2);
647 else if (lower1.infinite)
649 else if (lower2.infinite)
651 else if (upper1.infinite && upper2.infinite)
652 return -1 * range_cmp_bounds(typcache, &lower1, &lower2);
653 else if (upper1.infinite)
655 else if (upper2.infinite)
661 if ((cmp = range_cmp_bounds(typcache, &lower1, &lower2)) != 0)
664 return range_cmp_bounds(typcache, &upper1, &upper2);