]> granicus.if.org Git - postgresql/commitdiff
Fix gist_box_same and gist_point_consistent to handle fuzziness correctly.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 8 Feb 2013 23:03:23 +0000 (18:03 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 8 Feb 2013 23:03:23 +0000 (18:03 -0500)
While there's considerable doubt that we want fuzzy behavior in the
geometric operators at all (let alone as currently implemented), nobody is
stepping forward to redesign that stuff.  In the meantime it behooves us
to make sure that index searches agree with the behavior of the underlying
operators.  This patch fixes two problems in this area.

First, gist_box_same was using fuzzy equality, but it really needs to use
exact equality to prevent not-quite-identical upper index keys from being
treated as identical, which for example would prevent an existing upper
key from being extended by an amount less than epsilon.  This would result
in inconsistent indexes.  (The next release notes will need to recommend
that users reindex GiST indexes on boxes, polygons, circles, and points,
since all four opclasses use gist_box_same.)

Second, gist_point_consistent used exact comparisons for upper-page
comparisons in ~= searches, when it needs to use fuzzy comparisons to
ensure it finds all matches; and it used fuzzy comparisons for point <@ box
searches, when it needs to use exact comparisons because that's what the
<@ operator (rather inconsistently) does.

The added regression test cases illustrate all three misbehaviors.

Back-patch to all active branches.  (8.4 did not have GiST point_ops,
but it still seems prudent to apply the gist_box_same patch to it.)

Alexander Korotkov, reviewed by Noah Misch

src/backend/access/gist/gistproc.c
src/test/regress/expected/point.out
src/test/regress/sql/point.sql

index 09e911d0981ef335baf99812b56fc3b468958a14..6cb679736be7ccb81ffdd296518bdc169ef07aed 100644 (file)
@@ -838,7 +838,12 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
 /*
  * Equality method
  *
- * This is used for both boxes and points.
+ * This is used for boxes, points, circles, and polygons, all of which store
+ * boxes as GiST index entries.
+ *
+ * Returns true only when boxes are exactly the same.  We can't use fuzzy
+ * comparisons here without breaking index consistency; therefore, this isn't
+ * equivalent to box_same().
  */
 Datum
 gist_box_same(PG_FUNCTION_ARGS)
@@ -848,11 +853,10 @@ gist_box_same(PG_FUNCTION_ARGS)
        bool       *result = (bool *) PG_GETARG_POINTER(2);
 
        if (b1 && b2)
-               *result = DatumGetBool(DirectFunctionCall2(box_same,
-                                                                                                  PointerGetDatum(b1),
-                                                                                                  PointerGetDatum(b2)));
+               *result = (b1->low.x == b2->low.x && b1->low.y == b2->low.y &&
+                                  b1->high.x == b2->high.x && b1->high.y == b2->high.y);
        else
-               *result = (b1 == NULL && b2 == NULL) ? TRUE : FALSE;
+               *result = (b1 == NULL && b2 == NULL);
        PG_RETURN_POINTER(result);
 }
 
@@ -1296,13 +1300,16 @@ gist_point_consistent_internal(StrategyNumber strategy,
                case RTSameStrategyNumber:
                        if (isLeaf)
                        {
-                               result = FPeq(key->low.x, query->x)
-                                       && FPeq(key->low.y, query->y);
+                               /* key.high must equal key.low, so we can disregard it */
+                               result = (FPeq(key->low.x, query->x) &&
+                                                 FPeq(key->low.y, query->y));
                        }
                        else
                        {
-                               result = (query->x <= key->high.x && query->x >= key->low.x &&
-                                                 query->y <= key->high.y && query->y >= key->low.y);
+                               result = (FPle(query->x, key->high.x) &&
+                                                 FPge(query->x, key->low.x) &&
+                                                 FPle(query->y, key->high.y) &&
+                                                 FPge(query->y, key->low.y));
                        }
                        break;
                default:
@@ -1337,12 +1344,31 @@ gist_point_consistent(PG_FUNCTION_ARGS)
                        *recheck = false;
                        break;
                case BoxStrategyNumberGroup:
-                       result = DatumGetBool(DirectFunctionCall5(
-                                                                                                         gist_box_consistent,
-                                                                                                         PointerGetDatum(entry),
-                                                                                                         PG_GETARG_DATUM(1),
-                                                                         Int16GetDatum(RTOverlapStrategyNumber),
-                                                                                          0, PointerGetDatum(recheck)));
+                       {
+                               /*
+                                * The only operator in this group is point <@ box (on_pb), so
+                                * we needn't examine strategy again.
+                                *
+                                * For historical reasons, on_pb uses exact rather than fuzzy
+                                * comparisons.  We could use box_overlap when at an internal
+                                * page, but that would lead to possibly visiting child pages
+                                * uselessly, because box_overlap uses fuzzy comparisons.
+                                * Instead we write a non-fuzzy overlap test.  The same code
+                                * will also serve for leaf-page tests, since leaf keys have
+                                * high == low.
+                                */
+                               BOX                *query,
+                                                  *key;
+
+                               query = PG_GETARG_BOX_P(1);
+                               key = DatumGetBoxP(entry->key);
+
+                               result = (key->high.x >= query->low.x &&
+                                                 key->low.x <= query->high.x &&
+                                                 key->high.y >= query->low.y &&
+                                                 key->low.y <= query->high.y);
+                               *recheck = false;
+                       }
                        break;
                case PolygonStrategyNumberGroup:
                        {
index 7929229b16c77d678525d31edd2c6d06efa5a5f8..bfc09627496dcbfa568ba71bd7322bfac9dc7c76 100644 (file)
@@ -245,3 +245,53 @@ SELECT '' AS three, p1.f1 AS point1, p2.f1 AS point2, (p1.f1 <-> p2.f1) AS dista
        | (5.1,34.5) | (10,10)  | 24.9851956166046
 (3 rows)
 
+-- Test that GiST indexes provide same behavior as sequential scan
+CREATE TEMP TABLE point_gist_tbl(f1 point);
+INSERT INTO point_gist_tbl SELECT '(0,0)' FROM generate_series(0,1000);
+CREATE INDEX point_gist_tbl_index ON point_gist_tbl USING gist (f1);
+INSERT INTO point_gist_tbl VALUES ('(0.0000009,0.0000009)');
+SET enable_seqscan TO true;
+SET enable_indexscan TO false;
+SET enable_bitmapscan TO false;
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 ~= '(0.0000009,0.0000009)'::point;
+ count 
+-------
+  1002
+(1 row)
+
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 <@ '(0.0000009,0.0000009),(0.0000009,0.0000009)'::box;
+ count 
+-------
+     1
+(1 row)
+
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 ~= '(0.0000018,0.0000018)'::point;
+ count 
+-------
+     1
+(1 row)
+
+SET enable_seqscan TO false;
+SET enable_indexscan TO true;
+SET enable_bitmapscan TO true;
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 ~= '(0.0000009,0.0000009)'::point;
+ count 
+-------
+  1002
+(1 row)
+
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 <@ '(0.0000009,0.0000009),(0.0000009,0.0000009)'::box;
+ count 
+-------
+     1
+(1 row)
+
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 ~= '(0.0000018,0.0000018)'::point;
+ count 
+-------
+     1
+(1 row)
+
+RESET enable_seqscan;
+RESET enable_indexscan;
+RESET enable_bitmapscan;
index 1b62b10d40427fb3e8f0c0a4ea105f24cd70864b..63a803a809dda4497b452618b1314be87063f9da 100644 (file)
@@ -80,3 +80,24 @@ SELECT '' AS three, p1.f1 AS point1, p2.f1 AS point2, (p1.f1 <-> p2.f1) AS dista
    FROM POINT_TBL p1, POINT_TBL p2
    WHERE (p1.f1 <-> p2.f1) > 3 and p1.f1 << p2.f1 and p1.f1 >^ p2.f1
    ORDER BY distance;
+
+-- Test that GiST indexes provide same behavior as sequential scan
+CREATE TEMP TABLE point_gist_tbl(f1 point);
+INSERT INTO point_gist_tbl SELECT '(0,0)' FROM generate_series(0,1000);
+CREATE INDEX point_gist_tbl_index ON point_gist_tbl USING gist (f1);
+INSERT INTO point_gist_tbl VALUES ('(0.0000009,0.0000009)');
+SET enable_seqscan TO true;
+SET enable_indexscan TO false;
+SET enable_bitmapscan TO false;
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 ~= '(0.0000009,0.0000009)'::point;
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 <@ '(0.0000009,0.0000009),(0.0000009,0.0000009)'::box;
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 ~= '(0.0000018,0.0000018)'::point;
+SET enable_seqscan TO false;
+SET enable_indexscan TO true;
+SET enable_bitmapscan TO true;
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 ~= '(0.0000009,0.0000009)'::point;
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 <@ '(0.0000009,0.0000009),(0.0000009,0.0000009)'::box;
+SELECT COUNT(*) FROM point_gist_tbl WHERE f1 ~= '(0.0000018,0.0000018)'::point;
+RESET enable_seqscan;
+RESET enable_indexscan;
+RESET enable_bitmapscan;