]> granicus.if.org Git - postgresql/commitdiff
Fix handling of NULL distances in KNN-GiST
author: Alexander Korotkov <akorotkov@postgresql.org>
Sun, 8 Sep 2019 18:13:40 +0000 (21:13 +0300)
committer: Alexander Korotkov <akorotkov@postgresql.org>
Sun, 8 Sep 2019 19:08:12 +0000 (22:08 +0300)
In order to implement NULLS LAST semantics, GiST previously assumed the distance
to a NULL value to be Inf.  However, our distance functions can return Inf and
NaN for non-null values.  In such cases, the NULLS LAST semantics are broken.
This commit fixes that by introducing a separate array of null flags for
distances.

Backpatch to all supported versions.

Discussion: https://postgr.es/m/CAPpHfdsNvNdA0DBS%2BwMpFrgwT6C3-q50sFVGLSiuWnV3FqOJuQ%40mail.gmail.com
Author: Alexander Korotkov
Backpatch-through: 9.4

src/backend/access/gist/gistget.c
src/backend/access/gist/gistscan.c
src/backend/access/index/indexam.c
src/backend/access/spgist/spgscan.c
src/include/access/genam.h
src/include/access/gist_private.h
src/test/regress/expected/create_index.out

index 95ac35e3bb3c7a4593aab156fe534b23c98a2c23..db633a94757ddc64d7e5f89ca841ad02de9927c6 100644 (file)
@@ -112,8 +112,9 @@ gistkillitems(IndexScanDesc scan)
  * Similarly, *recheck_distances_p is set to indicate whether the distances
  * need to be rechecked, and it is also ignored for non-leaf entries.
  *
- * If we are doing an ordered scan, so->distances[] is filled with distance
- * data from the distance() functions before returning success.
+ * If we are doing an ordered scan, so->distanceValues[] and
+ * so->distanceNulls[] are filled with distance data from the distance()
+ * functions before returning success.
  *
  * We must decompress the key in the IndexTuple before passing it to the
  * sk_funcs (which actually are the opclass Consistent or Distance methods).
@@ -134,7 +135,8 @@ gistindex_keytest(IndexScanDesc scan,
        GISTSTATE  *giststate = so->giststate;
        ScanKey         key = scan->keyData;
        int                     keySize = scan->numberOfKeys;
-       double     *distance_p;
+       double     *distance_value_p;
+       bool       *distance_null_p;
        Relation        r = scan->indexRelation;
 
        *recheck_p = false;
@@ -152,7 +154,10 @@ gistindex_keytest(IndexScanDesc scan,
                if (GistPageIsLeaf(page))       /* shouldn't happen */
                        elog(ERROR, "invalid GiST tuple found on leaf page");
                for (i = 0; i < scan->numberOfOrderBys; i++)
-                       so->distances[i] = -get_float8_infinity();
+               {
+                       so->distanceValues[i] = -get_float8_infinity();
+                       so->distanceNulls[i] = false;
+               }
                return true;
        }
 
@@ -235,7 +240,8 @@ gistindex_keytest(IndexScanDesc scan,
 
        /* OK, it passes --- now let's compute the distances */
        key = scan->orderByData;
-       distance_p = so->distances;
+       distance_value_p = so->distanceValues;
+       distance_null_p = so->distanceNulls;
        keySize = scan->numberOfOrderBys;
        while (keySize > 0)
        {
@@ -249,8 +255,9 @@ gistindex_keytest(IndexScanDesc scan,
 
                if ((key->sk_flags & SK_ISNULL) || isNull)
                {
-                       /* Assume distance computes as null and sorts to the end */
-                       *distance_p = get_float8_infinity();
+                       /* Assume distance computes as null */
+                       *distance_value_p = 0.0;
+                       *distance_null_p = true;
                }
                else
                {
@@ -287,11 +294,13 @@ gistindex_keytest(IndexScanDesc scan,
                                                                         ObjectIdGetDatum(key->sk_subtype),
                                                                         PointerGetDatum(&recheck));
                        *recheck_distances_p |= recheck;
-                       *distance_p = DatumGetFloat8(dist);
+                       *distance_value_p = DatumGetFloat8(dist);
+                       *distance_null_p = false;
                }
 
                key++;
-               distance_p++;
+               distance_value_p++;
+               distance_null_p++;
                keySize--;
        }
 
@@ -304,7 +313,8 @@ gistindex_keytest(IndexScanDesc scan,
  *
  * scan: index scan we are executing
  * pageItem: search queue item identifying an index page to scan
- * myDistances: distances array associated with pageItem, or NULL at the root
+ * myDistanceValues: distances array associated with pageItem, or NULL at the root
+ * myDistanceNulls: null flags for myDistanceValues array, or NULL at the root
  * tbm: if not NULL, gistgetbitmap's output bitmap
  * ntids: if not NULL, gistgetbitmap's output tuple counter
  *
@@ -321,7 +331,8 @@ gistindex_keytest(IndexScanDesc scan,
  * sibling will be processed next.
  */
 static void
-gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
+gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
+                        double *myDistanceValues, bool *myDistanceNulls,
                         TIDBitmap *tbm, int64 *ntids)
 {
        GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
@@ -359,7 +370,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
                GISTSearchItem *item;
 
                /* This can't happen when starting at the root */
-               Assert(myDistances != NULL);
+               Assert(myDistanceValues != NULL && myDistanceNulls != NULL);
 
                oldcxt = MemoryContextSwitchTo(so->queueCxt);
 
@@ -369,8 +380,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
                item->data.parentlsn = pageItem->data.parentlsn;
 
                /* Insert it into the queue using same distances as for this page */
-               memcpy(item->distances, myDistances,
-                          sizeof(double) * scan->numberOfOrderBys);
+               memcpy(GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
+                          myDistanceValues, sizeof(double) * scan->numberOfOrderBys);
+               memcpy(GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
+                          myDistanceNulls, sizeof(bool) * scan->numberOfOrderBys);
 
                pairingheap_add(so->queue, &item->phNode);
 
@@ -479,6 +492,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
                         * search.
                         */
                        GISTSearchItem *item;
+                       int                     nOrderBys = scan->numberOfOrderBys;
 
                        oldcxt = MemoryContextSwitchTo(so->queueCxt);
 
@@ -513,8 +527,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
                        }
 
                        /* Insert it into the queue using new distance data */
-                       memcpy(item->distances, so->distances,
-                                  sizeof(double) * scan->numberOfOrderBys);
+                       memcpy(GISTSearchItemDistanceValues(item, nOrderBys),
+                                  so->distanceValues, sizeof(double) * nOrderBys);
+                       memcpy(GISTSearchItemDistanceNulls(item, nOrderBys),
+                                  so->distanceNulls, sizeof(bool) * nOrderBys);
 
                        pairingheap_add(so->queue, &item->phNode);
 
@@ -579,7 +595,8 @@ getNextNearest(IndexScanDesc scan)
                        scan->xs_recheck = item->data.heap.recheck;
 
                        index_store_float8_orderby_distances(scan, so->orderByTypes,
-                                                                                                item->distances,
+                                                                                                GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
+                                                                                                GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
                                                                                                 item->data.heap.recheckDistances);
 
                        /* in an index-only scan, also return the reconstructed tuple. */
@@ -592,7 +609,10 @@ getNextNearest(IndexScanDesc scan)
                        /* visit an index page, extract its items into queue */
                        CHECK_FOR_INTERRUPTS();
 
-                       gistScanPage(scan, item, item->distances, NULL, NULL);
+                       gistScanPage(scan, item,
+                                                GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
+                                                GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
+                                                NULL, NULL);
                }
 
                pfree(item);
@@ -630,7 +650,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
 
                fakeItem.blkno = GIST_ROOT_BLKNO;
                memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
-               gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
+               gistScanPage(scan, &fakeItem, NULL, NULL, NULL, NULL);
        }
 
        if (scan->numberOfOrderBys > 0)
@@ -724,7 +744,10 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
                                 * this page, we fall out of the inner "do" and loop around to
                                 * return them.
                                 */
-                               gistScanPage(scan, item, item->distances, NULL, NULL);
+                               gistScanPage(scan, item,
+                                                        GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
+                                                        GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
+                                                        NULL, NULL);
 
                                pfree(item);
                        } while (so->nPageData == 0);
@@ -755,7 +778,7 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 
        fakeItem.blkno = GIST_ROOT_BLKNO;
        memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
-       gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
+       gistScanPage(scan, &fakeItem, NULL, NULL, tbm, &ntids);
 
        /*
         * While scanning a leaf page, ItemPointers of matching heap tuples will
@@ -770,7 +793,10 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 
                CHECK_FOR_INTERRUPTS();
 
-               gistScanPage(scan, item, item->distances, tbm, &ntids);
+               gistScanPage(scan, item,
+                                        GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
+                                        GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
+                                        tbm, &ntids);
 
                pfree(item);
        }
index 636aa62b680042a99c32d79afad4cd2632004ecf..e72bf08f952b578aa3261ba7bf8296b0f5da1b54 100644 (file)
@@ -33,14 +33,30 @@ pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node
        const GISTSearchItem *sb = (const GISTSearchItem *) b;
        IndexScanDesc scan = (IndexScanDesc) arg;
        int                     i;
+       double     *da = GISTSearchItemDistanceValues(sa, scan->numberOfOrderBys),
+                          *db = GISTSearchItemDistanceValues(sb, scan->numberOfOrderBys);
+       bool       *na = GISTSearchItemDistanceNulls(sa, scan->numberOfOrderBys),
+                          *nb = GISTSearchItemDistanceNulls(sb, scan->numberOfOrderBys);
 
        /* Order according to distance comparison */
        for (i = 0; i < scan->numberOfOrderBys; i++)
        {
-               int                     cmp = -float8_cmp_internal(sa->distances[i], sb->distances[i]);
+               if (na[i])
+               {
+                       if (!nb[i])
+                               return -1;
+               }
+               else if (nb[i])
+               {
+                       return 1;
+               }
+               else
+               {
+                       int                     cmp = -float8_cmp_internal(da[i], db[i]);
 
-               if (cmp != 0)
-                       return cmp;
+                       if (cmp != 0)
+                               return cmp;
+               }
        }
 
        /* Heap items go before inner pages, to ensure a depth-first search */
@@ -84,7 +100,8 @@ gistbeginscan(Relation r, int nkeys, int norderbys)
        so->queueCxt = giststate->scanCxt;      /* see gistrescan */
 
        /* workspaces with size dependent on numberOfOrderBys: */
-       so->distances = palloc(sizeof(double) * scan->numberOfOrderBys);
+       so->distanceValues = palloc(sizeof(double) * scan->numberOfOrderBys);
+       so->distanceNulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
        so->qual_ok = true;                     /* in case there are zero keys */
        if (scan->numberOfOrderBys > 0)
        {
index 28edd4aca765d5d04c2be07e7eff2414c6e60284..2e8f53a37c8463f6bd73ca1f25d323a318495dca 100644 (file)
@@ -847,13 +847,14 @@ index_getprocinfo(Relation irel,
  */
 void
 index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes,
-                                                                        double *distances, bool recheckOrderBy)
+                                                                        double *distanceValues,
+                                                                        bool *distanceNulls, bool recheckOrderBy)
 {
        int                     i;
 
        scan->xs_recheckorderby = recheckOrderBy;
 
-       if (!distances)
+       if (!distanceValues)
        {
                Assert(!scan->xs_recheckorderby);
 
@@ -868,6 +869,11 @@ index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes,
 
        for (i = 0; i < scan->numberOfOrderBys; i++)
        {
+               if (distanceNulls && distanceNulls[i])
+               {
+                       scan->xs_orderbyvals[i] = (Datum) 0;
+                       scan->xs_orderbynulls[i] = true;
+               }
                if (orderByTypes[i] == FLOAT8OID)
                {
 #ifndef USE_FLOAT8_BYVAL
@@ -875,7 +881,7 @@ index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes,
                        if (!scan->xs_orderbynulls[i])
                                pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
 #endif
-                       scan->xs_orderbyvals[i] = Float8GetDatum(distances[i]);
+                       scan->xs_orderbyvals[i] = Float8GetDatum(distanceValues[i]);
                        scan->xs_orderbynulls[i] = false;
                }
                else if (orderByTypes[i] == FLOAT4OID)
@@ -886,7 +892,7 @@ index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes,
                        if (!scan->xs_orderbynulls[i])
                                pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
 #endif
-                       scan->xs_orderbyvals[i] = Float4GetDatum((float4) distances[i]);
+                       scan->xs_orderbyvals[i] = Float4GetDatum((float4) distanceValues[i]);
                        scan->xs_orderbynulls[i] = false;
                }
                else
index 1cf28ecf2fdea7f829ee273fabf8f1551284578f..2bd4037af9e00c837680b4e79cca3b050b1bb5f5 100644 (file)
@@ -929,6 +929,7 @@ spggettuple(IndexScanDesc scan, ScanDirection dir)
                        if (so->numberOfOrderBys > 0)
                                index_store_float8_orderby_distances(scan, so->orderByTypes,
                                                                                                         so->distances[so->iPtr],
+                                                                                                        NULL,
                                                                                                         so->recheckDistances[so->iPtr]);
                        so->iPtr++;
                        return true;
index 8c053be2caf5738c9ab03c7be6a88883c6ef716d..6c56717ba77600faef0f5e342be34f750df5c5ef 100644 (file)
@@ -178,7 +178,9 @@ extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum,
 extern FmgrInfo *index_getprocinfo(Relation irel, AttrNumber attnum,
                                                                   uint16 procnum);
 extern void index_store_float8_orderby_distances(IndexScanDesc scan,
-                                                                                                Oid *orderByTypes, double *distances,
+                                                                                                Oid *orderByTypes,
+                                                                                                double *distanceValues,
+                                                                                                bool *distanceNulls,
                                                                                                 bool recheckOrderBy);
 
 /*
index fc1a3115565458a3abc580e19f1b50879774f1f9..ed5b643885dcc94c559d5d29ef518b67c1cf3b82 100644 (file)
@@ -137,13 +137,30 @@ typedef struct GISTSearchItem
                /* we must store parentlsn to detect whether a split occurred */
                GISTSearchHeapItem heap;        /* heap info, if heap tuple */
        }                       data;
-       double          distances[FLEXIBLE_ARRAY_MEMBER];       /* numberOfOrderBys
-                                                                                                        * entries */
+
+       /*
+        * This data structure is followed by arrays of distance values and
+        * distance null flags.  Size of both arrays is
+        * IndexScanDesc->numberOfOrderBys. See macros below for accessing those
+        * arrays.
+        */
 } GISTSearchItem;
 
 #define GISTSearchItemIsHeap(item)     ((item).blkno == InvalidBlockNumber)
 
-#define SizeOfGISTSearchItem(n_distances) (offsetof(GISTSearchItem, distances) + sizeof(double) * (n_distances))
+#define SizeOfGISTSearchItem(n_distances) (DOUBLEALIGN(sizeof(GISTSearchItem)) + \
+       (sizeof(double) + sizeof(bool)) * (n_distances))
+
+/*
+ * We don't actually need n_distances to compute the pointer to distance values.
+ * Nevertheless, take n_distances as an argument so that the argument lists of
+ * GISTSearchItemDistanceValues() and GISTSearchItemDistanceNulls() are the same.
+ */
+#define GISTSearchItemDistanceValues(item, n_distances) \
+       ((double *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem))))
+
+#define GISTSearchItemDistanceNulls(item, n_distances) \
+       ((bool *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem)) + sizeof(double) * (n_distances)))
 
 /*
  * GISTScanOpaqueData: private state for a scan of a GiST index
@@ -159,7 +176,8 @@ typedef struct GISTScanOpaqueData
        bool            firstCall;              /* true until first gistgettuple call */
 
        /* pre-allocated workspace arrays */
-       double     *distances;          /* output area for gistindex_keytest */
+       double     *distanceValues; /* output area for gistindex_keytest */
+       bool       *distanceNulls;
 
        /* info about killed items if any (killedItems is NULL if never used) */
        OffsetNumber *killedItems;      /* offset numbers of killed items */
index 117b11a1a63ead815d9da46bf13d9edf84677507..324db1b6ae1f411d41ca5d88c7b9068feacec0e8 100644 (file)
@@ -531,8 +531,8 @@ SELECT * FROM point_tbl ORDER BY f1 <-> '0,1';
  (-5,-12)
  (5.1,34.5)
  (1e+300,Infinity)
  (NaN,NaN)
 (10 rows)
 
 EXPLAIN (COSTS OFF)