]> granicus.if.org Git - postgresql/commitdiff
Improve handling of NULLs in KNN-GiST and KNN-SP-GiST
authorAlexander Korotkov <akorotkov@postgresql.org>
Thu, 19 Sep 2019 18:30:19 +0000 (21:30 +0300)
committerAlexander Korotkov <akorotkov@postgresql.org>
Thu, 19 Sep 2019 19:09:51 +0000 (22:09 +0300)
This commit improves the handling of NULLs in KNN-GiST and KNN-SP-GiST in two ways:

 * It removes the ugliness of 02f90879e7, which stores distance values and null
   flags in two separate arrays after the GISTSearchItem struct.  Instead we pack
   both the distance value and the null flag in the IndexOrderByDistance struct.
   Alignment overhead should be negligible, because we typically deal with at
   most a few "col op const" expressions in the ORDER BY clause.
 * It fixes handling of "col op NULL" expressions in KNN-SP-GiST.  Now, these
   expressions are not passed to support functions, which can't deal with them.
   Instead, a NULL result is implicitly assumed.  In the future we may decide to
   teach support functions to deal with NULL arguments, but the current solution
   is a bugfix suitable for backpatching.

Reported-by: Nikita Glukhov
Discussion: https://postgr.es/m/826f57ee-afc7-8977-c44c-6111d18b02ec%40postgrespro.ru
Author: Nikita Glukhov
Reviewed-by: Alexander Korotkov
Backpatch-through: 9.4

src/backend/access/gist/gistget.c
src/backend/access/gist/gistscan.c
src/include/access/genam.h
src/include/access/gist_private.h
src/tools/pgindent/typedefs.list

index b2ec971156dd8f97ef1879d6c7a33e4a425b583a..93419f937be3ea45dce606bf9a8c92c043e7d347 100644 (file)
@@ -38,9 +38,8 @@
  * Similarly, *recheck_distances_p is set to indicate whether the distances
  * need to be rechecked, and it is also ignored for non-leaf entries.
  *
- * If we are doing an ordered scan, so->distancesValues[] and
- * so->distancesNulls[] is filled with distance data from the distance()
- * functions before returning success.
+ * If we are doing an ordered scan, so->distances[] is filled with distance
+ * data from the distance() functions before returning success.
  *
  * We must decompress the key in the IndexTuple before passing it to the
  * sk_funcs (which actually are the opclass Consistent or Distance methods).
@@ -61,8 +60,7 @@ gistindex_keytest(IndexScanDesc scan,
        GISTSTATE  *giststate = so->giststate;
        ScanKey         key = scan->keyData;
        int                     keySize = scan->numberOfKeys;
-       double     *distance_value_p;
-       bool       *distance_null_p;
+       IndexOrderByDistance *distance_p;
        Relation        r = scan->indexRelation;
 
        *recheck_p = false;
@@ -81,8 +79,8 @@ gistindex_keytest(IndexScanDesc scan,
                        elog(ERROR, "invalid GiST tuple found on leaf page");
                for (i = 0; i < scan->numberOfOrderBys; i++)
                {
-                       so->distanceValues[i] = -get_float8_infinity();
-                       so->distanceNulls[i] = false;
+                       so->distances[i].value = -get_float8_infinity();
+                       so->distances[i].isnull = false;
                }
                return true;
        }
@@ -166,8 +164,7 @@ gistindex_keytest(IndexScanDesc scan,
 
        /* OK, it passes --- now let's compute the distances */
        key = scan->orderByData;
-       distance_value_p = so->distanceValues;
-       distance_null_p = so->distanceNulls;
+       distance_p = so->distances;
        keySize = scan->numberOfOrderBys;
        while (keySize > 0)
        {
@@ -182,8 +179,8 @@ gistindex_keytest(IndexScanDesc scan,
                if ((key->sk_flags & SK_ISNULL) || isNull)
                {
                        /* Assume distance computes as null */
-                       *distance_value_p = 0.0;
-                       *distance_null_p = true;
+                       distance_p->value = 0.0;
+                       distance_p->isnull = true;
                }
                else
                {
@@ -220,13 +217,12 @@ gistindex_keytest(IndexScanDesc scan,
                                                                         ObjectIdGetDatum(key->sk_subtype),
                                                                         PointerGetDatum(&recheck));
                        *recheck_distances_p |= recheck;
-                       *distance_value_p = DatumGetFloat8(dist);
-                       *distance_null_p = false;
+                       distance_p->value = DatumGetFloat8(dist);
+                       distance_p->isnull = false;
                }
 
                key++;
-               distance_value_p++;
-               distance_null_p++;
+               distance_p++;
                keySize--;
        }
 
@@ -239,8 +235,7 @@ gistindex_keytest(IndexScanDesc scan,
  *
  * scan: index scan we are executing
  * pageItem: search queue item identifying an index page to scan
- * myDistanceValues: distances array associated with pageItem, or NULL at the root
- * myDistanceNulls: null flags for myDistanceValues array, or NULL at the root
+ * myDistances: distances array associated with pageItem, or NULL at the root
  * tbm: if not NULL, gistgetbitmap's output bitmap
  * ntids: if not NULL, gistgetbitmap's output tuple counter
  *
@@ -258,8 +253,7 @@ gistindex_keytest(IndexScanDesc scan,
  */
 static void
 gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
-                        double *myDistanceValues, bool *myDistanceNulls,
-                        TIDBitmap *tbm, int64 *ntids)
+                        IndexOrderByDistance *myDistances, TIDBitmap *tbm, int64 *ntids)
 {
        GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
        GISTSTATE  *giststate = so->giststate;
@@ -294,7 +288,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
                GISTSearchItem *item;
 
                /* This can't happen when starting at the root */
-               Assert(myDistanceValues != NULL && myDistanceNulls != NULL);
+               Assert(myDistances != NULL);
 
                oldcxt = MemoryContextSwitchTo(so->queueCxt);
 
@@ -304,10 +298,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
                item->data.parentlsn = pageItem->data.parentlsn;
 
                /* Insert it into the queue using same distances as for this page */
-               memcpy(GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
-                          myDistanceValues, sizeof(double) * scan->numberOfOrderBys);
-               memcpy(GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
-                          myDistanceNulls, sizeof(bool) * scan->numberOfOrderBys);
+               memcpy(item->distances, myDistances,
+                          sizeof(item->distances[0]) * scan->numberOfOrderBys);
 
                pairingheap_add(so->queue, &item->phNode);
 
@@ -418,10 +410,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
                        }
 
                        /* Insert it into the queue using new distance data */
-                       memcpy(GISTSearchItemDistanceValues(item, nOrderBys),
-                                  so->distanceValues, sizeof(double) * nOrderBys);
-                       memcpy(GISTSearchItemDistanceNulls(item, nOrderBys),
-                                  so->distanceNulls, sizeof(bool) * nOrderBys);
+                       memcpy(item->distances, so->distances,
+                                  sizeof(item->distances[0]) * nOrderBys);
 
                        pairingheap_add(so->queue, &item->phNode);
 
@@ -476,8 +466,6 @@ getNextNearest(IndexScanDesc scan)
        do
        {
                GISTSearchItem *item = getNextGISTSearchItem(so);
-               float8 *distanceValues = GISTSearchItemDistanceValues(item, scan->numberOfOrderBys);
-               bool *distanceNulls = GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys);
 
                if (!item)
                        break;
@@ -497,8 +485,8 @@ getNextNearest(IndexScanDesc scan)
                                        if (!scan->xs_orderbynulls[i])
                                                pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
 #endif
-                                       scan->xs_orderbyvals[i] = Float8GetDatum(distanceValues[i]);
-                                       scan->xs_orderbynulls[i] = distanceNulls[i];
+                                       scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i].value);
+                                       scan->xs_orderbynulls[i] = item->distances[i].isnull;
                                }
                                else if (so->orderByTypes[i] == FLOAT4OID)
                                {
@@ -508,8 +496,8 @@ getNextNearest(IndexScanDesc scan)
                                        if (!scan->xs_orderbynulls[i])
                                                pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
 #endif
-                                       scan->xs_orderbyvals[i] = Float4GetDatum(distanceValues[i]);
-                                       scan->xs_orderbynulls[i] = distanceNulls[i];
+                                       scan->xs_orderbyvals[i] = Float4GetDatum(item->distances[i].value);
+                                       scan->xs_orderbynulls[i] = item->distances[i].isnull;
                                }
                                else
                                {
@@ -537,10 +525,7 @@ getNextNearest(IndexScanDesc scan)
                        /* visit an index page, extract its items into queue */
                        CHECK_FOR_INTERRUPTS();
 
-                       gistScanPage(scan, item,
-                                                GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
-                                                GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
-                                                NULL, NULL);
+                       gistScanPage(scan, item, item->distances, NULL, NULL);
                }
 
                pfree(item);
@@ -580,7 +565,7 @@ gistgettuple(PG_FUNCTION_ARGS)
 
                fakeItem.blkno = GIST_ROOT_BLKNO;
                memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
-               gistScanPage(scan, &fakeItem, NULL, NULL, NULL, NULL);
+               gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
        }
 
        if (scan->numberOfOrderBys > 0)
@@ -625,10 +610,7 @@ gistgettuple(PG_FUNCTION_ARGS)
                                 * this page, we fall out of the inner "do" and loop around to
                                 * return them.
                                 */
-                               gistScanPage(scan, item,
-                                                        GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
-                                                        GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
-                                                        NULL, NULL);
+                               gistScanPage(scan, item, item->distances, NULL, NULL);
 
                                pfree(item);
                        } while (so->nPageData == 0);
@@ -661,7 +643,7 @@ gistgetbitmap(PG_FUNCTION_ARGS)
 
        fakeItem.blkno = GIST_ROOT_BLKNO;
        memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
-       gistScanPage(scan, &fakeItem, NULL, NULL, tbm, &ntids);
+       gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
 
        /*
         * While scanning a leaf page, ItemPointers of matching heap tuples will
@@ -676,10 +658,7 @@ gistgetbitmap(PG_FUNCTION_ARGS)
 
                CHECK_FOR_INTERRUPTS();
 
-               gistScanPage(scan, item,
-                                        GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
-                                        GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
-                                        tbm, &ntids);
+               gistScanPage(scan, item, item->distances, tbm, &ntids);
 
                pfree(item);
        }
index e805cf2c1b7e0c4e7fb64493d9dd4a4e93d1eccb..ed16a85145da5222b732064cfc54747263e21d37 100644 (file)
@@ -33,26 +33,23 @@ pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node
        const GISTSearchItem *sb = (const GISTSearchItem *) b;
        IndexScanDesc scan = (IndexScanDesc) arg;
        int                     i;
-       double     *da = GISTSearchItemDistanceValues(sa, scan->numberOfOrderBys),
-                          *db = GISTSearchItemDistanceValues(sb, scan->numberOfOrderBys);
-       bool       *na = GISTSearchItemDistanceNulls(sa, scan->numberOfOrderBys),
-                          *nb = GISTSearchItemDistanceNulls(sb, scan->numberOfOrderBys);
 
        /* Order according to distance comparison */
        for (i = 0; i < scan->numberOfOrderBys; i++)
        {
-               if (na[i])
+               if (sa->distances[i].isnull)
                {
-                       if (!nb[i])
+                       if (!sb->distances[i].isnull)
                                return -1;
                }
-               else if (nb[i])
+               else if (sb->distances[i].isnull)
                {
                        return 1;
                }
                else
                {
-                       int                     cmp = -float8_cmp_internal(da[i], db[i]);
+                       int                     cmp = -float8_cmp_internal(sa->distances[i].value,
+                                                                                                  sb->distances[i].value);
 
                        if (cmp != 0)
                                return cmp;
@@ -103,8 +100,7 @@ gistbeginscan(PG_FUNCTION_ARGS)
        so->queueCxt = giststate->scanCxt;      /* see gistrescan */
 
        /* workspaces with size dependent on numberOfOrderBys: */
-       so->distanceValues = palloc(sizeof(double) * scan->numberOfOrderBys);
-       so->distanceNulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
+       so->distances = palloc(sizeof(so->distances[0]) * scan->numberOfOrderBys);
        so->qual_ok = true;                     /* in case there are zero keys */
        if (scan->numberOfOrderBys > 0)
        {
index d86590ac111e6064c06a85b4dbc7b9979b9192b2..da5490a00a1d4110d35dbc5b355f9762520309d3 100644 (file)
@@ -112,6 +112,13 @@ typedef enum IndexUniqueCheck
 } IndexUniqueCheck;
 
 
+/* Nullable "ORDER BY col op const" distance */
+typedef struct IndexOrderByDistance
+{
+       double          value;
+       bool            isnull;
+} IndexOrderByDistance;
+
 /*
  * generalized index_ interface routines (in indexam.c)
  */
index 36172262383e5f529e875bd4c9af4fcff4531d86..35cbfcb282f056036fb584dfbefa913868b697e5 100644 (file)
@@ -14,6 +14,7 @@
 #ifndef GIST_PRIVATE_H
 #define GIST_PRIVATE_H
 
+#include "access/genam.h"
 #include "access/gist.h"
 #include "access/itup.h"
 #include "access/xlogreader.h"
@@ -134,29 +135,15 @@ typedef struct GISTSearchItem
                GISTSearchHeapItem heap;        /* heap info, if heap tuple */
        }                       data;
 
-       /*
-        * This data structure is followed by arrays of distance values and
-        * distance null flags.  Size of both arrays is
-        * IndexScanDesc->numberOfOrderBys. See macros below for accessing those
-        * arrays.
-        */
+       /* numberOfOrderBys entries */
+       IndexOrderByDistance distances[FLEXIBLE_ARRAY_MEMBER];
 } GISTSearchItem;
 
 #define GISTSearchItemIsHeap(item)     ((item).blkno == InvalidBlockNumber)
 
-#define SizeOfGISTSearchItem(n_distances) (DOUBLEALIGN(sizeof(GISTSearchItem)) + \
-       (sizeof(double) + sizeof(bool)) * (n_distances))
-
-/*
- * We actually don't need n_distances compute pointer to distance values.
- * Nevertheless take n_distances as argument to have same arguments list for
- * GISTSearchItemDistanceValues() and GISTSearchItemDistanceNulls().
- */
-#define GISTSearchItemDistanceValues(item, n_distances) \
-       ((double *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem))))
-
-#define GISTSearchItemDistanceNulls(item, n_distances) \
-       ((bool *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem)) + sizeof(double) * (n_distances)))
+#define SizeOfGISTSearchItem(n_distances) \
+       (offsetof(GISTSearchItem, distances) + \
+        sizeof(IndexOrderByDistance) * (n_distances))
 
 /*
  * GISTScanOpaqueData: private state for a scan of a GiST index
@@ -172,8 +159,7 @@ typedef struct GISTScanOpaqueData
        bool            firstCall;              /* true until first gistgettuple call */
 
        /* pre-allocated workspace arrays */
-       double     *distanceValues; /* output area for gistindex_keytest */
-       bool       *distanceNulls;
+       IndexOrderByDistance *distances;        /* output area for gistindex_keytest */
 
        /* In a non-ordered search, returnable heap items are stored here: */
        GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
index 23ba334076ed2d4f74d71c1c76d95a3b04c07d21..42b7492ae771d6d3926fdb7c5878c2777cf308d8 100644 (file)
@@ -892,6 +892,7 @@ IndexList
 IndexOnlyScan
 IndexOnlyScanState
 IndexOptInfo
+IndexOrderByDistance
 IndexPath
 IndexQualInfo
 IndexRuntimeKeyInfo