]> granicus.if.org Git - postgresql/commitdiff
Still more fixes for lossy-GiST-distance-functions patch.
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 23 May 2015 19:22:25 +0000 (15:22 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 23 May 2015 19:22:25 +0000 (15:22 -0400)
Fix confusion in documentation, substantial memory leakage if float8 or
float4 are pass-by-reference, and assorted comments that were obsoleted
by commit 98edd617f3b62a02cb2df9b418fcc4ece45c7ec0.

doc/src/sgml/gist.sgml
src/backend/access/gist/gistget.c
src/backend/access/gist/gistscan.c
src/backend/executor/nodeIndexscan.c
src/include/access/relscan.h

index 4ab93252340b0cc5f5bba366137b6d461d8e7561..dd5d6f9fea05a485f1260d9c87aa72fb2ea0f27d 100644 (file)
    </tgroup>
   </table>
 
- <para>
-  Currently, ordering by the distance operator <literal>&lt;-&gt;</>
-  is supported only with <literal>point</> by the operator classes
-  of the geometric types.
- </para>
-
  <para>
   For historical reasons, the <literal>inet_ops</> operator class is
   not the default class for types <type>inet</> and <type>cidr</>.
@@ -805,28 +799,30 @@ my_distance(PG_FUNCTION_ARGS)
       </para>
 
       <para>
-       Some approximation is allowed when determining the distance, as long as
-       the result is never greater than the entry's actual distance. Thus, for
-       example, distance to a bounding box is usually sufficient in geometric
-       applications.  For an internal tree node, the distance returned must not
-       be greater than the distance to any of the child nodes. If the returned
-       distance is not accurate, the function must set *recheck to false. (This
-       is not necessary for internal tree nodes; for them, the calculation is
-       always assumed to be inaccurate). The executor will calculate the
-       accurate distance after fetching the tuple from the heap, and reorder
-       the tuples if necessary.
+       Some approximation is allowed when determining the distance, so long
+       as the result is never greater than the entry's actual distance. Thus,
+       for example, distance to a bounding box is usually sufficient in
+       geometric applications.  For an internal tree node, the distance
+       returned must not be greater than the distance to any of the child
+       nodes. If the returned distance is not exact, the function must set
+       <literal>*recheck</> to true. (This is not necessary for internal tree
+       nodes; for them, the calculation is always assumed to be inexact.) In
+       this case the executor will calculate the accurate distance after
+       fetching the tuple from the heap, and reorder the tuples if necessary.
       </para>
 
       <para>
-       If the distance function returns *recheck=true for a leaf node, the
-       original ordering operator's return type must be float8 or float4, and
-       the distance function's return value must be comparable with the actual
-       distance operator. Otherwise, the distance function's return type can
-       be any finit <type>float8</> value, as long as the relative order of
-       the returned values matches the order returned by the ordering operator.
-       (Infinity and minus infinity are used internally to handle cases such as
-       nulls, so it is not recommended that <function>distance</> functions
-       return these values.)
+       If the distance function returns <literal>*recheck = true</> for any
+       leaf node, the original ordering operator's return type must
+       be <type>float8</> or <type>float4</>, and the distance function's
+       result values must be comparable to those of the original ordering
+       operator, since the executor will sort using both distance function
+       results and recalculated ordering-operator results.  Otherwise, the
+       distance function's result values can be any finite <type>float8</>
+       values, so long as the relative order of the result values matches the
+       order returned by the ordering operator.  (Infinity and minus infinity
+       are used internally to handle cases such as nulls, so it is not
+       recommended that <function>distance</> functions return these values.)
       </para>
 
      </listitem>
@@ -857,7 +853,7 @@ LANGUAGE C STRICT;
         struct, whose 'key' field contains the same datum in the original,
         uncompressed form. If the opclass' compress function does nothing for
         leaf entries, the fetch method can return the argument as is.
-       </para> 
+       </para>
 
        <para>
         The matching code in the C module could then follow this skeleton:
index 09fba06f73e2990cef5fea693dc50cc441ae82ef..20f695cee4ed09be550817a4e480e36cbbf9b920 100644 (file)
@@ -191,20 +191,18 @@ gistindex_keytest(IndexScanDesc scan,
                        /*
                         * Call the Distance function to evaluate the distance.  The
                         * arguments are the index datum (as a GISTENTRY*), the comparison
-                        * datum, and the ordering operator's strategy number and subtype
-                        * from pg_amop.
+                        * datum, the ordering operator's strategy number and subtype from
+                        * pg_amop, and the recheck flag.
                         *
                         * (Presently there's no need to pass the subtype since it'll
                         * always be zero, but might as well pass it for possible future
                         * use.)
                         *
-                        * Distance functions get a recheck argument as well.  In this
-                        * case the returned distance is the lower bound of distance and
-                        * needs to be rechecked.  We return single recheck flag which
-                        * means that both quals and distances are to be rechecked.  We
-                        * initialize the flag to 'false'.  The flag was added in version
-                        * 9.5 and the distance operators written before that won't know
-                        * about the flag, and are never lossy.
+                        * If the function sets the recheck flag, the returned distance is
+                        * a lower bound on the true distance and needs to be rechecked.
+                        * We initialize the flag to 'false'.  This flag was added in
+                        * version 9.5; distance functions written before that won't know
+                        * about the flag, but are expected to never be lossy.
                         */
                        recheck = false;
                        dist = FunctionCall5Coll(&key->sk_func,
@@ -475,11 +473,22 @@ getNextNearest(IndexScanDesc scan)
                        {
                                if (so->orderByTypes[i] == FLOAT8OID)
                                {
+#ifndef USE_FLOAT8_BYVAL
+                                       /* must free any old value to avoid memory leakage */
+                                       if (!scan->xs_orderbynulls[i])
+                                               pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
+#endif
                                        scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]);
                                        scan->xs_orderbynulls[i] = false;
                                }
                                else if (so->orderByTypes[i] == FLOAT4OID)
                                {
+                                       /* convert distance function's result to ORDER BY type */
+#ifndef USE_FLOAT4_BYVAL
+                                       /* must free any old value to avoid memory leakage */
+                                       if (!scan->xs_orderbynulls[i])
+                                               pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
+#endif
                                        scan->xs_orderbyvals[i] = Float4GetDatum((float4) item->distances[i]);
                                        scan->xs_orderbynulls[i] = false;
                                }
@@ -491,7 +500,7 @@ getNextNearest(IndexScanDesc scan)
                                         * calculated by the distance function to that.  The
                                         * executor won't actually need the order by values we
                                         * return here, if there are no lossy results, so only
-                                        * insist on the datatype if the *recheck is set.
+                                        * insist on converting if the *recheck flag is set.
                                         */
                                        if (scan->xs_recheckorderby)
                                                elog(ERROR, "GiST operator family's FOR ORDER BY operator must return float8 or float4 if the distance function is lossy");
index d0fea2b86ce182e5184a1e2a870e9c2712f8d17d..beb402357c0924889c09794435b55181c5de0f70 100644 (file)
@@ -88,8 +88,9 @@ gistbeginscan(PG_FUNCTION_ARGS)
        so->qual_ok = true;                     /* in case there are zero keys */
        if (scan->numberOfOrderBys > 0)
        {
-               scan->xs_orderbyvals = palloc(sizeof(Datum) * scan->numberOfOrderBys);
+               scan->xs_orderbyvals = palloc0(sizeof(Datum) * scan->numberOfOrderBys);
                scan->xs_orderbynulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
+               memset(scan->xs_orderbynulls, true, sizeof(bool) * scan->numberOfOrderBys);
        }
 
        scan->opaque = so;
@@ -284,6 +285,8 @@ gistrescan(PG_FUNCTION_ARGS)
                                         GIST_DISTANCE_PROC, skey->sk_attno,
                                         RelationGetRelationName(scan->indexRelation));
 
+                       fmgr_info_copy(&(skey->sk_func), finfo, so->giststate->scanCxt);
+
                        /*
                         * Look up the datatype returned by the original ordering operator.
                         * GiST always uses a float8 for the distance function, but the
@@ -297,7 +300,6 @@ gistrescan(PG_FUNCTION_ARGS)
                         * first time.
                         */
                        so->orderByTypes[i] = get_func_rettype(skey->sk_func.fn_oid);
-                       fmgr_info_copy(&(skey->sk_func), finfo, so->giststate->scanCxt);
 
                        /* Restore prior fn_extra pointers, if not first time */
                        if (!first_time)
index d79c1aa8ca98b029c78d25c4d6f84eb1b4d307da..79133e08b66230b1f602c6518a320c120b175588 100644 (file)
@@ -459,10 +459,16 @@ reorderqueue_pop(IndexScanState *node)
 {
        HeapTuple       result;
        ReorderTuple *topmost;
+       int                     i;
 
        topmost = (ReorderTuple *) pairingheap_remove_first(node->iss_ReorderQueue);
 
        result = topmost->htup;
+       for (i = 0; i < node->iss_NumOrderByKeys; i++)
+       {
+               if (!node->iss_OrderByTypByVals[i] && !topmost->orderbynulls[i])
+                       pfree(DatumGetPointer(topmost->orderbyvals[i]));
+       }
        pfree(topmost->orderbyvals);
        pfree(topmost->orderbynulls);
        pfree(topmost);
index 1b9b2993957b5beb8f228ff942423d81fb427d11..29f5b35b326630ee6679773a4186196f733580f8 100644 (file)
@@ -95,12 +95,13 @@ typedef struct IndexScanDescData
        /*
         * When fetching with an ordering operator, the values of the ORDER BY
         * expressions of the last returned tuple, according to the index.  If
-        * xs_recheck is true, these need to be rechecked just like the scan keys,
-        * and the values returned here are a lower-bound on the actual values.
+        * xs_recheckorderby is true, these need to be rechecked just like the
+        * scan keys, and the values returned here are a lower-bound on the actual
+        * values.
         */
        Datum      *xs_orderbyvals;
        bool       *xs_orderbynulls;
-       bool            xs_recheckorderby;      /* T means ORDER BY exprs must be rechecked */
+       bool            xs_recheckorderby;
 
        /* state data for traversing HOT chains in index_getnext */
        bool            xs_continue_hot;        /* T if must keep walking HOT chain */