]> granicus.if.org Git - postgresql/commitdiff
Add optional compression method to SP-GiST
authorTeodor Sigaev <teodor@sigaev.ru>
Fri, 22 Dec 2017 10:33:16 +0000 (13:33 +0300)
committerTeodor Sigaev <teodor@sigaev.ru>
Fri, 22 Dec 2017 10:33:16 +0000 (13:33 +0300)
Patch allows to have different types of column and value stored in leaf tuples
of SP-GiST. The main application of feature is to transform complex column type
to simple indexed type or for truncating too long value, transformation could
be lossy.  Simple example: polygons are converted to their bounding boxes,
this opclass follows.

Authors: me, Heikki Linnakangas, Alexander Korotkov, Nikita Glukhov
Reviewed-By: all authors + Darafei Praliaskouski
Discussions:
https://www.postgresql.org/message-id/5447B3FF.2080406@sigaev.ru
https://www.postgresql.org/message-id/flat/54907069.1030506@sigaev.ru#54907069.1030506@sigaev.ru

doc/src/sgml/spgist.sgml
src/backend/access/spgist/spgdoinsert.c
src/backend/access/spgist/spgscan.c
src/backend/access/spgist/spgutils.c
src/backend/access/spgist/spgvalidate.c
src/include/access/spgist.h
src/include/access/spgist_private.h

index 139c8ed8f74e18fc2baa0aff7d570d879585cdcd..b4a8be476e79de544759e00f7ba6cf5d4a50747c 100644 (file)
 
  <para>
   There are five user-defined methods that an index operator class for
-  <acronym>SP-GiST</acronym> must provide.  All five follow the convention
-  of accepting two <type>internal</type> arguments, the first of which is a
-  pointer to a C struct containing input values for the support method,
-  while the second argument is a pointer to a C struct where output values
-  must be placed.  Four of the methods just return <type>void</type>, since
-  all their results appear in the output struct; but
+  <acronym>SP-GiST</acronym> must provide, and one is optional.  All five
+  mandatory methods follow the convention of accepting two <type>internal</type>
+  arguments, the first of which is a pointer to a C struct containing input
+  values for the support method, while the second argument is a pointer to a
+  C struct where output values must be placed.  Four of the mandatory methods just
+  return <type>void</type>, since all their results appear in the output struct; but
   <function>leaf_consistent</function> additionally returns a <type>boolean</type> result.
   The methods must not modify any fields of their input structs.  In all
   cases, the output struct is initialized to zeroes before calling the
-  user-defined method.
+  user-defined method.  Optional sixth method <function>compress</function>
+  accepts datum to be indexed as the only argument and returns value suitable
+  for physical storage in leaf tuple.
  </para>
 
  <para>
-  The five user-defined methods are:
+  The five mandatory user-defined methods are:
  </para>
 
  <variablelist>
@@ -283,6 +285,7 @@ typedef struct spgConfigOut
 {
     Oid         prefixType;     /* Data type of inner-tuple prefixes */
     Oid         labelType;      /* Data type of inner-tuple node labels */
+    Oid         leafType;       /* Data type of leaf-tuple values */
     bool        canReturnData;  /* Opclass can reconstruct original data */
     bool        longValuesOK;   /* Opclass can cope with values &gt; 1 page */
 } spgConfigOut;
@@ -305,6 +308,22 @@ typedef struct spgConfigOut
       class is capable of segmenting long values by repeated suffixing
       (see <xref linkend="spgist-limits"/>).
      </para>
+
+     <para>
+      <structfield>leafType</structfield> is typically the same as
+      <structfield>attType</structfield>.  For the reasons of backward
+      compatibility, method <function>config</function> can
+      leave <structfield>leafType</structfield> uninitialized; that would
+      give the same effect as setting <structfield>leafType</structfield> equal
+      to <structfield>attType</structfield>.  When <structfield>attType</structfield>
+      and <structfield>leafType</structfield> are different, then optional
+      method <function>compress</function> must be provided.
+      Method <function>compress</function> is responsible
+      for transformation of datums to be indexed from <structfield>attType</structfield>
+      to <structfield>leafType</structfield>.
+      Note: both consistent functions will get <structfield>scankeys</structfield>
+      unchanged, without transformation using <function>compress</function>.
+     </para>
      </listitem>
     </varlistentry>
 
@@ -380,10 +399,16 @@ typedef struct spgChooseOut
 } spgChooseOut;
 </programlisting>
 
-       <structfield>datum</structfield> is the original datum that was to be inserted
-       into the index.
-       <structfield>leafDatum</structfield> is initially the same as
-       <structfield>datum</structfield>, but can change at lower levels of the tree
+       <structfield>datum</structfield> is the original datum of
+       <structname>spgConfigIn</structname>.<structfield>attType</structfield>
+       type that was to be inserted into the index.
+       <structfield>leafDatum</structfield> is a value of
+       <structname>spgConfigOut</structname>.<structfield>leafType</structfield>
+       type which is initially an result of method
+       <function>compress</function> applied to <structfield>datum</structfield>
+       when method <function>compress</function> is provided, or same value as
+       <structfield>datum</structfield> otherwise.
+       <structfield>leafDatum</structfield> can change at lower levels of the tree
        if the <function>choose</function> or <function>picksplit</function>
        methods change it.  When the insertion search reaches a leaf page,
        the current value of <structfield>leafDatum</structfield> is what will be stored
@@ -418,7 +443,7 @@ typedef struct spgChooseOut
        Set <structfield>levelAdd</structfield> to the increment in
        <structfield>level</structfield> caused by descending through that node,
        or leave it as zero if the operator class does not use levels.
-       Set <structfield>restDatum</structfield> to equal <structfield>datum</structfield>
+       Set <structfield>restDatum</structfield> to equal <structfield>leafDatum</structfield>
        if the operator class does not modify datums from one level to the
        next, or otherwise set it to the modified value to be used as
        <structfield>leafDatum</structfield> at the next level.
@@ -509,7 +534,9 @@ typedef struct spgPickSplitOut
 </programlisting>
 
        <structfield>nTuples</structfield> is the number of leaf tuples provided.
-       <structfield>datums</structfield> is an array of their datum values.
+       <structfield>datums</structfield> is an array of their datum values of
+       <structname>spgConfigOut</structname>.<structfield>leafType</structfield>
+       type.
        <structfield>level</structfield> is the current level that all the leaf tuples
        share, which will become the level of the new inner tuple.
       </para>
@@ -624,7 +651,8 @@ typedef struct spgInnerConsistentOut
        <structfield>reconstructedValue</structfield> is the value reconstructed for the
        parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
        <function>inner_consistent</function> function did not provide a value at the
-       parent level.
+       parent level. <structfield>reconstructedValue</structfield> is always of
+       <structname>spgConfigOut</structname>.<structfield>leafType</structfield> type.
        <structfield>traversalValue</structfield> is a pointer to any traverse data
        passed down from the previous call of <function>inner_consistent</function>
        on the parent index tuple, or NULL at the root level.
@@ -659,6 +687,7 @@ typedef struct spgInnerConsistentOut
        necessarily so, so an array is used.)
        If value reconstruction is needed, set
        <structfield>reconstructedValues</structfield> to an array of the values
+       of <structname>spgConfigOut</structname>.<structfield>leafType</structfield> type
        reconstructed for each child node to be visited; otherwise, leave
        <structfield>reconstructedValues</structfield> as NULL.
        If it is desired to pass down additional out-of-band information
@@ -730,7 +759,8 @@ typedef struct spgLeafConsistentOut
        <structfield>reconstructedValue</structfield> is the value reconstructed for the
        parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
        <function>inner_consistent</function> function did not provide a value at the
-       parent level.
+       parent level. <structfield>reconstructedValue</structfield> is always of
+       <structname>spgConfigOut</structname>.<structfield>leafType</structfield> type. 
        <structfield>traversalValue</structfield> is a pointer to any traverse data
        passed down from the previous call of <function>inner_consistent</function>
        on the parent index tuple, or NULL at the root level.
@@ -739,16 +769,18 @@ typedef struct spgLeafConsistentOut
        <structfield>returnData</structfield> is <literal>true</literal> if reconstructed data is
        required for this query; this will only be so if the
        <function>config</function> function asserted <structfield>canReturnData</structfield>.
-       <structfield>leafDatum</structfield> is the key value stored in the current
-       leaf tuple.
+       <structfield>leafDatum</structfield> is the key value of
+       <structname>spgConfigOut</structname>.<structfield>leafType</structfield>
+       stored in the current leaf tuple.
       </para>
 
       <para>
        The function must return <literal>true</literal> if the leaf tuple matches the
        query, or <literal>false</literal> if not.  In the <literal>true</literal> case,
        if <structfield>returnData</structfield> is <literal>true</literal> then
-       <structfield>leafValue</structfield> must be set to the value originally supplied
-       to be indexed for this leaf tuple.  Also,
+       <structfield>leafValue</structfield> must be set to the value of
+       <structname>spgConfigIn</structname>.<structfield>attType</structfield> type
+       originally supplied to be indexed for this leaf tuple.  Also,
        <structfield>recheck</structfield> may be set to <literal>true</literal> if the match
        is uncertain and so the operator(s) must be re-applied to the actual
        heap tuple to verify the match.
@@ -757,6 +789,26 @@ typedef struct spgLeafConsistentOut
     </varlistentry>
    </variablelist>
 
+ <para>
+  The optional user-defined method is:
+ </para>
+
+ <variablelist>
+    <varlistentry>
+     <term><function>Datum compress(Datum in)</function></term>
+     <listitem>
+      <para>
+       Converts the data item into a format suitable for physical storage in 
+       a leaf tuple of index page.  It accepts
+       <structname>spgConfigIn</structname>.<structfield>attType</structfield>
+       value and return
+       <structname>spgConfigOut</structname>.<structfield>leafType</structfield>
+       value.  Output value should not be toasted.
+      </para>
+     </listitem>
+    </varlistentry>
+  </variablelist>
+
   <para>
    All the SP-GiST support methods are normally called in a short-lived
    memory context; that is, <varname>CurrentMemoryContext</varname> will be reset
index a5f4c4059c8c8af7620e0bed49b7c009efb17d4b..a8cb8c7bdca418c8d3827702d00eb960915bb331 100644 (file)
@@ -1906,14 +1906,37 @@ spgdoinsert(Relation index, SpGistState *state,
                procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
 
        /*
-        * Since we don't use index_form_tuple in this AM, we have to make sure
+        * Prepare the leaf datum to insert.
+        *
+        * If an optional "compress" method is provided, then call it to form
+        * the leaf datum from the input datum.  Otherwise store the input datum as
+        * is.  Since we don't use index_form_tuple in this AM, we have to make sure
         * value to be inserted is not toasted; FormIndexDatum doesn't guarantee
-        * that.
+        * that.  But we assume the "compress" method to return an untoasted value.
         */
-       if (!isnull && state->attType.attlen == -1)
-               datum = PointerGetDatum(PG_DETOAST_DATUM(datum));
+       if (!isnull)
+       {
+               if (OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
+               {
+                       FmgrInfo   *compressProcinfo = NULL;
+
+                       compressProcinfo = index_getprocinfo(index, 1, SPGIST_COMPRESS_PROC);
+                       leafDatum = FunctionCall1Coll(compressProcinfo,
+                                                                                 index->rd_indcollation[0],
+                                                                                 datum);
+               }
+               else
+               {
+                       Assert(state->attLeafType.type == state->attType.type);
 
-       leafDatum = datum;
+                       if (state->attType.attlen == -1)
+                               leafDatum = PointerGetDatum(PG_DETOAST_DATUM(datum));
+                       else
+                               leafDatum = datum;
+               }
+       }
+       else
+               leafDatum = (Datum) 0;
 
        /*
         * Compute space needed for a leaf tuple containing the given datum.
@@ -1923,7 +1946,7 @@ spgdoinsert(Relation index, SpGistState *state,
         */
        if (!isnull)
                leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
-                       SpGistGetTypeSize(&state->attType, leafDatum);
+                       SpGistGetTypeSize(&state->attLeafType, leafDatum);
        else
                leafSize = SGDTSIZE + sizeof(ItemIdData);
 
@@ -2138,7 +2161,7 @@ spgdoinsert(Relation index, SpGistState *state,
                                        {
                                                leafDatum = out.result.matchNode.restDatum;
                                                leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
-                                                       SpGistGetTypeSize(&state->attType, leafDatum);
+                                                       SpGistGetTypeSize(&state->attLeafType, leafDatum);
                                        }
 
                                        /*
index 7965b5846d9c85ddd880b55d71366911e87aa32f..c64a174143b3a6d3a9bc282994716cf9883deab6 100644 (file)
@@ -40,7 +40,7 @@ typedef struct ScanStackEntry
 static void
 freeScanStackEntry(SpGistScanOpaque so, ScanStackEntry *stackEntry)
 {
-       if (!so->state.attType.attbyval &&
+       if (!so->state.attLeafType.attbyval &&
                DatumGetPointer(stackEntry->reconstructedValue) != NULL)
                pfree(DatumGetPointer(stackEntry->reconstructedValue));
        if (stackEntry->traversalValue)
@@ -527,8 +527,8 @@ redirect:
                                        if (out.reconstructedValues)
                                                newEntry->reconstructedValue =
                                                        datumCopy(out.reconstructedValues[i],
-                                                                         so->state.attType.attbyval,
-                                                                         so->state.attType.attlen);
+                                                                         so->state.attLeafType.attbyval,
+                                                                         so->state.attLeafType.attlen);
                                        else
                                                newEntry->reconstructedValue = (Datum) 0;
 
index bd5301f383ad943e2c33927564fb9ad8e1f54c8b..e571f0cce0aadb09f3517716d204a9266a7e21a8 100644 (file)
@@ -125,6 +125,22 @@ spgGetCache(Relation index)
 
                /* Get the information we need about each relevant datatype */
                fillTypeDesc(&cache->attType, atttype);
+
+               if (OidIsValid(cache->config.leafType) &&
+                       cache->config.leafType != atttype)
+               {
+                       if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                errmsg("compress method must not defined when leaf type is different from input type")));
+
+                       fillTypeDesc(&cache->attLeafType, cache->config.leafType);
+               }
+               else
+               {
+                       cache->attLeafType = cache->attType;
+               }
+
                fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
                fillTypeDesc(&cache->attLabelType, cache->config.labelType);
 
@@ -164,6 +180,7 @@ initSpGistState(SpGistState *state, Relation index)
 
        state->config = cache->config;
        state->attType = cache->attType;
+       state->attLeafType = cache->attLeafType;
        state->attPrefixType = cache->attPrefixType;
        state->attLabelType = cache->attLabelType;
 
@@ -618,7 +635,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
        /* compute space needed (note result is already maxaligned) */
        size = SGLTHDRSZ;
        if (!isnull)
-               size += SpGistGetTypeSize(&state->attType, datum);
+               size += SpGistGetTypeSize(&state->attLeafType, datum);
 
        /*
         * Ensure that we can replace the tuple with a dead tuple later.  This
@@ -634,7 +651,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
        tup->nextOffset = InvalidOffsetNumber;
        tup->heapPtr = *heapPtr;
        if (!isnull)
-               memcpyDatum(SGLTDATAPTR(tup), &state->attType, datum);
+               memcpyDatum(SGLTDATAPTR(tup), &state->attLeafType, datum);
 
        return tup;
 }
index 157cf2a0283fc9f618c0c8f1481cd777695aca02..440b3ce917898b1db0a7d1854d5669bd9e52e9bc 100644 (file)
@@ -22,6 +22,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_type.h"
 #include "utils/builtins.h"
+#include "utils/lsyscache.h"
 #include "utils/regproc.h"
 #include "utils/syscache.h"
 
@@ -52,6 +53,10 @@ spgvalidate(Oid opclassoid)
        OpFamilyOpFuncGroup *opclassgroup;
        int                     i;
        ListCell   *lc;
+       spgConfigIn     configIn;
+       spgConfigOut configOut;
+       Oid                     configOutLefttype = InvalidOid;
+       Oid                     configOutRighttype = InvalidOid;
 
        /* Fetch opclass information */
        classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
@@ -74,6 +79,7 @@ spgvalidate(Oid opclassoid)
        /* Fetch all operators and support functions of the opfamily */
        oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
        proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
+       grouplist = identify_opfamily_groups(oprlist, proclist);
 
        /* Check individual support functions */
        for (i = 0; i < proclist->n_members; i++)
@@ -100,6 +106,40 @@ spgvalidate(Oid opclassoid)
                switch (procform->amprocnum)
                {
                        case SPGIST_CONFIG_PROC:
+                               ok = check_amproc_signature(procform->amproc, VOIDOID, true,
+                                                                                       2, 2, INTERNALOID, INTERNALOID);
+                               configIn.attType = procform->amproclefttype;
+                               memset(&configOut, 0, sizeof(configOut));
+
+                               OidFunctionCall2(procform->amproc,
+                                                                PointerGetDatum(&configIn),
+                                                                PointerGetDatum(&configOut));
+
+                               configOutLefttype = procform->amproclefttype;
+                               configOutRighttype = procform->amprocrighttype;
+
+                               /*
+                                * When leaf and attribute types are the same, compress function
+                                * is not required and we set corresponding bit in functionset
+                                * for later group consistency check.
+                                */
+                               if (!OidIsValid(configOut.leafType) ||
+                                       configOut.leafType == configIn.attType)
+                               {
+                                       foreach(lc, grouplist)
+                                       {
+                                               OpFamilyOpFuncGroup *group = lfirst(lc);
+
+                                               if (group->lefttype == procform->amproclefttype &&
+                                                       group->righttype == procform->amprocrighttype)
+                                               {
+                                                       group->functionset |=
+                                                               ((uint64) 1) << SPGIST_COMPRESS_PROC;
+                                                       break;
+                                               }
+                                       }
+                               }
+                               break;
                        case SPGIST_CHOOSE_PROC:
                        case SPGIST_PICKSPLIT_PROC:
                        case SPGIST_INNER_CONSISTENT_PROC:
@@ -110,6 +150,15 @@ spgvalidate(Oid opclassoid)
                                ok = check_amproc_signature(procform->amproc, BOOLOID, true,
                                                                                        2, 2, INTERNALOID, INTERNALOID);
                                break;
+                       case SPGIST_COMPRESS_PROC:
+                               if (configOutLefttype != procform->amproclefttype ||
+                                       configOutRighttype != procform->amprocrighttype)
+                                       ok = false;
+                               else
+                                       ok = check_amproc_signature(procform->amproc,
+                                                                                               configOut.leafType, true,
+                                                                                               1, 1, procform->amproclefttype);
+                               break;
                        default:
                                ereport(INFO,
                                                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -178,7 +227,6 @@ spgvalidate(Oid opclassoid)
        }
 
        /* Now check for inconsistent groups of operators/functions */
-       grouplist = identify_opfamily_groups(oprlist, proclist);
        opclassgroup = NULL;
        foreach(lc, grouplist)
        {
index d1bc396e6df31d6e9c09c172e8b00da2c08768f4..06b1d88e5a7cf032982b3e326de58edac21dc686 100644 (file)
@@ -30,7 +30,9 @@
 #define SPGIST_PICKSPLIT_PROC                  3
 #define SPGIST_INNER_CONSISTENT_PROC   4
 #define SPGIST_LEAF_CONSISTENT_PROC            5
-#define SPGISTNProc                                            5
+#define SPGIST_COMPRESS_PROC                   6
+#define SPGISTNRequiredProc                            5
+#define SPGISTNProc                                            6
 
 /*
  * Argument structs for spg_config method
@@ -44,6 +46,7 @@ typedef struct spgConfigOut
 {
        Oid                     prefixType;             /* Data type of inner-tuple prefixes */
        Oid                     labelType;              /* Data type of inner-tuple node labels */
+       Oid                     leafType;               /* Data type of leaf-tuple values */
        bool            canReturnData;  /* Opclass can reconstruct original data */
        bool            longValuesOK;   /* Opclass can cope with values > 1 page */
 } spgConfigOut;
index 1c4b321b6c653140690a525c3b6abe88fba47626..e55de9dc54840d1f75d51525cb8acdb80d50c21c 100644 (file)
@@ -119,7 +119,8 @@ typedef struct SpGistState
 {
        spgConfigOut config;            /* filled in by opclass config method */
 
-       SpGistTypeDesc attType;         /* type of input data and leaf values */
+       SpGistTypeDesc attType;         /* type of values to be indexed/restored */
+       SpGistTypeDesc attLeafType;             /* type of leaf-tuple values */
        SpGistTypeDesc attPrefixType;   /* type of inner-tuple prefix values */
        SpGistTypeDesc attLabelType;    /* type of node label values */
 
@@ -178,7 +179,8 @@ typedef struct SpGistCache
 {
        spgConfigOut config;            /* filled in by opclass config method */
 
-       SpGistTypeDesc attType;         /* type of input data and leaf values */
+       SpGistTypeDesc attType;         /* type of values to be indexed/restored */
+       SpGistTypeDesc attLeafType;             /* type of leaf-tuple values */
        SpGistTypeDesc attPrefixType;   /* type of inner-tuple prefix values */
        SpGistTypeDesc attLabelType;    /* type of node label values */
 
@@ -300,7 +302,7 @@ typedef SpGistLeafTupleData *SpGistLeafTuple;
 
 #define SGLTHDRSZ                      MAXALIGN(sizeof(SpGistLeafTupleData))
 #define SGLTDATAPTR(x)         (((char *) (x)) + SGLTHDRSZ)
-#define SGLTDATUM(x, s)                ((s)->attType.attbyval ? \
+#define SGLTDATUM(x, s)                ((s)->attLeafType.attbyval ? \
                                                         *(Datum *) SGLTDATAPTR(x) : \
                                                         PointerGetDatum(SGLTDATAPTR(x)))