Fix infinite loop when splitting inner tuples in SPGiST text indexes.

author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 9 Jun 2014 20:30:40 +0000 (16:30 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 9 Jun 2014 20:31:11 +0000 (16:31 -0400)
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c

index 5b7a5a06a0f34811b6f947e19ccd071a59e92ef9..1ea1dd1413f9bb7949aa8fdf1cfa3a724bd6af14 100644 (file)
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -3,6 +3,31 @@
   * spgtextproc.c
   *       implementation of radix tree (compressed trie) over text
   *
+ * In a text_ops SPGiST index, inner tuples can have a prefix which is the
+ * common prefix of all strings indexed under that tuple.  The node labels
+ * represent the next byte of the string(s) after the prefix.  Assuming we
+ * always use the longest possible prefix, we will get more than one node
+ * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
+ *
+ * To reconstruct the indexed string for any index entry, concatenate the
+ * inner-tuple prefixes and node labels starting at the root and working
+ * down to the leaf entry, then append the datum in the leaf entry.
+ * (While descending the tree, "level" is the number of bytes reconstructed
+ * so far.)
+ *
+ * However, there are two special cases for node labels: -1 indicates that
+ * there are no more bytes after the prefix-so-far, and -2 indicates that we
+ * had to split an existing allTheSame tuple (in such a case we have to create
+ * a node label that doesn't correspond to any string byte).  In either case,
+ * the node label does not contribute anything to the reconstructed string.
+ *
+ * Previously, we used a node label of zero for both special cases, but
+ * this was problematic because one can't tell whether a string ending at
+ * the current level can be pushed down into such a child node.  For
+ * backwards compatibility, we still support such node labels for reading;
+ * but no new entries will ever be pushed down into a zero-labeled child.
+ * No new entries ever get pushed into a -2-labeled child, either.
+ *
   *
   * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
@@ -24,28 +49,29 @@
  
  /*
   * In the worst case, an inner tuple in a text radix tree could have as many
- * as 256 nodes (one for each possible byte value).  Each node can take 16
- * bytes on MAXALIGN=8 machines.  The inner tuple must fit on an index page
- * of size BLCKSZ.  Rather than assuming we know the exact amount of overhead
- * imposed by page headers, tuple headers, etc, we leave 100 bytes for that
- * (the actual overhead should be no more than 56 bytes at this writing, so
- * there is slop in this number).  So we can safely create prefixes up to
- * BLCKSZ - 256 * 16 - 100 bytes long.  Unfortunately, because 256 * 16 is
- * already 4K, there is no safe prefix length when BLCKSZ is less than 8K;
- * it is always possible to get "SPGiST inner tuple size exceeds maximum"
- * if there are too many distinct next-byte values at a given place in the
- * tree.  Since use of nonstandard block sizes appears to be negligible in
- * the field, we just live with that fact for now, choosing a max prefix
- * size of 32 bytes when BLCKSZ is configured smaller than default.
+ * as 258 nodes (one for each possible byte value, plus the two special
+ * cases).  Each node can take 16 bytes on MAXALIGN=8 machines.  The inner
+ * tuple must fit on an index page of size BLCKSZ.  Rather than assuming we
+ * know the exact amount of overhead imposed by page headers, tuple headers,
+ * etc, we leave 100 bytes for that (the actual overhead should be no more
+ * than 56 bytes at this writing, so there is slop in this number).
+ * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long.
+ * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length
+ * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner
+ * tuple size exceeds maximum" if there are too many distinct next-byte values
+ * at a given place in the tree.  Since use of nonstandard block sizes appears
+ * to be negligible in the field, we just live with that fact for now,
+ * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller
+ * than default.
   */
-#define SPGIST_MAX_PREFIX_LENGTH       Max((int) (BLCKSZ - 256 * 16 - 100), 32)
+#define SPGIST_MAX_PREFIX_LENGTH       Max((int) (BLCKSZ - 258 * 16 - 100), 32)
  
  /* Struct for sorting values in picksplit */
  typedef struct spgNodePtr
  {
         Datum           d;
         int                     i;
-       uint8           c;
+       int16           c;
  } spgNodePtr;
  
  
@@ -56,7 +82,7 @@ spg_text_config(PG_FUNCTION_ARGS)
         spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
  
         cfg->prefixType = TEXTOID;
-       cfg->labelType = CHAROID;
+       cfg->labelType = INT2OID;
         cfg->canReturnData = true;
         cfg->longValuesOK = true;       /* suffixing will shorten long values */
         PG_RETURN_VOID();
@@ -107,12 +133,12 @@ commonPrefix(const char *a, const char *b, int lena, int lenb)
  }
  
  /*
- * Binary search an array of uint8 datums for a match to c
+ * Binary search an array of int16 datums for a match to c
   *
   * On success, *i gets the match location; on failure, it gets where to insert
   */
  static bool
-searchChar(Datum *nodeLabels, int nNodes, uint8 c, int *i)
+searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i)
  {
         int                     StopLow = 0,
                                 StopHigh = nNodes;
@@ -120,7 +146,7 @@ searchChar(Datum *nodeLabels, int nNodes, uint8 c, int *i)
         while (StopLow < StopHigh)
         {
                 int                     StopMiddle = (StopLow + StopHigh) >> 1;
-               uint8           middle = DatumGetUInt8(nodeLabels[StopMiddle]);
+               int16           middle = DatumGetInt16(nodeLabels[StopMiddle]);
  
                 if (c < middle)
                         StopHigh = StopMiddle;
@@ -145,16 +171,19 @@ spg_text_choose(PG_FUNCTION_ARGS)
         text       *inText = DatumGetTextPP(in->datum);
         char       *inStr = VARDATA_ANY(inText);
         int                     inSize = VARSIZE_ANY_EXHDR(inText);
-       uint8           nodeChar = '\0';
-       int                     i = 0;
+       char       *prefixStr = NULL;
+       int                     prefixSize = 0;
         int                     commonLen = 0;
+       int16           nodeChar = 0;
+       int                     i = 0;
  
         /* Check for prefix match, set nodeChar to first byte after prefix */
         if (in->hasPrefix)
         {
                 text       *prefixText = DatumGetTextPP(in->prefixDatum);
-               char       *prefixStr = VARDATA_ANY(prefixText);
-               int                     prefixSize = VARSIZE_ANY_EXHDR(prefixText);
+
+               prefixStr = VARDATA_ANY(prefixText);
+               prefixSize = VARSIZE_ANY_EXHDR(prefixText);
  
                 commonLen = commonPrefix(inStr + in->level,
                                                                  prefixStr,
@@ -164,9 +193,9 @@ spg_text_choose(PG_FUNCTION_ARGS)
                 if (commonLen == prefixSize)
                 {
                         if (inSize - in->level > commonLen)
-                               nodeChar = *(uint8 *) (inStr + in->level + commonLen);
+                               nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
                         else
-                               nodeChar = '\0';
+                               nodeChar = -1;
                 }
                 else
                 {
@@ -184,7 +213,7 @@ spg_text_choose(PG_FUNCTION_ARGS)
                                         formTextDatum(prefixStr, commonLen);
                         }
                         out->result.splitTuple.nodeLabel =
-                               UInt8GetDatum(*(prefixStr + commonLen));
+                               Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
  
                         if (prefixSize - commonLen == 1)
                         {
@@ -203,11 +232,11 @@ spg_text_choose(PG_FUNCTION_ARGS)
         }
         else if (inSize > in->level)
         {
-               nodeChar = *(uint8 *) (inStr + in->level);
+               nodeChar = *(unsigned char *) (inStr + in->level);
         }
         else
         {
-               nodeChar = '\0';
+               nodeChar = -1;
         }
  
         /* Look up nodeChar in the node label array */
@@ -219,13 +248,18 @@ spg_text_choose(PG_FUNCTION_ARGS)
                  * to provide the correct levelAdd and restDatum values, and those are
                  * the same regardless of which node gets chosen by core.)
                  */
+               int                     levelAdd;
+
                 out->resultType = spgMatchNode;
                 out->result.matchNode.nodeN = i;
-               out->result.matchNode.levelAdd = commonLen + 1;
-               if (inSize - in->level - commonLen - 1 > 0)
+               levelAdd = commonLen;
+               if (nodeChar >= 0)
+                       levelAdd++;
+               out->result.matchNode.levelAdd = levelAdd;
+               if (inSize - in->level - levelAdd > 0)
                         out->result.matchNode.restDatum =
-                               formTextDatum(inStr + in->level + commonLen + 1,
-                                                         inSize - in->level - commonLen - 1);
+                               formTextDatum(inStr + in->level + levelAdd,
+                                                         inSize - in->level - levelAdd);
                 else
                         out->result.matchNode.restDatum =
                                 formTextDatum(NULL, 0);
@@ -234,21 +268,26 @@ spg_text_choose(PG_FUNCTION_ARGS)
         {
                 /*
                  * Can't use AddNode action, so split the tuple.  The upper tuple has
-                * the same prefix as before and uses an empty node label for the
+                * the same prefix as before and uses a dummy node label -2 for the
                  * lower tuple.  The lower tuple has no prefix and the same node
                  * labels as the original tuple.
+                *
+                * Note: it might seem tempting to shorten the upper tuple's prefix,
+                * if it has one, then use its last byte as label for the lower tuple.
+                * But that doesn't win since we know the incoming value matches the
+                * whole prefix: we'd just end up splitting the lower tuple again.
                  */
                 out->resultType = spgSplitTuple;
                 out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
                 out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
-               out->result.splitTuple.nodeLabel = UInt8GetDatum('\0');
+               out->result.splitTuple.nodeLabel = Int16GetDatum(-2);
                 out->result.splitTuple.postfixHasPrefix = false;
         }
         else
         {
                 /* Add a node for the not-previously-seen nodeChar value */
                 out->resultType = spgAddNode;
-               out->result.addNode.nodeLabel = UInt8GetDatum(nodeChar);
+               out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
                 out->result.addNode.nodeN = i;
         }
  
@@ -262,12 +301,7 @@ cmpNodePtr(const void *a, const void *b)
         const spgNodePtr *aa = (const spgNodePtr *) a;
         const spgNodePtr *bb = (const spgNodePtr *) b;
  
-       if (aa->c == bb->c)
-               return 0;
-       else if (aa->c > bb->c)
-               return 1;
-       else
-               return -1;
+       return aa->c - bb->c;
  }
  
  Datum
@@ -319,15 +353,15 @@ spg_text_picksplit(PG_FUNCTION_ARGS)
                 text       *texti = DatumGetTextPP(in->datums[i]);
  
                 if (commonLen < VARSIZE_ANY_EXHDR(texti))
-                       nodes[i].c = *(uint8 *) (VARDATA_ANY(texti) + commonLen);
+                       nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
                 else
-                       nodes[i].c = '\0';      /* use \0 if string is all common */
+                       nodes[i].c = -1;        /* use -1 if string is all common */
                 nodes[i].i = i;
                 nodes[i].d = in->datums[i];
         }
  
         /*
-        * Sort by label bytes so that we can group the values into nodes.  This
+        * Sort by label values so that we can group the values into nodes.  This
          * also ensures that the nodes are ordered by label value, allowing the
          * use of binary search in searchChar.
          */
@@ -346,7 +380,7 @@ spg_text_picksplit(PG_FUNCTION_ARGS)
  
                 if (i == 0 || nodes[i].c != nodes[i - 1].c)
                 {
-                       out->nodeLabels[out->nNodes] = UInt8GetDatum(nodes[i].c);
+                       out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
                         out->nNodes++;
                 }
  
@@ -377,9 +411,9 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
  
         /*
          * Reconstruct values represented at this tuple, including parent data,
-        * prefix of this tuple if any, and the node label if any.  in->level
-        * should be the length of the previously reconstructed value, and the
-        * number of bytes added here is prefixSize or prefixSize + 1.
+        * prefix of this tuple if any, and the node label if it's non-dummy.
+        * in->level should be the length of the previously reconstructed value,
+        * and the number of bytes added here is prefixSize or prefixSize + 1.
          *
          * Note: we assume that in->reconstructedValue isn't toasted and doesn't
          * have a short varlena header.  This is okay because it must have been
@@ -422,17 +456,17 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
  
         for (i = 0; i < in->nNodes; i++)
         {
-               uint8           nodeChar = DatumGetUInt8(in->nodeLabels[i]);
+               int16           nodeChar = DatumGetInt16(in->nodeLabels[i]);
                 int                     thisLen;
                 bool            res = true;
                 int                     j;
  
-               /* If nodeChar is zero, don't include it in data */
-               if (nodeChar == '\0')
+               /* If nodeChar is a dummy value (-1, -2, or legacy 0), don't include it in data */
+               if (nodeChar <= 0)
                         thisLen = maxReconstrLen - 1;
                 else
                 {
-                       ((char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
+                       ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
                         thisLen = maxReconstrLen;
                 }
  
@@ -447,7 +481,9 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
                          * If it's a collation-aware operator, but the collation is C, we
                          * can treat it as non-collation-aware.  With non-C collation we
                          * need to traverse whole tree :-( so there's no point in making
-                        * any check here.
+                        * any check here.  (Note also that our reconstructed value may
+                        * well end with a partial multibyte character, so that applying
+                        * any encoding-sensitive test to it would be risky anyhow.)
                          */
                         if (strategy > 10)
                         {
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 9 Jun 2014 20:30:40 +0000 (16:30 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 9 Jun 2014 20:31:11 +0000 (16:31 -0400)