granicus.if.org Git - postgresql/commitdiff
Fix two bugs in tsquery @> operator.
author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Mon, 27 Oct 2014 08:50:41 +0000 (10:50 +0200)
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Mon, 27 Oct 2014 08:51:33 +0000 (10:51 +0200)
1. The comparison for matching terms used only the CRC to decide if there's
a match. Two different terms with the same CRC gave a match.

2. It assumed that if the second operand has more terms than the first, it's
never a match. That assumption is bogus, because there can be duplicate
terms in either operand.

Rewrite the implementation in a way that doesn't have those bugs.

Backpatch to all supported versions.

src/backend/utils/adt/tsquery_op.c

index 303a3b467404baeee3e0deb0c2b48734d91f1d04..606fbe019c23e23cfd506a64d757163b68751ac5 100644 (file)
@@ -213,63 +213,112 @@ makeTSQuerySign(TSQuery a)
        return sign;
 }
 
-Datum
-tsq_mcontains(PG_FUNCTION_ARGS)
+static char **
+collectTSQueryValues(TSQuery a, int *nvalues_p)
 {
-       TSQuery         query = PG_GETARG_TSQUERY(0);
-       TSQuery         ex = PG_GETARG_TSQUERY(1);
-       TSQuerySign sq,
-                               se;
-       int                     i,
-                               j;
-       QueryItem  *iq,
-                          *ie;
-
-       if (query->size < ex->size)
+       QueryItem  *ptr = GETQUERY(a);
+       char       *operand = GETOPERAND(a);
+       char      **values;
+       int                     nvalues = 0;
+       int                     i;
+
+       values = (char **) palloc(sizeof(char *) * a->size);
+
+       for (i = 0; i < a->size; i++)
        {
-               PG_FREE_IF_COPY(query, 0);
-               PG_FREE_IF_COPY(ex, 1);
+               if (ptr->type == QI_VAL)
+               {
+                       int                     len = ptr->qoperand.length;
+                       char       *val;
+
+                       val = palloc(len + 1);
+                       memcpy(val, operand + ptr->qoperand.distance, len);
+                       val[len] = '\0';
 
-               PG_RETURN_BOOL(false);
+                       values[nvalues++] = val;
+               }
+               ptr++;
        }
 
-       sq = makeTSQuerySign(query);
-       se = makeTSQuerySign(ex);
+       *nvalues_p = nvalues;
+       return values;
+}
+
+static int
+cmp_string(const void *a, const void *b)
+{
+       const char *sa = *((const char **) a);
+       const char *sb = *((const char **) b);
+       return strcmp(sa, sb);
+}
 
-       if ((sq & se) != se)
+static int
+remove_duplicates(char **strings, int n)
+{
+       if (n <= 1)
+               return n;
+       else
        {
-               PG_FREE_IF_COPY(query, 0);
-               PG_FREE_IF_COPY(ex, 1);
+               int                     i;
+               char       *prev = strings[0];
+               int                     new_n = 1;
 
-               PG_RETURN_BOOL(false);
+               for (i = 1; i < n; i++)
+               {
+                       if (strcmp(strings[i], prev) != 0)
+                       {
+                               strings[new_n++] = strings[i];
+                               prev = strings[i];
+                       }
+               }
+               return new_n;
        }
+}
 
-       iq = GETQUERY(query);
-       ie = GETQUERY(ex);
-
-       for (i = 0; i < ex->size; i++)
+Datum
+tsq_mcontains(PG_FUNCTION_ARGS)
+{
+       TSQuery         query = PG_GETARG_TSQUERY(0);
+       TSQuery         ex = PG_GETARG_TSQUERY(1);
+       char      **query_values;
+       int                     query_nvalues;
+       char      **ex_values;
+       int                     ex_nvalues;
+       bool            result = true;
+
+       /* Extract the query terms into arrays */
+       query_values = collectTSQueryValues(query, &query_nvalues);
+       ex_values = collectTSQueryValues(ex, &ex_nvalues);
+
+       /* Sort and remove duplicates from both arrays */
+       qsort(query_values, query_nvalues, sizeof(char *), cmp_string);
+       query_nvalues = remove_duplicates(query_values, query_nvalues);
+       qsort(ex_values, ex_nvalues, sizeof(char *), cmp_string);
+       ex_nvalues = remove_duplicates(ex_values, ex_nvalues);
+
+       if (ex_nvalues > query_nvalues)
+               result = false;
+       else
        {
-               if (ie[i].type != QI_VAL)
-                       continue;
-               for (j = 0; j < query->size; j++)
+               int i;
+               int j = 0;
+
+               for (i = 0; i < ex_nvalues; i++)
                {
-                       if (iq[j].type == QI_VAL &&
-                               ie[i].qoperand.valcrc == iq[j].qoperand.valcrc)
+                       for (; j < query_nvalues; j++)
+                       {
+                               if (strcmp(ex_values[i], query_values[j]) == 0)
+                                       break;
+                       }
+                       if (j == query_nvalues)
+                       {
+                               result = false;
                                break;
-               }
-               if (j >= query->size)
-               {
-                       PG_FREE_IF_COPY(query, 0);
-                       PG_FREE_IF_COPY(ex, 1);
-
-                       PG_RETURN_BOOL(false);
+                       }
                }
        }
 
-       PG_FREE_IF_COPY(query, 0);
-       PG_FREE_IF_COPY(ex, 1);
-
-       PG_RETURN_BOOL(true);
+       PG_RETURN_BOOL(result);
 }
 
 Datum