granicus.if.org Git - postgresql/commitdiff
Fix bugs in contrib/pg_trgm's LIKE pattern analysis code.
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 20 Aug 2012 17:24:52 +0000 (13:24 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 20 Aug 2012 17:25:42 +0000 (13:25 -0400)
Extraction of trigrams did not process LIKE escape sequences properly,
leading to possible misidentification of trigrams near escapes, resulting
in incorrect index search results.

Fujii Masao

contrib/pg_trgm/expected/pg_trgm.out
contrib/pg_trgm/sql/pg_trgm.sql
contrib/pg_trgm/trgm_op.c

index e7af7d48902e0c88f7720d5d4dd211cf454288af..81d0ca80b206394799de4a3f1a9bdaba0f124364 100644 (file)
@@ -3497,6 +3497,12 @@ select * from test2 where t like '%bcd%';
  abcdef
 (1 row)
 
+select * from test2 where t like E'%\\bcd%';
+   t    
+--------
+ abcdef
+(1 row)
+
 select * from test2 where t ilike '%BCD%';
    t    
 --------
@@ -3539,6 +3545,12 @@ select * from test2 where t like '%bcd%';
  abcdef
 (1 row)
 
+select * from test2 where t like E'%\\bcd%';
+   t    
+--------
+ abcdef
+(1 row)
+
 select * from test2 where t ilike '%BCD%';
    t    
 --------
index ea902f602f9271bb54803ba1a382b5f916560bc5..81ab1e79b17df0c5855b10e01298ab765e95d94d 100644 (file)
@@ -49,6 +49,7 @@ explain (costs off)
   select * from test2 where t ilike '%BCD%';
 select * from test2 where t like '%BCD%';
 select * from test2 where t like '%bcd%';
+select * from test2 where t like E'%\\bcd%';
 select * from test2 where t ilike '%BCD%';
 select * from test2 where t ilike 'qua%';
 drop index test2_idx_gin;
@@ -60,5 +61,6 @@ explain (costs off)
   select * from test2 where t ilike '%BCD%';
 select * from test2 where t like '%BCD%';
 select * from test2 where t like '%bcd%';
+select * from test2 where t like E'%\\bcd%';
 select * from test2 where t ilike '%BCD%';
 select * from test2 where t ilike 'qua%';
index 4e32c6f654c164206fb293fcb3d76f642ed72d38..87dffd1dd2c9b773c91c1243e96f6483735e601d 100644 (file)
@@ -272,33 +272,36 @@ get_wildcard_part(const char *str, int lenstr,
        const char *beginword = str;
        const char *endword;
        char       *s = buf;
-       bool            in_wildcard_meta = false;
+       bool            in_leading_wildcard_meta = false;
+       bool            in_trailing_wildcard_meta = false;
        bool            in_escape = false;
        int                     clen;
 
        /*
-        * Find the first word character remembering whether last character was
-        * wildcard meta-character.
+        * Find the first word character, remembering whether preceding character
+        * was wildcard meta-character.  Note that the in_escape state persists
+        * from this loop to the next one, since we may exit at a word character
+        * that is in_escape.
         */
        while (beginword - str < lenstr)
        {
                if (in_escape)
                {
-                       in_escape = false;
-                       in_wildcard_meta = false;
                        if (iswordchr(beginword))
                                break;
+                       in_escape = false;
+                       in_leading_wildcard_meta = false;
                }
                else
                {
                        if (ISESCAPECHAR(beginword))
                                in_escape = true;
                        else if (ISWILDCARDCHAR(beginword))
-                               in_wildcard_meta = true;
+                               in_leading_wildcard_meta = true;
                        else if (iswordchr(beginword))
                                break;
                        else
-                               in_wildcard_meta = false;
+                               in_leading_wildcard_meta = false;
                }
                beginword += pg_mblen(beginword);
        }
@@ -310,11 +313,11 @@ get_wildcard_part(const char *str, int lenstr,
                return NULL;
 
        /*
-        * Add left padding spaces if last character wasn't wildcard
+        * Add left padding spaces if preceding character wasn't wildcard
         * meta-character.
         */
        *charlen = 0;
-       if (!in_wildcard_meta)
+       if (!in_leading_wildcard_meta)
        {
                if (LPADDING > 0)
                {
@@ -333,15 +336,11 @@ get_wildcard_part(const char *str, int lenstr,
         * string boundary.  Strip escapes during copy.
         */
        endword = beginword;
-       in_wildcard_meta = false;
-       in_escape = false;
        while (endword - str < lenstr)
        {
                clen = pg_mblen(endword);
                if (in_escape)
                {
-                       in_escape = false;
-                       in_wildcard_meta = false;
                        if (iswordchr(endword))
                        {
                                memcpy(s, endword, clen);
@@ -349,7 +348,17 @@ get_wildcard_part(const char *str, int lenstr,
                                s += clen;
                        }
                        else
+                       {
+                               /*
+                                * Back up endword to the escape character when stopping at
+                                * an escaped char, so that subsequent get_wildcard_part will
+                                * restart from the escape character.  We assume here that
+                                * escape chars are single-byte.
+                                */
+                               endword--;
                                break;
+                       }
+                       in_escape = false;
                }
                else
                {
@@ -357,7 +366,7 @@ get_wildcard_part(const char *str, int lenstr,
                                in_escape = true;
                        else if (ISWILDCARDCHAR(endword))
                        {
-                               in_wildcard_meta = true;
+                               in_trailing_wildcard_meta = true;
                                break;
                        }
                        else if (iswordchr(endword))
@@ -367,19 +376,16 @@ get_wildcard_part(const char *str, int lenstr,
                                s += clen;
                        }
                        else
-                       {
-                               in_wildcard_meta = false;
                                break;
-                       }
                }
                endword += clen;
        }
 
        /*
-        * Add right padding spaces if last character wasn't wildcard
+        * Add right padding spaces if next character isn't wildcard
         * meta-character.
         */
-       if (!in_wildcard_meta)
+       if (!in_trailing_wildcard_meta)
        {
                if (RPADDING > 0)
                {