granicus.if.org Git - postgresql/commitdiff
Fix similar_escape() to convert parentheses to non-capturing style.
author: Tom Lane <tgl@sss.pgh.pa.us>
Sat, 2 Jan 2010 20:59:16 +0000 (20:59 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Sat, 2 Jan 2010 20:59:16 +0000 (20:59 +0000)
This is needed to avoid unwanted interference with SUBSTRING behavior,
as per bug #5257 from Roman Kononov.  Also, add some basic intelligence
about character classes (bracket expressions) since we now have several
behaviors that aren't appropriate inside a character class.

As with the previous patch in this area, I'm reluctant to back-patch
since it might affect applications that are relying on the prior
behavior.

src/backend/utils/adt/regexp.c

index ca61d5637f960f99dc4e711f964b0b380ed42c78..cbffcdb183508f038ef7424c68adff6295332fec 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.85 2010/01/02 16:57:55 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.86 2010/01/02 20:59:16 tgl Exp $
  *
  *             Alistair Crooks added the code for the regex caching
  *             agc - cached the regular expressions used - there's a good chance
@@ -640,6 +640,7 @@ similar_escape(PG_FUNCTION_ARGS)
        int                     plen,
                                elen;
        bool            afterescape = false;
+       bool            incharclass = false;
        int                     nquotes = 0;
 
        /* This function is not strict, so must test explicitly */
@@ -682,10 +683,10 @@ similar_escape(PG_FUNCTION_ARGS)
         */
 
        /*
-        * We need room for the prefix/postfix plus as many as 2 output bytes per
-        * input byte
+        * We need room for the prefix/postfix plus as many as 3 output bytes per
+        * input byte; since the input is at most 1GB this can't overflow
         */
-       result = (text *) palloc(VARHDRSZ + 6 + 2 * plen);
+       result = (text *) palloc(VARHDRSZ + 6 + 3 * plen);
        r = VARDATA(result);
 
        *r++ = '^';
@@ -699,7 +700,7 @@ similar_escape(PG_FUNCTION_ARGS)
 
                if (afterescape)
                {
-                       if (pchar == '"')       /* for SUBSTRING patterns */
+                       if (pchar == '"' && !incharclass)       /* for SUBSTRING patterns */
                                *r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
                        else
                        {
@@ -713,6 +714,19 @@ similar_escape(PG_FUNCTION_ARGS)
                        /* SQL99 escape character; do not send to output */
                        afterescape = true;
                }
+               else if (incharclass)
+               {
+                       if (pchar == '\\')
+                               *r++ = '\\';
+                       *r++ = pchar;
+                       if (pchar == ']')
+                               incharclass = false;
+               }
+               else if (pchar == '[')
+               {
+                       *r++ = pchar;
+                       incharclass = true;
+               }
                else if (pchar == '%')
                {
                        *r++ = '.';
@@ -720,6 +734,13 @@ similar_escape(PG_FUNCTION_ARGS)
                }
                else if (pchar == '_')
                        *r++ = '.';
+               else if (pchar == '(')
+               {
+                       /* convert to non-capturing parenthesis */
+                       *r++ = '(';
+                       *r++ = '?';
+                       *r++ = ':';
+               }
                else if (pchar == '\\' || pchar == '.' ||
                                 pchar == '^' || pchar == '$')
                {