]> granicus.if.org Git - postgresql/commitdiff
Fix regexp substring matching (substring(string from pattern)) for the corner
authorTom Lane <tgl@sss.pgh.pa.us>
Wed, 19 Mar 2008 02:40:37 +0000 (02:40 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Wed, 19 Mar 2008 02:40:37 +0000 (02:40 +0000)
case where there is a match to the pattern overall but the user has specified
a parenthesized subexpression and that subexpression hasn't got a match.
An example is substring('foo' from 'foo(bar)?').  This should return NULL,
since (bar) isn't matched, but it was mistakenly returning the whole-pattern
match instead (ie, 'foo').  Per bug #4044 from Rui Martins.

This has been broken since the beginning; patch in all supported versions.
The old behavior was sufficiently inconsistent that it's impossible to believe
anyone is depending on it.

src/backend/utils/adt/regexp.c

index 359fd33ceea30c7eb75d5d7dce9af255489d2fd6..4a96a1bb70c017cf5c9905f7ea2b8e0f5b5d5103 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.78 2008/01/01 19:45:52 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.79 2008/03/19 02:40:37 tgl Exp $
  *
  *             Alistair Crooks added the code for the regex caching
  *             agc - cached the regular expressions used - there's a good chance
@@ -576,8 +576,13 @@ textregexsubstr(PG_FUNCTION_ARGS)
 {
        text       *s = PG_GETARG_TEXT_PP(0);
        text       *p = PG_GETARG_TEXT_PP(1);
-       bool            match;
+       regex_t    *re;
        regmatch_t      pmatch[2];
+       int                     so,
+                               eo;
+
+       /* Compile RE */
+       re = RE_compile_and_cache(p, regex_flavor);
 
        /*
         * We pass two regmatch_t structs to get info about the overall match and
@@ -585,34 +590,37 @@ textregexsubstr(PG_FUNCTION_ARGS)
         * is a parenthesized subexpression, we return what it matched; else
         * return what the whole regexp matched.
         */
-       match = RE_compile_and_execute(p,
-                                                                  VARDATA_ANY(s),
-                                                                  VARSIZE_ANY_EXHDR(s),
-                                                                  regex_flavor,
-                                                                  2, pmatch);
-
-       /* match? then return the substring matching the pattern */
-       if (match)
-       {
-               int                     so,
-                                       eo;
+       if (!RE_execute(re,
+                                       VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s),
+                                       2, pmatch))
+               PG_RETURN_NULL();               /* definitely no match */
 
+       if (re->re_nsub > 0)
+       {
+               /* has parenthesized subexpressions, use the first one */
                so = pmatch[1].rm_so;
                eo = pmatch[1].rm_eo;
-               if (so < 0 || eo < 0)
-               {
-                       /* no parenthesized subexpression */
-                       so = pmatch[0].rm_so;
-                       eo = pmatch[0].rm_eo;
-               }
-
-               return DirectFunctionCall3(text_substr,
-                                                                  PointerGetDatum(s),
-                                                                  Int32GetDatum(so + 1),
-                                                                  Int32GetDatum(eo - so));
        }
+       else
+       {
+               /* no parenthesized subexpression, use whole match */
+               so = pmatch[0].rm_so;
+               eo = pmatch[0].rm_eo;
+       }
+
+       /*
+        * It is possible to have a match to the whole pattern but no match
+        * for a subexpression; for example 'foo(bar)?' is considered to match
+        * 'foo' but there is no subexpression match.  So this extra test for
+        * match failure is not redundant.
+        */
+       if (so < 0 || eo < 0)
+               PG_RETURN_NULL();
 
-       PG_RETURN_NULL();
+       return DirectFunctionCall3(text_substr,
+                                                          PointerGetDatum(s),
+                                                          Int32GetDatum(so + 1),
+                                                          Int32GetDatum(eo - so));
 }
 
 /*