]> granicus.if.org Git - postgresql/commitdiff
I made the patch that implements regexp_replace again.
authorBruce Momjian <bruce@momjian.us>
Sun, 10 Jul 2005 04:54:33 +0000 (04:54 +0000)
committerBruce Momjian <bruce@momjian.us>
Sun, 10 Jul 2005 04:54:33 +0000 (04:54 +0000)
The specification of this function is as follows.

regexp_replace(source text, pattern text, replacement text, [flags
text])
returns text

Replace string that matches to regular expression in source text to
replacement text.

 - pattern is regular expression pattern.
 - replacement is replace string that can use '\1'-'\9', and '\&'.
    '\1'-'\9': back reference to the n'th subexpression.
    '\&'     : entire matched string.
 - flags can use the following values:
    g: global (replace all)
    i: ignore case
    When the flags is not specified, case sensitive, replace the first
    instance only.

Atsushi Ogawa

doc/src/sgml/func.sgml
src/backend/regex/regexec.c
src/backend/utils/adt/regexp.c
src/backend/utils/adt/varlena.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.h
src/include/regex/regex.h
src/include/utils/builtins.h
src/test/regress/expected/strings.out
src/test/regress/sql/strings.sql

index 4ff434aca876d1cef2cf4bb2da5970c7601f166c..f274e53c552ce3b6841be89442a1c26ed70b5c6f 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.263 2005/07/06 19:02:52 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.264 2005/07/10 04:54:30 momjian Exp $
 PostgreSQL documentation
 -->
 
@@ -1256,6 +1256,26 @@ PostgreSQL documentation
        <entry><literal>'O''Reilly'</literal></entry>
       </row>
 
+      <row>
+       <entry><literal><function>regexp_replace</function>(<parameter>source</parameter> <type>text</type>,
+       <parameter>pattern</parameter> <type>text</type>,
+       <parameter>replacement</parameter> <type>text</type>
+       <optional>, <parameter>flags</parameter> <type>text</type></optional>)</literal></entry>
+       <entry><type>text</type></entry>
+       <entry>Replace string that matches the regular expression
+        <parameter>pattern</parameter> in <parameter>source</parameter> to
+        <parameter>replacement</parameter>.
+        <parameter>replacement</parameter> can use <literal>\1</>-<literal>\9</> and <literal>\&amp;</>.
+        <literal>\1</>-<literal>\9</> is a back reference to the n'th subexpression, and
+        <literal>\&amp;</> is the entire matched string.
+        <parameter>flags</parameter> can use <literal>g</>(global) and <literal>i</>(ignore case).
+        When flags is not specified, case sensitive matching is used, and it replaces
+        only the instance.
+       </entry>
+       <entry><literal>regexp_replace('1112223333', '(\\d{3})(\\d{3})(\\d{4})', '(\\1) \\2-\\3')</literal></entry>
+       <entry><literal>(111) 222-3333</literal></entry>
+      </row>
+
       <row>
        <entry><literal><function>repeat</function>(<parameter>string</parameter> <type>text</type>, <parameter>number</parameter> <type>integer</type>)</literal></entry>
        <entry><type>text</type></entry>
index 6df4aff05e9d8af494f82d7dce89414803614793..e3bc41fa5e042c1ddb283c3ef921416962fa8c35 100644 (file)
@@ -27,7 +27,7 @@
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $PostgreSQL: pgsql/src/backend/regex/regexec.c,v 1.24 2003/11/29 19:51:55 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/regex/regexec.c,v 1.25 2005/07/10 04:54:30 momjian Exp $
  *
  */
 
@@ -110,6 +110,7 @@ struct vars
        regmatch_t *pmatch;
        rm_detail_t *details;
        chr                *start;                      /* start of string */
+       chr                *search_start;       /* search start of string */
        chr                *stop;                       /* just past end of string */
        int                     err;                    /* error code if any (0 none) */
        regoff_t   *mem;                        /* memory vector for backtracking */
@@ -168,6 +169,7 @@ int
 pg_regexec(regex_t *re,
                   const chr *string,
                   size_t len,
+                  size_t search_start,
                   rm_detail_t *details,
                   size_t nmatch,
                   regmatch_t pmatch[],
@@ -219,6 +221,7 @@ pg_regexec(regex_t *re,
                v->pmatch = pmatch;
        v->details = details;
        v->start = (chr *) string;
+       v->search_start = (chr *) string + search_start;
        v->stop = (chr *) string + len;
        v->err = 0;
        if (backref)
@@ -288,7 +291,8 @@ find(struct vars * v,
        NOERR();
        MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
        cold = NULL;
-       close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL);
+       close = shortest(v, s, v->search_start, v->search_start, v->stop,
+                                        &cold, (int *) NULL);
        freedfa(s);
        NOERR();
        if (v->g->cflags & REG_EXPECT)
@@ -415,7 +419,7 @@ cfindloop(struct vars * v,
 
        assert(d != NULL && s != NULL);
        cold = NULL;
-       close = v->start;
+       close = v->search_start;
        do
        {
                MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
index d8b038a09a8e7079655c8a1951fd4fe6f0bbff43..b8112a2cb952565fae2e8aa3fecb3d9b97e6f351 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.56 2004/12/31 22:01:22 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.57 2005/07/10 04:54:30 momjian Exp $
  *
  *             Alistair Crooks added the code for the regex caching
  *             agc - cached the regular expressions used - there's a good chance
@@ -81,38 +81,27 @@ static cached_re_str re_array[MAX_CACHED_RES];      /* cached re's */
 
 
 /*
- * RE_compile_and_execute - compile and execute a RE, caching if possible
+ * RE_compile_and_cache - compile a RE, caching if possible
  *
- * Returns TRUE on match, FALSE on no match
+ * Returns regex_t
  *
- *     text_re --- the pattern, expressed as an *untoasted* TEXT object
- *     dat --- the data to match against (need not be null-terminated)
- *     dat_len --- the length of the data string
- *     cflags --- compile options for the pattern
- *     nmatch, pmatch  --- optional return area for match details
+ *  text_re --- the pattern, expressed as an *untoasted* TEXT object
+ *  cflags --- compile options for the pattern
  *
- * Both pattern and data are given in the database encoding.  We internally
- * convert to array of pg_wchar which is what Spencer's regex package wants.
+ * Pattern is given in the database encoding.  We internally convert to
+ * array of pg_wchar which is what Spencer's regex package wants.
  */
-static bool
-RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
-                                          int cflags, int nmatch, regmatch_t *pmatch)
+static regex_t
+RE_compile_and_cache(text *text_re, int cflags)
 {
        int                     text_re_len = VARSIZE(text_re);
-       pg_wchar   *data;
-       size_t          data_len;
        pg_wchar   *pattern;
        size_t          pattern_len;
        int                     i;
        int                     regcomp_result;
-       int                     regexec_result;
        cached_re_str re_temp;
        char            errMsg[100];
 
-       /* Convert data string to wide characters */
-       data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
-       data_len = pg_mb2wchar_with_len(dat, data, dat_len);
-
        /*
         * Look for a match among previously compiled REs.      Since the data
         * structure is self-organizing with most-used entries at the front,
@@ -134,28 +123,7 @@ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
                                re_array[0] = re_temp;
                        }
 
-                       /* Perform RE match and return result */
-                       regexec_result = pg_regexec(&re_array[0].cre_re,
-                                                                               data,
-                                                                               data_len,
-                                                                               NULL,   /* no details */
-                                                                               nmatch,
-                                                                               pmatch,
-                                                                               0);
-
-                       pfree(data);
-
-                       if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
-                       {
-                               /* re failed??? */
-                               pg_regerror(regexec_result, &re_array[0].cre_re,
-                                                       errMsg, sizeof(errMsg));
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
-                                                errmsg("regular expression failed: %s", errMsg)));
-                       }
-
-                       return (regexec_result == REG_OKAY);
+                       return re_array[0].cre_re;
                }
        }
 
@@ -220,10 +188,45 @@ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
        re_array[0] = re_temp;
        num_res++;
 
+       return re_array[0].cre_re;
+}
+
+/*
+ * RE_compile_and_execute - compile and execute a RE
+ *
+ * Returns TRUE on match, FALSE on no match
+ *
+ *     text_re --- the pattern, expressed as an *untoasted* TEXT object
+ *     dat --- the data to match against (need not be null-terminated)
+ *     dat_len --- the length of the data string
+ *     cflags --- compile options for the pattern
+ *     nmatch, pmatch  --- optional return area for match details
+ *
+ * Both pattern and data are given in the database encoding.  We internally
+ * convert to array of pg_wchar which is what Spencer's regex package wants.
+ */
+static bool
+RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
+                                          int cflags, int nmatch, regmatch_t *pmatch)
+{
+       pg_wchar   *data;
+       size_t          data_len;
+       int                     regexec_result;
+       regex_t         re;
+       char        errMsg[100];
+
+       /* Convert data string to wide characters */
+       data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
+       data_len = pg_mb2wchar_with_len(dat, data, dat_len);
+
+       /* Compile RE */
+       re = RE_compile_and_cache(text_re, cflags);
+
        /* Perform RE match and return result */
        regexec_result = pg_regexec(&re_array[0].cre_re,
                                                                data,
                                                                data_len,
+                                                               0,
                                                                NULL,   /* no details */
                                                                nmatch,
                                                                pmatch,
@@ -428,15 +431,89 @@ textregexsubstr(PG_FUNCTION_ARGS)
                        eo = pmatch[0].rm_eo;
                }
 
-               return (DirectFunctionCall3(text_substr,
+               return DirectFunctionCall3(text_substr,
                                                                        PointerGetDatum(s),
                                                                        Int32GetDatum(so + 1),
-                                                                       Int32GetDatum(eo - so)));
+                                                                       Int32GetDatum(eo - so));
        }
 
        PG_RETURN_NULL();
 }
 
+/*
+ * textregexreplace_noopt()
+ *      Return a replace string matched by a regular expression.
+ *             This function is a version that doesn't specify the option of
+ *             textregexreplace. This is case sensitive, replace the first
+ *             instance only.
+ */
+Datum
+textregexreplace_noopt(PG_FUNCTION_ARGS)
+{
+       text       *s = PG_GETARG_TEXT_P(0);
+       text       *p = PG_GETARG_TEXT_P(1);
+       text       *r = PG_GETARG_TEXT_P(2);
+       regex_t         re;
+
+       re = RE_compile_and_cache(p, regex_flavor);
+
+       return DirectFunctionCall4(replace_text_regexp,
+                                                               PointerGetDatum(s),
+                                                               PointerGetDatum(&re),
+                                                               PointerGetDatum(r),
+                                                               BoolGetDatum(false));
+}
+
+/*
+ * textregexreplace()
+ *      Return a replace string matched by a regular expression.
+ */
+Datum
+textregexreplace(PG_FUNCTION_ARGS)
+{
+       text       *s = PG_GETARG_TEXT_P(0);
+       text       *p = PG_GETARG_TEXT_P(1);
+       text       *r = PG_GETARG_TEXT_P(2);
+       text       *opt = PG_GETARG_TEXT_P(3);
+       char       *opt_p = VARDATA(opt);
+       int                     opt_len = (VARSIZE(opt) - VARHDRSZ);
+       int                     i;
+       bool            global = false;
+       bool            ignorecase = false;
+       regex_t         re;
+
+       /* parse options */
+       for (i = 0; i < opt_len; i++)
+       {
+               switch (opt_p[i])
+               {
+                       case 'i':
+                               ignorecase = true;
+                               break;
+                       case 'g':
+                               global = true;
+                               break;
+                       default:
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("invalid option of regexp_replace: %c",
+                                        opt_p[i])));
+                               break;
+               }
+       }
+
+       if (ignorecase)
+               re = RE_compile_and_cache(p, regex_flavor | REG_ICASE);
+       else
+               re = RE_compile_and_cache(p, regex_flavor);
+
+       return DirectFunctionCall4(replace_text_regexp,
+                                                               PointerGetDatum(s),
+                                                               PointerGetDatum(&re),
+                                                               PointerGetDatum(r),
+                                                               BoolGetDatum(global));
+}
+
 /* similar_escape()
  * Convert a SQL99 regexp pattern to POSIX style, so it can be used by
  * our regexp engine.
index 01cd5d2b7a31137ef65b61b032c29d13a72c14cd..ba40747df41c4a0c3bbcaa6bbd1f0e73c9f06432 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.126 2005/07/07 04:36:08 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.127 2005/07/10 04:54:30 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,6 +28,7 @@
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/pg_locale.h"
+#include "regex/regex.h"
 
 
 typedef struct varlena unknown;
@@ -1993,6 +1994,225 @@ replace_text(PG_FUNCTION_ARGS)
        PG_RETURN_TEXT_P(ret_text);
 }
 
+/*
+ * check_replace_text_has_escape_char
+ * check whether replace_text has escape char. 
+ */
+static bool
+check_replace_text_has_escape_char(const text *replace_text)
+{
+       const char *p = VARDATA(replace_text);
+       const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
+
+       if (pg_database_encoding_max_length() == 1)
+       {
+               for (; p < p_end; p++)
+                       if (*p == '\\') return true;
+       }
+       else
+       {
+               for (; p < p_end; p += pg_mblen(p))
+                       if (*p == '\\') return true;
+       }
+
+       return false;
+}
+
+/*
+ * appendStringInfoRegexpSubstr
+ * append string by using back references of regexp.
+ */
+static void
+appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
+    regmatch_t *pmatch, text *src_text)
+{
+       const char *p = VARDATA(replace_text);
+       const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
+
+       int                     eml = pg_database_encoding_max_length();
+
+       int                     substr_start = 1;
+       int                     ch_cnt;
+
+       int                     so;
+       int                     eo;
+
+       while (1)
+       {
+               /* Find escape char. */
+               ch_cnt = 0;
+               if (eml == 1)
+               {
+                       for (; p < p_end && *p != '\\'; p++)
+                               ch_cnt++;
+               }
+               else
+               {
+                       for (; p < p_end && *p != '\\'; p += pg_mblen(p))
+                               ch_cnt++;
+               }
+
+               /*
+                * Copy the text when there is a text in the left of escape char
+                * or escape char is not found.
+                */
+               if (ch_cnt)
+               {
+                       text *append_text = text_substring(PointerGetDatum(replace_text),
+                                                                         substr_start, ch_cnt, false);
+                       appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
+                       pfree(append_text);
+               }
+               substr_start += ch_cnt + 1;
+
+               if (p >= p_end) /* When escape char is not found. */
+                       break;
+
+               /* See the next character of escape char. */
+               p++;
+               so = eo = -1;
+
+               if (*p >= '1' && *p <= '9')
+               {
+                       /* Use the back reference of regexp. */
+                       int             idx = *p - '0';
+                       so = pmatch[idx].rm_so;
+                       eo = pmatch[idx].rm_eo;
+                       p++;
+                       substr_start++;
+               }
+               else if (*p == '&')
+               {
+                       /* Use the entire matched string. */
+                       so = pmatch[0].rm_so;
+                       eo = pmatch[0].rm_eo;
+                       p++;
+                       substr_start++;
+               }
+
+               if (so != -1 && eo != -1)
+               {
+                       /* Copy the text that is back reference of regexp. */
+                       text *append_text = text_substring(PointerGetDatum(src_text),
+                                                                         so + 1, (eo - so), false);
+                       appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
+                       pfree(append_text);
+               }
+       }
+}
+
+#define REGEXP_REPLACE_BACKREF_CNT             10
+
+/*
+ * replace_text_regexp
+ * replace text that matches to regexp in src_text to replace_text.
+ */
+Datum
+replace_text_regexp(PG_FUNCTION_ARGS)
+{
+       text       *ret_text;
+       text       *src_text = PG_GETARG_TEXT_P(0);
+       int                     src_text_len = VARSIZE(src_text) - VARHDRSZ;
+       regex_t    *re = (regex_t *)PG_GETARG_POINTER(1);
+       text       *replace_text = PG_GETARG_TEXT_P(2);
+       bool            global = PG_GETARG_BOOL(3);
+       StringInfo      str = makeStringInfo();
+       int                     regexec_result;
+       regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
+       pg_wchar   *data;
+       size_t          data_len;
+       int                     search_start;
+       int                     data_pos;
+       bool            have_escape;
+
+       /* Convert data string to wide characters. */
+       data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
+       data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
+
+       /* Check whether replace_text has escape char. */
+       have_escape = check_replace_text_has_escape_char(replace_text);
+
+       for (search_start = data_pos = 0; search_start <= data_len;)
+       {
+               regexec_result = pg_regexec(re,
+                                                                       data,
+                                                                       data_len,
+                                                                       search_start,
+                                                                       NULL,   /* no details */
+                                                                       REGEXP_REPLACE_BACKREF_CNT,
+                                                                       pmatch,
+                                                                       0);
+
+               if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
+               {
+                       char    errMsg[100];
+
+                       /* re failed??? */
+                       pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
+                       ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+                                errmsg("regular expression failed: %s", errMsg)));
+               }
+
+               if (regexec_result == REG_NOMATCH)
+                       break;
+
+        /*
+         * Copy the text when there is a text in the left of matched position.
+         */
+               if (pmatch[0].rm_so - data_pos > 0)
+               {
+                       text *left_text = text_substring(PointerGetDatum(src_text),
+                                                                          data_pos + 1,
+                                                                          pmatch[0].rm_so - data_pos, false);
+                       appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
+                       pfree(left_text);
+               }
+
+               /*
+                * Copy the replace_text. Process back references when the
+                * replace_text has escape characters. 
+                */
+               if (have_escape)
+                       appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
+               else
+                       appendStringInfoString(str, PG_TEXT_GET_STR(replace_text));
+
+               search_start = data_pos = pmatch[0].rm_eo;
+
+               /*
+                * When global option is off, replace the first instance only.
+                */
+               if (!global)
+                       break;
+
+               /*
+                * Search from next character when the matching text is zero width.
+                */
+               if (pmatch[0].rm_so == pmatch[0].rm_eo)
+                       search_start++;
+       }
+
+       /*
+     * Copy the text when there is a text at the right of last matched
+        * or regexp is not matched.
+        */
+       if (data_pos < data_len)
+       {
+               text *right_text = text_substring(PointerGetDatum(src_text),
+                                                                  data_pos + 1, -1, true);
+               appendStringInfoString(str, PG_TEXT_GET_STR(right_text));
+               pfree(right_text);
+       }
+
+       ret_text = PG_STR_GET_TEXT(str->data);
+       pfree(str->data);
+       pfree(str);
+       pfree(data);
+
+       PG_RETURN_TEXT_P(ret_text);
+}
+
 /*
  * split_text
  * parse input string
index 5dc1b21a10737b0b46f5be680be971a8fc4f073a..3452b246d17537adfb02e56891ccd7ea5178d805 100644 (file)
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.286 2005/07/08 04:12:26 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.287 2005/07/10 04:54:31 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200507081
+#define CATALOG_VERSION_NO     200507101
 
 #endif
index 0a213f64be177bf9d0d0d21bd4ffd10333838be4..9ea413a6f541c3bf59e652c0015e594d1c241a8a 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.374 2005/07/06 19:02:53 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.375 2005/07/10 04:54:31 momjian Exp $
  *
  * NOTES
  *       The script catalog/genbki.sh reads this file and generates .bki
@@ -2207,6 +2207,10 @@ DATA(insert OID =  937 (  substring    PGNSP PGUID 12 f f t f i 2 25 "25 23" _nu
 DESCR("return portion of string");
 DATA(insert OID =  2087 ( replace         PGNSP PGUID 12 f f t f i 3 25 "25 25 25" _null_ _null_ _null_        replace_text - _null_ ));
 DESCR("replace all occurrences of old_substr with new_substr in string");
+DATA(insert OID =  2284 ( regexp_replace          PGNSP PGUID 12 f f t f i 3 25 "25 25 25" _null_ _null_ _null_        textregexreplace_noopt - _null_ ));
+DESCR("replace text using regexp");
+DATA(insert OID =  2285 ( regexp_replace          PGNSP PGUID 12 f f t f i 4 25 "25 25 25 25" _null_ _null_ _null_     textregexreplace - _null_ ));
+DESCR("replace text using regexp");
 DATA(insert OID =  2088 ( split_part   PGNSP PGUID 12 f f t f i 3 25 "25 25 23" _null_ _null_ _null_   split_text - _null_ ));
 DESCR("split string by field_sep and return field_num");
 DATA(insert OID =  2089 ( to_hex          PGNSP PGUID 12 f f t f i 1 25 "23" _null_ _null_ _null_  to_hex32 - _null_ ));
index ac0a55da29f02b3dad1e2c10d1a7e9e0b089af47..37c06097631635210ccc0661935a8606e2addd75 100644 (file)
@@ -29,7 +29,7 @@
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $PostgreSQL: pgsql/src/include/regex/regex.h,v 1.26 2003/11/29 22:41:10 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/regex/regex.h,v 1.27 2005/07/10 04:54:32 momjian Exp $
  */
 
 /*
@@ -163,7 +163,7 @@ typedef struct
  * the prototypes for exported functions
  */
 extern int     pg_regcomp(regex_t *, const pg_wchar *, size_t, int);
-extern int     pg_regexec(regex_t *, const pg_wchar *, size_t, rm_detail_t *, size_t, regmatch_t[], int);
+extern int     pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int);
 extern void pg_regfree(regex_t *);
 extern size_t pg_regerror(int, const regex_t *, char *, size_t);
 
index fea496aaaeab9f9495e8d82a35b04fefe51989d6..8400b8e4701983e5f9e5a3a13bca4a4f732512fb 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.259 2005/07/06 19:02:54 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.260 2005/07/10 04:54:32 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -429,6 +429,8 @@ extern Datum nameicregexne(PG_FUNCTION_ARGS);
 extern Datum texticregexeq(PG_FUNCTION_ARGS);
 extern Datum texticregexne(PG_FUNCTION_ARGS);
 extern Datum textregexsubstr(PG_FUNCTION_ARGS);
+extern Datum textregexreplace_noopt(PG_FUNCTION_ARGS);
+extern Datum textregexreplace(PG_FUNCTION_ARGS);
 extern Datum similar_escape(PG_FUNCTION_ARGS);
 
 /* regproc.c */
@@ -566,6 +568,7 @@ extern List *textToQualifiedNameList(text *textval);
 extern bool SplitIdentifierString(char *rawstring, char separator,
                                          List **namelist);
 extern Datum replace_text(PG_FUNCTION_ARGS);
+extern Datum replace_text_regexp(PG_FUNCTION_ARGS);
 extern Datum split_text(PG_FUNCTION_ARGS);
 extern Datum text_to_array(PG_FUNCTION_ARGS);
 extern Datum array_to_text(PG_FUNCTION_ARGS);
index ab4cd6a7979b16dbd288cda61ec56dabf3b66c8d..d05ce5629972303b80f210ca923431081cb732de 100644 (file)
@@ -192,6 +192,34 @@ SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
  cde
 (1 row)
 
+-- PostgreSQL extension to allow using back reference in replace string;
+SELECT regexp_replace('1112223333', '(\\d{3})(\\d{3})(\\d{4})', '(\\1) \\2-\\3');
+ regexp_replace 
+----------------
+ (111) 222-3333
+(1 row)
+
+SELECT regexp_replace('AAA   BBB   CCC   ', '\\s+', ' ', 'g');
+ regexp_replace 
+----------------
+ AAA BBB CCC 
+(1 row)
+
+SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
+ regexp_replace 
+----------------
+ ZAAAZ
+(1 row)
+
+SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
+ regexp_replace 
+----------------
+ Z Z
+(1 row)
+
+-- invalid option of REGEXP_REPLACE
+SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
+ERROR:  invalid option of regexp_replace: z
 -- E021-11 position expression
 SELECT POSITION('4' IN '1234567890') = '4' AS "4";
  4 
index a59b39cf99ef5e360fb1fe1120b357a11c733c30..620aabe8ae67dba0ec53d5e3796f636de027f585 100644 (file)
@@ -80,6 +80,13 @@ SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
 -- With a parenthesized subexpression, return only what matches the subexpr
 SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
 
+-- PostgreSQL extension to allow using back reference in replace string;
+SELECT regexp_replace('1112223333', '(\\d{3})(\\d{3})(\\d{4})', '(\\1) \\2-\\3');
+SELECT regexp_replace('AAA   BBB   CCC   ', '\\s+', ' ', 'g');
+SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
+SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
+-- invalid option of REGEXP_REPLACE
+SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
 
 -- E021-11 position expression
 SELECT POSITION('4' IN '1234567890') = '4' AS "4";