]> granicus.if.org Git - postgresql/blobdiff - src/backend/utils/adt/varlena.c
Message style improvements
[postgresql] / src / backend / utils / adt / varlena.c
index 5fc3128360c837f92d52ea59bbc6217e6d839a66..33f40b685c76bbaa87e05476229e47793b01e227 100644 (file)
@@ -3,12 +3,12 @@
  * varlena.c
  *       Functions for the variable-length built-in types.
  *
- * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.137 2005/10/17 16:24:19 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.151 2006/10/04 00:30:00 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include <ctype.h>
 
+#include "access/tupmacs.h"
 #include "access/tuptoaster.h"
 #include "catalog/pg_type.h"
-#include "lib/stringinfo.h"
-#include "libpq/crypt.h"
+#include "libpq/md5.h"
 #include "libpq/pqformat.h"
-#include "mb/pg_wchar.h"
-#include "miscadmin.h"
 #include "parser/scansup.h"
-#include "utils/array.h"
+#include "regex/regex.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/pg_locale.h"
-#include "regex/regex.h"
 
 
 typedef struct varlena unknown;
@@ -81,7 +78,7 @@ static void appendStringInfoText(StringInfo str, const text *t);
  *             ereport(ERROR, ...) if bad form.
  *
  *             BUGS:
- *                             The input is scaned twice.
+ *                             The input is scanned twice.
  *                             The error checking of input is minimal.
  */
 Datum
@@ -256,10 +253,7 @@ textin(PG_FUNCTION_ARGS)
        text       *result;
        int                     len;
 
-       /* verify encoding */
        len = strlen(inputText);
-       pg_verifymbstr(inputText, len, false);
-
        result = (text *) palloc(len + VARHDRSZ);
        VARATT_SIZEP(result) = len + VARHDRSZ;
 
@@ -299,9 +293,6 @@ textrecv(PG_FUNCTION_ARGS)
 
        str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 
-       /* verify encoding */
-       pg_verifymbstr(str, nbytes, false);
-
        result = (text *) palloc(nbytes + VARHDRSZ);
        VARATT_SIZEP(result) = nbytes + VARHDRSZ;
        memcpy(VARDATA(result), str, nbytes);
@@ -888,8 +879,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
                                                                                (LPWSTR) a1p, a1len / 2);
                                if (!r)
                                        ereport(ERROR,
-                                                       (errmsg("could not convert string to UTF16: %lu",
-                                                                       GetLastError())));
+                                        (errmsg("could not convert string to UTF-16: error %lu",
+                                                        GetLastError())));
                        }
                        ((LPWSTR) a1p)[r] = 0;
 
@@ -901,8 +892,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
                                                                                (LPWSTR) a2p, a2len / 2);
                                if (!r)
                                        ereport(ERROR,
-                                                       (errmsg("could not convert string to UTF16: %lu",
-                                                                       GetLastError())));
+                                        (errmsg("could not convert string to UTF-16: error %lu",
+                                                        GetLastError())));
                        }
                        ((LPWSTR) a2p)[r] = 0;
 
@@ -911,8 +902,7 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
                        if (result == 2147483647)       /* _NLSCMPERROR; missing from mingw
                                                                                 * headers */
                                ereport(ERROR,
-                                               (errmsg("could not compare unicode strings: %d",
-                                                               errno)));
+                                               (errmsg("could not compare Unicode strings: %m")));
 
                        if (a1p != a1buf)
                                pfree(a1p);
@@ -939,6 +929,15 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 
                result = strcoll(a1p, a2p);
 
+               /*
+                * In some locales strcoll() can claim that nonidentical strings are
+                * equal.  Believing that would be bad news for a number of reasons,
+                * so we follow Perl's lead and sort "equal" strings according to
+                * strcmp().
+                */
+               if (result == 0)
+                       result = strcmp(a1p, a2p);
+
                if (a1p != a1buf)
                        pfree(a1p);
                if (a2p != a2buf)
@@ -985,11 +984,15 @@ texteq(PG_FUNCTION_ARGS)
        text       *arg2 = PG_GETARG_TEXT_P(1);
        bool            result;
 
-       /* fast path for different-length inputs */
+       /*
+        * Since we only care about equality or not-equality, we can avoid all the
+        * expense of strcoll() here, and just do bitwise comparison.
+        */
        if (VARSIZE(arg1) != VARSIZE(arg2))
                result = false;
        else
-               result = (text_cmp(arg1, arg2) == 0);
+               result = (strncmp(VARDATA(arg1), VARDATA(arg2),
+                                                 VARSIZE(arg1) - VARHDRSZ) == 0);
 
        PG_FREE_IF_COPY(arg1, 0);
        PG_FREE_IF_COPY(arg2, 1);
@@ -1004,11 +1007,15 @@ textne(PG_FUNCTION_ARGS)
        text       *arg2 = PG_GETARG_TEXT_P(1);
        bool            result;
 
-       /* fast path for different-length inputs */
+       /*
+        * Since we only care about equality or not-equality, we can avoid all the
+        * expense of strcoll() here, and just do bitwise comparison.
+        */
        if (VARSIZE(arg1) != VARSIZE(arg2))
                result = true;
        else
-               result = (text_cmp(arg1, arg2) != 0);
+               result = (strncmp(VARDATA(arg1), VARDATA(arg2),
+                                                 VARSIZE(arg1) - VARHDRSZ) != 0);
 
        PG_FREE_IF_COPY(arg1, 0);
        PG_FREE_IF_COPY(arg2, 1);
@@ -2022,7 +2029,7 @@ replace_text(PG_FUNCTION_ARGS)
        text       *buf_text;
        text       *ret_text;
        int                     curr_posn;
-       StringInfo      str;
+       StringInfoData str;
 
        if (src_text_len == 0 || from_sub_text_len == 0)
                PG_RETURN_TEXT_P(src_text);
@@ -2033,7 +2040,7 @@ replace_text(PG_FUNCTION_ARGS)
        if (curr_posn == 0)
                PG_RETURN_TEXT_P(src_text);
 
-       str = makeStringInfo();
+       initStringInfo(&str);
        buf_text = src_text;
 
        while (curr_posn > 0)
@@ -2043,8 +2050,8 @@ replace_text(PG_FUNCTION_ARGS)
                right_text = text_substring(PointerGetDatum(buf_text),
                                                                        curr_posn + from_sub_text_len, -1, true);
 
-               appendStringInfoText(str, left_text);
-               appendStringInfoText(str, to_sub_text);
+               appendStringInfoText(&str, left_text);
+               appendStringInfoText(&str, to_sub_text);
 
                if (buf_text != src_text)
                        pfree(buf_text);
@@ -2053,20 +2060,20 @@ replace_text(PG_FUNCTION_ARGS)
                curr_posn = TEXTPOS(buf_text, from_sub_text);
        }
 
-       appendStringInfoText(str, buf_text);
+       appendStringInfoText(&str, buf_text);
        if (buf_text != src_text)
                pfree(buf_text);
 
-       ret_text = PG_STR_GET_TEXT(str->data);
-       pfree(str->data);
-       pfree(str);
+       ret_text = PG_STR_GET_TEXT(str.data);
+       pfree(str.data);
 
        PG_RETURN_TEXT_P(ret_text);
 }
 
 /*
  * check_replace_text_has_escape_char
- * check whether replace_text has escape char.
+ *
+ * check whether replace_text contains escape char.
  */
 static bool
 check_replace_text_has_escape_char(const text *replace_text)
@@ -2077,14 +2084,18 @@ check_replace_text_has_escape_char(const text *replace_text)
        if (pg_database_encoding_max_length() == 1)
        {
                for (; p < p_end; p++)
+               {
                        if (*p == '\\')
                                return true;
+               }
        }
        else
        {
                for (; p < p_end; p += pg_mblen(p))
+               {
                        if (*p == '\\')
                                return true;
+               }
        }
 
        return false;
@@ -2092,7 +2103,9 @@ check_replace_text_has_escape_char(const text *replace_text)
 
 /*
  * appendStringInfoRegexpSubstr
- * append string by using back references of regexp.
+ *
+ * Append replace_text to str, substituting regexp back references for
+ * \1 .. \9 and \& escapes (and a single backslash for \\).
  */
 static void
 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
@@ -2100,50 +2113,41 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
 {
        const char *p = VARDATA(replace_text);
        const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
-
        int                     eml = pg_database_encoding_max_length();
 
-       int                     substr_start = 1;
-       int                     ch_cnt;
-
-       int                     so;
-       int                     eo;
-
-       while (1)
+       for (;;)
        {
-               /* Find escape char. */
-               ch_cnt = 0;
+               const char *chunk_start = p;
+               int                     so;
+               int                     eo;
+
+               /* Find next escape char. */
                if (eml == 1)
                {
                        for (; p < p_end && *p != '\\'; p++)
-                               ch_cnt++;
+                                /* nothing */ ;
                }
                else
                {
                        for (; p < p_end && *p != '\\'; p += pg_mblen(p))
-                               ch_cnt++;
+                                /* nothing */ ;
                }
 
-               /*
-                * Copy the text when there is a text in the left of escape char or
-                * escape char is not found.
-                */
-               if (ch_cnt)
-               {
-                       text       *append_text = text_substring(PointerGetDatum(replace_text),
-                                                                                               substr_start, ch_cnt, false);
-
-                       appendStringInfoText(str, append_text);
-                       pfree(append_text);
-               }
-               substr_start += ch_cnt + 1;
+               /* Copy the text we just scanned over, if any. */
+               if (p > chunk_start)
+                       appendBinaryStringInfo(str, chunk_start, p - chunk_start);
 
-               if (p >= p_end)                 /* When escape char is not found. */
+               /* Done if at end of string, else advance over escape char. */
+               if (p >= p_end)
                        break;
-
-               /* See the next character of escape char. */
                p++;
-               so = eo = -1;
+
+               if (p >= p_end)
+               {
+                       /* Escape at very end of input.  Treat same as unexpected char */
+                       appendStringInfoChar(str, '\\');
+                       break;
+               }
 
                if (*p >= '1' && *p <= '9')
                {
@@ -2153,7 +2157,6 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
                        so = pmatch[idx].rm_so;
                        eo = pmatch[idx].rm_eo;
                        p++;
-                       substr_start++;
                }
                else if (*p == '&')
                {
@@ -2161,15 +2164,36 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
                        so = pmatch[0].rm_so;
                        eo = pmatch[0].rm_eo;
                        p++;
-                       substr_start++;
+               }
+               else if (*p == '\\')
+               {
+                       /* \\ means transfer one \ to output. */
+                       appendStringInfoChar(str, '\\');
+                       p++;
+                       continue;
+               }
+               else
+               {
+                       /*
+                        * If escape char is not followed by any expected char, just treat
+                        * it as ordinary data to copy.  (XXX would it be better to throw
+                        * an error?)
+                        */
+                       appendStringInfoChar(str, '\\');
+                       continue;
                }
 
                if (so != -1 && eo != -1)
                {
-                       /* Copy the text that is back reference of regexp. */
-                       text       *append_text = text_substring(PointerGetDatum(src_text),
-                                                                                                  so + 1, (eo - so), false);
+                       /*
+                        * Copy the text that is the back reference of the regexp.  Because so and
+                        * eo are counted in characters not bytes, it's easiest to use
+                        * text_substring to pull out the correct chunk of text.
+                        */
+                       text       *append_text;
 
+                       append_text = text_substring(PointerGetDatum(src_text),
+                                                                                so + 1, (eo - so), false);
                        appendStringInfoText(str, append_text);
                        pfree(append_text);
                }
@@ -2180,19 +2204,20 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
 
 /*
  * replace_text_regexp
+ *
  * replace text that matches to regexp in src_text to replace_text.
+ *
+ * Note: to avoid having to include regex.h in builtins.h, we declare
+ * the regexp argument as void *, but really it's regex_t *.
  */
-Datum
-replace_text_regexp(PG_FUNCTION_ARGS)
+text *
+replace_text_regexp(text *src_text, void *regexp,
+                                       text *replace_text, bool glob)
 {
        text       *ret_text;
-       text       *src_text = PG_GETARG_TEXT_P(0);
+       regex_t    *re = (regex_t *) regexp;
        int                     src_text_len = VARSIZE(src_text) - VARHDRSZ;
-       regex_t    *re = (regex_t *) PG_GETARG_POINTER(1);
-       text       *replace_text = PG_GETARG_TEXT_P(2);
-       bool global = PG_GETARG_BOOL(3);
-       StringInfo      str = makeStringInfo();
-       int                     regexec_result;
+       StringInfoData buf;
        regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
        pg_wchar   *data;
        size_t          data_len;
@@ -2200,6 +2225,8 @@ replace_text_regexp(PG_FUNCTION_ARGS)
        int                     data_pos;
        bool            have_escape;
 
+       initStringInfo(&buf);
+
        /* Convert data string to wide characters. */
        data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
        data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
@@ -2209,6 +2236,8 @@ replace_text_regexp(PG_FUNCTION_ARGS)
 
        for (search_start = data_pos = 0; search_start <= data_len;)
        {
+               int                     regexec_result;
+
                regexec_result = pg_regexec(re,
                                                                        data,
                                                                        data_len,
@@ -2218,30 +2247,33 @@ replace_text_regexp(PG_FUNCTION_ARGS)
                                                                        pmatch,
                                                                        0);
 
-               if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
+               if (regexec_result == REG_NOMATCH)
+                       break;
+
+               if (regexec_result != REG_OKAY)
                {
                        char            errMsg[100];
 
-                       /* re failed??? */
                        pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
                        ereport(ERROR,
                                        (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                                         errmsg("regular expression failed: %s", errMsg)));
                }
 
-               if (regexec_result == REG_NOMATCH)
-                       break;
-
                /*
-                * Copy the text when there is a text in the left of matched position.
+                * Copy the text to the left of the match position.  Because we are
+                * working with character not byte indexes, it's easiest to use
+                * text_substring to pull out the needed data.
                 */
                if (pmatch[0].rm_so - data_pos > 0)
                {
-                       text       *left_text = text_substring(PointerGetDatum(src_text),
-                                                                                                  data_pos + 1,
-                                                                                 pmatch[0].rm_so - data_pos, false);
+                       text       *left_text;
 
-                       appendStringInfoText(str, left_text);
+                       left_text = text_substring(PointerGetDatum(src_text),
+                                                                          data_pos + 1,
+                                                                          pmatch[0].rm_so - data_pos,
+                                                                          false);
+                       appendStringInfoText(&buf, left_text);
                        pfree(left_text);
                }
 
@@ -2250,16 +2282,16 @@ replace_text_regexp(PG_FUNCTION_ARGS)
                 * replace_text has escape characters.
                 */
                if (have_escape)
-                       appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
+                       appendStringInfoRegexpSubstr(&buf, replace_text, pmatch, src_text);
                else
-                       appendStringInfoText(str, replace_text);
+                       appendStringInfoText(&buf, replace_text);
 
                search_start = data_pos = pmatch[0].rm_eo;
 
                /*
                 * When global option is off, replace the first instance only.
                 */
-               if (!global)
+               if (!glob)
                        break;
 
                /*
@@ -2270,24 +2302,23 @@ replace_text_regexp(PG_FUNCTION_ARGS)
        }
 
        /*
-        * Copy the text when there is a text at the right of last matched or
-        * regexp is not matched.
+        * Copy the text to the right of the last match.
         */
        if (data_pos < data_len)
        {
-               text       *right_text = text_substring(PointerGetDatum(src_text),
-                                                                                               data_pos + 1, -1, true);
+               text       *right_text;
 
-               appendStringInfoText(str, right_text);
+               right_text = text_substring(PointerGetDatum(src_text),
+                                                                       data_pos + 1, -1, true);
+               appendStringInfoText(&buf, right_text);
                pfree(right_text);
        }
 
-       ret_text = PG_STR_GET_TEXT(str->data);
-       pfree(str->data);
-       pfree(str);
+       ret_text = PG_STR_GET_TEXT(buf.data);
+       pfree(buf.data);
        pfree(data);
 
-       PG_RETURN_TEXT_P(ret_text);
+       return ret_text;
 }
 
 /*
@@ -2468,16 +2499,18 @@ array_to_text(PG_FUNCTION_ARGS)
        int                     nitems,
                           *dims,
                                ndims;
-       char       *p;
        Oid                     element_type;
        int                     typlen;
        bool            typbyval;
        char            typalign;
-       StringInfo      result_str = makeStringInfo();
+       StringInfoData buf;
+       bool            printed = false;
+       char       *p;
+       bits8      *bitmap;
+       int                     bitmask;
        int                     i;
        ArrayMetaState *my_extra;
 
-       p = ARR_DATA_PTR(v);
        ndims = ARR_NDIM(v);
        dims = ARR_DIMS(v);
        nitems = ArrayGetNItems(ndims, dims);
@@ -2487,6 +2520,7 @@ array_to_text(PG_FUNCTION_ARGS)
                PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
 
        element_type = ARR_ELEMTYPE(v);
+       initStringInfo(&buf);
 
        /*
         * We arrange to look up info about element type, including its output
@@ -2499,7 +2533,7 @@ array_to_text(PG_FUNCTION_ARGS)
                fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                                                                                                          sizeof(ArrayMetaState));
                my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
-               my_extra->element_type = InvalidOid;
+               my_extra->element_type = ~element_type;
        }
 
        if (my_extra->element_type != element_type)
@@ -2519,26 +2553,49 @@ array_to_text(PG_FUNCTION_ARGS)
        typbyval = my_extra->typbyval;
        typalign = my_extra->typalign;
 
+       p = ARR_DATA_PTR(v);
+       bitmap = ARR_NULLBITMAP(v);
+       bitmask = 1;
+
        for (i = 0; i < nitems; i++)
        {
                Datum           itemvalue;
                char       *value;
 
-               itemvalue = fetch_att(p, typbyval, typlen);
+               /* Get source element, checking for NULL */
+               if (bitmap && (*bitmap & bitmask) == 0)
+               {
+                       /* we ignore nulls */
+               }
+               else
+               {
+                       itemvalue = fetch_att(p, typbyval, typlen);
 
-               value = DatumGetCString(FunctionCall1(&my_extra->proc,
-                                                                                         itemvalue));
+                       value = OutputFunctionCall(&my_extra->proc, itemvalue);
 
-               if (i > 0)
-                       appendStringInfo(result_str, "%s%s", fldsep, value);
-               else
-                       appendStringInfoString(result_str, value);
+                       if (printed)
+                               appendStringInfo(&buf, "%s%s", fldsep, value);
+                       else
+                               appendStringInfoString(&buf, value);
+                       printed = true;
+
+                       p = att_addlength(p, typlen, PointerGetDatum(p));
+                       p = (char *) att_align(p, typalign);
+               }
 
-               p = att_addlength(p, typlen, PointerGetDatum(p));
-               p = (char *) att_align(p, typalign);
+               /* advance bitmap pointer if any */
+               if (bitmap)
+               {
+                       bitmask <<= 1;
+                       if (bitmask == 0x100)
+                       {
+                               bitmap++;
+                               bitmask = 1;
+                       }
+               }
        }
 
-       PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
+       PG_RETURN_TEXT_P(PG_STR_GET_TEXT(buf.data));
 }
 
 #define HEXBASE 16