Message style improvements

[postgresql] / src / backend / utils / adt / varlena.c
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c

index 8e01f9f539ef237cace2a8652127196d1e918406..33f40b685c76bbaa87e05476229e47793b01e227 100644 (file)
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3,12 +3,12 @@
   * varlena.c
   *       Functions for the variable-length built-in types.
   *
- * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.108 2003/11/30 20:55:09 joe Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.151 2006/10/04 00:30:00 momjian Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -16,17 +16,16 @@
  
  #include <ctype.h>
  
-#include "mb/pg_wchar.h"
-#include "miscadmin.h"
+#include "access/tupmacs.h"
  #include "access/tuptoaster.h"
  #include "catalog/pg_type.h"
-#include "lib/stringinfo.h"
-#include "libpq/crypt.h"
+#include "libpq/md5.h"
  #include "libpq/pqformat.h"
-#include "utils/array.h"
+#include "parser/scansup.h"
+#include "regex/regex.h"
  #include "utils/builtins.h"
-#include "utils/pg_locale.h"
  #include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
  
  
  typedef struct varlena unknown;
@@ -46,26 +45,22 @@ typedef struct varlena unknown;
  #define TEXTLEN(textp) \
         text_length(PointerGetDatum(textp))
  #define TEXTPOS(buf_text, from_sub_text) \
-       text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
-#define TEXTDUP(textp) \
-       DatumGetTextPCopy(PointerGetDatum(textp))
+       text_position(buf_text, from_sub_text, 1)
  #define LEFT(buf_text, from_sub_text) \
         text_substring(PointerGetDatum(buf_text), \
                                         1, \
                                         TEXTPOS(buf_text, from_sub_text) - 1, false)
-#define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
-       text_substring(PointerGetDatum(buf_text), \
-                                       TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
-                                       -1, true)
  
  static int     text_cmp(text *arg1, text *arg2);
  static int32 text_length(Datum str);
-static int32 text_position(Datum str, Datum search_str, int matchnum);
+static int32 text_position(text *t1, text *t2, int matchnum);
  static text *text_substring(Datum str,
                            int32 start,
                            int32 length,
                            bool length_not_specified);
  
+static void appendStringInfoText(StringInfo str, const text *t);
+
  
  /*****************************************************************************
   *      USER I/O ROUTINES                                                                                                               *
@@ -83,7 +78,7 @@ static text *text_substring(Datum str,
   *             ereport(ERROR, ...) if bad form.
   *
   *             BUGS:
- *                             The input is scaned twice.
+ *                             The input is scanned twice.
   *                             The error checking of input is minimal.
   */
  Datum
@@ -149,8 +144,7 @@ byteain(PG_FUNCTION_ARGS)
                 else
                 {
                         /*
-                        * We should never get here. The first pass should not allow
-                        * it.
+                        * We should never get here. The first pass should not allow it.
                          */
                         ereport(ERROR,
                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
@@ -259,10 +253,7 @@ textin(PG_FUNCTION_ARGS)
         text       *result;
         int                     len;
  
-       /* verify encoding */
         len = strlen(inputText);
-       pg_verifymbstr(inputText, len, false);
-
         result = (text *) palloc(len + VARHDRSZ);
         VARATT_SIZEP(result) = len + VARHDRSZ;
  
@@ -301,6 +292,7 @@ textrecv(PG_FUNCTION_ARGS)
         int                     nbytes;
  
         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+
         result = (text *) palloc(nbytes + VARHDRSZ);
         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
         memcpy(VARDATA(result), str, nbytes);
@@ -329,18 +321,10 @@ textsend(PG_FUNCTION_ARGS)
  Datum
  unknownin(PG_FUNCTION_ARGS)
  {
-       char       *inputStr = PG_GETARG_CSTRING(0);
-       unknown    *result;
-       int                     len;
-
-       len = strlen(inputStr) + VARHDRSZ;
-
-       result = (unknown *) palloc(len);
-       VARATT_SIZEP(result) = len;
-
-       memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
+       char       *str = PG_GETARG_CSTRING(0);
  
-       PG_RETURN_UNKNOWN_P(result);
+       /* representation is same as cstring */
+       PG_RETURN_CSTRING(pstrdup(str));
  }
  
  /*
@@ -349,16 +333,10 @@ unknownin(PG_FUNCTION_ARGS)
  Datum
  unknownout(PG_FUNCTION_ARGS)
  {
-       unknown    *t = PG_GETARG_UNKNOWN_P(0);
-       int                     len;
-       char       *result;
+       /* representation is same as cstring */
+       char       *str = PG_GETARG_CSTRING(0);
  
-       len = VARSIZE(t) - VARHDRSZ;
-       result = (char *) palloc(len + 1);
-       memcpy(result, VARDATA(t), len);
-       result[len] = '\0';
-
-       PG_RETURN_CSTRING(result);
+       PG_RETURN_CSTRING(pstrdup(str));
  }
  
  /*
@@ -368,28 +346,27 @@ Datum
  unknownrecv(PG_FUNCTION_ARGS)
  {
         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
-       unknown    *result;
+       char       *str;
         int                     nbytes;
  
-       nbytes = buf->len - buf->cursor;
-       result = (unknown *) palloc(nbytes + VARHDRSZ);
-       VARATT_SIZEP(result) = nbytes + VARHDRSZ;
-       pq_copymsgbytes(buf, VARDATA(result), nbytes);
-       PG_RETURN_UNKNOWN_P(result);
+       str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+       /* representation is same as cstring */
+       PG_RETURN_CSTRING(str);
  }
  
  /*
   *             unknownsend                     - converts unknown to binary format
- *
- * This is a special case: just copy the input, since it's
- * effectively the same format as bytea
   */
  Datum
  unknownsend(PG_FUNCTION_ARGS)
  {
-       unknown    *vlena = PG_GETARG_UNKNOWN_P_COPY(0);
+       /* representation is same as cstring */
+       char       *str = PG_GETARG_CSTRING(0);
+       StringInfoData buf;
  
-       PG_RETURN_UNKNOWN_P(vlena);
+       pq_begintypsend(&buf);
+       pq_sendtext(&buf, str, strlen(str));
+       PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
  }
  
  
@@ -403,14 +380,20 @@ unknownsend(PG_FUNCTION_ARGS)
  Datum
  textlen(PG_FUNCTION_ARGS)
  {
-       PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
+       Datum           str = PG_GETARG_DATUM(0);
+
+       /* try to avoid decompressing argument */
+       PG_RETURN_INT32(text_length(str));
  }
  
  /*
   * text_length -
   *     Does the real work for textlen()
+ *
   *     This is broken out so it can be called directly by other string processing
- *     functions.
+ *     functions.      Note that the argument is passed as a Datum, to indicate that
+ *     it may still be in compressed form.  We can avoid decompressing it at all
+ *     in some cases.
   */
  static int32
  text_length(Datum str)
@@ -418,20 +401,13 @@ text_length(Datum str)
         /* fastpath when max encoding length is one */
         if (pg_database_encoding_max_length() == 1)
                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
-
-       if (pg_database_encoding_max_length() > 1)
+       else
         {
                 text       *t = DatumGetTextP(str);
  
                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
                                                                                          VARSIZE(t) - VARHDRSZ));
         }
-
-       /* should never get here */
-       elog(ERROR, "invalid backend encoding: encoding max length < 1");
-
-       /* not reached: suppress compiler warning */
-       return 0;
  }
  
  /*
@@ -442,7 +418,10 @@ text_length(Datum str)
  Datum
  textoctetlen(PG_FUNCTION_ARGS)
  {
-       PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
+       Datum           str = PG_GETARG_DATUM(0);
+
+       /* We need not detoast the input at all */
+       PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
  }
  
  /*
@@ -466,11 +445,11 @@ textcat(PG_FUNCTION_ARGS)
         text       *result;
         char       *ptr;
  
-       len1 = (VARSIZE(t1) - VARHDRSZ);
+       len1 = VARSIZE(t1) - VARHDRSZ;
         if (len1 < 0)
                 len1 = 0;
  
-       len2 = (VARSIZE(t2) - VARHDRSZ);
+       len2 = VARSIZE(t2) - VARHDRSZ;
         if (len2 < 0)
                 len2 = 0;
  
@@ -504,9 +483,6 @@ textcat(PG_FUNCTION_ARGS)
   *     adjusting the length to be consistent with the "negative start" per SQL92.
   * If the length is less than zero, return the remaining string.
   *
- * Note that the arguments operate on octet length,
- *     so not aware of multibyte character sets.
- *
   * Added multibyte support.
   * - Tatsuo Ishii 1998-4-21
   * Changed behavior if starting position is less than one to conform to SQL92 behavior.
@@ -545,8 +521,11 @@ text_substr_no_len(PG_FUNCTION_ARGS)
  /*
   * text_substring -
   *     Does the real work for text_substr() and text_substr_no_len()
+ *
   *     This is broken out so it can be called directly by other string processing
- *     functions.
+ *     functions.      Note that the argument is passed as a Datum, to indicate that
+ *     it may still be in compressed/toasted form.  We can avoid detoasting all
+ *     of it in some cases.
   */
  static text *
  text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
@@ -561,8 +540,8 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
         {
                 S1 = Max(S, 1);
  
-               if (length_not_specified)               /* special case - get length to
-                                                                                * end of string */
+               if (length_not_specified)               /* special case - get length to end of
+                                                                                * string */
                         L1 = -1;
                 else
                 {
@@ -570,18 +549,18 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
                         int                     E = S + length;
  
                         /*
-                        * A negative value for L is the only way for the end position
-                        * to be before the start. SQL99 says to throw an error.
+                        * A negative value for L is the only way for the end position to
+                        * be before the start. SQL99 says to throw an error.
                          */
                         if (E < S)
                                 ereport(ERROR,
                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
-                                          errmsg("negative substring length not allowed")));
+                                                errmsg("negative substring length not allowed")));
  
                         /*
-                        * A zero or negative value for the end position can happen if
-                        * the start was negative or one. SQL99 says to return a
-                        * zero-length string.
+                        * A zero or negative value for the end position can happen if the
+                        * start was negative or one. SQL99 says to return a zero-length
+                        * string.
                          */
                         if (E < 1)
                                 return PG_STR_GET_TEXT("");
@@ -590,9 +569,9 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
                 }
  
                 /*
-                * If the start position is past the end of the string, SQL99 says
-                * to return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will
-                * do that for us. Convert to zero-based starting position
+                * If the start position is past the end of the string, SQL99 says to
+                * return a zero-length string -- DatumGetTextPSlice() will do that
+                * for us. Convert to zero-based starting position.
                  */
                 return DatumGetTextPSlice(str, S1 - 1, L1);
         }
@@ -600,8 +579,8 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
         {
                 /*
                  * When encoding max length is > 1, we can't get LC without
-                * detoasting, so we'll grab a conservatively large slice now and
-                * go back later to do the right thing
+                * detoasting, so we'll grab a conservatively large slice now and go
+                * back later to do the right thing
                  */
                 int32           slice_start;
                 int32           slice_size;
@@ -614,38 +593,38 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
                 text       *ret;
  
                 /*
-                * if S is past the end of the string, the tuple toaster will
-                * return a zero-length string to us
+                * if S is past the end of the string, the tuple toaster will return a
+                * zero-length string to us
                  */
                 S1 = Max(S, 1);
  
                 /*
-                * We need to start at position zero because there is no way to
-                * know in advance which byte offset corresponds to the supplied
-                * start position.
+                * We need to start at position zero because there is no way to know
+                * in advance which byte offset corresponds to the supplied start
+                * position.
                  */
                 slice_start = 0;
  
-               if (length_not_specified)               /* special case - get length to
-                                                                                * end of string */
+               if (length_not_specified)               /* special case - get length to end of
+                                                                                * string */
                         slice_size = L1 = -1;
                 else
                 {
                         int                     E = S + length;
  
                         /*
-                        * A negative value for L is the only way for the end position
-                        * to be before the start. SQL99 says to throw an error.
+                        * A negative value for L is the only way for the end position to
+                        * be before the start. SQL99 says to throw an error.
                          */
                         if (E < S)
                                 ereport(ERROR,
                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
-                                          errmsg("negative substring length not allowed")));
+                                                errmsg("negative substring length not allowed")));
  
                         /*
-                        * A zero or negative value for the end position can happen if
-                        * the start was negative or one. SQL99 says to return a
-                        * zero-length string.
+                        * A zero or negative value for the end position can happen if the
+                        * start was negative or one. SQL99 says to return a zero-length
+                        * string.
                          */
                         if (E < 1)
                                 return PG_STR_GET_TEXT("");
@@ -657,9 +636,8 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
                         L1 = E - S1;
  
                         /*
-                        * Total slice size in bytes can't be any longer than the
-                        * start position plus substring length times the encoding max
-                        * length.
+                        * Total slice size in bytes can't be any longer than the start
+                        * position plus substring length times the encoding max length.
                          */
                         slice_size = (S1 + L1) * eml;
                 }
@@ -673,16 +651,15 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
  
                 /*
-                * Check that the start position wasn't > slice_strlen. If so,
-                * SQL99 says to return a zero-length string.
+                * Check that the start position wasn't > slice_strlen. If so, SQL99
+                * says to return a zero-length string.
                  */
                 if (S1 > slice_strlen)
                         return PG_STR_GET_TEXT("");
  
                 /*
-                * Adjust L1 and E1 now that we know the slice string length.
-                * Again remember that S1 is one based, and slice_start is zero
-                * based.
+                * Adjust L1 and E1 now that we know the slice string length. Again
+                * remember that S1 is one based, and slice_start is zero based.
                  */
                 if (L1 > -1)
                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
@@ -690,8 +667,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
                         E1 = slice_start + 1 + slice_strlen;
  
                 /*
-                * Find the start position in the slice; remember S1 is not zero
-                * based
+                * Find the start position in the slice; remember S1 is not zero based
                  */
                 p = VARDATA(slice);
                 for (i = 0; i < S1 - 1; i++)
@@ -717,7 +693,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
  
         /* not reached: suppress compiler warning */
-       return PG_STR_GET_TEXT("");
+       return NULL;
  }
  
  /*
@@ -730,51 +706,61 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
  Datum
  textpos(PG_FUNCTION_ARGS)
  {
-       PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
+       text       *str = PG_GETARG_TEXT_P(0);
+       text       *search_str = PG_GETARG_TEXT_P(1);
+
+       PG_RETURN_INT32(text_position(str, search_str, 1));
  }
  
  /*
   * text_position -
   *     Does the real work for textpos()
+ *
+ * Inputs:
+ *             t1 - string to be searched
+ *             t2 - pattern to match within t1
+ *             matchnum - number of the match to be found (1 is the first match)
+ * Result:
+ *             Character index of the first matched char, starting from 1,
+ *             or 0 if no match.
+ *
   *     This is broken out so it can be called directly by other string processing
   *     functions.
   */
  static int32
-text_position(Datum str, Datum search_str, int matchnum)
+text_position(text *t1, text *t2, int matchnum)
  {
-       int                     eml = pg_database_encoding_max_length();
-       text       *t1 = DatumGetTextP(str);
-       text       *t2 = DatumGetTextP(search_str);
         int                     match = 0,
                                 pos = 0,
-                               p = 0,
+                               p,
                                 px,
                                 len1,
                                 len2;
  
-       if (matchnum == 0)
+       if (matchnum <= 0)
                 return 0;                               /* result for 0th match */
  
         if (VARSIZE(t2) <= VARHDRSZ)
-               PG_RETURN_INT32(1);             /* result for empty pattern */
+               return 1;                               /* result for empty pattern */
  
-       len1 = (VARSIZE(t1) - VARHDRSZ);
-       len2 = (VARSIZE(t2) - VARHDRSZ);
+       len1 = VARSIZE(t1) - VARHDRSZ;
+       len2 = VARSIZE(t2) - VARHDRSZ;
  
-       /* no use in searching str past point where search_str will fit */
-       px = (len1 - len2);
-
-       if (eml == 1)                           /* simple case - single byte encoding */
+       if (pg_database_encoding_max_length() == 1)
         {
+               /* simple case - single byte encoding */
                 char       *p1,
                                    *p2;
  
                 p1 = VARDATA(t1);
                 p2 = VARDATA(t2);
  
+               /* no use in searching t1 past the point where t2 will fit */
+               px = (len1 - len2);
+
                 for (p = 0; p <= px; p++)
                 {
-                       if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
+                       if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
                         {
                                 if (++match == matchnum)
                                 {
@@ -785,23 +771,27 @@ text_position(Datum str, Datum search_str, int matchnum)
                         p1++;
                 }
         }
-       else if (eml > 1)                       /* not as simple - multibyte encoding */
+       else
         {
+               /* not as simple - multibyte encoding */
                 pg_wchar   *p1,
                                    *p2,
                                    *ps1,
                                    *ps2;
  
                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
-               (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
+               (void) pg_mb2wchar_with_len(VARDATA(t1), p1, len1);
                 len1 = pg_wchar_strlen(p1);
                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
-               (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
+               (void) pg_mb2wchar_with_len(VARDATA(t2), p2, len2);
                 len2 = pg_wchar_strlen(p2);
  
+               /* no use in searching t1 past the point where t2 will fit */
+               px = (len1 - len2);
+
                 for (p = 0; p <= px; p++)
                 {
-                       if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
+                       if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
                         {
                                 if (++match == matchnum)
                                 {
@@ -815,10 +805,8 @@ text_position(Datum str, Datum search_str, int matchnum)
                 pfree(ps1);
                 pfree(ps2);
         }
-       else
-               elog(ERROR, "invalid backend encoding: encoding max length < 1");
  
-       PG_RETURN_INT32(pos);
+       return pos;
  }
  
  /* varstr_cmp()
@@ -833,21 +821,98 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
         int                     result;
  
         /*
-        * Unfortunately, there is no strncoll(), so in the non-C locale case
-        * we have to do some memory copying.  This turns out to be
-        * significantly slower, so we optimize the case where LC_COLLATE is
-        * C.  We also try to optimize relatively-short strings by avoiding
-        * palloc/pfree overhead.
+        * Unfortunately, there is no strncoll(), so in the non-C locale case we
+        * have to do some memory copying.      This turns out to be significantly
+        * slower, so we optimize the case where LC_COLLATE is C.  We also try to
+        * optimize relatively-short strings by avoiding palloc/pfree overhead.
          */
+       if (lc_collate_is_c())
+       {
+               result = strncmp(arg1, arg2, Min(len1, len2));
+               if ((result == 0) && (len1 != len2))
+                       result = (len1 < len2) ? -1 : 1;
+       }
+       else
+       {
  #define STACKBUFLEN            1024
  
-       if (!lc_collate_is_c())
-       {
                 char            a1buf[STACKBUFLEN];
                 char            a2buf[STACKBUFLEN];
                 char       *a1p,
                                    *a2p;
  
+#ifdef WIN32
+               /* Win32 does not have UTF-8, so we need to map to UTF-16 */
+               if (GetDatabaseEncoding() == PG_UTF8)
+               {
+                       int                     a1len;
+                       int                     a2len;
+                       int                     r;
+
+                       if (len1 >= STACKBUFLEN / 2)
+                       {
+                               a1len = len1 * 2 + 2;
+                               a1p = palloc(a1len);
+                       }
+                       else
+                       {
+                               a1len = STACKBUFLEN;
+                               a1p = a1buf;
+                       }
+                       if (len2 >= STACKBUFLEN / 2)
+                       {
+                               a2len = len2 * 2 + 2;
+                               a2p = palloc(a2len);
+                       }
+                       else
+                       {
+                               a2len = STACKBUFLEN;
+                               a2p = a2buf;
+                       }
+
+                       /* MultiByteToWideChar() does not work for zero-length input */
+                       if (len1 == 0)
+                               r = 0;
+                       else
+                       {
+                               r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
+                                                                               (LPWSTR) a1p, a1len / 2);
+                               if (!r)
+                                       ereport(ERROR,
+                                        (errmsg("could not convert string to UTF-16: error %lu",
+                                                        GetLastError())));
+                       }
+                       ((LPWSTR) a1p)[r] = 0;
+
+                       if (len2 == 0)
+                               r = 0;
+                       else
+                       {
+                               r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
+                                                                               (LPWSTR) a2p, a2len / 2);
+                               if (!r)
+                                       ereport(ERROR,
+                                        (errmsg("could not convert string to UTF-16: error %lu",
+                                                        GetLastError())));
+                       }
+                       ((LPWSTR) a2p)[r] = 0;
+
+                       errno = 0;
+                       result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
+                       if (result == 2147483647)       /* _NLSCMPERROR; missing from mingw
+                                                                                * headers */
+                               ereport(ERROR,
+                                               (errmsg("could not compare Unicode strings: %m")));
+
+                       if (a1p != a1buf)
+                               pfree(a1p);
+                       if (a2p != a2buf)
+                               pfree(a2p);
+
+                       return result;
+               }
+#endif   /* WIN32 */
+
                 if (len1 >= STACKBUFLEN)
                         a1p = (char *) palloc(len1 + 1);
                 else
@@ -864,17 +929,20 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
  
                 result = strcoll(a1p, a2p);
  
-               if (len1 >= STACKBUFLEN)
+               /*
+                * In some locales strcoll() can claim that nonidentical strings are
+                * equal.  Believing that would be bad news for a number of reasons,
+                * so we follow Perl's lead and sort "equal" strings according to
+                * strcmp().
+                */
+               if (result == 0)
+                       result = strcmp(a1p, a2p);
+
+               if (a1p != a1buf)
                         pfree(a1p);
-               if (len2 >= STACKBUFLEN)
+               if (a2p != a2buf)
                         pfree(a2p);
         }
-       else
-       {
-               result = strncmp(arg1, arg2, Min(len1, len2));
-               if ((result == 0) && (len1 != len2))
-                       result = (len1 < len2) ? -1 : 1;
-       }
  
         return result;
  }
@@ -916,11 +984,15 @@ texteq(PG_FUNCTION_ARGS)
         text       *arg2 = PG_GETARG_TEXT_P(1);
         bool            result;
  
-       /* fast path for different-length inputs */
+       /*
+        * Since we only care about equality or not-equality, we can avoid all the
+        * expense of strcoll() here, and just do bitwise comparison.
+        */
         if (VARSIZE(arg1) != VARSIZE(arg2))
                 result = false;
         else
-               result = (text_cmp(arg1, arg2) == 0);
+               result = (strncmp(VARDATA(arg1), VARDATA(arg2),
+                                                 VARSIZE(arg1) - VARHDRSZ) == 0);
  
         PG_FREE_IF_COPY(arg1, 0);
         PG_FREE_IF_COPY(arg2, 1);
@@ -935,11 +1007,15 @@ textne(PG_FUNCTION_ARGS)
         text       *arg2 = PG_GETARG_TEXT_P(1);
         bool            result;
  
-       /* fast path for different-length inputs */
+       /*
+        * Since we only care about equality or not-equality, we can avoid all the
+        * expense of strcoll() here, and just do bitwise comparison.
+        */
         if (VARSIZE(arg1) != VARSIZE(arg2))
                 result = true;
         else
-               result = (text_cmp(arg1, arg2) != 0);
+               result = (strncmp(VARDATA(arg1), VARDATA(arg2),
+                                                 VARSIZE(arg1) - VARHDRSZ) != 0);
  
         PG_FREE_IF_COPY(arg1, 0);
         PG_FREE_IF_COPY(arg2, 1);
@@ -1199,7 +1275,10 @@ bttext_pattern_cmp(PG_FUNCTION_ARGS)
  Datum
  byteaoctetlen(PG_FUNCTION_ARGS)
  {
-       PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
+       Datum           str = PG_GETARG_DATUM(0);
+
+       /* We need not detoast the input at all */
+       PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
  }
  
  /*
@@ -1220,11 +1299,11 @@ byteacat(PG_FUNCTION_ARGS)
         bytea      *result;
         char       *ptr;
  
-       len1 = (VARSIZE(t1) - VARHDRSZ);
+       len1 = VARSIZE(t1) - VARHDRSZ;
         if (len1 < 0)
                 len1 = 0;
  
-       len2 = (VARSIZE(t2) - VARHDRSZ);
+       len2 = VARSIZE(t2) - VARHDRSZ;
         if (len2 < 0)
                 len2 = 0;
  
@@ -1273,9 +1352,8 @@ bytea_substr(PG_FUNCTION_ARGS)
         if (fcinfo->nargs == 2)
         {
                 /*
-                * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
-                * everything to the end of the string if we pass it a negative
-                * value for length.
+                * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
+                * the end of the string if we pass it a negative value for length.
                  */
                 L1 = -1;
         }
@@ -1285,8 +1363,8 @@ bytea_substr(PG_FUNCTION_ARGS)
                 int                     E = S + PG_GETARG_INT32(2);
  
                 /*
-                * A negative value for L is the only way for the end position to
-                * be before the start. SQL99 says to throw an error.
+                * A negative value for L is the only way for the end position to be
+                * before the start. SQL99 says to throw an error.
                  */
                 if (E < S)
                         ereport(ERROR,
@@ -1306,8 +1384,8 @@ bytea_substr(PG_FUNCTION_ARGS)
  
         /*
          * If the start position is past the end of the string, SQL99 says to
-        * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
-        * that for us. Convert to zero-based starting position
+        * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
+        * for us. Convert to zero-based starting position
          */
         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
  }
@@ -1345,8 +1423,8 @@ byteapos(PG_FUNCTION_ARGS)
         if (VARSIZE(t2) <= VARHDRSZ)
                 PG_RETURN_INT32(1);             /* result for empty pattern */
  
-       len1 = (VARSIZE(t1) - VARHDRSZ);
-       len2 = (VARSIZE(t2) - VARHDRSZ);
+       len1 = VARSIZE(t1) - VARHDRSZ;
+       len2 = VARSIZE(t2) - VARHDRSZ;
  
         p1 = VARDATA(t1);
         p2 = VARDATA(t2);
@@ -1600,17 +1678,17 @@ name_text(PG_FUNCTION_ARGS)
   * truncate names if they're too long.
   */
  List *
-textToQualifiedNameList(text *textval, const char *caller)
+textToQualifiedNameList(text *textval)
  {
         char       *rawname;
         List       *result = NIL;
         List       *namelist;
-       List       *l;
+       ListCell   *l;
  
         /* Convert to C string (handles possible detoasting). */
         /* Note we rely on being able to modify rawname below. */
         rawname = DatumGetCString(DirectFunctionCall1(textout,
-                                                                                         PointerGetDatum(textval)));
+                                                                                                 PointerGetDatum(textval)));
  
         if (!SplitIdentifierString(rawname, '.', &namelist))
                 ereport(ERROR,
@@ -1630,7 +1708,7 @@ textToQualifiedNameList(text *textval, const char *caller)
         }
  
         pfree(rawname);
-       freeList(namelist);
+       list_free(namelist);
  
         return result;
  }
@@ -1651,7 +1729,7 @@ textToQualifiedNameList(text *textval, const char *caller)
   *                        identifiers.
   * Outputs:
   *     namelist: filled with a palloc'd list of pointers to identifiers within
- *                       rawstring.  Caller should freeList() this even on error return.
+ *                       rawstring.  Caller should list_free() this even on error return.
   *
   * Returns TRUE if okay, FALSE if there is a syntax error in the string.
   *
@@ -1678,7 +1756,6 @@ SplitIdentifierString(char *rawstring, char separator,
         {
                 char       *curname;
                 char       *endp;
-               int                     curlen;
  
                 if (*nextp == '\"')
                 {
@@ -1701,21 +1778,31 @@ SplitIdentifierString(char *rawstring, char separator,
                 else
                 {
                         /* Unquoted name --- extends to separator or whitespace */
+                       char       *downname;
+                       int                     len;
+
                         curname = nextp;
                         while (*nextp && *nextp != separator &&
                                    !isspace((unsigned char) *nextp))
-                       {
-                               /*
-                                * It's important that this match the identifier
-                                * downcasing code used by backend/parser/scan.l.
-                                */
-                               if (isupper((unsigned char) *nextp))
-                                       *nextp = tolower((unsigned char) *nextp);
                                 nextp++;
-                       }
                         endp = nextp;
                         if (curname == nextp)
                                 return false;   /* empty unquoted name not allowed */
+
+                       /*
+                        * Downcase the identifier, using same code as main lexer does.
+                        *
+                        * XXX because we want to overwrite the input in-place, we cannot
+                        * support a downcasing transformation that increases the string
+                        * length.      This is not a problem given the current implementation
+                        * of downcase_truncate_identifier, but we'll probably have to do
+                        * something about this someday.
+                        */
+                       len = endp - curname;
+                       downname = downcase_truncate_identifier(curname, len, false);
+                       Assert(strlen(downname) <= len);
+                       strncpy(curname, downname, len);
+                       pfree(downname);
                 }
  
                 while (isspace((unsigned char) *nextp))
@@ -1736,13 +1823,8 @@ SplitIdentifierString(char *rawstring, char separator,
                 /* Now safe to overwrite separator with a null */
                 *endp = '\0';
  
-               /* Truncate name if it's overlength; again, should match scan.l */
-               curlen = strlen(curname);
-               if (curlen >= NAMEDATALEN)
-               {
-                       curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
-                       curname[curlen] = '\0';
-               }
+               /* Truncate name if it's overlength */
+               truncate_identifier(curname, strlen(curname), false);
  
                 /*
                  * Finished isolating current name --- add it to list
@@ -1914,6 +1996,18 @@ byteacmp(PG_FUNCTION_ARGS)
         PG_RETURN_INT32(cmp);
  }
  
+/*
+ * appendStringInfoText
+ *
+ * Append the data bytes of the text value t to the StringInfo str.
+ *
+ * The payload pointer VARDATA(t) and its byte length VARSIZE(t) - VARHDRSZ
+ * are handed straight to appendBinaryStringInfo().
+ * Like appendStringInfoString(str, PG_TEXT_GET_STR(t)) but faster.
+ */
+static void
+appendStringInfoText(StringInfo str, const text *t)
+{
+       appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
+}
+
  /*
   * replace_text
   * replace all occurrences of 'old_sub_str' in 'orig_str'
@@ -1925,49 +2019,308 @@ byteacmp(PG_FUNCTION_ARGS)
  Datum
  replace_text(PG_FUNCTION_ARGS)
  {
+       text       *src_text = PG_GETARG_TEXT_P(0);
+       text       *from_sub_text = PG_GETARG_TEXT_P(1);
+       text       *to_sub_text = PG_GETARG_TEXT_P(2);
+       int                     src_text_len = TEXTLEN(src_text);
+       int                     from_sub_text_len = TEXTLEN(from_sub_text);
         text       *left_text;
         text       *right_text;
         text       *buf_text;
         text       *ret_text;
         int                     curr_posn;
-       text       *src_text = PG_GETARG_TEXT_P(0);
-       int                     src_text_len = TEXTLEN(src_text);
-       text       *from_sub_text = PG_GETARG_TEXT_P(1);
-       int                     from_sub_text_len = TEXTLEN(from_sub_text);
-       text       *to_sub_text = PG_GETARG_TEXT_P(2);
-       char       *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
-       StringInfo      str = makeStringInfo();
+       StringInfoData str;
  
         if (src_text_len == 0 || from_sub_text_len == 0)
                 PG_RETURN_TEXT_P(src_text);
  
-       buf_text = TEXTDUP(src_text);
-       curr_posn = TEXTPOS(buf_text, from_sub_text);
+       curr_posn = TEXTPOS(src_text, from_sub_text);
+
+       /* When the from_sub_text is not found, there is nothing to do. */
+       if (curr_posn == 0)
+               PG_RETURN_TEXT_P(src_text);
+
+       initStringInfo(&str);
+       buf_text = src_text;
  
         while (curr_posn > 0)
         {
-               left_text = LEFT(buf_text, from_sub_text);
-               right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
+               left_text = text_substring(PointerGetDatum(buf_text),
+                                                                  1, curr_posn - 1, false);
+               right_text = text_substring(PointerGetDatum(buf_text),
+                                                                       curr_posn + from_sub_text_len, -1, true);
  
-               appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
-               appendStringInfoString(str, to_sub_str);
+               appendStringInfoText(&str, left_text);
+               appendStringInfoText(&str, to_sub_text);
  
-               pfree(buf_text);
+               if (buf_text != src_text)
+                       pfree(buf_text);
                 pfree(left_text);
                 buf_text = right_text;
                 curr_posn = TEXTPOS(buf_text, from_sub_text);
         }
  
-       appendStringInfoString(str, PG_TEXT_GET_STR(buf_text));
-       pfree(buf_text);
+       appendStringInfoText(&str, buf_text);
+       if (buf_text != src_text)
+               pfree(buf_text);
  
-       ret_text = PG_STR_GET_TEXT(str->data);
-       pfree(str->data);
-       pfree(str);
+       ret_text = PG_STR_GET_TEXT(str.data);
+       pfree(str.data);
  
         PG_RETURN_TEXT_P(ret_text);
  }
  
+/*
+ * check_replace_text_has_escape_char
+ *
+ * Report whether replace_text contains the escape char, a backslash.
+ *
+ * The VARSIZE(replace_text) - VARHDRSZ bytes starting at
+ * VARDATA(replace_text) are scanned.  When pg_database_encoding_max_length()
+ * is 1 the scan steps one byte at a time; otherwise it steps by pg_mblen(p),
+ * so only the first byte of each character is compared with the backslash.
+ *
+ * Returns true at the first backslash found, false if the scan reaches the
+ * end of the data without finding one.
+ */
+static bool
+check_replace_text_has_escape_char(const text *replace_text)
+{
+       const char *p = VARDATA(replace_text);
+       const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
+
+       if (pg_database_encoding_max_length() == 1)
+       {
+               for (; p < p_end; p++)
+               {
+                       if (*p == '\\')
+                               return true;
+               }
+       }
+       else
+       {
+               for (; p < p_end; p += pg_mblen(p))
+               {
+                       if (*p == '\\')
+                               return true;
+               }
+       }
+
+       return false;
+}
+
+/*
+ * appendStringInfoRegexpSubstr
+ *
+ * Append replace_text to str, substituting regexp back references for
+ * \n escapes.
+ *
+ * str: buffer being appended to.
+ * replace_text: replacement template; VARSIZE() - VARHDRSZ bytes starting
+ *             at VARDATA() are scanned.
+ * pmatch: match offsets; entry 0 is used for \& and entries 1..9 are used
+ *             for \1..\9.
+ * src_text: text from which the referenced substrings are extracted with
+ *             text_substring.
+ *
+ * Escapes handled:
+ *     \1 .. \9 : the substring of src_text described by pmatch[1..9]
+ *     \&       : the substring of src_text described by pmatch[0]
+ *     two backslashes : one backslash is appended
+ *     backslash + any other char : the backslash is appended and the other
+ *             char is then copied as ordinary data
+ *     backslash as the last byte : one backslash is appended
+ * A reference whose rm_so or rm_eo is -1 appends nothing.
+ */
+static void
+appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
+                                                        regmatch_t *pmatch, text *src_text)
+{
+       const char *p = VARDATA(replace_text);
+       const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
+       int                     eml = pg_database_encoding_max_length();
+
+       for (;;)
+       {
+               const char *chunk_start = p;
+               int                     so;
+               int                     eo;
+
+               /* Find next escape char. */
+               if (eml == 1)
+               {
+                       for (; p < p_end && *p != '\\'; p++)
+                                /* nothing */ ;
+               }
+               else
+               {
+                       for (; p < p_end && *p != '\\'; p += pg_mblen(p))
+                                /* nothing */ ;
+               }
+
+               /* Copy the text we just scanned over, if any. */
+               if (p > chunk_start)
+                       appendBinaryStringInfo(str, chunk_start, p - chunk_start);
+
+               /* Done if at end of string, else advance over escape char. */
+               if (p >= p_end)
+                       break;
+               p++;
+
+               if (p >= p_end)
+               {
+                       /* Escape at very end of input.  Treat same as unexpected char */
+                       appendStringInfoChar(str, '\\');
+                       break;
+               }
+
+               if (*p >= '1' && *p <= '9')
+               {
+                       /* Use the back reference of regexp. */
+                       int                     idx = *p - '0';
+
+                       so = pmatch[idx].rm_so;
+                       eo = pmatch[idx].rm_eo;
+                       p++;
+               }
+               else if (*p == '&')
+               {
+                       /* Use the entire matched string. */
+                       so = pmatch[0].rm_so;
+                       eo = pmatch[0].rm_eo;
+                       p++;
+               }
+               else if (*p == '\\')
+               {
+                       /* \\ means transfer one \ to output. */
+                       appendStringInfoChar(str, '\\');
+                       p++;
+                       continue;
+               }
+               else
+               {
+                       /*
+                        * If escape char is not followed by any expected char, just treat
+                        * it as ordinary data to copy.  (XXX would it be better to throw
+                        * an error?)  p is not advanced here, so the following char is
+                        * picked up by the next scan as ordinary text.
+                        */
+                       appendStringInfoChar(str, '\\');
+                       continue;
+               }
+
+               if (so != -1 && eo != -1)
+               {
+                       /*
+                        * Copy the text that is back reference of regexp.  Because so and
+                        * eo are counted in characters not bytes, it's easiest to use
+                        * text_substring to pull out the correct chunk of text.  The
+                        * chunk is requested as start so + 1, length eo - so.
+                        */
+                       text       *append_text;
+
+                       append_text = text_substring(PointerGetDatum(src_text),
+                                                                                so + 1, (eo - so), false);
+                       appendStringInfoText(str, append_text);
+                       pfree(append_text);
+               }
+       }
+}
+
+#define REGEXP_REPLACE_BACKREF_CNT             10
+
+/*
+ * replace_text_regexp
+ *
+ * Replace the text in src_text that matches regexp with replace_text.
+ *
+ * src_text: text to search.
+ * regexp: the regular expression handed to pg_regexec (see Note below).
+ * replace_text: replacement; if it contains a backslash it is expanded by
+ *             appendStringInfoRegexpSubstr for each match, otherwise it is
+ *             appended as-is.
+ * glob: if false, only the first match is replaced; if true, matching
+ *             repeats until pg_regexec reports REG_NOMATCH or search_start
+ *             passes data_len.
+ *
+ * Returns a text built by PG_STR_GET_TEXT from the accumulated buffer.  Text
+ * to the left of each match and to the right of the last match is copied
+ * from src_text unchanged, so with no match at all the result holds the
+ * same characters as src_text.
+ *
+ * Any pg_regexec result other than REG_NOMATCH or REG_OKAY is reported with
+ * ereport(ERROR) using ERRCODE_INVALID_REGULAR_EXPRESSION.
+ *
+ * Note: to avoid having to include regex.h in builtins.h, we declare
+ * the regexp argument as void *, but really it's regex_t *.
+ *
+ * NOTE(review): search_start and data_pos are int while data_len is size_t,
+ * so the loop condition and the final data_pos < data_len test mix signed
+ * and unsigned operands.
+ */
+text *
+replace_text_regexp(text *src_text, void *regexp,
+                                       text *replace_text, bool glob)
+{
+       text       *ret_text;
+       regex_t    *re = (regex_t *) regexp;
+       int                     src_text_len = VARSIZE(src_text) - VARHDRSZ;
+       StringInfoData buf;
+       regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
+       pg_wchar   *data;
+       size_t          data_len;
+       int                     search_start;
+       int                     data_pos;
+       bool            have_escape;
+
+       initStringInfo(&buf);
+
+       /* Convert data string to wide characters. */
+       data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
+       data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
+
+       /* Check whether replace_text has escape char. */
+       have_escape = check_replace_text_has_escape_char(replace_text);
+
+       for (search_start = data_pos = 0; search_start <= data_len;)
+       {
+               int                     regexec_result;
+
+               regexec_result = pg_regexec(re,
+                                                                       data,
+                                                                       data_len,
+                                                                       search_start,
+                                                                       NULL,           /* no details */
+                                                                       REGEXP_REPLACE_BACKREF_CNT,
+                                                                       pmatch,
+                                                                       0);
+
+               if (regexec_result == REG_NOMATCH)
+                       break;
+
+               if (regexec_result != REG_OKAY)
+               {
+                       char            errMsg[100];
+
+                       pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+                                        errmsg("regular expression failed: %s", errMsg)));
+               }
+
+               /*
+                * Copy the text to the left of the match position.  Because we are
+                * working with character not byte indexes, it's easiest to use
+                * text_substring to pull out the needed data.
+                */
+               if (pmatch[0].rm_so - data_pos > 0)
+               {
+                       text       *left_text;
+
+                       left_text = text_substring(PointerGetDatum(src_text),
+                                                                          data_pos + 1,
+                                                                          pmatch[0].rm_so - data_pos,
+                                                                          false);
+                       appendStringInfoText(&buf, left_text);
+                       pfree(left_text);
+               }
+
+               /*
+                * Copy the replace_text. Process back references when the
+                * replace_text has escape characters.
+                */
+               if (have_escape)
+                       appendStringInfoRegexpSubstr(&buf, replace_text, pmatch, src_text);
+               else
+                       appendStringInfoText(&buf, replace_text);
+
+               search_start = data_pos = pmatch[0].rm_eo;
+
+               /*
+                * When global option is off, replace the first instance only.
+                */
+               if (!glob)
+                       break;
+
+               /*
+                * Search from next character when the matching text is zero width.
+                * Only search_start is bumped; data_pos stays at the match end, so
+                * the skipped character is still copied by a later left-of-match or
+                * right-of-match copy.
+                */
+               if (pmatch[0].rm_so == pmatch[0].rm_eo)
+                       search_start++;
+       }
+
+       /*
+        * Copy the text to the right of the last match.
+        */
+       if (data_pos < data_len)
+       {
+               text       *right_text;
+
+               right_text = text_substring(PointerGetDatum(src_text),
+                                                                       data_pos + 1, -1, true);
+               appendStringInfoText(&buf, right_text);
+               pfree(right_text);
+       }
+
+       ret_text = PG_STR_GET_TEXT(buf.data);
+       pfree(buf.data);
+       pfree(data);
+
+       return ret_text;
+}
+
  /*
   * split_text
   * parse input string
@@ -1978,14 +2331,20 @@ Datum
  split_text(PG_FUNCTION_ARGS)
  {
         text       *inputstring = PG_GETARG_TEXT_P(0);
-       int                     inputstring_len = TEXTLEN(inputstring);
         text       *fldsep = PG_GETARG_TEXT_P(1);
-       int                     fldsep_len = TEXTLEN(fldsep);
         int                     fldnum = PG_GETARG_INT32(2);
-       int                     start_posn = 0;
-       int                     end_posn = 0;
+       int                     inputstring_len = TEXTLEN(inputstring);
+       int                     fldsep_len = TEXTLEN(fldsep);
+       int                     start_posn;
+       int                     end_posn;
         text       *result_text;
  
+       /* field number is 1 based */
+       if (fldnum < 1)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("field position must be greater than zero")));
+
         /* return empty string for empty input string */
         if (inputstring_len < 1)
                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
@@ -1993,52 +2352,45 @@ split_text(PG_FUNCTION_ARGS)
         /* empty field separator */
         if (fldsep_len < 1)
         {
-               if (fldnum == 1)                /* first field - just return the input
-                                                                * string */
+               /* if first field, return input string, else empty string */
+               if (fldnum == 1)
                         PG_RETURN_TEXT_P(inputstring);
                 else
-/* otherwise return an empty string */
                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
         }
  
-       /* field number is 1 based */
-       if (fldnum < 1)
-               ereport(ERROR,
-                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                errmsg("field position must be greater than zero")));
-
-       start_posn = text_position(PointerGetDatum(inputstring),
-                                                          PointerGetDatum(fldsep),
-                                                          fldnum - 1);
-       end_posn = text_position(PointerGetDatum(inputstring),
-                                                        PointerGetDatum(fldsep),
-                                                        fldnum);
+       start_posn = text_position(inputstring, fldsep, fldnum - 1);
+       end_posn = text_position(inputstring, fldsep, fldnum);
  
         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
         {
-               if (fldnum == 1)                /* first field - just return the input
-                                                                * string */
+               /* if first field, return input string, else empty string */
+               if (fldnum == 1)
                         PG_RETURN_TEXT_P(inputstring);
                 else
-/* otherwise return an empty string */
                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
         }
-       else if ((start_posn != 0) && (end_posn == 0))
+       else if (start_posn == 0)
         {
-               /* last field requested */
-               result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
+               /* first field requested */
+               result_text = LEFT(inputstring, fldsep);
                 PG_RETURN_TEXT_P(result_text);
         }
-       else if ((start_posn == 0) && (end_posn != 0))
+       else if (end_posn == 0)
         {
-               /* first field requested */
-               result_text = LEFT(inputstring, fldsep);
+               /* last field requested */
+               result_text = text_substring(PointerGetDatum(inputstring),
+                                                                        start_posn + fldsep_len,
+                                                                        -1, true);
                 PG_RETURN_TEXT_P(result_text);
         }
         else
         {
-               /* prior to last field requested */
-               result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
+               /* interior field requested */
+               result_text = text_substring(PointerGetDatum(inputstring),
+                                                                        start_posn + fldsep_len,
+                                                                        end_posn - start_posn - fldsep_len,
+                                                                        false);
                 PG_RETURN_TEXT_P(result_text);
         }
  }
@@ -2053,15 +2405,14 @@ Datum
  text_to_array(PG_FUNCTION_ARGS)
  {
         text       *inputstring = PG_GETARG_TEXT_P(0);
-       int                     inputstring_len = TEXTLEN(inputstring);
         text       *fldsep = PG_GETARG_TEXT_P(1);
+       int                     inputstring_len = TEXTLEN(inputstring);
         int                     fldsep_len = TEXTLEN(fldsep);
         int                     fldnum;
-       int                     start_posn = 0;
-       int                     end_posn = 0;
-       text       *result_text = NULL;
+       int                     start_posn;
+       int                     end_posn;
+       text       *result_text;
         ArrayBuildState *astate = NULL;
-       MemoryContext oldcontext = CurrentMemoryContext;
  
         /* return NULL for empty input string */
         if (inputstring_len < 1)
@@ -2073,7 +2424,7 @@ text_to_array(PG_FUNCTION_ARGS)
          */
         if (fldsep_len < 1)
                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
-                                                                          CStringGetDatum(inputstring), 1));
+                                                                                  CStringGetDatum(inputstring), 1));
  
         /* start with end position holding the initial start position */
         end_posn = 0;
@@ -2083,48 +2434,52 @@ text_to_array(PG_FUNCTION_ARGS)
                 bool            disnull = false;
  
                 start_posn = end_posn;
-               end_posn = text_position(PointerGetDatum(inputstring),
-                                                                PointerGetDatum(fldsep),
-                                                                fldnum);
+               end_posn = text_position(inputstring, fldsep, fldnum);
  
                 if ((start_posn == 0) && (end_posn == 0))               /* fldsep not found */
                 {
                         if (fldnum == 1)
                         {
                                 /*
-                                * first element return one element, 1D, array using the
-                                * input string
+                                * first element return one element, 1D, array using the input
+                                * string
                                  */
                                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
-                                                                          CStringGetDatum(inputstring), 1));
+                                                                                  CStringGetDatum(inputstring), 1));
                         }
                         else
                         {
                                 /* otherwise create array and exit */
-                               PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, oldcontext));
+                               PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
+                                                                                                         CurrentMemoryContext));
                         }
                 }
-               else if ((start_posn != 0) && (end_posn == 0))
-               {
-                       /* last field requested */
-                       result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
-               }
-               else if ((start_posn == 0) && (end_posn != 0))
+               else if (start_posn == 0)
                 {
                         /* first field requested */
                         result_text = LEFT(inputstring, fldsep);
                 }
+               else if (end_posn == 0)
+               {
+                       /* last field requested */
+                       result_text = text_substring(PointerGetDatum(inputstring),
+                                                                                start_posn + fldsep_len,
+                                                                                -1, true);
+               }
                 else
                 {
-                       /* prior to last field requested */
-                       result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
+                       /* interior field requested */
+                       result_text = text_substring(PointerGetDatum(inputstring),
+                                                                                start_posn + fldsep_len,
+                                                                                end_posn - start_posn - fldsep_len,
+                                                                                false);
                 }
  
                 /* stash away current value */
                 dvalue = PointerGetDatum(result_text);
                 astate = accumArrayResult(astate, dvalue,
-                                                                 disnull, TEXTOID, oldcontext);
-
+                                                                 disnull, TEXTOID,
+                                                                 CurrentMemoryContext);
         }
  
         /* never reached -- keep compiler quiet */
@@ -2144,17 +2499,18 @@ array_to_text(PG_FUNCTION_ARGS)
         int                     nitems,
                            *dims,
                                 ndims;
-       char       *p;
         Oid                     element_type;
         int                     typlen;
         bool            typbyval;
         char            typalign;
-       Oid                     typelem;
-       StringInfo      result_str = makeStringInfo();
+       StringInfoData buf;
+       bool            printed = false;
+       char       *p;
+       bits8      *bitmap;
+       int                     bitmask;
         int                     i;
         ArrayMetaState *my_extra;
  
-       p = ARR_DATA_PTR(v);
         ndims = ARR_NDIM(v);
         dims = ARR_DIMS(v);
         nitems = ArrayGetNItems(ndims, dims);
@@ -2164,31 +2520,31 @@ array_to_text(PG_FUNCTION_ARGS)
                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
  
         element_type = ARR_ELEMTYPE(v);
+       initStringInfo(&buf);
  
         /*
          * We arrange to look up info about element type, including its output
-        * conversion proc, only once per series of calls, assuming the
-        * element type doesn't change underneath us.
+        * conversion proc, only once per series of calls, assuming the element
+        * type doesn't change underneath us.
          */
         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
         if (my_extra == NULL)
         {
                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
-                                                                                                sizeof(ArrayMetaState));
+                                                                                                         sizeof(ArrayMetaState));
                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
-               my_extra->element_type = InvalidOid;
+               my_extra->element_type = ~element_type;
         }
  
         if (my_extra->element_type != element_type)
         {
                 /*
-                * Get info about element type, including its output conversion
-                * proc
+                * Get info about element type, including its output conversion proc
                  */
                 get_type_io_data(element_type, IOFunc_output,
                                                  &my_extra->typlen, &my_extra->typbyval,
                                                  &my_extra->typalign, &my_extra->typdelim,
-                                                &my_extra->typelem, &my_extra->typiofunc);
+                                                &my_extra->typioparam, &my_extra->typiofunc);
                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
                                           fcinfo->flinfo->fn_mcxt);
                 my_extra->element_type = element_type;
@@ -2196,30 +2552,50 @@ array_to_text(PG_FUNCTION_ARGS)
         typlen = my_extra->typlen;
         typbyval = my_extra->typbyval;
         typalign = my_extra->typalign;
-       typelem = my_extra->typelem;
+
+       p = ARR_DATA_PTR(v);
+       bitmap = ARR_NULLBITMAP(v);
+       bitmask = 1;
  
         for (i = 0; i < nitems; i++)
         {
                 Datum           itemvalue;
                 char       *value;
  
-               itemvalue = fetch_att(p, typbyval, typlen);
+               /* Get source element, checking for NULL */
+               if (bitmap && (*bitmap & bitmask) == 0)
+               {
+                       /* we ignore nulls */
+               }
+               else
+               {
+                       itemvalue = fetch_att(p, typbyval, typlen);
  
-               value = DatumGetCString(FunctionCall3(&my_extra->proc,
-                                                                                         itemvalue,
-                                                                                         ObjectIdGetDatum(typelem),
-                                                                                         Int32GetDatum(-1)));
+                       value = OutputFunctionCall(&my_extra->proc, itemvalue);
  
-               if (i > 0)
-                       appendStringInfo(result_str, "%s%s", fldsep, value);
-               else
-                       appendStringInfo(result_str, "%s", value);
+                       if (printed)
+                               appendStringInfo(&buf, "%s%s", fldsep, value);
+                       else
+                               appendStringInfoString(&buf, value);
+                       printed = true;
  
-               p = att_addlength(p, typlen, PointerGetDatum(p));
-               p = (char *) att_align(p, typalign);
+                       p = att_addlength(p, typlen, PointerGetDatum(p));
+                       p = (char *) att_align(p, typalign);
+               }
+
+               /* advance bitmap pointer if any */
+               if (bitmap)
+               {
+                       bitmask <<= 1;
+                       if (bitmask == 0x100)
+                       {
+                               bitmap++;
+                               bitmask = 1;
+                       }
+               }
         }
  
-       PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
+       PG_RETURN_TEXT_P(PG_STR_GET_TEXT(buf.data));
  }
  
  #define HEXBASE 16
@@ -2230,11 +2606,11 @@ array_to_text(PG_FUNCTION_ARGS)
  Datum
  to_hex32(PG_FUNCTION_ARGS)
  {
-       static char digits[] = "0123456789abcdef";
-       char            buf[32];                /* bigger than needed, but reasonable */
-       char       *ptr;
+       uint32          value = (uint32) PG_GETARG_INT32(0);
         text       *result_text;
-       int32           value = PG_GETARG_INT32(0);
+       char       *ptr;
+       const char *digits = "0123456789abcdef";
+       char            buf[32];                /* bigger than needed, but reasonable */
  
         ptr = buf + sizeof(buf) - 1;
         *ptr = '\0';
@@ -2256,11 +2632,11 @@ to_hex32(PG_FUNCTION_ARGS)
  Datum
  to_hex64(PG_FUNCTION_ARGS)
  {
-       static char digits[] = "0123456789abcdef";
-       char            buf[32];                /* bigger than needed, but reasonable */
-       char       *ptr;
+       uint64          value = (uint64) PG_GETARG_INT64(0);
         text       *result_text;
-       int64           value = PG_GETARG_INT64(0);
+       char       *ptr;
+       const char *digits = "0123456789abcdef";
+       char            buf[32];                /* bigger than needed, but reasonable */
  
         ptr = buf + sizeof(buf) - 1;
         *ptr = '\0';
@@ -2285,18 +2661,91 @@ to_hex64(PG_FUNCTION_ARGS)
  Datum
  md5_text(PG_FUNCTION_ARGS)
  {
-       char       *buff = PG_TEXT_GET_STR(PG_GETARG_TEXT_P(0));
-       size_t          len = strlen(buff);
-       char       *hexsum;
+       text       *in_text = PG_GETARG_TEXT_P(0);
+       size_t          len;
+       char            hexsum[MD5_HASH_LEN + 1];       /* hex digest + terminating NUL */
         text       *result_text;
  
-       /* leave room for the terminating '\0' */
-       hexsum = (char *) palloc(MD5_HASH_LEN + 1);
+       /* Data length is the varlena size minus its header; no strlen() on the payload */
+       len = VARSIZE(in_text) - VARHDRSZ;
  
         /* get the hash result */
-       md5_hash((void *) buff, len, hexsum);
+       if (pg_md5_hash(VARDATA(in_text), len, hexsum) == false)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
  
         /* convert to text and return it */
         result_text = PG_STR_GET_TEXT(hexsum);
         PG_RETURN_TEXT_P(result_text);
  }
+
+/*
+ * Create an md5 hash of a bytea argument and return it as a hex string text:
+ * 16-byte md5 digest is represented in 32 hex characters; a failed hash is an ERROR.
+ */
+Datum
+md5_bytea(PG_FUNCTION_ARGS)
+{
+       bytea      *in = PG_GETARG_BYTEA_P(0);
+       size_t          len;
+       char            hexsum[MD5_HASH_LEN + 1];       /* hex digest + terminating NUL */
+       text       *result_text;
+
+       len = VARSIZE(in) - VARHDRSZ;   /* data bytes, excluding the varlena header */
+       if (pg_md5_hash(VARDATA(in), len, hexsum) == false)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
+
+       result_text = PG_STR_GET_TEXT(hexsum);
+       PG_RETURN_TEXT_P(result_text);
+}
+
+/*
+ * Return the size of a datum, possibly compressed
+ * (varlena: toast_datum_size; cstring: strlen + 1; otherwise the type's typlen).
+ * Works on any data type; the argument's typlen is cached in fn_extra.
+ */
+Datum
+pg_column_size(PG_FUNCTION_ARGS)
+{
+       Datum           value = PG_GETARG_DATUM(0);
+       int32           result;
+       int                     typlen;
+
+       /* On first call, get the input type's typlen, and save at *fn_extra */
+       if (fcinfo->flinfo->fn_extra == NULL)
+       {
+               /* Lookup the datatype of the supplied argument */
+               Oid                     argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+
+               typlen = get_typlen(argtypeid);
+               if (typlen == 0)                /* should not happen */
+                       elog(ERROR, "cache lookup failed for type %u", argtypeid);
+
+               fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+                                                                                                         sizeof(int));
+               *((int *) fcinfo->flinfo->fn_extra) = typlen;   /* remember for later calls */
+       }
+       else
+               typlen = *((int *) fcinfo->flinfo->fn_extra);   /* reuse the cached typlen */
+
+       if (typlen == -1)
+       {
+               /* varlena type, possibly toasted */
+               result = toast_datum_size(value);
+       }
+       else if (typlen == -2)
+       {
+               /* cstring */
+               result = strlen(DatumGetCString(value)) + 1;    /* +1 counts the terminating NUL */
+       }
+       else
+       {
+               /* ordinary fixed-width type */
+               result = typlen;
+       }
+
+       PG_RETURN_INT32(result);
+}