Message style improvements

[postgresql] / src / backend / utils / adt / varlena.c
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c

index bdcffa954e0fe17aae68363d0ab2e93752c9f2fa..33f40b685c76bbaa87e05476229e47793b01e227 100644 (file)
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3,12 +3,12 @@
   * varlena.c
   *       Functions for the variable-length built-in types.
   *
- * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.87 2002/08/04 06:44:47 thomas Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.151 2006/10/04 00:30:00 momjian Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -16,19 +16,50 @@
  
  #include <ctype.h>
  
-#include "mb/pg_wchar.h"
-#include "miscadmin.h"
+#include "access/tupmacs.h"
+#include "access/tuptoaster.h"
+#include "catalog/pg_type.h"
+#include "libpq/md5.h"
+#include "libpq/pqformat.h"
+#include "parser/scansup.h"
+#include "regex/regex.h"
  #include "utils/builtins.h"
+#include "utils/lsyscache.h"
  #include "utils/pg_locale.h"
  
  
  typedef struct varlena unknown;
  
  #define DatumGetUnknownP(X)                    ((unknown *) PG_DETOAST_DATUM(X))
+#define DatumGetUnknownPCopy(X)                ((unknown *) PG_DETOAST_DATUM_COPY(X))
  #define PG_GETARG_UNKNOWN_P(n)         DatumGetUnknownP(PG_GETARG_DATUM(n))
+#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
  #define PG_RETURN_UNKNOWN_P(x)         PG_RETURN_POINTER(x)
  
+#define PG_TEXTARG_GET_STR(arg_) \
+       DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
+#define PG_TEXT_GET_STR(textp_) \
+       DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
+#define PG_STR_GET_TEXT(str_) \
+       DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
+#define TEXTLEN(textp) \
+       text_length(PointerGetDatum(textp))
+#define TEXTPOS(buf_text, from_sub_text) \
+       text_position(buf_text, from_sub_text, 1)
+#define LEFT(buf_text, from_sub_text) \
+       text_substring(PointerGetDatum(buf_text), \
+                                       1, \
+                                       TEXTPOS(buf_text, from_sub_text) - 1, false)
+
  static int     text_cmp(text *arg1, text *arg2);
+static int32 text_length(Datum str);
+static int32 text_position(text *t1, text *t2, int matchnum);
+static text *text_substring(Datum str,
+                          int32 start,
+                          int32 length,
+                          bool length_not_specified);
+
+static void appendStringInfoText(StringInfo str, const text *t);
  
  
  /*****************************************************************************
@@ -44,10 +75,10 @@ static int  text_cmp(text *arg1, text *arg2);
   *
   *             Non-printable characters must be passed as '\nnn' (octal) and are
   *             converted to internal form.  '\' must be passed as '\\'.
- *             elog(ERROR, ...) if bad form.
+ *             ereport(ERROR, ...) if bad form.
   *
   *             BUGS:
- *                             The input is scaned twice.
+ *                             The input is scanned twice.
   *                             The error checking of input is minimal.
   */
  Datum
@@ -76,16 +107,18 @@ byteain(PG_FUNCTION_ARGS)
                         /*
                          * one backslash, not followed by 0 or ### valid octal
                          */
-                       elog(ERROR, "Bad input string for type bytea");
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                        errmsg("invalid input syntax for type bytea")));
                 }
         }
  
         byte += VARHDRSZ;
         result = (bytea *) palloc(byte);
-       result->vl_len = byte;          /* set varlena length */
+       VARATT_SIZEP(result) = byte;    /* set varlena length */
  
         tp = inputText;
-       rp = result->vl_dat;
+       rp = VARDATA(result);
         while (*tp != '\0')
         {
                 if (tp[0] != '\\')
@@ -111,10 +144,11 @@ byteain(PG_FUNCTION_ARGS)
                 else
                 {
                         /*
-                        * We should never get here. The first pass should not allow
-                        * it.
+                        * We should never get here. The first pass should not allow it.
                          */
-                       elog(ERROR, "Bad input string for type bytea");
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                        errmsg("invalid input syntax for type bytea")));
                 }
         }
  
@@ -141,28 +175,26 @@ byteaout(PG_FUNCTION_ARGS)
         int                     len;
  
         len = 1;                                        /* empty string has 1 char */
-       vp = vlena->vl_dat;
-       for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
+       vp = VARDATA(vlena);
+       for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
         {
                 if (*vp == '\\')
                         len += 2;
-               else if (isprint((unsigned char) *vp))
-                       len++;
-               else
+               else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
                         len += 4;
+               else
+                       len++;
         }
         rp = result = (char *) palloc(len);
-       vp = vlena->vl_dat;
-       for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
+       vp = VARDATA(vlena);
+       for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
         {
                 if (*vp == '\\')
                 {
                         *rp++ = '\\';
                         *rp++ = '\\';
                 }
-               else if (isprint((unsigned char) *vp))
-                       *rp++ = *vp;
-               else
+               else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
                 {
                         val = *vp;
                         rp[0] = '\\';
@@ -173,11 +205,43 @@ byteaout(PG_FUNCTION_ARGS)
                         rp[1] = DIG(val & 03);
                         rp += 4;
                 }
+               else
+                       *rp++ = *vp;
         }
         *rp = '\0';
         PG_RETURN_CSTRING(result);
  }
  
+/*
+ *             bytearecv                       - converts external binary format to bytea
+ *
+ * Argument 0 is a StringInfo message buffer.  Everything between the
+ * buffer's cursor and its end (buf->len - buf->cursor bytes) is taken as the
+ * value: a varlena of that many bytes plus VARHDRSZ is palloc'd, its length
+ * word is set, and the bytes are copied in with pq_copymsgbytes().
+ * (Presumably pq_copymsgbytes() also advances the buffer cursor -- confirm
+ * against libpq/pqformat.h.)
+ */
+Datum
+bytearecv(PG_FUNCTION_ARGS)
+{
+       StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
+       bytea      *result;
+       int                     nbytes;
+
+       nbytes = buf->len - buf->cursor;
+       result = (bytea *) palloc(nbytes + VARHDRSZ);
+       VARATT_SIZEP(result) = nbytes + VARHDRSZ;
+       pq_copymsgbytes(buf, VARDATA(result), nbytes);
+       PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ *             byteasend                       - converts bytea to binary format
+ *
+ * This is a special case: just copy the input...
+ *
+ * The argument is fetched with PG_GETARG_BYTEA_P_COPY() and returned
+ * unchanged; unlike textsend(), no pq_begintypsend()/pq_endtypsend() framing
+ * is built here.  (The _COPY variant presumably yields a freshly allocated,
+ * detoasted copy of the input -- confirm against fmgr.h.)
+ */
+Datum
+byteasend(PG_FUNCTION_ARGS)
+{
+       bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
+
+       PG_RETURN_BYTEA_P(vlena);
+}
+
  
  /*
   *             textin                  - converts "..." to internal representation
@@ -189,25 +253,11 @@ textin(PG_FUNCTION_ARGS)
         text       *result;
         int                     len;
  
-#ifdef MULTIBYTE
-       char       *ermsg;
-#endif
-
-       len = strlen(inputText) + VARHDRSZ;
-
-#ifdef MULTIBYTE
-       if ((ermsg = pg_verifymbstr(inputText, len - VARHDRSZ)))
-               elog(ERROR, "%s", ermsg);
-#endif
-
-       result = (text *) palloc(len);
-       VARATT_SIZEP(result) = len;
+       len = strlen(inputText);
+       result = (text *) palloc(len + VARHDRSZ);
+       VARATT_SIZEP(result) = len + VARHDRSZ;
  
-       memcpy(VARDATA(result), inputText, len - VARHDRSZ);
-
-#ifdef CYR_RECODE
-       convertstr(VARDATA(result), len - VARHDRSZ, 0);
-#endif
+       memcpy(VARDATA(result), inputText, len);
  
         PG_RETURN_TEXT_P(result);
  }
@@ -227,51 +277,96 @@ textout(PG_FUNCTION_ARGS)
         memcpy(result, VARDATA(t), len);
         result[len] = '\0';
  
-#ifdef CYR_RECODE
-       convertstr(result, len, 1);
-#endif
-
         PG_RETURN_CSTRING(result);
  }
  
-
  /*
- *             unknownin                       - converts "..." to internal representation
+ *             textrecv                        - converts external binary format to text
   */
  Datum
-unknownin(PG_FUNCTION_ARGS)
+textrecv(PG_FUNCTION_ARGS)
  {
-       char       *inputStr = PG_GETARG_CSTRING(0);
-       unknown    *result;
-       int                     len;
+       StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
+       text       *result;
+       char       *str;
+       int                     nbytes;
  
-       len = strlen(inputStr) + VARHDRSZ;
+       str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
  
-       result = (unknown *) palloc(len);
-       VARATT_SIZEP(result) = len;
+       result = (text *) palloc(nbytes + VARHDRSZ);
+       VARATT_SIZEP(result) = nbytes + VARHDRSZ;
+       memcpy(VARDATA(result), str, nbytes);
+       pfree(str);
+       PG_RETURN_TEXT_P(result);
+}
  
-       memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
+/*
+ *             textsend                        - converts text to binary format
+ */
+Datum
+textsend(PG_FUNCTION_ARGS)
+{
+       text       *t = PG_GETARG_TEXT_P(0);
+       StringInfoData buf;
  
-       PG_RETURN_UNKNOWN_P(result);
+       pq_begintypsend(&buf);
+       pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
+       PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
  }
  
  
+/*
+ *             unknownin                       - converts "..." to internal representation
+ *
+ * The value is handled here exactly like a cstring: the input string is
+ * duplicated with pstrdup() and the copy is returned via PG_RETURN_CSTRING.
+ * No varlena header is built.
+ */
+Datum
+unknownin(PG_FUNCTION_ARGS)
+{
+       char       *str = PG_GETARG_CSTRING(0);
+
+       /* representation is same as cstring, so a plain copy is the result */
+       PG_RETURN_CSTRING(pstrdup(str));
+}
+
  /*
   *             unknownout                      - converts internal representation to "..."
   */
  Datum
  unknownout(PG_FUNCTION_ARGS)
  {
-       unknown    *t = PG_GETARG_UNKNOWN_P(0);
-       int                     len;
-       char       *result;
+       /* representation is same as cstring */
+       char       *str = PG_GETARG_CSTRING(0);
  
-       len = VARSIZE(t) - VARHDRSZ;
-       result = (char *) palloc(len + 1);
-       memcpy(result, VARDATA(t), len);
-       result[len] = '\0';
+       PG_RETURN_CSTRING(pstrdup(str));
+}
  
-       PG_RETURN_CSTRING(result);
+/*
+ *             unknownrecv                     - converts external binary format to unknown
+ *
+ * Argument 0 is a StringInfo message buffer.  The remainder of the buffer
+ * (buf->len - buf->cursor bytes) is read with pq_getmsgtext() and the string
+ * it returns is handed back directly as the result.
+ *
+ * nbytes is filled in by pq_getmsgtext() but is not read afterwards; it
+ * exists only to satisfy that function's output parameter.
+ * (Presumably pq_getmsgtext() returns a NUL-terminated string -- confirm
+ * against libpq/pqformat.h.)
+ */
+Datum
+unknownrecv(PG_FUNCTION_ARGS)
+{
+       StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
+       char       *str;
+       int                     nbytes;
+
+       str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+       /* representation is same as cstring */
+       PG_RETURN_CSTRING(str);
+}
+
+/*
+ *             unknownsend                     - converts unknown to binary format
+ *
+ * The argument is treated as a NUL-terminated C string.  Its strlen(str)
+ * bytes (terminator excluded) are appended with pq_sendtext() to a buffer
+ * opened by pq_begintypsend(); the bytea produced by pq_endtypsend() is the
+ * result.
+ */
+Datum
+unknownsend(PG_FUNCTION_ARGS)
+{
+       /* representation is same as cstring */
+       char       *str = PG_GETARG_CSTRING(0);
+       StringInfoData buf;
+
+       pq_begintypsend(&buf);
+       pq_sendtext(&buf, str, strlen(str));
+       PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
  
  
@@ -285,19 +380,34 @@ unknownout(PG_FUNCTION_ARGS)
  Datum
  textlen(PG_FUNCTION_ARGS)
  {
-       text       *t = PG_GETARG_TEXT_P(0);
+       Datum           str = PG_GETARG_DATUM(0);
  
-#ifdef MULTIBYTE
-       /* optimization for single byte encoding */
-       if (pg_database_encoding_max_length() <= 1)
-               PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ);
+       /* try to avoid decompressing argument */
+       PG_RETURN_INT32(text_length(str));
+}
  
-       PG_RETURN_INT32(
-               pg_mbstrlen_with_len(VARDATA(t), VARSIZE(t) - VARHDRSZ)
-               );
-#else
-       PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ);
-#endif
+/*
+ * text_length -
+ *     Does the real work for textlen()
+ *
+ *     This is broken out so it can be called directly by other string processing
+ *     functions.  Note that the argument is passed as a Datum, to indicate that
+ *     it may still be in compressed form.  We can avoid decompressing it at all
+ *     in some cases.
+ *
+ *     When pg_database_encoding_max_length() is 1 the result is
+ *     toast_raw_datum_size(str) - VARHDRSZ, computed from the Datum without
+ *     calling DatumGetTextP().  Otherwise the value is fetched with
+ *     DatumGetTextP() and its data bytes are counted with
+ *     pg_mbstrlen_with_len().
+ *
+ *     NOTE(review): this is a plain static function returning int32, yet both
+ *     branches leave through PG_RETURN_INT32 rather than a plain "return" (as
+ *     text_position() uses).  Presumably harmless through the Datum -> int32
+ *     conversion, but worth confirming and tidying.
+ */
+static int32
+text_length(Datum str)
+{
+       /* fast path: with a single-byte encoding the raw size is the length */
+       if (pg_database_encoding_max_length() == 1)
+               PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
+       else
+       {
+               text       *t = DatumGetTextP(str);
+
+               PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
+                                                                                        VARSIZE(t) - VARHDRSZ));
+       }
+}
  
  /*
@@ -308,9 +418,10 @@ textlen(PG_FUNCTION_ARGS)
  Datum
  textoctetlen(PG_FUNCTION_ARGS)
  {
-       text    *arg = PG_GETARG_TEXT_P(0);
+       Datum           str = PG_GETARG_DATUM(0);
  
-       PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
+       /* We need not detoast the input at all */
+       PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
  }
  
  /*
@@ -334,11 +445,11 @@ textcat(PG_FUNCTION_ARGS)
         text       *result;
         char       *ptr;
  
-       len1 = (VARSIZE(t1) - VARHDRSZ);
+       len1 = VARSIZE(t1) - VARHDRSZ;
         if (len1 < 0)
                 len1 = 0;
  
-       len2 = (VARSIZE(t2) - VARHDRSZ);
+       len2 = VARSIZE(t2) - VARHDRSZ;
         if (len2 < 0)
                 len2 = 0;
  
@@ -372,100 +483,217 @@ textcat(PG_FUNCTION_ARGS)
   *     adjusting the length to be consistent with the "negative start" per SQL92.
   * If the length is less than zero, return the remaining string.
   *
- * Note that the arguments operate on octet length,
- *     so not aware of multi-byte character sets.
- *
- * Added multi-byte support.
+ * Added multibyte support.
   * - Tatsuo Ishii 1998-4-21
   * Changed behavior if starting position is less than one to conform to SQL92 behavior.
   * Formerly returned the entire string; now returns a portion.
   * - Thomas Lockhart 1998-12-10
   * Now uses faster TOAST-slicing interface
   * - John Gray 2002-02-22
+ * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
+ * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
+ * error; if E < 1, return '', not entire string). Fixed MB related bug when
+ * S > LC and < LC + 4 sometimes garbage characters are returned.
+ * - Joe Conway 2002-08-10
   */
  Datum
  text_substr(PG_FUNCTION_ARGS)
  {
-       text       *string;
-       int32           m = PG_GETARG_INT32(1);
-       int32           n = PG_GETARG_INT32(2);
-       int32       sm;
-       int32       sn;
-       int         eml = 1;
-#ifdef MULTIBYTE
-       int                     i;
-       int                     len;
-       text       *ret;
-       char       *p;
-#endif 
+       PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
+                                                                       PG_GETARG_INT32(1),
+                                                                       PG_GETARG_INT32(2),
+                                                                       false));
+}
  
-       /*
-        * starting position before the start of the string? then offset into
-        * the string per SQL92 spec...
-        */
-       if (m < 1)
+/*
+ * text_substr_no_len -
+ *       Wrapper to avoid opr_sanity failure due to
+ *       one function accepting a different number of args.
+ *
+ *       Two-argument form: argument 0 is the string (passed on as a Datum)
+ *       and argument 1 is the start position.  text_substring() is called
+ *       with length -1 and length_not_specified = true, which it treats as
+ *       "through to the end of the string".
+ */
+Datum
+text_substr_no_len(PG_FUNCTION_ARGS)
+{
+       PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
+                                                                       PG_GETARG_INT32(1),
+                                                                       -1, true));
+}
+
+/*
+ * text_substring -
+ *     Does the real work for text_substr() and text_substr_no_len()
+ *
+ *     This is broken out so it can be called directly by other string processing
+ *     functions.      Note that the argument is passed as a Datum, to indicate that
+ *     it may still be in compressed/toasted form.  We can avoid detoasting all
+ *     of it in some cases.
+ */
+static text *
+text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
+{
+       int32           eml = pg_database_encoding_max_length();
+       int32           S = start;              /* start position */
+       int32           S1;                             /* adjusted start position */
+       int32           L1;                             /* adjusted substring length */
+
+       /* life is easy if the encoding max length is 1 */
+       if (eml == 1)
         {
-               n += (m - 1);
-               m = 1;
-       }
-       /* Check for m > octet length is made in TOAST access routine */
+               S1 = Max(S, 1);
+
+               if (length_not_specified)               /* special case - get length to end of
+                                                                                * string */
+                       L1 = -1;
+               else
+               {
+                       /* end position */
+                       int                     E = S + length;
+
+                       /*
+                        * A negative value for L is the only way for the end position to
+                        * be before the start. SQL99 says to throw an error.
+                        */
+                       if (E < S)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SUBSTRING_ERROR),
+                                                errmsg("negative substring length not allowed")));
  
-       /* m will now become a zero-based starting position */
-       sm = m - 1;
-       sn = n;
+                       /*
+                        * A zero or negative value for the end position can happen if the
+                        * start was negative or one. SQL99 says to return a zero-length
+                        * string.
+                        */
+                       if (E < 1)
+                               return PG_STR_GET_TEXT("");
  
-#ifdef MULTIBYTE
-       eml = pg_database_encoding_max_length ();
+                       L1 = E - S1;
+               }
  
-       if (eml > 1)
+               /*
+                * If the start position is past the end of the string, SQL99 says to
+                * return a zero-length string -- DatumGetTextPSlice() will do that
+                * for us. Convert to zero-based starting position.
+                */
+               return DatumGetTextPSlice(str, S1 - 1, L1);
+       }
+       else if (eml > 1)
         {
-               sm = 0;
-               if (n > -1)
-                       sn = (m + n) * eml + 3; /* +3 to avoid mb characters overhanging slice end */
+               /*
+                * When encoding max length is > 1, we can't get LC without
+                * detoasting, so we'll grab a conservatively large slice now and go
+                * back later to do the right thing
+                */
+               int32           slice_start;
+               int32           slice_size;
+               int32           slice_strlen;
+               text       *slice;
+               int32           E1;
+               int32           i;
+               char       *p;
+               char       *s;
+               text       *ret;
+
+               /*
+                * if S is past the end of the string, the tuple toaster will return a
+                * zero-length string to us
+                */
+               S1 = Max(S, 1);
+
+               /*
+                * We need to start at position zero because there is no way to know
+                * in advance which byte offset corresponds to the supplied start
+                * position.
+                */
+               slice_start = 0;
+
+               if (length_not_specified)               /* special case - get length to end of
+                                                                                * string */
+                       slice_size = L1 = -1;
                 else
-                       sn = n;         /* n < 0 is special-cased by heap_tuple_untoast_attr_slice */
-       }
-#endif 
+               {
+                       int                     E = S + length;
  
-       string = PG_GETARG_TEXT_P_SLICE (0, sm, sn);
+                       /*
+                        * A negative value for L is the only way for the end position to
+                        * be before the start. SQL99 says to throw an error.
+                        */
+                       if (E < S)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SUBSTRING_ERROR),
+                                                errmsg("negative substring length not allowed")));
  
-       if (eml == 1) 
-       {
-               PG_RETURN_TEXT_P (string);
-       }
-#ifndef MULTIBYTE
-       PG_RETURN_NULL();   /* notreached: suppress compiler warning */
-#endif
-#ifdef MULTIBYTE
-       if (n > -1)
-               len = pg_mbstrlen_with_len (VARDATA (string), sn - 3);
-       else    /* n < 0 is special-cased; need full string length */
-               len = pg_mbstrlen_with_len (VARDATA (string), VARSIZE(string)-VARHDRSZ);
+                       /*
+                        * A zero or negative value for the end position can happen if the
+                        * start was negative or one. SQL99 says to return a zero-length
+                        * string.
+                        */
+                       if (E < 1)
+                               return PG_STR_GET_TEXT("");
  
-       if (m > len)
-       {
-               m = 1;
-               n = 0;
-       }
-       m--;
-       if (((m + n) > len) || (n < 0))
-               n = (len - m);
+                       /*
+                        * if E is past the end of the string, the tuple toaster will
+                        * truncate the length for us
+                        */
+                       L1 = E - S1;
  
-       p = VARDATA(string);
-       for (i = 0; i < m; i++)
-               p += pg_mblen(p);
-       m = p - VARDATA(string);
-       for (i = 0; i < n; i++)
-               p += pg_mblen(p);
-       n = p - (VARDATA(string) + m);
+                       /*
+                        * Total slice size in bytes can't be any longer than the start
+                        * position plus substring length times the encoding max length.
+                        */
+                       slice_size = (S1 + L1) * eml;
+               }
+               slice = DatumGetTextPSlice(str, slice_start, slice_size);
  
-       ret = (text *) palloc(VARHDRSZ + n);
-       VARATT_SIZEP(ret) = VARHDRSZ + n;
+               /* see if we got back an empty string */
+               if ((VARSIZE(slice) - VARHDRSZ) == 0)
+                       return PG_STR_GET_TEXT("");
  
-       memcpy(VARDATA(ret), VARDATA(string) + m, n);
+               /* Now we can get the actual length of the slice in MB characters */
+               slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
  
-       PG_RETURN_TEXT_P(ret);
-#endif
+               /*
+                * Check that the start position wasn't > slice_strlen. If so, SQL99
+                * says to return a zero-length string.
+                */
+               if (S1 > slice_strlen)
+                       return PG_STR_GET_TEXT("");
+
+               /*
+                * Adjust L1 and E1 now that we know the slice string length. Again
+                * remember that S1 is one based, and slice_start is zero based.
+                */
+               if (L1 > -1)
+                       E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
+               else
+                       E1 = slice_start + 1 + slice_strlen;
+
+               /*
+                * Find the start position in the slice; remember S1 is not zero based
+                */
+               p = VARDATA(slice);
+               for (i = 0; i < S1 - 1; i++)
+                       p += pg_mblen(p);
+
+               /* hang onto a pointer to our start position */
+               s = p;
+
+               /*
+                * Count the actual bytes used by the substring of the requested
+                * length.
+                */
+               for (i = S1; i < E1; i++)
+                       p += pg_mblen(p);
+
+               ret = (text *) palloc(VARHDRSZ + (p - s));
+               VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
+               memcpy(VARDATA(ret), s, (p - s));
+
+               return ret;
+       }
+       else
+               elog(ERROR, "invalid backend encoding: encoding max length < 1");
+
+       /* not reached: suppress compiler warning */
+       return NULL;
  }
  
  /*
@@ -474,64 +702,111 @@ text_substr(PG_FUNCTION_ARGS)
   *       Implements the SQL92 POSITION() function.
   *       Ref: A Guide To The SQL Standard, Date & Darwen, 1997
   * - thomas 1997-07-27
- *
- * Added multi-byte support.
- * - Tatsuo Ishii 1998-4-21
   */
  Datum
  textpos(PG_FUNCTION_ARGS)
  {
-       text       *t1 = PG_GETARG_TEXT_P(0);
-       text       *t2 = PG_GETARG_TEXT_P(1);
-       int                     pos;
-       int                     px,
-                               p;
-       int                     len1,
+       text       *str = PG_GETARG_TEXT_P(0);
+       text       *search_str = PG_GETARG_TEXT_P(1);
+
+       PG_RETURN_INT32(text_position(str, search_str, 1));
+}
+
+/*
+ * text_position -
+ *     Does the real work for textpos()
+ *
+ * Inputs:
+ *             t1 - string to be searched
+ *             t2 - pattern to match within t1
+ *             matchnum - number of the match to be found (1 is the first match)
+ * Result:
+ *             Character index of the first matched char, starting from 1,
+ *             or 0 if no match.
+ *
+ *     This is broken out so it can be called directly by other string processing
+ *     functions.
+ */
+static int32
+text_position(text *t1, text *t2, int matchnum)
+{
+       int                     match = 0,
+                               pos = 0,
+                               p,
+                               px,
+                               len1,
                                 len2;
-       pg_wchar   *p1,
-                          *p2;
  
-#ifdef MULTIBYTE
-       pg_wchar   *ps1,
-                          *ps2;
-#endif
+       if (matchnum <= 0)
+               return 0;                               /* result for 0th match */
  
         if (VARSIZE(t2) <= VARHDRSZ)
-               PG_RETURN_INT32(1);             /* result for empty pattern */
+               return 1;                               /* result for empty pattern */
  
-       len1 = (VARSIZE(t1) - VARHDRSZ);
-       len2 = (VARSIZE(t2) - VARHDRSZ);
-#ifdef MULTIBYTE
-       ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
-       (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
-       len1 = pg_wchar_strlen(p1);
-       ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
-       (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
-       len2 = pg_wchar_strlen(p2);
-#else
-       p1 = VARDATA(t1);
-       p2 = VARDATA(t2);
-#endif
-       pos = 0;
-       px = (len1 - len2);
-       for (p = 0; p <= px; p++)
+       len1 = VARSIZE(t1) - VARHDRSZ;
+       len2 = VARSIZE(t2) - VARHDRSZ;
+
+       if (pg_database_encoding_max_length() == 1)
         {
-#ifdef MULTIBYTE
-               if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
-#else
-               if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
-#endif
+               /* simple case - single byte encoding */
+               char       *p1,
+                                  *p2;
+
+               p1 = VARDATA(t1);
+               p2 = VARDATA(t2);
+
+               /* no use in searching str past point where search_str will fit */
+               px = (len1 - len2);
+
+               for (p = 0; p <= px; p++)
                 {
-                       pos = p + 1;
-                       break;
-               };
-               p1++;
-       };
-#ifdef MULTIBYTE
-       pfree(ps1);
-       pfree(ps2);
-#endif
-       PG_RETURN_INT32(pos);
+                       if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
+                       {
+                               if (++match == matchnum)
+                               {
+                                       pos = p + 1;
+                                       break;
+                               }
+                       }
+                       p1++;
+               }
+       }
+       else
+       {
+               /* not as simple - multibyte encoding */
+               pg_wchar   *p1,
+                                  *p2,
+                                  *ps1,
+                                  *ps2;
+
+               ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
+               (void) pg_mb2wchar_with_len(VARDATA(t1), p1, len1);
+               len1 = pg_wchar_strlen(p1);
+               ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
+               (void) pg_mb2wchar_with_len(VARDATA(t2), p2, len2);
+               len2 = pg_wchar_strlen(p2);
+
+               /* no use in searching str past point where search_str will fit */
+               px = (len1 - len2);
+
+               for (p = 0; p <= px; p++)
+               {
+                       if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
+                       {
+                               if (++match == matchnum)
+                               {
+                                       pos = p + 1;
+                                       break;
+                               }
+                       }
+                       p1++;
+               }
+
+               pfree(ps1);
+               pfree(ps2);
+       }
+
+       return pos;
  }
  
  /* varstr_cmp()
@@ -544,38 +819,129 @@ int
  varstr_cmp(char *arg1, int len1, char *arg2, int len2)
  {
         int                     result;
-       char       *a1p,
-                          *a2p;
  
         /*
-        * Unfortunately, there is no strncoll(), so in the non-C locale
-        * case we have to do some memory copying.  This turns out to be
-        * significantly slower, so we optimize the case where LC_COLLATE
-        * is C.
+        * Unfortunately, there is no strncoll(), so in the non-C locale case we
+        * have to do some memory copying.      This turns out to be significantly
+        * slower, so we optimize the case where LC_COLLATE is C.  We also try to
+        * optimize relatively-short strings by avoiding palloc/pfree overhead.
          */
-       if (!lc_collate_is_c())
+       if (lc_collate_is_c())
+       {
+               result = strncmp(arg1, arg2, Min(len1, len2));
+               if ((result == 0) && (len1 != len2))
+                       result = (len1 < len2) ? -1 : 1;
+       }
+       else
         {
-               a1p = (char *) palloc(len1 + 1);
-               a2p = (char *) palloc(len2 + 1);
+#define STACKBUFLEN            1024
+
+               char            a1buf[STACKBUFLEN];
+               char            a2buf[STACKBUFLEN];
+               char       *a1p,
+                                  *a2p;
+
+#ifdef WIN32
+               /* Win32 does not have UTF-8, so we need to map to UTF-16 */
+               if (GetDatabaseEncoding() == PG_UTF8)
+               {
+                       int                     a1len;
+                       int                     a2len;
+                       int                     r;
+
+                       if (len1 >= STACKBUFLEN / 2)
+                       {
+                               a1len = len1 * 2 + 2;
+                               a1p = palloc(a1len);
+                       }
+                       else
+                       {
+                               a1len = STACKBUFLEN;
+                               a1p = a1buf;
+                       }
+                       if (len2 >= STACKBUFLEN / 2)
+                       {
+                               a2len = len2 * 2 + 2;
+                               a2p = palloc(a2len);
+                       }
+                       else
+                       {
+                               a2len = STACKBUFLEN;
+                               a2p = a2buf;
+                       }
+
+                       /* stupid Microsloth API does not work for zero-length input */
+                       if (len1 == 0)
+                               r = 0;
+                       else
+                       {
+                               r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
+                                                                               (LPWSTR) a1p, a1len / 2);
+                               if (!r)
+                                       ereport(ERROR,
+                                        (errmsg("could not convert string to UTF-16: error %lu",
+                                                        GetLastError())));
+                       }
+                       ((LPWSTR) a1p)[r] = 0;
+
+                       if (len2 == 0)
+                               r = 0;
+                       else
+                       {
+                               r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
+                                                                               (LPWSTR) a2p, a2len / 2);
+                               if (!r)
+                                       ereport(ERROR,
+                                        (errmsg("could not convert string to UTF-16: error %lu",
+                                                        GetLastError())));
+                       }
+                       ((LPWSTR) a2p)[r] = 0;
+
+                       errno = 0;
+                       result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
+                       if (result == 2147483647)       /* _NLSCMPERROR; missing from mingw
+                                                                                * headers */
+                               ereport(ERROR,
+                                               (errmsg("could not compare Unicode strings: %m")));
+
+                       if (a1p != a1buf)
+                               pfree(a1p);
+                       if (a2p != a2buf)
+                               pfree(a2p);
+
+                       return result;
+               }
+#endif   /* WIN32 */
+
+               if (len1 >= STACKBUFLEN)
+                       a1p = (char *) palloc(len1 + 1);
+               else
+                       a1p = a1buf;
+               if (len2 >= STACKBUFLEN)
+                       a2p = (char *) palloc(len2 + 1);
+               else
+                       a2p = a2buf;
  
                 memcpy(a1p, arg1, len1);
-               *(a1p + len1) = '\0';
+               a1p[len1] = '\0';
                 memcpy(a2p, arg2, len2);
-               *(a2p + len2) = '\0';
+               a2p[len2] = '\0';
  
                 result = strcoll(a1p, a2p);
  
-               pfree(a1p);
-               pfree(a2p);
-       }
-       else
-       {
-               a1p = arg1;
-               a2p = arg2;
+               /*
+                * In some locales strcoll() can claim that nonidentical strings are
+                * equal.  Believing that would be bad news for a number of reasons,
+                * so we follow Perl's lead and sort "equal" strings according to
+                * strcmp().
+                */
+               if (result == 0)
+                       result = strcmp(a1p, a2p);
  
-               result = strncmp(a1p, a2p, Min(len1, len2));
-               if ((result == 0) && (len1 != len2))
-                       result = (len1 < len2) ? -1 : 1;
+               if (a1p != a1buf)
+                       pfree(a1p);
+               if (a2p != a2buf)
+                       pfree(a2p);
         }
  
         return result;
@@ -618,11 +984,15 @@ texteq(PG_FUNCTION_ARGS)
         text       *arg2 = PG_GETARG_TEXT_P(1);
         bool            result;
  
-       /* fast path for different-length inputs */
+       /*
+        * Since we only care about equality or not-equality, we can avoid all the
+        * expense of strcoll() here, and just do bitwise comparison.
+        */
         if (VARSIZE(arg1) != VARSIZE(arg2))
                 result = false;
         else
-               result = (text_cmp(arg1, arg2) == 0);
+               result = (strncmp(VARDATA(arg1), VARDATA(arg2),
+                                                 VARSIZE(arg1) - VARHDRSZ) == 0);
  
         PG_FREE_IF_COPY(arg1, 0);
         PG_FREE_IF_COPY(arg2, 1);
@@ -637,11 +1007,15 @@ textne(PG_FUNCTION_ARGS)
         text       *arg2 = PG_GETARG_TEXT_P(1);
         bool            result;
  
-       /* fast path for different-length inputs */
+       /*
+        * Since we only care about equality or not-equality, we can avoid all the
+        * expense of strcoll() here, and just do bitwise comparison.
+        */
         if (VARSIZE(arg1) != VARSIZE(arg2))
                 result = true;
         else
-               result = (text_cmp(arg1, arg2) != 0);
+               result = (strncmp(VARDATA(arg1), VARDATA(arg2),
+                                                 VARSIZE(arg1) - VARHDRSZ) != 0);
  
         PG_FREE_IF_COPY(arg1, 0);
         PG_FREE_IF_COPY(arg2, 1);
@@ -749,6 +1123,149 @@ text_smaller(PG_FUNCTION_ARGS)
         PG_RETURN_TEXT_P(result);
  }
  
+
+/*
+ * The following operators support character-by-character comparison
+ * of text data types, to allow building indexes suitable for LIKE
+ * clauses.
+ */
+
+/*
+ * internal_text_pattern_compare
+ *       Bytewise three-way comparison of two text values: memcmp() over the
+ *       data both values have in common, with the VARSIZE difference used as
+ *       the tie-breaker.  Returns <0, 0 or >0.
+ */
+static int
+internal_text_pattern_compare(text *arg1, text *arg2)
+{
+       int                     size1 = VARSIZE(arg1);
+       int                     size2 = VARSIZE(arg2);
+       int                     cmp;
+
+       /* Compare only the bytes that exist in both values. */
+       cmp = memcmp(VARDATA(arg1), VARDATA(arg2),
+                                Min(size1, size2) - VARHDRSZ);
+       if (cmp != 0)
+               return cmp;
+
+       /* Common part is identical, so the shorter value sorts first. */
+       if (size1 == size2)
+               return 0;
+       return (size1 < size2) ? -1 : 1;
+}
+
+
+/*
+ * text_pattern_lt
+ *       True when argument 0 sorts strictly before argument 1 according to
+ *       internal_text_pattern_compare().
+ */
+Datum
+text_pattern_lt(PG_FUNCTION_ARGS)
+{
+       text       *lhs = PG_GETARG_TEXT_P(0);
+       text       *rhs = PG_GETARG_TEXT_P(1);
+       bool            is_less;
+
+       is_less = (internal_text_pattern_compare(lhs, rhs) < 0);
+
+       /* Done with the argument values. */
+       PG_FREE_IF_COPY(lhs, 0);
+       PG_FREE_IF_COPY(rhs, 1);
+
+       PG_RETURN_BOOL(is_less);
+}
+
+
+/*
+ * text_pattern_le
+ *       True when argument 0 sorts before or equal to argument 1 according to
+ *       internal_text_pattern_compare().
+ */
+Datum
+text_pattern_le(PG_FUNCTION_ARGS)
+{
+       text       *lhs = PG_GETARG_TEXT_P(0);
+       text       *rhs = PG_GETARG_TEXT_P(1);
+       bool            is_less_or_equal;
+
+       is_less_or_equal = (internal_text_pattern_compare(lhs, rhs) <= 0);
+
+       /* Done with the argument values. */
+       PG_FREE_IF_COPY(lhs, 0);
+       PG_FREE_IF_COPY(rhs, 1);
+
+       PG_RETURN_BOOL(is_less_or_equal);
+}
+
+
+/*
+ * text_pattern_eq
+ *       True when both arguments have the same VARSIZE and compare equal under
+ *       internal_text_pattern_compare().  Values of different size are
+ *       reported unequal without comparing their contents.
+ */
+Datum
+text_pattern_eq(PG_FUNCTION_ARGS)
+{
+       text       *lhs = PG_GETARG_TEXT_P(0);
+       text       *rhs = PG_GETARG_TEXT_P(1);
+       bool            is_equal;
+
+       /* The size test short-circuits the bytewise comparison. */
+       is_equal = (VARSIZE(lhs) == VARSIZE(rhs)) &&
+               (internal_text_pattern_compare(lhs, rhs) == 0);
+
+       PG_FREE_IF_COPY(lhs, 0);
+       PG_FREE_IF_COPY(rhs, 1);
+
+       PG_RETURN_BOOL(is_equal);
+}
+
+
+/*
+ * text_pattern_ge
+ *       True when argument 0 sorts after or equal to argument 1 according to
+ *       internal_text_pattern_compare().
+ */
+Datum
+text_pattern_ge(PG_FUNCTION_ARGS)
+{
+       text       *lhs = PG_GETARG_TEXT_P(0);
+       text       *rhs = PG_GETARG_TEXT_P(1);
+       bool            is_greater_or_equal;
+
+       is_greater_or_equal = (internal_text_pattern_compare(lhs, rhs) >= 0);
+
+       /* Done with the argument values. */
+       PG_FREE_IF_COPY(lhs, 0);
+       PG_FREE_IF_COPY(rhs, 1);
+
+       PG_RETURN_BOOL(is_greater_or_equal);
+}
+
+
+/*
+ * text_pattern_gt
+ *       True when argument 0 sorts strictly after argument 1 according to
+ *       internal_text_pattern_compare().
+ */
+Datum
+text_pattern_gt(PG_FUNCTION_ARGS)
+{
+       text       *lhs = PG_GETARG_TEXT_P(0);
+       text       *rhs = PG_GETARG_TEXT_P(1);
+       bool            is_greater;
+
+       is_greater = (internal_text_pattern_compare(lhs, rhs) > 0);
+
+       /* Done with the argument values. */
+       PG_FREE_IF_COPY(lhs, 0);
+       PG_FREE_IF_COPY(rhs, 1);
+
+       PG_RETURN_BOOL(is_greater);
+}
+
+
+/*
+ * text_pattern_ne
+ *       True when the arguments differ in VARSIZE or compare unequal under
+ *       internal_text_pattern_compare().  Values of different size are
+ *       reported unequal without comparing their contents.
+ */
+Datum
+text_pattern_ne(PG_FUNCTION_ARGS)
+{
+       text       *lhs = PG_GETARG_TEXT_P(0);
+       text       *rhs = PG_GETARG_TEXT_P(1);
+       bool            differs;
+
+       /* The size test short-circuits the bytewise comparison. */
+       differs = (VARSIZE(lhs) != VARSIZE(rhs)) ||
+               (internal_text_pattern_compare(lhs, rhs) != 0);
+
+       PG_FREE_IF_COPY(lhs, 0);
+       PG_FREE_IF_COPY(rhs, 1);
+
+       PG_RETURN_BOOL(differs);
+}
+
+
+/*
+ * bttext_pattern_cmp
+ *       Three-way comparison entry point: hands back the result of
+ *       internal_text_pattern_compare() for its two text arguments as an
+ *       int32 datum.
+ */
+Datum
+bttext_pattern_cmp(PG_FUNCTION_ARGS)
+{
+       text       *lhs = PG_GETARG_TEXT_P(0);
+       text       *rhs = PG_GETARG_TEXT_P(1);
+       int                     ordering = internal_text_pattern_compare(lhs, rhs);
+
+       /* Done with the argument values. */
+       PG_FREE_IF_COPY(lhs, 0);
+       PG_FREE_IF_COPY(rhs, 1);
+
+       PG_RETURN_INT32(ordering);
+}
+
+
  /*-------------------------------------------------------------
   * byteaoctetlen
   *
@@ -758,9 +1275,10 @@ text_smaller(PG_FUNCTION_ARGS)
  Datum
  byteaoctetlen(PG_FUNCTION_ARGS)
  {
-       bytea      *v = PG_GETARG_BYTEA_P(0);
+       Datum           str = PG_GETARG_DATUM(0);
  
-       PG_RETURN_INT32(VARSIZE(v) - VARHDRSZ);
+       /*
+        * We need not detoast the input at all: the argument is fetched as a
+        * plain Datum and its size is obtained from toast_raw_datum_size().
+        * Subtracting VARHDRSZ gives the length that is returned.
+        */
+       PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
  }
  
  /*
@@ -781,11 +1299,11 @@ byteacat(PG_FUNCTION_ARGS)
         bytea      *result;
         char       *ptr;
  
-       len1 = (VARSIZE(t1) - VARHDRSZ);
+       len1 = VARSIZE(t1) - VARHDRSZ;
         if (len1 < 0)
                 len1 = 0;
  
-       len2 = (VARSIZE(t2) - VARHDRSZ);
+       len2 = VARSIZE(t2) - VARHDRSZ;
         if (len2 < 0)
                 len2 = 0;
  
@@ -805,6 +1323,8 @@ byteacat(PG_FUNCTION_ARGS)
         PG_RETURN_BYTEA_P(result);
  }
  
+#define PG_STR_GET_BYTEA(str_) \
+       DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
  /*
   * bytea_substr()
   * Return a substring starting at the specified position.
@@ -813,33 +1333,72 @@ byteacat(PG_FUNCTION_ARGS)
   * Input:
   *     - string
   *     - starting position (is one-based)
- *     - string length
+ *     - string length (optional)
   *
   * If the starting position is zero or less, then return from the start of the string
   * adjusting the length to be consistent with the "negative start" per SQL92.
- * If the length is less than zero, return the remaining string.
- *
+ * If the length is less than zero, an ERROR is thrown. If no third argument
+ * (length) is provided, the length to the end of the string is assumed.
   */
  Datum
  bytea_substr(PG_FUNCTION_ARGS)
  {
-       int32           m = PG_GETARG_INT32(1);
-       int32           n = PG_GETARG_INT32(2);
+       int                     S = PG_GETARG_INT32(1); /* start position */
+       int                     S1;                             /* adjusted start position */
+       int                     L1;                             /* adjusted substring length */
  
-       /*
-        * starting position before the start of the string? then offset into
-        * the string per SQL92 spec...
-        */
-       if (m < 1)
+       S1 = Max(S, 1);
+
+       if (fcinfo->nargs == 2)
         {
-               n += (m - 1);
-               m = 1;
+               /*
+                * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
+                * the end of the string if we pass it a negative value for length.
+                */
+               L1 = -1;
         }
+       else
+       {
+               /* end position */
+               int                     E = S + PG_GETARG_INT32(2);
  
-       /* m will now become a zero-based starting position */
-       m--;
+               /*
+                * A negative value for L is the only way for the end position to be
+                * before the start. SQL99 says to throw an error.
+                */
+               if (E < S)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_SUBSTRING_ERROR),
+                                        errmsg("negative substring length not allowed")));
  
-       PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, m, n));
+               /*
+                * A zero or negative value for the end position can happen if the
+                * start was negative or one. SQL99 says to return a zero-length
+                * string.
+                */
+               if (E < 1)
+                       PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
+
+               L1 = E - S1;
+       }
+
+       /*
+        * If the start position is past the end of the string, SQL99 says to
+        * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
+        * for us. Convert to zero-based starting position
+        */
+       PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
+}
+
+/*
+ * bytea_substr_no_len -
+ *       Separate entry point that forwards its call info unchanged to
+ *       bytea_substr().  It exists to avoid an opr_sanity failure due to
+ *       one function accepting a different number of args.
+ */
+Datum
+bytea_substr_no_len(PG_FUNCTION_ARGS)
+{
+       Datum           result = bytea_substr(fcinfo);
+
+       return result;
  }
  
  /*
@@ -864,8 +1423,8 @@ byteapos(PG_FUNCTION_ARGS)
         if (VARSIZE(t2) <= VARHDRSZ)
                 PG_RETURN_INT32(1);             /* result for empty pattern */
  
-       len1 = (VARSIZE(t1) - VARHDRSZ);
-       len2 = (VARSIZE(t2) - VARHDRSZ);
+       len1 = VARSIZE(t1) - VARHDRSZ;
+       len2 = VARSIZE(t2) - VARHDRSZ;
  
         p1 = VARDATA(t1);
         p2 = VARDATA(t2);
@@ -903,8 +1462,10 @@ byteaGetByte(PG_FUNCTION_ARGS)
         len = VARSIZE(v) - VARHDRSZ;
  
         if (n < 0 || n >= len)
-               elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
-                        n, len - 1);
+               ereport(ERROR,
+                               (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                                errmsg("index %d out of valid range, 0..%d",
+                                               n, len - 1)));
  
         byte = ((unsigned char *) VARDATA(v))[n];
  
@@ -932,8 +1493,10 @@ byteaGetBit(PG_FUNCTION_ARGS)
         len = VARSIZE(v) - VARHDRSZ;
  
         if (n < 0 || n >= len * 8)
-               elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
-                        n, len * 8 - 1);
+               ereport(ERROR,
+                               (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                                errmsg("index %d out of valid range, 0..%d",
+                                               n, len * 8 - 1)));
  
         byteNo = n / 8;
         bitNo = n % 8;
@@ -966,8 +1529,10 @@ byteaSetByte(PG_FUNCTION_ARGS)
         len = VARSIZE(v) - VARHDRSZ;
  
         if (n < 0 || n >= len)
-               elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
-                        n, len - 1);
+               ereport(ERROR,
+                               (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                                errmsg("index %d out of valid range, 0..%d",
+                                               n, len - 1)));
  
         /*
          * Make a copy of the original varlena.
@@ -1007,8 +1572,10 @@ byteaSetBit(PG_FUNCTION_ARGS)
         len = VARSIZE(v) - VARHDRSZ;
  
         if (n < 0 || n >= len * 8)
-               elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
-                        n, len * 8 - 1);
+               ereport(ERROR,
+                               (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                                errmsg("index %d out of valid range, 0..%d",
+                                               n, len * 8 - 1)));
  
         byteNo = n / 8;
         bitNo = n % 8;
@@ -1017,7 +1584,9 @@ byteaSetBit(PG_FUNCTION_ARGS)
          * sanity check!
          */
         if (newBit != 0 && newBit != 1)
-               elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("new bit must be 0 or 1")));
  
         /*
          * Make a copy of the original varlena.
@@ -1109,12 +1678,12 @@ name_text(PG_FUNCTION_ARGS)
   * truncate names if they're too long.
   */
  List *
-textToQualifiedNameList(text *textval, const char *caller)
+textToQualifiedNameList(text *textval)
  {
         char       *rawname;
         List       *result = NIL;
         List       *namelist;
-       List       *l;
+       ListCell   *l;
  
         /* Convert to C string (handles possible detoasting). */
         /* Note we rely on being able to modify rawname below. */
@@ -1122,20 +1691,24 @@ textToQualifiedNameList(text *textval, const char *caller)
                                                                                                   PointerGetDatum(textval)));
  
         if (!SplitIdentifierString(rawname, '.', &namelist))
-               elog(ERROR, "%s: invalid name syntax", caller);
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_NAME),
+                                errmsg("invalid name syntax")));
  
         if (namelist == NIL)
-               elog(ERROR, "%s: invalid name syntax", caller);
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_NAME),
+                                errmsg("invalid name syntax")));
  
         foreach(l, namelist)
         {
-               char   *curname = (char *) lfirst(l);
+               char       *curname = (char *) lfirst(l);
  
                 result = lappend(result, makeString(pstrdup(curname)));
         }
  
         pfree(rawname);
-       freeList(namelist);
+       list_free(namelist);
  
         return result;
  }
@@ -1144,19 +1717,19 @@ textToQualifiedNameList(text *textval, const char *caller)
   * SplitIdentifierString --- parse a string containing identifiers
   *
   * This is the guts of textToQualifiedNameList, and is exported for use in
- * other situations such as parsing GUC variables.  In the GUC case, it's
+ * other situations such as parsing GUC variables.     In the GUC case, it's
   * important to avoid memory leaks, so the API is designed to minimize the
   * amount of stuff that needs to be allocated and freed.
   *
   * Inputs:
- *     rawstring: the input string; must be overwritable!  On return, it's
+ *     rawstring: the input string; must be overwritable!      On return, it's
   *                        been modified to contain the separated identifiers.
   *     separator: the separator punctuation expected between identifiers
- *                        (typically '.' or ',').  Whitespace may also appear around
+ *                        (typically '.' or ',').      Whitespace may also appear around
   *                        identifiers.
   * Outputs:
   *     namelist: filled with a palloc'd list of pointers to identifiers within
- *                       rawstring.  Caller should freeList() this even on error return.
+ *                       rawstring.  Caller should list_free() this even on error return.
   *
   * Returns TRUE if okay, FALSE if there is a syntax error in the string.
   *
@@ -1183,7 +1756,6 @@ SplitIdentifierString(char *rawstring, char separator,
         {
                 char       *curname;
                 char       *endp;
-               int                     curlen;
  
                 if (*nextp == '\"')
                 {
@@ -1193,11 +1765,11 @@ SplitIdentifierString(char *rawstring, char separator,
                         {
                                 endp = strchr(nextp + 1, '\"');
                                 if (endp == NULL)
-                                       return false; /* mismatched quotes */
+                                       return false;           /* mismatched quotes */
                                 if (endp[1] != '\"')
                                         break;          /* found end of quoted name */
                                 /* Collapse adjacent quotes into one quote, and look again */
-                               memmove(endp, endp+1, strlen(endp));
+                               memmove(endp, endp + 1, strlen(endp));
                                 nextp = endp;
                         }
                         /* endp now points at the terminating quote */
@@ -1206,21 +1778,31 @@ SplitIdentifierString(char *rawstring, char separator,
                 else
                 {
                         /* Unquoted name --- extends to separator or whitespace */
+                       char       *downname;
+                       int                     len;
+
                         curname = nextp;
                         while (*nextp && *nextp != separator &&
                                    !isspace((unsigned char) *nextp))
-                       {
-                               /*
-                                * It's important that this match the identifier downcasing
-                                * code used by backend/parser/scan.l.
-                                */
-                               if (isupper((unsigned char) *nextp))
-                                       *nextp = tolower((unsigned char) *nextp);
                                 nextp++;
-                       }
                         endp = nextp;
                         if (curname == nextp)
                                 return false;   /* empty unquoted name not allowed */
+
+                       /*
+                        * Downcase the identifier, using same code as main lexer does.
+                        *
+                        * XXX because we want to overwrite the input in-place, we cannot
+                        * support a downcasing transformation that increases the string
+                        * length.      This is not a problem given the current implementation
+                        * of downcase_truncate_identifier, but we'll probably have to do
+                        * something about this someday.
+                        */
+                       len = endp - curname;
+                       downname = downcase_truncate_identifier(curname, len, false);
+                       Assert(strlen(downname) <= len);
+                       strncpy(curname, downname, len);
+                       pfree(downname);
                 }
  
                 while (isspace((unsigned char) *nextp))
@@ -1241,17 +1823,8 @@ SplitIdentifierString(char *rawstring, char separator,
                 /* Now safe to overwrite separator with a null */
                 *endp = '\0';
  
-               /* Truncate name if it's overlength; again, should match scan.l */
-               curlen = strlen(curname);
-               if (curlen >= NAMEDATALEN)
-               {
-#ifdef MULTIBYTE
-                       curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
-                       curname[curlen] = '\0';
-#else
-                       curname[NAMEDATALEN - 1] = '\0';
-#endif
-               }
+               /* Truncate name if it's overlength */
+               truncate_identifier(curname, strlen(curname), false);
  
                 /*
                  * Finished isolating current name --- add it to list
@@ -1422,3 +1995,757 @@ byteacmp(PG_FUNCTION_ARGS)
  
         PG_RETURN_INT32(cmp);
  }
+
+/*
+ * appendStringInfoText
+ *
+ * Append the data portion of text value t to str.
+ * Like appendStringInfoString(str, PG_TEXT_GET_STR(s)) but faster.
+ */
+static void
+appendStringInfoText(StringInfo str, const text *t)
+{
+       const char *payload = VARDATA(t);
+       int                     payload_len = VARSIZE(t) - VARHDRSZ;
+
+       appendBinaryStringInfo(str, payload, payload_len);
+}
+
+/*
+ * replace_text
+ * Build a new text in which every occurrence of argument 1 ('from')
+ * inside argument 0 ('source') is replaced by argument 2 ('to').
+ *
+ * The source text itself is returned when either the source or the
+ * 'from' string is empty, or when 'from' does not occur in the source.
+ */
+Datum
+replace_text(PG_FUNCTION_ARGS)
+{
+       text       *source = PG_GETARG_TEXT_P(0);
+       text       *pattern = PG_GETARG_TEXT_P(1);
+       text       *replacement = PG_GETARG_TEXT_P(2);
+       int                     source_len = TEXTLEN(source);
+       int                     pattern_len = TEXTLEN(pattern);
+       text       *remaining;
+       text       *result;
+       int                     match_pos;
+       StringInfoData out;
+
+       /* Nothing to search in, or nothing to search for. */
+       if (source_len == 0 || pattern_len == 0)
+               PG_RETURN_TEXT_P(source);
+
+       /* No occurrence at all: hand the input back unchanged. */
+       match_pos = TEXTPOS(source, pattern);
+       if (match_pos == 0)
+               PG_RETURN_TEXT_P(source);
+
+       initStringInfo(&out);
+       remaining = source;
+
+       /*
+        * Each pass emits the text before the match followed by the
+        * replacement, then continues the search in what follows the match.
+        */
+       for (; match_pos > 0; match_pos = TEXTPOS(remaining, pattern))
+       {
+               text       *head;
+               text       *tail;
+
+               head = text_substring(PointerGetDatum(remaining),
+                                                         1, match_pos - 1, false);
+               tail = text_substring(PointerGetDatum(remaining),
+                                                         match_pos + pattern_len, -1, true);
+
+               appendStringInfoText(&out, head);
+               appendStringInfoText(&out, replacement);
+
+               /* Never free the caller-visible source text. */
+               if (remaining != source)
+                       pfree(remaining);
+               pfree(head);
+               remaining = tail;
+       }
+
+       /* Whatever is left after the last match is copied verbatim. */
+       appendStringInfoText(&out, remaining);
+       if (remaining != source)
+               pfree(remaining);
+
+       result = PG_STR_GET_TEXT(out.data);
+       pfree(out.data);
+
+       PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * check_replace_text_has_escape_char
+ *
+ * Report whether replace_text contains a backslash.  The scan advances one
+ * byte at a time when pg_database_encoding_max_length() is 1, and by
+ * pg_mblen() otherwise.
+ */
+static bool
+check_replace_text_has_escape_char(const text *replace_text)
+{
+       const char *cur = VARDATA(replace_text);
+       const char *stop = cur + (VARSIZE(replace_text) - VARHDRSZ);
+       bool            single_byte = (pg_database_encoding_max_length() == 1);
+
+       while (cur < stop)
+       {
+               if (*cur == '\\')
+                       return true;
+
+               /* Step to the start of the next character. */
+               cur += single_byte ? 1 : pg_mblen(cur);
+       }
+
+       return false;
+}
+
+/*
+ * appendStringInfoRegexpSubstr
+ *
+ * Append replace_text to str, substituting regexp back references for
+ * \n escapes.
+ *
+ * str: output buffer that receives the expanded replacement.
+ * replace_text: replacement template; a backslash followed by 1..9 selects
+ *             pmatch[1..9], a backslash followed by & selects pmatch[0], and a
+ *             doubled backslash yields a single backslash.
+ * pmatch: match offsets (rm_so/rm_eo) used to locate the referenced text.
+ * src_text: the text the offsets in pmatch refer to.
+ */
+static void
+appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
+                                                        regmatch_t *pmatch, text *src_text)
+{
+       const char *p = VARDATA(replace_text);
+       const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
+       int                     eml = pg_database_encoding_max_length();
+
+       for (;;)
+       {
+               const char *chunk_start = p;
+               int                     so;
+               int                     eo;
+
+               /*
+                * Find next escape char.  With a single-byte encoding we can step
+                * byte by byte; otherwise advance by pg_mblen() so that only the
+                * first byte of each character is tested.
+                */
+               if (eml == 1)
+               {
+                       for (; p < p_end && *p != '\\'; p++)
+                                /* nothing */ ;
+               }
+               else
+               {
+                       for (; p < p_end && *p != '\\'; p += pg_mblen(p))
+                                /* nothing */ ;
+               }
+
+               /* Copy the text we just scanned over, if any. */
+               if (p > chunk_start)
+                       appendBinaryStringInfo(str, chunk_start, p - chunk_start);
+
+               /* Done if at end of string, else advance over escape char. */
+               if (p >= p_end)
+                       break;
+               p++;
+
+               if (p >= p_end)
+               {
+                       /* Escape at very end of input.  Treat same as unexpected char */
+                       appendStringInfoChar(str, '\\');
+                       break;
+               }
+
+               if (*p >= '1' && *p <= '9')
+               {
+                       /*
+                        * Use the back reference of regexp: the digit following the
+                        * escape char is the index into pmatch[].
+                        */
+                       int                     idx = *p - '0';
+
+                       so = pmatch[idx].rm_so;
+                       eo = pmatch[idx].rm_eo;
+                       p++;
+               }
+               else if (*p == '&')
+               {
+                       /* Use the entire matched string, i.e. pmatch[0]. */
+                       so = pmatch[0].rm_so;
+                       eo = pmatch[0].rm_eo;
+                       p++;
+               }
+               else if (*p == '\\')
+               {
+                       /* \\ means transfer one \ to output. */
+                       appendStringInfoChar(str, '\\');
+                       p++;
+                       continue;
+               }
+               else
+               {
+                       /*
+                        * If escape char is not followed by any expected char, just treat
+                        * it as ordinary data to copy.  (XXX would it be better to throw
+                        * an error?)  Note that p is not advanced here, so the character
+                        * after the escape char is picked up by the next scan.
+                        */
+                       appendStringInfoChar(str, '\\');
+                       continue;
+               }
+
+               if (so != -1 && eo != -1)
+               {
+                       /*
+                        * Copy the text that is back reference of regexp.      Because so and
+                        * eo are counted in characters not bytes, it's easiest to use
+                        * text_substring to pull out the correct chunk of text.  Nothing
+                        * is appended when either offset is -1.
+                        */
+                       text       *append_text;
+
+                       append_text = text_substring(PointerGetDatum(src_text),
+                                                                                so + 1, (eo - so), false);
+                       appendStringInfoText(str, append_text);
+                       pfree(append_text);
+               }
+       }
+}
+
+#define REGEXP_REPLACE_BACKREF_CNT             10
+
+/*
+ * replace_text_regexp
+ *
+ * Replace the text in src_text that matches regexp with replace_text and
+ * return the result as a text value built from the accumulated buffer.
+ *
+ * src_text: the text to search in.
+ * regexp: the compiled regular expression (see the note below).
+ * replace_text: the replacement.  If check_replace_text_has_escape_char()
+ *             reports that it contains an escape character, it is expanded for
+ *             each match by appendStringInfoRegexpSubstr(); otherwise it is
+ *             appended as-is.
+ * glob: if true every match is replaced, if false only the first one.
+ *
+ * An error is raised if pg_regexec returns anything other than REG_OKAY or
+ * REG_NOMATCH.
+ *
+ * REGEXP_REPLACE_BACKREF_CNT is the number of regmatch_t slots handed to
+ * pg_regexec.  Slot 0 is used here as the whole match; the remaining slots
+ * are only consumed by appendStringInfoRegexpSubstr().
+ *
+ * Note: to avoid having to include regex.h in builtins.h, we declare
+ * the regexp argument as void *, but really it's regex_t *.
+ */
+text *
+replace_text_regexp(text *src_text, void *regexp,
+                                       text *replace_text, bool glob)
+{
+       text       *ret_text;
+       regex_t    *re = (regex_t *) regexp;
+       int                     src_text_len = VARSIZE(src_text) - VARHDRSZ;
+       StringInfoData buf;
+       regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
+       pg_wchar   *data;
+       size_t          data_len;
+       int                     search_start;   /* character index where the next search begins */
+       int                     data_pos;       /* character index up to which src_text was consumed */
+       bool            have_escape;
+
+       initStringInfo(&buf);
+
+       /*
+        * Convert data string to wide characters.  The array is sized from the
+        * byte length of src_text plus one; data_len is the count returned by
+        * pg_mb2wchar_with_len and is what the loop below works with.
+        */
+       data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
+       data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
+
+       /* Check whether replace_text has escape char (once, outside the loop). */
+       have_escape = check_replace_text_has_escape_char(replace_text);
+
+       for (search_start = data_pos = 0; search_start <= data_len;)
+       {
+               int                     regexec_result;
+
+               regexec_result = pg_regexec(re,
+                                                                       data,
+                                                                       data_len,
+                                                                       search_start,
+                                                                       NULL,           /* no details */
+                                                                       REGEXP_REPLACE_BACKREF_CNT,
+                                                                       pmatch,
+                                                                       0);
+
+               if (regexec_result == REG_NOMATCH)
+                       break;          /* no (further) match: fall through to the tail copy */
+
+               if (regexec_result != REG_OKAY)
+               {
+                       char            errMsg[100];
+
+                       pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+                                        errmsg("regular expression failed: %s", errMsg)));
+               }
+
+               /*
+                * Copy the text to the left of the match position, i.e. the
+                * characters between data_pos and the start of this match.  Because
+                * we are working with character not byte indexes, it's easiest to
+                * use text_substring to pull out the needed data.  (data_pos + 1 is
+                * passed as the start, presumably because text_substring counts
+                * positions from 1.)
+                */
+               if (pmatch[0].rm_so - data_pos > 0)
+               {
+                       text       *left_text;
+
+                       left_text = text_substring(PointerGetDatum(src_text),
+                                                                          data_pos + 1,
+                                                                          pmatch[0].rm_so - data_pos,
+                                                                          false);
+                       appendStringInfoText(&buf, left_text);
+                       pfree(left_text);
+               }
+
+               /*
+                * Copy the replace_text. Process back references when the
+                * replace_text has escape characters.
+                */
+               if (have_escape)
+                       appendStringInfoRegexpSubstr(&buf, replace_text, pmatch, src_text);
+               else
+                       appendStringInfoText(&buf, replace_text);
+
+               search_start = data_pos = pmatch[0].rm_eo;      /* continue after this match */
+
+               /*
+                * When global option is off, replace the first instance only.
+                */
+               if (!glob)
+                       break;
+
+               /*
+                * Search from next character when the matching text is zero width,
+                * so that the next search does not start at the same position again.
+                * Only search_start is advanced: data_pos stays put, so the skipped
+                * character is still copied by a later left-of-match or tail copy.
+                */
+               if (pmatch[0].rm_so == pmatch[0].rm_eo)
+                       search_start++;
+       }
+
+       /*
+        * Copy the text to the right of the last match.  If there was no match
+        * at all, data_pos is still 0 and this copies the whole source text.
+        */
+       if (data_pos < data_len)
+       {
+               text       *right_text;
+
+               right_text = text_substring(PointerGetDatum(src_text),
+                                                                       data_pos + 1, -1, true);
+               appendStringInfoText(&buf, right_text);
+               pfree(right_text);
+       }
+
+       ret_text = PG_STR_GET_TEXT(buf.data);
+       pfree(buf.data);        /* the buffer contents have been converted to ret_text */
+       pfree(data);
+
+       return ret_text;
+}
+
+/*
+ * split_text
+ *
+ * Return one field of the input string, where fields are delimited by the
+ * given separator and numbered starting at 1.
+ *
+ * Arguments: 0 = string to split (text), 1 = field separator (text),
+ * 2 = number of the wanted field (int4).
+ *
+ * A field number below 1 raises an error.  An empty input string yields an
+ * empty string.  If the separator is empty, or neither the separator
+ * occurrence before the wanted field nor the one after it is found, field 1
+ * is the whole input string and any other field is an empty string.
+ */
+Datum
+split_text(PG_FUNCTION_ARGS)
+{
+       text       *src = PG_GETARG_TEXT_P(0);
+       text       *sep = PG_GETARG_TEXT_P(1);
+       int                     wanted = PG_GETARG_INT32(2);
+       int                     src_len = TEXTLEN(src);
+       int                     sep_len = TEXTLEN(sep);
+       int                     sep_before;
+       int                     sep_after;
+
+       /* fields are counted from 1 */
+       if (wanted < 1)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("field position must be greater than zero")));
+
+       /* nothing to split: the answer is always the empty string */
+       if (src_len < 1)
+               PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
+
+       /* no separator: the input is one single field */
+       if (sep_len < 1)
+               PG_RETURN_TEXT_P((wanted == 1) ? src : PG_STR_GET_TEXT(""));
+
+       /*
+        * Locate the separator occurrences that bracket the wanted field; a
+        * position of 0 means that occurrence was not found.
+        */
+       sep_before = text_position(src, sep, wanted - 1);
+       sep_after = text_position(src, sep, wanted);
+
+       if (sep_before == 0)
+       {
+               /* neither bracket found: same answer as for an empty separator */
+               if (sep_after == 0)
+                       PG_RETURN_TEXT_P((wanted == 1) ? src : PG_STR_GET_TEXT(""));
+
+               /* only the trailing bracket exists: this is the first field */
+               PG_RETURN_TEXT_P(LEFT(src, sep));
+       }
+
+       /* only the leading bracket exists: last field, runs to end of string */
+       if (sep_after == 0)
+               PG_RETURN_TEXT_P(text_substring(PointerGetDatum(src),
+                                                                               sep_before + sep_len,
+                                                                               -1, true));
+
+       /* both brackets exist: interior field between the two separators */
+       PG_RETURN_TEXT_P(text_substring(PointerGetDatum(src),
+                                                                       sep_before + sep_len,
+                                                                       sep_after - sep_before - sep_len,
+                                                                       false));
+}
+
+/*
+ * text_to_array
+ *
+ * Split the input string at each occurrence of the field separator and
+ * return the pieces as a text array.
+ *
+ * Arguments: 0 = input string (text), 1 = field separator (text).
+ *
+ * Returns:
+ *     - SQL NULL if the input string is empty;
+ *     - a one-element, 1D array holding the whole input string if the
+ *       separator is empty or does not occur in the input;
+ *     - otherwise the array accumulated from the individual fields.
+ */
+Datum
+text_to_array(PG_FUNCTION_ARGS)
+{
+       text       *inputstring = PG_GETARG_TEXT_P(0);
+       text       *fldsep = PG_GETARG_TEXT_P(1);
+       int                     inputstring_len = TEXTLEN(inputstring);
+       int                     fldsep_len = TEXTLEN(fldsep);
+       int                     fldnum;
+       int                     start_posn;
+       int                     end_posn;
+       text       *result_text;
+       ArrayBuildState *astate = NULL;
+
+       /* return NULL for empty input string */
+       if (inputstring_len < 1)
+               PG_RETURN_NULL();
+
+       /*
+        * An empty field separator cannot split anything: return a one-element,
+        * 1D array holding the input string.  inputstring is a text pointer, so
+        * it is converted with PointerGetDatum rather than CStringGetDatum.
+        */
+       if (fldsep_len < 1)
+               PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
+                                                                                  PointerGetDatum(inputstring), 1));
+
+       /*
+        * Walk the fields in order.  On each pass start_posn is the position of
+        * the separator before the field (the previous pass's end_posn, 0 for
+        * the first field) and end_posn the position of the separator after it
+        * (0 if there is none).
+        */
+       end_posn = 0;
+       for (fldnum = 1;; fldnum++) /* field number is 1 based */
+       {
+               Datum           dvalue;
+               bool            disnull = false;
+
+               start_posn = end_posn;
+               end_posn = text_position(inputstring, fldsep, fldnum);
+
+               if ((start_posn == 0) && (end_posn == 0))               /* fldsep not found */
+               {
+                       if (fldnum == 1)
+                       {
+                               /*
+                                * The separator never occurs: return a one-element, 1D
+                                * array holding the input string.
+                                */
+                               PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
+                                                                                  PointerGetDatum(inputstring), 1));
+                       }
+                       else
+                       {
+                               /*
+                                * The previous pass stored the last field (its end_posn was
+                                * 0), so everything has been accumulated: build the array
+                                * and exit.
+                                */
+                               PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
+                                                                                                         CurrentMemoryContext));
+                       }
+               }
+               else if (start_posn == 0)
+               {
+                       /* first field: everything left of the first separator */
+                       result_text = LEFT(inputstring, fldsep);
+               }
+               else if (end_posn == 0)
+               {
+                       /* last field: from after the last separator to end of string */
+                       result_text = text_substring(PointerGetDatum(inputstring),
+                                                                                start_posn + fldsep_len,
+                                                                                -1, true);
+               }
+               else
+               {
+                       /* interior field: the text between two separators */
+                       result_text = text_substring(PointerGetDatum(inputstring),
+                                                                                start_posn + fldsep_len,
+                                                                                end_posn - start_posn - fldsep_len,
+                                                                                false);
+               }
+
+               /* stash away current value */
+               dvalue = PointerGetDatum(result_text);
+               astate = accumArrayResult(astate, dvalue,
+                                                                 disnull, TEXTOID,
+                                                                 CurrentMemoryContext);
+       }
+
+       /* never reached -- keep compiler quiet */
+       PG_RETURN_NULL();
+}
+
+/*
+ * array_to_text
+ *
+ * Build a single text value from the output-function representation of
+ * each non-null array element, placing the field separator between
+ * consecutive printed elements.
+ *
+ * Arguments: 0 = the array, 1 = field separator (text).
+ *
+ * Null elements are skipped entirely.  An array without elements yields an
+ * empty string.
+ */
+Datum
+array_to_text(PG_FUNCTION_ARGS)
+{
+       ArrayType  *arr = PG_GETARG_ARRAYTYPE_P(0);
+       char       *sep = PG_TEXTARG_GET_STR(1);
+       int                     ndims = ARR_NDIM(arr);
+       int                *dims = ARR_DIMS(arr);
+       int                     nitems = ArrayGetNItems(ndims, dims);
+       Oid                     elemtype;
+       int                     elmlen;
+       bool            elmbyval;
+       char            elmalign;
+       StringInfoData out;
+       bool            emitted_any = false;
+       char       *cursor;
+       bits8      *nullbits;
+       int                     mask;
+       int                     i;
+       ArrayMetaState *cache;
+
+       /* an array with no elements produces an empty string */
+       if (nitems == 0)
+               PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
+
+       elemtype = ARR_ELEMTYPE(arr);
+       initStringInfo(&out);
+
+       /*
+        * The element type's I/O data and output function are kept in fn_extra
+        * so they are looked up only once per series of calls, as long as the
+        * element type stays the same.  A fresh cache entry is seeded with a
+        * type OID that cannot equal the real one, forcing the lookup below.
+        */
+       cache = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+       if (cache == NULL)
+       {
+               cache = (ArrayMetaState *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+                                                                                                         sizeof(ArrayMetaState));
+               fcinfo->flinfo->fn_extra = cache;
+               cache->element_type = ~elemtype;
+       }
+
+       if (cache->element_type != elemtype)
+       {
+               /* (re)load type info, including the output conversion proc */
+               get_type_io_data(elemtype, IOFunc_output,
+                                                &cache->typlen, &cache->typbyval,
+                                                &cache->typalign, &cache->typdelim,
+                                                &cache->typioparam, &cache->typiofunc);
+               fmgr_info_cxt(cache->typiofunc, &cache->proc,
+                                         fcinfo->flinfo->fn_mcxt);
+               cache->element_type = elemtype;
+       }
+       elmlen = cache->typlen;
+       elmbyval = cache->typbyval;
+       elmalign = cache->typalign;
+
+       cursor = ARR_DATA_PTR(arr);
+       nullbits = ARR_NULLBITMAP(arr);
+       mask = 1;
+
+       for (i = 0; i < nitems; i++)
+       {
+               /*
+                * An element is present when there is no null bitmap or when its
+                * bit in the bitmap is set; absent (null) elements are ignored and
+                * do not advance the data cursor.
+                */
+               if (nullbits == NULL || (*nullbits & mask) != 0)
+               {
+                       Datum           elem = fetch_att(cursor, elmbyval, elmlen);
+                       char       *elemstr = OutputFunctionCall(&cache->proc, elem);
+
+                       if (emitted_any)
+                               appendStringInfo(&out, "%s%s", sep, elemstr);
+                       else
+                       {
+                               appendStringInfoString(&out, elemstr);
+                               emitted_any = true;
+                       }
+
+                       /* step the data cursor past this element and re-align it */
+                       cursor = att_addlength(cursor, elmlen, PointerGetDatum(cursor));
+                       cursor = (char *) att_align(cursor, elmalign);
+               }
+
+               /* move to the next bitmap bit, rolling over to the next byte */
+               if (nullbits != NULL)
+               {
+                       mask <<= 1;
+                       if (mask == 0x100)
+                       {
+                               nullbits++;
+                               mask = 1;
+                       }
+               }
+       }
+
+       PG_RETURN_TEXT_P(PG_STR_GET_TEXT(out.data));
+}
+
+#define HEXBASE 16
+/*
+ * to_hex32
+ *
+ * Take the int32 argument, treat its bits as an unsigned 32-bit value, and
+ * return its base 16 representation as text, using lowercase digits and no
+ * leading zeros (a value of zero gives "0").
+ */
+Datum
+to_hex32(PG_FUNCTION_ARGS)
+{
+       static const char hexdigits[] = "0123456789abcdef";
+       uint32          remaining = (uint32) PG_GETARG_INT32(0);
+       char            scratch[32];    /* bigger than needed, but reasonable */
+       char       *cursor = scratch + sizeof(scratch) - 1;
+
+       /* the digits are produced right to left, in front of the terminator */
+       *cursor = '\0';
+       for (;;)
+       {
+               *--cursor = hexdigits[remaining % HEXBASE];
+               remaining /= HEXBASE;
+
+               /* at least one digit is always written before we may stop */
+               if (cursor <= scratch || remaining == 0)
+                       break;
+       }
+
+       PG_RETURN_TEXT_P(PG_STR_GET_TEXT(cursor));
+}
+
+/*
+ * to_hex64
+ *
+ * Take the int64 argument, treat its bits as an unsigned 64-bit value, and
+ * return its base 16 representation as text, using lowercase digits and no
+ * leading zeros (a value of zero gives "0").
+ */
+Datum
+to_hex64(PG_FUNCTION_ARGS)
+{
+       static const char hexdigits[] = "0123456789abcdef";
+       uint64          remaining = (uint64) PG_GETARG_INT64(0);
+       char            scratch[32];    /* bigger than needed, but reasonable */
+       char       *cursor = scratch + sizeof(scratch) - 1;
+
+       /* the digits are produced right to left, in front of the terminator */
+       *cursor = '\0';
+       for (;;)
+       {
+               *--cursor = hexdigits[remaining % HEXBASE];
+               remaining /= HEXBASE;
+
+               /* at least one digit is always written before we may stop */
+               if (cursor <= scratch || remaining == 0)
+                       break;
+       }
+
+       PG_RETURN_TEXT_P(PG_STR_GET_TEXT(cursor));
+}
+
+/*
+ * md5_text
+ *
+ * Compute the md5 hash of a text value and return it as hex text.
+ *
+ * md5 produces a 16 byte (128 bit) hash; in hex that takes twice as many
+ * characters, which is what MD5_HASH_LEN stands for.
+ */
+#define MD5_HASH_LEN  32
+
+Datum
+md5_text(PG_FUNCTION_ARGS)
+{
+       text       *input = PG_GETARG_TEXT_P(0);
+       size_t          nbytes = VARSIZE(input) - VARHDRSZ;     /* payload size */
+       char            digest_hex[MD5_HASH_LEN + 1];
+
+       /* hash the payload bytes; a false result is reported as out of memory */
+       if (!pg_md5_hash(VARDATA(input), nbytes, digest_hex))
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
+
+       /* hand the hex string back as text */
+       PG_RETURN_TEXT_P(PG_STR_GET_TEXT(digest_hex));
+}
+
+/*
+ * md5_bytea
+ *
+ * Compute the md5 hash of a bytea value and return it as hex text: the
+ * 16-byte md5 digest is represented in 32 hex characters.
+ */
+Datum
+md5_bytea(PG_FUNCTION_ARGS)
+{
+       bytea      *input = PG_GETARG_BYTEA_P(0);
+       size_t          nbytes = VARSIZE(input) - VARHDRSZ;     /* payload size */
+       char            digest_hex[MD5_HASH_LEN + 1];
+
+       /* hash the payload bytes; a false result is reported as out of memory */
+       if (!pg_md5_hash(VARDATA(input), nbytes, digest_hex))
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
+
+       PG_RETURN_TEXT_P(PG_STR_GET_TEXT(digest_hex));
+}
+
+/*
+ * pg_column_size
+ *
+ * Return the size of a datum, possibly compressed.
+ *
+ * Works on any data type: the argument's typlen decides how the size is
+ * obtained.  The typlen is looked up on the first call and remembered in
+ * fn_extra for later calls.
+ */
+Datum
+pg_column_size(PG_FUNCTION_ARGS)
+{
+       Datum           value = PG_GETARG_DATUM(0);
+       int                *cached_typlen = (int *) fcinfo->flinfo->fn_extra;
+       int32           size;
+
+       if (cached_typlen == NULL)
+       {
+               /* first call: find out the type of the supplied argument */
+               Oid                     argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
+               int                     looked_up = get_typlen(argtype);
+
+               if (looked_up == 0)             /* should not happen */
+                       elog(ERROR, "cache lookup failed for type %u", argtype);
+
+               /* remember the typlen in the function's own memory context */
+               cached_typlen = (int *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+                                                                                                  sizeof(int));
+               *cached_typlen = looked_up;
+               fcinfo->flinfo->fn_extra = cached_typlen;
+       }
+
+       switch (*cached_typlen)
+       {
+               case -1:
+                       /* varlena type, possibly toasted */
+                       size = toast_datum_size(value);
+                       break;
+               case -2:
+                       /* cstring: count the terminating zero byte as well */
+                       size = strlen(DatumGetCString(value)) + 1;
+                       break;
+               default:
+                       /* ordinary fixed-width type */
+                       size = *cached_typlen;
+                       break;
+       }
+
+       PG_RETURN_INT32(size);
+}