granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.145 2006/03/05 15:58:44 momjian Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
#include "postgres.h"

#include <ctype.h>
#include <limits.h>

#include "access/tuptoaster.h"
#include "catalog/pg_type.h"
#include "lib/stringinfo.h"
#include "libpq/crypt.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "parser/scansup.h"
#include "regex/regex.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
  32
  33
  34 typedef struct varlena unknown;
  35
  36 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  37 #define DatumGetUnknownPCopy(X)         ((unknown *) PG_DETOAST_DATUM_COPY(X))
  38 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  39 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
  40 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  41
  42 #define PG_TEXTARG_GET_STR(arg_) \
  43         DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
  44 #define PG_TEXT_GET_STR(textp_) \
  45         DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
  46 #define PG_STR_GET_TEXT(str_) \
  47         DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
  48 #define TEXTLEN(textp) \
  49         text_length(PointerGetDatum(textp))
  50 #define TEXTPOS(buf_text, from_sub_text) \
  51         text_position(buf_text, from_sub_text, 1)
  52 #define LEFT(buf_text, from_sub_text) \
  53         text_substring(PointerGetDatum(buf_text), \
  54                                         1, \
  55                                         TEXTPOS(buf_text, from_sub_text) - 1, false)
  56
  57 static int      text_cmp(text *arg1, text *arg2);
  58 static int32 text_length(Datum str);
  59 static int32 text_position(text *t1, text *t2, int matchnum);
  60 static text *text_substring(Datum str,
  61                            int32 start,
  62                            int32 length,
  63                            bool length_not_specified);
  64
  65 static void appendStringInfoText(StringInfo str, const text *t);
  66
  67
  68 /*****************************************************************************
  69  *       USER I/O ROUTINES                                                                                                               *
  70  *****************************************************************************/
  71
  72
  73 #define VAL(CH)                 ((CH) - '0')
  74 #define DIG(VAL)                ((VAL) + '0')
  75
  76 /*
  77  *              byteain                 - converts from printable representation of byte array
  78  *
  79  *              Non-printable characters must be passed as '\nnn' (octal) and are
  80  *              converted to internal form.  '\' must be passed as '\\'.
  81  *              ereport(ERROR, ...) if bad form.
  82  *
  83  *              BUGS:
  84  *                              The input is scanned twice.
  85  *                              The error checking of input is minimal.
  86  */
  87 Datum
  88 byteain(PG_FUNCTION_ARGS)
  89 {
  90         char       *inputText = PG_GETARG_CSTRING(0);
  91         char       *tp;
  92         char       *rp;
  93         int                     byte;
  94         bytea      *result;
  95
  96         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
  97         {
  98                 if (tp[0] != '\\')
  99                         tp++;
 100                 else if ((tp[0] == '\\') &&
 101                                  (tp[1] >= '0' && tp[1] <= '3') &&
 102                                  (tp[2] >= '0' && tp[2] <= '7') &&
 103                                  (tp[3] >= '0' && tp[3] <= '7'))
 104                         tp += 4;
 105                 else if ((tp[0] == '\\') &&
 106                                  (tp[1] == '\\'))
 107                         tp += 2;
 108                 else
 109                 {
 110                         /*
 111                          * one backslash, not followed by 0 or ### valid octal
 112                          */
 113                         ereport(ERROR,
 114                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 115                                          errmsg("invalid input syntax for type bytea")));
 116                 }
 117         }
 118
 119         byte += VARHDRSZ;
 120         result = (bytea *) palloc(byte);
 121         VARATT_SIZEP(result) = byte;    /* set varlena length */
 122
 123         tp = inputText;
 124         rp = VARDATA(result);
 125         while (*tp != '\0')
 126         {
 127                 if (tp[0] != '\\')
 128                         *rp++ = *tp++;
 129                 else if ((tp[0] == '\\') &&
 130                                  (tp[1] >= '0' && tp[1] <= '3') &&
 131                                  (tp[2] >= '0' && tp[2] <= '7') &&
 132                                  (tp[3] >= '0' && tp[3] <= '7'))
 133                 {
 134                         byte = VAL(tp[1]);
 135                         byte <<= 3;
 136                         byte += VAL(tp[2]);
 137                         byte <<= 3;
 138                         *rp++ = byte + VAL(tp[3]);
 139                         tp += 4;
 140                 }
 141                 else if ((tp[0] == '\\') &&
 142                                  (tp[1] == '\\'))
 143                 {
 144                         *rp++ = '\\';
 145                         tp += 2;
 146                 }
 147                 else
 148                 {
 149                         /*
 150                          * We should never get here. The first pass should not allow it.
 151                          */
 152                         ereport(ERROR,
 153                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 154                                          errmsg("invalid input syntax for type bytea")));
 155                 }
 156         }
 157
 158         PG_RETURN_BYTEA_P(result);
 159 }
 160
 161 /*
 162  *              byteaout                - converts to printable representation of byte array
 163  *
 164  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 165  *              '\\'.
 166  *
 167  *              NULL vlena should be an error--returning string with NULL for now.
 168  */
 169 Datum
 170 byteaout(PG_FUNCTION_ARGS)
 171 {
 172         bytea      *vlena = PG_GETARG_BYTEA_P(0);
 173         char       *result;
 174         char       *vp;
 175         char       *rp;
 176         int                     val;                    /* holds unprintable chars */
 177         int                     i;
 178         int                     len;
 179
 180         len = 1;                                        /* empty string has 1 char */
 181         vp = VARDATA(vlena);
 182         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 183         {
 184                 if (*vp == '\\')
 185                         len += 2;
 186                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 187                         len += 4;
 188                 else
 189                         len++;
 190         }
 191         rp = result = (char *) palloc(len);
 192         vp = VARDATA(vlena);
 193         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 194         {
 195                 if (*vp == '\\')
 196                 {
 197                         *rp++ = '\\';
 198                         *rp++ = '\\';
 199                 }
 200                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 201                 {
 202                         val = *vp;
 203                         rp[0] = '\\';
 204                         rp[3] = DIG(val & 07);
 205                         val >>= 3;
 206                         rp[2] = DIG(val & 07);
 207                         val >>= 3;
 208                         rp[1] = DIG(val & 03);
 209                         rp += 4;
 210                 }
 211                 else
 212                         *rp++ = *vp;
 213         }
 214         *rp = '\0';
 215         PG_RETURN_CSTRING(result);
 216 }
 217
 218 /*
 219  *              bytearecv                       - converts external binary format to bytea
 220  */
 221 Datum
 222 bytearecv(PG_FUNCTION_ARGS)
 223 {
 224         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 225         bytea      *result;
 226         int                     nbytes;
 227
 228         nbytes = buf->len - buf->cursor;
 229         result = (bytea *) palloc(nbytes + VARHDRSZ);
 230         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 231         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 232         PG_RETURN_BYTEA_P(result);
 233 }
 234
 235 /*
 236  *              byteasend                       - converts bytea to binary format
 237  *
 238  * This is a special case: just copy the input...
 239  */
 240 Datum
 241 byteasend(PG_FUNCTION_ARGS)
 242 {
 243         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
 244
 245         PG_RETURN_BYTEA_P(vlena);
 246 }
 247
 248
 249 /*
 250  *              textin                  - converts "..." to internal representation
 251  */
 252 Datum
 253 textin(PG_FUNCTION_ARGS)
 254 {
 255         char       *inputText = PG_GETARG_CSTRING(0);
 256         text       *result;
 257         int                     len;
 258
 259         /* verify encoding */
 260         len = strlen(inputText);
 261         pg_verifymbstr(inputText, len, false);
 262
 263         result = (text *) palloc(len + VARHDRSZ);
 264         VARATT_SIZEP(result) = len + VARHDRSZ;
 265
 266         memcpy(VARDATA(result), inputText, len);
 267
 268         PG_RETURN_TEXT_P(result);
 269 }
 270
 271 /*
 272  *              textout                 - converts internal representation to "..."
 273  */
 274 Datum
 275 textout(PG_FUNCTION_ARGS)
 276 {
 277         text       *t = PG_GETARG_TEXT_P(0);
 278         int                     len;
 279         char       *result;
 280
 281         len = VARSIZE(t) - VARHDRSZ;
 282         result = (char *) palloc(len + 1);
 283         memcpy(result, VARDATA(t), len);
 284         result[len] = '\0';
 285
 286         PG_RETURN_CSTRING(result);
 287 }
 288
 289 /*
 290  *              textrecv                        - converts external binary format to text
 291  */
 292 Datum
 293 textrecv(PG_FUNCTION_ARGS)
 294 {
 295         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 296         text       *result;
 297         char       *str;
 298         int                     nbytes;
 299
 300         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 301
 302         /* verify encoding */
 303         pg_verifymbstr(str, nbytes, false);
 304
 305         result = (text *) palloc(nbytes + VARHDRSZ);
 306         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 307         memcpy(VARDATA(result), str, nbytes);
 308         pfree(str);
 309         PG_RETURN_TEXT_P(result);
 310 }
 311
 312 /*
 313  *              textsend                        - converts text to binary format
 314  */
 315 Datum
 316 textsend(PG_FUNCTION_ARGS)
 317 {
 318         text       *t = PG_GETARG_TEXT_P(0);
 319         StringInfoData buf;
 320
 321         pq_begintypsend(&buf);
 322         pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
 323         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 324 }
 325
 326
 327 /*
 328  *              unknownin                       - converts "..." to internal representation
 329  */
 330 Datum
 331 unknownin(PG_FUNCTION_ARGS)
 332 {
 333         char       *str = PG_GETARG_CSTRING(0);
 334
 335         /* representation is same as cstring */
 336         PG_RETURN_CSTRING(pstrdup(str));
 337 }
 338
 339 /*
 340  *              unknownout                      - converts internal representation to "..."
 341  */
 342 Datum
 343 unknownout(PG_FUNCTION_ARGS)
 344 {
 345         /* representation is same as cstring */
 346         char       *str = PG_GETARG_CSTRING(0);
 347
 348         PG_RETURN_CSTRING(pstrdup(str));
 349 }
 350
 351 /*
 352  *              unknownrecv                     - converts external binary format to unknown
 353  */
 354 Datum
 355 unknownrecv(PG_FUNCTION_ARGS)
 356 {
 357         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 358         char       *str;
 359         int                     nbytes;
 360
 361         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 362         /* representation is same as cstring */
 363         PG_RETURN_CSTRING(str);
 364 }
 365
 366 /*
 367  *              unknownsend                     - converts unknown to binary format
 368  */
 369 Datum
 370 unknownsend(PG_FUNCTION_ARGS)
 371 {
 372         /* representation is same as cstring */
 373         char       *str = PG_GETARG_CSTRING(0);
 374         StringInfoData buf;
 375
 376         pq_begintypsend(&buf);
 377         pq_sendtext(&buf, str, strlen(str));
 378         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 379 }
 380
 381
 382 /* ========== PUBLIC ROUTINES ========== */
 383
 384 /*
 385  * textlen -
 386  *        returns the logical length of a text*
 387  *         (which is less than the VARSIZE of the text*)
 388  */
 389 Datum
 390 textlen(PG_FUNCTION_ARGS)
 391 {
 392         Datum           str = PG_GETARG_DATUM(0);
 393
 394         /* try to avoid decompressing argument */
 395         PG_RETURN_INT32(text_length(str));
 396 }
 397
 398 /*
 399  * text_length -
 400  *      Does the real work for textlen()
 401  *
 402  *      This is broken out so it can be called directly by other string processing
 403  *      functions.      Note that the argument is passed as a Datum, to indicate that
 404  *      it may still be in compressed form.  We can avoid decompressing it at all
 405  *      in some cases.
 406  */
 407 static int32
 408 text_length(Datum str)
 409 {
 410         /* fastpath when max encoding length is one */
 411         if (pg_database_encoding_max_length() == 1)
 412                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 413         else
 414         {
 415                 text       *t = DatumGetTextP(str);
 416
 417                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
 418                                                                                          VARSIZE(t) - VARHDRSZ));
 419         }
 420 }
 421
 422 /*
 423  * textoctetlen -
 424  *        returns the physical length of a text*
 425  *         (which is less than the VARSIZE of the text*)
 426  */
 427 Datum
 428 textoctetlen(PG_FUNCTION_ARGS)
 429 {
 430         Datum           str = PG_GETARG_DATUM(0);
 431
 432         /* We need not detoast the input at all */
 433         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 434 }
 435
 436 /*
 437  * textcat -
 438  *        takes two text* and returns a text* that is the concatenation of
 439  *        the two.
 440  *
 441  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 442  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 443  * Allocate space for output in all cases.
 444  * XXX - thomas 1997-07-10
 445  */
 446 Datum
 447 textcat(PG_FUNCTION_ARGS)
 448 {
 449         text       *t1 = PG_GETARG_TEXT_P(0);
 450         text       *t2 = PG_GETARG_TEXT_P(1);
 451         int                     len1,
 452                                 len2,
 453                                 len;
 454         text       *result;
 455         char       *ptr;
 456
 457         len1 = VARSIZE(t1) - VARHDRSZ;
 458         if (len1 < 0)
 459                 len1 = 0;
 460
 461         len2 = VARSIZE(t2) - VARHDRSZ;
 462         if (len2 < 0)
 463                 len2 = 0;
 464
 465         len = len1 + len2 + VARHDRSZ;
 466         result = (text *) palloc(len);
 467
 468         /* Set size of result string... */
 469         VARATT_SIZEP(result) = len;
 470
 471         /* Fill data field of result string... */
 472         ptr = VARDATA(result);
 473         if (len1 > 0)
 474                 memcpy(ptr, VARDATA(t1), len1);
 475         if (len2 > 0)
 476                 memcpy(ptr + len1, VARDATA(t2), len2);
 477
 478         PG_RETURN_TEXT_P(result);
 479 }
 480
 481 /*
 482  * text_substr()
 483  * Return a substring starting at the specified position.
 484  * - thomas 1997-12-31
 485  *
 486  * Input:
 487  *      - string
 488  *      - starting position (is one-based)
 489  *      - string length
 490  *
 491  * If the starting position is zero or less, then return from the start of the string
 492  *      adjusting the length to be consistent with the "negative start" per SQL92.
 493  * If the length is less than zero, return the remaining string.
 494  *
 495  * Added multibyte support.
 496  * - Tatsuo Ishii 1998-4-21
 497  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 498  * Formerly returned the entire string; now returns a portion.
 499  * - Thomas Lockhart 1998-12-10
 500  * Now uses faster TOAST-slicing interface
 501  * - John Gray 2002-02-22
 502  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 503  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 504  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 505  * S > LC and < LC + 4 sometimes garbage characters are returned.
 506  * - Joe Conway 2002-08-10
 507  */
 508 Datum
 509 text_substr(PG_FUNCTION_ARGS)
 510 {
 511         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 512                                                                         PG_GETARG_INT32(1),
 513                                                                         PG_GETARG_INT32(2),
 514                                                                         false));
 515 }
 516
 517 /*
 518  * text_substr_no_len -
 519  *        Wrapper to avoid opr_sanity failure due to
 520  *        one function accepting a different number of args.
 521  */
 522 Datum
 523 text_substr_no_len(PG_FUNCTION_ARGS)
 524 {
 525         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 526                                                                         PG_GETARG_INT32(1),
 527                                                                         -1, true));
 528 }
 529
 530 /*
 531  * text_substring -
 532  *      Does the real work for text_substr() and text_substr_no_len()
 533  *
 534  *      This is broken out so it can be called directly by other string processing
 535  *      functions.      Note that the argument is passed as a Datum, to indicate that
 536  *      it may still be in compressed/toasted form.  We can avoid detoasting all
 537  *      of it in some cases.
 538  */
 539 static text *
 540 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 541 {
 542         int32           eml = pg_database_encoding_max_length();
 543         int32           S = start;              /* start position */
 544         int32           S1;                             /* adjusted start position */
 545         int32           L1;                             /* adjusted substring length */
 546
 547         /* life is easy if the encoding max length is 1 */
 548         if (eml == 1)
 549         {
 550                 S1 = Max(S, 1);
 551
 552                 if (length_not_specified)               /* special case - get length to end of
 553                                                                                  * string */
 554                         L1 = -1;
 555                 else
 556                 {
 557                         /* end position */
 558                         int                     E = S + length;
 559
 560                         /*
 561                          * A negative value for L is the only way for the end position to
 562                          * be before the start. SQL99 says to throw an error.
 563                          */
 564                         if (E < S)
 565                                 ereport(ERROR,
 566                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 567                                                  errmsg("negative substring length not allowed")));
 568
 569                         /*
 570                          * A zero or negative value for the end position can happen if the
 571                          * start was negative or one. SQL99 says to return a zero-length
 572                          * string.
 573                          */
 574                         if (E < 1)
 575                                 return PG_STR_GET_TEXT("");
 576
 577                         L1 = E - S1;
 578                 }
 579
 580                 /*
 581                  * If the start position is past the end of the string, SQL99 says to
 582                  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
 583                  * that for us. Convert to zero-based starting position
 584                  */
 585                 return DatumGetTextPSlice(str, S1 - 1, L1);
 586         }
 587         else if (eml > 1)
 588         {
 589                 /*
 590                  * When encoding max length is > 1, we can't get LC without
 591                  * detoasting, so we'll grab a conservatively large slice now and go
 592                  * back later to do the right thing
 593                  */
 594                 int32           slice_start;
 595                 int32           slice_size;
 596                 int32           slice_strlen;
 597                 text       *slice;
 598                 int32           E1;
 599                 int32           i;
 600                 char       *p;
 601                 char       *s;
 602                 text       *ret;
 603
 604                 /*
 605                  * if S is past the end of the string, the tuple toaster will return a
 606                  * zero-length string to us
 607                  */
 608                 S1 = Max(S, 1);
 609
 610                 /*
 611                  * We need to start at position zero because there is no way to know
 612                  * in advance which byte offset corresponds to the supplied start
 613                  * position.
 614                  */
 615                 slice_start = 0;
 616
 617                 if (length_not_specified)               /* special case - get length to end of
 618                                                                                  * string */
 619                         slice_size = L1 = -1;
 620                 else
 621                 {
 622                         int                     E = S + length;
 623
 624                         /*
 625                          * A negative value for L is the only way for the end position to
 626                          * be before the start. SQL99 says to throw an error.
 627                          */
 628                         if (E < S)
 629                                 ereport(ERROR,
 630                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 631                                                  errmsg("negative substring length not allowed")));
 632
 633                         /*
 634                          * A zero or negative value for the end position can happen if the
 635                          * start was negative or one. SQL99 says to return a zero-length
 636                          * string.
 637                          */
 638                         if (E < 1)
 639                                 return PG_STR_GET_TEXT("");
 640
 641                         /*
 642                          * if E is past the end of the string, the tuple toaster will
 643                          * truncate the length for us
 644                          */
 645                         L1 = E - S1;
 646
 647                         /*
 648                          * Total slice size in bytes can't be any longer than the start
 649                          * position plus substring length times the encoding max length.
 650                          */
 651                         slice_size = (S1 + L1) * eml;
 652                 }
 653                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
 654
 655                 /* see if we got back an empty string */
 656                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
 657                         return PG_STR_GET_TEXT("");
 658
 659                 /* Now we can get the actual length of the slice in MB characters */
 660                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
 661
 662                 /*
 663                  * Check that the start position wasn't > slice_strlen. If so, SQL99
 664                  * says to return a zero-length string.
 665                  */
 666                 if (S1 > slice_strlen)
 667                         return PG_STR_GET_TEXT("");
 668
 669                 /*
 670                  * Adjust L1 and E1 now that we know the slice string length. Again
 671                  * remember that S1 is one based, and slice_start is zero based.
 672                  */
 673                 if (L1 > -1)
 674                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 675                 else
 676                         E1 = slice_start + 1 + slice_strlen;
 677
 678                 /*
 679                  * Find the start position in the slice; remember S1 is not zero based
 680                  */
 681                 p = VARDATA(slice);
 682                 for (i = 0; i < S1 - 1; i++)
 683                         p += pg_mblen(p);
 684
 685                 /* hang onto a pointer to our start position */
 686                 s = p;
 687
 688                 /*
 689                  * Count the actual bytes used by the substring of the requested
 690                  * length.
 691                  */
 692                 for (i = S1; i < E1; i++)
 693                         p += pg_mblen(p);
 694
 695                 ret = (text *) palloc(VARHDRSZ + (p - s));
 696                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
 697                 memcpy(VARDATA(ret), s, (p - s));
 698
 699                 return ret;
 700         }
 701         else
 702                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
 703
 704         /* not reached: suppress compiler warning */
 705         return NULL;
 706 }
 707
 708 /*
 709  * textpos -
 710  *        Return the position of the specified substring.
 711  *        Implements the SQL92 POSITION() function.
 712  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 713  * - thomas 1997-07-27
 714  */
 715 Datum
 716 textpos(PG_FUNCTION_ARGS)
 717 {
 718         text       *str = PG_GETARG_TEXT_P(0);
 719         text       *search_str = PG_GETARG_TEXT_P(1);
 720
 721         PG_RETURN_INT32(text_position(str, search_str, 1));
 722 }
 723
 724 /*
 725  * text_position -
 726  *      Does the real work for textpos()
 727  *
 728  * Inputs:
 729  *              t1 - string to be searched
 730  *              t2 - pattern to match within t1
 731  *              matchnum - number of the match to be found (1 is the first match)
 732  * Result:
 733  *              Character index of the first matched char, starting from 1,
 734  *              or 0 if no match.
 735  *
 736  *      This is broken out so it can be called directly by other string processing
 737  *      functions.
 738  */
 739 static int32
 740 text_position(text *t1, text *t2, int matchnum)
 741 {
 742         int                     match = 0,
 743                                 pos = 0,
 744                                 p,
 745                                 px,
 746                                 len1,
 747                                 len2;
 748
 749         if (matchnum <= 0)
 750                 return 0;                               /* result for 0th match */
 751
 752         if (VARSIZE(t2) <= VARHDRSZ)
 753                 return 1;                               /* result for empty pattern */
 754
 755         len1 = VARSIZE(t1) - VARHDRSZ;
 756         len2 = VARSIZE(t2) - VARHDRSZ;
 757
 758         if (pg_database_encoding_max_length() == 1)
 759         {
 760                 /* simple case - single byte encoding */
 761                 char       *p1,
 762                                    *p2;
 763
 764                 p1 = VARDATA(t1);
 765                 p2 = VARDATA(t2);
 766
 767                 /* no use in searching str past point where search_str will fit */
 768                 px = (len1 - len2);
 769
 770                 for (p = 0; p <= px; p++)
 771                 {
 772                         if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
 773                         {
 774                                 if (++match == matchnum)
 775                                 {
 776                                         pos = p + 1;
 777                                         break;
 778                                 }
 779                         }
 780                         p1++;
 781                 }
 782         }
 783         else
 784         {
 785                 /* not as simple - multibyte encoding */
 786                 pg_wchar   *p1,
 787                                    *p2,
 788                                    *ps1,
 789                                    *ps2;
 790
 791                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 792                 (void) pg_mb2wchar_with_len(VARDATA(t1), p1, len1);
 793                 len1 = pg_wchar_strlen(p1);
 794                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 795                 (void) pg_mb2wchar_with_len(VARDATA(t2), p2, len2);
 796                 len2 = pg_wchar_strlen(p2);
 797
 798                 /* no use in searching str past point where search_str will fit */
 799                 px = (len1 - len2);
 800
 801                 for (p = 0; p <= px; p++)
 802                 {
 803                         if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
 804                         {
 805                                 if (++match == matchnum)
 806                                 {
 807                                         pos = p + 1;
 808                                         break;
 809                                 }
 810                         }
 811                         p1++;
 812                 }
 813
 814                 pfree(ps1);
 815                 pfree(ps2);
 816         }
 817
 818         return pos;
 819 }
 820
 821 /* varstr_cmp()
 822  * Comparison function for text strings with given lengths.
 823  * Includes locale support, but must copy strings to temporary memory
 824  *      to allow null-termination for inputs to strcoll().
 825  * Returns -1, 0 or 1
 826  */
 827 int
 828 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 829 {
 830         int                     result;
 831
 832         /*
 833          * Unfortunately, there is no strncoll(), so in the non-C locale case we
 834          * have to do some memory copying.      This turns out to be significantly
 835          * slower, so we optimize the case where LC_COLLATE is C.  We also try to
 836          * optimize relatively-short strings by avoiding palloc/pfree overhead.
 837          */
 838         if (lc_collate_is_c())
 839         {
 840                 result = strncmp(arg1, arg2, Min(len1, len2));
 841                 if ((result == 0) && (len1 != len2))
 842                         result = (len1 < len2) ? -1 : 1;
 843         }
 844         else
 845         {
 846 #define STACKBUFLEN             1024
 847
 848                 char            a1buf[STACKBUFLEN];
 849                 char            a2buf[STACKBUFLEN];
 850                 char       *a1p,
 851                                    *a2p;
 852
 853 #ifdef WIN32
 854                 /* Win32 does not have UTF-8, so we need to map to UTF-16 */
 855                 if (GetDatabaseEncoding() == PG_UTF8)
 856                 {
 857                         int                     a1len;
 858                         int                     a2len;
 859                         int                     r;
 860
 861                         if (len1 >= STACKBUFLEN / 2)
 862                         {
 863                                 a1len = len1 * 2 + 2;
 864                                 a1p = palloc(a1len);
 865                         }
 866                         else
 867                         {
 868                                 a1len = STACKBUFLEN;
 869                                 a1p = a1buf;
 870                         }
 871                         if (len2 >= STACKBUFLEN / 2)
 872                         {
 873                                 a2len = len2 * 2 + 2;
 874                                 a2p = palloc(a2len);
 875                         }
 876                         else
 877                         {
 878                                 a2len = STACKBUFLEN;
 879                                 a2p = a2buf;
 880                         }
 881
 882                         /* stupid Microsloth API does not work for zero-length input */
 883                         if (len1 == 0)
 884                                 r = 0;
 885                         else
 886                         {
 887                                 r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
 888                                                                                 (LPWSTR) a1p, a1len / 2);
 889                                 if (!r)
 890                                         ereport(ERROR,
 891                                          (errmsg("could not convert string to UTF-16: error %lu",
 892                                                          GetLastError())));
 893                         }
 894                         ((LPWSTR) a1p)[r] = 0;
 895
 896                         if (len2 == 0)
 897                                 r = 0;
 898                         else
 899                         {
 900                                 r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
 901                                                                                 (LPWSTR) a2p, a2len / 2);
 902                                 if (!r)
 903                                         ereport(ERROR,
 904                                          (errmsg("could not convert string to UTF-16: error %lu",
 905                                                          GetLastError())));
 906                         }
 907                         ((LPWSTR) a2p)[r] = 0;
 908
 909                         errno = 0;
 910                         result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
 911                         if (result == 2147483647)       /* _NLSCMPERROR; missing from mingw
 912                                                                                  * headers */
 913                                 ereport(ERROR,
 914                                                 (errmsg("could not compare Unicode strings: %m")));
 915
 916                         if (a1p != a1buf)
 917                                 pfree(a1p);
 918                         if (a2p != a2buf)
 919                                 pfree(a2p);
 920
 921                         return result;
 922                 }
 923 #endif   /* WIN32 */
 924
 925                 if (len1 >= STACKBUFLEN)
 926                         a1p = (char *) palloc(len1 + 1);
 927                 else
 928                         a1p = a1buf;
 929                 if (len2 >= STACKBUFLEN)
 930                         a2p = (char *) palloc(len2 + 1);
 931                 else
 932                         a2p = a2buf;
 933
 934                 memcpy(a1p, arg1, len1);
 935                 a1p[len1] = '\0';
 936                 memcpy(a2p, arg2, len2);
 937                 a2p[len2] = '\0';
 938
 939                 result = strcoll(a1p, a2p);
 940
 941                 /*
 942                  * In some locales strcoll() can claim that nonidentical strings are
 943                  * equal.  Believing that would be bad news for a number of reasons,
 944                  * so we follow Perl's lead and sort "equal" strings according to
 945                  * strcmp().
 946                  */
 947                 if (result == 0)
 948                         result = strcmp(a1p, a2p);
 949
 950                 if (a1p != a1buf)
 951                         pfree(a1p);
 952                 if (a2p != a2buf)
 953                         pfree(a2p);
 954         }
 955
 956         return result;
 957 }
 958
 959
 960 /* text_cmp()
 961  * Internal comparison function for text strings.
 962  * Returns -1, 0 or 1
 963  */
 964 static int
 965 text_cmp(text *arg1, text *arg2)
 966 {
 967         char       *a1p,
 968                            *a2p;
 969         int                     len1,
 970                                 len2;
 971
 972         a1p = VARDATA(arg1);
 973         a2p = VARDATA(arg2);
 974
 975         len1 = VARSIZE(arg1) - VARHDRSZ;
 976         len2 = VARSIZE(arg2) - VARHDRSZ;
 977
 978         return varstr_cmp(a1p, len1, a2p, len2);
 979 }
 980
 981 /*
 982  * Comparison functions for text strings.
 983  *
 984  * Note: btree indexes need these routines not to leak memory; therefore,
 985  * be careful to free working copies of toasted datums.  Most places don't
 986  * need to be so careful.
 987  */
 988
 989 Datum
 990 texteq(PG_FUNCTION_ARGS)
 991 {
 992         text       *arg1 = PG_GETARG_TEXT_P(0);
 993         text       *arg2 = PG_GETARG_TEXT_P(1);
 994         bool            result;
 995
 996         /*
 997          * Since we only care about equality or not-equality, we can avoid all
 998          * the expense of strcoll() here, and just do bitwise comparison.
 999          */
1000         if (VARSIZE(arg1) != VARSIZE(arg2))
1001                 result = false;
1002         else
1003                 result = (strncmp(VARDATA(arg1), VARDATA(arg2),
1004                                                   VARSIZE(arg1) - VARHDRSZ) == 0);
1005
1006         PG_FREE_IF_COPY(arg1, 0);
1007         PG_FREE_IF_COPY(arg2, 1);
1008
1009         PG_RETURN_BOOL(result);
1010 }
1011
1012 Datum
1013 textne(PG_FUNCTION_ARGS)
1014 {
1015         text       *arg1 = PG_GETARG_TEXT_P(0);
1016         text       *arg2 = PG_GETARG_TEXT_P(1);
1017         bool            result;
1018
1019         /*
1020          * Since we only care about equality or not-equality, we can avoid all
1021          * the expense of strcoll() here, and just do bitwise comparison.
1022          */
1023         if (VARSIZE(arg1) != VARSIZE(arg2))
1024                 result = true;
1025         else
1026                 result = (strncmp(VARDATA(arg1), VARDATA(arg2),
1027                                                   VARSIZE(arg1) - VARHDRSZ) != 0);
1028
1029         PG_FREE_IF_COPY(arg1, 0);
1030         PG_FREE_IF_COPY(arg2, 1);
1031
1032         PG_RETURN_BOOL(result);
1033 }
1034
1035 Datum
1036 text_lt(PG_FUNCTION_ARGS)
1037 {
1038         text       *arg1 = PG_GETARG_TEXT_P(0);
1039         text       *arg2 = PG_GETARG_TEXT_P(1);
1040         bool            result;
1041
1042         result = (text_cmp(arg1, arg2) < 0);
1043
1044         PG_FREE_IF_COPY(arg1, 0);
1045         PG_FREE_IF_COPY(arg2, 1);
1046
1047         PG_RETURN_BOOL(result);
1048 }
1049
1050 Datum
1051 text_le(PG_FUNCTION_ARGS)
1052 {
1053         text       *arg1 = PG_GETARG_TEXT_P(0);
1054         text       *arg2 = PG_GETARG_TEXT_P(1);
1055         bool            result;
1056
1057         result = (text_cmp(arg1, arg2) <= 0);
1058
1059         PG_FREE_IF_COPY(arg1, 0);
1060         PG_FREE_IF_COPY(arg2, 1);
1061
1062         PG_RETURN_BOOL(result);
1063 }
1064
1065 Datum
1066 text_gt(PG_FUNCTION_ARGS)
1067 {
1068         text       *arg1 = PG_GETARG_TEXT_P(0);
1069         text       *arg2 = PG_GETARG_TEXT_P(1);
1070         bool            result;
1071
1072         result = (text_cmp(arg1, arg2) > 0);
1073
1074         PG_FREE_IF_COPY(arg1, 0);
1075         PG_FREE_IF_COPY(arg2, 1);
1076
1077         PG_RETURN_BOOL(result);
1078 }
1079
1080 Datum
1081 text_ge(PG_FUNCTION_ARGS)
1082 {
1083         text       *arg1 = PG_GETARG_TEXT_P(0);
1084         text       *arg2 = PG_GETARG_TEXT_P(1);
1085         bool            result;
1086
1087         result = (text_cmp(arg1, arg2) >= 0);
1088
1089         PG_FREE_IF_COPY(arg1, 0);
1090         PG_FREE_IF_COPY(arg2, 1);
1091
1092         PG_RETURN_BOOL(result);
1093 }
1094
1095 Datum
1096 bttextcmp(PG_FUNCTION_ARGS)
1097 {
1098         text       *arg1 = PG_GETARG_TEXT_P(0);
1099         text       *arg2 = PG_GETARG_TEXT_P(1);
1100         int32           result;
1101
1102         result = text_cmp(arg1, arg2);
1103
1104         PG_FREE_IF_COPY(arg1, 0);
1105         PG_FREE_IF_COPY(arg2, 1);
1106
1107         PG_RETURN_INT32(result);
1108 }
1109
1110
1111 Datum
1112 text_larger(PG_FUNCTION_ARGS)
1113 {
1114         text       *arg1 = PG_GETARG_TEXT_P(0);
1115         text       *arg2 = PG_GETARG_TEXT_P(1);
1116         text       *result;
1117
1118         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1119
1120         PG_RETURN_TEXT_P(result);
1121 }
1122
1123 Datum
1124 text_smaller(PG_FUNCTION_ARGS)
1125 {
1126         text       *arg1 = PG_GETARG_TEXT_P(0);
1127         text       *arg2 = PG_GETARG_TEXT_P(1);
1128         text       *result;
1129
1130         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1131
1132         PG_RETURN_TEXT_P(result);
1133 }
1134
1135
1136 /*
1137  * The following operators support character-by-character comparison
1138  * of text data types, to allow building indexes suitable for LIKE
1139  * clauses.
1140  */
1141
1142 static int
1143 internal_text_pattern_compare(text *arg1, text *arg2)
1144 {
1145         int                     result;
1146
1147         result = memcmp(VARDATA(arg1), VARDATA(arg2),
1148                                         Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1149         if (result != 0)
1150                 return result;
1151         else if (VARSIZE(arg1) < VARSIZE(arg2))
1152                 return -1;
1153         else if (VARSIZE(arg1) > VARSIZE(arg2))
1154                 return 1;
1155         else
1156                 return 0;
1157 }
1158
1159
1160 Datum
1161 text_pattern_lt(PG_FUNCTION_ARGS)
1162 {
1163         text       *arg1 = PG_GETARG_TEXT_P(0);
1164         text       *arg2 = PG_GETARG_TEXT_P(1);
1165         int                     result;
1166
1167         result = internal_text_pattern_compare(arg1, arg2);
1168
1169         PG_FREE_IF_COPY(arg1, 0);
1170         PG_FREE_IF_COPY(arg2, 1);
1171
1172         PG_RETURN_BOOL(result < 0);
1173 }
1174
1175
1176 Datum
1177 text_pattern_le(PG_FUNCTION_ARGS)
1178 {
1179         text       *arg1 = PG_GETARG_TEXT_P(0);
1180         text       *arg2 = PG_GETARG_TEXT_P(1);
1181         int                     result;
1182
1183         result = internal_text_pattern_compare(arg1, arg2);
1184
1185         PG_FREE_IF_COPY(arg1, 0);
1186         PG_FREE_IF_COPY(arg2, 1);
1187
1188         PG_RETURN_BOOL(result <= 0);
1189 }
1190
1191
1192 Datum
1193 text_pattern_eq(PG_FUNCTION_ARGS)
1194 {
1195         text       *arg1 = PG_GETARG_TEXT_P(0);
1196         text       *arg2 = PG_GETARG_TEXT_P(1);
1197         int                     result;
1198
1199         if (VARSIZE(arg1) != VARSIZE(arg2))
1200                 result = 1;
1201         else
1202                 result = internal_text_pattern_compare(arg1, arg2);
1203
1204         PG_FREE_IF_COPY(arg1, 0);
1205         PG_FREE_IF_COPY(arg2, 1);
1206
1207         PG_RETURN_BOOL(result == 0);
1208 }
1209
1210
1211 Datum
1212 text_pattern_ge(PG_FUNCTION_ARGS)
1213 {
1214         text       *arg1 = PG_GETARG_TEXT_P(0);
1215         text       *arg2 = PG_GETARG_TEXT_P(1);
1216         int                     result;
1217
1218         result = internal_text_pattern_compare(arg1, arg2);
1219
1220         PG_FREE_IF_COPY(arg1, 0);
1221         PG_FREE_IF_COPY(arg2, 1);
1222
1223         PG_RETURN_BOOL(result >= 0);
1224 }
1225
1226
1227 Datum
1228 text_pattern_gt(PG_FUNCTION_ARGS)
1229 {
1230         text       *arg1 = PG_GETARG_TEXT_P(0);
1231         text       *arg2 = PG_GETARG_TEXT_P(1);
1232         int                     result;
1233
1234         result = internal_text_pattern_compare(arg1, arg2);
1235
1236         PG_FREE_IF_COPY(arg1, 0);
1237         PG_FREE_IF_COPY(arg2, 1);
1238
1239         PG_RETURN_BOOL(result > 0);
1240 }
1241
1242
1243 Datum
1244 text_pattern_ne(PG_FUNCTION_ARGS)
1245 {
1246         text       *arg1 = PG_GETARG_TEXT_P(0);
1247         text       *arg2 = PG_GETARG_TEXT_P(1);
1248         int                     result;
1249
1250         if (VARSIZE(arg1) != VARSIZE(arg2))
1251                 result = 1;
1252         else
1253                 result = internal_text_pattern_compare(arg1, arg2);
1254
1255         PG_FREE_IF_COPY(arg1, 0);
1256         PG_FREE_IF_COPY(arg2, 1);
1257
1258         PG_RETURN_BOOL(result != 0);
1259 }
1260
1261
1262 Datum
1263 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1264 {
1265         text       *arg1 = PG_GETARG_TEXT_P(0);
1266         text       *arg2 = PG_GETARG_TEXT_P(1);
1267         int                     result;
1268
1269         result = internal_text_pattern_compare(arg1, arg2);
1270
1271         PG_FREE_IF_COPY(arg1, 0);
1272         PG_FREE_IF_COPY(arg2, 1);
1273
1274         PG_RETURN_INT32(result);
1275 }
1276
1277
1278 /*-------------------------------------------------------------
1279  * byteaoctetlen
1280  *
1281  * get the number of bytes contained in an instance of type 'bytea'
1282  *-------------------------------------------------------------
1283  */
1284 Datum
1285 byteaoctetlen(PG_FUNCTION_ARGS)
1286 {
1287         Datum           str = PG_GETARG_DATUM(0);
1288
1289         /* We need not detoast the input at all */
1290         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1291 }
1292
1293 /*
1294  * byteacat -
1295  *        takes two bytea* and returns a bytea* that is the concatenation of
1296  *        the two.
1297  *
1298  * Cloned from textcat and modified as required.
1299  */
1300 Datum
1301 byteacat(PG_FUNCTION_ARGS)
1302 {
1303         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1304         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1305         int                     len1,
1306                                 len2,
1307                                 len;
1308         bytea      *result;
1309         char       *ptr;
1310
1311         len1 = VARSIZE(t1) - VARHDRSZ;
1312         if (len1 < 0)
1313                 len1 = 0;
1314
1315         len2 = VARSIZE(t2) - VARHDRSZ;
1316         if (len2 < 0)
1317                 len2 = 0;
1318
1319         len = len1 + len2 + VARHDRSZ;
1320         result = (bytea *) palloc(len);
1321
1322         /* Set size of result string... */
1323         VARATT_SIZEP(result) = len;
1324
1325         /* Fill data field of result string... */
1326         ptr = VARDATA(result);
1327         if (len1 > 0)
1328                 memcpy(ptr, VARDATA(t1), len1);
1329         if (len2 > 0)
1330                 memcpy(ptr + len1, VARDATA(t2), len2);
1331
1332         PG_RETURN_BYTEA_P(result);
1333 }
1334
1335 #define PG_STR_GET_BYTEA(str_) \
1336         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1337 /*
1338  * bytea_substr()
1339  * Return a substring starting at the specified position.
1340  * Cloned from text_substr and modified as required.
1341  *
1342  * Input:
1343  *      - string
1344  *      - starting position (is one-based)
1345  *      - string length (optional)
1346  *
1347  * If the starting position is zero or less, then return from the start of the string
1348  * adjusting the length to be consistent with the "negative start" per SQL92.
1349  * If the length is less than zero, an ERROR is thrown. If no third argument
1350  * (length) is provided, the length to the end of the string is assumed.
1351  */
1352 Datum
1353 bytea_substr(PG_FUNCTION_ARGS)
1354 {
1355         int                     S = PG_GETARG_INT32(1); /* start position */
1356         int                     S1;                             /* adjusted start position */
1357         int                     L1;                             /* adjusted substring length */
1358
1359         S1 = Max(S, 1);
1360
1361         if (fcinfo->nargs == 2)
1362         {
1363                 /*
1364                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1365                  * the end of the string if we pass it a negative value for length.
1366                  */
1367                 L1 = -1;
1368         }
1369         else
1370         {
1371                 /* end position */
1372                 int                     E = S + PG_GETARG_INT32(2);
1373
1374                 /*
1375                  * A negative value for L is the only way for the end position to be
1376                  * before the start. SQL99 says to throw an error.
1377                  */
1378                 if (E < S)
1379                         ereport(ERROR,
1380                                         (errcode(ERRCODE_SUBSTRING_ERROR),
1381                                          errmsg("negative substring length not allowed")));
1382
1383                 /*
1384                  * A zero or negative value for the end position can happen if the
1385                  * start was negative or one. SQL99 says to return a zero-length
1386                  * string.
1387                  */
1388                 if (E < 1)
1389                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1390
1391                 L1 = E - S1;
1392         }
1393
1394         /*
1395          * If the start position is past the end of the string, SQL99 says to
1396          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
1397          * for us. Convert to zero-based starting position
1398          */
1399         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1400 }
1401
1402 /*
1403  * bytea_substr_no_len -
1404  *        Wrapper to avoid opr_sanity failure due to
1405  *        one function accepting a different number of args.
1406  */
1407 Datum
1408 bytea_substr_no_len(PG_FUNCTION_ARGS)
1409 {
1410         return bytea_substr(fcinfo);
1411 }
1412
1413 /*
1414  * byteapos -
1415  *        Return the position of the specified substring.
1416  *        Implements the SQL92 POSITION() function.
1417  * Cloned from textpos and modified as required.
1418  */
1419 Datum
1420 byteapos(PG_FUNCTION_ARGS)
1421 {
1422         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1423         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1424         int                     pos;
1425         int                     px,
1426                                 p;
1427         int                     len1,
1428                                 len2;
1429         char       *p1,
1430                            *p2;
1431
1432         if (VARSIZE(t2) <= VARHDRSZ)
1433                 PG_RETURN_INT32(1);             /* result for empty pattern */
1434
1435         len1 = VARSIZE(t1) - VARHDRSZ;
1436         len2 = VARSIZE(t2) - VARHDRSZ;
1437
1438         p1 = VARDATA(t1);
1439         p2 = VARDATA(t2);
1440
1441         pos = 0;
1442         px = (len1 - len2);
1443         for (p = 0; p <= px; p++)
1444         {
1445                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1446                 {
1447                         pos = p + 1;
1448                         break;
1449                 };
1450                 p1++;
1451         };
1452
1453         PG_RETURN_INT32(pos);
1454 }
1455
1456 /*-------------------------------------------------------------
1457  * byteaGetByte
1458  *
1459  * this routine treats "bytea" as an array of bytes.
1460  * It returns the Nth byte (a number between 0 and 255).
1461  *-------------------------------------------------------------
1462  */
1463 Datum
1464 byteaGetByte(PG_FUNCTION_ARGS)
1465 {
1466         bytea      *v = PG_GETARG_BYTEA_P(0);
1467         int32           n = PG_GETARG_INT32(1);
1468         int                     len;
1469         int                     byte;
1470
1471         len = VARSIZE(v) - VARHDRSZ;
1472
1473         if (n < 0 || n >= len)
1474                 ereport(ERROR,
1475                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1476                                  errmsg("index %d out of valid range, 0..%d",
1477                                                 n, len - 1)));
1478
1479         byte = ((unsigned char *) VARDATA(v))[n];
1480
1481         PG_RETURN_INT32(byte);
1482 }
1483
1484 /*-------------------------------------------------------------
1485  * byteaGetBit
1486  *
1487  * This routine treats a "bytea" type like an array of bits.
1488  * It returns the value of the Nth bit (0 or 1).
1489  *
1490  *-------------------------------------------------------------
1491  */
1492 Datum
1493 byteaGetBit(PG_FUNCTION_ARGS)
1494 {
1495         bytea      *v = PG_GETARG_BYTEA_P(0);
1496         int32           n = PG_GETARG_INT32(1);
1497         int                     byteNo,
1498                                 bitNo;
1499         int                     len;
1500         int                     byte;
1501
1502         len = VARSIZE(v) - VARHDRSZ;
1503
1504         if (n < 0 || n >= len * 8)
1505                 ereport(ERROR,
1506                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1507                                  errmsg("index %d out of valid range, 0..%d",
1508                                                 n, len * 8 - 1)));
1509
1510         byteNo = n / 8;
1511         bitNo = n % 8;
1512
1513         byte = ((unsigned char *) VARDATA(v))[byteNo];
1514
1515         if (byte & (1 << bitNo))
1516                 PG_RETURN_INT32(1);
1517         else
1518                 PG_RETURN_INT32(0);
1519 }
1520
1521 /*-------------------------------------------------------------
1522  * byteaSetByte
1523  *
1524  * Given an instance of type 'bytea' creates a new one with
1525  * the Nth byte set to the given value.
1526  *
1527  *-------------------------------------------------------------
1528  */
1529 Datum
1530 byteaSetByte(PG_FUNCTION_ARGS)
1531 {
1532         bytea      *v = PG_GETARG_BYTEA_P(0);
1533         int32           n = PG_GETARG_INT32(1);
1534         int32           newByte = PG_GETARG_INT32(2);
1535         int                     len;
1536         bytea      *res;
1537
1538         len = VARSIZE(v) - VARHDRSZ;
1539
1540         if (n < 0 || n >= len)
1541                 ereport(ERROR,
1542                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1543                                  errmsg("index %d out of valid range, 0..%d",
1544                                                 n, len - 1)));
1545
1546         /*
1547          * Make a copy of the original varlena.
1548          */
1549         res = (bytea *) palloc(VARSIZE(v));
1550         memcpy((char *) res, (char *) v, VARSIZE(v));
1551
1552         /*
1553          * Now set the byte.
1554          */
1555         ((unsigned char *) VARDATA(res))[n] = newByte;
1556
1557         PG_RETURN_BYTEA_P(res);
1558 }
1559
1560 /*-------------------------------------------------------------
1561  * byteaSetBit
1562  *
1563  * Given an instance of type 'bytea' creates a new one with
1564  * the Nth bit set to the given value.
1565  *
1566  *-------------------------------------------------------------
1567  */
1568 Datum
1569 byteaSetBit(PG_FUNCTION_ARGS)
1570 {
1571         bytea      *v = PG_GETARG_BYTEA_P(0);
1572         int32           n = PG_GETARG_INT32(1);
1573         int32           newBit = PG_GETARG_INT32(2);
1574         bytea      *res;
1575         int                     len;
1576         int                     oldByte,
1577                                 newByte;
1578         int                     byteNo,
1579                                 bitNo;
1580
1581         len = VARSIZE(v) - VARHDRSZ;
1582
1583         if (n < 0 || n >= len * 8)
1584                 ereport(ERROR,
1585                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1586                                  errmsg("index %d out of valid range, 0..%d",
1587                                                 n, len * 8 - 1)));
1588
1589         byteNo = n / 8;
1590         bitNo = n % 8;
1591
1592         /*
1593          * sanity check!
1594          */
1595         if (newBit != 0 && newBit != 1)
1596                 ereport(ERROR,
1597                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1598                                  errmsg("new bit must be 0 or 1")));
1599
1600         /*
1601          * Make a copy of the original varlena.
1602          */
1603         res = (bytea *) palloc(VARSIZE(v));
1604         memcpy((char *) res, (char *) v, VARSIZE(v));
1605
1606         /*
1607          * Update the byte.
1608          */
1609         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1610
1611         if (newBit == 0)
1612                 newByte = oldByte & (~(1 << bitNo));
1613         else
1614                 newByte = oldByte | (1 << bitNo);
1615
1616         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1617
1618         PG_RETURN_BYTEA_P(res);
1619 }
1620
1621
1622 /* text_name()
1623  * Converts a text type to a Name type.
1624  */
1625 Datum
1626 text_name(PG_FUNCTION_ARGS)
1627 {
1628         text       *s = PG_GETARG_TEXT_P(0);
1629         Name            result;
1630         int                     len;
1631
1632         len = VARSIZE(s) - VARHDRSZ;
1633
1634         /* Truncate oversize input */
1635         if (len >= NAMEDATALEN)
1636                 len = NAMEDATALEN - 1;
1637
1638 #ifdef STRINGDEBUG
1639         printf("text- convert string length %d (%d) ->%d\n",
1640                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1641 #endif
1642
1643         result = (Name) palloc(NAMEDATALEN);
1644         memcpy(NameStr(*result), VARDATA(s), len);
1645
1646         /* now null pad to full length... */
1647         while (len < NAMEDATALEN)
1648         {
1649                 *(NameStr(*result) + len) = '\0';
1650                 len++;
1651         }
1652
1653         PG_RETURN_NAME(result);
1654 }
1655
1656 /* name_text()
1657  * Converts a Name type to a text type.
1658  */
1659 Datum
1660 name_text(PG_FUNCTION_ARGS)
1661 {
1662         Name            s = PG_GETARG_NAME(0);
1663         text       *result;
1664         int                     len;
1665
1666         len = strlen(NameStr(*s));
1667
1668 #ifdef STRINGDEBUG
1669         printf("text- convert string length %d (%d) ->%d\n",
1670                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1671 #endif
1672
1673         result = palloc(VARHDRSZ + len);
1674         VARATT_SIZEP(result) = VARHDRSZ + len;
1675         memcpy(VARDATA(result), NameStr(*s), len);
1676
1677         PG_RETURN_TEXT_P(result);
1678 }
1679
1680
1681 /*
1682  * textToQualifiedNameList - convert a text object to list of names
1683  *
1684  * This implements the input parsing needed by nextval() and other
1685  * functions that take a text parameter representing a qualified name.
1686  * We split the name at dots, downcase if not double-quoted, and
1687  * truncate names if they're too long.
1688  */
1689 List *
1690 textToQualifiedNameList(text *textval)
1691 {
1692         char       *rawname;
1693         List       *result = NIL;
1694         List       *namelist;
1695         ListCell   *l;
1696
1697         /* Convert to C string (handles possible detoasting). */
1698         /* Note we rely on being able to modify rawname below. */
1699         rawname = DatumGetCString(DirectFunctionCall1(textout,
1700                                                                                                   PointerGetDatum(textval)));
1701
1702         if (!SplitIdentifierString(rawname, '.', &namelist))
1703                 ereport(ERROR,
1704                                 (errcode(ERRCODE_INVALID_NAME),
1705                                  errmsg("invalid name syntax")));
1706
1707         if (namelist == NIL)
1708                 ereport(ERROR,
1709                                 (errcode(ERRCODE_INVALID_NAME),
1710                                  errmsg("invalid name syntax")));
1711
1712         foreach(l, namelist)
1713         {
1714                 char       *curname = (char *) lfirst(l);
1715
1716                 result = lappend(result, makeString(pstrdup(curname)));
1717         }
1718
1719         pfree(rawname);
1720         list_free(namelist);
1721
1722         return result;
1723 }
1724
1725 /*
1726  * SplitIdentifierString --- parse a string containing identifiers
1727  *
1728  * This is the guts of textToQualifiedNameList, and is exported for use in
1729  * other situations such as parsing GUC variables.      In the GUC case, it's
1730  * important to avoid memory leaks, so the API is designed to minimize the
1731  * amount of stuff that needs to be allocated and freed.
1732  *
1733  * Inputs:
1734  *      rawstring: the input string; must be overwritable!      On return, it's
1735  *                         been modified to contain the separated identifiers.
1736  *      separator: the separator punctuation expected between identifiers
1737  *                         (typically '.' or ',').      Whitespace may also appear around
1738  *                         identifiers.
1739  * Outputs:
1740  *      namelist: filled with a palloc'd list of pointers to identifiers within
1741  *                        rawstring.  Caller should list_free() this even on error return.
1742  *
1743  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1744  *
1745  * Note that an empty string is considered okay here, though not in
1746  * textToQualifiedNameList.
1747  */
1748 bool
1749 SplitIdentifierString(char *rawstring, char separator,
1750                                           List **namelist)
1751 {
1752         char       *nextp = rawstring;
1753         bool            done = false;
1754
1755         *namelist = NIL;
1756
1757         while (isspace((unsigned char) *nextp))
1758                 nextp++;                                /* skip leading whitespace */
1759
1760         if (*nextp == '\0')
1761                 return true;                    /* allow empty string */
1762
1763         /* At the top of the loop, we are at start of a new identifier. */
1764         do
1765         {
1766                 char       *curname;
1767                 char       *endp;
1768
1769                 if (*nextp == '\"')
1770                 {
1771                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1772                         curname = nextp + 1;
1773                         for (;;)
1774                         {
1775                                 endp = strchr(nextp + 1, '\"');
1776                                 if (endp == NULL)
1777                                         return false;           /* mismatched quotes */
1778                                 if (endp[1] != '\"')
1779                                         break;          /* found end of quoted name */
1780                                 /* Collapse adjacent quotes into one quote, and look again */
1781                                 memmove(endp, endp + 1, strlen(endp));
1782                                 nextp = endp;
1783                         }
1784                         /* endp now points at the terminating quote */
1785                         nextp = endp + 1;
1786                 }
1787                 else
1788                 {
1789                         /* Unquoted name --- extends to separator or whitespace */
1790                         char       *downname;
1791                         int                     len;
1792
1793                         curname = nextp;
1794                         while (*nextp && *nextp != separator &&
1795                                    !isspace((unsigned char) *nextp))
1796                                 nextp++;
1797                         endp = nextp;
1798                         if (curname == nextp)
1799                                 return false;   /* empty unquoted name not allowed */
1800
1801                         /*
1802                          * Downcase the identifier, using same code as main lexer does.
1803                          *
1804                          * XXX because we want to overwrite the input in-place, we cannot
1805                          * support a downcasing transformation that increases the string
1806                          * length.      This is not a problem given the current implementation
1807                          * of downcase_truncate_identifier, but we'll probably have to do
1808                          * something about this someday.
1809                          */
1810                         len = endp - curname;
1811                         downname = downcase_truncate_identifier(curname, len, false);
1812                         Assert(strlen(downname) <= len);
1813                         strncpy(curname, downname, len);
1814                         pfree(downname);
1815                 }
1816
1817                 while (isspace((unsigned char) *nextp))
1818                         nextp++;                        /* skip trailing whitespace */
1819
1820                 if (*nextp == separator)
1821                 {
1822                         nextp++;
1823                         while (isspace((unsigned char) *nextp))
1824                                 nextp++;                /* skip leading whitespace for next */
1825                         /* we expect another name, so done remains false */
1826                 }
1827                 else if (*nextp == '\0')
1828                         done = true;
1829                 else
1830                         return false;           /* invalid syntax */
1831
1832                 /* Now safe to overwrite separator with a null */
1833                 *endp = '\0';
1834
1835                 /* Truncate name if it's overlength */
1836                 truncate_identifier(curname, strlen(curname), false);
1837
1838                 /*
1839                  * Finished isolating current name --- add it to list
1840                  */
1841                 *namelist = lappend(*namelist, curname);
1842
1843                 /* Loop back if we didn't reach end of string */
1844         } while (!done);
1845
1846         return true;
1847 }
1848
1849
1850 /*****************************************************************************
1851  *      Comparison Functions used for bytea
1852  *
1853  * Note: btree indexes need these routines not to leak memory; therefore,
1854  * be careful to free working copies of toasted datums.  Most places don't
1855  * need to be so careful.
1856  *****************************************************************************/
1857
1858 Datum
1859 byteaeq(PG_FUNCTION_ARGS)
1860 {
1861         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1862         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1863         int                     len1,
1864                                 len2;
1865         bool            result;
1866
1867         len1 = VARSIZE(arg1) - VARHDRSZ;
1868         len2 = VARSIZE(arg2) - VARHDRSZ;
1869
1870         /* fast path for different-length inputs */
1871         if (len1 != len2)
1872                 result = false;
1873         else
1874                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1875
1876         PG_FREE_IF_COPY(arg1, 0);
1877         PG_FREE_IF_COPY(arg2, 1);
1878
1879         PG_RETURN_BOOL(result);
1880 }
1881
1882 Datum
1883 byteane(PG_FUNCTION_ARGS)
1884 {
1885         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1886         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1887         int                     len1,
1888                                 len2;
1889         bool            result;
1890
1891         len1 = VARSIZE(arg1) - VARHDRSZ;
1892         len2 = VARSIZE(arg2) - VARHDRSZ;
1893
1894         /* fast path for different-length inputs */
1895         if (len1 != len2)
1896                 result = true;
1897         else
1898                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1899
1900         PG_FREE_IF_COPY(arg1, 0);
1901         PG_FREE_IF_COPY(arg2, 1);
1902
1903         PG_RETURN_BOOL(result);
1904 }
1905
1906 Datum
1907 bytealt(PG_FUNCTION_ARGS)
1908 {
1909         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1910         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1911         int                     len1,
1912                                 len2;
1913         int                     cmp;
1914
1915         len1 = VARSIZE(arg1) - VARHDRSZ;
1916         len2 = VARSIZE(arg2) - VARHDRSZ;
1917
1918         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1919
1920         PG_FREE_IF_COPY(arg1, 0);
1921         PG_FREE_IF_COPY(arg2, 1);
1922
1923         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1924 }
1925
1926 Datum
1927 byteale(PG_FUNCTION_ARGS)
1928 {
1929         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1930         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1931         int                     len1,
1932                                 len2;
1933         int                     cmp;
1934
1935         len1 = VARSIZE(arg1) - VARHDRSZ;
1936         len2 = VARSIZE(arg2) - VARHDRSZ;
1937
1938         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1939
1940         PG_FREE_IF_COPY(arg1, 0);
1941         PG_FREE_IF_COPY(arg2, 1);
1942
1943         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1944 }
1945
1946 Datum
1947 byteagt(PG_FUNCTION_ARGS)
1948 {
1949         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1950         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1951         int                     len1,
1952                                 len2;
1953         int                     cmp;
1954
1955         len1 = VARSIZE(arg1) - VARHDRSZ;
1956         len2 = VARSIZE(arg2) - VARHDRSZ;
1957
1958         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1959
1960         PG_FREE_IF_COPY(arg1, 0);
1961         PG_FREE_IF_COPY(arg2, 1);
1962
1963         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1964 }
1965
1966 Datum
1967 byteage(PG_FUNCTION_ARGS)
1968 {
1969         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1970         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1971         int                     len1,
1972                                 len2;
1973         int                     cmp;
1974
1975         len1 = VARSIZE(arg1) - VARHDRSZ;
1976         len2 = VARSIZE(arg2) - VARHDRSZ;
1977
1978         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1979
1980         PG_FREE_IF_COPY(arg1, 0);
1981         PG_FREE_IF_COPY(arg2, 1);
1982
1983         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1984 }
1985
1986 Datum
1987 byteacmp(PG_FUNCTION_ARGS)
1988 {
1989         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1990         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1991         int                     len1,
1992                                 len2;
1993         int                     cmp;
1994
1995         len1 = VARSIZE(arg1) - VARHDRSZ;
1996         len2 = VARSIZE(arg2) - VARHDRSZ;
1997
1998         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1999         if ((cmp == 0) && (len1 != len2))
2000                 cmp = (len1 < len2) ? -1 : 1;
2001
2002         PG_FREE_IF_COPY(arg1, 0);
2003         PG_FREE_IF_COPY(arg2, 1);
2004
2005         PG_RETURN_INT32(cmp);
2006 }
2007
2008 /*
2009  * appendStringInfoText
2010  *
2011  * Append a text to str.
2012  * Like appendStringInfoString(str, PG_TEXT_GET_STR(s)) but faster.
2013  */
2014 static void
2015 appendStringInfoText(StringInfo str, const text *t)
2016 {
2017         appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
2018 }
2019
2020 /*
2021  * replace_text
2022  * replace all occurrences of 'old_sub_str' in 'orig_str'
2023  * with 'new_sub_str' to form 'new_str'
2024  *
2025  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2026  * otherwise returns 'new_str'
2027  */
2028 Datum
2029 replace_text(PG_FUNCTION_ARGS)
2030 {
2031         text       *src_text = PG_GETARG_TEXT_P(0);
2032         text       *from_sub_text = PG_GETARG_TEXT_P(1);
2033         text       *to_sub_text = PG_GETARG_TEXT_P(2);
2034         int                     src_text_len = TEXTLEN(src_text);
2035         int                     from_sub_text_len = TEXTLEN(from_sub_text);
2036         text       *left_text;
2037         text       *right_text;
2038         text       *buf_text;
2039         text       *ret_text;
2040         int                     curr_posn;
2041         StringInfoData str;
2042
2043         if (src_text_len == 0 || from_sub_text_len == 0)
2044                 PG_RETURN_TEXT_P(src_text);
2045
2046         curr_posn = TEXTPOS(src_text, from_sub_text);
2047
2048         /* When the from_sub_text is not found, there is nothing to do. */
2049         if (curr_posn == 0)
2050                 PG_RETURN_TEXT_P(src_text);
2051
2052         initStringInfo(&str);
2053         buf_text = src_text;
2054
2055         while (curr_posn > 0)
2056         {
2057                 left_text = text_substring(PointerGetDatum(buf_text),
2058                                                                    1, curr_posn - 1, false);
2059                 right_text = text_substring(PointerGetDatum(buf_text),
2060                                                                         curr_posn + from_sub_text_len, -1, true);
2061
2062                 appendStringInfoText(&str, left_text);
2063                 appendStringInfoText(&str, to_sub_text);
2064
2065                 if (buf_text != src_text)
2066                         pfree(buf_text);
2067                 pfree(left_text);
2068                 buf_text = right_text;
2069                 curr_posn = TEXTPOS(buf_text, from_sub_text);
2070         }
2071
2072         appendStringInfoText(&str, buf_text);
2073         if (buf_text != src_text)
2074                 pfree(buf_text);
2075
2076         ret_text = PG_STR_GET_TEXT(str.data);
2077         pfree(str.data);
2078
2079         PG_RETURN_TEXT_P(ret_text);
2080 }
2081
2082 /*
2083  * check_replace_text_has_escape_char
2084  *
2085  * check whether replace_text contains escape char.
2086  */
2087 static bool
2088 check_replace_text_has_escape_char(const text *replace_text)
2089 {
2090         const char *p = VARDATA(replace_text);
2091         const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2092
2093         if (pg_database_encoding_max_length() == 1)
2094         {
2095                 for (; p < p_end; p++)
2096                 {
2097                         if (*p == '\\')
2098                                 return true;
2099                 }
2100         }
2101         else
2102         {
2103                 for (; p < p_end; p += pg_mblen(p))
2104                 {
2105                         if (*p == '\\')
2106                                 return true;
2107                 }
2108         }
2109
2110         return false;
2111 }
2112
2113 /*
2114  * appendStringInfoRegexpSubstr
2115  *
2116  * Append replace_text to str, substituting regexp back references for
2117  * \n escapes.
2118  */
2119 static void
2120 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2121                                                          regmatch_t *pmatch, text *src_text)
2122 {
2123         const char *p = VARDATA(replace_text);
2124         const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2125         int                     eml = pg_database_encoding_max_length();
2126
2127         for (;;)
2128         {
2129                 const char *chunk_start = p;
2130                 int                     so;
2131                 int                     eo;
2132
2133                 /* Find next escape char. */
2134                 if (eml == 1)
2135                 {
2136                         for (; p < p_end && *p != '\\'; p++)
2137                                  /* nothing */ ;
2138                 }
2139                 else
2140                 {
2141                         for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2142                                  /* nothing */ ;
2143                 }
2144
2145                 /* Copy the text we just scanned over, if any. */
2146                 if (p > chunk_start)
2147                         appendBinaryStringInfo(str, chunk_start, p - chunk_start);
2148
2149                 /* Done if at end of string, else advance over escape char. */
2150                 if (p >= p_end)
2151                         break;
2152                 p++;
2153
2154                 if (p >= p_end)
2155                 {
2156                         /* Escape at very end of input.  Treat same as unexpected char */
2157                         appendStringInfoChar(str, '\\');
2158                         break;
2159                 }
2160
2161                 if (*p >= '1' && *p <= '9')
2162                 {
2163                         /* Use the back reference of regexp. */
2164                         int                     idx = *p - '0';
2165
2166                         so = pmatch[idx].rm_so;
2167                         eo = pmatch[idx].rm_eo;
2168                         p++;
2169                 }
2170                 else if (*p == '&')
2171                 {
2172                         /* Use the entire matched string. */
2173                         so = pmatch[0].rm_so;
2174                         eo = pmatch[0].rm_eo;
2175                         p++;
2176                 }
2177                 else if (*p == '\\')
2178                 {
2179                         /* \\ means transfer one \ to output. */
2180                         appendStringInfoChar(str, '\\');
2181                         p++;
2182                         continue;
2183                 }
2184                 else
2185                 {
2186                         /*
2187                          * If escape char is not followed by any expected char, just treat
2188                          * it as ordinary data to copy.  (XXX would it be better to throw
2189                          * an error?)
2190                          */
2191                         appendStringInfoChar(str, '\\');
2192                         continue;
2193                 }
2194
2195                 if (so != -1 && eo != -1)
2196                 {
2197                         /*
2198                          * Copy the text that is back reference of regexp.      Because so and
2199                          * eo are counted in characters not bytes, it's easiest to use
2200                          * text_substring to pull out the correct chunk of text.
2201                          */
2202                         text       *append_text;
2203
2204                         append_text = text_substring(PointerGetDatum(src_text),
2205                                                                                  so + 1, (eo - so), false);
2206                         appendStringInfoText(str, append_text);
2207                         pfree(append_text);
2208                 }
2209         }
2210 }
2211
2212 #define REGEXP_REPLACE_BACKREF_CNT              10
2213
2214 /*
2215  * replace_text_regexp
2216  *
2217  * replace text that matches to regexp in src_text to replace_text.
2218  *
2219  * Note: to avoid having to include regex.h in builtins.h, we declare
2220  * the regexp argument as void *, but really it's regex_t *.
2221  */
2222 text *
2223 replace_text_regexp(text *src_text, void *regexp,
2224                                         text *replace_text, bool glob)
2225 {
2226         text       *ret_text;
2227         regex_t    *re = (regex_t *) regexp;
2228         int                     src_text_len = VARSIZE(src_text) - VARHDRSZ;
2229         StringInfoData  buf;
2230         regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
2231         pg_wchar   *data;
2232         size_t          data_len;
2233         int                     search_start;
2234         int                     data_pos;
2235         bool            have_escape;
2236
2237         initStringInfo(&buf);
2238
2239         /* Convert data string to wide characters. */
2240         data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2241         data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
2242
2243         /* Check whether replace_text has escape char. */
2244         have_escape = check_replace_text_has_escape_char(replace_text);
2245
2246         for (search_start = data_pos = 0; search_start <= data_len;)
2247         {
2248                 int regexec_result;
2249
2250                 regexec_result = pg_regexec(re,
2251                                                                         data,
2252                                                                         data_len,
2253                                                                         search_start,
2254                                                                         NULL,           /* no details */
2255                                                                         REGEXP_REPLACE_BACKREF_CNT,
2256                                                                         pmatch,
2257                                                                         0);
2258
2259                 if (regexec_result == REG_NOMATCH)
2260                         break;
2261
2262                 if (regexec_result != REG_OKAY)
2263                 {
2264                         char            errMsg[100];
2265
2266                         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2267                         ereport(ERROR,
2268                                         (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2269                                          errmsg("regular expression failed: %s", errMsg)));
2270                 }
2271
2272                 /*
2273                  * Copy the text to the left of the match position.  Because we are
2274                  * working with character not byte indexes, it's easiest to use
2275                  * text_substring to pull out the needed data.
2276                  */
2277                 if (pmatch[0].rm_so - data_pos > 0)
2278                 {
2279                         text       *left_text;
2280
2281                         left_text = text_substring(PointerGetDatum(src_text),
2282                                                                            data_pos + 1,
2283                                                                            pmatch[0].rm_so - data_pos,
2284                                                                            false);
2285                         appendStringInfoText(&buf, left_text);
2286                         pfree(left_text);
2287                 }
2288
2289                 /*
2290                  * Copy the replace_text. Process back references when the
2291                  * replace_text has escape characters.
2292                  */
2293                 if (have_escape)
2294                         appendStringInfoRegexpSubstr(&buf, replace_text, pmatch, src_text);
2295                 else
2296                         appendStringInfoText(&buf, replace_text);
2297
2298                 search_start = data_pos = pmatch[0].rm_eo;
2299
2300                 /*
2301                  * When global option is off, replace the first instance only.
2302                  */
2303                 if (!glob)
2304                         break;
2305
2306                 /*
2307                  * Search from next character when the matching text is zero width.
2308                  */
2309                 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2310                         search_start++;
2311         }
2312
2313         /*
2314          * Copy the text to the right of the last match.
2315          */
2316         if (data_pos < data_len)
2317         {
2318                 text       *right_text;
2319
2320                 right_text = text_substring(PointerGetDatum(src_text),
2321                                                                         data_pos + 1, -1, true);
2322                 appendStringInfoText(&buf, right_text);
2323                 pfree(right_text);
2324         }
2325
2326         ret_text = PG_STR_GET_TEXT(buf.data);
2327         pfree(buf.data);
2328         pfree(data);
2329
2330         return ret_text;
2331 }
2332
2333 /*
2334  * split_text
2335  * parse input string
2336  * return ord item (1 based)
2337  * based on provided field separator
2338  */
2339 Datum
2340 split_text(PG_FUNCTION_ARGS)
2341 {
2342         text       *inputstring = PG_GETARG_TEXT_P(0);
2343         text       *fldsep = PG_GETARG_TEXT_P(1);
2344         int                     fldnum = PG_GETARG_INT32(2);
2345         int                     inputstring_len = TEXTLEN(inputstring);
2346         int                     fldsep_len = TEXTLEN(fldsep);
2347         int                     start_posn;
2348         int                     end_posn;
2349         text       *result_text;
2350
2351         /* field number is 1 based */
2352         if (fldnum < 1)
2353                 ereport(ERROR,
2354                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2355                                  errmsg("field position must be greater than zero")));
2356
2357         /* return empty string for empty input string */
2358         if (inputstring_len < 1)
2359                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2360
2361         /* empty field separator */
2362         if (fldsep_len < 1)
2363         {
2364                 /* if first field, return input string, else empty string */
2365                 if (fldnum == 1)
2366                         PG_RETURN_TEXT_P(inputstring);
2367                 else
2368                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2369         }
2370
2371         start_posn = text_position(inputstring, fldsep, fldnum - 1);
2372         end_posn = text_position(inputstring, fldsep, fldnum);
2373
2374         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2375         {
2376                 /* if first field, return input string, else empty string */
2377                 if (fldnum == 1)
2378                         PG_RETURN_TEXT_P(inputstring);
2379                 else
2380                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2381         }
2382         else if (start_posn == 0)
2383         {
2384                 /* first field requested */
2385                 result_text = LEFT(inputstring, fldsep);
2386                 PG_RETURN_TEXT_P(result_text);
2387         }
2388         else if (end_posn == 0)
2389         {
2390                 /* last field requested */
2391                 result_text = text_substring(PointerGetDatum(inputstring),
2392                                                                          start_posn + fldsep_len,
2393                                                                          -1, true);
2394                 PG_RETURN_TEXT_P(result_text);
2395         }
2396         else
2397         {
2398                 /* interior field requested */
2399                 result_text = text_substring(PointerGetDatum(inputstring),
2400                                                                          start_posn + fldsep_len,
2401                                                                          end_posn - start_posn - fldsep_len,
2402                                                                          false);
2403                 PG_RETURN_TEXT_P(result_text);
2404         }
2405 }
2406
2407 /*
2408  * text_to_array
2409  * parse input string
2410  * return text array of elements
2411  * based on provided field separator
2412  */
2413 Datum
2414 text_to_array(PG_FUNCTION_ARGS)
2415 {
2416         text       *inputstring = PG_GETARG_TEXT_P(0);
2417         text       *fldsep = PG_GETARG_TEXT_P(1);
2418         int                     inputstring_len = TEXTLEN(inputstring);
2419         int                     fldsep_len = TEXTLEN(fldsep);
2420         int                     fldnum;
2421         int                     start_posn;
2422         int                     end_posn;
2423         text       *result_text;
2424         ArrayBuildState *astate = NULL;
2425
2426         /* return NULL for empty input string */
2427         if (inputstring_len < 1)
2428                 PG_RETURN_NULL();
2429
2430         /*
2431          * empty field separator return one element, 1D, array using the input
2432          * string
2433          */
2434         if (fldsep_len < 1)
2435                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2436                                                                                    CStringGetDatum(inputstring), 1));
2437
2438         /* start with end position holding the initial start position */
2439         end_posn = 0;
2440         for (fldnum = 1;; fldnum++) /* field number is 1 based */
2441         {
2442                 Datum           dvalue;
2443                 bool            disnull = false;
2444
2445                 start_posn = end_posn;
2446                 end_posn = text_position(inputstring, fldsep, fldnum);
2447
2448                 if ((start_posn == 0) && (end_posn == 0))               /* fldsep not found */
2449                 {
2450                         if (fldnum == 1)
2451                         {
2452                                 /*
2453                                  * first element return one element, 1D, array using the input
2454                                  * string
2455                                  */
2456                                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2457                                                                                    CStringGetDatum(inputstring), 1));
2458                         }
2459                         else
2460                         {
2461                                 /* otherwise create array and exit */
2462                                 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2463                                                                                                           CurrentMemoryContext));
2464                         }
2465                 }
2466                 else if (start_posn == 0)
2467                 {
2468                         /* first field requested */
2469                         result_text = LEFT(inputstring, fldsep);
2470                 }
2471                 else if (end_posn == 0)
2472                 {
2473                         /* last field requested */
2474                         result_text = text_substring(PointerGetDatum(inputstring),
2475                                                                                  start_posn + fldsep_len,
2476                                                                                  -1, true);
2477                 }
2478                 else
2479                 {
2480                         /* interior field requested */
2481                         result_text = text_substring(PointerGetDatum(inputstring),
2482                                                                                  start_posn + fldsep_len,
2483                                                                                  end_posn - start_posn - fldsep_len,
2484                                                                                  false);
2485                 }
2486
2487                 /* stash away current value */
2488                 dvalue = PointerGetDatum(result_text);
2489                 astate = accumArrayResult(astate, dvalue,
2490                                                                   disnull, TEXTOID,
2491                                                                   CurrentMemoryContext);
2492         }
2493
2494         /* never reached -- keep compiler quiet */
2495         PG_RETURN_NULL();
2496 }
2497
2498 /*
2499  * array_to_text
2500  * concatenate Cstring representation of input array elements
2501  * using provided field separator
2502  */
2503 Datum
2504 array_to_text(PG_FUNCTION_ARGS)
2505 {
2506         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2507         char       *fldsep = PG_TEXTARG_GET_STR(1);
2508         int                     nitems,
2509                            *dims,
2510                                 ndims;
2511         Oid                     element_type;
2512         int                     typlen;
2513         bool            typbyval;
2514         char            typalign;
2515         StringInfoData  buf;
2516         bool            printed = false;
2517         char       *p;
2518         bits8      *bitmap;
2519         int                     bitmask;
2520         int                     i;
2521         ArrayMetaState *my_extra;
2522
2523         ndims = ARR_NDIM(v);
2524         dims = ARR_DIMS(v);
2525         nitems = ArrayGetNItems(ndims, dims);
2526
2527         /* if there are no elements, return an empty string */
2528         if (nitems == 0)
2529                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2530
2531         element_type = ARR_ELEMTYPE(v);
2532         initStringInfo(&buf);
2533
2534         /*
2535          * We arrange to look up info about element type, including its output
2536          * conversion proc, only once per series of calls, assuming the element
2537          * type doesn't change underneath us.
2538          */
2539         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2540         if (my_extra == NULL)
2541         {
2542                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2543                                                                                                           sizeof(ArrayMetaState));
2544                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2545                 my_extra->element_type = ~element_type;
2546         }
2547
2548         if (my_extra->element_type != element_type)
2549         {
2550                 /*
2551                  * Get info about element type, including its output conversion proc
2552                  */
2553                 get_type_io_data(element_type, IOFunc_output,
2554                                                  &my_extra->typlen, &my_extra->typbyval,
2555                                                  &my_extra->typalign, &my_extra->typdelim,
2556                                                  &my_extra->typioparam, &my_extra->typiofunc);
2557                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2558                                           fcinfo->flinfo->fn_mcxt);
2559                 my_extra->element_type = element_type;
2560         }
2561         typlen = my_extra->typlen;
2562         typbyval = my_extra->typbyval;
2563         typalign = my_extra->typalign;
2564
2565         p = ARR_DATA_PTR(v);
2566         bitmap = ARR_NULLBITMAP(v);
2567         bitmask = 1;
2568
2569         for (i = 0; i < nitems; i++)
2570         {
2571                 Datum           itemvalue;
2572                 char       *value;
2573
2574                 /* Get source element, checking for NULL */
2575                 if (bitmap && (*bitmap & bitmask) == 0)
2576                 {
2577                         /* we ignore nulls */
2578                 }
2579                 else
2580                 {
2581                         itemvalue = fetch_att(p, typbyval, typlen);
2582
2583                         value = DatumGetCString(FunctionCall1(&my_extra->proc,
2584                                                                                                   itemvalue));
2585
2586                         if (printed)
2587                                 appendStringInfo(&buf, "%s%s", fldsep, value);
2588                         else
2589                                 appendStringInfoString(&buf, value);
2590                         printed = true;
2591
2592                         p = att_addlength(p, typlen, PointerGetDatum(p));
2593                         p = (char *) att_align(p, typalign);
2594                 }
2595
2596                 /* advance bitmap pointer if any */
2597                 if (bitmap)
2598                 {
2599                         bitmask <<= 1;
2600                         if (bitmask == 0x100)
2601                         {
2602                                 bitmap++;
2603                                 bitmask = 1;
2604                         }
2605                 }
2606         }
2607
2608         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(buf.data));
2609 }
2610
2611 #define HEXBASE 16
2612 /*
2613  * Convert a int32 to a string containing a base 16 (hex) representation of
2614  * the number.
2615  */
2616 Datum
2617 to_hex32(PG_FUNCTION_ARGS)
2618 {
2619         uint32          value = (uint32) PG_GETARG_INT32(0);
2620         text       *result_text;
2621         char       *ptr;
2622         const char *digits = "0123456789abcdef";
2623         char            buf[32];                /* bigger than needed, but reasonable */
2624
2625         ptr = buf + sizeof(buf) - 1;
2626         *ptr = '\0';
2627
2628         do
2629         {
2630                 *--ptr = digits[value % HEXBASE];
2631                 value /= HEXBASE;
2632         } while (ptr > buf && value);
2633
2634         result_text = PG_STR_GET_TEXT(ptr);
2635         PG_RETURN_TEXT_P(result_text);
2636 }
2637
2638 /*
2639  * Convert a int64 to a string containing a base 16 (hex) representation of
2640  * the number.
2641  */
2642 Datum
2643 to_hex64(PG_FUNCTION_ARGS)
2644 {
2645         uint64          value = (uint64) PG_GETARG_INT64(0);
2646         text       *result_text;
2647         char       *ptr;
2648         const char *digits = "0123456789abcdef";
2649         char            buf[32];                /* bigger than needed, but reasonable */
2650
2651         ptr = buf + sizeof(buf) - 1;
2652         *ptr = '\0';
2653
2654         do
2655         {
2656                 *--ptr = digits[value % HEXBASE];
2657                 value /= HEXBASE;
2658         } while (ptr > buf && value);
2659
2660         result_text = PG_STR_GET_TEXT(ptr);
2661         PG_RETURN_TEXT_P(result_text);
2662 }
2663
2664 /*
2665  * Create an md5 hash of a text string and return it as hex
2666  *
2667  * md5 produces a 16 byte (128 bit) hash; double it for hex
2668  */
2669 #define MD5_HASH_LEN  32
2670
2671 Datum
2672 md5_text(PG_FUNCTION_ARGS)
2673 {
2674         text       *in_text = PG_GETARG_TEXT_P(0);
2675         size_t          len;
2676         char            hexsum[MD5_HASH_LEN + 1];
2677         text       *result_text;
2678
2679         /* Calculate the length of the buffer using varlena metadata */
2680         len = VARSIZE(in_text) - VARHDRSZ;
2681
2682         /* get the hash result */
2683         if (pg_md5_hash(VARDATA(in_text), len, hexsum) == false)
2684                 ereport(ERROR,
2685                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2686                                  errmsg("out of memory")));
2687
2688         /* convert to text and return it */
2689         result_text = PG_STR_GET_TEXT(hexsum);
2690         PG_RETURN_TEXT_P(result_text);
2691 }
2692
2693 /*
2694  * Create an md5 hash of a bytea field and return it as a hex string:
2695  * 16-byte md5 digest is represented in 32 hex characters.
2696  */
2697 Datum
2698 md5_bytea(PG_FUNCTION_ARGS)
2699 {
2700         bytea      *in = PG_GETARG_BYTEA_P(0);
2701         size_t          len;
2702         char            hexsum[MD5_HASH_LEN + 1];
2703         text       *result_text;
2704
2705         len = VARSIZE(in) - VARHDRSZ;
2706         if (pg_md5_hash(VARDATA(in), len, hexsum) == false)
2707                 ereport(ERROR,
2708                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2709                                  errmsg("out of memory")));
2710
2711         result_text = PG_STR_GET_TEXT(hexsum);
2712         PG_RETURN_TEXT_P(result_text);
2713 }
2714
2715 /*
2716  * Return the size of a datum, possibly compressed
2717  *
2718  * Works on any data type
2719  */
2720 Datum
2721 pg_column_size(PG_FUNCTION_ARGS)
2722 {
2723         Datum           value = PG_GETARG_DATUM(0);
2724         int32           result;
2725         int                     typlen;
2726
2727         /* On first call, get the input type's typlen, and save at *fn_extra */
2728         if (fcinfo->flinfo->fn_extra == NULL)
2729         {
2730                 /* Lookup the datatype of the supplied argument */
2731                 Oid                     argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
2732
2733                 typlen = get_typlen(argtypeid);
2734                 if (typlen == 0)                /* should not happen */
2735                         elog(ERROR, "cache lookup failed for type %u", argtypeid);
2736
2737                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2738                                                                                                           sizeof(int));
2739                 *((int *) fcinfo->flinfo->fn_extra) = typlen;
2740         }
2741         else
2742                 typlen = *((int *) fcinfo->flinfo->fn_extra);
2743
2744         if (typlen == -1)
2745         {
2746                 /* varlena type, possibly toasted */
2747                 result = toast_datum_size(value);
2748         }
2749         else if (typlen == -2)
2750         {
2751                 /* cstring */
2752                 result = strlen(DatumGetCString(value)) + 1;
2753         }
2754         else
2755         {
2756                 /* ordinary fixed-width type */
2757                 result = typlen;
2758         }
2759
2760         PG_RETURN_INT32(result);
2761 }