granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.129 2005/07/21 04:41:43 momjian Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18
  19 #include "access/tuptoaster.h"
  20 #include "catalog/pg_type.h"
  21 #include "lib/stringinfo.h"
  22 #include "libpq/crypt.h"
  23 #include "libpq/pqformat.h"
  24 #include "mb/pg_wchar.h"
  25 #include "miscadmin.h"
  26 #include "parser/scansup.h"
  27 #include "utils/array.h"
  28 #include "utils/builtins.h"
  29 #include "utils/lsyscache.h"
  30 #include "utils/pg_locale.h"
  31 #include "regex/regex.h"
  32
  33
  34 typedef struct varlena unknown;
  35
  36 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  37 #define DatumGetUnknownPCopy(X)         ((unknown *) PG_DETOAST_DATUM_COPY(X))
  38 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  39 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
  40 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  41
  42 #define PG_TEXTARG_GET_STR(arg_) \
  43         DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
  44 #define PG_TEXT_GET_STR(textp_) \
  45         DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
  46 #define PG_STR_GET_TEXT(str_) \
  47         DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
  48 #define TEXTLEN(textp) \
  49         text_length(PointerGetDatum(textp))
  50 #define TEXTPOS(buf_text, from_sub_text) \
  51         text_position(buf_text, from_sub_text, 1)
  52 #define LEFT(buf_text, from_sub_text) \
  53         text_substring(PointerGetDatum(buf_text), \
  54                                         1, \
  55                                         TEXTPOS(buf_text, from_sub_text) - 1, false)
  56
  57 static int      text_cmp(text *arg1, text *arg2);
  58 static int32 text_length(Datum str);
  59 static int32 text_position(text *t1, text *t2, int matchnum);
  60 static text *text_substring(Datum str,
  61                            int32 start,
  62                            int32 length,
  63                            bool length_not_specified);
  64
  65 static void appendStringInfoText(StringInfo str, const text *t);
  66
  67
  68 /*****************************************************************************
  69  *       USER I/O ROUTINES                                                                                                               *
  70  *****************************************************************************/
  71
  72
  73 #define VAL(CH)                 ((CH) - '0')
  74 #define DIG(VAL)                ((VAL) + '0')
  75
  76 /*
  77  *              byteain                 - converts from printable representation of byte array
  78  *
  79  *              Non-printable characters must be passed as '\nnn' (octal) and are
  80  *              converted to internal form.  '\' must be passed as '\\'.
  81  *              ereport(ERROR, ...) if bad form.
  82  *
  83  *              BUGS:
  84  *                              The input is scaned twice.
  85  *                              The error checking of input is minimal.
  86  */
  87 Datum
  88 byteain(PG_FUNCTION_ARGS)
  89 {
  90         char       *inputText = PG_GETARG_CSTRING(0);
  91         char       *tp;
  92         char       *rp;
  93         int                     byte;
  94         bytea      *result;
  95
  96         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
  97         {
  98                 if (tp[0] != '\\')
  99                         tp++;
 100                 else if ((tp[0] == '\\') &&
 101                                  (tp[1] >= '0' && tp[1] <= '3') &&
 102                                  (tp[2] >= '0' && tp[2] <= '7') &&
 103                                  (tp[3] >= '0' && tp[3] <= '7'))
 104                         tp += 4;
 105                 else if ((tp[0] == '\\') &&
 106                                  (tp[1] == '\\'))
 107                         tp += 2;
 108                 else
 109                 {
 110                         /*
 111                          * one backslash, not followed by 0 or ### valid octal
 112                          */
 113                         ereport(ERROR,
 114                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 115                                          errmsg("invalid input syntax for type bytea")));
 116                 }
 117         }
 118
 119         byte += VARHDRSZ;
 120         result = (bytea *) palloc(byte);
 121         VARATT_SIZEP(result) = byte;    /* set varlena length */
 122
 123         tp = inputText;
 124         rp = VARDATA(result);
 125         while (*tp != '\0')
 126         {
 127                 if (tp[0] != '\\')
 128                         *rp++ = *tp++;
 129                 else if ((tp[0] == '\\') &&
 130                                  (tp[1] >= '0' && tp[1] <= '3') &&
 131                                  (tp[2] >= '0' && tp[2] <= '7') &&
 132                                  (tp[3] >= '0' && tp[3] <= '7'))
 133                 {
 134                         byte = VAL(tp[1]);
 135                         byte <<= 3;
 136                         byte += VAL(tp[2]);
 137                         byte <<= 3;
 138                         *rp++ = byte + VAL(tp[3]);
 139                         tp += 4;
 140                 }
 141                 else if ((tp[0] == '\\') &&
 142                                  (tp[1] == '\\'))
 143                 {
 144                         *rp++ = '\\';
 145                         tp += 2;
 146                 }
 147                 else
 148                 {
 149                         /*
 150                          * We should never get here. The first pass should not allow
 151                          * it.
 152                          */
 153                         ereport(ERROR,
 154                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 155                                          errmsg("invalid input syntax for type bytea")));
 156                 }
 157         }
 158
 159         PG_RETURN_BYTEA_P(result);
 160 }
 161
 162 /*
 163  *              byteaout                - converts to printable representation of byte array
 164  *
 165  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 166  *              '\\'.
 167  *
 168  *              NULL vlena should be an error--returning string with NULL for now.
 169  */
 170 Datum
 171 byteaout(PG_FUNCTION_ARGS)
 172 {
 173         bytea      *vlena = PG_GETARG_BYTEA_P(0);
 174         char       *result;
 175         char       *vp;
 176         char       *rp;
 177         int                     val;                    /* holds unprintable chars */
 178         int                     i;
 179         int                     len;
 180
 181         len = 1;                                        /* empty string has 1 char */
 182         vp = VARDATA(vlena);
 183         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 184         {
 185                 if (*vp == '\\')
 186                         len += 2;
 187                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 188                         len += 4;
 189                 else
 190                         len++;
 191         }
 192         rp = result = (char *) palloc(len);
 193         vp = VARDATA(vlena);
 194         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 195         {
 196                 if (*vp == '\\')
 197                 {
 198                         *rp++ = '\\';
 199                         *rp++ = '\\';
 200                 }
 201                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 202                 {
 203                         val = *vp;
 204                         rp[0] = '\\';
 205                         rp[3] = DIG(val & 07);
 206                         val >>= 3;
 207                         rp[2] = DIG(val & 07);
 208                         val >>= 3;
 209                         rp[1] = DIG(val & 03);
 210                         rp += 4;
 211                 }
 212                 else
 213                         *rp++ = *vp;
 214         }
 215         *rp = '\0';
 216         PG_RETURN_CSTRING(result);
 217 }
 218
 219 /*
 220  *              bytearecv                       - converts external binary format to bytea
 221  */
 222 Datum
 223 bytearecv(PG_FUNCTION_ARGS)
 224 {
 225         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 226         bytea      *result;
 227         int                     nbytes;
 228
 229         nbytes = buf->len - buf->cursor;
 230         result = (bytea *) palloc(nbytes + VARHDRSZ);
 231         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 232         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 233         PG_RETURN_BYTEA_P(result);
 234 }
 235
 236 /*
 237  *              byteasend                       - converts bytea to binary format
 238  *
 239  * This is a special case: just copy the input...
 240  */
 241 Datum
 242 byteasend(PG_FUNCTION_ARGS)
 243 {
 244         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
 245
 246         PG_RETURN_BYTEA_P(vlena);
 247 }
 248
 249
 250 /*
 251  *              textin                  - converts "..." to internal representation
 252  */
 253 Datum
 254 textin(PG_FUNCTION_ARGS)
 255 {
 256         char       *inputText = PG_GETARG_CSTRING(0);
 257         text       *result;
 258         int                     len;
 259
 260         /* verify encoding */
 261         len = strlen(inputText);
 262         pg_verifymbstr(inputText, len, false);
 263
 264         result = (text *) palloc(len + VARHDRSZ);
 265         VARATT_SIZEP(result) = len + VARHDRSZ;
 266
 267         memcpy(VARDATA(result), inputText, len);
 268
 269         PG_RETURN_TEXT_P(result);
 270 }
 271
 272 /*
 273  *              textout                 - converts internal representation to "..."
 274  */
 275 Datum
 276 textout(PG_FUNCTION_ARGS)
 277 {
 278         text       *t = PG_GETARG_TEXT_P(0);
 279         int                     len;
 280         char       *result;
 281
 282         len = VARSIZE(t) - VARHDRSZ;
 283         result = (char *) palloc(len + 1);
 284         memcpy(result, VARDATA(t), len);
 285         result[len] = '\0';
 286
 287         PG_RETURN_CSTRING(result);
 288 }
 289
 290 /*
 291  *              textrecv                        - converts external binary format to text
 292  */
 293 Datum
 294 textrecv(PG_FUNCTION_ARGS)
 295 {
 296         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 297         text       *result;
 298         char       *str;
 299         int                     nbytes;
 300
 301         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 302
 303         /* verify encoding */
 304         pg_verifymbstr(str, nbytes, false);
 305
 306         result = (text *) palloc(nbytes + VARHDRSZ);
 307         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 308         memcpy(VARDATA(result), str, nbytes);
 309         pfree(str);
 310         PG_RETURN_TEXT_P(result);
 311 }
 312
 313 /*
 314  *              textsend                        - converts text to binary format
 315  */
 316 Datum
 317 textsend(PG_FUNCTION_ARGS)
 318 {
 319         text       *t = PG_GETARG_TEXT_P(0);
 320         StringInfoData buf;
 321
 322         pq_begintypsend(&buf);
 323         pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
 324         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 325 }
 326
 327
 328 /*
 329  *              unknownin                       - converts "..." to internal representation
 330  */
 331 Datum
 332 unknownin(PG_FUNCTION_ARGS)
 333 {
 334         char       *str = PG_GETARG_CSTRING(0);
 335
 336         /* representation is same as cstring */
 337         PG_RETURN_CSTRING(pstrdup(str));
 338 }
 339
 340 /*
 341  *              unknownout                      - converts internal representation to "..."
 342  */
 343 Datum
 344 unknownout(PG_FUNCTION_ARGS)
 345 {
 346         /* representation is same as cstring */
 347         char       *str = PG_GETARG_CSTRING(0);
 348
 349         PG_RETURN_CSTRING(pstrdup(str));
 350 }
 351
 352 /*
 353  *              unknownrecv                     - converts external binary format to unknown
 354  */
 355 Datum
 356 unknownrecv(PG_FUNCTION_ARGS)
 357 {
 358         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 359         char       *str;
 360         int                     nbytes;
 361
 362         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 363         /* representation is same as cstring */
 364         PG_RETURN_CSTRING(str);
 365 }
 366
 367 /*
 368  *              unknownsend                     - converts unknown to binary format
 369  */
 370 Datum
 371 unknownsend(PG_FUNCTION_ARGS)
 372 {
 373         /* representation is same as cstring */
 374         char       *str = PG_GETARG_CSTRING(0);
 375         StringInfoData buf;
 376
 377         pq_begintypsend(&buf);
 378         pq_sendtext(&buf, str, strlen(str));
 379         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 380 }
 381
 382
 383 /* ========== PUBLIC ROUTINES ========== */
 384
 385 /*
 386  * textlen -
 387  *        returns the logical length of a text*
 388  *         (which is less than the VARSIZE of the text*)
 389  */
 390 Datum
 391 textlen(PG_FUNCTION_ARGS)
 392 {
 393         Datum           str = PG_GETARG_DATUM(0);
 394
 395         /* try to avoid decompressing argument */
 396         PG_RETURN_INT32(text_length(str));
 397 }
 398
 399 /*
 400  * text_length -
 401  *      Does the real work for textlen()
 402  *
 403  *      This is broken out so it can be called directly by other string processing
 404  *      functions.      Note that the argument is passed as a Datum, to indicate that
 405  *      it may still be in compressed form.  We can avoid decompressing it at all
 406  *      in some cases.
 407  */
 408 static int32
 409 text_length(Datum str)
 410 {
 411         /* fastpath when max encoding length is one */
 412         if (pg_database_encoding_max_length() == 1)
 413                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 414         else
 415         {
 416                 text       *t = DatumGetTextP(str);
 417
 418                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
 419                                                                                          VARSIZE(t) - VARHDRSZ));
 420         }
 421 }
 422
 423 /*
 424  * textoctetlen -
 425  *        returns the physical length of a text*
 426  *         (which is less than the VARSIZE of the text*)
 427  */
 428 Datum
 429 textoctetlen(PG_FUNCTION_ARGS)
 430 {
 431         Datum           str = PG_GETARG_DATUM(0);
 432
 433         /* We need not detoast the input at all */
 434         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 435 }
 436
 437 /*
 438  * textcat -
 439  *        takes two text* and returns a text* that is the concatenation of
 440  *        the two.
 441  *
 442  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 443  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 444  * Allocate space for output in all cases.
 445  * XXX - thomas 1997-07-10
 446  */
 447 Datum
 448 textcat(PG_FUNCTION_ARGS)
 449 {
 450         text       *t1 = PG_GETARG_TEXT_P(0);
 451         text       *t2 = PG_GETARG_TEXT_P(1);
 452         int                     len1,
 453                                 len2,
 454                                 len;
 455         text       *result;
 456         char       *ptr;
 457
 458         len1 = VARSIZE(t1) - VARHDRSZ;
 459         if (len1 < 0)
 460                 len1 = 0;
 461
 462         len2 = VARSIZE(t2) - VARHDRSZ;
 463         if (len2 < 0)
 464                 len2 = 0;
 465
 466         len = len1 + len2 + VARHDRSZ;
 467         result = (text *) palloc(len);
 468
 469         /* Set size of result string... */
 470         VARATT_SIZEP(result) = len;
 471
 472         /* Fill data field of result string... */
 473         ptr = VARDATA(result);
 474         if (len1 > 0)
 475                 memcpy(ptr, VARDATA(t1), len1);
 476         if (len2 > 0)
 477                 memcpy(ptr + len1, VARDATA(t2), len2);
 478
 479         PG_RETURN_TEXT_P(result);
 480 }
 481
 482 /*
 483  * text_substr()
 484  * Return a substring starting at the specified position.
 485  * - thomas 1997-12-31
 486  *
 487  * Input:
 488  *      - string
 489  *      - starting position (is one-based)
 490  *      - string length
 491  *
 492  * If the starting position is zero or less, then return from the start of the string
 493  *      adjusting the length to be consistent with the "negative start" per SQL92.
 494  * If the length is less than zero, return the remaining string.
 495  *
 496  * Added multibyte support.
 497  * - Tatsuo Ishii 1998-4-21
 498  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 499  * Formerly returned the entire string; now returns a portion.
 500  * - Thomas Lockhart 1998-12-10
 501  * Now uses faster TOAST-slicing interface
 502  * - John Gray 2002-02-22
 503  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 504  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 505  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 506  * S > LC and < LC + 4 sometimes garbage characters are returned.
 507  * - Joe Conway 2002-08-10
 508  */
 509 Datum
 510 text_substr(PG_FUNCTION_ARGS)
 511 {
 512         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 513                                                                         PG_GETARG_INT32(1),
 514                                                                         PG_GETARG_INT32(2),
 515                                                                         false));
 516 }
 517
 518 /*
 519  * text_substr_no_len -
 520  *        Wrapper to avoid opr_sanity failure due to
 521  *        one function accepting a different number of args.
 522  */
 523 Datum
 524 text_substr_no_len(PG_FUNCTION_ARGS)
 525 {
 526         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 527                                                                         PG_GETARG_INT32(1),
 528                                                                         -1, true));
 529 }
 530
 531 /*
 532  * text_substring -
 533  *      Does the real work for text_substr() and text_substr_no_len()
 534  *
 535  *      This is broken out so it can be called directly by other string processing
 536  *      functions.      Note that the argument is passed as a Datum, to indicate that
 537  *      it may still be in compressed/toasted form.  We can avoid detoasting all
 538  *      of it in some cases.
 539  */
 540 static text *
 541 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 542 {
 543         int32           eml = pg_database_encoding_max_length();
 544         int32           S = start;              /* start position */
 545         int32           S1;                             /* adjusted start position */
 546         int32           L1;                             /* adjusted substring length */
 547
 548         /* life is easy if the encoding max length is 1 */
 549         if (eml == 1)
 550         {
 551                 S1 = Max(S, 1);
 552
 553                 if (length_not_specified)               /* special case - get length to
 554                                                                                  * end of string */
 555                         L1 = -1;
 556                 else
 557                 {
 558                         /* end position */
 559                         int                     E = S + length;
 560
 561                         /*
 562                          * A negative value for L is the only way for the end position
 563                          * to be before the start. SQL99 says to throw an error.
 564                          */
 565                         if (E < S)
 566                                 ereport(ERROR,
 567                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 568                                            errmsg("negative substring length not allowed")));
 569
 570                         /*
 571                          * A zero or negative value for the end position can happen if
 572                          * the start was negative or one. SQL99 says to return a
 573                          * zero-length string.
 574                          */
 575                         if (E < 1)
 576                                 return PG_STR_GET_TEXT("");
 577
 578                         L1 = E - S1;
 579                 }
 580
 581                 /*
 582                  * If the start position is past the end of the string, SQL99 says
 583                  * to return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will
 584                  * do that for us. Convert to zero-based starting position
 585                  */
 586                 return DatumGetTextPSlice(str, S1 - 1, L1);
 587         }
 588         else if (eml > 1)
 589         {
 590                 /*
 591                  * When encoding max length is > 1, we can't get LC without
 592                  * detoasting, so we'll grab a conservatively large slice now and
 593                  * go back later to do the right thing
 594                  */
 595                 int32           slice_start;
 596                 int32           slice_size;
 597                 int32           slice_strlen;
 598                 text       *slice;
 599                 int32           E1;
 600                 int32           i;
 601                 char       *p;
 602                 char       *s;
 603                 text       *ret;
 604
 605                 /*
 606                  * if S is past the end of the string, the tuple toaster will
 607                  * return a zero-length string to us
 608                  */
 609                 S1 = Max(S, 1);
 610
 611                 /*
 612                  * We need to start at position zero because there is no way to
 613                  * know in advance which byte offset corresponds to the supplied
 614                  * start position.
 615                  */
 616                 slice_start = 0;
 617
 618                 if (length_not_specified)               /* special case - get length to
 619                                                                                  * end of string */
 620                         slice_size = L1 = -1;
 621                 else
 622                 {
 623                         int                     E = S + length;
 624
 625                         /*
 626                          * A negative value for L is the only way for the end position
 627                          * to be before the start. SQL99 says to throw an error.
 628                          */
 629                         if (E < S)
 630                                 ereport(ERROR,
 631                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 632                                            errmsg("negative substring length not allowed")));
 633
 634                         /*
 635                          * A zero or negative value for the end position can happen if
 636                          * the start was negative or one. SQL99 says to return a
 637                          * zero-length string.
 638                          */
 639                         if (E < 1)
 640                                 return PG_STR_GET_TEXT("");
 641
 642                         /*
 643                          * if E is past the end of the string, the tuple toaster will
 644                          * truncate the length for us
 645                          */
 646                         L1 = E - S1;
 647
 648                         /*
 649                          * Total slice size in bytes can't be any longer than the
 650                          * start position plus substring length times the encoding max
 651                          * length.
 652                          */
 653                         slice_size = (S1 + L1) * eml;
 654                 }
 655                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
 656
 657                 /* see if we got back an empty string */
 658                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
 659                         return PG_STR_GET_TEXT("");
 660
 661                 /* Now we can get the actual length of the slice in MB characters */
 662                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
 663
 664                 /*
 665                  * Check that the start position wasn't > slice_strlen. If so,
 666                  * SQL99 says to return a zero-length string.
 667                  */
 668                 if (S1 > slice_strlen)
 669                         return PG_STR_GET_TEXT("");
 670
 671                 /*
 672                  * Adjust L1 and E1 now that we know the slice string length.
 673                  * Again remember that S1 is one based, and slice_start is zero
 674                  * based.
 675                  */
 676                 if (L1 > -1)
 677                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 678                 else
 679                         E1 = slice_start + 1 + slice_strlen;
 680
 681                 /*
 682                  * Find the start position in the slice; remember S1 is not zero
 683                  * based
 684                  */
 685                 p = VARDATA(slice);
 686                 for (i = 0; i < S1 - 1; i++)
 687                         p += pg_mblen(p);
 688
 689                 /* hang onto a pointer to our start position */
 690                 s = p;
 691
 692                 /*
 693                  * Count the actual bytes used by the substring of the requested
 694                  * length.
 695                  */
 696                 for (i = S1; i < E1; i++)
 697                         p += pg_mblen(p);
 698
 699                 ret = (text *) palloc(VARHDRSZ + (p - s));
 700                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
 701                 memcpy(VARDATA(ret), s, (p - s));
 702
 703                 return ret;
 704         }
 705         else
 706                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
 707
 708         /* not reached: suppress compiler warning */
 709         return NULL;
 710 }
 711
 712 /*
 713  * textpos -
 714  *        Return the position of the specified substring.
 715  *        Implements the SQL92 POSITION() function.
 716  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 717  * - thomas 1997-07-27
 718  */
 719 Datum
 720 textpos(PG_FUNCTION_ARGS)
 721 {
 722         text       *str = PG_GETARG_TEXT_P(0);
 723         text       *search_str = PG_GETARG_TEXT_P(1);
 724
 725         PG_RETURN_INT32(text_position(str, search_str, 1));
 726 }
 727
 728 /*
 729  * text_position -
 730  *      Does the real work for textpos()
 731  *
 732  * Inputs:
 733  *              t1 - string to be searched
 734  *              t2 - pattern to match within t1
 735  *              matchnum - number of the match to be found (1 is the first match)
 736  * Result:
 737  *              Character index of the first matched char, starting from 1,
 738  *              or 0 if no match.
 739  *
 740  *      This is broken out so it can be called directly by other string processing
 741  *      functions.
 742  */
 743 static int32
 744 text_position(text *t1, text *t2, int matchnum)
 745 {
 746         int                     match = 0,
 747                                 pos = 0,
 748                                 p,
 749                                 px,
 750                                 len1,
 751                                 len2;
 752
 753         if (matchnum <= 0)
 754                 return 0;                               /* result for 0th match */
 755
 756         if (VARSIZE(t2) <= VARHDRSZ)
 757                 return 1;                               /* result for empty pattern */
 758
 759         len1 = VARSIZE(t1) - VARHDRSZ;
 760         len2 = VARSIZE(t2) - VARHDRSZ;
 761
 762         if (pg_database_encoding_max_length() == 1)
 763         {
 764                 /* simple case - single byte encoding */
 765                 char       *p1,
 766                                    *p2;
 767
 768                 p1 = VARDATA(t1);
 769                 p2 = VARDATA(t2);
 770
 771                 /* no use in searching str past point where search_str will fit */
 772                 px = (len1 - len2);
 773
 774                 for (p = 0; p <= px; p++)
 775                 {
 776                         if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
 777                         {
 778                                 if (++match == matchnum)
 779                                 {
 780                                         pos = p + 1;
 781                                         break;
 782                                 }
 783                         }
 784                         p1++;
 785                 }
 786         }
 787         else
 788         {
 789                 /* not as simple - multibyte encoding */
 790                 pg_wchar   *p1,
 791                                    *p2,
 792                                    *ps1,
 793                                    *ps2;
 794
 795                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 796                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
 797                 len1 = pg_wchar_strlen(p1);
 798                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 799                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
 800                 len2 = pg_wchar_strlen(p2);
 801
 802                 /* no use in searching str past point where search_str will fit */
 803                 px = (len1 - len2);
 804
 805                 for (p = 0; p <= px; p++)
 806                 {
 807                         if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
 808                         {
 809                                 if (++match == matchnum)
 810                                 {
 811                                         pos = p + 1;
 812                                         break;
 813                                 }
 814                         }
 815                         p1++;
 816                 }
 817
 818                 pfree(ps1);
 819                 pfree(ps2);
 820         }
 821
 822         return pos;
 823 }
 824
 825 /* varstr_cmp()
 826  * Comparison function for text strings with given lengths.
 827  * Includes locale support, but must copy strings to temporary memory
 828  *      to allow null-termination for inputs to strcoll().
 829  * Returns -1, 0 or 1
 830  */
 831 int
 832 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 833 {
 834         int                     result;
 835
 836         /*
 837          * Unfortunately, there is no strncoll(), so in the non-C locale case
 838          * we have to do some memory copying.  This turns out to be
 839          * significantly slower, so we optimize the case where LC_COLLATE is
 840          * C.  We also try to optimize relatively-short strings by avoiding
 841          * palloc/pfree overhead.
 842          */
 843 #define STACKBUFLEN             1024
 844
 845         if (!lc_collate_is_c())
 846         {
 847                 char            a1buf[STACKBUFLEN];
 848                 char            a2buf[STACKBUFLEN];
 849                 char       *a1p,
 850                                    *a2p;
 851
 852                 if (len1 >= STACKBUFLEN)
 853                         a1p = (char *) palloc(len1 + 1);
 854                 else
 855                         a1p = a1buf;
 856                 if (len2 >= STACKBUFLEN)
 857                         a2p = (char *) palloc(len2 + 1);
 858                 else
 859                         a2p = a2buf;
 860
 861                 memcpy(a1p, arg1, len1);
 862                 a1p[len1] = '\0';
 863                 memcpy(a2p, arg2, len2);
 864                 a2p[len2] = '\0';
 865
 866                 result = strcoll(a1p, a2p);
 867
 868                 if (len1 >= STACKBUFLEN)
 869                         pfree(a1p);
 870                 if (len2 >= STACKBUFLEN)
 871                         pfree(a2p);
 872         }
 873         else
 874         {
 875                 result = strncmp(arg1, arg2, Min(len1, len2));
 876                 if ((result == 0) && (len1 != len2))
 877                         result = (len1 < len2) ? -1 : 1;
 878         }
 879
 880         return result;
 881 }
 882
 883
 884 /* text_cmp()
 885  * Internal comparison function for text strings.
 886  * Returns -1, 0 or 1
 887  */
 888 static int
 889 text_cmp(text *arg1, text *arg2)
 890 {
 891         char       *a1p,
 892                            *a2p;
 893         int                     len1,
 894                                 len2;
 895
 896         a1p = VARDATA(arg1);
 897         a2p = VARDATA(arg2);
 898
 899         len1 = VARSIZE(arg1) - VARHDRSZ;
 900         len2 = VARSIZE(arg2) - VARHDRSZ;
 901
 902         return varstr_cmp(a1p, len1, a2p, len2);
 903 }
 904
 905 /*
 906  * Comparison functions for text strings.
 907  *
 908  * Note: btree indexes need these routines not to leak memory; therefore,
 909  * be careful to free working copies of toasted datums.  Most places don't
 910  * need to be so careful.
 911  */
 912
 913 Datum
 914 texteq(PG_FUNCTION_ARGS)
 915 {
 916         text       *arg1 = PG_GETARG_TEXT_P(0);
 917         text       *arg2 = PG_GETARG_TEXT_P(1);
 918         bool            result;
 919
 920         /* fast path for different-length inputs */
 921         if (VARSIZE(arg1) != VARSIZE(arg2))
 922                 result = false;
 923         else
 924                 result = (text_cmp(arg1, arg2) == 0);
 925
 926         PG_FREE_IF_COPY(arg1, 0);
 927         PG_FREE_IF_COPY(arg2, 1);
 928
 929         PG_RETURN_BOOL(result);
 930 }
 931
 932 Datum
 933 textne(PG_FUNCTION_ARGS)
 934 {
 935         text       *arg1 = PG_GETARG_TEXT_P(0);
 936         text       *arg2 = PG_GETARG_TEXT_P(1);
 937         bool            result;
 938
 939         /* fast path for different-length inputs */
 940         if (VARSIZE(arg1) != VARSIZE(arg2))
 941                 result = true;
 942         else
 943                 result = (text_cmp(arg1, arg2) != 0);
 944
 945         PG_FREE_IF_COPY(arg1, 0);
 946         PG_FREE_IF_COPY(arg2, 1);
 947
 948         PG_RETURN_BOOL(result);
 949 }
 950
 951 Datum
 952 text_lt(PG_FUNCTION_ARGS)
 953 {
 954         text       *arg1 = PG_GETARG_TEXT_P(0);
 955         text       *arg2 = PG_GETARG_TEXT_P(1);
 956         bool            result;
 957
 958         result = (text_cmp(arg1, arg2) < 0);
 959
 960         PG_FREE_IF_COPY(arg1, 0);
 961         PG_FREE_IF_COPY(arg2, 1);
 962
 963         PG_RETURN_BOOL(result);
 964 }
 965
 966 Datum
 967 text_le(PG_FUNCTION_ARGS)
 968 {
 969         text       *arg1 = PG_GETARG_TEXT_P(0);
 970         text       *arg2 = PG_GETARG_TEXT_P(1);
 971         bool            result;
 972
 973         result = (text_cmp(arg1, arg2) <= 0);
 974
 975         PG_FREE_IF_COPY(arg1, 0);
 976         PG_FREE_IF_COPY(arg2, 1);
 977
 978         PG_RETURN_BOOL(result);
 979 }
 980
 981 Datum
 982 text_gt(PG_FUNCTION_ARGS)
 983 {
 984         text       *arg1 = PG_GETARG_TEXT_P(0);
 985         text       *arg2 = PG_GETARG_TEXT_P(1);
 986         bool            result;
 987
 988         result = (text_cmp(arg1, arg2) > 0);
 989
 990         PG_FREE_IF_COPY(arg1, 0);
 991         PG_FREE_IF_COPY(arg2, 1);
 992
 993         PG_RETURN_BOOL(result);
 994 }
 995
 996 Datum
 997 text_ge(PG_FUNCTION_ARGS)
 998 {
 999         text       *arg1 = PG_GETARG_TEXT_P(0);
1000         text       *arg2 = PG_GETARG_TEXT_P(1);
1001         bool            result;
1002
1003         result = (text_cmp(arg1, arg2) >= 0);
1004
1005         PG_FREE_IF_COPY(arg1, 0);
1006         PG_FREE_IF_COPY(arg2, 1);
1007
1008         PG_RETURN_BOOL(result);
1009 }
1010
1011 Datum
1012 bttextcmp(PG_FUNCTION_ARGS)
1013 {
1014         text       *arg1 = PG_GETARG_TEXT_P(0);
1015         text       *arg2 = PG_GETARG_TEXT_P(1);
1016         int32           result;
1017
1018         result = text_cmp(arg1, arg2);
1019
1020         PG_FREE_IF_COPY(arg1, 0);
1021         PG_FREE_IF_COPY(arg2, 1);
1022
1023         PG_RETURN_INT32(result);
1024 }
1025
1026
1027 Datum
1028 text_larger(PG_FUNCTION_ARGS)
1029 {
1030         text       *arg1 = PG_GETARG_TEXT_P(0);
1031         text       *arg2 = PG_GETARG_TEXT_P(1);
1032         text       *result;
1033
1034         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1035
1036         PG_RETURN_TEXT_P(result);
1037 }
1038
1039 Datum
1040 text_smaller(PG_FUNCTION_ARGS)
1041 {
1042         text       *arg1 = PG_GETARG_TEXT_P(0);
1043         text       *arg2 = PG_GETARG_TEXT_P(1);
1044         text       *result;
1045
1046         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1047
1048         PG_RETURN_TEXT_P(result);
1049 }
1050
1051
1052 /*
1053  * The following operators support character-by-character comparison
1054  * of text data types, to allow building indexes suitable for LIKE
1055  * clauses.
1056  */
1057
1058 static int
1059 internal_text_pattern_compare(text *arg1, text *arg2)
1060 {
1061         int                     result;
1062
1063         result = memcmp(VARDATA(arg1), VARDATA(arg2),
1064                                         Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1065         if (result != 0)
1066                 return result;
1067         else if (VARSIZE(arg1) < VARSIZE(arg2))
1068                 return -1;
1069         else if (VARSIZE(arg1) > VARSIZE(arg2))
1070                 return 1;
1071         else
1072                 return 0;
1073 }
1074
1075
1076 Datum
1077 text_pattern_lt(PG_FUNCTION_ARGS)
1078 {
1079         text       *arg1 = PG_GETARG_TEXT_P(0);
1080         text       *arg2 = PG_GETARG_TEXT_P(1);
1081         int                     result;
1082
1083         result = internal_text_pattern_compare(arg1, arg2);
1084
1085         PG_FREE_IF_COPY(arg1, 0);
1086         PG_FREE_IF_COPY(arg2, 1);
1087
1088         PG_RETURN_BOOL(result < 0);
1089 }
1090
1091
1092 Datum
1093 text_pattern_le(PG_FUNCTION_ARGS)
1094 {
1095         text       *arg1 = PG_GETARG_TEXT_P(0);
1096         text       *arg2 = PG_GETARG_TEXT_P(1);
1097         int                     result;
1098
1099         result = internal_text_pattern_compare(arg1, arg2);
1100
1101         PG_FREE_IF_COPY(arg1, 0);
1102         PG_FREE_IF_COPY(arg2, 1);
1103
1104         PG_RETURN_BOOL(result <= 0);
1105 }
1106
1107
1108 Datum
1109 text_pattern_eq(PG_FUNCTION_ARGS)
1110 {
1111         text       *arg1 = PG_GETARG_TEXT_P(0);
1112         text       *arg2 = PG_GETARG_TEXT_P(1);
1113         int                     result;
1114
1115         if (VARSIZE(arg1) != VARSIZE(arg2))
1116                 result = 1;
1117         else
1118                 result = internal_text_pattern_compare(arg1, arg2);
1119
1120         PG_FREE_IF_COPY(arg1, 0);
1121         PG_FREE_IF_COPY(arg2, 1);
1122
1123         PG_RETURN_BOOL(result == 0);
1124 }
1125
1126
1127 Datum
1128 text_pattern_ge(PG_FUNCTION_ARGS)
1129 {
1130         text       *arg1 = PG_GETARG_TEXT_P(0);
1131         text       *arg2 = PG_GETARG_TEXT_P(1);
1132         int                     result;
1133
1134         result = internal_text_pattern_compare(arg1, arg2);
1135
1136         PG_FREE_IF_COPY(arg1, 0);
1137         PG_FREE_IF_COPY(arg2, 1);
1138
1139         PG_RETURN_BOOL(result >= 0);
1140 }
1141
1142
1143 Datum
1144 text_pattern_gt(PG_FUNCTION_ARGS)
1145 {
1146         text       *arg1 = PG_GETARG_TEXT_P(0);
1147         text       *arg2 = PG_GETARG_TEXT_P(1);
1148         int                     result;
1149
1150         result = internal_text_pattern_compare(arg1, arg2);
1151
1152         PG_FREE_IF_COPY(arg1, 0);
1153         PG_FREE_IF_COPY(arg2, 1);
1154
1155         PG_RETURN_BOOL(result > 0);
1156 }
1157
1158
1159 Datum
1160 text_pattern_ne(PG_FUNCTION_ARGS)
1161 {
1162         text       *arg1 = PG_GETARG_TEXT_P(0);
1163         text       *arg2 = PG_GETARG_TEXT_P(1);
1164         int                     result;
1165
1166         if (VARSIZE(arg1) != VARSIZE(arg2))
1167                 result = 1;
1168         else
1169                 result = internal_text_pattern_compare(arg1, arg2);
1170
1171         PG_FREE_IF_COPY(arg1, 0);
1172         PG_FREE_IF_COPY(arg2, 1);
1173
1174         PG_RETURN_BOOL(result != 0);
1175 }
1176
1177
1178 Datum
1179 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1180 {
1181         text       *arg1 = PG_GETARG_TEXT_P(0);
1182         text       *arg2 = PG_GETARG_TEXT_P(1);
1183         int                     result;
1184
1185         result = internal_text_pattern_compare(arg1, arg2);
1186
1187         PG_FREE_IF_COPY(arg1, 0);
1188         PG_FREE_IF_COPY(arg2, 1);
1189
1190         PG_RETURN_INT32(result);
1191 }
1192
1193
1194 /*-------------------------------------------------------------
1195  * byteaoctetlen
1196  *
1197  * get the number of bytes contained in an instance of type 'bytea'
1198  *-------------------------------------------------------------
1199  */
1200 Datum
1201 byteaoctetlen(PG_FUNCTION_ARGS)
1202 {
1203         Datum           str = PG_GETARG_DATUM(0);
1204
1205         /* We need not detoast the input at all */
1206         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1207 }
1208
1209 /*
1210  * byteacat -
1211  *        takes two bytea* and returns a bytea* that is the concatenation of
1212  *        the two.
1213  *
1214  * Cloned from textcat and modified as required.
1215  */
1216 Datum
1217 byteacat(PG_FUNCTION_ARGS)
1218 {
1219         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1220         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1221         int                     len1,
1222                                 len2,
1223                                 len;
1224         bytea      *result;
1225         char       *ptr;
1226
1227         len1 = VARSIZE(t1) - VARHDRSZ;
1228         if (len1 < 0)
1229                 len1 = 0;
1230
1231         len2 = VARSIZE(t2) - VARHDRSZ;
1232         if (len2 < 0)
1233                 len2 = 0;
1234
1235         len = len1 + len2 + VARHDRSZ;
1236         result = (bytea *) palloc(len);
1237
1238         /* Set size of result string... */
1239         VARATT_SIZEP(result) = len;
1240
1241         /* Fill data field of result string... */
1242         ptr = VARDATA(result);
1243         if (len1 > 0)
1244                 memcpy(ptr, VARDATA(t1), len1);
1245         if (len2 > 0)
1246                 memcpy(ptr + len1, VARDATA(t2), len2);
1247
1248         PG_RETURN_BYTEA_P(result);
1249 }
1250
1251 #define PG_STR_GET_BYTEA(str_) \
1252         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1253 /*
1254  * bytea_substr()
1255  * Return a substring starting at the specified position.
1256  * Cloned from text_substr and modified as required.
1257  *
1258  * Input:
1259  *      - string
1260  *      - starting position (is one-based)
1261  *      - string length (optional)
1262  *
1263  * If the starting position is zero or less, then return from the start of the string
1264  * adjusting the length to be consistent with the "negative start" per SQL92.
1265  * If the length is less than zero, an ERROR is thrown. If no third argument
1266  * (length) is provided, the length to the end of the string is assumed.
1267  */
1268 Datum
1269 bytea_substr(PG_FUNCTION_ARGS)
1270 {
1271         int                     S = PG_GETARG_INT32(1); /* start position */
1272         int                     S1;                             /* adjusted start position */
1273         int                     L1;                             /* adjusted substring length */
1274
1275         S1 = Max(S, 1);
1276
1277         if (fcinfo->nargs == 2)
1278         {
1279                 /*
1280                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1281                  * everything to the end of the string if we pass it a negative
1282                  * value for length.
1283                  */
1284                 L1 = -1;
1285         }
1286         else
1287         {
1288                 /* end position */
1289                 int                     E = S + PG_GETARG_INT32(2);
1290
1291                 /*
1292                  * A negative value for L is the only way for the end position to
1293                  * be before the start. SQL99 says to throw an error.
1294                  */
1295                 if (E < S)
1296                         ereport(ERROR,
1297                                         (errcode(ERRCODE_SUBSTRING_ERROR),
1298                                          errmsg("negative substring length not allowed")));
1299
1300                 /*
1301                  * A zero or negative value for the end position can happen if the
1302                  * start was negative or one. SQL99 says to return a zero-length
1303                  * string.
1304                  */
1305                 if (E < 1)
1306                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1307
1308                 L1 = E - S1;
1309         }
1310
1311         /*
1312          * If the start position is past the end of the string, SQL99 says to
1313          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
1314          * that for us. Convert to zero-based starting position
1315          */
1316         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1317 }
1318
1319 /*
1320  * bytea_substr_no_len -
1321  *        Wrapper to avoid opr_sanity failure due to
1322  *        one function accepting a different number of args.
1323  */
1324 Datum
1325 bytea_substr_no_len(PG_FUNCTION_ARGS)
1326 {
1327         return bytea_substr(fcinfo);
1328 }
1329
1330 /*
1331  * byteapos -
1332  *        Return the position of the specified substring.
1333  *        Implements the SQL92 POSITION() function.
1334  * Cloned from textpos and modified as required.
1335  */
1336 Datum
1337 byteapos(PG_FUNCTION_ARGS)
1338 {
1339         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1340         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1341         int                     pos;
1342         int                     px,
1343                                 p;
1344         int                     len1,
1345                                 len2;
1346         char       *p1,
1347                            *p2;
1348
1349         if (VARSIZE(t2) <= VARHDRSZ)
1350                 PG_RETURN_INT32(1);             /* result for empty pattern */
1351
1352         len1 = VARSIZE(t1) - VARHDRSZ;
1353         len2 = VARSIZE(t2) - VARHDRSZ;
1354
1355         p1 = VARDATA(t1);
1356         p2 = VARDATA(t2);
1357
1358         pos = 0;
1359         px = (len1 - len2);
1360         for (p = 0; p <= px; p++)
1361         {
1362                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1363                 {
1364                         pos = p + 1;
1365                         break;
1366                 };
1367                 p1++;
1368         };
1369
1370         PG_RETURN_INT32(pos);
1371 }
1372
1373 /*-------------------------------------------------------------
1374  * byteaGetByte
1375  *
1376  * this routine treats "bytea" as an array of bytes.
1377  * It returns the Nth byte (a number between 0 and 255).
1378  *-------------------------------------------------------------
1379  */
1380 Datum
1381 byteaGetByte(PG_FUNCTION_ARGS)
1382 {
1383         bytea      *v = PG_GETARG_BYTEA_P(0);
1384         int32           n = PG_GETARG_INT32(1);
1385         int                     len;
1386         int                     byte;
1387
1388         len = VARSIZE(v) - VARHDRSZ;
1389
1390         if (n < 0 || n >= len)
1391                 ereport(ERROR,
1392                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1393                                  errmsg("index %d out of valid range, 0..%d",
1394                                                 n, len - 1)));
1395
1396         byte = ((unsigned char *) VARDATA(v))[n];
1397
1398         PG_RETURN_INT32(byte);
1399 }
1400
1401 /*-------------------------------------------------------------
1402  * byteaGetBit
1403  *
1404  * This routine treats a "bytea" type like an array of bits.
1405  * It returns the value of the Nth bit (0 or 1).
1406  *
1407  *-------------------------------------------------------------
1408  */
1409 Datum
1410 byteaGetBit(PG_FUNCTION_ARGS)
1411 {
1412         bytea      *v = PG_GETARG_BYTEA_P(0);
1413         int32           n = PG_GETARG_INT32(1);
1414         int                     byteNo,
1415                                 bitNo;
1416         int                     len;
1417         int                     byte;
1418
1419         len = VARSIZE(v) - VARHDRSZ;
1420
1421         if (n < 0 || n >= len * 8)
1422                 ereport(ERROR,
1423                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1424                                  errmsg("index %d out of valid range, 0..%d",
1425                                                 n, len * 8 - 1)));
1426
1427         byteNo = n / 8;
1428         bitNo = n % 8;
1429
1430         byte = ((unsigned char *) VARDATA(v))[byteNo];
1431
1432         if (byte & (1 << bitNo))
1433                 PG_RETURN_INT32(1);
1434         else
1435                 PG_RETURN_INT32(0);
1436 }
1437
1438 /*-------------------------------------------------------------
1439  * byteaSetByte
1440  *
1441  * Given an instance of type 'bytea' creates a new one with
1442  * the Nth byte set to the given value.
1443  *
1444  *-------------------------------------------------------------
1445  */
1446 Datum
1447 byteaSetByte(PG_FUNCTION_ARGS)
1448 {
1449         bytea      *v = PG_GETARG_BYTEA_P(0);
1450         int32           n = PG_GETARG_INT32(1);
1451         int32           newByte = PG_GETARG_INT32(2);
1452         int                     len;
1453         bytea      *res;
1454
1455         len = VARSIZE(v) - VARHDRSZ;
1456
1457         if (n < 0 || n >= len)
1458                 ereport(ERROR,
1459                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1460                                  errmsg("index %d out of valid range, 0..%d",
1461                                                 n, len - 1)));
1462
1463         /*
1464          * Make a copy of the original varlena.
1465          */
1466         res = (bytea *) palloc(VARSIZE(v));
1467         memcpy((char *) res, (char *) v, VARSIZE(v));
1468
1469         /*
1470          * Now set the byte.
1471          */
1472         ((unsigned char *) VARDATA(res))[n] = newByte;
1473
1474         PG_RETURN_BYTEA_P(res);
1475 }
1476
1477 /*-------------------------------------------------------------
1478  * byteaSetBit
1479  *
1480  * Given an instance of type 'bytea' creates a new one with
1481  * the Nth bit set to the given value.
1482  *
1483  *-------------------------------------------------------------
1484  */
1485 Datum
1486 byteaSetBit(PG_FUNCTION_ARGS)
1487 {
1488         bytea      *v = PG_GETARG_BYTEA_P(0);
1489         int32           n = PG_GETARG_INT32(1);
1490         int32           newBit = PG_GETARG_INT32(2);
1491         bytea      *res;
1492         int                     len;
1493         int                     oldByte,
1494                                 newByte;
1495         int                     byteNo,
1496                                 bitNo;
1497
1498         len = VARSIZE(v) - VARHDRSZ;
1499
1500         if (n < 0 || n >= len * 8)
1501                 ereport(ERROR,
1502                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1503                                  errmsg("index %d out of valid range, 0..%d",
1504                                                 n, len * 8 - 1)));
1505
1506         byteNo = n / 8;
1507         bitNo = n % 8;
1508
1509         /*
1510          * sanity check!
1511          */
1512         if (newBit != 0 && newBit != 1)
1513                 ereport(ERROR,
1514                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1515                                  errmsg("new bit must be 0 or 1")));
1516
1517         /*
1518          * Make a copy of the original varlena.
1519          */
1520         res = (bytea *) palloc(VARSIZE(v));
1521         memcpy((char *) res, (char *) v, VARSIZE(v));
1522
1523         /*
1524          * Update the byte.
1525          */
1526         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1527
1528         if (newBit == 0)
1529                 newByte = oldByte & (~(1 << bitNo));
1530         else
1531                 newByte = oldByte | (1 << bitNo);
1532
1533         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1534
1535         PG_RETURN_BYTEA_P(res);
1536 }
1537
1538
1539 /* text_name()
1540  * Converts a text type to a Name type.
1541  */
1542 Datum
1543 text_name(PG_FUNCTION_ARGS)
1544 {
1545         text       *s = PG_GETARG_TEXT_P(0);
1546         Name            result;
1547         int                     len;
1548
1549         len = VARSIZE(s) - VARHDRSZ;
1550
1551         /* Truncate oversize input */
1552         if (len >= NAMEDATALEN)
1553                 len = NAMEDATALEN - 1;
1554
1555 #ifdef STRINGDEBUG
1556         printf("text- convert string length %d (%d) ->%d\n",
1557                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1558 #endif
1559
1560         result = (Name) palloc(NAMEDATALEN);
1561         memcpy(NameStr(*result), VARDATA(s), len);
1562
1563         /* now null pad to full length... */
1564         while (len < NAMEDATALEN)
1565         {
1566                 *(NameStr(*result) + len) = '\0';
1567                 len++;
1568         }
1569
1570         PG_RETURN_NAME(result);
1571 }
1572
1573 /* name_text()
1574  * Converts a Name type to a text type.
1575  */
1576 Datum
1577 name_text(PG_FUNCTION_ARGS)
1578 {
1579         Name            s = PG_GETARG_NAME(0);
1580         text       *result;
1581         int                     len;
1582
1583         len = strlen(NameStr(*s));
1584
1585 #ifdef STRINGDEBUG
1586         printf("text- convert string length %d (%d) ->%d\n",
1587                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1588 #endif
1589
1590         result = palloc(VARHDRSZ + len);
1591         VARATT_SIZEP(result) = VARHDRSZ + len;
1592         memcpy(VARDATA(result), NameStr(*s), len);
1593
1594         PG_RETURN_TEXT_P(result);
1595 }
1596
1597
1598 /*
1599  * textToQualifiedNameList - convert a text object to list of names
1600  *
1601  * This implements the input parsing needed by nextval() and other
1602  * functions that take a text parameter representing a qualified name.
1603  * We split the name at dots, downcase if not double-quoted, and
1604  * truncate names if they're too long.
1605  */
1606 List *
1607 textToQualifiedNameList(text *textval)
1608 {
1609         char       *rawname;
1610         List       *result = NIL;
1611         List       *namelist;
1612         ListCell   *l;
1613
1614         /* Convert to C string (handles possible detoasting). */
1615         /* Note we rely on being able to modify rawname below. */
1616         rawname = DatumGetCString(DirectFunctionCall1(textout,
1617                                                                                           PointerGetDatum(textval)));
1618
1619         if (!SplitIdentifierString(rawname, '.', &namelist))
1620                 ereport(ERROR,
1621                                 (errcode(ERRCODE_INVALID_NAME),
1622                                  errmsg("invalid name syntax")));
1623
1624         if (namelist == NIL)
1625                 ereport(ERROR,
1626                                 (errcode(ERRCODE_INVALID_NAME),
1627                                  errmsg("invalid name syntax")));
1628
1629         foreach(l, namelist)
1630         {
1631                 char       *curname = (char *) lfirst(l);
1632
1633                 result = lappend(result, makeString(pstrdup(curname)));
1634         }
1635
1636         pfree(rawname);
1637         list_free(namelist);
1638
1639         return result;
1640 }
1641
1642 /*
1643  * SplitIdentifierString --- parse a string containing identifiers
1644  *
1645  * This is the guts of textToQualifiedNameList, and is exported for use in
1646  * other situations such as parsing GUC variables.      In the GUC case, it's
1647  * important to avoid memory leaks, so the API is designed to minimize the
1648  * amount of stuff that needs to be allocated and freed.
1649  *
1650  * Inputs:
1651  *      rawstring: the input string; must be overwritable!      On return, it's
1652  *                         been modified to contain the separated identifiers.
1653  *      separator: the separator punctuation expected between identifiers
1654  *                         (typically '.' or ',').      Whitespace may also appear around
1655  *                         identifiers.
1656  * Outputs:
1657  *      namelist: filled with a palloc'd list of pointers to identifiers within
1658  *                        rawstring.  Caller should freeList() this even on error return.
1659  *
1660  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1661  *
1662  * Note that an empty string is considered okay here, though not in
1663  * textToQualifiedNameList.
1664  */
1665 bool
1666 SplitIdentifierString(char *rawstring, char separator,
1667                                           List **namelist)
1668 {
1669         char       *nextp = rawstring;
1670         bool            done = false;
1671
1672         *namelist = NIL;
1673
1674         while (isspace((unsigned char) *nextp))
1675                 nextp++;                                /* skip leading whitespace */
1676
1677         if (*nextp == '\0')
1678                 return true;                    /* allow empty string */
1679
1680         /* At the top of the loop, we are at start of a new identifier. */
1681         do
1682         {
1683                 char       *curname;
1684                 char       *endp;
1685
1686                 if (*nextp == '\"')
1687                 {
1688                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1689                         curname = nextp + 1;
1690                         for (;;)
1691                         {
1692                                 endp = strchr(nextp + 1, '\"');
1693                                 if (endp == NULL)
1694                                         return false;           /* mismatched quotes */
1695                                 if (endp[1] != '\"')
1696                                         break;          /* found end of quoted name */
1697                                 /* Collapse adjacent quotes into one quote, and look again */
1698                                 memmove(endp, endp + 1, strlen(endp));
1699                                 nextp = endp;
1700                         }
1701                         /* endp now points at the terminating quote */
1702                         nextp = endp + 1;
1703                 }
1704                 else
1705                 {
1706                         /* Unquoted name --- extends to separator or whitespace */
1707                         char       *downname;
1708                         int                     len;
1709
1710                         curname = nextp;
1711                         while (*nextp && *nextp != separator &&
1712                                    !isspace((unsigned char) *nextp))
1713                                 nextp++;
1714                         endp = nextp;
1715                         if (curname == nextp)
1716                                 return false;   /* empty unquoted name not allowed */
1717
1718                         /*
1719                          * Downcase the identifier, using same code as main lexer
1720                          * does.
1721                          *
1722                          * XXX because we want to overwrite the input in-place, we cannot
1723                          * support a downcasing transformation that increases the
1724                          * string length.  This is not a problem given the current
1725                          * implementation of downcase_truncate_identifier, but we'll
1726                          * probably have to do something about this someday.
1727                          */
1728                         len = endp - curname;
1729                         downname = downcase_truncate_identifier(curname, len, false);
1730                         Assert(strlen(downname) <= len);
1731                         strncpy(curname, downname, len);
1732                         pfree(downname);
1733                 }
1734
1735                 while (isspace((unsigned char) *nextp))
1736                         nextp++;                        /* skip trailing whitespace */
1737
1738                 if (*nextp == separator)
1739                 {
1740                         nextp++;
1741                         while (isspace((unsigned char) *nextp))
1742                                 nextp++;                /* skip leading whitespace for next */
1743                         /* we expect another name, so done remains false */
1744                 }
1745                 else if (*nextp == '\0')
1746                         done = true;
1747                 else
1748                         return false;           /* invalid syntax */
1749
1750                 /* Now safe to overwrite separator with a null */
1751                 *endp = '\0';
1752
1753                 /* Truncate name if it's overlength */
1754                 truncate_identifier(curname, strlen(curname), false);
1755
1756                 /*
1757                  * Finished isolating current name --- add it to list
1758                  */
1759                 *namelist = lappend(*namelist, curname);
1760
1761                 /* Loop back if we didn't reach end of string */
1762         } while (!done);
1763
1764         return true;
1765 }
1766
1767
1768 /*****************************************************************************
1769  *      Comparison Functions used for bytea
1770  *
1771  * Note: btree indexes need these routines not to leak memory; therefore,
1772  * be careful to free working copies of toasted datums.  Most places don't
1773  * need to be so careful.
1774  *****************************************************************************/
1775
1776 Datum
1777 byteaeq(PG_FUNCTION_ARGS)
1778 {
1779         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1780         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1781         int                     len1,
1782                                 len2;
1783         bool            result;
1784
1785         len1 = VARSIZE(arg1) - VARHDRSZ;
1786         len2 = VARSIZE(arg2) - VARHDRSZ;
1787
1788         /* fast path for different-length inputs */
1789         if (len1 != len2)
1790                 result = false;
1791         else
1792                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1793
1794         PG_FREE_IF_COPY(arg1, 0);
1795         PG_FREE_IF_COPY(arg2, 1);
1796
1797         PG_RETURN_BOOL(result);
1798 }
1799
1800 Datum
1801 byteane(PG_FUNCTION_ARGS)
1802 {
1803         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1804         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1805         int                     len1,
1806                                 len2;
1807         bool            result;
1808
1809         len1 = VARSIZE(arg1) - VARHDRSZ;
1810         len2 = VARSIZE(arg2) - VARHDRSZ;
1811
1812         /* fast path for different-length inputs */
1813         if (len1 != len2)
1814                 result = true;
1815         else
1816                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1817
1818         PG_FREE_IF_COPY(arg1, 0);
1819         PG_FREE_IF_COPY(arg2, 1);
1820
1821         PG_RETURN_BOOL(result);
1822 }
1823
1824 Datum
1825 bytealt(PG_FUNCTION_ARGS)
1826 {
1827         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1828         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1829         int                     len1,
1830                                 len2;
1831         int                     cmp;
1832
1833         len1 = VARSIZE(arg1) - VARHDRSZ;
1834         len2 = VARSIZE(arg2) - VARHDRSZ;
1835
1836         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1837
1838         PG_FREE_IF_COPY(arg1, 0);
1839         PG_FREE_IF_COPY(arg2, 1);
1840
1841         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1842 }
1843
1844 Datum
1845 byteale(PG_FUNCTION_ARGS)
1846 {
1847         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1848         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1849         int                     len1,
1850                                 len2;
1851         int                     cmp;
1852
1853         len1 = VARSIZE(arg1) - VARHDRSZ;
1854         len2 = VARSIZE(arg2) - VARHDRSZ;
1855
1856         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1857
1858         PG_FREE_IF_COPY(arg1, 0);
1859         PG_FREE_IF_COPY(arg2, 1);
1860
1861         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1862 }
1863
1864 Datum
1865 byteagt(PG_FUNCTION_ARGS)
1866 {
1867         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1868         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1869         int                     len1,
1870                                 len2;
1871         int                     cmp;
1872
1873         len1 = VARSIZE(arg1) - VARHDRSZ;
1874         len2 = VARSIZE(arg2) - VARHDRSZ;
1875
1876         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1877
1878         PG_FREE_IF_COPY(arg1, 0);
1879         PG_FREE_IF_COPY(arg2, 1);
1880
1881         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1882 }
1883
1884 Datum
1885 byteage(PG_FUNCTION_ARGS)
1886 {
1887         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1888         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1889         int                     len1,
1890                                 len2;
1891         int                     cmp;
1892
1893         len1 = VARSIZE(arg1) - VARHDRSZ;
1894         len2 = VARSIZE(arg2) - VARHDRSZ;
1895
1896         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1897
1898         PG_FREE_IF_COPY(arg1, 0);
1899         PG_FREE_IF_COPY(arg2, 1);
1900
1901         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1902 }
1903
1904 Datum
1905 byteacmp(PG_FUNCTION_ARGS)
1906 {
1907         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1908         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1909         int                     len1,
1910                                 len2;
1911         int                     cmp;
1912
1913         len1 = VARSIZE(arg1) - VARHDRSZ;
1914         len2 = VARSIZE(arg2) - VARHDRSZ;
1915
1916         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1917         if ((cmp == 0) && (len1 != len2))
1918                 cmp = (len1 < len2) ? -1 : 1;
1919
1920         PG_FREE_IF_COPY(arg1, 0);
1921         PG_FREE_IF_COPY(arg2, 1);
1922
1923         PG_RETURN_INT32(cmp);
1924 }
1925
1926 /*
1927  * appendStringInfoText
1928  *
1929  * Append a text to str.
1930  * Like appendStringInfoString(str, PG_TEXT_GET_STR(s)) but faster.
1931  */
1932 static void
1933 appendStringInfoText(StringInfo str, const text *t)
1934 {
1935         appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
1936 }
1937
1938 /*
1939  * replace_text
1940  * replace all occurrences of 'old_sub_str' in 'orig_str'
1941  * with 'new_sub_str' to form 'new_str'
1942  *
1943  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1944  * otherwise returns 'new_str'
1945  */
1946 Datum
1947 replace_text(PG_FUNCTION_ARGS)
1948 {
1949         text       *src_text = PG_GETARG_TEXT_P(0);
1950         text       *from_sub_text = PG_GETARG_TEXT_P(1);
1951         text       *to_sub_text = PG_GETARG_TEXT_P(2);
1952         int                     src_text_len = TEXTLEN(src_text);
1953         int                     from_sub_text_len = TEXTLEN(from_sub_text);
1954         text       *left_text;
1955         text       *right_text;
1956         text       *buf_text;
1957         text       *ret_text;
1958         int                     curr_posn;
1959         StringInfo      str;
1960
1961         if (src_text_len == 0 || from_sub_text_len == 0)
1962                 PG_RETURN_TEXT_P(src_text);
1963
1964         curr_posn = TEXTPOS(src_text, from_sub_text);
1965
1966         /* When the from_sub_text is not found, there is nothing to do. */
1967         if (curr_posn == 0)
1968                 PG_RETURN_TEXT_P(src_text);
1969
1970         str = makeStringInfo();
1971         buf_text = src_text;
1972
1973         while (curr_posn > 0)
1974         {
1975                 left_text = text_substring(PointerGetDatum(buf_text),
1976                                                                    1, curr_posn - 1, false);
1977                 right_text = text_substring(PointerGetDatum(buf_text),
1978                                                                         curr_posn + from_sub_text_len, -1, true);
1979
1980                 appendStringInfoText(str, left_text);
1981                 appendStringInfoText(str, to_sub_text);
1982
1983                 if (buf_text != src_text)
1984                         pfree(buf_text);
1985                 pfree(left_text);
1986                 buf_text = right_text;
1987                 curr_posn = TEXTPOS(buf_text, from_sub_text);
1988         }
1989
1990         appendStringInfoText(str, buf_text);
1991         if (buf_text != src_text)
1992                 pfree(buf_text);
1993
1994         ret_text = PG_STR_GET_TEXT(str->data);
1995         pfree(str->data);
1996         pfree(str);
1997
1998         PG_RETURN_TEXT_P(ret_text);
1999 }
2000
2001 /*
2002  * check_replace_text_has_escape_char
2003  * check whether replace_text has escape char.
2004  */
2005 static bool
2006 check_replace_text_has_escape_char(const text *replace_text)
2007 {
2008         const char *p = VARDATA(replace_text);
2009         const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2010
2011         if (pg_database_encoding_max_length() == 1)
2012         {
2013                 for (; p < p_end; p++)
2014                         if (*p == '\\') return true;
2015         }
2016         else
2017         {
2018                 for (; p < p_end; p += pg_mblen(p))
2019                         if (*p == '\\') return true;
2020         }
2021
2022         return false;
2023 }
2024
2025 /*
2026  * appendStringInfoRegexpSubstr
2027  * append string by using back references of regexp.
2028  */
2029 static void
2030 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2031     regmatch_t *pmatch, text *src_text)
2032 {
2033         const char *p = VARDATA(replace_text);
2034         const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2035
2036         int                     eml = pg_database_encoding_max_length();
2037
2038         int                     substr_start = 1;
2039         int                     ch_cnt;
2040
2041         int                     so;
2042         int                     eo;
2043
2044         while (1)
2045         {
2046                 /* Find escape char. */
2047                 ch_cnt = 0;
2048                 if (eml == 1)
2049                 {
2050                         for (; p < p_end && *p != '\\'; p++)
2051                                 ch_cnt++;
2052                 }
2053                 else
2054                 {
2055                         for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2056                                 ch_cnt++;
2057                 }
2058
2059                 /*
2060                  * Copy the text when there is a text in the left of escape char
2061                  * or escape char is not found.
2062                  */
2063                 if (ch_cnt)
2064                 {
2065                         text *append_text = text_substring(PointerGetDatum(replace_text),
2066                                                                           substr_start, ch_cnt, false);
2067                         appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
2068                         pfree(append_text);
2069                 }
2070                 substr_start += ch_cnt + 1;
2071
2072                 if (p >= p_end) /* When escape char is not found. */
2073                         break;
2074
2075                 /* See the next character of escape char. */
2076                 p++;
2077                 so = eo = -1;
2078
2079                 if (*p >= '1' && *p <= '9')
2080                 {
2081                         /* Use the back reference of regexp. */
2082                         int             idx = *p - '0';
2083                         so = pmatch[idx].rm_so;
2084                         eo = pmatch[idx].rm_eo;
2085                         p++;
2086                         substr_start++;
2087                 }
2088                 else if (*p == '&')
2089                 {
2090                         /* Use the entire matched string. */
2091                         so = pmatch[0].rm_so;
2092                         eo = pmatch[0].rm_eo;
2093                         p++;
2094                         substr_start++;
2095                 }
2096
2097                 if (so != -1 && eo != -1)
2098                 {
2099                         /* Copy the text that is back reference of regexp. */
2100                         text *append_text = text_substring(PointerGetDatum(src_text),
2101                                                                           so + 1, (eo - so), false);
2102                         appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
2103                         pfree(append_text);
2104                 }
2105         }
2106 }
2107
2108 #define REGEXP_REPLACE_BACKREF_CNT              10
2109
2110 /*
2111  * replace_text_regexp
2112  * replace text that matches to regexp in src_text to replace_text.
2113  */
2114 Datum
2115 replace_text_regexp(PG_FUNCTION_ARGS)
2116 {
2117         text       *ret_text;
2118         text       *src_text = PG_GETARG_TEXT_P(0);
2119         int                     src_text_len = VARSIZE(src_text) - VARHDRSZ;
2120         regex_t    *re = (regex_t *)PG_GETARG_POINTER(1);
2121         text       *replace_text = PG_GETARG_TEXT_P(2);
2122         bool            global = PG_GETARG_BOOL(3);
2123         StringInfo      str = makeStringInfo();
2124         int                     regexec_result;
2125         regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
2126         pg_wchar   *data;
2127         size_t          data_len;
2128         int                     search_start;
2129         int                     data_pos;
2130         bool            have_escape;
2131
2132         /* Convert data string to wide characters. */
2133         data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2134         data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
2135
2136         /* Check whether replace_text has escape char. */
2137         have_escape = check_replace_text_has_escape_char(replace_text);
2138
2139         for (search_start = data_pos = 0; search_start <= data_len;)
2140         {
2141                 regexec_result = pg_regexec(re,
2142                                                                         data,
2143                                                                         data_len,
2144                                                                         search_start,
2145                                                                         NULL,   /* no details */
2146                                                                         REGEXP_REPLACE_BACKREF_CNT,
2147                                                                         pmatch,
2148                                                                         0);
2149
2150                 if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
2151                 {
2152                         char    errMsg[100];
2153
2154                         /* re failed??? */
2155                         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2156                         ereport(ERROR,
2157                                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2158                                  errmsg("regular expression failed: %s", errMsg)));
2159                 }
2160
2161                 if (regexec_result == REG_NOMATCH)
2162                         break;
2163
2164         /*
2165          * Copy the text when there is a text in the left of matched position.
2166          */
2167                 if (pmatch[0].rm_so - data_pos > 0)
2168                 {
2169                         text *left_text = text_substring(PointerGetDatum(src_text),
2170                                                                            data_pos + 1,
2171                                                                            pmatch[0].rm_so - data_pos, false);
2172                         appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
2173                         pfree(left_text);
2174                 }
2175
2176                 /*
2177                  * Copy the replace_text. Process back references when the
2178                  * replace_text has escape characters.
2179                  */
2180                 if (have_escape)
2181                         appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
2182                 else
2183                         appendStringInfoString(str, PG_TEXT_GET_STR(replace_text));
2184
2185                 search_start = data_pos = pmatch[0].rm_eo;
2186
2187                 /*
2188                  * When global option is off, replace the first instance only.
2189                  */
2190                 if (!global)
2191                         break;
2192
2193                 /*
2194                  * Search from next character when the matching text is zero width.
2195                  */
2196                 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2197                         search_start++;
2198         }
2199
2200         /*
2201      * Copy the text when there is a text at the right of last matched
2202          * or regexp is not matched.
2203          */
2204         if (data_pos < data_len)
2205         {
2206                 text *right_text = text_substring(PointerGetDatum(src_text),
2207                                                                    data_pos + 1, -1, true);
2208                 appendStringInfoString(str, PG_TEXT_GET_STR(right_text));
2209                 pfree(right_text);
2210         }
2211
2212         ret_text = PG_STR_GET_TEXT(str->data);
2213         pfree(str->data);
2214         pfree(str);
2215         pfree(data);
2216
2217         PG_RETURN_TEXT_P(ret_text);
2218 }
2219
2220 /*
2221  * split_text
2222  * parse input string
2223  * return ord item (1 based)
2224  * based on provided field separator
2225  */
2226 Datum
2227 split_text(PG_FUNCTION_ARGS)
2228 {
2229         text       *inputstring = PG_GETARG_TEXT_P(0);
2230         text       *fldsep = PG_GETARG_TEXT_P(1);
2231         int                     fldnum = PG_GETARG_INT32(2);
2232         int                     inputstring_len = TEXTLEN(inputstring);
2233         int                     fldsep_len = TEXTLEN(fldsep);
2234         int                     start_posn;
2235         int                     end_posn;
2236         text       *result_text;
2237
2238         /* field number is 1 based */
2239         if (fldnum < 1)
2240                 ereport(ERROR,
2241                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2242                                  errmsg("field position must be greater than zero")));
2243
2244         /* return empty string for empty input string */
2245         if (inputstring_len < 1)
2246                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2247
2248         /* empty field separator */
2249         if (fldsep_len < 1)
2250         {
2251                 /* if first field, return input string, else empty string */
2252                 if (fldnum == 1)
2253                         PG_RETURN_TEXT_P(inputstring);
2254                 else
2255                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2256         }
2257
2258         start_posn = text_position(inputstring, fldsep, fldnum - 1);
2259         end_posn = text_position(inputstring, fldsep, fldnum);
2260
2261         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2262         {
2263                 /* if first field, return input string, else empty string */
2264                 if (fldnum == 1)
2265                         PG_RETURN_TEXT_P(inputstring);
2266                 else
2267                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2268         }
2269         else if (start_posn == 0)
2270         {
2271                 /* first field requested */
2272                 result_text = LEFT(inputstring, fldsep);
2273                 PG_RETURN_TEXT_P(result_text);
2274         }
2275         else if (end_posn == 0)
2276         {
2277                 /* last field requested */
2278                 result_text = text_substring(PointerGetDatum(inputstring),
2279                                                                          start_posn + fldsep_len,
2280                                                                          -1, true);
2281                 PG_RETURN_TEXT_P(result_text);
2282         }
2283         else
2284         {
2285                 /* interior field requested */
2286                 result_text = text_substring(PointerGetDatum(inputstring),
2287                                                                          start_posn + fldsep_len,
2288                                                                          end_posn - start_posn - fldsep_len,
2289                                                                          false);
2290                 PG_RETURN_TEXT_P(result_text);
2291         }
2292 }
2293
2294 /*
2295  * text_to_array
2296  * parse input string
2297  * return text array of elements
2298  * based on provided field separator
2299  */
2300 Datum
2301 text_to_array(PG_FUNCTION_ARGS)
2302 {
2303         text       *inputstring = PG_GETARG_TEXT_P(0);
2304         text       *fldsep = PG_GETARG_TEXT_P(1);
2305         int                     inputstring_len = TEXTLEN(inputstring);
2306         int                     fldsep_len = TEXTLEN(fldsep);
2307         int                     fldnum;
2308         int                     start_posn;
2309         int                     end_posn;
2310         text       *result_text;
2311         ArrayBuildState *astate = NULL;
2312
2313         /* return NULL for empty input string */
2314         if (inputstring_len < 1)
2315                 PG_RETURN_NULL();
2316
2317         /*
2318          * empty field separator return one element, 1D, array using the input
2319          * string
2320          */
2321         if (fldsep_len < 1)
2322                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2323                                                                            CStringGetDatum(inputstring), 1));
2324
2325         /* start with end position holding the initial start position */
2326         end_posn = 0;
2327         for (fldnum = 1;; fldnum++) /* field number is 1 based */
2328         {
2329                 Datum           dvalue;
2330                 bool            disnull = false;
2331
2332                 start_posn = end_posn;
2333                 end_posn = text_position(inputstring, fldsep, fldnum);
2334
2335                 if ((start_posn == 0) && (end_posn == 0))               /* fldsep not found */
2336                 {
2337                         if (fldnum == 1)
2338                         {
2339                                 /*
2340                                  * first element return one element, 1D, array using the
2341                                  * input string
2342                                  */
2343                                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2344                                                                            CStringGetDatum(inputstring), 1));
2345                         }
2346                         else
2347                         {
2348                                 /* otherwise create array and exit */
2349                                 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2350                                                                                                   CurrentMemoryContext));
2351                         }
2352                 }
2353                 else if (start_posn == 0)
2354                 {
2355                         /* first field requested */
2356                         result_text = LEFT(inputstring, fldsep);
2357                 }
2358                 else if (end_posn == 0)
2359                 {
2360                         /* last field requested */
2361                         result_text = text_substring(PointerGetDatum(inputstring),
2362                                                                                  start_posn + fldsep_len,
2363                                                                                  -1, true);
2364                 }
2365                 else
2366                 {
2367                         /* interior field requested */
2368                         result_text = text_substring(PointerGetDatum(inputstring),
2369                                                                                  start_posn + fldsep_len,
2370                                                                           end_posn - start_posn - fldsep_len,
2371                                                                                  false);
2372                 }
2373
2374                 /* stash away current value */
2375                 dvalue = PointerGetDatum(result_text);
2376                 astate = accumArrayResult(astate, dvalue,
2377                                                                   disnull, TEXTOID,
2378                                                                   CurrentMemoryContext);
2379         }
2380
2381         /* never reached -- keep compiler quiet */
2382         PG_RETURN_NULL();
2383 }
2384
2385 /*
2386  * array_to_text
2387  * concatenate Cstring representation of input array elements
2388  * using provided field separator
2389  */
2390 Datum
2391 array_to_text(PG_FUNCTION_ARGS)
2392 {
2393         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2394         char       *fldsep = PG_TEXTARG_GET_STR(1);
2395         int                     nitems,
2396                            *dims,
2397                                 ndims;
2398         char       *p;
2399         Oid                     element_type;
2400         int                     typlen;
2401         bool            typbyval;
2402         char            typalign;
2403         StringInfo      result_str = makeStringInfo();
2404         int                     i;
2405         ArrayMetaState *my_extra;
2406
2407         p = ARR_DATA_PTR(v);
2408         ndims = ARR_NDIM(v);
2409         dims = ARR_DIMS(v);
2410         nitems = ArrayGetNItems(ndims, dims);
2411
2412         /* if there are no elements, return an empty string */
2413         if (nitems == 0)
2414                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2415
2416         element_type = ARR_ELEMTYPE(v);
2417
2418         /*
2419          * We arrange to look up info about element type, including its output
2420          * conversion proc, only once per series of calls, assuming the
2421          * element type doesn't change underneath us.
2422          */
2423         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2424         if (my_extra == NULL)
2425         {
2426                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2427                                                                                                  sizeof(ArrayMetaState));
2428                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2429                 my_extra->element_type = InvalidOid;
2430         }
2431
2432         if (my_extra->element_type != element_type)
2433         {
2434                 /*
2435                  * Get info about element type, including its output conversion
2436                  * proc
2437                  */
2438                 get_type_io_data(element_type, IOFunc_output,
2439                                                  &my_extra->typlen, &my_extra->typbyval,
2440                                                  &my_extra->typalign, &my_extra->typdelim,
2441                                                  &my_extra->typioparam, &my_extra->typiofunc);
2442                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2443                                           fcinfo->flinfo->fn_mcxt);
2444                 my_extra->element_type = element_type;
2445         }
2446         typlen = my_extra->typlen;
2447         typbyval = my_extra->typbyval;
2448         typalign = my_extra->typalign;
2449
2450         for (i = 0; i < nitems; i++)
2451         {
2452                 Datum           itemvalue;
2453                 char       *value;
2454
2455                 itemvalue = fetch_att(p, typbyval, typlen);
2456
2457                 value = DatumGetCString(FunctionCall1(&my_extra->proc,
2458                                                                                           itemvalue));
2459
2460                 if (i > 0)
2461                         appendStringInfo(result_str, "%s%s", fldsep, value);
2462                 else
2463                         appendStringInfoString(result_str, value);
2464
2465                 p = att_addlength(p, typlen, PointerGetDatum(p));
2466                 p = (char *) att_align(p, typalign);
2467         }
2468
2469         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2470 }
2471
2472 #define HEXBASE 16
2473 /*
2474  * Convert a int32 to a string containing a base 16 (hex) representation of
2475  * the number.
2476  */
2477 Datum
2478 to_hex32(PG_FUNCTION_ARGS)
2479 {
2480         uint32          value = (uint32) PG_GETARG_INT32(0);
2481         text       *result_text;
2482         char       *ptr;
2483         const char *digits = "0123456789abcdef";
2484         char            buf[32];                /* bigger than needed, but reasonable */
2485
2486         ptr = buf + sizeof(buf) - 1;
2487         *ptr = '\0';
2488
2489         do
2490         {
2491                 *--ptr = digits[value % HEXBASE];
2492                 value /= HEXBASE;
2493         } while (ptr > buf && value);
2494
2495         result_text = PG_STR_GET_TEXT(ptr);
2496         PG_RETURN_TEXT_P(result_text);
2497 }
2498
2499 /*
2500  * Convert a int64 to a string containing a base 16 (hex) representation of
2501  * the number.
2502  */
2503 Datum
2504 to_hex64(PG_FUNCTION_ARGS)
2505 {
2506         uint64          value = (uint64) PG_GETARG_INT64(0);
2507         text       *result_text;
2508         char       *ptr;
2509         const char *digits = "0123456789abcdef";
2510         char            buf[32];                /* bigger than needed, but reasonable */
2511
2512         ptr = buf + sizeof(buf) - 1;
2513         *ptr = '\0';
2514
2515         do
2516         {
2517                 *--ptr = digits[value % HEXBASE];
2518                 value /= HEXBASE;
2519         } while (ptr > buf && value);
2520
2521         result_text = PG_STR_GET_TEXT(ptr);
2522         PG_RETURN_TEXT_P(result_text);
2523 }
2524
2525 /*
2526  * Create an md5 hash of a text string and return it as hex
2527  *
2528  * md5 produces a 16 byte (128 bit) hash; double it for hex
2529  */
2530 #define MD5_HASH_LEN  32
2531
2532 Datum
2533 md5_text(PG_FUNCTION_ARGS)
2534 {
2535         text       *in_text = PG_GETARG_TEXT_P(0);
2536         size_t          len;
2537         char        hexsum[MD5_HASH_LEN + 1];
2538         text       *result_text;
2539
2540         /* Calculate the length of the buffer using varlena metadata */
2541         len = VARSIZE(in_text) - VARHDRSZ;
2542
2543         /* get the hash result */
2544         if (md5_hash(VARDATA(in_text), len, hexsum) == false)
2545                 ereport(ERROR,
2546                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2547                                  errmsg("out of memory")));
2548
2549         /* convert to text and return it */
2550         result_text = PG_STR_GET_TEXT(hexsum);
2551         PG_RETURN_TEXT_P(result_text);
2552 }
2553
2554 /*
2555  * Create an md5 hash of a bytea field and return it as a hex string:
2556  * 16-byte md5 digest is represented in 32 hex characters.
2557  */
2558 Datum
2559 md5_bytea(PG_FUNCTION_ARGS)
2560 {
2561         bytea      *in = PG_GETARG_BYTEA_P(0);
2562         size_t          len;
2563         char            hexsum[MD5_HASH_LEN + 1];
2564         text       *result_text;
2565
2566         len = VARSIZE(in) - VARHDRSZ;
2567         if (md5_hash(VARDATA(in), len, hexsum) == false)
2568                 ereport(ERROR,
2569                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2570                                  errmsg("out of memory")));
2571
2572         result_text = PG_STR_GET_TEXT(hexsum);
2573         PG_RETURN_TEXT_P(result_text);
2574 }
2575
2576 /*
2577  * Return the length of a datum, possibly compressed
2578  */
2579 Datum
2580 pg_column_size(PG_FUNCTION_ARGS)
2581 {
2582         Datum                   value = PG_GETARG_DATUM(0);
2583         int                             result;
2584
2585         /*      fn_extra stores the fixed column length, or -1 for varlena. */
2586         if (fcinfo->flinfo->fn_extra == NULL)   /* first call? */
2587         {
2588                 /* On the first call lookup the datatype of the supplied argument */
2589                 Oid                             argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
2590                 int                             typlen    = get_typlen(argtypeid);
2591
2592
2593                 if (typlen == 0)
2594                 {
2595                         /* Oid not in pg_type, should never happen. */
2596                         elog(ERROR, "cache lookup failed for type %u", argtypeid);
2597                 }
2598
2599                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2600                                                                                                           sizeof(int));
2601                 *(int *)fcinfo->flinfo->fn_extra = typlen;
2602         }
2603
2604         if (*(int *)fcinfo->flinfo->fn_extra != -1)
2605                 PG_RETURN_INT32(*(int *)fcinfo->flinfo->fn_extra);
2606         else
2607         {
2608                 result = toast_datum_size(value) - VARHDRSZ;
2609                 PG_RETURN_INT32(result);
2610         }
2611 }