granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.102 2003/07/27 04:53:10 tgl Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
#include "postgres.h"

#include <ctype.h>
#include <limits.h>

#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "access/tuptoaster.h"
#include "catalog/pg_type.h"
#include "lib/stringinfo.h"
#include "libpq/crypt.h"
#include "libpq/pqformat.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/pg_locale.h"
#include "utils/lsyscache.h"
  30
  31
  32 typedef struct varlena unknown;
  33
  34 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  35 #define DatumGetUnknownPCopy(X)         ((unknown *) PG_DETOAST_DATUM_COPY(X))
  36 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  37 #define PG_GETARG_UNKNOWN_P_COPY(n)     DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
  38 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  39
  40 #define PG_TEXTARG_GET_STR(arg_) \
  41         DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
  42 #define PG_TEXT_GET_STR(textp_) \
  43         DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
  44 #define PG_STR_GET_TEXT(str_) \
  45         DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
  46 #define TEXTLEN(textp) \
  47         text_length(PointerGetDatum(textp))
  48 #define TEXTPOS(buf_text, from_sub_text) \
  49         text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
  50 #define TEXTDUP(textp) \
  51         DatumGetTextPCopy(PointerGetDatum(textp))
  52 #define LEFT(buf_text, from_sub_text) \
  53         text_substring(PointerGetDatum(buf_text), \
  54                                         1, \
  55                                         TEXTPOS(buf_text, from_sub_text) - 1, false)
  56 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
  57         text_substring(PointerGetDatum(buf_text), \
  58                                         TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
  59                                         -1, true)
  60
  61 static int      text_cmp(text *arg1, text *arg2);
  62 static int32 text_length(Datum str);
  63 static int32 text_position(Datum str, Datum search_str, int matchnum);
  64 static text *text_substring(Datum str,
  65                            int32 start,
  66                            int32 length,
  67                            bool length_not_specified);
  68
  69
  70 /*****************************************************************************
  71  *       USER I/O ROUTINES                                                                                                               *
  72  *****************************************************************************/
  73
  74
  75 #define VAL(CH)                 ((CH) - '0')
  76 #define DIG(VAL)                ((VAL) + '0')
  77
  78 /*
  79  *              byteain                 - converts from printable representation of byte array
  80  *
  81  *              Non-printable characters must be passed as '\nnn' (octal) and are
  82  *              converted to internal form.  '\' must be passed as '\\'.
  83  *              ereport(ERROR, ...) if bad form.
  84  *
  85  *              BUGS:
  86  *                              The input is scaned twice.
  87  *                              The error checking of input is minimal.
  88  */
  89 Datum
  90 byteain(PG_FUNCTION_ARGS)
  91 {
  92         char       *inputText = PG_GETARG_CSTRING(0);
  93         char       *tp;
  94         char       *rp;
  95         int                     byte;
  96         bytea      *result;
  97
  98         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
  99         {
 100                 if (tp[0] != '\\')
 101                         tp++;
 102                 else if ((tp[0] == '\\') &&
 103                                  (tp[1] >= '0' && tp[1] <= '3') &&
 104                                  (tp[2] >= '0' && tp[2] <= '7') &&
 105                                  (tp[3] >= '0' && tp[3] <= '7'))
 106                         tp += 4;
 107                 else if ((tp[0] == '\\') &&
 108                                  (tp[1] == '\\'))
 109                         tp += 2;
 110                 else
 111                 {
 112                         /*
 113                          * one backslash, not followed by 0 or ### valid octal
 114                          */
 115                         ereport(ERROR,
 116                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 117                                          errmsg("invalid input syntax for bytea")));
 118                 }
 119         }
 120
 121         byte += VARHDRSZ;
 122         result = (bytea *) palloc(byte);
 123         VARATT_SIZEP(result) = byte;            /* set varlena length */
 124
 125         tp = inputText;
 126         rp = VARDATA(result);
 127         while (*tp != '\0')
 128         {
 129                 if (tp[0] != '\\')
 130                         *rp++ = *tp++;
 131                 else if ((tp[0] == '\\') &&
 132                                  (tp[1] >= '0' && tp[1] <= '3') &&
 133                                  (tp[2] >= '0' && tp[2] <= '7') &&
 134                                  (tp[3] >= '0' && tp[3] <= '7'))
 135                 {
 136                         byte = VAL(tp[1]);
 137                         byte <<= 3;
 138                         byte += VAL(tp[2]);
 139                         byte <<= 3;
 140                         *rp++ = byte + VAL(tp[3]);
 141                         tp += 4;
 142                 }
 143                 else if ((tp[0] == '\\') &&
 144                                  (tp[1] == '\\'))
 145                 {
 146                         *rp++ = '\\';
 147                         tp += 2;
 148                 }
 149                 else
 150                 {
 151                         /*
 152                          * We should never get here. The first pass should not allow
 153                          * it.
 154                          */
 155                         ereport(ERROR,
 156                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 157                                          errmsg("invalid input syntax for bytea")));
 158                 }
 159         }
 160
 161         PG_RETURN_BYTEA_P(result);
 162 }
 163
 164 /*
 165  *              byteaout                - converts to printable representation of byte array
 166  *
 167  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 168  *              '\\'.
 169  *
 170  *              NULL vlena should be an error--returning string with NULL for now.
 171  */
 172 Datum
 173 byteaout(PG_FUNCTION_ARGS)
 174 {
 175         bytea      *vlena = PG_GETARG_BYTEA_P(0);
 176         char       *result;
 177         char       *vp;
 178         char       *rp;
 179         int                     val;                    /* holds unprintable chars */
 180         int                     i;
 181         int                     len;
 182
 183         len = 1;                                        /* empty string has 1 char */
 184         vp = VARDATA(vlena);
 185         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 186         {
 187                 if (*vp == '\\')
 188                         len += 2;
 189                 else if (isprint((unsigned char) *vp))
 190                         len++;
 191                 else
 192                         len += 4;
 193         }
 194         rp = result = (char *) palloc(len);
 195         vp = VARDATA(vlena);
 196         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 197         {
 198                 if (*vp == '\\')
 199                 {
 200                         *rp++ = '\\';
 201                         *rp++ = '\\';
 202                 }
 203                 else if (isprint((unsigned char) *vp))
 204                         *rp++ = *vp;
 205                 else
 206                 {
 207                         val = *vp;
 208                         rp[0] = '\\';
 209                         rp[3] = DIG(val & 07);
 210                         val >>= 3;
 211                         rp[2] = DIG(val & 07);
 212                         val >>= 3;
 213                         rp[1] = DIG(val & 03);
 214                         rp += 4;
 215                 }
 216         }
 217         *rp = '\0';
 218         PG_RETURN_CSTRING(result);
 219 }
 220
 221 /*
 222  *              bytearecv                       - converts external binary format to bytea
 223  */
 224 Datum
 225 bytearecv(PG_FUNCTION_ARGS)
 226 {
 227         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 228         bytea      *result;
 229         int                     nbytes;
 230
 231         nbytes = buf->len - buf->cursor;
 232         result = (bytea *) palloc(nbytes + VARHDRSZ);
 233         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 234         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 235         PG_RETURN_BYTEA_P(result);
 236 }
 237
 238 /*
 239  *              byteasend                       - converts bytea to binary format
 240  *
 241  * This is a special case: just copy the input...
 242  */
 243 Datum
 244 byteasend(PG_FUNCTION_ARGS)
 245 {
 246         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
 247
 248         PG_RETURN_BYTEA_P(vlena);
 249 }
 250
 251
 252 /*
 253  *              textin                  - converts "..." to internal representation
 254  */
 255 Datum
 256 textin(PG_FUNCTION_ARGS)
 257 {
 258         char       *inputText = PG_GETARG_CSTRING(0);
 259         text       *result;
 260         int                     len;
 261
 262         /* verify encoding */
 263         len = strlen(inputText);
 264         pg_verifymbstr(inputText, len, false);
 265
 266         result = (text *) palloc(len + VARHDRSZ);
 267         VARATT_SIZEP(result) = len + VARHDRSZ;
 268
 269         memcpy(VARDATA(result), inputText, len);
 270
 271 #ifdef CYR_RECODE
 272         convertstr(VARDATA(result), len, 0);
 273 #endif
 274
 275         PG_RETURN_TEXT_P(result);
 276 }
 277
 278 /*
 279  *              textout                 - converts internal representation to "..."
 280  */
 281 Datum
 282 textout(PG_FUNCTION_ARGS)
 283 {
 284         text       *t = PG_GETARG_TEXT_P(0);
 285         int                     len;
 286         char       *result;
 287
 288         len = VARSIZE(t) - VARHDRSZ;
 289         result = (char *) palloc(len + 1);
 290         memcpy(result, VARDATA(t), len);
 291         result[len] = '\0';
 292
 293 #ifdef CYR_RECODE
 294         convertstr(result, len, 1);
 295 #endif
 296
 297         PG_RETURN_CSTRING(result);
 298 }
 299
 300 /*
 301  *              textrecv                        - converts external binary format to text
 302  */
 303 Datum
 304 textrecv(PG_FUNCTION_ARGS)
 305 {
 306         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 307         text       *result;
 308         char       *str;
 309         int                     nbytes;
 310
 311         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 312         result = (text *) palloc(nbytes + VARHDRSZ);
 313         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 314         memcpy(VARDATA(result), str, nbytes);
 315         pfree(str);
 316         PG_RETURN_TEXT_P(result);
 317 }
 318
 319 /*
 320  *              textsend                        - converts text to binary format
 321  */
 322 Datum
 323 textsend(PG_FUNCTION_ARGS)
 324 {
 325         text       *t = PG_GETARG_TEXT_P(0);
 326         StringInfoData buf;
 327
 328         pq_begintypsend(&buf);
 329         pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
 330         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 331 }
 332
 333
 334 /*
 335  *              unknownin                       - converts "..." to internal representation
 336  */
 337 Datum
 338 unknownin(PG_FUNCTION_ARGS)
 339 {
 340         char       *inputStr = PG_GETARG_CSTRING(0);
 341         unknown    *result;
 342         int                     len;
 343
 344         len = strlen(inputStr) + VARHDRSZ;
 345
 346         result = (unknown *) palloc(len);
 347         VARATT_SIZEP(result) = len;
 348
 349         memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
 350
 351         PG_RETURN_UNKNOWN_P(result);
 352 }
 353
 354 /*
 355  *              unknownout                      - converts internal representation to "..."
 356  */
 357 Datum
 358 unknownout(PG_FUNCTION_ARGS)
 359 {
 360         unknown    *t = PG_GETARG_UNKNOWN_P(0);
 361         int                     len;
 362         char       *result;
 363
 364         len = VARSIZE(t) - VARHDRSZ;
 365         result = (char *) palloc(len + 1);
 366         memcpy(result, VARDATA(t), len);
 367         result[len] = '\0';
 368
 369         PG_RETURN_CSTRING(result);
 370 }
 371
 372 /*
 373  *              unknownrecv                     - converts external binary format to unknown
 374  */
 375 Datum
 376 unknownrecv(PG_FUNCTION_ARGS)
 377 {
 378         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 379         unknown    *result;
 380         int                     nbytes;
 381
 382         nbytes = buf->len - buf->cursor;
 383         result = (unknown *) palloc(nbytes + VARHDRSZ);
 384         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 385         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 386         PG_RETURN_UNKNOWN_P(result);
 387 }
 388
 389 /*
 390  *              unknownsend                     - converts unknown to binary format
 391  *
 392  * This is a special case: just copy the input, since it's
 393  * effectively the same format as bytea
 394  */
 395 Datum
 396 unknownsend(PG_FUNCTION_ARGS)
 397 {
 398         unknown    *vlena = PG_GETARG_UNKNOWN_P_COPY(0);
 399
 400         PG_RETURN_UNKNOWN_P(vlena);
 401 }
 402
 403
 404 /* ========== PUBLIC ROUTINES ========== */
 405
 406 /*
 407  * textlen -
 408  *        returns the logical length of a text*
 409  *         (which is less than the VARSIZE of the text*)
 410  */
 411 Datum
 412 textlen(PG_FUNCTION_ARGS)
 413 {
 414         PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
 415 }
 416
 417 /*
 418  * text_length -
 419  *      Does the real work for textlen()
 420  *      This is broken out so it can be called directly by other string processing
 421  *      functions.
 422  */
 423 static int32
 424 text_length(Datum str)
 425 {
 426         /* fastpath when max encoding length is one */
 427         if (pg_database_encoding_max_length() == 1)
 428                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 429
 430         if (pg_database_encoding_max_length() > 1)
 431         {
 432                 text       *t = DatumGetTextP(str);
 433
 434                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
 435                                                                                          VARSIZE(t) - VARHDRSZ));
 436         }
 437
 438         /* should never get here */
 439         elog(ERROR, "invalid backend encoding: encoding max length < 1");
 440
 441         /* not reached: suppress compiler warning */
 442         return 0;
 443 }
 444
 445 /*
 446  * textoctetlen -
 447  *        returns the physical length of a text*
 448  *         (which is less than the VARSIZE of the text*)
 449  */
 450 Datum
 451 textoctetlen(PG_FUNCTION_ARGS)
 452 {
 453         PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
 454 }
 455
 456 /*
 457  * textcat -
 458  *        takes two text* and returns a text* that is the concatenation of
 459  *        the two.
 460  *
 461  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 462  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 463  * Allocate space for output in all cases.
 464  * XXX - thomas 1997-07-10
 465  */
 466 Datum
 467 textcat(PG_FUNCTION_ARGS)
 468 {
 469         text       *t1 = PG_GETARG_TEXT_P(0);
 470         text       *t2 = PG_GETARG_TEXT_P(1);
 471         int                     len1,
 472                                 len2,
 473                                 len;
 474         text       *result;
 475         char       *ptr;
 476
 477         len1 = (VARSIZE(t1) - VARHDRSZ);
 478         if (len1 < 0)
 479                 len1 = 0;
 480
 481         len2 = (VARSIZE(t2) - VARHDRSZ);
 482         if (len2 < 0)
 483                 len2 = 0;
 484
 485         len = len1 + len2 + VARHDRSZ;
 486         result = (text *) palloc(len);
 487
 488         /* Set size of result string... */
 489         VARATT_SIZEP(result) = len;
 490
 491         /* Fill data field of result string... */
 492         ptr = VARDATA(result);
 493         if (len1 > 0)
 494                 memcpy(ptr, VARDATA(t1), len1);
 495         if (len2 > 0)
 496                 memcpy(ptr + len1, VARDATA(t2), len2);
 497
 498         PG_RETURN_TEXT_P(result);
 499 }
 500
 501 /*
 502  * text_substr()
 503  * Return a substring starting at the specified position.
 504  * - thomas 1997-12-31
 505  *
 506  * Input:
 507  *      - string
 508  *      - starting position (is one-based)
 509  *      - string length
 510  *
 511  * If the starting position is zero or less, then return from the start of the string
 512  *      adjusting the length to be consistent with the "negative start" per SQL92.
 513  * If the length is less than zero, return the remaining string.
 514  *
 515  * Note that the arguments operate on octet length,
 516  *      so not aware of multibyte character sets.
 517  *
 518  * Added multibyte support.
 519  * - Tatsuo Ishii 1998-4-21
 520  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 521  * Formerly returned the entire string; now returns a portion.
 522  * - Thomas Lockhart 1998-12-10
 523  * Now uses faster TOAST-slicing interface
 524  * - John Gray 2002-02-22
 525  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 526  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 527  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 528  * S > LC and < LC + 4 sometimes garbage characters are returned.
 529  * - Joe Conway 2002-08-10
 530  */
 531 Datum
 532 text_substr(PG_FUNCTION_ARGS)
 533 {
 534         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 535                                                                         PG_GETARG_INT32(1),
 536                                                                         PG_GETARG_INT32(2),
 537                                                                         false));
 538 }
 539
 540 /*
 541  * text_substr_no_len -
 542  *        Wrapper to avoid opr_sanity failure due to
 543  *        one function accepting a different number of args.
 544  */
 545 Datum
 546 text_substr_no_len(PG_FUNCTION_ARGS)
 547 {
 548         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 549                                                                         PG_GETARG_INT32(1),
 550                                                                         -1, true));
 551 }
 552
 553 /*
 554  * text_substring -
 555  *      Does the real work for text_substr() and text_substr_no_len()
 556  *      This is broken out so it can be called directly by other string processing
 557  *      functions.
 558  */
 559 static text *
 560 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 561 {
 562         int32           eml = pg_database_encoding_max_length();
 563         int32           S = start;              /* start position */
 564         int32           S1;                             /* adjusted start position */
 565         int32           L1;                             /* adjusted substring length */
 566
 567         /* life is easy if the encoding max length is 1 */
 568         if (eml == 1)
 569         {
 570                 S1 = Max(S, 1);
 571
 572                 if (length_not_specified)               /* special case - get length to
 573                                                                                  * end of string */
 574                         L1 = -1;
 575                 else
 576                 {
 577                         /* end position */
 578                         int                     E = S + length;
 579
 580                         /*
 581                          * A negative value for L is the only way for the end position
 582                          * to be before the start. SQL99 says to throw an error.
 583                          */
 584                         if (E < S)
 585                                 ereport(ERROR,
 586                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 587                                                  errmsg("negative substring length not allowed")));
 588
 589                         /*
 590                          * A zero or negative value for the end position can happen if
 591                          * the start was negative or one. SQL99 says to return a
 592                          * zero-length string.
 593                          */
 594                         if (E < 1)
 595                                 return PG_STR_GET_TEXT("");
 596
 597                         L1 = E - S1;
 598                 }
 599
 600                 /*
 601                  * If the start position is past the end of the string, SQL99 says
 602                  * to return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will
 603                  * do that for us. Convert to zero-based starting position
 604                  */
 605                 return DatumGetTextPSlice(str, S1 - 1, L1);
 606         }
 607         else if (eml > 1)
 608         {
 609                 /*
 610                  * When encoding max length is > 1, we can't get LC without
 611                  * detoasting, so we'll grab a conservatively large slice now and
 612                  * go back later to do the right thing
 613                  */
 614                 int32           slice_start;
 615                 int32           slice_size;
 616                 int32           slice_strlen;
 617                 text       *slice;
 618                 int32           E1;
 619                 int32           i;
 620                 char       *p;
 621                 char       *s;
 622                 text       *ret;
 623
 624                 /*
 625                  * if S is past the end of the string, the tuple toaster will
 626                  * return a zero-length string to us
 627                  */
 628                 S1 = Max(S, 1);
 629
 630                 /*
 631                  * We need to start at position zero because there is no way to
 632                  * know in advance which byte offset corresponds to the supplied
 633                  * start position.
 634                  */
 635                 slice_start = 0;
 636
 637                 if (length_not_specified)               /* special case - get length to
 638                                                                                  * end of string */
 639                         slice_size = L1 = -1;
 640                 else
 641                 {
 642                         int                     E = S + length;
 643
 644                         /*
 645                          * A negative value for L is the only way for the end position
 646                          * to be before the start. SQL99 says to throw an error.
 647                          */
 648                         if (E < S)
 649                                 ereport(ERROR,
 650                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 651                                                  errmsg("negative substring length not allowed")));
 652
 653                         /*
 654                          * A zero or negative value for the end position can happen if
 655                          * the start was negative or one. SQL99 says to return a
 656                          * zero-length string.
 657                          */
 658                         if (E < 1)
 659                                 return PG_STR_GET_TEXT("");
 660
 661                         /*
 662                          * if E is past the end of the string, the tuple toaster will
 663                          * truncate the length for us
 664                          */
 665                         L1 = E - S1;
 666
 667                         /*
 668                          * Total slice size in bytes can't be any longer than the
 669                          * start position plus substring length times the encoding max
 670                          * length.
 671                          */
 672                         slice_size = (S1 + L1) * eml;
 673                 }
 674                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
 675
 676                 /* see if we got back an empty string */
 677                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
 678                         return PG_STR_GET_TEXT("");
 679
 680                 /* Now we can get the actual length of the slice in MB characters */
 681                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
 682
 683                 /*
 684                  * Check that the start position wasn't > slice_strlen. If so,
 685                  * SQL99 says to return a zero-length string.
 686                  */
 687                 if (S1 > slice_strlen)
 688                         return PG_STR_GET_TEXT("");
 689
 690                 /*
 691                  * Adjust L1 and E1 now that we know the slice string length.
 692                  * Again remember that S1 is one based, and slice_start is zero
 693                  * based.
 694                  */
 695                 if (L1 > -1)
 696                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 697                 else
 698                         E1 = slice_start + 1 + slice_strlen;
 699
 700                 /*
 701                  * Find the start position in the slice; remember S1 is not zero
 702                  * based
 703                  */
 704                 p = VARDATA(slice);
 705                 for (i = 0; i < S1 - 1; i++)
 706                         p += pg_mblen(p);
 707
 708                 /* hang onto a pointer to our start position */
 709                 s = p;
 710
 711                 /*
 712                  * Count the actual bytes used by the substring of the requested
 713                  * length.
 714                  */
 715                 for (i = S1; i < E1; i++)
 716                         p += pg_mblen(p);
 717
 718                 ret = (text *) palloc(VARHDRSZ + (p - s));
 719                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
 720                 memcpy(VARDATA(ret), s, (p - s));
 721
 722                 return ret;
 723         }
 724         else
 725                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
 726
 727         /* not reached: suppress compiler warning */
 728         return PG_STR_GET_TEXT("");
 729 }
 730
 731 /*
 732  * textpos -
 733  *        Return the position of the specified substring.
 734  *        Implements the SQL92 POSITION() function.
 735  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 736  * - thomas 1997-07-27
 737  */
 738 Datum
 739 textpos(PG_FUNCTION_ARGS)
 740 {
 741         PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
 742 }
 743
 744 /*
 745  * text_position -
 746  *      Does the real work for textpos()
 747  *      This is broken out so it can be called directly by other string processing
 748  *      functions.
 749  */
 750 static int32
 751 text_position(Datum str, Datum search_str, int matchnum)
 752 {
 753         int                     eml = pg_database_encoding_max_length();
 754         text       *t1 = DatumGetTextP(str);
 755         text       *t2 = DatumGetTextP(search_str);
 756         int                     match = 0,
 757                                 pos = 0,
 758                                 p = 0,
 759                                 px,
 760                                 len1,
 761                                 len2;
 762
 763         if (matchnum == 0)
 764                 return 0;                               /* result for 0th match */
 765
 766         if (VARSIZE(t2) <= VARHDRSZ)
 767                 PG_RETURN_INT32(1);             /* result for empty pattern */
 768
 769         len1 = (VARSIZE(t1) - VARHDRSZ);
 770         len2 = (VARSIZE(t2) - VARHDRSZ);
 771
 772         /* no use in searching str past point where search_str will fit */
 773         px = (len1 - len2);
 774
 775         if (eml == 1)                           /* simple case - single byte encoding */
 776         {
 777                 char       *p1,
 778                                    *p2;
 779
 780                 p1 = VARDATA(t1);
 781                 p2 = VARDATA(t2);
 782
 783                 for (p = 0; p <= px; p++)
 784                 {
 785                         if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
 786                         {
 787                                 if (++match == matchnum)
 788                                 {
 789                                         pos = p + 1;
 790                                         break;
 791                                 }
 792                         }
 793                         p1++;
 794                 }
 795         }
 796         else if (eml > 1)                       /* not as simple - multibyte encoding */
 797         {
 798                 pg_wchar   *p1,
 799                                    *p2,
 800                                    *ps1,
 801                                    *ps2;
 802
 803                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 804                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
 805                 len1 = pg_wchar_strlen(p1);
 806                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 807                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
 808                 len2 = pg_wchar_strlen(p2);
 809
 810                 for (p = 0; p <= px; p++)
 811                 {
 812                         if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
 813                         {
 814                                 if (++match == matchnum)
 815                                 {
 816                                         pos = p + 1;
 817                                         break;
 818                                 }
 819                         }
 820                         p1++;
 821                 }
 822
 823                 pfree(ps1);
 824                 pfree(ps2);
 825         }
 826         else
 827                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
 828
 829         PG_RETURN_INT32(pos);
 830 }
 831
 832 /* varstr_cmp()
 833  * Comparison function for text strings with given lengths.
 834  * Includes locale support, but must copy strings to temporary memory
 835  *      to allow null-termination for inputs to strcoll().
 836  * Returns -1, 0 or 1
 837  */
 838 int
 839 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 840 {
 841         int                     result;
 842
 843         /*
 844          * Unfortunately, there is no strncoll(), so in the non-C locale case
 845          * we have to do some memory copying.  This turns out to be
 846          * significantly slower, so we optimize the case where LC_COLLATE is
 847          * C.  We also try to optimize relatively-short strings by avoiding
 848          * palloc/pfree overhead.
 849          */
 850 #define STACKBUFLEN             1024
 851
 852         if (!lc_collate_is_c())
 853         {
 854                 char    a1buf[STACKBUFLEN];
 855                 char    a2buf[STACKBUFLEN];
 856                 char   *a1p,
 857                            *a2p;
 858
 859                 if (len1 >= STACKBUFLEN)
 860                         a1p = (char *) palloc(len1 + 1);
 861                 else
 862                         a1p = a1buf;
 863                 if (len2 >= STACKBUFLEN)
 864                         a2p = (char *) palloc(len2 + 1);
 865                 else
 866                         a2p = a2buf;
 867
 868                 memcpy(a1p, arg1, len1);
 869                 a1p[len1] = '\0';
 870                 memcpy(a2p, arg2, len2);
 871                 a2p[len2] = '\0';
 872
 873                 result = strcoll(a1p, a2p);
 874
 875                 if (len1 >= STACKBUFLEN)
 876                         pfree(a1p);
 877                 if (len2 >= STACKBUFLEN)
 878                         pfree(a2p);
 879         }
 880         else
 881         {
 882                 result = strncmp(arg1, arg2, Min(len1, len2));
 883                 if ((result == 0) && (len1 != len2))
 884                         result = (len1 < len2) ? -1 : 1;
 885         }
 886
 887         return result;
 888 }
 889
 890
 891 /* text_cmp()
 892  * Internal comparison function for text strings.
 893  * Returns -1, 0 or 1
 894  */
 895 static int
 896 text_cmp(text *arg1, text *arg2)
 897 {
 898         char       *a1p,
 899                            *a2p;
 900         int                     len1,
 901                                 len2;
 902
 903         a1p = VARDATA(arg1);
 904         a2p = VARDATA(arg2);
 905
 906         len1 = VARSIZE(arg1) - VARHDRSZ;
 907         len2 = VARSIZE(arg2) - VARHDRSZ;
 908
 909         return varstr_cmp(a1p, len1, a2p, len2);
 910 }
 911
 912 /*
 913  * Comparison functions for text strings.
 914  *
 915  * Note: btree indexes need these routines not to leak memory; therefore,
 916  * be careful to free working copies of toasted datums.  Most places don't
 917  * need to be so careful.
 918  */
 919
 920 Datum
 921 texteq(PG_FUNCTION_ARGS)
 922 {
 923         text       *arg1 = PG_GETARG_TEXT_P(0);
 924         text       *arg2 = PG_GETARG_TEXT_P(1);
 925         bool            result;
 926
 927         /* fast path for different-length inputs */
 928         if (VARSIZE(arg1) != VARSIZE(arg2))
 929                 result = false;
 930         else
 931                 result = (text_cmp(arg1, arg2) == 0);
 932
 933         PG_FREE_IF_COPY(arg1, 0);
 934         PG_FREE_IF_COPY(arg2, 1);
 935
 936         PG_RETURN_BOOL(result);
 937 }
 938
 939 Datum
 940 textne(PG_FUNCTION_ARGS)
 941 {
 942         text       *arg1 = PG_GETARG_TEXT_P(0);
 943         text       *arg2 = PG_GETARG_TEXT_P(1);
 944         bool            result;
 945
 946         /* fast path for different-length inputs */
 947         if (VARSIZE(arg1) != VARSIZE(arg2))
 948                 result = true;
 949         else
 950                 result = (text_cmp(arg1, arg2) != 0);
 951
 952         PG_FREE_IF_COPY(arg1, 0);
 953         PG_FREE_IF_COPY(arg2, 1);
 954
 955         PG_RETURN_BOOL(result);
 956 }
 957
 958 Datum
 959 text_lt(PG_FUNCTION_ARGS)
 960 {
 961         text       *arg1 = PG_GETARG_TEXT_P(0);
 962         text       *arg2 = PG_GETARG_TEXT_P(1);
 963         bool            result;
 964
 965         result = (text_cmp(arg1, arg2) < 0);
 966
 967         PG_FREE_IF_COPY(arg1, 0);
 968         PG_FREE_IF_COPY(arg2, 1);
 969
 970         PG_RETURN_BOOL(result);
 971 }
 972
 973 Datum
 974 text_le(PG_FUNCTION_ARGS)
 975 {
 976         text       *arg1 = PG_GETARG_TEXT_P(0);
 977         text       *arg2 = PG_GETARG_TEXT_P(1);
 978         bool            result;
 979
 980         result = (text_cmp(arg1, arg2) <= 0);
 981
 982         PG_FREE_IF_COPY(arg1, 0);
 983         PG_FREE_IF_COPY(arg2, 1);
 984
 985         PG_RETURN_BOOL(result);
 986 }
 987
 988 Datum
 989 text_gt(PG_FUNCTION_ARGS)
 990 {
 991         text       *arg1 = PG_GETARG_TEXT_P(0);
 992         text       *arg2 = PG_GETARG_TEXT_P(1);
 993         bool            result;
 994
 995         result = (text_cmp(arg1, arg2) > 0);
 996
 997         PG_FREE_IF_COPY(arg1, 0);
 998         PG_FREE_IF_COPY(arg2, 1);
 999
1000         PG_RETURN_BOOL(result);
1001 }
1002
1003 Datum
1004 text_ge(PG_FUNCTION_ARGS)
1005 {
1006         text       *arg1 = PG_GETARG_TEXT_P(0);
1007         text       *arg2 = PG_GETARG_TEXT_P(1);
1008         bool            result;
1009
1010         result = (text_cmp(arg1, arg2) >= 0);
1011
1012         PG_FREE_IF_COPY(arg1, 0);
1013         PG_FREE_IF_COPY(arg2, 1);
1014
1015         PG_RETURN_BOOL(result);
1016 }
1017
1018 Datum
1019 bttextcmp(PG_FUNCTION_ARGS)
1020 {
1021         text       *arg1 = PG_GETARG_TEXT_P(0);
1022         text       *arg2 = PG_GETARG_TEXT_P(1);
1023         int32           result;
1024
1025         result = text_cmp(arg1, arg2);
1026
1027         PG_FREE_IF_COPY(arg1, 0);
1028         PG_FREE_IF_COPY(arg2, 1);
1029
1030         PG_RETURN_INT32(result);
1031 }
1032
1033
1034 Datum
1035 text_larger(PG_FUNCTION_ARGS)
1036 {
1037         text       *arg1 = PG_GETARG_TEXT_P(0);
1038         text       *arg2 = PG_GETARG_TEXT_P(1);
1039         text       *result;
1040
1041         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1042
1043         PG_RETURN_TEXT_P(result);
1044 }
1045
1046 Datum
1047 text_smaller(PG_FUNCTION_ARGS)
1048 {
1049         text       *arg1 = PG_GETARG_TEXT_P(0);
1050         text       *arg2 = PG_GETARG_TEXT_P(1);
1051         text       *result;
1052
1053         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1054
1055         PG_RETURN_TEXT_P(result);
1056 }
1057
1058
1059 /*
1060  * The following operators support character-by-character comparison
1061  * of text data types, to allow building indexes suitable for LIKE
1062  * clauses.
1063  */
1064
1065 static int
1066 internal_text_pattern_compare(text *arg1, text *arg2)
1067 {
1068         int result;
1069
1070         result = memcmp(VARDATA(arg1), VARDATA(arg2),
1071                                         Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1072         if (result != 0)
1073                 return result;
1074         else if (VARSIZE(arg1) < VARSIZE(arg2))
1075                 return -1;
1076         else if (VARSIZE(arg1) > VARSIZE(arg2))
1077                 return 1;
1078         else
1079                 return 0;
1080 }
1081
1082
1083 Datum
1084 text_pattern_lt(PG_FUNCTION_ARGS)
1085 {
1086         text       *arg1 = PG_GETARG_TEXT_P(0);
1087         text       *arg2 = PG_GETARG_TEXT_P(1);
1088         int                     result;
1089
1090         result = internal_text_pattern_compare(arg1, arg2);
1091
1092         PG_FREE_IF_COPY(arg1, 0);
1093         PG_FREE_IF_COPY(arg2, 1);
1094
1095         PG_RETURN_BOOL(result < 0);
1096 }
1097
1098
1099 Datum
1100 text_pattern_le(PG_FUNCTION_ARGS)
1101 {
1102         text       *arg1 = PG_GETARG_TEXT_P(0);
1103         text       *arg2 = PG_GETARG_TEXT_P(1);
1104         int                     result;
1105
1106         result = internal_text_pattern_compare(arg1, arg2);
1107
1108         PG_FREE_IF_COPY(arg1, 0);
1109         PG_FREE_IF_COPY(arg2, 1);
1110
1111         PG_RETURN_BOOL(result <= 0);
1112 }
1113
1114
1115 Datum
1116 text_pattern_eq(PG_FUNCTION_ARGS)
1117 {
1118         text       *arg1 = PG_GETARG_TEXT_P(0);
1119         text       *arg2 = PG_GETARG_TEXT_P(1);
1120         int                     result;
1121
1122         if (VARSIZE(arg1) != VARSIZE(arg2))
1123                 result = 1;
1124         else
1125                 result = internal_text_pattern_compare(arg1, arg2);
1126
1127         PG_FREE_IF_COPY(arg1, 0);
1128         PG_FREE_IF_COPY(arg2, 1);
1129
1130         PG_RETURN_BOOL(result == 0);
1131 }
1132
1133
1134 Datum
1135 text_pattern_ge(PG_FUNCTION_ARGS)
1136 {
1137         text       *arg1 = PG_GETARG_TEXT_P(0);
1138         text       *arg2 = PG_GETARG_TEXT_P(1);
1139         int                     result;
1140
1141         result = internal_text_pattern_compare(arg1, arg2);
1142
1143         PG_FREE_IF_COPY(arg1, 0);
1144         PG_FREE_IF_COPY(arg2, 1);
1145
1146         PG_RETURN_BOOL(result >= 0);
1147 }
1148
1149
1150 Datum
1151 text_pattern_gt(PG_FUNCTION_ARGS)
1152 {
1153         text       *arg1 = PG_GETARG_TEXT_P(0);
1154         text       *arg2 = PG_GETARG_TEXT_P(1);
1155         int                     result;
1156
1157         result = internal_text_pattern_compare(arg1, arg2);
1158
1159         PG_FREE_IF_COPY(arg1, 0);
1160         PG_FREE_IF_COPY(arg2, 1);
1161
1162         PG_RETURN_BOOL(result > 0);
1163 }
1164
1165
1166 Datum
1167 text_pattern_ne(PG_FUNCTION_ARGS)
1168 {
1169         text       *arg1 = PG_GETARG_TEXT_P(0);
1170         text       *arg2 = PG_GETARG_TEXT_P(1);
1171         int                     result;
1172
1173         if (VARSIZE(arg1) != VARSIZE(arg2))
1174                 result = 1;
1175         else
1176                 result = internal_text_pattern_compare(arg1, arg2);
1177
1178         PG_FREE_IF_COPY(arg1, 0);
1179         PG_FREE_IF_COPY(arg2, 1);
1180
1181         PG_RETURN_BOOL(result != 0);
1182 }
1183
1184
1185 Datum
1186 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1187 {
1188         text       *arg1 = PG_GETARG_TEXT_P(0);
1189         text       *arg2 = PG_GETARG_TEXT_P(1);
1190         int                     result;
1191
1192         result = internal_text_pattern_compare(arg1, arg2);
1193
1194         PG_FREE_IF_COPY(arg1, 0);
1195         PG_FREE_IF_COPY(arg2, 1);
1196
1197         PG_RETURN_INT32(result);
1198 }
1199
1200
1201 /*-------------------------------------------------------------
1202  * byteaoctetlen
1203  *
1204  * get the number of bytes contained in an instance of type 'bytea'
1205  *-------------------------------------------------------------
1206  */
1207 Datum
1208 byteaoctetlen(PG_FUNCTION_ARGS)
1209 {
1210         PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
1211 }
1212
1213 /*
1214  * byteacat -
1215  *        takes two bytea* and returns a bytea* that is the concatenation of
1216  *        the two.
1217  *
1218  * Cloned from textcat and modified as required.
1219  */
1220 Datum
1221 byteacat(PG_FUNCTION_ARGS)
1222 {
1223         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1224         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1225         int                     len1,
1226                                 len2,
1227                                 len;
1228         bytea      *result;
1229         char       *ptr;
1230
1231         len1 = (VARSIZE(t1) - VARHDRSZ);
1232         if (len1 < 0)
1233                 len1 = 0;
1234
1235         len2 = (VARSIZE(t2) - VARHDRSZ);
1236         if (len2 < 0)
1237                 len2 = 0;
1238
1239         len = len1 + len2 + VARHDRSZ;
1240         result = (bytea *) palloc(len);
1241
1242         /* Set size of result string... */
1243         VARATT_SIZEP(result) = len;
1244
1245         /* Fill data field of result string... */
1246         ptr = VARDATA(result);
1247         if (len1 > 0)
1248                 memcpy(ptr, VARDATA(t1), len1);
1249         if (len2 > 0)
1250                 memcpy(ptr + len1, VARDATA(t2), len2);
1251
1252         PG_RETURN_BYTEA_P(result);
1253 }
1254
1255 #define PG_STR_GET_BYTEA(str_) \
1256         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1257 /*
1258  * bytea_substr()
1259  * Return a substring starting at the specified position.
1260  * Cloned from text_substr and modified as required.
1261  *
1262  * Input:
1263  *      - string
1264  *      - starting position (is one-based)
1265  *      - string length (optional)
1266  *
1267  * If the starting position is zero or less, then return from the start of the string
1268  * adjusting the length to be consistent with the "negative start" per SQL92.
1269  * If the length is less than zero, an ERROR is thrown. If no third argument
1270  * (length) is provided, the length to the end of the string is assumed.
1271  */
1272 Datum
1273 bytea_substr(PG_FUNCTION_ARGS)
1274 {
1275         int                     S = PG_GETARG_INT32(1); /* start position */
1276         int                     S1;                             /* adjusted start position */
1277         int                     L1;                             /* adjusted substring length */
1278
1279         S1 = Max(S, 1);
1280
1281         if (fcinfo->nargs == 2)
1282         {
1283                 /*
1284                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1285                  * everything to the end of the string if we pass it a negative
1286                  * value for length.
1287                  */
1288                 L1 = -1;
1289         }
1290         else
1291         {
1292                 /* end position */
1293                 int                     E = S + PG_GETARG_INT32(2);
1294
1295                 /*
1296                  * A negative value for L is the only way for the end position to
1297                  * be before the start. SQL99 says to throw an error.
1298                  */
1299                 if (E < S)
1300                         ereport(ERROR,
1301                                         (errcode(ERRCODE_SUBSTRING_ERROR),
1302                                          errmsg("negative substring length not allowed")));
1303
1304                 /*
1305                  * A zero or negative value for the end position can happen if the
1306                  * start was negative or one. SQL99 says to return a zero-length
1307                  * string.
1308                  */
1309                 if (E < 1)
1310                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1311
1312                 L1 = E - S1;
1313         }
1314
1315         /*
1316          * If the start position is past the end of the string, SQL99 says to
1317          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
1318          * that for us. Convert to zero-based starting position
1319          */
1320         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1321 }
1322
1323 /*
1324  * bytea_substr_no_len -
1325  *        Wrapper to avoid opr_sanity failure due to
1326  *        one function accepting a different number of args.
1327  */
1328 Datum
1329 bytea_substr_no_len(PG_FUNCTION_ARGS)
1330 {
1331         return bytea_substr(fcinfo);
1332 }
1333
1334 /*
1335  * byteapos -
1336  *        Return the position of the specified substring.
1337  *        Implements the SQL92 POSITION() function.
1338  * Cloned from textpos and modified as required.
1339  */
1340 Datum
1341 byteapos(PG_FUNCTION_ARGS)
1342 {
1343         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1344         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1345         int                     pos;
1346         int                     px,
1347                                 p;
1348         int                     len1,
1349                                 len2;
1350         char       *p1,
1351                            *p2;
1352
1353         if (VARSIZE(t2) <= VARHDRSZ)
1354                 PG_RETURN_INT32(1);             /* result for empty pattern */
1355
1356         len1 = (VARSIZE(t1) - VARHDRSZ);
1357         len2 = (VARSIZE(t2) - VARHDRSZ);
1358
1359         p1 = VARDATA(t1);
1360         p2 = VARDATA(t2);
1361
1362         pos = 0;
1363         px = (len1 - len2);
1364         for (p = 0; p <= px; p++)
1365         {
1366                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1367                 {
1368                         pos = p + 1;
1369                         break;
1370                 };
1371                 p1++;
1372         };
1373
1374         PG_RETURN_INT32(pos);
1375 }
1376
1377 /*-------------------------------------------------------------
1378  * byteaGetByte
1379  *
1380  * this routine treats "bytea" as an array of bytes.
1381  * It returns the Nth byte (a number between 0 and 255).
1382  *-------------------------------------------------------------
1383  */
1384 Datum
1385 byteaGetByte(PG_FUNCTION_ARGS)
1386 {
1387         bytea      *v = PG_GETARG_BYTEA_P(0);
1388         int32           n = PG_GETARG_INT32(1);
1389         int                     len;
1390         int                     byte;
1391
1392         len = VARSIZE(v) - VARHDRSZ;
1393
1394         if (n < 0 || n >= len)
1395                 ereport(ERROR,
1396                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1397                                  errmsg("index %d out of valid range, 0..%d",
1398                                                 n, len - 1)));
1399
1400         byte = ((unsigned char *) VARDATA(v))[n];
1401
1402         PG_RETURN_INT32(byte);
1403 }
1404
1405 /*-------------------------------------------------------------
1406  * byteaGetBit
1407  *
1408  * This routine treats a "bytea" type like an array of bits.
1409  * It returns the value of the Nth bit (0 or 1).
1410  *
1411  *-------------------------------------------------------------
1412  */
1413 Datum
1414 byteaGetBit(PG_FUNCTION_ARGS)
1415 {
1416         bytea      *v = PG_GETARG_BYTEA_P(0);
1417         int32           n = PG_GETARG_INT32(1);
1418         int                     byteNo,
1419                                 bitNo;
1420         int                     len;
1421         int                     byte;
1422
1423         len = VARSIZE(v) - VARHDRSZ;
1424
1425         if (n < 0 || n >= len * 8)
1426                 ereport(ERROR,
1427                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1428                                  errmsg("index %d out of valid range, 0..%d",
1429                                                 n, len * 8 - 1)));
1430
1431         byteNo = n / 8;
1432         bitNo = n % 8;
1433
1434         byte = ((unsigned char *) VARDATA(v))[byteNo];
1435
1436         if (byte & (1 << bitNo))
1437                 PG_RETURN_INT32(1);
1438         else
1439                 PG_RETURN_INT32(0);
1440 }
1441
1442 /*-------------------------------------------------------------
1443  * byteaSetByte
1444  *
1445  * Given an instance of type 'bytea' creates a new one with
1446  * the Nth byte set to the given value.
1447  *
1448  *-------------------------------------------------------------
1449  */
1450 Datum
1451 byteaSetByte(PG_FUNCTION_ARGS)
1452 {
1453         bytea      *v = PG_GETARG_BYTEA_P(0);
1454         int32           n = PG_GETARG_INT32(1);
1455         int32           newByte = PG_GETARG_INT32(2);
1456         int                     len;
1457         bytea      *res;
1458
1459         len = VARSIZE(v) - VARHDRSZ;
1460
1461         if (n < 0 || n >= len)
1462                 ereport(ERROR,
1463                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1464                                  errmsg("index %d out of valid range, 0..%d",
1465                                                 n, len - 1)));
1466
1467         /*
1468          * Make a copy of the original varlena.
1469          */
1470         res = (bytea *) palloc(VARSIZE(v));
1471         memcpy((char *) res, (char *) v, VARSIZE(v));
1472
1473         /*
1474          * Now set the byte.
1475          */
1476         ((unsigned char *) VARDATA(res))[n] = newByte;
1477
1478         PG_RETURN_BYTEA_P(res);
1479 }
1480
1481 /*-------------------------------------------------------------
1482  * byteaSetBit
1483  *
1484  * Given an instance of type 'bytea' creates a new one with
1485  * the Nth bit set to the given value.
1486  *
1487  *-------------------------------------------------------------
1488  */
1489 Datum
1490 byteaSetBit(PG_FUNCTION_ARGS)
1491 {
1492         bytea      *v = PG_GETARG_BYTEA_P(0);
1493         int32           n = PG_GETARG_INT32(1);
1494         int32           newBit = PG_GETARG_INT32(2);
1495         bytea      *res;
1496         int                     len;
1497         int                     oldByte,
1498                                 newByte;
1499         int                     byteNo,
1500                                 bitNo;
1501
1502         len = VARSIZE(v) - VARHDRSZ;
1503
1504         if (n < 0 || n >= len * 8)
1505                 ereport(ERROR,
1506                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1507                                  errmsg("index %d out of valid range, 0..%d",
1508                                                 n, len * 8 - 1)));
1509
1510         byteNo = n / 8;
1511         bitNo = n % 8;
1512
1513         /*
1514          * sanity check!
1515          */
1516         if (newBit != 0 && newBit != 1)
1517                 ereport(ERROR,
1518                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1519                                  errmsg("new bit must be 0 or 1")));
1520
1521         /*
1522          * Make a copy of the original varlena.
1523          */
1524         res = (bytea *) palloc(VARSIZE(v));
1525         memcpy((char *) res, (char *) v, VARSIZE(v));
1526
1527         /*
1528          * Update the byte.
1529          */
1530         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1531
1532         if (newBit == 0)
1533                 newByte = oldByte & (~(1 << bitNo));
1534         else
1535                 newByte = oldByte | (1 << bitNo);
1536
1537         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1538
1539         PG_RETURN_BYTEA_P(res);
1540 }
1541
1542
1543 /* text_name()
1544  * Converts a text type to a Name type.
1545  */
1546 Datum
1547 text_name(PG_FUNCTION_ARGS)
1548 {
1549         text       *s = PG_GETARG_TEXT_P(0);
1550         Name            result;
1551         int                     len;
1552
1553         len = VARSIZE(s) - VARHDRSZ;
1554
1555         /* Truncate oversize input */
1556         if (len >= NAMEDATALEN)
1557                 len = NAMEDATALEN - 1;
1558
1559 #ifdef STRINGDEBUG
1560         printf("text- convert string length %d (%d) ->%d\n",
1561                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1562 #endif
1563
1564         result = (Name) palloc(NAMEDATALEN);
1565         memcpy(NameStr(*result), VARDATA(s), len);
1566
1567         /* now null pad to full length... */
1568         while (len < NAMEDATALEN)
1569         {
1570                 *(NameStr(*result) + len) = '\0';
1571                 len++;
1572         }
1573
1574         PG_RETURN_NAME(result);
1575 }
1576
1577 /* name_text()
1578  * Converts a Name type to a text type.
1579  */
1580 Datum
1581 name_text(PG_FUNCTION_ARGS)
1582 {
1583         Name            s = PG_GETARG_NAME(0);
1584         text       *result;
1585         int                     len;
1586
1587         len = strlen(NameStr(*s));
1588
1589 #ifdef STRINGDEBUG
1590         printf("text- convert string length %d (%d) ->%d\n",
1591                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1592 #endif
1593
1594         result = palloc(VARHDRSZ + len);
1595         VARATT_SIZEP(result) = VARHDRSZ + len;
1596         memcpy(VARDATA(result), NameStr(*s), len);
1597
1598         PG_RETURN_TEXT_P(result);
1599 }
1600
1601
1602 /*
1603  * textToQualifiedNameList - convert a text object to list of names
1604  *
1605  * This implements the input parsing needed by nextval() and other
1606  * functions that take a text parameter representing a qualified name.
1607  * We split the name at dots, downcase if not double-quoted, and
1608  * truncate names if they're too long.
1609  */
1610 List *
1611 textToQualifiedNameList(text *textval, const char *caller)
1612 {
1613         char       *rawname;
1614         List       *result = NIL;
1615         List       *namelist;
1616         List       *l;
1617
1618         /* Convert to C string (handles possible detoasting). */
1619         /* Note we rely on being able to modify rawname below. */
1620         rawname = DatumGetCString(DirectFunctionCall1(textout,
1621                                                                                           PointerGetDatum(textval)));
1622
1623         if (!SplitIdentifierString(rawname, '.', &namelist))
1624                 ereport(ERROR,
1625                                 (errcode(ERRCODE_INVALID_NAME),
1626                                  errmsg("invalid name syntax")));
1627
1628         if (namelist == NIL)
1629                 ereport(ERROR,
1630                                 (errcode(ERRCODE_INVALID_NAME),
1631                                  errmsg("invalid name syntax")));
1632
1633         foreach(l, namelist)
1634         {
1635                 char       *curname = (char *) lfirst(l);
1636
1637                 result = lappend(result, makeString(pstrdup(curname)));
1638         }
1639
1640         pfree(rawname);
1641         freeList(namelist);
1642
1643         return result;
1644 }
1645
1646 /*
1647  * SplitIdentifierString --- parse a string containing identifiers
1648  *
1649  * This is the guts of textToQualifiedNameList, and is exported for use in
1650  * other situations such as parsing GUC variables.      In the GUC case, it's
1651  * important to avoid memory leaks, so the API is designed to minimize the
1652  * amount of stuff that needs to be allocated and freed.
1653  *
1654  * Inputs:
1655  *      rawstring: the input string; must be overwritable!      On return, it's
1656  *                         been modified to contain the separated identifiers.
1657  *      separator: the separator punctuation expected between identifiers
1658  *                         (typically '.' or ',').      Whitespace may also appear around
1659  *                         identifiers.
1660  * Outputs:
1661  *      namelist: filled with a palloc'd list of pointers to identifiers within
1662  *                        rawstring.  Caller should freeList() this even on error return.
1663  *
1664  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1665  *
1666  * Note that an empty string is considered okay here, though not in
1667  * textToQualifiedNameList.
1668  */
1669 bool
1670 SplitIdentifierString(char *rawstring, char separator,
1671                                           List **namelist)
1672 {
1673         char       *nextp = rawstring;
1674         bool            done = false;
1675
1676         *namelist = NIL;
1677
1678         while (isspace((unsigned char) *nextp))
1679                 nextp++;                                /* skip leading whitespace */
1680
1681         if (*nextp == '\0')
1682                 return true;                    /* allow empty string */
1683
1684         /* At the top of the loop, we are at start of a new identifier. */
1685         do
1686         {
1687                 char       *curname;
1688                 char       *endp;
1689                 int                     curlen;
1690
1691                 if (*nextp == '\"')
1692                 {
1693                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1694                         curname = nextp + 1;
1695                         for (;;)
1696                         {
1697                                 endp = strchr(nextp + 1, '\"');
1698                                 if (endp == NULL)
1699                                         return false;           /* mismatched quotes */
1700                                 if (endp[1] != '\"')
1701                                         break;          /* found end of quoted name */
1702                                 /* Collapse adjacent quotes into one quote, and look again */
1703                                 memmove(endp, endp + 1, strlen(endp));
1704                                 nextp = endp;
1705                         }
1706                         /* endp now points at the terminating quote */
1707                         nextp = endp + 1;
1708                 }
1709                 else
1710                 {
1711                         /* Unquoted name --- extends to separator or whitespace */
1712                         curname = nextp;
1713                         while (*nextp && *nextp != separator &&
1714                                    !isspace((unsigned char) *nextp))
1715                         {
1716                                 /*
1717                                  * It's important that this match the identifier
1718                                  * downcasing code used by backend/parser/scan.l.
1719                                  */
1720                                 if (isupper((unsigned char) *nextp))
1721                                         *nextp = tolower((unsigned char) *nextp);
1722                                 nextp++;
1723                         }
1724                         endp = nextp;
1725                         if (curname == nextp)
1726                                 return false;   /* empty unquoted name not allowed */
1727                 }
1728
1729                 while (isspace((unsigned char) *nextp))
1730                         nextp++;                        /* skip trailing whitespace */
1731
1732                 if (*nextp == separator)
1733                 {
1734                         nextp++;
1735                         while (isspace((unsigned char) *nextp))
1736                                 nextp++;                /* skip leading whitespace for next */
1737                         /* we expect another name, so done remains false */
1738                 }
1739                 else if (*nextp == '\0')
1740                         done = true;
1741                 else
1742                         return false;           /* invalid syntax */
1743
1744                 /* Now safe to overwrite separator with a null */
1745                 *endp = '\0';
1746
1747                 /* Truncate name if it's overlength; again, should match scan.l */
1748                 curlen = strlen(curname);
1749                 if (curlen >= NAMEDATALEN)
1750                 {
1751                         curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1752                         curname[curlen] = '\0';
1753                 }
1754
1755                 /*
1756                  * Finished isolating current name --- add it to list
1757                  */
1758                 *namelist = lappend(*namelist, curname);
1759
1760                 /* Loop back if we didn't reach end of string */
1761         } while (!done);
1762
1763         return true;
1764 }
1765
1766
1767 /*****************************************************************************
1768  *      Comparison Functions used for bytea
1769  *
1770  * Note: btree indexes need these routines not to leak memory; therefore,
1771  * be careful to free working copies of toasted datums.  Most places don't
1772  * need to be so careful.
1773  *****************************************************************************/
1774
1775 Datum
1776 byteaeq(PG_FUNCTION_ARGS)
1777 {
1778         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1779         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1780         int                     len1,
1781                                 len2;
1782         bool            result;
1783
1784         len1 = VARSIZE(arg1) - VARHDRSZ;
1785         len2 = VARSIZE(arg2) - VARHDRSZ;
1786
1787         /* fast path for different-length inputs */
1788         if (len1 != len2)
1789                 result = false;
1790         else
1791                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1792
1793         PG_FREE_IF_COPY(arg1, 0);
1794         PG_FREE_IF_COPY(arg2, 1);
1795
1796         PG_RETURN_BOOL(result);
1797 }
1798
1799 Datum
1800 byteane(PG_FUNCTION_ARGS)
1801 {
1802         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1803         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1804         int                     len1,
1805                                 len2;
1806         bool            result;
1807
1808         len1 = VARSIZE(arg1) - VARHDRSZ;
1809         len2 = VARSIZE(arg2) - VARHDRSZ;
1810
1811         /* fast path for different-length inputs */
1812         if (len1 != len2)
1813                 result = true;
1814         else
1815                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1816
1817         PG_FREE_IF_COPY(arg1, 0);
1818         PG_FREE_IF_COPY(arg2, 1);
1819
1820         PG_RETURN_BOOL(result);
1821 }
1822
1823 Datum
1824 bytealt(PG_FUNCTION_ARGS)
1825 {
1826         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1827         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1828         int                     len1,
1829                                 len2;
1830         int                     cmp;
1831
1832         len1 = VARSIZE(arg1) - VARHDRSZ;
1833         len2 = VARSIZE(arg2) - VARHDRSZ;
1834
1835         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1836
1837         PG_FREE_IF_COPY(arg1, 0);
1838         PG_FREE_IF_COPY(arg2, 1);
1839
1840         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1841 }
1842
1843 Datum
1844 byteale(PG_FUNCTION_ARGS)
1845 {
1846         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1847         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1848         int                     len1,
1849                                 len2;
1850         int                     cmp;
1851
1852         len1 = VARSIZE(arg1) - VARHDRSZ;
1853         len2 = VARSIZE(arg2) - VARHDRSZ;
1854
1855         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1856
1857         PG_FREE_IF_COPY(arg1, 0);
1858         PG_FREE_IF_COPY(arg2, 1);
1859
1860         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1861 }
1862
1863 Datum
1864 byteagt(PG_FUNCTION_ARGS)
1865 {
1866         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1867         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1868         int                     len1,
1869                                 len2;
1870         int                     cmp;
1871
1872         len1 = VARSIZE(arg1) - VARHDRSZ;
1873         len2 = VARSIZE(arg2) - VARHDRSZ;
1874
1875         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1876
1877         PG_FREE_IF_COPY(arg1, 0);
1878         PG_FREE_IF_COPY(arg2, 1);
1879
1880         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1881 }
1882
1883 Datum
1884 byteage(PG_FUNCTION_ARGS)
1885 {
1886         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1887         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1888         int                     len1,
1889                                 len2;
1890         int                     cmp;
1891
1892         len1 = VARSIZE(arg1) - VARHDRSZ;
1893         len2 = VARSIZE(arg2) - VARHDRSZ;
1894
1895         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1896
1897         PG_FREE_IF_COPY(arg1, 0);
1898         PG_FREE_IF_COPY(arg2, 1);
1899
1900         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1901 }
1902
1903 Datum
1904 byteacmp(PG_FUNCTION_ARGS)
1905 {
1906         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1907         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1908         int                     len1,
1909                                 len2;
1910         int                     cmp;
1911
1912         len1 = VARSIZE(arg1) - VARHDRSZ;
1913         len2 = VARSIZE(arg2) - VARHDRSZ;
1914
1915         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1916         if ((cmp == 0) && (len1 != len2))
1917                 cmp = (len1 < len2) ? -1 : 1;
1918
1919         PG_FREE_IF_COPY(arg1, 0);
1920         PG_FREE_IF_COPY(arg2, 1);
1921
1922         PG_RETURN_INT32(cmp);
1923 }
1924
1925 /*
1926  * replace_text
1927  * replace all occurrences of 'old_sub_str' in 'orig_str'
1928  * with 'new_sub_str' to form 'new_str'
1929  *
1930  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1931  * otherwise returns 'new_str'
1932  */
1933 Datum
1934 replace_text(PG_FUNCTION_ARGS)
1935 {
1936         text       *left_text;
1937         text       *right_text;
1938         text       *buf_text;
1939         text       *ret_text;
1940         int                     curr_posn;
1941         text       *src_text = PG_GETARG_TEXT_P(0);
1942         int                     src_text_len = TEXTLEN(src_text);
1943         text       *from_sub_text = PG_GETARG_TEXT_P(1);
1944         int                     from_sub_text_len = TEXTLEN(from_sub_text);
1945         text       *to_sub_text = PG_GETARG_TEXT_P(2);
1946         char       *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1947         StringInfo      str = makeStringInfo();
1948
1949         if (src_text_len == 0 || from_sub_text_len == 0)
1950                 PG_RETURN_TEXT_P(src_text);
1951
1952         buf_text = TEXTDUP(src_text);
1953         curr_posn = TEXTPOS(buf_text, from_sub_text);
1954
1955         while (curr_posn > 0)
1956         {
1957                 left_text = LEFT(buf_text, from_sub_text);
1958                 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1959
1960                 appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
1961                 appendStringInfoString(str, to_sub_str);
1962
1963                 pfree(buf_text);
1964                 pfree(left_text);
1965                 buf_text = right_text;
1966                 curr_posn = TEXTPOS(buf_text, from_sub_text);
1967         }
1968
1969         appendStringInfoString(str, PG_TEXT_GET_STR(buf_text));
1970         pfree(buf_text);
1971
1972         ret_text = PG_STR_GET_TEXT(str->data);
1973         pfree(str->data);
1974         pfree(str);
1975
1976         PG_RETURN_TEXT_P(ret_text);
1977 }
1978
1979 /*
1980  * split_text
1981  * parse input string
1982  * return ord item (1 based)
1983  * based on provided field separator
1984  */
1985 Datum
1986 split_text(PG_FUNCTION_ARGS)
1987 {
1988         text       *inputstring = PG_GETARG_TEXT_P(0);
1989         int                     inputstring_len = TEXTLEN(inputstring);
1990         text       *fldsep = PG_GETARG_TEXT_P(1);
1991         int                     fldsep_len = TEXTLEN(fldsep);
1992         int                     fldnum = PG_GETARG_INT32(2);
1993         int                     start_posn = 0;
1994         int                     end_posn = 0;
1995         text       *result_text;
1996
1997         /* return empty string for empty input string */
1998         if (inputstring_len < 1)
1999                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2000
2001         /* empty field separator */
2002         if (fldsep_len < 1)
2003         {
2004                 if (fldnum == 1)                /* first field - just return the input
2005                                                                  * string */
2006                         PG_RETURN_TEXT_P(inputstring);
2007                 else                                    /* otherwise return an empty string */
2008                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2009         }
2010
2011         /* field number is 1 based */
2012         if (fldnum < 1)
2013                 ereport(ERROR,
2014                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2015                                  errmsg("field position must be greater than zero")));
2016
2017         start_posn = text_position(PointerGetDatum(inputstring),
2018                                                            PointerGetDatum(fldsep),
2019                                                            fldnum - 1);
2020         end_posn = text_position(PointerGetDatum(inputstring),
2021                                                          PointerGetDatum(fldsep),
2022                                                          fldnum);
2023
2024         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2025         {
2026                 if (fldnum == 1)                /* first field - just return the input
2027                                                                  * string */
2028                         PG_RETURN_TEXT_P(inputstring);
2029                 else                                    /* otherwise return an empty string */
2030                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2031         }
2032         else if ((start_posn != 0) && (end_posn == 0))
2033         {
2034                 /* last field requested */
2035                 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
2036                 PG_RETURN_TEXT_P(result_text);
2037         }
2038         else if ((start_posn == 0) && (end_posn != 0))
2039         {
2040                 /* first field requested */
2041                 result_text = LEFT(inputstring, fldsep);
2042                 PG_RETURN_TEXT_P(result_text);
2043         }
2044         else
2045         {
2046                 /* prior to last field requested */
2047                 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
2048                 PG_RETURN_TEXT_P(result_text);
2049         }
2050 }
2051
2052 /*
2053  * text_to_array
2054  * parse input string
2055  * return text array of elements
2056  * based on provided field separator
2057  */
2058 Datum
2059 text_to_array(PG_FUNCTION_ARGS)
2060 {
2061         text       *inputstring = PG_GETARG_TEXT_P(0);
2062         int                     inputstring_len = TEXTLEN(inputstring);
2063         text       *fldsep = PG_GETARG_TEXT_P(1);
2064         int                     fldsep_len = TEXTLEN(fldsep);
2065         int                     fldnum;
2066         int                     start_posn = 0;
2067         int                     end_posn = 0;
2068         text       *result_text = NULL;
2069         ArrayBuildState *astate = NULL;
2070         MemoryContext oldcontext = CurrentMemoryContext;
2071
2072         /* return NULL for empty input string */
2073         if (inputstring_len < 1)
2074                 PG_RETURN_NULL();
2075
2076         /* empty field separator
2077          * return one element, 1D, array using the input string */
2078         if (fldsep_len < 1)
2079                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2080                                                           CStringGetDatum(inputstring), 1));
2081
2082         /* start with end position holding the initial start position */
2083         end_posn = 0;
2084         for (fldnum=1;;fldnum++)        /* field number is 1 based */
2085         {
2086                 Datum   dvalue;
2087                 bool    disnull = false;
2088
2089                 start_posn = end_posn;
2090                 end_posn = text_position(PointerGetDatum(inputstring),
2091                                                                  PointerGetDatum(fldsep),
2092                                                                  fldnum);
2093
2094                 if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2095                 {
2096                         if (fldnum == 1)
2097                         {
2098                                 /* first element
2099                                  * return one element, 1D, array using the input string */
2100                                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2101                                                                           CStringGetDatum(inputstring), 1));
2102                         }
2103                         else
2104                         {
2105                                 /* otherwise create array and exit */
2106                                 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, oldcontext));
2107                         }
2108                 }
2109                 else if ((start_posn != 0) && (end_posn == 0))
2110                 {
2111                         /* last field requested */
2112                         result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
2113                 }
2114                 else if ((start_posn == 0) && (end_posn != 0))
2115                 {
2116                         /* first field requested */
2117                         result_text = LEFT(inputstring, fldsep);
2118                 }
2119                 else
2120                 {
2121                         /* prior to last field requested */
2122                         result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
2123                 }
2124
2125                 /* stash away current value */
2126                 dvalue = PointerGetDatum(result_text);
2127                 astate = accumArrayResult(astate, dvalue,
2128                                                                   disnull, TEXTOID, oldcontext);
2129
2130         }
2131
2132         /* never reached -- keep compiler quiet */
2133         PG_RETURN_NULL();
2134 }
2135
2136 /*
2137  * array_to_text
2138  * concatenate Cstring representation of input array elements
2139  * using provided field separator
2140  */
2141 Datum
2142 array_to_text(PG_FUNCTION_ARGS)
2143 {
2144         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2145         char       *fldsep = PG_TEXTARG_GET_STR(1);
2146         int                     nitems, *dims, ndims;
2147         char       *p;
2148         Oid                     element_type;
2149         int                     typlen;
2150         bool            typbyval;
2151         char            typalign;
2152         Oid                     typelem;
2153         StringInfo      result_str = makeStringInfo();
2154         int                     i;
2155         ArrayMetaState *my_extra;
2156
2157         p = ARR_DATA_PTR(v);
2158         ndims = ARR_NDIM(v);
2159         dims = ARR_DIMS(v);
2160         nitems = ArrayGetNItems(ndims, dims);
2161
2162         /* if there are no elements, return an empty string */
2163         if (nitems == 0)
2164                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2165
2166         element_type = ARR_ELEMTYPE(v);
2167
2168         /*
2169          * We arrange to look up info about element type, including its output
2170          * conversion proc, only once per series of calls, assuming the element
2171          * type doesn't change underneath us.
2172          */
2173         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2174         if (my_extra == NULL)
2175         {
2176                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2177                                                                                                          sizeof(ArrayMetaState));
2178                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2179                 my_extra->element_type = InvalidOid;
2180         }
2181
2182         if (my_extra->element_type != element_type)
2183         {
2184                 /* Get info about element type, including its output conversion proc */
2185                 get_type_io_data(element_type, IOFunc_output,
2186                                                  &my_extra->typlen, &my_extra->typbyval,
2187                                                  &my_extra->typalign, &my_extra->typdelim,
2188                                                  &my_extra->typelem, &my_extra->typiofunc);
2189                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2190                                           fcinfo->flinfo->fn_mcxt);
2191                 my_extra->element_type = element_type;
2192         }
2193         typlen = my_extra->typlen;
2194         typbyval = my_extra->typbyval;
2195         typalign = my_extra->typalign;
2196         typelem = my_extra->typelem;
2197
2198         for (i = 0; i < nitems; i++)
2199         {
2200                 Datum           itemvalue;
2201                 char       *value;
2202
2203                 itemvalue = fetch_att(p, typbyval, typlen);
2204
2205                 value = DatumGetCString(FunctionCall3(&my_extra->proc,
2206                                                                                           itemvalue,
2207                                                                                           ObjectIdGetDatum(typelem),
2208                                                                                           Int32GetDatum(-1)));
2209
2210                 if (i > 0)
2211                         appendStringInfo(result_str, "%s%s", fldsep, value);
2212                 else
2213                         appendStringInfo(result_str, "%s", value);
2214
2215                 p = att_addlength(p, typlen, PointerGetDatum(p));
2216                 p = (char *) att_align(p, typalign);
2217         }
2218
2219         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2220 }
2221
2222 #define HEXBASE 16
2223 /*
2224  * Convert a int32 to a string containing a base 16 (hex) representation of
2225  * the number.
2226  */
2227 Datum
2228 to_hex32(PG_FUNCTION_ARGS)
2229 {
2230         static char digits[] = "0123456789abcdef";
2231         char            buf[32];                /* bigger than needed, but reasonable */
2232         char       *ptr;
2233         text       *result_text;
2234         int32           value = PG_GETARG_INT32(0);
2235
2236         ptr = buf + sizeof(buf) - 1;
2237         *ptr = '\0';
2238
2239         do
2240         {
2241                 *--ptr = digits[value % HEXBASE];
2242                 value /= HEXBASE;
2243         } while (ptr > buf && value);
2244
2245         result_text = PG_STR_GET_TEXT(ptr);
2246         PG_RETURN_TEXT_P(result_text);
2247 }
2248
2249 /*
2250  * Convert a int64 to a string containing a base 16 (hex) representation of
2251  * the number.
2252  */
2253 Datum
2254 to_hex64(PG_FUNCTION_ARGS)
2255 {
2256         static char digits[] = "0123456789abcdef";
2257         char            buf[32];                /* bigger than needed, but reasonable */
2258         char       *ptr;
2259         text       *result_text;
2260         int64           value = PG_GETARG_INT64(0);
2261
2262         ptr = buf + sizeof(buf) - 1;
2263         *ptr = '\0';
2264
2265         do
2266         {
2267                 *--ptr = digits[value % HEXBASE];
2268                 value /= HEXBASE;
2269         } while (ptr > buf && value);
2270
2271         result_text = PG_STR_GET_TEXT(ptr);
2272         PG_RETURN_TEXT_P(result_text);
2273 }
2274
2275 /*
2276  * Create an md5 hash of a text string and return it as hex
2277  *
2278  * md5 produces a 16 byte (128 bit) hash; double it for hex
2279  */
2280 #define MD5_HASH_LEN  32
2281
2282 Datum
2283 md5_text(PG_FUNCTION_ARGS)
2284 {
2285         char       *buff = PG_TEXT_GET_STR(PG_GETARG_TEXT_P(0));
2286         size_t          len = strlen(buff);
2287         char       *hexsum;
2288         text       *result_text;
2289
2290         /* leave room for the terminating '\0' */
2291         hexsum = (char *) palloc(MD5_HASH_LEN + 1);
2292
2293         /* get the hash result */
2294         md5_hash((void *) buff, len, hexsum);
2295
2296         /* convert to text and return it */
2297         result_text = PG_STR_GET_TEXT(hexsum);
2298         PG_RETURN_TEXT_P(result_text);
2299 }