granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.105 2003/08/04 04:03:10 tgl Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18
  19 #include "mb/pg_wchar.h"
  20 #include "miscadmin.h"
  21 #include "access/tuptoaster.h"
  22 #include "catalog/pg_type.h"
  23 #include "lib/stringinfo.h"
  24 #include "libpq/crypt.h"
  25 #include "libpq/pqformat.h"
  26 #include "utils/array.h"
  27 #include "utils/builtins.h"
  28 #include "utils/pg_locale.h"
  29 #include "utils/lsyscache.h"
  30
  31
  32 typedef struct varlena unknown;
  33
  34 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  35 #define DatumGetUnknownPCopy(X)         ((unknown *) PG_DETOAST_DATUM_COPY(X))
  36 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  37 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
  38 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  39
  40 #define PG_TEXTARG_GET_STR(arg_) \
  41         DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
  42 #define PG_TEXT_GET_STR(textp_) \
  43         DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
  44 #define PG_STR_GET_TEXT(str_) \
  45         DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
  46 #define TEXTLEN(textp) \
  47         text_length(PointerGetDatum(textp))
  48 #define TEXTPOS(buf_text, from_sub_text) \
  49         text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
  50 #define TEXTDUP(textp) \
  51         DatumGetTextPCopy(PointerGetDatum(textp))
  52 #define LEFT(buf_text, from_sub_text) \
  53         text_substring(PointerGetDatum(buf_text), \
  54                                         1, \
  55                                         TEXTPOS(buf_text, from_sub_text) - 1, false)
  56 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
  57         text_substring(PointerGetDatum(buf_text), \
  58                                         TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
  59                                         -1, true)
  60
  61 static int      text_cmp(text *arg1, text *arg2);
  62 static int32 text_length(Datum str);
  63 static int32 text_position(Datum str, Datum search_str, int matchnum);
  64 static text *text_substring(Datum str,
  65                            int32 start,
  66                            int32 length,
  67                            bool length_not_specified);
  68
  69
  70 /*****************************************************************************
  71  *       USER I/O ROUTINES                                                                                                               *
  72  *****************************************************************************/
  73
  74
  75 #define VAL(CH)                 ((CH) - '0')
  76 #define DIG(VAL)                ((VAL) + '0')
  77
  78 /*
  79  *              byteain                 - converts from printable representation of byte array
  80  *
  81  *              Non-printable characters must be passed as '\nnn' (octal) and are
  82  *              converted to internal form.  '\' must be passed as '\\'.
  83  *              ereport(ERROR, ...) if bad form.
  84  *
  85  *              BUGS:
  86  *                              The input is scaned twice.
  87  *                              The error checking of input is minimal.
  88  */
  89 Datum
  90 byteain(PG_FUNCTION_ARGS)
  91 {
  92         char       *inputText = PG_GETARG_CSTRING(0);
  93         char       *tp;
  94         char       *rp;
  95         int                     byte;
  96         bytea      *result;
  97
  98         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
  99         {
 100                 if (tp[0] != '\\')
 101                         tp++;
 102                 else if ((tp[0] == '\\') &&
 103                                  (tp[1] >= '0' && tp[1] <= '3') &&
 104                                  (tp[2] >= '0' && tp[2] <= '7') &&
 105                                  (tp[3] >= '0' && tp[3] <= '7'))
 106                         tp += 4;
 107                 else if ((tp[0] == '\\') &&
 108                                  (tp[1] == '\\'))
 109                         tp += 2;
 110                 else
 111                 {
 112                         /*
 113                          * one backslash, not followed by 0 or ### valid octal
 114                          */
 115                         ereport(ERROR,
 116                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 117                                          errmsg("invalid input syntax for bytea")));
 118                 }
 119         }
 120
 121         byte += VARHDRSZ;
 122         result = (bytea *) palloc(byte);
 123         VARATT_SIZEP(result) = byte;    /* set varlena length */
 124
 125         tp = inputText;
 126         rp = VARDATA(result);
 127         while (*tp != '\0')
 128         {
 129                 if (tp[0] != '\\')
 130                         *rp++ = *tp++;
 131                 else if ((tp[0] == '\\') &&
 132                                  (tp[1] >= '0' && tp[1] <= '3') &&
 133                                  (tp[2] >= '0' && tp[2] <= '7') &&
 134                                  (tp[3] >= '0' && tp[3] <= '7'))
 135                 {
 136                         byte = VAL(tp[1]);
 137                         byte <<= 3;
 138                         byte += VAL(tp[2]);
 139                         byte <<= 3;
 140                         *rp++ = byte + VAL(tp[3]);
 141                         tp += 4;
 142                 }
 143                 else if ((tp[0] == '\\') &&
 144                                  (tp[1] == '\\'))
 145                 {
 146                         *rp++ = '\\';
 147                         tp += 2;
 148                 }
 149                 else
 150                 {
 151                         /*
 152                          * We should never get here. The first pass should not allow
 153                          * it.
 154                          */
 155                         ereport(ERROR,
 156                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 157                                          errmsg("invalid input syntax for bytea")));
 158                 }
 159         }
 160
 161         PG_RETURN_BYTEA_P(result);
 162 }
 163
 164 /*
 165  *              byteaout                - converts to printable representation of byte array
 166  *
 167  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 168  *              '\\'.
 169  *
 170  *              NULL vlena should be an error--returning string with NULL for now.
 171  */
 172 Datum
 173 byteaout(PG_FUNCTION_ARGS)
 174 {
 175         bytea      *vlena = PG_GETARG_BYTEA_P(0);
 176         char       *result;
 177         char       *vp;
 178         char       *rp;
 179         int                     val;                    /* holds unprintable chars */
 180         int                     i;
 181         int                     len;
 182
 183         len = 1;                                        /* empty string has 1 char */
 184         vp = VARDATA(vlena);
 185         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 186         {
 187                 if (*vp == '\\')
 188                         len += 2;
 189                 else if (isprint((unsigned char) *vp))
 190                         len++;
 191                 else
 192                         len += 4;
 193         }
 194         rp = result = (char *) palloc(len);
 195         vp = VARDATA(vlena);
 196         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 197         {
 198                 if (*vp == '\\')
 199                 {
 200                         *rp++ = '\\';
 201                         *rp++ = '\\';
 202                 }
 203                 else if (isprint((unsigned char) *vp))
 204                         *rp++ = *vp;
 205                 else
 206                 {
 207                         val = *vp;
 208                         rp[0] = '\\';
 209                         rp[3] = DIG(val & 07);
 210                         val >>= 3;
 211                         rp[2] = DIG(val & 07);
 212                         val >>= 3;
 213                         rp[1] = DIG(val & 03);
 214                         rp += 4;
 215                 }
 216         }
 217         *rp = '\0';
 218         PG_RETURN_CSTRING(result);
 219 }
 220
 221 /*
 222  *              bytearecv                       - converts external binary format to bytea
 223  */
 224 Datum
 225 bytearecv(PG_FUNCTION_ARGS)
 226 {
 227         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 228         bytea      *result;
 229         int                     nbytes;
 230
 231         nbytes = buf->len - buf->cursor;
 232         result = (bytea *) palloc(nbytes + VARHDRSZ);
 233         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 234         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 235         PG_RETURN_BYTEA_P(result);
 236 }
 237
 238 /*
 239  *              byteasend                       - converts bytea to binary format
 240  *
 241  * This is a special case: just copy the input...
 242  */
 243 Datum
 244 byteasend(PG_FUNCTION_ARGS)
 245 {
 246         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
 247
 248         PG_RETURN_BYTEA_P(vlena);
 249 }
 250
 251
 252 /*
 253  *              textin                  - converts "..." to internal representation
 254  */
 255 Datum
 256 textin(PG_FUNCTION_ARGS)
 257 {
 258         char       *inputText = PG_GETARG_CSTRING(0);
 259         text       *result;
 260         int                     len;
 261
 262         /* verify encoding */
 263         len = strlen(inputText);
 264         pg_verifymbstr(inputText, len, false);
 265
 266         result = (text *) palloc(len + VARHDRSZ);
 267         VARATT_SIZEP(result) = len + VARHDRSZ;
 268
 269         memcpy(VARDATA(result), inputText, len);
 270
 271         PG_RETURN_TEXT_P(result);
 272 }
 273
 274 /*
 275  *              textout                 - converts internal representation to "..."
 276  */
 277 Datum
 278 textout(PG_FUNCTION_ARGS)
 279 {
 280         text       *t = PG_GETARG_TEXT_P(0);
 281         int                     len;
 282         char       *result;
 283
 284         len = VARSIZE(t) - VARHDRSZ;
 285         result = (char *) palloc(len + 1);
 286         memcpy(result, VARDATA(t), len);
 287         result[len] = '\0';
 288
 289         PG_RETURN_CSTRING(result);
 290 }
 291
 292 /*
 293  *              textrecv                        - converts external binary format to text
 294  */
 295 Datum
 296 textrecv(PG_FUNCTION_ARGS)
 297 {
 298         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 299         text       *result;
 300         char       *str;
 301         int                     nbytes;
 302
 303         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 304         result = (text *) palloc(nbytes + VARHDRSZ);
 305         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 306         memcpy(VARDATA(result), str, nbytes);
 307         pfree(str);
 308         PG_RETURN_TEXT_P(result);
 309 }
 310
 311 /*
 312  *              textsend                        - converts text to binary format
 313  */
 314 Datum
 315 textsend(PG_FUNCTION_ARGS)
 316 {
 317         text       *t = PG_GETARG_TEXT_P(0);
 318         StringInfoData buf;
 319
 320         pq_begintypsend(&buf);
 321         pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
 322         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 323 }
 324
 325
 326 /*
 327  *              unknownin                       - converts "..." to internal representation
 328  */
 329 Datum
 330 unknownin(PG_FUNCTION_ARGS)
 331 {
 332         char       *inputStr = PG_GETARG_CSTRING(0);
 333         unknown    *result;
 334         int                     len;
 335
 336         len = strlen(inputStr) + VARHDRSZ;
 337
 338         result = (unknown *) palloc(len);
 339         VARATT_SIZEP(result) = len;
 340
 341         memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
 342
 343         PG_RETURN_UNKNOWN_P(result);
 344 }
 345
 346 /*
 347  *              unknownout                      - converts internal representation to "..."
 348  */
 349 Datum
 350 unknownout(PG_FUNCTION_ARGS)
 351 {
 352         unknown    *t = PG_GETARG_UNKNOWN_P(0);
 353         int                     len;
 354         char       *result;
 355
 356         len = VARSIZE(t) - VARHDRSZ;
 357         result = (char *) palloc(len + 1);
 358         memcpy(result, VARDATA(t), len);
 359         result[len] = '\0';
 360
 361         PG_RETURN_CSTRING(result);
 362 }
 363
 364 /*
 365  *              unknownrecv                     - converts external binary format to unknown
 366  */
 367 Datum
 368 unknownrecv(PG_FUNCTION_ARGS)
 369 {
 370         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 371         unknown    *result;
 372         int                     nbytes;
 373
 374         nbytes = buf->len - buf->cursor;
 375         result = (unknown *) palloc(nbytes + VARHDRSZ);
 376         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 377         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 378         PG_RETURN_UNKNOWN_P(result);
 379 }
 380
 381 /*
 382  *              unknownsend                     - converts unknown to binary format
 383  *
 384  * This is a special case: just copy the input, since it's
 385  * effectively the same format as bytea
 386  */
 387 Datum
 388 unknownsend(PG_FUNCTION_ARGS)
 389 {
 390         unknown    *vlena = PG_GETARG_UNKNOWN_P_COPY(0);
 391
 392         PG_RETURN_UNKNOWN_P(vlena);
 393 }
 394
 395
 396 /* ========== PUBLIC ROUTINES ========== */
 397
 398 /*
 399  * textlen -
 400  *        returns the logical length of a text*
 401  *         (which is less than the VARSIZE of the text*)
 402  */
 403 Datum
 404 textlen(PG_FUNCTION_ARGS)
 405 {
 406         PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
 407 }
 408
 409 /*
 410  * text_length -
 411  *      Does the real work for textlen()
 412  *      This is broken out so it can be called directly by other string processing
 413  *      functions.
 414  */
 415 static int32
 416 text_length(Datum str)
 417 {
 418         /* fastpath when max encoding length is one */
 419         if (pg_database_encoding_max_length() == 1)
 420                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 421
 422         if (pg_database_encoding_max_length() > 1)
 423         {
 424                 text       *t = DatumGetTextP(str);
 425
 426                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
 427                                                                                          VARSIZE(t) - VARHDRSZ));
 428         }
 429
 430         /* should never get here */
 431         elog(ERROR, "invalid backend encoding: encoding max length < 1");
 432
 433         /* not reached: suppress compiler warning */
 434         return 0;
 435 }
 436
 437 /*
 438  * textoctetlen -
 439  *        returns the physical length of a text*
 440  *         (which is less than the VARSIZE of the text*)
 441  */
 442 Datum
 443 textoctetlen(PG_FUNCTION_ARGS)
 444 {
 445         PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
 446 }
 447
 448 /*
 449  * textcat -
 450  *        takes two text* and returns a text* that is the concatenation of
 451  *        the two.
 452  *
 453  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 454  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 455  * Allocate space for output in all cases.
 456  * XXX - thomas 1997-07-10
 457  */
 458 Datum
 459 textcat(PG_FUNCTION_ARGS)
 460 {
 461         text       *t1 = PG_GETARG_TEXT_P(0);
 462         text       *t2 = PG_GETARG_TEXT_P(1);
 463         int                     len1,
 464                                 len2,
 465                                 len;
 466         text       *result;
 467         char       *ptr;
 468
 469         len1 = (VARSIZE(t1) - VARHDRSZ);
 470         if (len1 < 0)
 471                 len1 = 0;
 472
 473         len2 = (VARSIZE(t2) - VARHDRSZ);
 474         if (len2 < 0)
 475                 len2 = 0;
 476
 477         len = len1 + len2 + VARHDRSZ;
 478         result = (text *) palloc(len);
 479
 480         /* Set size of result string... */
 481         VARATT_SIZEP(result) = len;
 482
 483         /* Fill data field of result string... */
 484         ptr = VARDATA(result);
 485         if (len1 > 0)
 486                 memcpy(ptr, VARDATA(t1), len1);
 487         if (len2 > 0)
 488                 memcpy(ptr + len1, VARDATA(t2), len2);
 489
 490         PG_RETURN_TEXT_P(result);
 491 }
 492
 493 /*
 494  * text_substr()
 495  * Return a substring starting at the specified position.
 496  * - thomas 1997-12-31
 497  *
 498  * Input:
 499  *      - string
 500  *      - starting position (is one-based)
 501  *      - string length
 502  *
 503  * If the starting position is zero or less, then return from the start of the string
 504  *      adjusting the length to be consistent with the "negative start" per SQL92.
 505  * If the length is less than zero, return the remaining string.
 506  *
 507  * Note that the arguments operate on octet length,
 508  *      so not aware of multibyte character sets.
 509  *
 510  * Added multibyte support.
 511  * - Tatsuo Ishii 1998-4-21
 512  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 513  * Formerly returned the entire string; now returns a portion.
 514  * - Thomas Lockhart 1998-12-10
 515  * Now uses faster TOAST-slicing interface
 516  * - John Gray 2002-02-22
 517  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 518  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 519  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 520  * S > LC and < LC + 4 sometimes garbage characters are returned.
 521  * - Joe Conway 2002-08-10
 522  */
 523 Datum
 524 text_substr(PG_FUNCTION_ARGS)
 525 {
 526         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 527                                                                         PG_GETARG_INT32(1),
 528                                                                         PG_GETARG_INT32(2),
 529                                                                         false));
 530 }
 531
 532 /*
 533  * text_substr_no_len -
 534  *        Wrapper to avoid opr_sanity failure due to
 535  *        one function accepting a different number of args.
 536  */
 537 Datum
 538 text_substr_no_len(PG_FUNCTION_ARGS)
 539 {
 540         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 541                                                                         PG_GETARG_INT32(1),
 542                                                                         -1, true));
 543 }
 544
 545 /*
 546  * text_substring -
 547  *      Does the real work for text_substr() and text_substr_no_len()
 548  *      This is broken out so it can be called directly by other string processing
 549  *      functions.
 550  */
 551 static text *
 552 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 553 {
 554         int32           eml = pg_database_encoding_max_length();
 555         int32           S = start;              /* start position */
 556         int32           S1;                             /* adjusted start position */
 557         int32           L1;                             /* adjusted substring length */
 558
 559         /* life is easy if the encoding max length is 1 */
 560         if (eml == 1)
 561         {
 562                 S1 = Max(S, 1);
 563
 564                 if (length_not_specified)               /* special case - get length to
 565                                                                                  * end of string */
 566                         L1 = -1;
 567                 else
 568                 {
 569                         /* end position */
 570                         int                     E = S + length;
 571
 572                         /*
 573                          * A negative value for L is the only way for the end position
 574                          * to be before the start. SQL99 says to throw an error.
 575                          */
 576                         if (E < S)
 577                                 ereport(ERROR,
 578                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 579                                            errmsg("negative substring length not allowed")));
 580
 581                         /*
 582                          * A zero or negative value for the end position can happen if
 583                          * the start was negative or one. SQL99 says to return a
 584                          * zero-length string.
 585                          */
 586                         if (E < 1)
 587                                 return PG_STR_GET_TEXT("");
 588
 589                         L1 = E - S1;
 590                 }
 591
 592                 /*
 593                  * If the start position is past the end of the string, SQL99 says
 594                  * to return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will
 595                  * do that for us. Convert to zero-based starting position
 596                  */
 597                 return DatumGetTextPSlice(str, S1 - 1, L1);
 598         }
 599         else if (eml > 1)
 600         {
 601                 /*
 602                  * When encoding max length is > 1, we can't get LC without
 603                  * detoasting, so we'll grab a conservatively large slice now and
 604                  * go back later to do the right thing
 605                  */
 606                 int32           slice_start;
 607                 int32           slice_size;
 608                 int32           slice_strlen;
 609                 text       *slice;
 610                 int32           E1;
 611                 int32           i;
 612                 char       *p;
 613                 char       *s;
 614                 text       *ret;
 615
 616                 /*
 617                  * if S is past the end of the string, the tuple toaster will
 618                  * return a zero-length string to us
 619                  */
 620                 S1 = Max(S, 1);
 621
 622                 /*
 623                  * We need to start at position zero because there is no way to
 624                  * know in advance which byte offset corresponds to the supplied
 625                  * start position.
 626                  */
 627                 slice_start = 0;
 628
 629                 if (length_not_specified)               /* special case - get length to
 630                                                                                  * end of string */
 631                         slice_size = L1 = -1;
 632                 else
 633                 {
 634                         int                     E = S + length;
 635
 636                         /*
 637                          * A negative value for L is the only way for the end position
 638                          * to be before the start. SQL99 says to throw an error.
 639                          */
 640                         if (E < S)
 641                                 ereport(ERROR,
 642                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 643                                            errmsg("negative substring length not allowed")));
 644
 645                         /*
 646                          * A zero or negative value for the end position can happen if
 647                          * the start was negative or one. SQL99 says to return a
 648                          * zero-length string.
 649                          */
 650                         if (E < 1)
 651                                 return PG_STR_GET_TEXT("");
 652
 653                         /*
 654                          * if E is past the end of the string, the tuple toaster will
 655                          * truncate the length for us
 656                          */
 657                         L1 = E - S1;
 658
 659                         /*
 660                          * Total slice size in bytes can't be any longer than the
 661                          * start position plus substring length times the encoding max
 662                          * length.
 663                          */
 664                         slice_size = (S1 + L1) * eml;
 665                 }
 666                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
 667
 668                 /* see if we got back an empty string */
 669                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
 670                         return PG_STR_GET_TEXT("");
 671
 672                 /* Now we can get the actual length of the slice in MB characters */
 673                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
 674
 675                 /*
 676                  * Check that the start position wasn't > slice_strlen. If so,
 677                  * SQL99 says to return a zero-length string.
 678                  */
 679                 if (S1 > slice_strlen)
 680                         return PG_STR_GET_TEXT("");
 681
 682                 /*
 683                  * Adjust L1 and E1 now that we know the slice string length.
 684                  * Again remember that S1 is one based, and slice_start is zero
 685                  * based.
 686                  */
 687                 if (L1 > -1)
 688                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 689                 else
 690                         E1 = slice_start + 1 + slice_strlen;
 691
 692                 /*
 693                  * Find the start position in the slice; remember S1 is not zero
 694                  * based
 695                  */
 696                 p = VARDATA(slice);
 697                 for (i = 0; i < S1 - 1; i++)
 698                         p += pg_mblen(p);
 699
 700                 /* hang onto a pointer to our start position */
 701                 s = p;
 702
 703                 /*
 704                  * Count the actual bytes used by the substring of the requested
 705                  * length.
 706                  */
 707                 for (i = S1; i < E1; i++)
 708                         p += pg_mblen(p);
 709
 710                 ret = (text *) palloc(VARHDRSZ + (p - s));
 711                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
 712                 memcpy(VARDATA(ret), s, (p - s));
 713
 714                 return ret;
 715         }
 716         else
 717                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
 718
 719         /* not reached: suppress compiler warning */
 720         return PG_STR_GET_TEXT("");
 721 }
 722
 723 /*
 724  * textpos -
 725  *        Return the position of the specified substring.
 726  *        Implements the SQL92 POSITION() function.
 727  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 728  * - thomas 1997-07-27
 729  */
 730 Datum
 731 textpos(PG_FUNCTION_ARGS)
 732 {
 733         PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
 734 }
 735
 736 /*
 737  * text_position -
 738  *      Does the real work for textpos()
 739  *      This is broken out so it can be called directly by other string processing
 740  *      functions.
 741  */
 742 static int32
 743 text_position(Datum str, Datum search_str, int matchnum)
 744 {
 745         int                     eml = pg_database_encoding_max_length();
 746         text       *t1 = DatumGetTextP(str);
 747         text       *t2 = DatumGetTextP(search_str);
 748         int                     match = 0,
 749                                 pos = 0,
 750                                 p = 0,
 751                                 px,
 752                                 len1,
 753                                 len2;
 754
 755         if (matchnum == 0)
 756                 return 0;                               /* result for 0th match */
 757
 758         if (VARSIZE(t2) <= VARHDRSZ)
 759                 PG_RETURN_INT32(1);             /* result for empty pattern */
 760
 761         len1 = (VARSIZE(t1) - VARHDRSZ);
 762         len2 = (VARSIZE(t2) - VARHDRSZ);
 763
 764         /* no use in searching str past point where search_str will fit */
 765         px = (len1 - len2);
 766
 767         if (eml == 1)                           /* simple case - single byte encoding */
 768         {
 769                 char       *p1,
 770                                    *p2;
 771
 772                 p1 = VARDATA(t1);
 773                 p2 = VARDATA(t2);
 774
 775                 for (p = 0; p <= px; p++)
 776                 {
 777                         if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
 778                         {
 779                                 if (++match == matchnum)
 780                                 {
 781                                         pos = p + 1;
 782                                         break;
 783                                 }
 784                         }
 785                         p1++;
 786                 }
 787         }
 788         else if (eml > 1)                       /* not as simple - multibyte encoding */
 789         {
 790                 pg_wchar   *p1,
 791                                    *p2,
 792                                    *ps1,
 793                                    *ps2;
 794
 795                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 796                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
 797                 len1 = pg_wchar_strlen(p1);
 798                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 799                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
 800                 len2 = pg_wchar_strlen(p2);
 801
 802                 for (p = 0; p <= px; p++)
 803                 {
 804                         if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
 805                         {
 806                                 if (++match == matchnum)
 807                                 {
 808                                         pos = p + 1;
 809                                         break;
 810                                 }
 811                         }
 812                         p1++;
 813                 }
 814
 815                 pfree(ps1);
 816                 pfree(ps2);
 817         }
 818         else
 819                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
 820
 821         PG_RETURN_INT32(pos);
 822 }
 823
 824 /* varstr_cmp()
 825  * Comparison function for text strings with given lengths.
 826  * Includes locale support, but must copy strings to temporary memory
 827  *      to allow null-termination for inputs to strcoll().
 828  * Returns -1, 0 or 1
 829  */
 830 int
 831 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 832 {
 833         int                     result;
 834
 835         /*
 836          * Unfortunately, there is no strncoll(), so in the non-C locale case
 837          * we have to do some memory copying.  This turns out to be
 838          * significantly slower, so we optimize the case where LC_COLLATE is
 839          * C.  We also try to optimize relatively-short strings by avoiding
 840          * palloc/pfree overhead.
 841          */
 842 #define STACKBUFLEN             1024
 843
 844         if (!lc_collate_is_c())
 845         {
 846                 char            a1buf[STACKBUFLEN];
 847                 char            a2buf[STACKBUFLEN];
 848                 char       *a1p,
 849                                    *a2p;
 850
 851                 if (len1 >= STACKBUFLEN)
 852                         a1p = (char *) palloc(len1 + 1);
 853                 else
 854                         a1p = a1buf;
 855                 if (len2 >= STACKBUFLEN)
 856                         a2p = (char *) palloc(len2 + 1);
 857                 else
 858                         a2p = a2buf;
 859
 860                 memcpy(a1p, arg1, len1);
 861                 a1p[len1] = '\0';
 862                 memcpy(a2p, arg2, len2);
 863                 a2p[len2] = '\0';
 864
 865                 result = strcoll(a1p, a2p);
 866
 867                 if (len1 >= STACKBUFLEN)
 868                         pfree(a1p);
 869                 if (len2 >= STACKBUFLEN)
 870                         pfree(a2p);
 871         }
 872         else
 873         {
 874                 result = strncmp(arg1, arg2, Min(len1, len2));
 875                 if ((result == 0) && (len1 != len2))
 876                         result = (len1 < len2) ? -1 : 1;
 877         }
 878
 879         return result;
 880 }
 881
 882
 883 /* text_cmp()
 884  * Internal comparison function for text strings.
 885  * Returns -1, 0 or 1
 886  */
 887 static int
 888 text_cmp(text *arg1, text *arg2)
 889 {
 890         char       *a1p,
 891                            *a2p;
 892         int                     len1,
 893                                 len2;
 894
 895         a1p = VARDATA(arg1);
 896         a2p = VARDATA(arg2);
 897
 898         len1 = VARSIZE(arg1) - VARHDRSZ;
 899         len2 = VARSIZE(arg2) - VARHDRSZ;
 900
 901         return varstr_cmp(a1p, len1, a2p, len2);
 902 }
 903
 904 /*
 905  * Comparison functions for text strings.
 906  *
 907  * Note: btree indexes need these routines not to leak memory; therefore,
 908  * be careful to free working copies of toasted datums.  Most places don't
 909  * need to be so careful.
 910  */
 911
 912 Datum
 913 texteq(PG_FUNCTION_ARGS)
 914 {
 915         text       *arg1 = PG_GETARG_TEXT_P(0);
 916         text       *arg2 = PG_GETARG_TEXT_P(1);
 917         bool            result;
 918
 919         /* fast path for different-length inputs */
 920         if (VARSIZE(arg1) != VARSIZE(arg2))
 921                 result = false;
 922         else
 923                 result = (text_cmp(arg1, arg2) == 0);
 924
 925         PG_FREE_IF_COPY(arg1, 0);
 926         PG_FREE_IF_COPY(arg2, 1);
 927
 928         PG_RETURN_BOOL(result);
 929 }
 930
 931 Datum
 932 textne(PG_FUNCTION_ARGS)
 933 {
 934         text       *arg1 = PG_GETARG_TEXT_P(0);
 935         text       *arg2 = PG_GETARG_TEXT_P(1);
 936         bool            result;
 937
 938         /* fast path for different-length inputs */
 939         if (VARSIZE(arg1) != VARSIZE(arg2))
 940                 result = true;
 941         else
 942                 result = (text_cmp(arg1, arg2) != 0);
 943
 944         PG_FREE_IF_COPY(arg1, 0);
 945         PG_FREE_IF_COPY(arg2, 1);
 946
 947         PG_RETURN_BOOL(result);
 948 }
 949
 950 Datum
 951 text_lt(PG_FUNCTION_ARGS)
 952 {
 953         text       *arg1 = PG_GETARG_TEXT_P(0);
 954         text       *arg2 = PG_GETARG_TEXT_P(1);
 955         bool            result;
 956
 957         result = (text_cmp(arg1, arg2) < 0);
 958
 959         PG_FREE_IF_COPY(arg1, 0);
 960         PG_FREE_IF_COPY(arg2, 1);
 961
 962         PG_RETURN_BOOL(result);
 963 }
 964
 965 Datum
 966 text_le(PG_FUNCTION_ARGS)
 967 {
 968         text       *arg1 = PG_GETARG_TEXT_P(0);
 969         text       *arg2 = PG_GETARG_TEXT_P(1);
 970         bool            result;
 971
 972         result = (text_cmp(arg1, arg2) <= 0);
 973
 974         PG_FREE_IF_COPY(arg1, 0);
 975         PG_FREE_IF_COPY(arg2, 1);
 976
 977         PG_RETURN_BOOL(result);
 978 }
 979
 980 Datum
 981 text_gt(PG_FUNCTION_ARGS)
 982 {
 983         text       *arg1 = PG_GETARG_TEXT_P(0);
 984         text       *arg2 = PG_GETARG_TEXT_P(1);
 985         bool            result;
 986
 987         result = (text_cmp(arg1, arg2) > 0);
 988
 989         PG_FREE_IF_COPY(arg1, 0);
 990         PG_FREE_IF_COPY(arg2, 1);
 991
 992         PG_RETURN_BOOL(result);
 993 }
 994
 995 Datum
 996 text_ge(PG_FUNCTION_ARGS)
 997 {
 998         text       *arg1 = PG_GETARG_TEXT_P(0);
 999         text       *arg2 = PG_GETARG_TEXT_P(1);
1000         bool            result;
1001
1002         result = (text_cmp(arg1, arg2) >= 0);
1003
1004         PG_FREE_IF_COPY(arg1, 0);
1005         PG_FREE_IF_COPY(arg2, 1);
1006
1007         PG_RETURN_BOOL(result);
1008 }
1009
1010 Datum
1011 bttextcmp(PG_FUNCTION_ARGS)
1012 {
1013         text       *arg1 = PG_GETARG_TEXT_P(0);
1014         text       *arg2 = PG_GETARG_TEXT_P(1);
1015         int32           result;
1016
1017         result = text_cmp(arg1, arg2);
1018
1019         PG_FREE_IF_COPY(arg1, 0);
1020         PG_FREE_IF_COPY(arg2, 1);
1021
1022         PG_RETURN_INT32(result);
1023 }
1024
1025
1026 Datum
1027 text_larger(PG_FUNCTION_ARGS)
1028 {
1029         text       *arg1 = PG_GETARG_TEXT_P(0);
1030         text       *arg2 = PG_GETARG_TEXT_P(1);
1031         text       *result;
1032
1033         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1034
1035         PG_RETURN_TEXT_P(result);
1036 }
1037
1038 Datum
1039 text_smaller(PG_FUNCTION_ARGS)
1040 {
1041         text       *arg1 = PG_GETARG_TEXT_P(0);
1042         text       *arg2 = PG_GETARG_TEXT_P(1);
1043         text       *result;
1044
1045         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1046
1047         PG_RETURN_TEXT_P(result);
1048 }
1049
1050
1051 /*
1052  * The following operators support character-by-character comparison
1053  * of text data types, to allow building indexes suitable for LIKE
1054  * clauses.
1055  */
1056
1057 static int
1058 internal_text_pattern_compare(text *arg1, text *arg2)
1059 {
1060         int                     result;
1061
1062         result = memcmp(VARDATA(arg1), VARDATA(arg2),
1063                                         Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1064         if (result != 0)
1065                 return result;
1066         else if (VARSIZE(arg1) < VARSIZE(arg2))
1067                 return -1;
1068         else if (VARSIZE(arg1) > VARSIZE(arg2))
1069                 return 1;
1070         else
1071                 return 0;
1072 }
1073
1074
1075 Datum
1076 text_pattern_lt(PG_FUNCTION_ARGS)
1077 {
1078         text       *arg1 = PG_GETARG_TEXT_P(0);
1079         text       *arg2 = PG_GETARG_TEXT_P(1);
1080         int                     result;
1081
1082         result = internal_text_pattern_compare(arg1, arg2);
1083
1084         PG_FREE_IF_COPY(arg1, 0);
1085         PG_FREE_IF_COPY(arg2, 1);
1086
1087         PG_RETURN_BOOL(result < 0);
1088 }
1089
1090
1091 Datum
1092 text_pattern_le(PG_FUNCTION_ARGS)
1093 {
1094         text       *arg1 = PG_GETARG_TEXT_P(0);
1095         text       *arg2 = PG_GETARG_TEXT_P(1);
1096         int                     result;
1097
1098         result = internal_text_pattern_compare(arg1, arg2);
1099
1100         PG_FREE_IF_COPY(arg1, 0);
1101         PG_FREE_IF_COPY(arg2, 1);
1102
1103         PG_RETURN_BOOL(result <= 0);
1104 }
1105
1106
1107 Datum
1108 text_pattern_eq(PG_FUNCTION_ARGS)
1109 {
1110         text       *arg1 = PG_GETARG_TEXT_P(0);
1111         text       *arg2 = PG_GETARG_TEXT_P(1);
1112         int                     result;
1113
1114         if (VARSIZE(arg1) != VARSIZE(arg2))
1115                 result = 1;
1116         else
1117                 result = internal_text_pattern_compare(arg1, arg2);
1118
1119         PG_FREE_IF_COPY(arg1, 0);
1120         PG_FREE_IF_COPY(arg2, 1);
1121
1122         PG_RETURN_BOOL(result == 0);
1123 }
1124
1125
1126 Datum
1127 text_pattern_ge(PG_FUNCTION_ARGS)
1128 {
1129         text       *arg1 = PG_GETARG_TEXT_P(0);
1130         text       *arg2 = PG_GETARG_TEXT_P(1);
1131         int                     result;
1132
1133         result = internal_text_pattern_compare(arg1, arg2);
1134
1135         PG_FREE_IF_COPY(arg1, 0);
1136         PG_FREE_IF_COPY(arg2, 1);
1137
1138         PG_RETURN_BOOL(result >= 0);
1139 }
1140
1141
1142 Datum
1143 text_pattern_gt(PG_FUNCTION_ARGS)
1144 {
1145         text       *arg1 = PG_GETARG_TEXT_P(0);
1146         text       *arg2 = PG_GETARG_TEXT_P(1);
1147         int                     result;
1148
1149         result = internal_text_pattern_compare(arg1, arg2);
1150
1151         PG_FREE_IF_COPY(arg1, 0);
1152         PG_FREE_IF_COPY(arg2, 1);
1153
1154         PG_RETURN_BOOL(result > 0);
1155 }
1156
1157
1158 Datum
1159 text_pattern_ne(PG_FUNCTION_ARGS)
1160 {
1161         text       *arg1 = PG_GETARG_TEXT_P(0);
1162         text       *arg2 = PG_GETARG_TEXT_P(1);
1163         int                     result;
1164
1165         if (VARSIZE(arg1) != VARSIZE(arg2))
1166                 result = 1;
1167         else
1168                 result = internal_text_pattern_compare(arg1, arg2);
1169
1170         PG_FREE_IF_COPY(arg1, 0);
1171         PG_FREE_IF_COPY(arg2, 1);
1172
1173         PG_RETURN_BOOL(result != 0);
1174 }
1175
1176
1177 Datum
1178 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1179 {
1180         text       *arg1 = PG_GETARG_TEXT_P(0);
1181         text       *arg2 = PG_GETARG_TEXT_P(1);
1182         int                     result;
1183
1184         result = internal_text_pattern_compare(arg1, arg2);
1185
1186         PG_FREE_IF_COPY(arg1, 0);
1187         PG_FREE_IF_COPY(arg2, 1);
1188
1189         PG_RETURN_INT32(result);
1190 }
1191
1192
1193 /*-------------------------------------------------------------
1194  * byteaoctetlen
1195  *
1196  * get the number of bytes contained in an instance of type 'bytea'
1197  *-------------------------------------------------------------
1198  */
1199 Datum
1200 byteaoctetlen(PG_FUNCTION_ARGS)
1201 {
1202         PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
1203 }
1204
1205 /*
1206  * byteacat -
1207  *        takes two bytea* and returns a bytea* that is the concatenation of
1208  *        the two.
1209  *
1210  * Cloned from textcat and modified as required.
1211  */
1212 Datum
1213 byteacat(PG_FUNCTION_ARGS)
1214 {
1215         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1216         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1217         int                     len1,
1218                                 len2,
1219                                 len;
1220         bytea      *result;
1221         char       *ptr;
1222
1223         len1 = (VARSIZE(t1) - VARHDRSZ);
1224         if (len1 < 0)
1225                 len1 = 0;
1226
1227         len2 = (VARSIZE(t2) - VARHDRSZ);
1228         if (len2 < 0)
1229                 len2 = 0;
1230
1231         len = len1 + len2 + VARHDRSZ;
1232         result = (bytea *) palloc(len);
1233
1234         /* Set size of result string... */
1235         VARATT_SIZEP(result) = len;
1236
1237         /* Fill data field of result string... */
1238         ptr = VARDATA(result);
1239         if (len1 > 0)
1240                 memcpy(ptr, VARDATA(t1), len1);
1241         if (len2 > 0)
1242                 memcpy(ptr + len1, VARDATA(t2), len2);
1243
1244         PG_RETURN_BYTEA_P(result);
1245 }
1246
1247 #define PG_STR_GET_BYTEA(str_) \
1248         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1249 /*
1250  * bytea_substr()
1251  * Return a substring starting at the specified position.
1252  * Cloned from text_substr and modified as required.
1253  *
1254  * Input:
1255  *      - string
1256  *      - starting position (is one-based)
1257  *      - string length (optional)
1258  *
1259  * If the starting position is zero or less, then return from the start of the string
1260  * adjusting the length to be consistent with the "negative start" per SQL92.
1261  * If the length is less than zero, an ERROR is thrown. If no third argument
1262  * (length) is provided, the length to the end of the string is assumed.
1263  */
1264 Datum
1265 bytea_substr(PG_FUNCTION_ARGS)
1266 {
1267         int                     S = PG_GETARG_INT32(1); /* start position */
1268         int                     S1;                             /* adjusted start position */
1269         int                     L1;                             /* adjusted substring length */
1270
1271         S1 = Max(S, 1);
1272
1273         if (fcinfo->nargs == 2)
1274         {
1275                 /*
1276                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1277                  * everything to the end of the string if we pass it a negative
1278                  * value for length.
1279                  */
1280                 L1 = -1;
1281         }
1282         else
1283         {
1284                 /* end position */
1285                 int                     E = S + PG_GETARG_INT32(2);
1286
1287                 /*
1288                  * A negative value for L is the only way for the end position to
1289                  * be before the start. SQL99 says to throw an error.
1290                  */
1291                 if (E < S)
1292                         ereport(ERROR,
1293                                         (errcode(ERRCODE_SUBSTRING_ERROR),
1294                                          errmsg("negative substring length not allowed")));
1295
1296                 /*
1297                  * A zero or negative value for the end position can happen if the
1298                  * start was negative or one. SQL99 says to return a zero-length
1299                  * string.
1300                  */
1301                 if (E < 1)
1302                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1303
1304                 L1 = E - S1;
1305         }
1306
1307         /*
1308          * If the start position is past the end of the string, SQL99 says to
1309          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
1310          * that for us. Convert to zero-based starting position
1311          */
1312         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1313 }
1314
1315 /*
1316  * bytea_substr_no_len -
1317  *        Wrapper to avoid opr_sanity failure due to
1318  *        one function accepting a different number of args.
1319  */
1320 Datum
1321 bytea_substr_no_len(PG_FUNCTION_ARGS)
1322 {
1323         return bytea_substr(fcinfo);
1324 }
1325
1326 /*
1327  * byteapos -
1328  *        Return the position of the specified substring.
1329  *        Implements the SQL92 POSITION() function.
1330  * Cloned from textpos and modified as required.
1331  */
1332 Datum
1333 byteapos(PG_FUNCTION_ARGS)
1334 {
1335         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1336         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1337         int                     pos;
1338         int                     px,
1339                                 p;
1340         int                     len1,
1341                                 len2;
1342         char       *p1,
1343                            *p2;
1344
1345         if (VARSIZE(t2) <= VARHDRSZ)
1346                 PG_RETURN_INT32(1);             /* result for empty pattern */
1347
1348         len1 = (VARSIZE(t1) - VARHDRSZ);
1349         len2 = (VARSIZE(t2) - VARHDRSZ);
1350
1351         p1 = VARDATA(t1);
1352         p2 = VARDATA(t2);
1353
1354         pos = 0;
1355         px = (len1 - len2);
1356         for (p = 0; p <= px; p++)
1357         {
1358                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1359                 {
1360                         pos = p + 1;
1361                         break;
1362                 };
1363                 p1++;
1364         };
1365
1366         PG_RETURN_INT32(pos);
1367 }
1368
1369 /*-------------------------------------------------------------
1370  * byteaGetByte
1371  *
1372  * this routine treats "bytea" as an array of bytes.
1373  * It returns the Nth byte (a number between 0 and 255).
1374  *-------------------------------------------------------------
1375  */
1376 Datum
1377 byteaGetByte(PG_FUNCTION_ARGS)
1378 {
1379         bytea      *v = PG_GETARG_BYTEA_P(0);
1380         int32           n = PG_GETARG_INT32(1);
1381         int                     len;
1382         int                     byte;
1383
1384         len = VARSIZE(v) - VARHDRSZ;
1385
1386         if (n < 0 || n >= len)
1387                 ereport(ERROR,
1388                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1389                                  errmsg("index %d out of valid range, 0..%d",
1390                                                 n, len - 1)));
1391
1392         byte = ((unsigned char *) VARDATA(v))[n];
1393
1394         PG_RETURN_INT32(byte);
1395 }
1396
1397 /*-------------------------------------------------------------
1398  * byteaGetBit
1399  *
1400  * This routine treats a "bytea" type like an array of bits.
1401  * It returns the value of the Nth bit (0 or 1).
1402  *
1403  *-------------------------------------------------------------
1404  */
1405 Datum
1406 byteaGetBit(PG_FUNCTION_ARGS)
1407 {
1408         bytea      *v = PG_GETARG_BYTEA_P(0);
1409         int32           n = PG_GETARG_INT32(1);
1410         int                     byteNo,
1411                                 bitNo;
1412         int                     len;
1413         int                     byte;
1414
1415         len = VARSIZE(v) - VARHDRSZ;
1416
1417         if (n < 0 || n >= len * 8)
1418                 ereport(ERROR,
1419                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1420                                  errmsg("index %d out of valid range, 0..%d",
1421                                                 n, len * 8 - 1)));
1422
1423         byteNo = n / 8;
1424         bitNo = n % 8;
1425
1426         byte = ((unsigned char *) VARDATA(v))[byteNo];
1427
1428         if (byte & (1 << bitNo))
1429                 PG_RETURN_INT32(1);
1430         else
1431                 PG_RETURN_INT32(0);
1432 }
1433
1434 /*-------------------------------------------------------------
1435  * byteaSetByte
1436  *
1437  * Given an instance of type 'bytea' creates a new one with
1438  * the Nth byte set to the given value.
1439  *
1440  *-------------------------------------------------------------
1441  */
1442 Datum
1443 byteaSetByte(PG_FUNCTION_ARGS)
1444 {
1445         bytea      *v = PG_GETARG_BYTEA_P(0);
1446         int32           n = PG_GETARG_INT32(1);
1447         int32           newByte = PG_GETARG_INT32(2);
1448         int                     len;
1449         bytea      *res;
1450
1451         len = VARSIZE(v) - VARHDRSZ;
1452
1453         if (n < 0 || n >= len)
1454                 ereport(ERROR,
1455                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1456                                  errmsg("index %d out of valid range, 0..%d",
1457                                                 n, len - 1)));
1458
1459         /*
1460          * Make a copy of the original varlena.
1461          */
1462         res = (bytea *) palloc(VARSIZE(v));
1463         memcpy((char *) res, (char *) v, VARSIZE(v));
1464
1465         /*
1466          * Now set the byte.
1467          */
1468         ((unsigned char *) VARDATA(res))[n] = newByte;
1469
1470         PG_RETURN_BYTEA_P(res);
1471 }
1472
1473 /*-------------------------------------------------------------
1474  * byteaSetBit
1475  *
1476  * Given an instance of type 'bytea' creates a new one with
1477  * the Nth bit set to the given value.
1478  *
1479  *-------------------------------------------------------------
1480  */
1481 Datum
1482 byteaSetBit(PG_FUNCTION_ARGS)
1483 {
1484         bytea      *v = PG_GETARG_BYTEA_P(0);
1485         int32           n = PG_GETARG_INT32(1);
1486         int32           newBit = PG_GETARG_INT32(2);
1487         bytea      *res;
1488         int                     len;
1489         int                     oldByte,
1490                                 newByte;
1491         int                     byteNo,
1492                                 bitNo;
1493
1494         len = VARSIZE(v) - VARHDRSZ;
1495
1496         if (n < 0 || n >= len * 8)
1497                 ereport(ERROR,
1498                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1499                                  errmsg("index %d out of valid range, 0..%d",
1500                                                 n, len * 8 - 1)));
1501
1502         byteNo = n / 8;
1503         bitNo = n % 8;
1504
1505         /*
1506          * sanity check!
1507          */
1508         if (newBit != 0 && newBit != 1)
1509                 ereport(ERROR,
1510                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1511                                  errmsg("new bit must be 0 or 1")));
1512
1513         /*
1514          * Make a copy of the original varlena.
1515          */
1516         res = (bytea *) palloc(VARSIZE(v));
1517         memcpy((char *) res, (char *) v, VARSIZE(v));
1518
1519         /*
1520          * Update the byte.
1521          */
1522         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1523
1524         if (newBit == 0)
1525                 newByte = oldByte & (~(1 << bitNo));
1526         else
1527                 newByte = oldByte | (1 << bitNo);
1528
1529         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1530
1531         PG_RETURN_BYTEA_P(res);
1532 }
1533
1534
1535 /* text_name()
1536  * Converts a text type to a Name type.
1537  */
1538 Datum
1539 text_name(PG_FUNCTION_ARGS)
1540 {
1541         text       *s = PG_GETARG_TEXT_P(0);
1542         Name            result;
1543         int                     len;
1544
1545         len = VARSIZE(s) - VARHDRSZ;
1546
1547         /* Truncate oversize input */
1548         if (len >= NAMEDATALEN)
1549                 len = NAMEDATALEN - 1;
1550
1551 #ifdef STRINGDEBUG
1552         printf("text- convert string length %d (%d) ->%d\n",
1553                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1554 #endif
1555
1556         result = (Name) palloc(NAMEDATALEN);
1557         memcpy(NameStr(*result), VARDATA(s), len);
1558
1559         /* now null pad to full length... */
1560         while (len < NAMEDATALEN)
1561         {
1562                 *(NameStr(*result) + len) = '\0';
1563                 len++;
1564         }
1565
1566         PG_RETURN_NAME(result);
1567 }
1568
1569 /* name_text()
1570  * Converts a Name type to a text type.
1571  */
1572 Datum
1573 name_text(PG_FUNCTION_ARGS)
1574 {
1575         Name            s = PG_GETARG_NAME(0);
1576         text       *result;
1577         int                     len;
1578
1579         len = strlen(NameStr(*s));
1580
1581 #ifdef STRINGDEBUG
1582         printf("text- convert string length %d (%d) ->%d\n",
1583                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1584 #endif
1585
1586         result = palloc(VARHDRSZ + len);
1587         VARATT_SIZEP(result) = VARHDRSZ + len;
1588         memcpy(VARDATA(result), NameStr(*s), len);
1589
1590         PG_RETURN_TEXT_P(result);
1591 }
1592
1593
1594 /*
1595  * textToQualifiedNameList - convert a text object to list of names
1596  *
1597  * This implements the input parsing needed by nextval() and other
1598  * functions that take a text parameter representing a qualified name.
1599  * We split the name at dots, downcase if not double-quoted, and
1600  * truncate names if they're too long.
1601  */
1602 List *
1603 textToQualifiedNameList(text *textval, const char *caller)
1604 {
1605         char       *rawname;
1606         List       *result = NIL;
1607         List       *namelist;
1608         List       *l;
1609
1610         /* Convert to C string (handles possible detoasting). */
1611         /* Note we rely on being able to modify rawname below. */
1612         rawname = DatumGetCString(DirectFunctionCall1(textout,
1613                                                                                           PointerGetDatum(textval)));
1614
1615         if (!SplitIdentifierString(rawname, '.', &namelist))
1616                 ereport(ERROR,
1617                                 (errcode(ERRCODE_INVALID_NAME),
1618                                  errmsg("invalid name syntax")));
1619
1620         if (namelist == NIL)
1621                 ereport(ERROR,
1622                                 (errcode(ERRCODE_INVALID_NAME),
1623                                  errmsg("invalid name syntax")));
1624
1625         foreach(l, namelist)
1626         {
1627                 char       *curname = (char *) lfirst(l);
1628
1629                 result = lappend(result, makeString(pstrdup(curname)));
1630         }
1631
1632         pfree(rawname);
1633         freeList(namelist);
1634
1635         return result;
1636 }
1637
1638 /*
1639  * SplitIdentifierString --- parse a string containing identifiers
1640  *
1641  * This is the guts of textToQualifiedNameList, and is exported for use in
1642  * other situations such as parsing GUC variables.      In the GUC case, it's
1643  * important to avoid memory leaks, so the API is designed to minimize the
1644  * amount of stuff that needs to be allocated and freed.
1645  *
1646  * Inputs:
1647  *      rawstring: the input string; must be overwritable!      On return, it's
1648  *                         been modified to contain the separated identifiers.
1649  *      separator: the separator punctuation expected between identifiers
1650  *                         (typically '.' or ',').      Whitespace may also appear around
1651  *                         identifiers.
1652  * Outputs:
1653  *      namelist: filled with a palloc'd list of pointers to identifiers within
1654  *                        rawstring.  Caller should freeList() this even on error return.
1655  *
1656  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1657  *
1658  * Note that an empty string is considered okay here, though not in
1659  * textToQualifiedNameList.
1660  */
1661 bool
1662 SplitIdentifierString(char *rawstring, char separator,
1663                                           List **namelist)
1664 {
1665         char       *nextp = rawstring;
1666         bool            done = false;
1667
1668         *namelist = NIL;
1669
1670         while (isspace((unsigned char) *nextp))
1671                 nextp++;                                /* skip leading whitespace */
1672
1673         if (*nextp == '\0')
1674                 return true;                    /* allow empty string */
1675
1676         /* At the top of the loop, we are at start of a new identifier. */
1677         do
1678         {
1679                 char       *curname;
1680                 char       *endp;
1681                 int                     curlen;
1682
1683                 if (*nextp == '\"')
1684                 {
1685                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1686                         curname = nextp + 1;
1687                         for (;;)
1688                         {
1689                                 endp = strchr(nextp + 1, '\"');
1690                                 if (endp == NULL)
1691                                         return false;           /* mismatched quotes */
1692                                 if (endp[1] != '\"')
1693                                         break;          /* found end of quoted name */
1694                                 /* Collapse adjacent quotes into one quote, and look again */
1695                                 memmove(endp, endp + 1, strlen(endp));
1696                                 nextp = endp;
1697                         }
1698                         /* endp now points at the terminating quote */
1699                         nextp = endp + 1;
1700                 }
1701                 else
1702                 {
1703                         /* Unquoted name --- extends to separator or whitespace */
1704                         curname = nextp;
1705                         while (*nextp && *nextp != separator &&
1706                                    !isspace((unsigned char) *nextp))
1707                         {
1708                                 /*
1709                                  * It's important that this match the identifier
1710                                  * downcasing code used by backend/parser/scan.l.
1711                                  */
1712                                 if (isupper((unsigned char) *nextp))
1713                                         *nextp = tolower((unsigned char) *nextp);
1714                                 nextp++;
1715                         }
1716                         endp = nextp;
1717                         if (curname == nextp)
1718                                 return false;   /* empty unquoted name not allowed */
1719                 }
1720
1721                 while (isspace((unsigned char) *nextp))
1722                         nextp++;                        /* skip trailing whitespace */
1723
1724                 if (*nextp == separator)
1725                 {
1726                         nextp++;
1727                         while (isspace((unsigned char) *nextp))
1728                                 nextp++;                /* skip leading whitespace for next */
1729                         /* we expect another name, so done remains false */
1730                 }
1731                 else if (*nextp == '\0')
1732                         done = true;
1733                 else
1734                         return false;           /* invalid syntax */
1735
1736                 /* Now safe to overwrite separator with a null */
1737                 *endp = '\0';
1738
1739                 /* Truncate name if it's overlength; again, should match scan.l */
1740                 curlen = strlen(curname);
1741                 if (curlen >= NAMEDATALEN)
1742                 {
1743                         curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1744                         curname[curlen] = '\0';
1745                 }
1746
1747                 /*
1748                  * Finished isolating current name --- add it to list
1749                  */
1750                 *namelist = lappend(*namelist, curname);
1751
1752                 /* Loop back if we didn't reach end of string */
1753         } while (!done);
1754
1755         return true;
1756 }
1757
1758
1759 /*****************************************************************************
1760  *      Comparison Functions used for bytea
1761  *
1762  * Note: btree indexes need these routines not to leak memory; therefore,
1763  * be careful to free working copies of toasted datums.  Most places don't
1764  * need to be so careful.
1765  *****************************************************************************/
1766
1767 Datum
1768 byteaeq(PG_FUNCTION_ARGS)
1769 {
1770         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1771         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1772         int                     len1,
1773                                 len2;
1774         bool            result;
1775
1776         len1 = VARSIZE(arg1) - VARHDRSZ;
1777         len2 = VARSIZE(arg2) - VARHDRSZ;
1778
1779         /* fast path for different-length inputs */
1780         if (len1 != len2)
1781                 result = false;
1782         else
1783                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1784
1785         PG_FREE_IF_COPY(arg1, 0);
1786         PG_FREE_IF_COPY(arg2, 1);
1787
1788         PG_RETURN_BOOL(result);
1789 }
1790
1791 Datum
1792 byteane(PG_FUNCTION_ARGS)
1793 {
1794         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1795         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1796         int                     len1,
1797                                 len2;
1798         bool            result;
1799
1800         len1 = VARSIZE(arg1) - VARHDRSZ;
1801         len2 = VARSIZE(arg2) - VARHDRSZ;
1802
1803         /* fast path for different-length inputs */
1804         if (len1 != len2)
1805                 result = true;
1806         else
1807                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1808
1809         PG_FREE_IF_COPY(arg1, 0);
1810         PG_FREE_IF_COPY(arg2, 1);
1811
1812         PG_RETURN_BOOL(result);
1813 }
1814
1815 Datum
1816 bytealt(PG_FUNCTION_ARGS)
1817 {
1818         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1819         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1820         int                     len1,
1821                                 len2;
1822         int                     cmp;
1823
1824         len1 = VARSIZE(arg1) - VARHDRSZ;
1825         len2 = VARSIZE(arg2) - VARHDRSZ;
1826
1827         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1828
1829         PG_FREE_IF_COPY(arg1, 0);
1830         PG_FREE_IF_COPY(arg2, 1);
1831
1832         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1833 }
1834
1835 Datum
1836 byteale(PG_FUNCTION_ARGS)
1837 {
1838         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1839         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1840         int                     len1,
1841                                 len2;
1842         int                     cmp;
1843
1844         len1 = VARSIZE(arg1) - VARHDRSZ;
1845         len2 = VARSIZE(arg2) - VARHDRSZ;
1846
1847         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1848
1849         PG_FREE_IF_COPY(arg1, 0);
1850         PG_FREE_IF_COPY(arg2, 1);
1851
1852         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1853 }
1854
1855 Datum
1856 byteagt(PG_FUNCTION_ARGS)
1857 {
1858         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1859         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1860         int                     len1,
1861                                 len2;
1862         int                     cmp;
1863
1864         len1 = VARSIZE(arg1) - VARHDRSZ;
1865         len2 = VARSIZE(arg2) - VARHDRSZ;
1866
1867         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1868
1869         PG_FREE_IF_COPY(arg1, 0);
1870         PG_FREE_IF_COPY(arg2, 1);
1871
1872         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1873 }
1874
1875 Datum
1876 byteage(PG_FUNCTION_ARGS)
1877 {
1878         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1879         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1880         int                     len1,
1881                                 len2;
1882         int                     cmp;
1883
1884         len1 = VARSIZE(arg1) - VARHDRSZ;
1885         len2 = VARSIZE(arg2) - VARHDRSZ;
1886
1887         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1888
1889         PG_FREE_IF_COPY(arg1, 0);
1890         PG_FREE_IF_COPY(arg2, 1);
1891
1892         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1893 }
1894
1895 Datum
1896 byteacmp(PG_FUNCTION_ARGS)
1897 {
1898         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1899         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1900         int                     len1,
1901                                 len2;
1902         int                     cmp;
1903
1904         len1 = VARSIZE(arg1) - VARHDRSZ;
1905         len2 = VARSIZE(arg2) - VARHDRSZ;
1906
1907         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1908         if ((cmp == 0) && (len1 != len2))
1909                 cmp = (len1 < len2) ? -1 : 1;
1910
1911         PG_FREE_IF_COPY(arg1, 0);
1912         PG_FREE_IF_COPY(arg2, 1);
1913
1914         PG_RETURN_INT32(cmp);
1915 }
1916
1917 /*
1918  * replace_text
1919  * replace all occurrences of 'old_sub_str' in 'orig_str'
1920  * with 'new_sub_str' to form 'new_str'
1921  *
1922  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1923  * otherwise returns 'new_str'
1924  */
1925 Datum
1926 replace_text(PG_FUNCTION_ARGS)
1927 {
1928         text       *left_text;
1929         text       *right_text;
1930         text       *buf_text;
1931         text       *ret_text;
1932         int                     curr_posn;
1933         text       *src_text = PG_GETARG_TEXT_P(0);
1934         int                     src_text_len = TEXTLEN(src_text);
1935         text       *from_sub_text = PG_GETARG_TEXT_P(1);
1936         int                     from_sub_text_len = TEXTLEN(from_sub_text);
1937         text       *to_sub_text = PG_GETARG_TEXT_P(2);
1938         char       *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1939         StringInfo      str = makeStringInfo();
1940
1941         if (src_text_len == 0 || from_sub_text_len == 0)
1942                 PG_RETURN_TEXT_P(src_text);
1943
1944         buf_text = TEXTDUP(src_text);
1945         curr_posn = TEXTPOS(buf_text, from_sub_text);
1946
1947         while (curr_posn > 0)
1948         {
1949                 left_text = LEFT(buf_text, from_sub_text);
1950                 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1951
1952                 appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
1953                 appendStringInfoString(str, to_sub_str);
1954
1955                 pfree(buf_text);
1956                 pfree(left_text);
1957                 buf_text = right_text;
1958                 curr_posn = TEXTPOS(buf_text, from_sub_text);
1959         }
1960
1961         appendStringInfoString(str, PG_TEXT_GET_STR(buf_text));
1962         pfree(buf_text);
1963
1964         ret_text = PG_STR_GET_TEXT(str->data);
1965         pfree(str->data);
1966         pfree(str);
1967
1968         PG_RETURN_TEXT_P(ret_text);
1969 }
1970
1971 /*
1972  * split_text
1973  * parse input string
1974  * return ord item (1 based)
1975  * based on provided field separator
1976  */
1977 Datum
1978 split_text(PG_FUNCTION_ARGS)
1979 {
1980         text       *inputstring = PG_GETARG_TEXT_P(0);
1981         int                     inputstring_len = TEXTLEN(inputstring);
1982         text       *fldsep = PG_GETARG_TEXT_P(1);
1983         int                     fldsep_len = TEXTLEN(fldsep);
1984         int                     fldnum = PG_GETARG_INT32(2);
1985         int                     start_posn = 0;
1986         int                     end_posn = 0;
1987         text       *result_text;
1988
1989         /* return empty string for empty input string */
1990         if (inputstring_len < 1)
1991                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1992
1993         /* empty field separator */
1994         if (fldsep_len < 1)
1995         {
1996                 if (fldnum == 1)                /* first field - just return the input
1997                                                                  * string */
1998                         PG_RETURN_TEXT_P(inputstring);
1999                 else
2000 /* otherwise return an empty string */
2001                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2002         }
2003
2004         /* field number is 1 based */
2005         if (fldnum < 1)
2006                 ereport(ERROR,
2007                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2008                                  errmsg("field position must be greater than zero")));
2009
2010         start_posn = text_position(PointerGetDatum(inputstring),
2011                                                            PointerGetDatum(fldsep),
2012                                                            fldnum - 1);
2013         end_posn = text_position(PointerGetDatum(inputstring),
2014                                                          PointerGetDatum(fldsep),
2015                                                          fldnum);
2016
2017         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2018         {
2019                 if (fldnum == 1)                /* first field - just return the input
2020                                                                  * string */
2021                         PG_RETURN_TEXT_P(inputstring);
2022                 else
2023 /* otherwise return an empty string */
2024                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2025         }
2026         else if ((start_posn != 0) && (end_posn == 0))
2027         {
2028                 /* last field requested */
2029                 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
2030                 PG_RETURN_TEXT_P(result_text);
2031         }
2032         else if ((start_posn == 0) && (end_posn != 0))
2033         {
2034                 /* first field requested */
2035                 result_text = LEFT(inputstring, fldsep);
2036                 PG_RETURN_TEXT_P(result_text);
2037         }
2038         else
2039         {
2040                 /* prior to last field requested */
2041                 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
2042                 PG_RETURN_TEXT_P(result_text);
2043         }
2044 }
2045
2046 /*
2047  * text_to_array
2048  * parse input string
2049  * return text array of elements
2050  * based on provided field separator
2051  */
2052 Datum
2053 text_to_array(PG_FUNCTION_ARGS)
2054 {
2055         text       *inputstring = PG_GETARG_TEXT_P(0);
2056         int                     inputstring_len = TEXTLEN(inputstring);
2057         text       *fldsep = PG_GETARG_TEXT_P(1);
2058         int                     fldsep_len = TEXTLEN(fldsep);
2059         int                     fldnum;
2060         int                     start_posn = 0;
2061         int                     end_posn = 0;
2062         text       *result_text = NULL;
2063         ArrayBuildState *astate = NULL;
2064         MemoryContext oldcontext = CurrentMemoryContext;
2065
2066         /* return NULL for empty input string */
2067         if (inputstring_len < 1)
2068                 PG_RETURN_NULL();
2069
2070         /*
2071          * empty field separator return one element, 1D, array using the input
2072          * string
2073          */
2074         if (fldsep_len < 1)
2075                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2076                                                                            CStringGetDatum(inputstring), 1));
2077
2078         /* start with end position holding the initial start position */
2079         end_posn = 0;
2080         for (fldnum = 1;; fldnum++) /* field number is 1 based */
2081         {
2082                 Datum           dvalue;
2083                 bool            disnull = false;
2084
2085                 start_posn = end_posn;
2086                 end_posn = text_position(PointerGetDatum(inputstring),
2087                                                                  PointerGetDatum(fldsep),
2088                                                                  fldnum);
2089
2090                 if ((start_posn == 0) && (end_posn == 0))               /* fldsep not found */
2091                 {
2092                         if (fldnum == 1)
2093                         {
2094                                 /*
2095                                  * first element return one element, 1D, array using the
2096                                  * input string
2097                                  */
2098                                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2099                                                                            CStringGetDatum(inputstring), 1));
2100                         }
2101                         else
2102                         {
2103                                 /* otherwise create array and exit */
2104                                 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, oldcontext));
2105                         }
2106                 }
2107                 else if ((start_posn != 0) && (end_posn == 0))
2108                 {
2109                         /* last field requested */
2110                         result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
2111                 }
2112                 else if ((start_posn == 0) && (end_posn != 0))
2113                 {
2114                         /* first field requested */
2115                         result_text = LEFT(inputstring, fldsep);
2116                 }
2117                 else
2118                 {
2119                         /* prior to last field requested */
2120                         result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
2121                 }
2122
2123                 /* stash away current value */
2124                 dvalue = PointerGetDatum(result_text);
2125                 astate = accumArrayResult(astate, dvalue,
2126                                                                   disnull, TEXTOID, oldcontext);
2127
2128         }
2129
2130         /* never reached -- keep compiler quiet */
2131         PG_RETURN_NULL();
2132 }
2133
2134 /*
2135  * array_to_text
2136  * concatenate Cstring representation of input array elements
2137  * using provided field separator
2138  */
2139 Datum
2140 array_to_text(PG_FUNCTION_ARGS)
2141 {
2142         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2143         char       *fldsep = PG_TEXTARG_GET_STR(1);
2144         int                     nitems,
2145                            *dims,
2146                                 ndims;
2147         char       *p;
2148         Oid                     element_type;
2149         int                     typlen;
2150         bool            typbyval;
2151         char            typalign;
2152         Oid                     typelem;
2153         StringInfo      result_str = makeStringInfo();
2154         int                     i;
2155         ArrayMetaState *my_extra;
2156
2157         p = ARR_DATA_PTR(v);
2158         ndims = ARR_NDIM(v);
2159         dims = ARR_DIMS(v);
2160         nitems = ArrayGetNItems(ndims, dims);
2161
2162         /* if there are no elements, return an empty string */
2163         if (nitems == 0)
2164                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2165
2166         element_type = ARR_ELEMTYPE(v);
2167
2168         /*
2169          * We arrange to look up info about element type, including its output
2170          * conversion proc, only once per series of calls, assuming the
2171          * element type doesn't change underneath us.
2172          */
2173         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2174         if (my_extra == NULL)
2175         {
2176                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2177                                                                                                  sizeof(ArrayMetaState));
2178                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2179                 my_extra->element_type = InvalidOid;
2180         }
2181
2182         if (my_extra->element_type != element_type)
2183         {
2184                 /*
2185                  * Get info about element type, including its output conversion
2186                  * proc
2187                  */
2188                 get_type_io_data(element_type, IOFunc_output,
2189                                                  &my_extra->typlen, &my_extra->typbyval,
2190                                                  &my_extra->typalign, &my_extra->typdelim,
2191                                                  &my_extra->typelem, &my_extra->typiofunc);
2192                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2193                                           fcinfo->flinfo->fn_mcxt);
2194                 my_extra->element_type = element_type;
2195         }
2196         typlen = my_extra->typlen;
2197         typbyval = my_extra->typbyval;
2198         typalign = my_extra->typalign;
2199         typelem = my_extra->typelem;
2200
2201         for (i = 0; i < nitems; i++)
2202         {
2203                 Datum           itemvalue;
2204                 char       *value;
2205
2206                 itemvalue = fetch_att(p, typbyval, typlen);
2207
2208                 value = DatumGetCString(FunctionCall3(&my_extra->proc,
2209                                                                                           itemvalue,
2210                                                                                           ObjectIdGetDatum(typelem),
2211                                                                                           Int32GetDatum(-1)));
2212
2213                 if (i > 0)
2214                         appendStringInfo(result_str, "%s%s", fldsep, value);
2215                 else
2216                         appendStringInfo(result_str, "%s", value);
2217
2218                 p = att_addlength(p, typlen, PointerGetDatum(p));
2219                 p = (char *) att_align(p, typalign);
2220         }
2221
2222         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2223 }
2224
2225 #define HEXBASE 16
2226 /*
2227  * Convert a int32 to a string containing a base 16 (hex) representation of
2228  * the number.
2229  */
2230 Datum
2231 to_hex32(PG_FUNCTION_ARGS)
2232 {
2233         static char digits[] = "0123456789abcdef";
2234         char            buf[32];                /* bigger than needed, but reasonable */
2235         char       *ptr;
2236         text       *result_text;
2237         int32           value = PG_GETARG_INT32(0);
2238
2239         ptr = buf + sizeof(buf) - 1;
2240         *ptr = '\0';
2241
2242         do
2243         {
2244                 *--ptr = digits[value % HEXBASE];
2245                 value /= HEXBASE;
2246         } while (ptr > buf && value);
2247
2248         result_text = PG_STR_GET_TEXT(ptr);
2249         PG_RETURN_TEXT_P(result_text);
2250 }
2251
2252 /*
2253  * Convert a int64 to a string containing a base 16 (hex) representation of
2254  * the number.
2255  */
2256 Datum
2257 to_hex64(PG_FUNCTION_ARGS)
2258 {
2259         static char digits[] = "0123456789abcdef";
2260         char            buf[32];                /* bigger than needed, but reasonable */
2261         char       *ptr;
2262         text       *result_text;
2263         int64           value = PG_GETARG_INT64(0);
2264
2265         ptr = buf + sizeof(buf) - 1;
2266         *ptr = '\0';
2267
2268         do
2269         {
2270                 *--ptr = digits[value % HEXBASE];
2271                 value /= HEXBASE;
2272         } while (ptr > buf && value);
2273
2274         result_text = PG_STR_GET_TEXT(ptr);
2275         PG_RETURN_TEXT_P(result_text);
2276 }
2277
2278 /*
2279  * Create an md5 hash of a text string and return it as hex
2280  *
2281  * md5 produces a 16 byte (128 bit) hash; double it for hex
2282  */
2283 #define MD5_HASH_LEN  32
2284
2285 Datum
2286 md5_text(PG_FUNCTION_ARGS)
2287 {
2288         char       *buff = PG_TEXT_GET_STR(PG_GETARG_TEXT_P(0));
2289         size_t          len = strlen(buff);
2290         char       *hexsum;
2291         text       *result_text;
2292
2293         /* leave room for the terminating '\0' */
2294         hexsum = (char *) palloc(MD5_HASH_LEN + 1);
2295
2296         /* get the hash result */
2297         md5_hash((void *) buff, len, hexsum);
2298
2299         /* convert to text and return it */
2300         result_text = PG_STR_GET_TEXT(hexsum);
2301         PG_RETURN_TEXT_P(result_text);
2302 }