granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.121 2005/05/20 01:29:55 neilc Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18
  19 #include "access/tuptoaster.h"
  20 #include "catalog/pg_type.h"
  21 #include "lib/stringinfo.h"
  22 #include "libpq/crypt.h"
  23 #include "libpq/pqformat.h"
  24 #include "mb/pg_wchar.h"
  25 #include "miscadmin.h"
  26 #include "parser/scansup.h"
  27 #include "utils/array.h"
  28 #include "utils/builtins.h"
  29 #include "utils/lsyscache.h"
  30 #include "utils/pg_locale.h"
  31
  32
  33 typedef struct varlena unknown;
  34
  35 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  36 #define DatumGetUnknownPCopy(X)         ((unknown *) PG_DETOAST_DATUM_COPY(X))
  37 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  38 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
  39 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  40
  41 #define PG_TEXTARG_GET_STR(arg_) \
  42         DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
  43 #define PG_TEXT_GET_STR(textp_) \
  44         DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
  45 #define PG_STR_GET_TEXT(str_) \
  46         DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
  47 #define TEXTLEN(textp) \
  48         text_length(PointerGetDatum(textp))
  49 #define TEXTPOS(buf_text, from_sub_text) \
  50         text_position(buf_text, from_sub_text, 1)
  51 #define TEXTDUP(textp) \
  52         DatumGetTextPCopy(PointerGetDatum(textp))
  53 #define LEFT(buf_text, from_sub_text) \
  54         text_substring(PointerGetDatum(buf_text), \
  55                                         1, \
  56                                         TEXTPOS(buf_text, from_sub_text) - 1, false)
  57 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
  58         text_substring(PointerGetDatum(buf_text), \
  59                                         TEXTPOS(buf_text, from_sub_text) + (from_sub_text_len), \
  60                                         -1, true)
  61
  62 static int      text_cmp(text *arg1, text *arg2);
  63 static int32 text_length(Datum str);
  64 static int32 text_position(text *t1, text *t2, int matchnum);
  65 static text *text_substring(Datum str,
  66                            int32 start,
  67                            int32 length,
  68                            bool length_not_specified);
  69
  70
  71 /*****************************************************************************
  72  *       USER I/O ROUTINES                                                                                                               *
  73  *****************************************************************************/
  74
  75
  /* Convert between an ASCII digit character and its numeric value. */
  #define VAL(ch)                 ((ch) - '0')
  #define DIG(num)                ((num) + '0')
  78
  79 /*
  80  *              byteain                 - converts from printable representation of byte array
  81  *
  82  *              Non-printable characters must be passed as '\nnn' (octal) and are
  83  *              converted to internal form.  '\' must be passed as '\\'.
  84  *              ereport(ERROR, ...) if bad form.
  85  *
  86  *              BUGS:
  87  *                              The input is scaned twice.
  88  *                              The error checking of input is minimal.
  89  */
  90 Datum
  91 byteain(PG_FUNCTION_ARGS)
  92 {
  93         char       *inputText = PG_GETARG_CSTRING(0);
  94         char       *tp;
  95         char       *rp;
  96         int                     byte;
  97         bytea      *result;
  98
  99         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
 100         {
 101                 if (tp[0] != '\\')
 102                         tp++;
 103                 else if ((tp[0] == '\\') &&
 104                                  (tp[1] >= '0' && tp[1] <= '3') &&
 105                                  (tp[2] >= '0' && tp[2] <= '7') &&
 106                                  (tp[3] >= '0' && tp[3] <= '7'))
 107                         tp += 4;
 108                 else if ((tp[0] == '\\') &&
 109                                  (tp[1] == '\\'))
 110                         tp += 2;
 111                 else
 112                 {
 113                         /*
 114                          * one backslash, not followed by 0 or ### valid octal
 115                          */
 116                         ereport(ERROR,
 117                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 118                                          errmsg("invalid input syntax for type bytea")));
 119                 }
 120         }
 121
 122         byte += VARHDRSZ;
 123         result = (bytea *) palloc(byte);
 124         VARATT_SIZEP(result) = byte;    /* set varlena length */
 125
 126         tp = inputText;
 127         rp = VARDATA(result);
 128         while (*tp != '\0')
 129         {
 130                 if (tp[0] != '\\')
 131                         *rp++ = *tp++;
 132                 else if ((tp[0] == '\\') &&
 133                                  (tp[1] >= '0' && tp[1] <= '3') &&
 134                                  (tp[2] >= '0' && tp[2] <= '7') &&
 135                                  (tp[3] >= '0' && tp[3] <= '7'))
 136                 {
 137                         byte = VAL(tp[1]);
 138                         byte <<= 3;
 139                         byte += VAL(tp[2]);
 140                         byte <<= 3;
 141                         *rp++ = byte + VAL(tp[3]);
 142                         tp += 4;
 143                 }
 144                 else if ((tp[0] == '\\') &&
 145                                  (tp[1] == '\\'))
 146                 {
 147                         *rp++ = '\\';
 148                         tp += 2;
 149                 }
 150                 else
 151                 {
 152                         /*
 153                          * We should never get here. The first pass should not allow
 154                          * it.
 155                          */
 156                         ereport(ERROR,
 157                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 158                                          errmsg("invalid input syntax for type bytea")));
 159                 }
 160         }
 161
 162         PG_RETURN_BYTEA_P(result);
 163 }
 164
 165 /*
 166  *              byteaout                - converts to printable representation of byte array
 167  *
 168  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 169  *              '\\'.
 170  *
 171  *              NULL vlena should be an error--returning string with NULL for now.
 172  */
 173 Datum
 174 byteaout(PG_FUNCTION_ARGS)
 175 {
 176         bytea      *vlena = PG_GETARG_BYTEA_P(0);
 177         char       *result;
 178         char       *vp;
 179         char       *rp;
 180         int                     val;                    /* holds unprintable chars */
 181         int                     i;
 182         int                     len;
 183
 184         len = 1;                                        /* empty string has 1 char */
 185         vp = VARDATA(vlena);
 186         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 187         {
 188                 if (*vp == '\\')
 189                         len += 2;
 190                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 191                         len += 4;
 192                 else
 193                         len++;
 194         }
 195         rp = result = (char *) palloc(len);
 196         vp = VARDATA(vlena);
 197         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 198         {
 199                 if (*vp == '\\')
 200                 {
 201                         *rp++ = '\\';
 202                         *rp++ = '\\';
 203                 }
 204                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 205                 {
 206                         val = *vp;
 207                         rp[0] = '\\';
 208                         rp[3] = DIG(val & 07);
 209                         val >>= 3;
 210                         rp[2] = DIG(val & 07);
 211                         val >>= 3;
 212                         rp[1] = DIG(val & 03);
 213                         rp += 4;
 214                 }
 215                 else
 216                         *rp++ = *vp;
 217         }
 218         *rp = '\0';
 219         PG_RETURN_CSTRING(result);
 220 }
 221
 222 /*
 223  *              bytearecv                       - converts external binary format to bytea
 224  */
 225 Datum
 226 bytearecv(PG_FUNCTION_ARGS)
 227 {
 228         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 229         bytea      *result;
 230         int                     nbytes;
 231
 232         nbytes = buf->len - buf->cursor;
 233         result = (bytea *) palloc(nbytes + VARHDRSZ);
 234         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 235         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 236         PG_RETURN_BYTEA_P(result);
 237 }
 238
 239 /*
 240  *              byteasend                       - converts bytea to binary format
 241  *
 242  * This is a special case: just copy the input...
 243  */
 244 Datum
 245 byteasend(PG_FUNCTION_ARGS)
 246 {
 247         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
 248
 249         PG_RETURN_BYTEA_P(vlena);
 250 }
 251
 252
 253 /*
 254  *              textin                  - converts "..." to internal representation
 255  */
 256 Datum
 257 textin(PG_FUNCTION_ARGS)
 258 {
 259         char       *inputText = PG_GETARG_CSTRING(0);
 260         text       *result;
 261         int                     len;
 262
 263         /* verify encoding */
 264         len = strlen(inputText);
 265         pg_verifymbstr(inputText, len, false);
 266
 267         result = (text *) palloc(len + VARHDRSZ);
 268         VARATT_SIZEP(result) = len + VARHDRSZ;
 269
 270         memcpy(VARDATA(result), inputText, len);
 271
 272         PG_RETURN_TEXT_P(result);
 273 }
 274
 275 /*
 276  *              textout                 - converts internal representation to "..."
 277  */
 278 Datum
 279 textout(PG_FUNCTION_ARGS)
 280 {
 281         text       *t = PG_GETARG_TEXT_P(0);
 282         int                     len;
 283         char       *result;
 284
 285         len = VARSIZE(t) - VARHDRSZ;
 286         result = (char *) palloc(len + 1);
 287         memcpy(result, VARDATA(t), len);
 288         result[len] = '\0';
 289
 290         PG_RETURN_CSTRING(result);
 291 }
 292
 293 /*
 294  *              textrecv                        - converts external binary format to text
 295  */
 296 Datum
 297 textrecv(PG_FUNCTION_ARGS)
 298 {
 299         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 300         text       *result;
 301         char       *str;
 302         int                     nbytes;
 303
 304         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 305         result = (text *) palloc(nbytes + VARHDRSZ);
 306         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 307         memcpy(VARDATA(result), str, nbytes);
 308         pfree(str);
 309         PG_RETURN_TEXT_P(result);
 310 }
 311
 312 /*
 313  *              textsend                        - converts text to binary format
 314  */
 315 Datum
 316 textsend(PG_FUNCTION_ARGS)
 317 {
 318         text       *t = PG_GETARG_TEXT_P(0);
 319         StringInfoData buf;
 320
 321         pq_begintypsend(&buf);
 322         pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
 323         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 324 }
 325
 326
 327 /*
 328  *              unknownin                       - converts "..." to internal representation
 329  */
 330 Datum
 331 unknownin(PG_FUNCTION_ARGS)
 332 {
 333         char       *inputStr = PG_GETARG_CSTRING(0);
 334         unknown    *result;
 335         int                     len;
 336
 337         len = strlen(inputStr) + VARHDRSZ;
 338
 339         result = (unknown *) palloc(len);
 340         VARATT_SIZEP(result) = len;
 341
 342         memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
 343
 344         PG_RETURN_UNKNOWN_P(result);
 345 }
 346
 347 /*
 348  *              unknownout                      - converts internal representation to "..."
 349  */
 350 Datum
 351 unknownout(PG_FUNCTION_ARGS)
 352 {
 353         unknown    *t = PG_GETARG_UNKNOWN_P(0);
 354         int                     len;
 355         char       *result;
 356
 357         len = VARSIZE(t) - VARHDRSZ;
 358         result = (char *) palloc(len + 1);
 359         memcpy(result, VARDATA(t), len);
 360         result[len] = '\0';
 361
 362         PG_RETURN_CSTRING(result);
 363 }
 364
 365 /*
 366  *              unknownrecv                     - converts external binary format to unknown
 367  */
 368 Datum
 369 unknownrecv(PG_FUNCTION_ARGS)
 370 {
 371         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 372         unknown    *result;
 373         int                     nbytes;
 374
 375         nbytes = buf->len - buf->cursor;
 376         result = (unknown *) palloc(nbytes + VARHDRSZ);
 377         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 378         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 379         PG_RETURN_UNKNOWN_P(result);
 380 }
 381
 382 /*
 383  *              unknownsend                     - converts unknown to binary format
 384  *
 385  * This is a special case: just copy the input, since it's
 386  * effectively the same format as bytea
 387  */
 388 Datum
 389 unknownsend(PG_FUNCTION_ARGS)
 390 {
 391         unknown    *vlena = PG_GETARG_UNKNOWN_P_COPY(0);
 392
 393         PG_RETURN_UNKNOWN_P(vlena);
 394 }
 395
 396
 397 /* ========== PUBLIC ROUTINES ========== */
 398
 399 /*
 400  * textlen -
 401  *        returns the logical length of a text*
 402  *         (which is less than the VARSIZE of the text*)
 403  */
 404 Datum
 405 textlen(PG_FUNCTION_ARGS)
 406 {
 407         Datum           str = PG_GETARG_DATUM(0);
 408
 409         /* try to avoid decompressing argument */
 410         PG_RETURN_INT32(text_length(str));
 411 }
 412
 413 /*
 414  * text_length -
 415  *      Does the real work for textlen()
 416  *
 417  *      This is broken out so it can be called directly by other string processing
 418  *      functions.      Note that the argument is passed as a Datum, to indicate that
 419  *      it may still be in compressed form.  We can avoid decompressing it at all
 420  *      in some cases.
 421  */
 422 static int32
 423 text_length(Datum str)
 424 {
 425         /* fastpath when max encoding length is one */
 426         if (pg_database_encoding_max_length() == 1)
 427                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 428         else
 429         {
 430                 text       *t = DatumGetTextP(str);
 431
 432                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
 433                                                                                          VARSIZE(t) - VARHDRSZ));
 434         }
 435 }
 436
 437 /*
 438  * textoctetlen -
 439  *        returns the physical length of a text*
 440  *         (which is less than the VARSIZE of the text*)
 441  */
 442 Datum
 443 textoctetlen(PG_FUNCTION_ARGS)
 444 {
 445         Datum           str = PG_GETARG_DATUM(0);
 446
 447         /* We need not detoast the input at all */
 448         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 449 }
 450
 451 /*
 452  * textcat -
 453  *        takes two text* and returns a text* that is the concatenation of
 454  *        the two.
 455  *
 456  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 457  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 458  * Allocate space for output in all cases.
 459  * XXX - thomas 1997-07-10
 460  */
 461 Datum
 462 textcat(PG_FUNCTION_ARGS)
 463 {
 464         text       *t1 = PG_GETARG_TEXT_P(0);
 465         text       *t2 = PG_GETARG_TEXT_P(1);
 466         int                     len1,
 467                                 len2,
 468                                 len;
 469         text       *result;
 470         char       *ptr;
 471
 472         len1 = (VARSIZE(t1) - VARHDRSZ);
 473         if (len1 < 0)
 474                 len1 = 0;
 475
 476         len2 = (VARSIZE(t2) - VARHDRSZ);
 477         if (len2 < 0)
 478                 len2 = 0;
 479
 480         len = len1 + len2 + VARHDRSZ;
 481         result = (text *) palloc(len);
 482
 483         /* Set size of result string... */
 484         VARATT_SIZEP(result) = len;
 485
 486         /* Fill data field of result string... */
 487         ptr = VARDATA(result);
 488         if (len1 > 0)
 489                 memcpy(ptr, VARDATA(t1), len1);
 490         if (len2 > 0)
 491                 memcpy(ptr + len1, VARDATA(t2), len2);
 492
 493         PG_RETURN_TEXT_P(result);
 494 }
 495
 496 /*
 497  * text_substr()
 498  * Return a substring starting at the specified position.
 499  * - thomas 1997-12-31
 500  *
 501  * Input:
 502  *      - string
 503  *      - starting position (is one-based)
 504  *      - string length
 505  *
 506  * If the starting position is zero or less, then return from the start of the string
 507  *      adjusting the length to be consistent with the "negative start" per SQL92.
 508  * If the length is less than zero, return the remaining string.
 509  *
 510  * Added multibyte support.
 511  * - Tatsuo Ishii 1998-4-21
 512  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 513  * Formerly returned the entire string; now returns a portion.
 514  * - Thomas Lockhart 1998-12-10
 515  * Now uses faster TOAST-slicing interface
 516  * - John Gray 2002-02-22
 517  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 518  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 519  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 520  * S > LC and < LC + 4 sometimes garbage characters are returned.
 521  * - Joe Conway 2002-08-10
 522  */
 523 Datum
 524 text_substr(PG_FUNCTION_ARGS)
 525 {
 526         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 527                                                                         PG_GETARG_INT32(1),
 528                                                                         PG_GETARG_INT32(2),
 529                                                                         false));
 530 }
 531
 532 /*
 533  * text_substr_no_len -
 534  *        Wrapper to avoid opr_sanity failure due to
 535  *        one function accepting a different number of args.
 536  */
 537 Datum
 538 text_substr_no_len(PG_FUNCTION_ARGS)
 539 {
 540         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 541                                                                         PG_GETARG_INT32(1),
 542                                                                         -1, true));
 543 }
 544
 545 /*
 546  * text_substring -
 547  *      Does the real work for text_substr() and text_substr_no_len()
 548  *
 549  *      This is broken out so it can be called directly by other string processing
 550  *      functions.      Note that the argument is passed as a Datum, to indicate that
 551  *      it may still be in compressed/toasted form.  We can avoid detoasting all
 552  *      of it in some cases.
 553  */
 554 static text *
 555 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 556 {
 557         int32           eml = pg_database_encoding_max_length();
 558         int32           S = start;              /* start position */
 559         int32           S1;                             /* adjusted start position */
 560         int32           L1;                             /* adjusted substring length */
 561
 562         /* life is easy if the encoding max length is 1 */
 563         if (eml == 1)
 564         {
 565                 S1 = Max(S, 1);
 566
 567                 if (length_not_specified)               /* special case - get length to
 568                                                                                  * end of string */
 569                         L1 = -1;
 570                 else
 571                 {
 572                         /* end position */
 573                         int                     E = S + length;
 574
 575                         /*
 576                          * A negative value for L is the only way for the end position
 577                          * to be before the start. SQL99 says to throw an error.
 578                          */
 579                         if (E < S)
 580                                 ereport(ERROR,
 581                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 582                                            errmsg("negative substring length not allowed")));
 583
 584                         /*
 585                          * A zero or negative value for the end position can happen if
 586                          * the start was negative or one. SQL99 says to return a
 587                          * zero-length string.
 588                          */
 589                         if (E < 1)
 590                                 return PG_STR_GET_TEXT("");
 591
 592                         L1 = E - S1;
 593                 }
 594
 595                 /*
 596                  * If the start position is past the end of the string, SQL99 says
 597                  * to return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will
 598                  * do that for us. Convert to zero-based starting position
 599                  */
 600                 return DatumGetTextPSlice(str, S1 - 1, L1);
 601         }
 602         else if (eml > 1)
 603         {
 604                 /*
 605                  * When encoding max length is > 1, we can't get LC without
 606                  * detoasting, so we'll grab a conservatively large slice now and
 607                  * go back later to do the right thing
 608                  */
 609                 int32           slice_start;
 610                 int32           slice_size;
 611                 int32           slice_strlen;
 612                 text       *slice;
 613                 int32           E1;
 614                 int32           i;
 615                 char       *p;
 616                 char       *s;
 617                 text       *ret;
 618
 619                 /*
 620                  * if S is past the end of the string, the tuple toaster will
 621                  * return a zero-length string to us
 622                  */
 623                 S1 = Max(S, 1);
 624
 625                 /*
 626                  * We need to start at position zero because there is no way to
 627                  * know in advance which byte offset corresponds to the supplied
 628                  * start position.
 629                  */
 630                 slice_start = 0;
 631
 632                 if (length_not_specified)               /* special case - get length to
 633                                                                                  * end of string */
 634                         slice_size = L1 = -1;
 635                 else
 636                 {
 637                         int                     E = S + length;
 638
 639                         /*
 640                          * A negative value for L is the only way for the end position
 641                          * to be before the start. SQL99 says to throw an error.
 642                          */
 643                         if (E < S)
 644                                 ereport(ERROR,
 645                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 646                                            errmsg("negative substring length not allowed")));
 647
 648                         /*
 649                          * A zero or negative value for the end position can happen if
 650                          * the start was negative or one. SQL99 says to return a
 651                          * zero-length string.
 652                          */
 653                         if (E < 1)
 654                                 return PG_STR_GET_TEXT("");
 655
 656                         /*
 657                          * if E is past the end of the string, the tuple toaster will
 658                          * truncate the length for us
 659                          */
 660                         L1 = E - S1;
 661
 662                         /*
 663                          * Total slice size in bytes can't be any longer than the
 664                          * start position plus substring length times the encoding max
 665                          * length.
 666                          */
 667                         slice_size = (S1 + L1) * eml;
 668                 }
 669                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
 670
 671                 /* see if we got back an empty string */
 672                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
 673                         return PG_STR_GET_TEXT("");
 674
 675                 /* Now we can get the actual length of the slice in MB characters */
 676                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
 677
 678                 /*
 679                  * Check that the start position wasn't > slice_strlen. If so,
 680                  * SQL99 says to return a zero-length string.
 681                  */
 682                 if (S1 > slice_strlen)
 683                         return PG_STR_GET_TEXT("");
 684
 685                 /*
 686                  * Adjust L1 and E1 now that we know the slice string length.
 687                  * Again remember that S1 is one based, and slice_start is zero
 688                  * based.
 689                  */
 690                 if (L1 > -1)
 691                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 692                 else
 693                         E1 = slice_start + 1 + slice_strlen;
 694
 695                 /*
 696                  * Find the start position in the slice; remember S1 is not zero
 697                  * based
 698                  */
 699                 p = VARDATA(slice);
 700                 for (i = 0; i < S1 - 1; i++)
 701                         p += pg_mblen(p);
 702
 703                 /* hang onto a pointer to our start position */
 704                 s = p;
 705
 706                 /*
 707                  * Count the actual bytes used by the substring of the requested
 708                  * length.
 709                  */
 710                 for (i = S1; i < E1; i++)
 711                         p += pg_mblen(p);
 712
 713                 ret = (text *) palloc(VARHDRSZ + (p - s));
 714                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
 715                 memcpy(VARDATA(ret), s, (p - s));
 716
 717                 return ret;
 718         }
 719         else
 720                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
 721
 722         /* not reached: suppress compiler warning */
 723         return NULL;
 724 }
 725
 726 /*
 727  * textpos -
 728  *        Return the position of the specified substring.
 729  *        Implements the SQL92 POSITION() function.
 730  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 731  * - thomas 1997-07-27
 732  */
 733 Datum
 734 textpos(PG_FUNCTION_ARGS)
 735 {
 736         text       *str = PG_GETARG_TEXT_P(0);
 737         text       *search_str = PG_GETARG_TEXT_P(1);
 738
 739         PG_RETURN_INT32(text_position(str, search_str, 1));
 740 }
 741
 742 /*
 743  * text_position -
 744  *      Does the real work for textpos()
 745  *
 746  * Inputs:
 747  *              t1 - string to be searched
 748  *              t2 - pattern to match within t1
 749  *              matchnum - number of the match to be found (1 is the first match)
 750  * Result:
 751  *              Character index of the first matched char, starting from 1,
 752  *              or 0 if no match.
 753  *
 754  *      This is broken out so it can be called directly by other string processing
 755  *      functions.
 756  */
 757 static int32
 758 text_position(text *t1, text *t2, int matchnum)
 759 {
 760         int                     match = 0,
 761                                 pos = 0,
 762                                 p,
 763                                 px,
 764                                 len1,
 765                                 len2;
 766
 767         if (matchnum <= 0)
 768                 return 0;                               /* result for 0th match */
 769
 770         if (VARSIZE(t2) <= VARHDRSZ)
 771                 return 1;                               /* result for empty pattern */
 772
 773         len1 = (VARSIZE(t1) - VARHDRSZ);
 774         len2 = (VARSIZE(t2) - VARHDRSZ);
 775
 776         if (pg_database_encoding_max_length() == 1)
 777         {
 778                 /* simple case - single byte encoding */
 779                 char       *p1,
 780                                    *p2;
 781
 782                 p1 = VARDATA(t1);
 783                 p2 = VARDATA(t2);
 784
 785                 /* no use in searching str past point where search_str will fit */
 786                 px = (len1 - len2);
 787
 788                 for (p = 0; p <= px; p++)
 789                 {
 790                         if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
 791                         {
 792                                 if (++match == matchnum)
 793                                 {
 794                                         pos = p + 1;
 795                                         break;
 796                                 }
 797                         }
 798                         p1++;
 799                 }
 800         }
 801         else
 802         {
 803                 /* not as simple - multibyte encoding */
 804                 pg_wchar   *p1,
 805                                    *p2,
 806                                    *ps1,
 807                                    *ps2;
 808
 809                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 810                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
 811                 len1 = pg_wchar_strlen(p1);
 812                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 813                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
 814                 len2 = pg_wchar_strlen(p2);
 815
 816                 /* no use in searching str past point where search_str will fit */
 817                 px = (len1 - len2);
 818
 819                 for (p = 0; p <= px; p++)
 820                 {
 821                         if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
 822                         {
 823                                 if (++match == matchnum)
 824                                 {
 825                                         pos = p + 1;
 826                                         break;
 827                                 }
 828                         }
 829                         p1++;
 830                 }
 831
 832                 pfree(ps1);
 833                 pfree(ps2);
 834         }
 835
 836         return pos;
 837 }
 838
 839 /* varstr_cmp()
 840  * Comparison function for text strings with given lengths.
 841  * Includes locale support, but must copy strings to temporary memory
 842  *      to allow null-termination for inputs to strcoll().
 843  * Returns -1, 0 or 1
 844  */
 845 int
 846 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 847 {
 848         int                     result;
 849
 850         /*
 851          * Unfortunately, there is no strncoll(), so in the non-C locale case
 852          * we have to do some memory copying.  This turns out to be
 853          * significantly slower, so we optimize the case where LC_COLLATE is
 854          * C.  We also try to optimize relatively-short strings by avoiding
 855          * palloc/pfree overhead.
 856          */
 857 #define STACKBUFLEN             1024
 858
 859         if (!lc_collate_is_c())
 860         {
 861                 char            a1buf[STACKBUFLEN];
 862                 char            a2buf[STACKBUFLEN];
 863                 char       *a1p,
 864                                    *a2p;
 865
 866                 if (len1 >= STACKBUFLEN)
 867                         a1p = (char *) palloc(len1 + 1);
 868                 else
 869                         a1p = a1buf;
 870                 if (len2 >= STACKBUFLEN)
 871                         a2p = (char *) palloc(len2 + 1);
 872                 else
 873                         a2p = a2buf;
 874
 875                 memcpy(a1p, arg1, len1);
 876                 a1p[len1] = '\0';
 877                 memcpy(a2p, arg2, len2);
 878                 a2p[len2] = '\0';
 879
 880                 result = strcoll(a1p, a2p);
 881
 882                 if (len1 >= STACKBUFLEN)
 883                         pfree(a1p);
 884                 if (len2 >= STACKBUFLEN)
 885                         pfree(a2p);
 886         }
 887         else
 888         {
 889                 result = strncmp(arg1, arg2, Min(len1, len2));
 890                 if ((result == 0) && (len1 != len2))
 891                         result = (len1 < len2) ? -1 : 1;
 892         }
 893
 894         return result;
 895 }
 896
 897
 898 /* text_cmp()
 899  * Internal comparison function for text strings.
 900  * Returns -1, 0 or 1
 901  */
 902 static int
 903 text_cmp(text *arg1, text *arg2)
 904 {
 905         char       *a1p,
 906                            *a2p;
 907         int                     len1,
 908                                 len2;
 909
 910         a1p = VARDATA(arg1);
 911         a2p = VARDATA(arg2);
 912
 913         len1 = VARSIZE(arg1) - VARHDRSZ;
 914         len2 = VARSIZE(arg2) - VARHDRSZ;
 915
 916         return varstr_cmp(a1p, len1, a2p, len2);
 917 }
 918
 919 /*
 920  * Comparison functions for text strings.
 921  *
 922  * Note: btree indexes need these routines not to leak memory; therefore,
 923  * be careful to free working copies of toasted datums.  Most places don't
 924  * need to be so careful.
 925  */
 926
 927 Datum
 928 texteq(PG_FUNCTION_ARGS)
 929 {
 930         text       *arg1 = PG_GETARG_TEXT_P(0);
 931         text       *arg2 = PG_GETARG_TEXT_P(1);
 932         bool            result;
 933
 934         /* fast path for different-length inputs */
 935         if (VARSIZE(arg1) != VARSIZE(arg2))
 936                 result = false;
 937         else
 938                 result = (text_cmp(arg1, arg2) == 0);
 939
 940         PG_FREE_IF_COPY(arg1, 0);
 941         PG_FREE_IF_COPY(arg2, 1);
 942
 943         PG_RETURN_BOOL(result);
 944 }
 945
 946 Datum
 947 textne(PG_FUNCTION_ARGS)
 948 {
 949         text       *arg1 = PG_GETARG_TEXT_P(0);
 950         text       *arg2 = PG_GETARG_TEXT_P(1);
 951         bool            result;
 952
 953         /* fast path for different-length inputs */
 954         if (VARSIZE(arg1) != VARSIZE(arg2))
 955                 result = true;
 956         else
 957                 result = (text_cmp(arg1, arg2) != 0);
 958
 959         PG_FREE_IF_COPY(arg1, 0);
 960         PG_FREE_IF_COPY(arg2, 1);
 961
 962         PG_RETURN_BOOL(result);
 963 }
 964
 965 Datum
 966 text_lt(PG_FUNCTION_ARGS)
 967 {
 968         text       *arg1 = PG_GETARG_TEXT_P(0);
 969         text       *arg2 = PG_GETARG_TEXT_P(1);
 970         bool            result;
 971
 972         result = (text_cmp(arg1, arg2) < 0);
 973
 974         PG_FREE_IF_COPY(arg1, 0);
 975         PG_FREE_IF_COPY(arg2, 1);
 976
 977         PG_RETURN_BOOL(result);
 978 }
 979
 980 Datum
 981 text_le(PG_FUNCTION_ARGS)
 982 {
 983         text       *arg1 = PG_GETARG_TEXT_P(0);
 984         text       *arg2 = PG_GETARG_TEXT_P(1);
 985         bool            result;
 986
 987         result = (text_cmp(arg1, arg2) <= 0);
 988
 989         PG_FREE_IF_COPY(arg1, 0);
 990         PG_FREE_IF_COPY(arg2, 1);
 991
 992         PG_RETURN_BOOL(result);
 993 }
 994
 995 Datum
 996 text_gt(PG_FUNCTION_ARGS)
 997 {
 998         text       *arg1 = PG_GETARG_TEXT_P(0);
 999         text       *arg2 = PG_GETARG_TEXT_P(1);
1000         bool            result;
1001
1002         result = (text_cmp(arg1, arg2) > 0);
1003
1004         PG_FREE_IF_COPY(arg1, 0);
1005         PG_FREE_IF_COPY(arg2, 1);
1006
1007         PG_RETURN_BOOL(result);
1008 }
1009
1010 Datum
1011 text_ge(PG_FUNCTION_ARGS)
1012 {
1013         text       *arg1 = PG_GETARG_TEXT_P(0);
1014         text       *arg2 = PG_GETARG_TEXT_P(1);
1015         bool            result;
1016
1017         result = (text_cmp(arg1, arg2) >= 0);
1018
1019         PG_FREE_IF_COPY(arg1, 0);
1020         PG_FREE_IF_COPY(arg2, 1);
1021
1022         PG_RETURN_BOOL(result);
1023 }
1024
1025 Datum
1026 bttextcmp(PG_FUNCTION_ARGS)
1027 {
1028         text       *arg1 = PG_GETARG_TEXT_P(0);
1029         text       *arg2 = PG_GETARG_TEXT_P(1);
1030         int32           result;
1031
1032         result = text_cmp(arg1, arg2);
1033
1034         PG_FREE_IF_COPY(arg1, 0);
1035         PG_FREE_IF_COPY(arg2, 1);
1036
1037         PG_RETURN_INT32(result);
1038 }
1039
1040
1041 Datum
1042 text_larger(PG_FUNCTION_ARGS)
1043 {
1044         text       *arg1 = PG_GETARG_TEXT_P(0);
1045         text       *arg2 = PG_GETARG_TEXT_P(1);
1046         text       *result;
1047
1048         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1049
1050         PG_RETURN_TEXT_P(result);
1051 }
1052
1053 Datum
1054 text_smaller(PG_FUNCTION_ARGS)
1055 {
1056         text       *arg1 = PG_GETARG_TEXT_P(0);
1057         text       *arg2 = PG_GETARG_TEXT_P(1);
1058         text       *result;
1059
1060         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1061
1062         PG_RETURN_TEXT_P(result);
1063 }
1064
1065
1066 /*
1067  * The following operators support character-by-character comparison
1068  * of text data types, to allow building indexes suitable for LIKE
1069  * clauses.
1070  */
1071
1072 static int
1073 internal_text_pattern_compare(text *arg1, text *arg2)
1074 {
1075         int                     result;
1076
1077         result = memcmp(VARDATA(arg1), VARDATA(arg2),
1078                                         Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1079         if (result != 0)
1080                 return result;
1081         else if (VARSIZE(arg1) < VARSIZE(arg2))
1082                 return -1;
1083         else if (VARSIZE(arg1) > VARSIZE(arg2))
1084                 return 1;
1085         else
1086                 return 0;
1087 }
1088
1089
1090 Datum
1091 text_pattern_lt(PG_FUNCTION_ARGS)
1092 {
1093         text       *arg1 = PG_GETARG_TEXT_P(0);
1094         text       *arg2 = PG_GETARG_TEXT_P(1);
1095         int                     result;
1096
1097         result = internal_text_pattern_compare(arg1, arg2);
1098
1099         PG_FREE_IF_COPY(arg1, 0);
1100         PG_FREE_IF_COPY(arg2, 1);
1101
1102         PG_RETURN_BOOL(result < 0);
1103 }
1104
1105
1106 Datum
1107 text_pattern_le(PG_FUNCTION_ARGS)
1108 {
1109         text       *arg1 = PG_GETARG_TEXT_P(0);
1110         text       *arg2 = PG_GETARG_TEXT_P(1);
1111         int                     result;
1112
1113         result = internal_text_pattern_compare(arg1, arg2);
1114
1115         PG_FREE_IF_COPY(arg1, 0);
1116         PG_FREE_IF_COPY(arg2, 1);
1117
1118         PG_RETURN_BOOL(result <= 0);
1119 }
1120
1121
1122 Datum
1123 text_pattern_eq(PG_FUNCTION_ARGS)
1124 {
1125         text       *arg1 = PG_GETARG_TEXT_P(0);
1126         text       *arg2 = PG_GETARG_TEXT_P(1);
1127         int                     result;
1128
1129         if (VARSIZE(arg1) != VARSIZE(arg2))
1130                 result = 1;
1131         else
1132                 result = internal_text_pattern_compare(arg1, arg2);
1133
1134         PG_FREE_IF_COPY(arg1, 0);
1135         PG_FREE_IF_COPY(arg2, 1);
1136
1137         PG_RETURN_BOOL(result == 0);
1138 }
1139
1140
1141 Datum
1142 text_pattern_ge(PG_FUNCTION_ARGS)
1143 {
1144         text       *arg1 = PG_GETARG_TEXT_P(0);
1145         text       *arg2 = PG_GETARG_TEXT_P(1);
1146         int                     result;
1147
1148         result = internal_text_pattern_compare(arg1, arg2);
1149
1150         PG_FREE_IF_COPY(arg1, 0);
1151         PG_FREE_IF_COPY(arg2, 1);
1152
1153         PG_RETURN_BOOL(result >= 0);
1154 }
1155
1156
1157 Datum
1158 text_pattern_gt(PG_FUNCTION_ARGS)
1159 {
1160         text       *arg1 = PG_GETARG_TEXT_P(0);
1161         text       *arg2 = PG_GETARG_TEXT_P(1);
1162         int                     result;
1163
1164         result = internal_text_pattern_compare(arg1, arg2);
1165
1166         PG_FREE_IF_COPY(arg1, 0);
1167         PG_FREE_IF_COPY(arg2, 1);
1168
1169         PG_RETURN_BOOL(result > 0);
1170 }
1171
1172
1173 Datum
1174 text_pattern_ne(PG_FUNCTION_ARGS)
1175 {
1176         text       *arg1 = PG_GETARG_TEXT_P(0);
1177         text       *arg2 = PG_GETARG_TEXT_P(1);
1178         int                     result;
1179
1180         if (VARSIZE(arg1) != VARSIZE(arg2))
1181                 result = 1;
1182         else
1183                 result = internal_text_pattern_compare(arg1, arg2);
1184
1185         PG_FREE_IF_COPY(arg1, 0);
1186         PG_FREE_IF_COPY(arg2, 1);
1187
1188         PG_RETURN_BOOL(result != 0);
1189 }
1190
1191
1192 Datum
1193 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1194 {
1195         text       *arg1 = PG_GETARG_TEXT_P(0);
1196         text       *arg2 = PG_GETARG_TEXT_P(1);
1197         int                     result;
1198
1199         result = internal_text_pattern_compare(arg1, arg2);
1200
1201         PG_FREE_IF_COPY(arg1, 0);
1202         PG_FREE_IF_COPY(arg2, 1);
1203
1204         PG_RETURN_INT32(result);
1205 }
1206
1207
1208 /*-------------------------------------------------------------
1209  * byteaoctetlen
1210  *
1211  * get the number of bytes contained in an instance of type 'bytea'
1212  *-------------------------------------------------------------
1213  */
1214 Datum
1215 byteaoctetlen(PG_FUNCTION_ARGS)
1216 {
1217         Datum           str = PG_GETARG_DATUM(0);
1218
1219         /* We need not detoast the input at all */
1220         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1221 }
1222
1223 /*
1224  * byteacat -
1225  *        takes two bytea* and returns a bytea* that is the concatenation of
1226  *        the two.
1227  *
1228  * Cloned from textcat and modified as required.
1229  */
1230 Datum
1231 byteacat(PG_FUNCTION_ARGS)
1232 {
1233         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1234         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1235         int                     len1,
1236                                 len2,
1237                                 len;
1238         bytea      *result;
1239         char       *ptr;
1240
1241         len1 = (VARSIZE(t1) - VARHDRSZ);
1242         if (len1 < 0)
1243                 len1 = 0;
1244
1245         len2 = (VARSIZE(t2) - VARHDRSZ);
1246         if (len2 < 0)
1247                 len2 = 0;
1248
1249         len = len1 + len2 + VARHDRSZ;
1250         result = (bytea *) palloc(len);
1251
1252         /* Set size of result string... */
1253         VARATT_SIZEP(result) = len;
1254
1255         /* Fill data field of result string... */
1256         ptr = VARDATA(result);
1257         if (len1 > 0)
1258                 memcpy(ptr, VARDATA(t1), len1);
1259         if (len2 > 0)
1260                 memcpy(ptr + len1, VARDATA(t2), len2);
1261
1262         PG_RETURN_BYTEA_P(result);
1263 }
1264
1265 #define PG_STR_GET_BYTEA(str_) \
1266         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1267 /*
1268  * bytea_substr()
1269  * Return a substring starting at the specified position.
1270  * Cloned from text_substr and modified as required.
1271  *
1272  * Input:
1273  *      - string
1274  *      - starting position (is one-based)
1275  *      - string length (optional)
1276  *
1277  * If the starting position is zero or less, then return from the start of the string
1278  * adjusting the length to be consistent with the "negative start" per SQL92.
1279  * If the length is less than zero, an ERROR is thrown. If no third argument
1280  * (length) is provided, the length to the end of the string is assumed.
1281  */
1282 Datum
1283 bytea_substr(PG_FUNCTION_ARGS)
1284 {
1285         int                     S = PG_GETARG_INT32(1); /* start position */
1286         int                     S1;                             /* adjusted start position */
1287         int                     L1;                             /* adjusted substring length */
1288
1289         S1 = Max(S, 1);
1290
1291         if (fcinfo->nargs == 2)
1292         {
1293                 /*
1294                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1295                  * everything to the end of the string if we pass it a negative
1296                  * value for length.
1297                  */
1298                 L1 = -1;
1299         }
1300         else
1301         {
1302                 /* end position */
1303                 int                     E = S + PG_GETARG_INT32(2);
1304
1305                 /*
1306                  * A negative value for L is the only way for the end position to
1307                  * be before the start. SQL99 says to throw an error.
1308                  */
1309                 if (E < S)
1310                         ereport(ERROR,
1311                                         (errcode(ERRCODE_SUBSTRING_ERROR),
1312                                          errmsg("negative substring length not allowed")));
1313
1314                 /*
1315                  * A zero or negative value for the end position can happen if the
1316                  * start was negative or one. SQL99 says to return a zero-length
1317                  * string.
1318                  */
1319                 if (E < 1)
1320                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1321
1322                 L1 = E - S1;
1323         }
1324
1325         /*
1326          * If the start position is past the end of the string, SQL99 says to
1327          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
1328          * that for us. Convert to zero-based starting position
1329          */
1330         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1331 }
1332
1333 /*
1334  * bytea_substr_no_len -
1335  *        Wrapper to avoid opr_sanity failure due to
1336  *        one function accepting a different number of args.
1337  */
1338 Datum
1339 bytea_substr_no_len(PG_FUNCTION_ARGS)
1340 {
1341         return bytea_substr(fcinfo);
1342 }
1343
1344 /*
1345  * byteapos -
1346  *        Return the position of the specified substring.
1347  *        Implements the SQL92 POSITION() function.
1348  * Cloned from textpos and modified as required.
1349  */
1350 Datum
1351 byteapos(PG_FUNCTION_ARGS)
1352 {
1353         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1354         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1355         int                     pos;
1356         int                     px,
1357                                 p;
1358         int                     len1,
1359                                 len2;
1360         char       *p1,
1361                            *p2;
1362
1363         if (VARSIZE(t2) <= VARHDRSZ)
1364                 PG_RETURN_INT32(1);             /* result for empty pattern */
1365
1366         len1 = (VARSIZE(t1) - VARHDRSZ);
1367         len2 = (VARSIZE(t2) - VARHDRSZ);
1368
1369         p1 = VARDATA(t1);
1370         p2 = VARDATA(t2);
1371
1372         pos = 0;
1373         px = (len1 - len2);
1374         for (p = 0; p <= px; p++)
1375         {
1376                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1377                 {
1378                         pos = p + 1;
1379                         break;
1380                 };
1381                 p1++;
1382         };
1383
1384         PG_RETURN_INT32(pos);
1385 }
1386
1387 /*-------------------------------------------------------------
1388  * byteaGetByte
1389  *
1390  * this routine treats "bytea" as an array of bytes.
1391  * It returns the Nth byte (a number between 0 and 255).
1392  *-------------------------------------------------------------
1393  */
1394 Datum
1395 byteaGetByte(PG_FUNCTION_ARGS)
1396 {
1397         bytea      *v = PG_GETARG_BYTEA_P(0);
1398         int32           n = PG_GETARG_INT32(1);
1399         int                     len;
1400         int                     byte;
1401
1402         len = VARSIZE(v) - VARHDRSZ;
1403
1404         if (n < 0 || n >= len)
1405                 ereport(ERROR,
1406                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1407                                  errmsg("index %d out of valid range, 0..%d",
1408                                                 n, len - 1)));
1409
1410         byte = ((unsigned char *) VARDATA(v))[n];
1411
1412         PG_RETURN_INT32(byte);
1413 }
1414
1415 /*-------------------------------------------------------------
1416  * byteaGetBit
1417  *
1418  * This routine treats a "bytea" type like an array of bits.
1419  * It returns the value of the Nth bit (0 or 1).
1420  *
1421  *-------------------------------------------------------------
1422  */
1423 Datum
1424 byteaGetBit(PG_FUNCTION_ARGS)
1425 {
1426         bytea      *v = PG_GETARG_BYTEA_P(0);
1427         int32           n = PG_GETARG_INT32(1);
1428         int                     byteNo,
1429                                 bitNo;
1430         int                     len;
1431         int                     byte;
1432
1433         len = VARSIZE(v) - VARHDRSZ;
1434
1435         if (n < 0 || n >= len * 8)
1436                 ereport(ERROR,
1437                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1438                                  errmsg("index %d out of valid range, 0..%d",
1439                                                 n, len * 8 - 1)));
1440
1441         byteNo = n / 8;
1442         bitNo = n % 8;
1443
1444         byte = ((unsigned char *) VARDATA(v))[byteNo];
1445
1446         if (byte & (1 << bitNo))
1447                 PG_RETURN_INT32(1);
1448         else
1449                 PG_RETURN_INT32(0);
1450 }
1451
1452 /*-------------------------------------------------------------
1453  * byteaSetByte
1454  *
1455  * Given an instance of type 'bytea' creates a new one with
1456  * the Nth byte set to the given value.
1457  *
1458  *-------------------------------------------------------------
1459  */
1460 Datum
1461 byteaSetByte(PG_FUNCTION_ARGS)
1462 {
1463         bytea      *v = PG_GETARG_BYTEA_P(0);
1464         int32           n = PG_GETARG_INT32(1);
1465         int32           newByte = PG_GETARG_INT32(2);
1466         int                     len;
1467         bytea      *res;
1468
1469         len = VARSIZE(v) - VARHDRSZ;
1470
1471         if (n < 0 || n >= len)
1472                 ereport(ERROR,
1473                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1474                                  errmsg("index %d out of valid range, 0..%d",
1475                                                 n, len - 1)));
1476
1477         /*
1478          * Make a copy of the original varlena.
1479          */
1480         res = (bytea *) palloc(VARSIZE(v));
1481         memcpy((char *) res, (char *) v, VARSIZE(v));
1482
1483         /*
1484          * Now set the byte.
1485          */
1486         ((unsigned char *) VARDATA(res))[n] = newByte;
1487
1488         PG_RETURN_BYTEA_P(res);
1489 }
1490
1491 /*-------------------------------------------------------------
1492  * byteaSetBit
1493  *
1494  * Given an instance of type 'bytea' creates a new one with
1495  * the Nth bit set to the given value.
1496  *
1497  *-------------------------------------------------------------
1498  */
1499 Datum
1500 byteaSetBit(PG_FUNCTION_ARGS)
1501 {
1502         bytea      *v = PG_GETARG_BYTEA_P(0);
1503         int32           n = PG_GETARG_INT32(1);
1504         int32           newBit = PG_GETARG_INT32(2);
1505         bytea      *res;
1506         int                     len;
1507         int                     oldByte,
1508                                 newByte;
1509         int                     byteNo,
1510                                 bitNo;
1511
1512         len = VARSIZE(v) - VARHDRSZ;
1513
1514         if (n < 0 || n >= len * 8)
1515                 ereport(ERROR,
1516                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1517                                  errmsg("index %d out of valid range, 0..%d",
1518                                                 n, len * 8 - 1)));
1519
1520         byteNo = n / 8;
1521         bitNo = n % 8;
1522
1523         /*
1524          * sanity check!
1525          */
1526         if (newBit != 0 && newBit != 1)
1527                 ereport(ERROR,
1528                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1529                                  errmsg("new bit must be 0 or 1")));
1530
1531         /*
1532          * Make a copy of the original varlena.
1533          */
1534         res = (bytea *) palloc(VARSIZE(v));
1535         memcpy((char *) res, (char *) v, VARSIZE(v));
1536
1537         /*
1538          * Update the byte.
1539          */
1540         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1541
1542         if (newBit == 0)
1543                 newByte = oldByte & (~(1 << bitNo));
1544         else
1545                 newByte = oldByte | (1 << bitNo);
1546
1547         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1548
1549         PG_RETURN_BYTEA_P(res);
1550 }
1551
1552
1553 /* text_name()
1554  * Converts a text type to a Name type.
1555  */
1556 Datum
1557 text_name(PG_FUNCTION_ARGS)
1558 {
1559         text       *s = PG_GETARG_TEXT_P(0);
1560         Name            result;
1561         int                     len;
1562
1563         len = VARSIZE(s) - VARHDRSZ;
1564
1565         /* Truncate oversize input */
1566         if (len >= NAMEDATALEN)
1567                 len = NAMEDATALEN - 1;
1568
1569 #ifdef STRINGDEBUG
1570         printf("text- convert string length %d (%d) ->%d\n",
1571                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1572 #endif
1573
1574         result = (Name) palloc(NAMEDATALEN);
1575         memcpy(NameStr(*result), VARDATA(s), len);
1576
1577         /* now null pad to full length... */
1578         while (len < NAMEDATALEN)
1579         {
1580                 *(NameStr(*result) + len) = '\0';
1581                 len++;
1582         }
1583
1584         PG_RETURN_NAME(result);
1585 }
1586
1587 /* name_text()
1588  * Converts a Name type to a text type.
1589  */
1590 Datum
1591 name_text(PG_FUNCTION_ARGS)
1592 {
1593         Name            s = PG_GETARG_NAME(0);
1594         text       *result;
1595         int                     len;
1596
1597         len = strlen(NameStr(*s));
1598
1599 #ifdef STRINGDEBUG
1600         printf("text- convert string length %d (%d) ->%d\n",
1601                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1602 #endif
1603
1604         result = palloc(VARHDRSZ + len);
1605         VARATT_SIZEP(result) = VARHDRSZ + len;
1606         memcpy(VARDATA(result), NameStr(*s), len);
1607
1608         PG_RETURN_TEXT_P(result);
1609 }
1610
1611
1612 /*
1613  * textToQualifiedNameList - convert a text object to list of names
1614  *
1615  * This implements the input parsing needed by nextval() and other
1616  * functions that take a text parameter representing a qualified name.
1617  * We split the name at dots, downcase if not double-quoted, and
1618  * truncate names if they're too long.
1619  */
1620 List *
1621 textToQualifiedNameList(text *textval, const char *caller)
1622 {
1623         char       *rawname;
1624         List       *result = NIL;
1625         List       *namelist;
1626         ListCell   *l;
1627
1628         /* Convert to C string (handles possible detoasting). */
1629         /* Note we rely on being able to modify rawname below. */
1630         rawname = DatumGetCString(DirectFunctionCall1(textout,
1631                                                                                           PointerGetDatum(textval)));
1632
1633         if (!SplitIdentifierString(rawname, '.', &namelist))
1634                 ereport(ERROR,
1635                                 (errcode(ERRCODE_INVALID_NAME),
1636                                  errmsg("invalid name syntax")));
1637
1638         if (namelist == NIL)
1639                 ereport(ERROR,
1640                                 (errcode(ERRCODE_INVALID_NAME),
1641                                  errmsg("invalid name syntax")));
1642
1643         foreach(l, namelist)
1644         {
1645                 char       *curname = (char *) lfirst(l);
1646
1647                 result = lappend(result, makeString(pstrdup(curname)));
1648         }
1649
1650         pfree(rawname);
1651         list_free(namelist);
1652
1653         return result;
1654 }
1655
1656 /*
1657  * SplitIdentifierString --- parse a string containing identifiers
1658  *
1659  * This is the guts of textToQualifiedNameList, and is exported for use in
1660  * other situations such as parsing GUC variables.      In the GUC case, it's
1661  * important to avoid memory leaks, so the API is designed to minimize the
1662  * amount of stuff that needs to be allocated and freed.
1663  *
1664  * Inputs:
1665  *      rawstring: the input string; must be overwritable!      On return, it's
1666  *                         been modified to contain the separated identifiers.
1667  *      separator: the separator punctuation expected between identifiers
1668  *                         (typically '.' or ',').      Whitespace may also appear around
1669  *                         identifiers.
1670  * Outputs:
1671  *      namelist: filled with a palloc'd list of pointers to identifiers within
1672  *                        rawstring.  Caller should freeList() this even on error return.
1673  *
1674  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1675  *
1676  * Note that an empty string is considered okay here, though not in
1677  * textToQualifiedNameList.
1678  */
1679 bool
1680 SplitIdentifierString(char *rawstring, char separator,
1681                                           List **namelist)
1682 {
1683         char       *nextp = rawstring;
1684         bool            done = false;
1685
1686         *namelist = NIL;
1687
1688         while (isspace((unsigned char) *nextp))
1689                 nextp++;                                /* skip leading whitespace */
1690
1691         if (*nextp == '\0')
1692                 return true;                    /* allow empty string */
1693
1694         /* At the top of the loop, we are at start of a new identifier. */
1695         do
1696         {
1697                 char       *curname;
1698                 char       *endp;
1699
1700                 if (*nextp == '\"')
1701                 {
1702                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1703                         curname = nextp + 1;
1704                         for (;;)
1705                         {
1706                                 endp = strchr(nextp + 1, '\"');
1707                                 if (endp == NULL)
1708                                         return false;           /* mismatched quotes */
1709                                 if (endp[1] != '\"')
1710                                         break;          /* found end of quoted name */
1711                                 /* Collapse adjacent quotes into one quote, and look again */
1712                                 memmove(endp, endp + 1, strlen(endp));
1713                                 nextp = endp;
1714                         }
1715                         /* endp now points at the terminating quote */
1716                         nextp = endp + 1;
1717                 }
1718                 else
1719                 {
1720                         /* Unquoted name --- extends to separator or whitespace */
1721                         char       *downname;
1722                         int                     len;
1723
1724                         curname = nextp;
1725                         while (*nextp && *nextp != separator &&
1726                                    !isspace((unsigned char) *nextp))
1727                                 nextp++;
1728                         endp = nextp;
1729                         if (curname == nextp)
1730                                 return false;   /* empty unquoted name not allowed */
1731
1732                         /*
1733                          * Downcase the identifier, using same code as main lexer
1734                          * does.
1735                          *
1736                          * XXX because we want to overwrite the input in-place, we cannot
1737                          * support a downcasing transformation that increases the
1738                          * string length.  This is not a problem given the current
1739                          * implementation of downcase_truncate_identifier, but we'll
1740                          * probably have to do something about this someday.
1741                          */
1742                         len = endp - curname;
1743                         downname = downcase_truncate_identifier(curname, len, false);
1744                         Assert(strlen(downname) <= len);
1745                         strncpy(curname, downname, len);
1746                         pfree(downname);
1747                 }
1748
1749                 while (isspace((unsigned char) *nextp))
1750                         nextp++;                        /* skip trailing whitespace */
1751
1752                 if (*nextp == separator)
1753                 {
1754                         nextp++;
1755                         while (isspace((unsigned char) *nextp))
1756                                 nextp++;                /* skip leading whitespace for next */
1757                         /* we expect another name, so done remains false */
1758                 }
1759                 else if (*nextp == '\0')
1760                         done = true;
1761                 else
1762                         return false;           /* invalid syntax */
1763
1764                 /* Now safe to overwrite separator with a null */
1765                 *endp = '\0';
1766
1767                 /* Truncate name if it's overlength */
1768                 truncate_identifier(curname, strlen(curname), false);
1769
1770                 /*
1771                  * Finished isolating current name --- add it to list
1772                  */
1773                 *namelist = lappend(*namelist, curname);
1774
1775                 /* Loop back if we didn't reach end of string */
1776         } while (!done);
1777
1778         return true;
1779 }
1780
1781
1782 /*****************************************************************************
1783  *      Comparison Functions used for bytea
1784  *
1785  * Note: btree indexes need these routines not to leak memory; therefore,
1786  * be careful to free working copies of toasted datums.  Most places don't
1787  * need to be so careful.
1788  *****************************************************************************/
1789
1790 Datum
1791 byteaeq(PG_FUNCTION_ARGS)
1792 {
1793         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1794         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1795         int                     len1,
1796                                 len2;
1797         bool            result;
1798
1799         len1 = VARSIZE(arg1) - VARHDRSZ;
1800         len2 = VARSIZE(arg2) - VARHDRSZ;
1801
1802         /* fast path for different-length inputs */
1803         if (len1 != len2)
1804                 result = false;
1805         else
1806                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1807
1808         PG_FREE_IF_COPY(arg1, 0);
1809         PG_FREE_IF_COPY(arg2, 1);
1810
1811         PG_RETURN_BOOL(result);
1812 }
1813
1814 Datum
1815 byteane(PG_FUNCTION_ARGS)
1816 {
1817         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1818         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1819         int                     len1,
1820                                 len2;
1821         bool            result;
1822
1823         len1 = VARSIZE(arg1) - VARHDRSZ;
1824         len2 = VARSIZE(arg2) - VARHDRSZ;
1825
1826         /* fast path for different-length inputs */
1827         if (len1 != len2)
1828                 result = true;
1829         else
1830                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1831
1832         PG_FREE_IF_COPY(arg1, 0);
1833         PG_FREE_IF_COPY(arg2, 1);
1834
1835         PG_RETURN_BOOL(result);
1836 }
1837
1838 Datum
1839 bytealt(PG_FUNCTION_ARGS)
1840 {
1841         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1842         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1843         int                     len1,
1844                                 len2;
1845         int                     cmp;
1846
1847         len1 = VARSIZE(arg1) - VARHDRSZ;
1848         len2 = VARSIZE(arg2) - VARHDRSZ;
1849
1850         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1851
1852         PG_FREE_IF_COPY(arg1, 0);
1853         PG_FREE_IF_COPY(arg2, 1);
1854
1855         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1856 }
1857
1858 Datum
1859 byteale(PG_FUNCTION_ARGS)
1860 {
1861         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1862         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1863         int                     len1,
1864                                 len2;
1865         int                     cmp;
1866
1867         len1 = VARSIZE(arg1) - VARHDRSZ;
1868         len2 = VARSIZE(arg2) - VARHDRSZ;
1869
1870         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1871
1872         PG_FREE_IF_COPY(arg1, 0);
1873         PG_FREE_IF_COPY(arg2, 1);
1874
1875         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1876 }
1877
1878 Datum
1879 byteagt(PG_FUNCTION_ARGS)
1880 {
1881         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1882         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1883         int                     len1,
1884                                 len2;
1885         int                     cmp;
1886
1887         len1 = VARSIZE(arg1) - VARHDRSZ;
1888         len2 = VARSIZE(arg2) - VARHDRSZ;
1889
1890         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1891
1892         PG_FREE_IF_COPY(arg1, 0);
1893         PG_FREE_IF_COPY(arg2, 1);
1894
1895         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1896 }
1897
1898 Datum
1899 byteage(PG_FUNCTION_ARGS)
1900 {
1901         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1902         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1903         int                     len1,
1904                                 len2;
1905         int                     cmp;
1906
1907         len1 = VARSIZE(arg1) - VARHDRSZ;
1908         len2 = VARSIZE(arg2) - VARHDRSZ;
1909
1910         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1911
1912         PG_FREE_IF_COPY(arg1, 0);
1913         PG_FREE_IF_COPY(arg2, 1);
1914
1915         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1916 }
1917
1918 Datum
1919 byteacmp(PG_FUNCTION_ARGS)
1920 {
1921         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1922         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1923         int                     len1,
1924                                 len2;
1925         int                     cmp;
1926
1927         len1 = VARSIZE(arg1) - VARHDRSZ;
1928         len2 = VARSIZE(arg2) - VARHDRSZ;
1929
1930         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1931         if ((cmp == 0) && (len1 != len2))
1932                 cmp = (len1 < len2) ? -1 : 1;
1933
1934         PG_FREE_IF_COPY(arg1, 0);
1935         PG_FREE_IF_COPY(arg2, 1);
1936
1937         PG_RETURN_INT32(cmp);
1938 }
1939
1940 /*
1941  * replace_text
1942  * replace all occurrences of 'old_sub_str' in 'orig_str'
1943  * with 'new_sub_str' to form 'new_str'
1944  *
1945  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1946  * otherwise returns 'new_str'
1947  */
1948 Datum
1949 replace_text(PG_FUNCTION_ARGS)
1950 {
1951         text       *src_text = PG_GETARG_TEXT_P(0);
1952         text       *from_sub_text = PG_GETARG_TEXT_P(1);
1953         text       *to_sub_text = PG_GETARG_TEXT_P(2);
1954         int                     src_text_len = TEXTLEN(src_text);
1955         int                     from_sub_text_len = TEXTLEN(from_sub_text);
1956         char       *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1957         text       *left_text;
1958         text       *right_text;
1959         text       *buf_text;
1960         text       *ret_text;
1961         int                     curr_posn;
1962         StringInfo      str = makeStringInfo();
1963
1964         if (src_text_len == 0 || from_sub_text_len == 0)
1965                 PG_RETURN_TEXT_P(src_text);
1966
1967         buf_text = TEXTDUP(src_text);
1968         curr_posn = TEXTPOS(buf_text, from_sub_text);
1969
1970         while (curr_posn > 0)
1971         {
1972                 left_text = LEFT(buf_text, from_sub_text);
1973                 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1974
1975                 appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
1976                 appendStringInfoString(str, to_sub_str);
1977
1978                 pfree(buf_text);
1979                 pfree(left_text);
1980                 buf_text = right_text;
1981                 curr_posn = TEXTPOS(buf_text, from_sub_text);
1982         }
1983
1984         appendStringInfoString(str, PG_TEXT_GET_STR(buf_text));
1985         pfree(buf_text);
1986
1987         ret_text = PG_STR_GET_TEXT(str->data);
1988         pfree(str->data);
1989         pfree(str);
1990
1991         PG_RETURN_TEXT_P(ret_text);
1992 }
1993
1994 /*
1995  * split_text
1996  * parse input string
1997  * return ord item (1 based)
1998  * based on provided field separator
1999  */
2000 Datum
2001 split_text(PG_FUNCTION_ARGS)
2002 {
2003         text       *inputstring = PG_GETARG_TEXT_P(0);
2004         text       *fldsep = PG_GETARG_TEXT_P(1);
2005         int                     fldnum = PG_GETARG_INT32(2);
2006         int                     inputstring_len = TEXTLEN(inputstring);
2007         int                     fldsep_len = TEXTLEN(fldsep);
2008         int                     start_posn;
2009         int                     end_posn;
2010         text       *result_text;
2011
2012         /* field number is 1 based */
2013         if (fldnum < 1)
2014                 ereport(ERROR,
2015                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2016                                  errmsg("field position must be greater than zero")));
2017
2018         /* return empty string for empty input string */
2019         if (inputstring_len < 1)
2020                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2021
2022         /* empty field separator */
2023         if (fldsep_len < 1)
2024         {
2025                 /* if first field, return input string, else empty string */
2026                 if (fldnum == 1)
2027                         PG_RETURN_TEXT_P(inputstring);
2028                 else
2029                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2030         }
2031
2032         start_posn = text_position(inputstring, fldsep, fldnum - 1);
2033         end_posn = text_position(inputstring, fldsep, fldnum);
2034
2035         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2036         {
2037                 /* if first field, return input string, else empty string */
2038                 if (fldnum == 1)
2039                         PG_RETURN_TEXT_P(inputstring);
2040                 else
2041                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2042         }
2043         else if (start_posn == 0)
2044         {
2045                 /* first field requested */
2046                 result_text = LEFT(inputstring, fldsep);
2047                 PG_RETURN_TEXT_P(result_text);
2048         }
2049         else if (end_posn == 0)
2050         {
2051                 /* last field requested */
2052                 result_text = text_substring(PointerGetDatum(inputstring),
2053                                                                          start_posn + fldsep_len,
2054                                                                          -1, true);
2055                 PG_RETURN_TEXT_P(result_text);
2056         }
2057         else
2058         {
2059                 /* interior field requested */
2060                 result_text = text_substring(PointerGetDatum(inputstring),
2061                                                                          start_posn + fldsep_len,
2062                                                                          end_posn - start_posn - fldsep_len,
2063                                                                          false);
2064                 PG_RETURN_TEXT_P(result_text);
2065         }
2066 }
2067
2068 /*
2069  * text_to_array
2070  * parse input string
2071  * return text array of elements
2072  * based on provided field separator
2073  */
2074 Datum
2075 text_to_array(PG_FUNCTION_ARGS)
2076 {
2077         text       *inputstring = PG_GETARG_TEXT_P(0);
2078         text       *fldsep = PG_GETARG_TEXT_P(1);
2079         int                     inputstring_len = TEXTLEN(inputstring);
2080         int                     fldsep_len = TEXTLEN(fldsep);
2081         int                     fldnum;
2082         int                     start_posn;
2083         int                     end_posn;
2084         text       *result_text;
2085         ArrayBuildState *astate = NULL;
2086
2087         /* return NULL for empty input string */
2088         if (inputstring_len < 1)
2089                 PG_RETURN_NULL();
2090
2091         /*
2092          * empty field separator return one element, 1D, array using the input
2093          * string
2094          */
2095         if (fldsep_len < 1)
2096                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2097                                                                            CStringGetDatum(inputstring), 1));
2098
2099         /* start with end position holding the initial start position */
2100         end_posn = 0;
2101         for (fldnum = 1;; fldnum++) /* field number is 1 based */
2102         {
2103                 Datum           dvalue;
2104                 bool            disnull = false;
2105
2106                 start_posn = end_posn;
2107                 end_posn = text_position(inputstring, fldsep, fldnum);
2108
2109                 if ((start_posn == 0) && (end_posn == 0))               /* fldsep not found */
2110                 {
2111                         if (fldnum == 1)
2112                         {
2113                                 /*
2114                                  * first element return one element, 1D, array using the
2115                                  * input string
2116                                  */
2117                                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2118                                                                            CStringGetDatum(inputstring), 1));
2119                         }
2120                         else
2121                         {
2122                                 /* otherwise create array and exit */
2123                                 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2124                                                                                                   CurrentMemoryContext));
2125                         }
2126                 }
2127                 else if (start_posn == 0)
2128                 {
2129                         /* first field requested */
2130                         result_text = LEFT(inputstring, fldsep);
2131                 }
2132                 else if (end_posn == 0)
2133                 {
2134                         /* last field requested */
2135                         result_text = text_substring(PointerGetDatum(inputstring),
2136                                                                                  start_posn + fldsep_len,
2137                                                                                  -1, true);
2138                 }
2139                 else
2140                 {
2141                         /* interior field requested */
2142                         result_text = text_substring(PointerGetDatum(inputstring),
2143                                                                                  start_posn + fldsep_len,
2144                                                                           end_posn - start_posn - fldsep_len,
2145                                                                                  false);
2146                 }
2147
2148                 /* stash away current value */
2149                 dvalue = PointerGetDatum(result_text);
2150                 astate = accumArrayResult(astate, dvalue,
2151                                                                   disnull, TEXTOID,
2152                                                                   CurrentMemoryContext);
2153         }
2154
2155         /* never reached -- keep compiler quiet */
2156         PG_RETURN_NULL();
2157 }
2158
2159 /*
2160  * array_to_text
2161  * concatenate Cstring representation of input array elements
2162  * using provided field separator
2163  */
2164 Datum
2165 array_to_text(PG_FUNCTION_ARGS)
2166 {
2167         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2168         char       *fldsep = PG_TEXTARG_GET_STR(1);
2169         int                     nitems,
2170                            *dims,
2171                                 ndims;
2172         char       *p;
2173         Oid                     element_type;
2174         int                     typlen;
2175         bool            typbyval;
2176         char            typalign;
2177         StringInfo      result_str = makeStringInfo();
2178         int                     i;
2179         ArrayMetaState *my_extra;
2180
2181         p = ARR_DATA_PTR(v);
2182         ndims = ARR_NDIM(v);
2183         dims = ARR_DIMS(v);
2184         nitems = ArrayGetNItems(ndims, dims);
2185
2186         /* if there are no elements, return an empty string */
2187         if (nitems == 0)
2188                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2189
2190         element_type = ARR_ELEMTYPE(v);
2191
2192         /*
2193          * We arrange to look up info about element type, including its output
2194          * conversion proc, only once per series of calls, assuming the
2195          * element type doesn't change underneath us.
2196          */
2197         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2198         if (my_extra == NULL)
2199         {
2200                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2201                                                                                                  sizeof(ArrayMetaState));
2202                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2203                 my_extra->element_type = InvalidOid;
2204         }
2205
2206         if (my_extra->element_type != element_type)
2207         {
2208                 /*
2209                  * Get info about element type, including its output conversion
2210                  * proc
2211                  */
2212                 get_type_io_data(element_type, IOFunc_output,
2213                                                  &my_extra->typlen, &my_extra->typbyval,
2214                                                  &my_extra->typalign, &my_extra->typdelim,
2215                                                  &my_extra->typioparam, &my_extra->typiofunc);
2216                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2217                                           fcinfo->flinfo->fn_mcxt);
2218                 my_extra->element_type = element_type;
2219         }
2220         typlen = my_extra->typlen;
2221         typbyval = my_extra->typbyval;
2222         typalign = my_extra->typalign;
2223
2224         for (i = 0; i < nitems; i++)
2225         {
2226                 Datum           itemvalue;
2227                 char       *value;
2228
2229                 itemvalue = fetch_att(p, typbyval, typlen);
2230
2231                 value = DatumGetCString(FunctionCall1(&my_extra->proc,
2232                                                                                           itemvalue));
2233
2234                 if (i > 0)
2235                         appendStringInfo(result_str, "%s%s", fldsep, value);
2236                 else
2237                         appendStringInfoString(result_str, value);
2238
2239                 p = att_addlength(p, typlen, PointerGetDatum(p));
2240                 p = (char *) att_align(p, typalign);
2241         }
2242
2243         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2244 }
2245
2246 #define HEXBASE 16
2247 /*
2248  * Convert a int32 to a string containing a base 16 (hex) representation of
2249  * the number.
2250  */
2251 Datum
2252 to_hex32(PG_FUNCTION_ARGS)
2253 {
2254         uint32          value = (uint32) PG_GETARG_INT32(0);
2255         text       *result_text;
2256         char       *ptr;
2257         const char *digits = "0123456789abcdef";
2258         char            buf[32];                /* bigger than needed, but reasonable */
2259
2260         ptr = buf + sizeof(buf) - 1;
2261         *ptr = '\0';
2262
2263         do
2264         {
2265                 *--ptr = digits[value % HEXBASE];
2266                 value /= HEXBASE;
2267         } while (ptr > buf && value);
2268
2269         result_text = PG_STR_GET_TEXT(ptr);
2270         PG_RETURN_TEXT_P(result_text);
2271 }
2272
2273 /*
2274  * Convert a int64 to a string containing a base 16 (hex) representation of
2275  * the number.
2276  */
2277 Datum
2278 to_hex64(PG_FUNCTION_ARGS)
2279 {
2280         uint64          value = (uint64) PG_GETARG_INT64(0);
2281         text       *result_text;
2282         char       *ptr;
2283         const char *digits = "0123456789abcdef";
2284         char            buf[32];                /* bigger than needed, but reasonable */
2285
2286         ptr = buf + sizeof(buf) - 1;
2287         *ptr = '\0';
2288
2289         do
2290         {
2291                 *--ptr = digits[value % HEXBASE];
2292                 value /= HEXBASE;
2293         } while (ptr > buf && value);
2294
2295         result_text = PG_STR_GET_TEXT(ptr);
2296         PG_RETURN_TEXT_P(result_text);
2297 }
2298
2299 /*
2300  * Create an md5 hash of a text string and return it as hex
2301  *
2302  * md5 produces a 16 byte (128 bit) hash; double it for hex
2303  */
2304 #define MD5_HASH_LEN  32
2305
2306 Datum
2307 md5_text(PG_FUNCTION_ARGS)
2308 {
2309         text       *in_text = PG_GETARG_TEXT_P(0);
2310         size_t          len;
2311         char        hexsum[MD5_HASH_LEN + 1];
2312         text       *result_text;
2313
2314         /* Calculate the length of the buffer using varlena metadata */
2315         len = VARSIZE(in_text) - VARHDRSZ;
2316
2317         /* get the hash result */
2318         if (md5_hash(VARDATA(in_text), len, hexsum) == false)
2319                 ereport(ERROR,
2320                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2321                                  errmsg("out of memory")));
2322
2323         /* convert to text and return it */
2324         result_text = PG_STR_GET_TEXT(hexsum);
2325         PG_RETURN_TEXT_P(result_text);
2326 }
2327
2328 /*
2329  * Create an md5 hash of a bytea field and return it as a hex string:
2330  * 16-byte md5 digest is represented in 32 hex characters.
2331  */
2332 Datum
2333 md5_bytea(PG_FUNCTION_ARGS)
2334 {
2335         bytea      *in = PG_GETARG_BYTEA_P(0);
2336         size_t          len;
2337         char            hexsum[MD5_HASH_LEN + 1];
2338         text       *result_text;
2339
2340         len = VARSIZE(in) - VARHDRSZ;
2341         if (md5_hash(VARDATA(in), len, hexsum) == false)
2342                 ereport(ERROR,
2343                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2344                                  errmsg("out of memory")));
2345
2346         result_text = PG_STR_GET_TEXT(hexsum);
2347         PG_RETURN_TEXT_P(result_text);
2348 }