granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.110 2004/01/31 00:45:21 tgl Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18
  19 #include "mb/pg_wchar.h"
  20 #include "miscadmin.h"
  21 #include "access/tuptoaster.h"
  22 #include "catalog/pg_type.h"
  23 #include "lib/stringinfo.h"
  24 #include "libpq/crypt.h"
  25 #include "libpq/pqformat.h"
  26 #include "utils/array.h"
  27 #include "utils/builtins.h"
  28 #include "utils/pg_locale.h"
  29 #include "utils/lsyscache.h"
  30
  31
  32 typedef struct varlena unknown;
  33
  34 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  35 #define DatumGetUnknownPCopy(X)         ((unknown *) PG_DETOAST_DATUM_COPY(X))
  36 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  37 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
  38 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  39
  40 #define PG_TEXTARG_GET_STR(arg_) \
  41         DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
  42 #define PG_TEXT_GET_STR(textp_) \
  43         DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
  44 #define PG_STR_GET_TEXT(str_) \
  45         DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
  46 #define TEXTLEN(textp) \
  47         text_length(PointerGetDatum(textp))
  48 #define TEXTPOS(buf_text, from_sub_text) \
  49         text_position(buf_text, from_sub_text, 1)
  50 #define TEXTDUP(textp) \
  51         DatumGetTextPCopy(PointerGetDatum(textp))
  52 #define LEFT(buf_text, from_sub_text) \
  53         text_substring(PointerGetDatum(buf_text), \
  54                                         1, \
  55                                         TEXTPOS(buf_text, from_sub_text) - 1, false)
  56 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
  57         text_substring(PointerGetDatum(buf_text), \
  58                                         TEXTPOS(buf_text, from_sub_text) + (from_sub_text_len), \
  59                                         -1, true)
  60
  61 static int      text_cmp(text *arg1, text *arg2);
  62 static int32 text_length(Datum str);
  63 static int32 text_position(text *t1, text *t2, int matchnum);
  64 static text *text_substring(Datum str,
  65                            int32 start,
  66                            int32 length,
  67                            bool length_not_specified);
  68
  69
  70 /*****************************************************************************
  71  *       USER I/O ROUTINES                                                                                                               *
  72  *****************************************************************************/
  73
  74
  75 #define VAL(CH)                 ((CH) - '0')
  76 #define DIG(VAL)                ((VAL) + '0')
  77
  78 /*
  79  *              byteain                 - converts from printable representation of byte array
  80  *
  81  *              Non-printable characters must be passed as '\nnn' (octal) and are
  82  *              converted to internal form.  '\' must be passed as '\\'.
  83  *              ereport(ERROR, ...) if bad form.
  84  *
  85  *              BUGS:
  86  *                              The input is scaned twice.
  87  *                              The error checking of input is minimal.
  88  */
  89 Datum
  90 byteain(PG_FUNCTION_ARGS)
  91 {
  92         char       *inputText = PG_GETARG_CSTRING(0);
  93         char       *tp;
  94         char       *rp;
  95         int                     byte;
  96         bytea      *result;
  97
  98         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
  99         {
 100                 if (tp[0] != '\\')
 101                         tp++;
 102                 else if ((tp[0] == '\\') &&
 103                                  (tp[1] >= '0' && tp[1] <= '3') &&
 104                                  (tp[2] >= '0' && tp[2] <= '7') &&
 105                                  (tp[3] >= '0' && tp[3] <= '7'))
 106                         tp += 4;
 107                 else if ((tp[0] == '\\') &&
 108                                  (tp[1] == '\\'))
 109                         tp += 2;
 110                 else
 111                 {
 112                         /*
 113                          * one backslash, not followed by 0 or ### valid octal
 114                          */
 115                         ereport(ERROR,
 116                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 117                                          errmsg("invalid input syntax for type bytea")));
 118                 }
 119         }
 120
 121         byte += VARHDRSZ;
 122         result = (bytea *) palloc(byte);
 123         VARATT_SIZEP(result) = byte;    /* set varlena length */
 124
 125         tp = inputText;
 126         rp = VARDATA(result);
 127         while (*tp != '\0')
 128         {
 129                 if (tp[0] != '\\')
 130                         *rp++ = *tp++;
 131                 else if ((tp[0] == '\\') &&
 132                                  (tp[1] >= '0' && tp[1] <= '3') &&
 133                                  (tp[2] >= '0' && tp[2] <= '7') &&
 134                                  (tp[3] >= '0' && tp[3] <= '7'))
 135                 {
 136                         byte = VAL(tp[1]);
 137                         byte <<= 3;
 138                         byte += VAL(tp[2]);
 139                         byte <<= 3;
 140                         *rp++ = byte + VAL(tp[3]);
 141                         tp += 4;
 142                 }
 143                 else if ((tp[0] == '\\') &&
 144                                  (tp[1] == '\\'))
 145                 {
 146                         *rp++ = '\\';
 147                         tp += 2;
 148                 }
 149                 else
 150                 {
 151                         /*
 152                          * We should never get here. The first pass should not allow
 153                          * it.
 154                          */
 155                         ereport(ERROR,
 156                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 157                                          errmsg("invalid input syntax for type bytea")));
 158                 }
 159         }
 160
 161         PG_RETURN_BYTEA_P(result);
 162 }
 163
 164 /*
 165  *              byteaout                - converts to printable representation of byte array
 166  *
 167  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 168  *              '\\'.
 169  *
 170  *              NULL vlena should be an error--returning string with NULL for now.
 171  */
 172 Datum
 173 byteaout(PG_FUNCTION_ARGS)
 174 {
 175         bytea      *vlena = PG_GETARG_BYTEA_P(0);
 176         char       *result;
 177         char       *vp;
 178         char       *rp;
 179         int                     val;                    /* holds unprintable chars */
 180         int                     i;
 181         int                     len;
 182
 183         len = 1;                                        /* empty string has 1 char */
 184         vp = VARDATA(vlena);
 185         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 186         {
 187                 if (*vp == '\\')
 188                         len += 2;
 189                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 190                         len += 4;
 191                 else
 192                         len++;
 193         }
 194         rp = result = (char *) palloc(len);
 195         vp = VARDATA(vlena);
 196         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 197         {
 198                 if (*vp == '\\')
 199                 {
 200                         *rp++ = '\\';
 201                         *rp++ = '\\';
 202                 }
 203                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 204                 {
 205                         val = *vp;
 206                         rp[0] = '\\';
 207                         rp[3] = DIG(val & 07);
 208                         val >>= 3;
 209                         rp[2] = DIG(val & 07);
 210                         val >>= 3;
 211                         rp[1] = DIG(val & 03);
 212                         rp += 4;
 213                 }
 214                 else
 215                         *rp++ = *vp;
 216         }
 217         *rp = '\0';
 218         PG_RETURN_CSTRING(result);
 219 }
 220
 221 /*
 222  *              bytearecv                       - converts external binary format to bytea
 223  */
 224 Datum
 225 bytearecv(PG_FUNCTION_ARGS)
 226 {
 227         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 228         bytea      *result;
 229         int                     nbytes;
 230
 231         nbytes = buf->len - buf->cursor;
 232         result = (bytea *) palloc(nbytes + VARHDRSZ);
 233         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 234         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 235         PG_RETURN_BYTEA_P(result);
 236 }
 237
 238 /*
 239  *              byteasend                       - converts bytea to binary format
 240  *
 241  * This is a special case: just copy the input...
 242  */
 243 Datum
 244 byteasend(PG_FUNCTION_ARGS)
 245 {
 246         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
 247
 248         PG_RETURN_BYTEA_P(vlena);
 249 }
 250
 251
 252 /*
 253  *              textin                  - converts "..." to internal representation
 254  */
 255 Datum
 256 textin(PG_FUNCTION_ARGS)
 257 {
 258         char       *inputText = PG_GETARG_CSTRING(0);
 259         text       *result;
 260         int                     len;
 261
 262         /* verify encoding */
 263         len = strlen(inputText);
 264         pg_verifymbstr(inputText, len, false);
 265
 266         result = (text *) palloc(len + VARHDRSZ);
 267         VARATT_SIZEP(result) = len + VARHDRSZ;
 268
 269         memcpy(VARDATA(result), inputText, len);
 270
 271         PG_RETURN_TEXT_P(result);
 272 }
 273
 274 /*
 275  *              textout                 - converts internal representation to "..."
 276  */
 277 Datum
 278 textout(PG_FUNCTION_ARGS)
 279 {
 280         text       *t = PG_GETARG_TEXT_P(0);
 281         int                     len;
 282         char       *result;
 283
 284         len = VARSIZE(t) - VARHDRSZ;
 285         result = (char *) palloc(len + 1);
 286         memcpy(result, VARDATA(t), len);
 287         result[len] = '\0';
 288
 289         PG_RETURN_CSTRING(result);
 290 }
 291
 292 /*
 293  *              textrecv                        - converts external binary format to text
 294  */
 295 Datum
 296 textrecv(PG_FUNCTION_ARGS)
 297 {
 298         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 299         text       *result;
 300         char       *str;
 301         int                     nbytes;
 302
 303         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 304         result = (text *) palloc(nbytes + VARHDRSZ);
 305         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 306         memcpy(VARDATA(result), str, nbytes);
 307         pfree(str);
 308         PG_RETURN_TEXT_P(result);
 309 }
 310
 311 /*
 312  *              textsend                        - converts text to binary format
 313  */
 314 Datum
 315 textsend(PG_FUNCTION_ARGS)
 316 {
 317         text       *t = PG_GETARG_TEXT_P(0);
 318         StringInfoData buf;
 319
 320         pq_begintypsend(&buf);
 321         pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
 322         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 323 }
 324
 325
 326 /*
 327  *              unknownin                       - converts "..." to internal representation
 328  */
 329 Datum
 330 unknownin(PG_FUNCTION_ARGS)
 331 {
 332         char       *inputStr = PG_GETARG_CSTRING(0);
 333         unknown    *result;
 334         int                     len;
 335
 336         len = strlen(inputStr) + VARHDRSZ;
 337
 338         result = (unknown *) palloc(len);
 339         VARATT_SIZEP(result) = len;
 340
 341         memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
 342
 343         PG_RETURN_UNKNOWN_P(result);
 344 }
 345
 346 /*
 347  *              unknownout                      - converts internal representation to "..."
 348  */
 349 Datum
 350 unknownout(PG_FUNCTION_ARGS)
 351 {
 352         unknown    *t = PG_GETARG_UNKNOWN_P(0);
 353         int                     len;
 354         char       *result;
 355
 356         len = VARSIZE(t) - VARHDRSZ;
 357         result = (char *) palloc(len + 1);
 358         memcpy(result, VARDATA(t), len);
 359         result[len] = '\0';
 360
 361         PG_RETURN_CSTRING(result);
 362 }
 363
 364 /*
 365  *              unknownrecv                     - converts external binary format to unknown
 366  */
 367 Datum
 368 unknownrecv(PG_FUNCTION_ARGS)
 369 {
 370         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 371         unknown    *result;
 372         int                     nbytes;
 373
 374         nbytes = buf->len - buf->cursor;
 375         result = (unknown *) palloc(nbytes + VARHDRSZ);
 376         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 377         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 378         PG_RETURN_UNKNOWN_P(result);
 379 }
 380
 381 /*
 382  *              unknownsend                     - converts unknown to binary format
 383  *
 384  * This is a special case: just copy the input, since it's
 385  * effectively the same format as bytea
 386  */
 387 Datum
 388 unknownsend(PG_FUNCTION_ARGS)
 389 {
 390         unknown    *vlena = PG_GETARG_UNKNOWN_P_COPY(0);
 391
 392         PG_RETURN_UNKNOWN_P(vlena);
 393 }
 394
 395
 396 /* ========== PUBLIC ROUTINES ========== */
 397
 398 /*
 399  * textlen -
 400  *        returns the logical length of a text*
 401  *         (which is less than the VARSIZE of the text*)
 402  */
 403 Datum
 404 textlen(PG_FUNCTION_ARGS)
 405 {
 406         Datum           str = PG_GETARG_DATUM(0);
 407
 408         /* try to avoid decompressing argument */
 409         PG_RETURN_INT32(text_length(str));
 410 }
 411
 412 /*
 413  * text_length -
 414  *      Does the real work for textlen()
 415  *
 416  *      This is broken out so it can be called directly by other string processing
 417  *      functions.  Note that the argument is passed as a Datum, to indicate that
 418  *      it may still be in compressed form.  We can avoid decompressing it at all
 419  *      in some cases.
 420  */
 421 static int32
 422 text_length(Datum str)
 423 {
 424         /* fastpath when max encoding length is one */
 425         if (pg_database_encoding_max_length() == 1)
 426                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 427         else
 428         {
 429                 text       *t = DatumGetTextP(str);
 430
 431                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
 432                                                                                          VARSIZE(t) - VARHDRSZ));
 433         }
 434 }
 435
 436 /*
 437  * textoctetlen -
 438  *        returns the physical length of a text*
 439  *         (which is less than the VARSIZE of the text*)
 440  */
 441 Datum
 442 textoctetlen(PG_FUNCTION_ARGS)
 443 {
 444         Datum           str = PG_GETARG_DATUM(0);
 445
 446         /* We need not detoast the input at all */
 447         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 448 }
 449
 450 /*
 451  * textcat -
 452  *        takes two text* and returns a text* that is the concatenation of
 453  *        the two.
 454  *
 455  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 456  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 457  * Allocate space for output in all cases.
 458  * XXX - thomas 1997-07-10
 459  */
 460 Datum
 461 textcat(PG_FUNCTION_ARGS)
 462 {
 463         text       *t1 = PG_GETARG_TEXT_P(0);
 464         text       *t2 = PG_GETARG_TEXT_P(1);
 465         int                     len1,
 466                                 len2,
 467                                 len;
 468         text       *result;
 469         char       *ptr;
 470
 471         len1 = (VARSIZE(t1) - VARHDRSZ);
 472         if (len1 < 0)
 473                 len1 = 0;
 474
 475         len2 = (VARSIZE(t2) - VARHDRSZ);
 476         if (len2 < 0)
 477                 len2 = 0;
 478
 479         len = len1 + len2 + VARHDRSZ;
 480         result = (text *) palloc(len);
 481
 482         /* Set size of result string... */
 483         VARATT_SIZEP(result) = len;
 484
 485         /* Fill data field of result string... */
 486         ptr = VARDATA(result);
 487         if (len1 > 0)
 488                 memcpy(ptr, VARDATA(t1), len1);
 489         if (len2 > 0)
 490                 memcpy(ptr + len1, VARDATA(t2), len2);
 491
 492         PG_RETURN_TEXT_P(result);
 493 }
 494
 495 /*
 496  * text_substr()
 497  * Return a substring starting at the specified position.
 498  * - thomas 1997-12-31
 499  *
 500  * Input:
 501  *      - string
 502  *      - starting position (is one-based)
 503  *      - string length
 504  *
 505  * If the starting position is zero or less, then return from the start of the string
 506  *      adjusting the length to be consistent with the "negative start" per SQL92.
 507  * If the length is less than zero, return the remaining string.
 508  *
 509  * Added multibyte support.
 510  * - Tatsuo Ishii 1998-4-21
 511  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 512  * Formerly returned the entire string; now returns a portion.
 513  * - Thomas Lockhart 1998-12-10
 514  * Now uses faster TOAST-slicing interface
 515  * - John Gray 2002-02-22
 516  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 517  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 518  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 519  * S > LC and < LC + 4 sometimes garbage characters are returned.
 520  * - Joe Conway 2002-08-10
 521  */
 522 Datum
 523 text_substr(PG_FUNCTION_ARGS)
 524 {
 525         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 526                                                                         PG_GETARG_INT32(1),
 527                                                                         PG_GETARG_INT32(2),
 528                                                                         false));
 529 }
 530
 531 /*
 532  * text_substr_no_len -
 533  *        Wrapper to avoid opr_sanity failure due to
 534  *        one function accepting a different number of args.
 535  */
 536 Datum
 537 text_substr_no_len(PG_FUNCTION_ARGS)
 538 {
 539         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 540                                                                         PG_GETARG_INT32(1),
 541                                                                         -1, true));
 542 }
 543
 544 /*
 545  * text_substring -
 546  *      Does the real work for text_substr() and text_substr_no_len()
 547  *
 548  *      This is broken out so it can be called directly by other string processing
 549  *      functions.  Note that the argument is passed as a Datum, to indicate that
 550  *      it may still be in compressed/toasted form.  We can avoid detoasting all
 551  *      of it in some cases.
 552  */
 553 static text *
 554 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 555 {
 556         int32           eml = pg_database_encoding_max_length();
 557         int32           S = start;              /* start position */
 558         int32           S1;                             /* adjusted start position */
 559         int32           L1;                             /* adjusted substring length */
 560
 561         /* life is easy if the encoding max length is 1 */
 562         if (eml == 1)
 563         {
 564                 S1 = Max(S, 1);
 565
 566                 if (length_not_specified)               /* special case - get length to
 567                                                                                  * end of string */
 568                         L1 = -1;
 569                 else
 570                 {
 571                         /* end position */
 572                         int                     E = S + length;
 573
 574                         /*
 575                          * A negative value for L is the only way for the end position
 576                          * to be before the start. SQL99 says to throw an error.
 577                          */
 578                         if (E < S)
 579                                 ereport(ERROR,
 580                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 581                                            errmsg("negative substring length not allowed")));
 582
 583                         /*
 584                          * A zero or negative value for the end position can happen if
 585                          * the start was negative or one. SQL99 says to return a
 586                          * zero-length string.
 587                          */
 588                         if (E < 1)
 589                                 return PG_STR_GET_TEXT("");
 590
 591                         L1 = E - S1;
 592                 }
 593
 594                 /*
 595                  * If the start position is past the end of the string, SQL99 says
 596                  * to return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will
 597                  * do that for us. Convert to zero-based starting position
 598                  */
 599                 return DatumGetTextPSlice(str, S1 - 1, L1);
 600         }
 601         else if (eml > 1)
 602         {
 603                 /*
 604                  * When encoding max length is > 1, we can't get LC without
 605                  * detoasting, so we'll grab a conservatively large slice now and
 606                  * go back later to do the right thing
 607                  */
 608                 int32           slice_start;
 609                 int32           slice_size;
 610                 int32           slice_strlen;
 611                 text       *slice;
 612                 int32           E1;
 613                 int32           i;
 614                 char       *p;
 615                 char       *s;
 616                 text       *ret;
 617
 618                 /*
 619                  * if S is past the end of the string, the tuple toaster will
 620                  * return a zero-length string to us
 621                  */
 622                 S1 = Max(S, 1);
 623
 624                 /*
 625                  * We need to start at position zero because there is no way to
 626                  * know in advance which byte offset corresponds to the supplied
 627                  * start position.
 628                  */
 629                 slice_start = 0;
 630
 631                 if (length_not_specified)               /* special case - get length to
 632                                                                                  * end of string */
 633                         slice_size = L1 = -1;
 634                 else
 635                 {
 636                         int                     E = S + length;
 637
 638                         /*
 639                          * A negative value for L is the only way for the end position
 640                          * to be before the start. SQL99 says to throw an error.
 641                          */
 642                         if (E < S)
 643                                 ereport(ERROR,
 644                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 645                                            errmsg("negative substring length not allowed")));
 646
 647                         /*
 648                          * A zero or negative value for the end position can happen if
 649                          * the start was negative or one. SQL99 says to return a
 650                          * zero-length string.
 651                          */
 652                         if (E < 1)
 653                                 return PG_STR_GET_TEXT("");
 654
 655                         /*
 656                          * if E is past the end of the string, the tuple toaster will
 657                          * truncate the length for us
 658                          */
 659                         L1 = E - S1;
 660
 661                         /*
 662                          * Total slice size in bytes can't be any longer than the
 663                          * start position plus substring length times the encoding max
 664                          * length.
 665                          */
 666                         slice_size = (S1 + L1) * eml;
 667                 }
 668                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
 669
 670                 /* see if we got back an empty string */
 671                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
 672                         return PG_STR_GET_TEXT("");
 673
 674                 /* Now we can get the actual length of the slice in MB characters */
 675                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
 676
 677                 /*
 678                  * Check that the start position wasn't > slice_strlen. If so,
 679                  * SQL99 says to return a zero-length string.
 680                  */
 681                 if (S1 > slice_strlen)
 682                         return PG_STR_GET_TEXT("");
 683
 684                 /*
 685                  * Adjust L1 and E1 now that we know the slice string length.
 686                  * Again remember that S1 is one based, and slice_start is zero
 687                  * based.
 688                  */
 689                 if (L1 > -1)
 690                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 691                 else
 692                         E1 = slice_start + 1 + slice_strlen;
 693
 694                 /*
 695                  * Find the start position in the slice; remember S1 is not zero
 696                  * based
 697                  */
 698                 p = VARDATA(slice);
 699                 for (i = 0; i < S1 - 1; i++)
 700                         p += pg_mblen(p);
 701
 702                 /* hang onto a pointer to our start position */
 703                 s = p;
 704
 705                 /*
 706                  * Count the actual bytes used by the substring of the requested
 707                  * length.
 708                  */
 709                 for (i = S1; i < E1; i++)
 710                         p += pg_mblen(p);
 711
 712                 ret = (text *) palloc(VARHDRSZ + (p - s));
 713                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
 714                 memcpy(VARDATA(ret), s, (p - s));
 715
 716                 return ret;
 717         }
 718         else
 719                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
 720
 721         /* not reached: suppress compiler warning */
 722         return NULL;
 723 }
 724
 725 /*
 726  * textpos -
 727  *        Return the position of the specified substring.
 728  *        Implements the SQL92 POSITION() function.
 729  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 730  * - thomas 1997-07-27
 731  */
 732 Datum
 733 textpos(PG_FUNCTION_ARGS)
 734 {
 735         text       *str = PG_GETARG_TEXT_P(0);
 736         text       *search_str = PG_GETARG_TEXT_P(1);
 737
 738         PG_RETURN_INT32(text_position(str, search_str, 1));
 739 }
 740
 741 /*
 742  * text_position -
 743  *      Does the real work for textpos()
 744  *
 745  * Inputs:
 746  *              t1 - string to be searched
 747  *              t2 - pattern to match within t1
 748  *              matchnum - number of the match to be found (1 is the first match)
 749  * Result:
 750  *              Character index of the first matched char, starting from 1,
 751  *              or 0 if no match.
 752  *
 753  *      This is broken out so it can be called directly by other string processing
 754  *      functions.
 755  */
 756 static int32
 757 text_position(text *t1, text *t2, int matchnum)
 758 {
 759         int                     match = 0,
 760                                 pos = 0,
 761                                 p,
 762                                 px,
 763                                 len1,
 764                                 len2;
 765
 766         if (matchnum <= 0)
 767                 return 0;                               /* result for 0th match */
 768
 769         if (VARSIZE(t2) <= VARHDRSZ)
 770                 return 1;                               /* result for empty pattern */
 771
 772         len1 = (VARSIZE(t1) - VARHDRSZ);
 773         len2 = (VARSIZE(t2) - VARHDRSZ);
 774
 775         if (pg_database_encoding_max_length() == 1)
 776         {
 777                 /* simple case - single byte encoding */
 778                 char       *p1,
 779                                    *p2;
 780
 781                 p1 = VARDATA(t1);
 782                 p2 = VARDATA(t2);
 783
 784                 /* no use in searching str past point where search_str will fit */
 785                 px = (len1 - len2);
 786
 787                 for (p = 0; p <= px; p++)
 788                 {
 789                         if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
 790                         {
 791                                 if (++match == matchnum)
 792                                 {
 793                                         pos = p + 1;
 794                                         break;
 795                                 }
 796                         }
 797                         p1++;
 798                 }
 799         }
 800         else
 801         {
 802                 /* not as simple - multibyte encoding */
 803                 pg_wchar   *p1,
 804                                    *p2,
 805                                    *ps1,
 806                                    *ps2;
 807
 808                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 809                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
 810                 len1 = pg_wchar_strlen(p1);
 811                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 812                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
 813                 len2 = pg_wchar_strlen(p2);
 814
 815                 /* no use in searching str past point where search_str will fit */
 816                 px = (len1 - len2);
 817
 818                 for (p = 0; p <= px; p++)
 819                 {
 820                         if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
 821                         {
 822                                 if (++match == matchnum)
 823                                 {
 824                                         pos = p + 1;
 825                                         break;
 826                                 }
 827                         }
 828                         p1++;
 829                 }
 830
 831                 pfree(ps1);
 832                 pfree(ps2);
 833         }
 834
 835         return pos;
 836 }
 837
 838 /* varstr_cmp()
 839  * Comparison function for text strings with given lengths.
 840  * Includes locale support, but must copy strings to temporary memory
 841  *      to allow null-termination for inputs to strcoll().
 842  * Returns -1, 0 or 1
 843  */
 844 int
 845 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 846 {
 847         int                     result;
 848
 849         /*
 850          * Unfortunately, there is no strncoll(), so in the non-C locale case
 851          * we have to do some memory copying.  This turns out to be
 852          * significantly slower, so we optimize the case where LC_COLLATE is
 853          * C.  We also try to optimize relatively-short strings by avoiding
 854          * palloc/pfree overhead.
 855          */
 856 #define STACKBUFLEN             1024
 857
 858         if (!lc_collate_is_c())
 859         {
 860                 char            a1buf[STACKBUFLEN];
 861                 char            a2buf[STACKBUFLEN];
 862                 char       *a1p,
 863                                    *a2p;
 864
 865                 if (len1 >= STACKBUFLEN)
 866                         a1p = (char *) palloc(len1 + 1);
 867                 else
 868                         a1p = a1buf;
 869                 if (len2 >= STACKBUFLEN)
 870                         a2p = (char *) palloc(len2 + 1);
 871                 else
 872                         a2p = a2buf;
 873
 874                 memcpy(a1p, arg1, len1);
 875                 a1p[len1] = '\0';
 876                 memcpy(a2p, arg2, len2);
 877                 a2p[len2] = '\0';
 878
 879                 result = strcoll(a1p, a2p);
 880
 881                 if (len1 >= STACKBUFLEN)
 882                         pfree(a1p);
 883                 if (len2 >= STACKBUFLEN)
 884                         pfree(a2p);
 885         }
 886         else
 887         {
 888                 result = strncmp(arg1, arg2, Min(len1, len2));
 889                 if ((result == 0) && (len1 != len2))
 890                         result = (len1 < len2) ? -1 : 1;
 891         }
 892
 893         return result;
 894 }
 895
 896
 897 /* text_cmp()
 898  * Internal comparison function for text strings.
 899  * Returns -1, 0 or 1
 900  */
 901 static int
 902 text_cmp(text *arg1, text *arg2)
 903 {
 904         char       *a1p,
 905                            *a2p;
 906         int                     len1,
 907                                 len2;
 908
 909         a1p = VARDATA(arg1);
 910         a2p = VARDATA(arg2);
 911
 912         len1 = VARSIZE(arg1) - VARHDRSZ;
 913         len2 = VARSIZE(arg2) - VARHDRSZ;
 914
 915         return varstr_cmp(a1p, len1, a2p, len2);
 916 }
 917
 918 /*
 919  * Comparison functions for text strings.
 920  *
 921  * Note: btree indexes need these routines not to leak memory; therefore,
 922  * be careful to free working copies of toasted datums.  Most places don't
 923  * need to be so careful.
 924  */
 925
 926 Datum
 927 texteq(PG_FUNCTION_ARGS)
 928 {
 929         text       *arg1 = PG_GETARG_TEXT_P(0);
 930         text       *arg2 = PG_GETARG_TEXT_P(1);
 931         bool            result;
 932
 933         /* fast path for different-length inputs */
 934         if (VARSIZE(arg1) != VARSIZE(arg2))
 935                 result = false;
 936         else
 937                 result = (text_cmp(arg1, arg2) == 0);
 938
 939         PG_FREE_IF_COPY(arg1, 0);
 940         PG_FREE_IF_COPY(arg2, 1);
 941
 942         PG_RETURN_BOOL(result);
 943 }
 944
 945 Datum
 946 textne(PG_FUNCTION_ARGS)
 947 {
 948         text       *arg1 = PG_GETARG_TEXT_P(0);
 949         text       *arg2 = PG_GETARG_TEXT_P(1);
 950         bool            result;
 951
 952         /* fast path for different-length inputs */
 953         if (VARSIZE(arg1) != VARSIZE(arg2))
 954                 result = true;
 955         else
 956                 result = (text_cmp(arg1, arg2) != 0);
 957
 958         PG_FREE_IF_COPY(arg1, 0);
 959         PG_FREE_IF_COPY(arg2, 1);
 960
 961         PG_RETURN_BOOL(result);
 962 }
 963
 964 Datum
 965 text_lt(PG_FUNCTION_ARGS)
 966 {
 967         text       *arg1 = PG_GETARG_TEXT_P(0);
 968         text       *arg2 = PG_GETARG_TEXT_P(1);
 969         bool            result;
 970
 971         result = (text_cmp(arg1, arg2) < 0);
 972
 973         PG_FREE_IF_COPY(arg1, 0);
 974         PG_FREE_IF_COPY(arg2, 1);
 975
 976         PG_RETURN_BOOL(result);
 977 }
 978
 979 Datum
 980 text_le(PG_FUNCTION_ARGS)
 981 {
 982         text       *arg1 = PG_GETARG_TEXT_P(0);
 983         text       *arg2 = PG_GETARG_TEXT_P(1);
 984         bool            result;
 985
 986         result = (text_cmp(arg1, arg2) <= 0);
 987
 988         PG_FREE_IF_COPY(arg1, 0);
 989         PG_FREE_IF_COPY(arg2, 1);
 990
 991         PG_RETURN_BOOL(result);
 992 }
 993
 994 Datum
 995 text_gt(PG_FUNCTION_ARGS)
 996 {
 997         text       *arg1 = PG_GETARG_TEXT_P(0);
 998         text       *arg2 = PG_GETARG_TEXT_P(1);
 999         bool            result;
1000
1001         result = (text_cmp(arg1, arg2) > 0);
1002
1003         PG_FREE_IF_COPY(arg1, 0);
1004         PG_FREE_IF_COPY(arg2, 1);
1005
1006         PG_RETURN_BOOL(result);
1007 }
1008
1009 Datum
1010 text_ge(PG_FUNCTION_ARGS)
1011 {
1012         text       *arg1 = PG_GETARG_TEXT_P(0);
1013         text       *arg2 = PG_GETARG_TEXT_P(1);
1014         bool            result;
1015
1016         result = (text_cmp(arg1, arg2) >= 0);
1017
1018         PG_FREE_IF_COPY(arg1, 0);
1019         PG_FREE_IF_COPY(arg2, 1);
1020
1021         PG_RETURN_BOOL(result);
1022 }
1023
1024 Datum
1025 bttextcmp(PG_FUNCTION_ARGS)
1026 {
1027         text       *arg1 = PG_GETARG_TEXT_P(0);
1028         text       *arg2 = PG_GETARG_TEXT_P(1);
1029         int32           result;
1030
1031         result = text_cmp(arg1, arg2);
1032
1033         PG_FREE_IF_COPY(arg1, 0);
1034         PG_FREE_IF_COPY(arg2, 1);
1035
1036         PG_RETURN_INT32(result);
1037 }
1038
1039
1040 Datum
1041 text_larger(PG_FUNCTION_ARGS)
1042 {
1043         text       *arg1 = PG_GETARG_TEXT_P(0);
1044         text       *arg2 = PG_GETARG_TEXT_P(1);
1045         text       *result;
1046
1047         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1048
1049         PG_RETURN_TEXT_P(result);
1050 }
1051
1052 Datum
1053 text_smaller(PG_FUNCTION_ARGS)
1054 {
1055         text       *arg1 = PG_GETARG_TEXT_P(0);
1056         text       *arg2 = PG_GETARG_TEXT_P(1);
1057         text       *result;
1058
1059         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1060
1061         PG_RETURN_TEXT_P(result);
1062 }
1063
1064
1065 /*
1066  * The following operators support character-by-character comparison
1067  * of text data types, to allow building indexes suitable for LIKE
1068  * clauses.
1069  */
1070
1071 static int
1072 internal_text_pattern_compare(text *arg1, text *arg2)
1073 {
1074         int                     result;
1075
1076         result = memcmp(VARDATA(arg1), VARDATA(arg2),
1077                                         Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1078         if (result != 0)
1079                 return result;
1080         else if (VARSIZE(arg1) < VARSIZE(arg2))
1081                 return -1;
1082         else if (VARSIZE(arg1) > VARSIZE(arg2))
1083                 return 1;
1084         else
1085                 return 0;
1086 }
1087
1088
1089 Datum
1090 text_pattern_lt(PG_FUNCTION_ARGS)
1091 {
1092         text       *arg1 = PG_GETARG_TEXT_P(0);
1093         text       *arg2 = PG_GETARG_TEXT_P(1);
1094         int                     result;
1095
1096         result = internal_text_pattern_compare(arg1, arg2);
1097
1098         PG_FREE_IF_COPY(arg1, 0);
1099         PG_FREE_IF_COPY(arg2, 1);
1100
1101         PG_RETURN_BOOL(result < 0);
1102 }
1103
1104
1105 Datum
1106 text_pattern_le(PG_FUNCTION_ARGS)
1107 {
1108         text       *arg1 = PG_GETARG_TEXT_P(0);
1109         text       *arg2 = PG_GETARG_TEXT_P(1);
1110         int                     result;
1111
1112         result = internal_text_pattern_compare(arg1, arg2);
1113
1114         PG_FREE_IF_COPY(arg1, 0);
1115         PG_FREE_IF_COPY(arg2, 1);
1116
1117         PG_RETURN_BOOL(result <= 0);
1118 }
1119
1120
1121 Datum
1122 text_pattern_eq(PG_FUNCTION_ARGS)
1123 {
1124         text       *arg1 = PG_GETARG_TEXT_P(0);
1125         text       *arg2 = PG_GETARG_TEXT_P(1);
1126         int                     result;
1127
1128         if (VARSIZE(arg1) != VARSIZE(arg2))
1129                 result = 1;
1130         else
1131                 result = internal_text_pattern_compare(arg1, arg2);
1132
1133         PG_FREE_IF_COPY(arg1, 0);
1134         PG_FREE_IF_COPY(arg2, 1);
1135
1136         PG_RETURN_BOOL(result == 0);
1137 }
1138
1139
1140 Datum
1141 text_pattern_ge(PG_FUNCTION_ARGS)
1142 {
1143         text       *arg1 = PG_GETARG_TEXT_P(0);
1144         text       *arg2 = PG_GETARG_TEXT_P(1);
1145         int                     result;
1146
1147         result = internal_text_pattern_compare(arg1, arg2);
1148
1149         PG_FREE_IF_COPY(arg1, 0);
1150         PG_FREE_IF_COPY(arg2, 1);
1151
1152         PG_RETURN_BOOL(result >= 0);
1153 }
1154
1155
1156 Datum
1157 text_pattern_gt(PG_FUNCTION_ARGS)
1158 {
1159         text       *arg1 = PG_GETARG_TEXT_P(0);
1160         text       *arg2 = PG_GETARG_TEXT_P(1);
1161         int                     result;
1162
1163         result = internal_text_pattern_compare(arg1, arg2);
1164
1165         PG_FREE_IF_COPY(arg1, 0);
1166         PG_FREE_IF_COPY(arg2, 1);
1167
1168         PG_RETURN_BOOL(result > 0);
1169 }
1170
1171
1172 Datum
1173 text_pattern_ne(PG_FUNCTION_ARGS)
1174 {
1175         text       *arg1 = PG_GETARG_TEXT_P(0);
1176         text       *arg2 = PG_GETARG_TEXT_P(1);
1177         int                     result;
1178
1179         if (VARSIZE(arg1) != VARSIZE(arg2))
1180                 result = 1;
1181         else
1182                 result = internal_text_pattern_compare(arg1, arg2);
1183
1184         PG_FREE_IF_COPY(arg1, 0);
1185         PG_FREE_IF_COPY(arg2, 1);
1186
1187         PG_RETURN_BOOL(result != 0);
1188 }
1189
1190
1191 Datum
1192 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1193 {
1194         text       *arg1 = PG_GETARG_TEXT_P(0);
1195         text       *arg2 = PG_GETARG_TEXT_P(1);
1196         int                     result;
1197
1198         result = internal_text_pattern_compare(arg1, arg2);
1199
1200         PG_FREE_IF_COPY(arg1, 0);
1201         PG_FREE_IF_COPY(arg2, 1);
1202
1203         PG_RETURN_INT32(result);
1204 }
1205
1206
1207 /*-------------------------------------------------------------
1208  * byteaoctetlen
1209  *
1210  * get the number of bytes contained in an instance of type 'bytea'
1211  *-------------------------------------------------------------
1212  */
1213 Datum
1214 byteaoctetlen(PG_FUNCTION_ARGS)
1215 {
1216         Datum           str = PG_GETARG_DATUM(0);
1217
1218         /* We need not detoast the input at all */
1219         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1220 }
1221
1222 /*
1223  * byteacat -
1224  *        takes two bytea* and returns a bytea* that is the concatenation of
1225  *        the two.
1226  *
1227  * Cloned from textcat and modified as required.
1228  */
1229 Datum
1230 byteacat(PG_FUNCTION_ARGS)
1231 {
1232         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1233         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1234         int                     len1,
1235                                 len2,
1236                                 len;
1237         bytea      *result;
1238         char       *ptr;
1239
1240         len1 = (VARSIZE(t1) - VARHDRSZ);
1241         if (len1 < 0)
1242                 len1 = 0;
1243
1244         len2 = (VARSIZE(t2) - VARHDRSZ);
1245         if (len2 < 0)
1246                 len2 = 0;
1247
1248         len = len1 + len2 + VARHDRSZ;
1249         result = (bytea *) palloc(len);
1250
1251         /* Set size of result string... */
1252         VARATT_SIZEP(result) = len;
1253
1254         /* Fill data field of result string... */
1255         ptr = VARDATA(result);
1256         if (len1 > 0)
1257                 memcpy(ptr, VARDATA(t1), len1);
1258         if (len2 > 0)
1259                 memcpy(ptr + len1, VARDATA(t2), len2);
1260
1261         PG_RETURN_BYTEA_P(result);
1262 }
1263
1264 #define PG_STR_GET_BYTEA(str_) \
1265         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1266 /*
1267  * bytea_substr()
1268  * Return a substring starting at the specified position.
1269  * Cloned from text_substr and modified as required.
1270  *
1271  * Input:
1272  *      - string
1273  *      - starting position (is one-based)
1274  *      - string length (optional)
1275  *
1276  * If the starting position is zero or less, then return from the start of the string
1277  * adjusting the length to be consistent with the "negative start" per SQL92.
1278  * If the length is less than zero, an ERROR is thrown. If no third argument
1279  * (length) is provided, the length to the end of the string is assumed.
1280  */
1281 Datum
1282 bytea_substr(PG_FUNCTION_ARGS)
1283 {
1284         int                     S = PG_GETARG_INT32(1); /* start position */
1285         int                     S1;                             /* adjusted start position */
1286         int                     L1;                             /* adjusted substring length */
1287
1288         S1 = Max(S, 1);
1289
1290         if (fcinfo->nargs == 2)
1291         {
1292                 /*
1293                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1294                  * everything to the end of the string if we pass it a negative
1295                  * value for length.
1296                  */
1297                 L1 = -1;
1298         }
1299         else
1300         {
1301                 /* end position */
1302                 int                     E = S + PG_GETARG_INT32(2);
1303
1304                 /*
1305                  * A negative value for L is the only way for the end position to
1306                  * be before the start. SQL99 says to throw an error.
1307                  */
1308                 if (E < S)
1309                         ereport(ERROR,
1310                                         (errcode(ERRCODE_SUBSTRING_ERROR),
1311                                          errmsg("negative substring length not allowed")));
1312
1313                 /*
1314                  * A zero or negative value for the end position can happen if the
1315                  * start was negative or one. SQL99 says to return a zero-length
1316                  * string.
1317                  */
1318                 if (E < 1)
1319                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1320
1321                 L1 = E - S1;
1322         }
1323
1324         /*
1325          * If the start position is past the end of the string, SQL99 says to
1326          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
1327          * that for us. Convert to zero-based starting position
1328          */
1329         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1330 }
1331
1332 /*
1333  * bytea_substr_no_len -
1334  *        Wrapper to avoid opr_sanity failure due to
1335  *        one function accepting a different number of args.
1336  */
1337 Datum
1338 bytea_substr_no_len(PG_FUNCTION_ARGS)
1339 {
1340         return bytea_substr(fcinfo);
1341 }
1342
1343 /*
1344  * byteapos -
1345  *        Return the position of the specified substring.
1346  *        Implements the SQL92 POSITION() function.
1347  * Cloned from textpos and modified as required.
1348  */
1349 Datum
1350 byteapos(PG_FUNCTION_ARGS)
1351 {
1352         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1353         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1354         int                     pos;
1355         int                     px,
1356                                 p;
1357         int                     len1,
1358                                 len2;
1359         char       *p1,
1360                            *p2;
1361
1362         if (VARSIZE(t2) <= VARHDRSZ)
1363                 PG_RETURN_INT32(1);             /* result for empty pattern */
1364
1365         len1 = (VARSIZE(t1) - VARHDRSZ);
1366         len2 = (VARSIZE(t2) - VARHDRSZ);
1367
1368         p1 = VARDATA(t1);
1369         p2 = VARDATA(t2);
1370
1371         pos = 0;
1372         px = (len1 - len2);
1373         for (p = 0; p <= px; p++)
1374         {
1375                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1376                 {
1377                         pos = p + 1;
1378                         break;
1379                 };
1380                 p1++;
1381         };
1382
1383         PG_RETURN_INT32(pos);
1384 }
1385
1386 /*-------------------------------------------------------------
1387  * byteaGetByte
1388  *
1389  * this routine treats "bytea" as an array of bytes.
1390  * It returns the Nth byte (a number between 0 and 255).
1391  *-------------------------------------------------------------
1392  */
1393 Datum
1394 byteaGetByte(PG_FUNCTION_ARGS)
1395 {
1396         bytea      *v = PG_GETARG_BYTEA_P(0);
1397         int32           n = PG_GETARG_INT32(1);
1398         int                     len;
1399         int                     byte;
1400
1401         len = VARSIZE(v) - VARHDRSZ;
1402
1403         if (n < 0 || n >= len)
1404                 ereport(ERROR,
1405                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1406                                  errmsg("index %d out of valid range, 0..%d",
1407                                                 n, len - 1)));
1408
1409         byte = ((unsigned char *) VARDATA(v))[n];
1410
1411         PG_RETURN_INT32(byte);
1412 }
1413
1414 /*-------------------------------------------------------------
1415  * byteaGetBit
1416  *
1417  * This routine treats a "bytea" type like an array of bits.
1418  * It returns the value of the Nth bit (0 or 1).
1419  *
1420  *-------------------------------------------------------------
1421  */
1422 Datum
1423 byteaGetBit(PG_FUNCTION_ARGS)
1424 {
1425         bytea      *v = PG_GETARG_BYTEA_P(0);
1426         int32           n = PG_GETARG_INT32(1);
1427         int                     byteNo,
1428                                 bitNo;
1429         int                     len;
1430         int                     byte;
1431
1432         len = VARSIZE(v) - VARHDRSZ;
1433
1434         if (n < 0 || n >= len * 8)
1435                 ereport(ERROR,
1436                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1437                                  errmsg("index %d out of valid range, 0..%d",
1438                                                 n, len * 8 - 1)));
1439
1440         byteNo = n / 8;
1441         bitNo = n % 8;
1442
1443         byte = ((unsigned char *) VARDATA(v))[byteNo];
1444
1445         if (byte & (1 << bitNo))
1446                 PG_RETURN_INT32(1);
1447         else
1448                 PG_RETURN_INT32(0);
1449 }
1450
1451 /*-------------------------------------------------------------
1452  * byteaSetByte
1453  *
1454  * Given an instance of type 'bytea' creates a new one with
1455  * the Nth byte set to the given value.
1456  *
1457  *-------------------------------------------------------------
1458  */
1459 Datum
1460 byteaSetByte(PG_FUNCTION_ARGS)
1461 {
1462         bytea      *v = PG_GETARG_BYTEA_P(0);
1463         int32           n = PG_GETARG_INT32(1);
1464         int32           newByte = PG_GETARG_INT32(2);
1465         int                     len;
1466         bytea      *res;
1467
1468         len = VARSIZE(v) - VARHDRSZ;
1469
1470         if (n < 0 || n >= len)
1471                 ereport(ERROR,
1472                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1473                                  errmsg("index %d out of valid range, 0..%d",
1474                                                 n, len - 1)));
1475
1476         /*
1477          * Make a copy of the original varlena.
1478          */
1479         res = (bytea *) palloc(VARSIZE(v));
1480         memcpy((char *) res, (char *) v, VARSIZE(v));
1481
1482         /*
1483          * Now set the byte.
1484          */
1485         ((unsigned char *) VARDATA(res))[n] = newByte;
1486
1487         PG_RETURN_BYTEA_P(res);
1488 }
1489
1490 /*-------------------------------------------------------------
1491  * byteaSetBit
1492  *
1493  * Given an instance of type 'bytea' creates a new one with
1494  * the Nth bit set to the given value.
1495  *
1496  *-------------------------------------------------------------
1497  */
1498 Datum
1499 byteaSetBit(PG_FUNCTION_ARGS)
1500 {
1501         bytea      *v = PG_GETARG_BYTEA_P(0);
1502         int32           n = PG_GETARG_INT32(1);
1503         int32           newBit = PG_GETARG_INT32(2);
1504         bytea      *res;
1505         int                     len;
1506         int                     oldByte,
1507                                 newByte;
1508         int                     byteNo,
1509                                 bitNo;
1510
1511         len = VARSIZE(v) - VARHDRSZ;
1512
1513         if (n < 0 || n >= len * 8)
1514                 ereport(ERROR,
1515                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1516                                  errmsg("index %d out of valid range, 0..%d",
1517                                                 n, len * 8 - 1)));
1518
1519         byteNo = n / 8;
1520         bitNo = n % 8;
1521
1522         /*
1523          * sanity check!
1524          */
1525         if (newBit != 0 && newBit != 1)
1526                 ereport(ERROR,
1527                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1528                                  errmsg("new bit must be 0 or 1")));
1529
1530         /*
1531          * Make a copy of the original varlena.
1532          */
1533         res = (bytea *) palloc(VARSIZE(v));
1534         memcpy((char *) res, (char *) v, VARSIZE(v));
1535
1536         /*
1537          * Update the byte.
1538          */
1539         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1540
1541         if (newBit == 0)
1542                 newByte = oldByte & (~(1 << bitNo));
1543         else
1544                 newByte = oldByte | (1 << bitNo);
1545
1546         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1547
1548         PG_RETURN_BYTEA_P(res);
1549 }
1550
1551
1552 /* text_name()
1553  * Converts a text type to a Name type.
1554  */
1555 Datum
1556 text_name(PG_FUNCTION_ARGS)
1557 {
1558         text       *s = PG_GETARG_TEXT_P(0);
1559         Name            result;
1560         int                     len;
1561
1562         len = VARSIZE(s) - VARHDRSZ;
1563
1564         /* Truncate oversize input */
1565         if (len >= NAMEDATALEN)
1566                 len = NAMEDATALEN - 1;
1567
1568 #ifdef STRINGDEBUG
1569         printf("text- convert string length %d (%d) ->%d\n",
1570                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1571 #endif
1572
1573         result = (Name) palloc(NAMEDATALEN);
1574         memcpy(NameStr(*result), VARDATA(s), len);
1575
1576         /* now null pad to full length... */
1577         while (len < NAMEDATALEN)
1578         {
1579                 *(NameStr(*result) + len) = '\0';
1580                 len++;
1581         }
1582
1583         PG_RETURN_NAME(result);
1584 }
1585
1586 /* name_text()
1587  * Converts a Name type to a text type.
1588  */
1589 Datum
1590 name_text(PG_FUNCTION_ARGS)
1591 {
1592         Name            s = PG_GETARG_NAME(0);
1593         text       *result;
1594         int                     len;
1595
1596         len = strlen(NameStr(*s));
1597
1598 #ifdef STRINGDEBUG
1599         printf("text- convert string length %d (%d) ->%d\n",
1600                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1601 #endif
1602
1603         result = palloc(VARHDRSZ + len);
1604         VARATT_SIZEP(result) = VARHDRSZ + len;
1605         memcpy(VARDATA(result), NameStr(*s), len);
1606
1607         PG_RETURN_TEXT_P(result);
1608 }
1609
1610
1611 /*
1612  * textToQualifiedNameList - convert a text object to list of names
1613  *
1614  * This implements the input parsing needed by nextval() and other
1615  * functions that take a text parameter representing a qualified name.
1616  * We split the name at dots, downcase if not double-quoted, and
1617  * truncate names if they're too long.
1618  */
1619 List *
1620 textToQualifiedNameList(text *textval, const char *caller)
1621 {
1622         char       *rawname;
1623         List       *result = NIL;
1624         List       *namelist;
1625         List       *l;
1626
1627         /* Convert to C string (handles possible detoasting). */
1628         /* Note we rely on being able to modify rawname below. */
1629         rawname = DatumGetCString(DirectFunctionCall1(textout,
1630                                                                                           PointerGetDatum(textval)));
1631
1632         if (!SplitIdentifierString(rawname, '.', &namelist))
1633                 ereport(ERROR,
1634                                 (errcode(ERRCODE_INVALID_NAME),
1635                                  errmsg("invalid name syntax")));
1636
1637         if (namelist == NIL)
1638                 ereport(ERROR,
1639                                 (errcode(ERRCODE_INVALID_NAME),
1640                                  errmsg("invalid name syntax")));
1641
1642         foreach(l, namelist)
1643         {
1644                 char       *curname = (char *) lfirst(l);
1645
1646                 result = lappend(result, makeString(pstrdup(curname)));
1647         }
1648
1649         pfree(rawname);
1650         freeList(namelist);
1651
1652         return result;
1653 }
1654
1655 /*
1656  * SplitIdentifierString --- parse a string containing identifiers
1657  *
1658  * This is the guts of textToQualifiedNameList, and is exported for use in
1659  * other situations such as parsing GUC variables.      In the GUC case, it's
1660  * important to avoid memory leaks, so the API is designed to minimize the
1661  * amount of stuff that needs to be allocated and freed.
1662  *
1663  * Inputs:
1664  *      rawstring: the input string; must be overwritable!      On return, it's
1665  *                         been modified to contain the separated identifiers.
1666  *      separator: the separator punctuation expected between identifiers
1667  *                         (typically '.' or ',').      Whitespace may also appear around
1668  *                         identifiers.
1669  * Outputs:
1670  *      namelist: filled with a palloc'd list of pointers to identifiers within
1671  *                        rawstring.  Caller should freeList() this even on error return.
1672  *
1673  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1674  *
1675  * Note that an empty string is considered okay here, though not in
1676  * textToQualifiedNameList.
1677  */
1678 bool
1679 SplitIdentifierString(char *rawstring, char separator,
1680                                           List **namelist)
1681 {
1682         char       *nextp = rawstring;
1683         bool            done = false;
1684
1685         *namelist = NIL;
1686
1687         while (isspace((unsigned char) *nextp))
1688                 nextp++;                                /* skip leading whitespace */
1689
1690         if (*nextp == '\0')
1691                 return true;                    /* allow empty string */
1692
1693         /* At the top of the loop, we are at start of a new identifier. */
1694         do
1695         {
1696                 char       *curname;
1697                 char       *endp;
1698                 int                     curlen;
1699
1700                 if (*nextp == '\"')
1701                 {
1702                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1703                         curname = nextp + 1;
1704                         for (;;)
1705                         {
1706                                 endp = strchr(nextp + 1, '\"');
1707                                 if (endp == NULL)
1708                                         return false;           /* mismatched quotes */
1709                                 if (endp[1] != '\"')
1710                                         break;          /* found end of quoted name */
1711                                 /* Collapse adjacent quotes into one quote, and look again */
1712                                 memmove(endp, endp + 1, strlen(endp));
1713                                 nextp = endp;
1714                         }
1715                         /* endp now points at the terminating quote */
1716                         nextp = endp + 1;
1717                 }
1718                 else
1719                 {
1720                         /* Unquoted name --- extends to separator or whitespace */
1721                         curname = nextp;
1722                         while (*nextp && *nextp != separator &&
1723                                    !isspace((unsigned char) *nextp))
1724                         {
1725                                 /*
1726                                  * It's important that this match the identifier
1727                                  * downcasing code used by backend/parser/scan.l.
1728                                  */
1729                                 if (isupper((unsigned char) *nextp))
1730                                         *nextp = tolower((unsigned char) *nextp);
1731                                 nextp++;
1732                         }
1733                         endp = nextp;
1734                         if (curname == nextp)
1735                                 return false;   /* empty unquoted name not allowed */
1736                 }
1737
1738                 while (isspace((unsigned char) *nextp))
1739                         nextp++;                        /* skip trailing whitespace */
1740
1741                 if (*nextp == separator)
1742                 {
1743                         nextp++;
1744                         while (isspace((unsigned char) *nextp))
1745                                 nextp++;                /* skip leading whitespace for next */
1746                         /* we expect another name, so done remains false */
1747                 }
1748                 else if (*nextp == '\0')
1749                         done = true;
1750                 else
1751                         return false;           /* invalid syntax */
1752
1753                 /* Now safe to overwrite separator with a null */
1754                 *endp = '\0';
1755
1756                 /* Truncate name if it's overlength; again, should match scan.l */
1757                 curlen = strlen(curname);
1758                 if (curlen >= NAMEDATALEN)
1759                 {
1760                         curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1761                         curname[curlen] = '\0';
1762                 }
1763
1764                 /*
1765                  * Finished isolating current name --- add it to list
1766                  */
1767                 *namelist = lappend(*namelist, curname);
1768
1769                 /* Loop back if we didn't reach end of string */
1770         } while (!done);
1771
1772         return true;
1773 }
1774
1775
1776 /*****************************************************************************
1777  *      Comparison Functions used for bytea
1778  *
1779  * Note: btree indexes need these routines not to leak memory; therefore,
1780  * be careful to free working copies of toasted datums.  Most places don't
1781  * need to be so careful.
1782  *****************************************************************************/
1783
1784 Datum
1785 byteaeq(PG_FUNCTION_ARGS)
1786 {
1787         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1788         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1789         int                     len1,
1790                                 len2;
1791         bool            result;
1792
1793         len1 = VARSIZE(arg1) - VARHDRSZ;
1794         len2 = VARSIZE(arg2) - VARHDRSZ;
1795
1796         /* fast path for different-length inputs */
1797         if (len1 != len2)
1798                 result = false;
1799         else
1800                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1801
1802         PG_FREE_IF_COPY(arg1, 0);
1803         PG_FREE_IF_COPY(arg2, 1);
1804
1805         PG_RETURN_BOOL(result);
1806 }
1807
1808 Datum
1809 byteane(PG_FUNCTION_ARGS)
1810 {
1811         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1812         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1813         int                     len1,
1814                                 len2;
1815         bool            result;
1816
1817         len1 = VARSIZE(arg1) - VARHDRSZ;
1818         len2 = VARSIZE(arg2) - VARHDRSZ;
1819
1820         /* fast path for different-length inputs */
1821         if (len1 != len2)
1822                 result = true;
1823         else
1824                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1825
1826         PG_FREE_IF_COPY(arg1, 0);
1827         PG_FREE_IF_COPY(arg2, 1);
1828
1829         PG_RETURN_BOOL(result);
1830 }
1831
1832 Datum
1833 bytealt(PG_FUNCTION_ARGS)
1834 {
1835         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1836         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1837         int                     len1,
1838                                 len2;
1839         int                     cmp;
1840
1841         len1 = VARSIZE(arg1) - VARHDRSZ;
1842         len2 = VARSIZE(arg2) - VARHDRSZ;
1843
1844         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1845
1846         PG_FREE_IF_COPY(arg1, 0);
1847         PG_FREE_IF_COPY(arg2, 1);
1848
1849         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1850 }
1851
1852 Datum
1853 byteale(PG_FUNCTION_ARGS)
1854 {
1855         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1856         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1857         int                     len1,
1858                                 len2;
1859         int                     cmp;
1860
1861         len1 = VARSIZE(arg1) - VARHDRSZ;
1862         len2 = VARSIZE(arg2) - VARHDRSZ;
1863
1864         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1865
1866         PG_FREE_IF_COPY(arg1, 0);
1867         PG_FREE_IF_COPY(arg2, 1);
1868
1869         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1870 }
1871
1872 Datum
1873 byteagt(PG_FUNCTION_ARGS)
1874 {
1875         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1876         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1877         int                     len1,
1878                                 len2;
1879         int                     cmp;
1880
1881         len1 = VARSIZE(arg1) - VARHDRSZ;
1882         len2 = VARSIZE(arg2) - VARHDRSZ;
1883
1884         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1885
1886         PG_FREE_IF_COPY(arg1, 0);
1887         PG_FREE_IF_COPY(arg2, 1);
1888
1889         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1890 }
1891
1892 Datum
1893 byteage(PG_FUNCTION_ARGS)
1894 {
1895         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1896         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1897         int                     len1,
1898                                 len2;
1899         int                     cmp;
1900
1901         len1 = VARSIZE(arg1) - VARHDRSZ;
1902         len2 = VARSIZE(arg2) - VARHDRSZ;
1903
1904         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1905
1906         PG_FREE_IF_COPY(arg1, 0);
1907         PG_FREE_IF_COPY(arg2, 1);
1908
1909         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1910 }
1911
1912 Datum
1913 byteacmp(PG_FUNCTION_ARGS)
1914 {
1915         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1916         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1917         int                     len1,
1918                                 len2;
1919         int                     cmp;
1920
1921         len1 = VARSIZE(arg1) - VARHDRSZ;
1922         len2 = VARSIZE(arg2) - VARHDRSZ;
1923
1924         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1925         if ((cmp == 0) && (len1 != len2))
1926                 cmp = (len1 < len2) ? -1 : 1;
1927
1928         PG_FREE_IF_COPY(arg1, 0);
1929         PG_FREE_IF_COPY(arg2, 1);
1930
1931         PG_RETURN_INT32(cmp);
1932 }
1933
1934 /*
1935  * replace_text
1936  * replace all occurrences of 'old_sub_str' in 'orig_str'
1937  * with 'new_sub_str' to form 'new_str'
1938  *
1939  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1940  * otherwise returns 'new_str'
1941  */
1942 Datum
1943 replace_text(PG_FUNCTION_ARGS)
1944 {
1945         text       *src_text = PG_GETARG_TEXT_P(0);
1946         text       *from_sub_text = PG_GETARG_TEXT_P(1);
1947         text       *to_sub_text = PG_GETARG_TEXT_P(2);
1948         int                     src_text_len = TEXTLEN(src_text);
1949         int                     from_sub_text_len = TEXTLEN(from_sub_text);
1950         char       *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1951         text       *left_text;
1952         text       *right_text;
1953         text       *buf_text;
1954         text       *ret_text;
1955         int                     curr_posn;
1956         StringInfo      str = makeStringInfo();
1957
1958         if (src_text_len == 0 || from_sub_text_len == 0)
1959                 PG_RETURN_TEXT_P(src_text);
1960
1961         buf_text = TEXTDUP(src_text);
1962         curr_posn = TEXTPOS(buf_text, from_sub_text);
1963
1964         while (curr_posn > 0)
1965         {
1966                 left_text = LEFT(buf_text, from_sub_text);
1967                 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1968
1969                 appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
1970                 appendStringInfoString(str, to_sub_str);
1971
1972                 pfree(buf_text);
1973                 pfree(left_text);
1974                 buf_text = right_text;
1975                 curr_posn = TEXTPOS(buf_text, from_sub_text);
1976         }
1977
1978         appendStringInfoString(str, PG_TEXT_GET_STR(buf_text));
1979         pfree(buf_text);
1980
1981         ret_text = PG_STR_GET_TEXT(str->data);
1982         pfree(str->data);
1983         pfree(str);
1984
1985         PG_RETURN_TEXT_P(ret_text);
1986 }
1987
1988 /*
1989  * split_text
1990  * parse input string
1991  * return ord item (1 based)
1992  * based on provided field separator
1993  */
1994 Datum
1995 split_text(PG_FUNCTION_ARGS)
1996 {
1997         text       *inputstring = PG_GETARG_TEXT_P(0);
1998         text       *fldsep = PG_GETARG_TEXT_P(1);
1999         int                     fldnum = PG_GETARG_INT32(2);
2000         int                     inputstring_len = TEXTLEN(inputstring);
2001         int                     fldsep_len = TEXTLEN(fldsep);
2002         int                     start_posn;
2003         int                     end_posn;
2004         text       *result_text;
2005
2006         /* field number is 1 based */
2007         if (fldnum < 1)
2008                 ereport(ERROR,
2009                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2010                                  errmsg("field position must be greater than zero")));
2011
2012         /* return empty string for empty input string */
2013         if (inputstring_len < 1)
2014                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2015
2016         /* empty field separator */
2017         if (fldsep_len < 1)
2018         {
2019                 /* if first field, return input string, else empty string */
2020                 if (fldnum == 1)
2021                         PG_RETURN_TEXT_P(inputstring);
2022                 else
2023                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2024         }
2025
2026         start_posn = text_position(inputstring, fldsep, fldnum - 1);
2027         end_posn = text_position(inputstring, fldsep, fldnum);
2028
2029         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2030         {
2031                 /* if first field, return input string, else empty string */
2032                 if (fldnum == 1)
2033                         PG_RETURN_TEXT_P(inputstring);
2034                 else
2035                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2036         }
2037         else if (start_posn == 0)
2038         {
2039                 /* first field requested */
2040                 result_text = LEFT(inputstring, fldsep);
2041                 PG_RETURN_TEXT_P(result_text);
2042         }
2043         else if (end_posn == 0)
2044         {
2045                 /* last field requested */
2046                 result_text = text_substring(PointerGetDatum(inputstring),
2047                                                                          start_posn + fldsep_len,
2048                                                                          -1, true);
2049                 PG_RETURN_TEXT_P(result_text);
2050         }
2051         else
2052         {
2053                 /* interior field requested */
2054                 result_text = text_substring(PointerGetDatum(inputstring),
2055                                                                          start_posn + fldsep_len,
2056                                                                          end_posn - start_posn - fldsep_len,
2057                                                                          false);
2058                 PG_RETURN_TEXT_P(result_text);
2059         }
2060 }
2061
2062 /*
2063  * text_to_array
2064  * parse input string
2065  * return text array of elements
2066  * based on provided field separator
2067  */
2068 Datum
2069 text_to_array(PG_FUNCTION_ARGS)
2070 {
2071         text       *inputstring = PG_GETARG_TEXT_P(0);
2072         text       *fldsep = PG_GETARG_TEXT_P(1);
2073         int                     inputstring_len = TEXTLEN(inputstring);
2074         int                     fldsep_len = TEXTLEN(fldsep);
2075         int                     fldnum;
2076         int                     start_posn;
2077         int                     end_posn;
2078         text       *result_text;
2079         ArrayBuildState *astate = NULL;
2080
2081         /* return NULL for empty input string */
2082         if (inputstring_len < 1)
2083                 PG_RETURN_NULL();
2084
2085         /*
2086          * empty field separator return one element, 1D, array using the input
2087          * string
2088          */
2089         if (fldsep_len < 1)
2090                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2091                                                                            CStringGetDatum(inputstring), 1));
2092
2093         /* start with end position holding the initial start position */
2094         end_posn = 0;
2095         for (fldnum = 1;; fldnum++) /* field number is 1 based */
2096         {
2097                 Datum           dvalue;
2098                 bool            disnull = false;
2099
2100                 start_posn = end_posn;
2101                 end_posn = text_position(inputstring, fldsep, fldnum);
2102
2103                 if ((start_posn == 0) && (end_posn == 0))               /* fldsep not found */
2104                 {
2105                         if (fldnum == 1)
2106                         {
2107                                 /*
2108                                  * first element return one element, 1D, array using the
2109                                  * input string
2110                                  */
2111                                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2112                                                                            CStringGetDatum(inputstring), 1));
2113                         }
2114                         else
2115                         {
2116                                 /* otherwise create array and exit */
2117                                 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2118                                                                                                           CurrentMemoryContext));
2119                         }
2120                 }
2121                 else if (start_posn == 0)
2122                 {
2123                         /* first field requested */
2124                         result_text = LEFT(inputstring, fldsep);
2125                 }
2126                 else if (end_posn == 0)
2127                 {
2128                         /* last field requested */
2129                         result_text = text_substring(PointerGetDatum(inputstring),
2130                                                                                  start_posn + fldsep_len,
2131                                                                                  -1, true);
2132                 }
2133                 else
2134                 {
2135                         /* interior field requested */
2136                         result_text = text_substring(PointerGetDatum(inputstring),
2137                                                                                  start_posn + fldsep_len,
2138                                                                                  end_posn - start_posn - fldsep_len,
2139                                                                                  false);
2140                 }
2141
2142                 /* stash away current value */
2143                 dvalue = PointerGetDatum(result_text);
2144                 astate = accumArrayResult(astate, dvalue,
2145                                                                   disnull, TEXTOID,
2146                                                                   CurrentMemoryContext);
2147         }
2148
2149         /* never reached -- keep compiler quiet */
2150         PG_RETURN_NULL();
2151 }
2152
2153 /*
2154  * array_to_text
2155  * concatenate Cstring representation of input array elements
2156  * using provided field separator
2157  */
2158 Datum
2159 array_to_text(PG_FUNCTION_ARGS)
2160 {
2161         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2162         char       *fldsep = PG_TEXTARG_GET_STR(1);
2163         int                     nitems,
2164                            *dims,
2165                                 ndims;
2166         char       *p;
2167         Oid                     element_type;
2168         int                     typlen;
2169         bool            typbyval;
2170         char            typalign;
2171         Oid                     typelem;
2172         StringInfo      result_str = makeStringInfo();
2173         int                     i;
2174         ArrayMetaState *my_extra;
2175
2176         p = ARR_DATA_PTR(v);
2177         ndims = ARR_NDIM(v);
2178         dims = ARR_DIMS(v);
2179         nitems = ArrayGetNItems(ndims, dims);
2180
2181         /* if there are no elements, return an empty string */
2182         if (nitems == 0)
2183                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2184
2185         element_type = ARR_ELEMTYPE(v);
2186
2187         /*
2188          * We arrange to look up info about element type, including its output
2189          * conversion proc, only once per series of calls, assuming the
2190          * element type doesn't change underneath us.
2191          */
2192         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2193         if (my_extra == NULL)
2194         {
2195                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2196                                                                                                  sizeof(ArrayMetaState));
2197                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2198                 my_extra->element_type = InvalidOid;
2199         }
2200
2201         if (my_extra->element_type != element_type)
2202         {
2203                 /*
2204                  * Get info about element type, including its output conversion
2205                  * proc
2206                  */
2207                 get_type_io_data(element_type, IOFunc_output,
2208                                                  &my_extra->typlen, &my_extra->typbyval,
2209                                                  &my_extra->typalign, &my_extra->typdelim,
2210                                                  &my_extra->typelem, &my_extra->typiofunc);
2211                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2212                                           fcinfo->flinfo->fn_mcxt);
2213                 my_extra->element_type = element_type;
2214         }
2215         typlen = my_extra->typlen;
2216         typbyval = my_extra->typbyval;
2217         typalign = my_extra->typalign;
2218         typelem = my_extra->typelem;
2219
2220         for (i = 0; i < nitems; i++)
2221         {
2222                 Datum           itemvalue;
2223                 char       *value;
2224
2225                 itemvalue = fetch_att(p, typbyval, typlen);
2226
2227                 value = DatumGetCString(FunctionCall3(&my_extra->proc,
2228                                                                                           itemvalue,
2229                                                                                           ObjectIdGetDatum(typelem),
2230                                                                                           Int32GetDatum(-1)));
2231
2232                 if (i > 0)
2233                         appendStringInfo(result_str, "%s%s", fldsep, value);
2234                 else
2235                         appendStringInfo(result_str, "%s", value);
2236
2237                 p = att_addlength(p, typlen, PointerGetDatum(p));
2238                 p = (char *) att_align(p, typalign);
2239         }
2240
2241         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2242 }
2243
2244 #define HEXBASE 16
2245 /*
2246  * Convert a int32 to a string containing a base 16 (hex) representation of
2247  * the number.
2248  */
2249 Datum
2250 to_hex32(PG_FUNCTION_ARGS)
2251 {
2252         uint32          value = (uint32) PG_GETARG_INT32(0);
2253         text       *result_text;
2254         char       *ptr;
2255         const char *digits = "0123456789abcdef";
2256         char            buf[32];                /* bigger than needed, but reasonable */
2257
2258         ptr = buf + sizeof(buf) - 1;
2259         *ptr = '\0';
2260
2261         do
2262         {
2263                 *--ptr = digits[value % HEXBASE];
2264                 value /= HEXBASE;
2265         } while (ptr > buf && value);
2266
2267         result_text = PG_STR_GET_TEXT(ptr);
2268         PG_RETURN_TEXT_P(result_text);
2269 }
2270
2271 /*
2272  * Convert a int64 to a string containing a base 16 (hex) representation of
2273  * the number.
2274  */
2275 Datum
2276 to_hex64(PG_FUNCTION_ARGS)
2277 {
2278         uint64          value = (uint64) PG_GETARG_INT64(0);
2279         text       *result_text;
2280         char       *ptr;
2281         const char *digits = "0123456789abcdef";
2282         char            buf[32];                /* bigger than needed, but reasonable */
2283
2284         ptr = buf + sizeof(buf) - 1;
2285         *ptr = '\0';
2286
2287         do
2288         {
2289                 *--ptr = digits[value % HEXBASE];
2290                 value /= HEXBASE;
2291         } while (ptr > buf && value);
2292
2293         result_text = PG_STR_GET_TEXT(ptr);
2294         PG_RETURN_TEXT_P(result_text);
2295 }
2296
2297 /*
2298  * Create an md5 hash of a text string and return it as hex
2299  *
2300  * md5 produces a 16 byte (128 bit) hash; double it for hex
2301  */
2302 #define MD5_HASH_LEN  32
2303
2304 Datum
2305 md5_text(PG_FUNCTION_ARGS)
2306 {
2307         char       *buff = PG_TEXT_GET_STR(PG_GETARG_TEXT_P(0));
2308         size_t          len = strlen(buff);
2309         char       *hexsum;
2310         text       *result_text;
2311
2312         /* leave room for the terminating '\0' */
2313         hexsum = (char *) palloc(MD5_HASH_LEN + 1);
2314
2315         /* get the hash result */
2316         md5_hash((void *) buff, len, hexsum);
2317
2318         /* convert to text and return it */
2319         result_text = PG_STR_GET_TEXT(hexsum);
2320         PG_RETURN_TEXT_P(result_text);
2321 }