granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.99 2003/06/24 23:14:46 momjian Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18
  19 #include "mb/pg_wchar.h"
  20 #include "miscadmin.h"
  21 #include "access/tuptoaster.h"
  22 #include "catalog/pg_type.h"
  23 #include "lib/stringinfo.h"
  24 #include "libpq/crypt.h"
  25 #include "libpq/pqformat.h"
  26 #include "utils/array.h"
  27 #include "utils/builtins.h"
  28 #include "utils/pg_locale.h"
  29 #include "utils/lsyscache.h"
  30
  31
  32 typedef struct varlena unknown;
  33
  34 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  35 #define DatumGetUnknownPCopy(X)         ((unknown *) PG_DETOAST_DATUM_COPY(X))
  36 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  37 #define PG_GETARG_UNKNOWN_P_COPY(n)     DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
  38 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  39
  40 #define PG_TEXTARG_GET_STR(arg_) \
  41         DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
  42 #define PG_TEXT_GET_STR(textp_) \
  43         DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
  44 #define PG_STR_GET_TEXT(str_) \
  45         DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
  46 #define TEXTLEN(textp) \
  47         text_length(PointerGetDatum(textp))
  48 #define TEXTPOS(buf_text, from_sub_text) \
  49         text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
  50 #define TEXTDUP(textp) \
  51         DatumGetTextPCopy(PointerGetDatum(textp))
  52 #define LEFT(buf_text, from_sub_text) \
  53         text_substring(PointerGetDatum(buf_text), \
  54                                         1, \
  55                                         TEXTPOS(buf_text, from_sub_text) - 1, false)
  56 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
  57         text_substring(PointerGetDatum(buf_text), \
  58                                         TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
  59                                         -1, true)
  60
  61 static int      text_cmp(text *arg1, text *arg2);
  62 static int32 text_length(Datum str);
  63 static int32 text_position(Datum str, Datum search_str, int matchnum);
  64 static text *text_substring(Datum str,
  65                            int32 start,
  66                            int32 length,
  67                            bool length_not_specified);
  68
  69
  70 /*****************************************************************************
  71  *       USER I/O ROUTINES                                                                                                               *
  72  *****************************************************************************/
  73
  74
  75 #define VAL(CH)                 ((CH) - '0')
  76 #define DIG(VAL)                ((VAL) + '0')
  77
  78 /*
  79  *              byteain                 - converts from printable representation of byte array
  80  *
  81  *              Non-printable characters must be passed as '\nnn' (octal) and are
  82  *              converted to internal form.  '\' must be passed as '\\'.
  83  *              elog(ERROR, ...) if bad form.
  84  *
  85  *              BUGS:
  86  *                              The input is scaned twice.
  87  *                              The error checking of input is minimal.
  88  */
  89 Datum
  90 byteain(PG_FUNCTION_ARGS)
  91 {
  92         char       *inputText = PG_GETARG_CSTRING(0);
  93         char       *tp;
  94         char       *rp;
  95         int                     byte;
  96         bytea      *result;
  97
  98         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
  99         {
 100                 if (tp[0] != '\\')
 101                         tp++;
 102                 else if ((tp[0] == '\\') &&
 103                                  (tp[1] >= '0' && tp[1] <= '3') &&
 104                                  (tp[2] >= '0' && tp[2] <= '7') &&
 105                                  (tp[3] >= '0' && tp[3] <= '7'))
 106                         tp += 4;
 107                 else if ((tp[0] == '\\') &&
 108                                  (tp[1] == '\\'))
 109                         tp += 2;
 110                 else
 111                 {
 112                         /*
 113                          * one backslash, not followed by 0 or ### valid octal
 114                          */
 115                         elog(ERROR, "Bad input string for type bytea");
 116                 }
 117         }
 118
 119         byte += VARHDRSZ;
 120         result = (bytea *) palloc(byte);
 121         VARATT_SIZEP(result) = byte;            /* set varlena length */
 122
 123         tp = inputText;
 124         rp = VARDATA(result);
 125         while (*tp != '\0')
 126         {
 127                 if (tp[0] != '\\')
 128                         *rp++ = *tp++;
 129                 else if ((tp[0] == '\\') &&
 130                                  (tp[1] >= '0' && tp[1] <= '3') &&
 131                                  (tp[2] >= '0' && tp[2] <= '7') &&
 132                                  (tp[3] >= '0' && tp[3] <= '7'))
 133                 {
 134                         byte = VAL(tp[1]);
 135                         byte <<= 3;
 136                         byte += VAL(tp[2]);
 137                         byte <<= 3;
 138                         *rp++ = byte + VAL(tp[3]);
 139                         tp += 4;
 140                 }
 141                 else if ((tp[0] == '\\') &&
 142                                  (tp[1] == '\\'))
 143                 {
 144                         *rp++ = '\\';
 145                         tp += 2;
 146                 }
 147                 else
 148                 {
 149                         /*
 150                          * We should never get here. The first pass should not allow
 151                          * it.
 152                          */
 153                         elog(ERROR, "Bad input string for type bytea");
 154                 }
 155         }
 156
 157         PG_RETURN_BYTEA_P(result);
 158 }
 159
 160 /*
 161  *              byteaout                - converts to printable representation of byte array
 162  *
 163  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 164  *              '\\'.
 165  *
 166  *              NULL vlena should be an error--returning string with NULL for now.
 167  */
 168 Datum
 169 byteaout(PG_FUNCTION_ARGS)
 170 {
 171         bytea      *vlena = PG_GETARG_BYTEA_P(0);
 172         char       *result;
 173         char       *vp;
 174         char       *rp;
 175         int                     val;                    /* holds unprintable chars */
 176         int                     i;
 177         int                     len;
 178
 179         len = 1;                                        /* empty string has 1 char */
 180         vp = VARDATA(vlena);
 181         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 182         {
 183                 if (*vp == '\\')
 184                         len += 2;
 185                 else if (isprint((unsigned char) *vp))
 186                         len++;
 187                 else
 188                         len += 4;
 189         }
 190         rp = result = (char *) palloc(len);
 191         vp = VARDATA(vlena);
 192         for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
 193         {
 194                 if (*vp == '\\')
 195                 {
 196                         *rp++ = '\\';
 197                         *rp++ = '\\';
 198                 }
 199                 else if (isprint((unsigned char) *vp))
 200                         *rp++ = *vp;
 201                 else
 202                 {
 203                         val = *vp;
 204                         rp[0] = '\\';
 205                         rp[3] = DIG(val & 07);
 206                         val >>= 3;
 207                         rp[2] = DIG(val & 07);
 208                         val >>= 3;
 209                         rp[1] = DIG(val & 03);
 210                         rp += 4;
 211                 }
 212         }
 213         *rp = '\0';
 214         PG_RETURN_CSTRING(result);
 215 }
 216
 217 /*
 218  *              bytearecv                       - converts external binary format to bytea
 219  */
 220 Datum
 221 bytearecv(PG_FUNCTION_ARGS)
 222 {
 223         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 224         bytea      *result;
 225         int                     nbytes;
 226
 227         nbytes = buf->len - buf->cursor;
 228         result = (bytea *) palloc(nbytes + VARHDRSZ);
 229         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 230         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 231         PG_RETURN_BYTEA_P(result);
 232 }
 233
 234 /*
 235  *              byteasend                       - converts bytea to binary format
 236  *
 237  * This is a special case: just copy the input...
 238  */
 239 Datum
 240 byteasend(PG_FUNCTION_ARGS)
 241 {
 242         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
 243
 244         PG_RETURN_BYTEA_P(vlena);
 245 }
 246
 247
 248 /*
 249  *              textin                  - converts "..." to internal representation
 250  */
 251 Datum
 252 textin(PG_FUNCTION_ARGS)
 253 {
 254         char       *inputText = PG_GETARG_CSTRING(0);
 255         text       *result;
 256         int                     len;
 257
 258         char       *ermsg;
 259
 260         len = strlen(inputText) + VARHDRSZ;
 261
 262         if ((ermsg = pg_verifymbstr(inputText, len - VARHDRSZ)))
 263                 elog(ERROR, "%s", ermsg);
 264
 265         result = (text *) palloc(len);
 266         VARATT_SIZEP(result) = len;
 267
 268         memcpy(VARDATA(result), inputText, len - VARHDRSZ);
 269
 270 #ifdef CYR_RECODE
 271         convertstr(VARDATA(result), len - VARHDRSZ, 0);
 272 #endif
 273
 274         PG_RETURN_TEXT_P(result);
 275 }
 276
 277 /*
 278  *              textout                 - converts internal representation to "..."
 279  */
 280 Datum
 281 textout(PG_FUNCTION_ARGS)
 282 {
 283         text       *t = PG_GETARG_TEXT_P(0);
 284         int                     len;
 285         char       *result;
 286
 287         len = VARSIZE(t) - VARHDRSZ;
 288         result = (char *) palloc(len + 1);
 289         memcpy(result, VARDATA(t), len);
 290         result[len] = '\0';
 291
 292 #ifdef CYR_RECODE
 293         convertstr(result, len, 1);
 294 #endif
 295
 296         PG_RETURN_CSTRING(result);
 297 }
 298
 299 /*
 300  *              textrecv                        - converts external binary format to text
 301  */
 302 Datum
 303 textrecv(PG_FUNCTION_ARGS)
 304 {
 305         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 306         text       *result;
 307         char       *str;
 308         int                     nbytes;
 309
 310         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 311         result = (text *) palloc(nbytes + VARHDRSZ);
 312         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 313         memcpy(VARDATA(result), str, nbytes);
 314         pfree(str);
 315         PG_RETURN_TEXT_P(result);
 316 }
 317
 318 /*
 319  *              textsend                        - converts text to binary format
 320  */
 321 Datum
 322 textsend(PG_FUNCTION_ARGS)
 323 {
 324         text       *t = PG_GETARG_TEXT_P(0);
 325         StringInfoData buf;
 326
 327         pq_begintypsend(&buf);
 328         pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
 329         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 330 }
 331
 332
 333 /*
 334  *              unknownin                       - converts "..." to internal representation
 335  */
 336 Datum
 337 unknownin(PG_FUNCTION_ARGS)
 338 {
 339         char       *inputStr = PG_GETARG_CSTRING(0);
 340         unknown    *result;
 341         int                     len;
 342
 343         len = strlen(inputStr) + VARHDRSZ;
 344
 345         result = (unknown *) palloc(len);
 346         VARATT_SIZEP(result) = len;
 347
 348         memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
 349
 350         PG_RETURN_UNKNOWN_P(result);
 351 }
 352
 353 /*
 354  *              unknownout                      - converts internal representation to "..."
 355  */
 356 Datum
 357 unknownout(PG_FUNCTION_ARGS)
 358 {
 359         unknown    *t = PG_GETARG_UNKNOWN_P(0);
 360         int                     len;
 361         char       *result;
 362
 363         len = VARSIZE(t) - VARHDRSZ;
 364         result = (char *) palloc(len + 1);
 365         memcpy(result, VARDATA(t), len);
 366         result[len] = '\0';
 367
 368         PG_RETURN_CSTRING(result);
 369 }
 370
 371 /*
 372  *              unknownrecv                     - converts external binary format to unknown
 373  */
 374 Datum
 375 unknownrecv(PG_FUNCTION_ARGS)
 376 {
 377         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 378         unknown    *result;
 379         int                     nbytes;
 380
 381         nbytes = buf->len - buf->cursor;
 382         result = (unknown *) palloc(nbytes + VARHDRSZ);
 383         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 384         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 385         PG_RETURN_UNKNOWN_P(result);
 386 }
 387
 388 /*
 389  *              unknownsend                     - converts unknown to binary format
 390  *
 391  * This is a special case: just copy the input, since it's
 392  * effectively the same format as bytea
 393  */
 394 Datum
 395 unknownsend(PG_FUNCTION_ARGS)
 396 {
 397         unknown    *vlena = PG_GETARG_UNKNOWN_P_COPY(0);
 398
 399         PG_RETURN_UNKNOWN_P(vlena);
 400 }
 401
 402
 403 /* ========== PUBLIC ROUTINES ========== */
 404
 405 /*
 406  * textlen -
 407  *        returns the logical length of a text*
 408  *         (which is less than the VARSIZE of the text*)
 409  */
 410 Datum
 411 textlen(PG_FUNCTION_ARGS)
 412 {
 413         PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
 414 }
 415
 416 /*
 417  * text_length -
 418  *      Does the real work for textlen()
 419  *      This is broken out so it can be called directly by other string processing
 420  *      functions.
 421  */
 422 static int32
 423 text_length(Datum str)
 424 {
 425         /* fastpath when max encoding length is one */
 426         if (pg_database_encoding_max_length() == 1)
 427                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 428
 429         if (pg_database_encoding_max_length() > 1)
 430         {
 431                 text       *t = DatumGetTextP(str);
 432
 433                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
 434                                                                                          VARSIZE(t) - VARHDRSZ));
 435         }
 436
 437         /* should never get here */
 438         elog(ERROR, "Invalid backend encoding; encoding max length "
 439                  "is less than one.");
 440
 441         /* not reached: suppress compiler warning */
 442         return 0;
 443 }
 444
 445 /*
 446  * textoctetlen -
 447  *        returns the physical length of a text*
 448  *         (which is less than the VARSIZE of the text*)
 449  */
 450 Datum
 451 textoctetlen(PG_FUNCTION_ARGS)
 452 {
 453         PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
 454 }
 455
 456 /*
 457  * textcat -
 458  *        takes two text* and returns a text* that is the concatenation of
 459  *        the two.
 460  *
 461  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 462  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 463  * Allocate space for output in all cases.
 464  * XXX - thomas 1997-07-10
 465  */
 466 Datum
 467 textcat(PG_FUNCTION_ARGS)
 468 {
 469         text       *t1 = PG_GETARG_TEXT_P(0);
 470         text       *t2 = PG_GETARG_TEXT_P(1);
 471         int                     len1,
 472                                 len2,
 473                                 len;
 474         text       *result;
 475         char       *ptr;
 476
 477         len1 = (VARSIZE(t1) - VARHDRSZ);
 478         if (len1 < 0)
 479                 len1 = 0;
 480
 481         len2 = (VARSIZE(t2) - VARHDRSZ);
 482         if (len2 < 0)
 483                 len2 = 0;
 484
 485         len = len1 + len2 + VARHDRSZ;
 486         result = (text *) palloc(len);
 487
 488         /* Set size of result string... */
 489         VARATT_SIZEP(result) = len;
 490
 491         /* Fill data field of result string... */
 492         ptr = VARDATA(result);
 493         if (len1 > 0)
 494                 memcpy(ptr, VARDATA(t1), len1);
 495         if (len2 > 0)
 496                 memcpy(ptr + len1, VARDATA(t2), len2);
 497
 498         PG_RETURN_TEXT_P(result);
 499 }
 500
 501 /*
 502  * text_substr()
 503  * Return a substring starting at the specified position.
 504  * - thomas 1997-12-31
 505  *
 506  * Input:
 507  *      - string
 508  *      - starting position (is one-based)
 509  *      - string length
 510  *
 511  * If the starting position is zero or less, then return from the start of the string
 512  *      adjusting the length to be consistent with the "negative start" per SQL92.
 513  * If the length is less than zero, return the remaining string.
 514  *
 515  * Note that the arguments operate on octet length,
 516  *      so not aware of multibyte character sets.
 517  *
 518  * Added multibyte support.
 519  * - Tatsuo Ishii 1998-4-21
 520  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 521  * Formerly returned the entire string; now returns a portion.
 522  * - Thomas Lockhart 1998-12-10
 523  * Now uses faster TOAST-slicing interface
 524  * - John Gray 2002-02-22
 525  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 526  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 527  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 528  * S > LC and < LC + 4 sometimes garbage characters are returned.
 529  * - Joe Conway 2002-08-10
 530  */
 531 Datum
 532 text_substr(PG_FUNCTION_ARGS)
 533 {
 534         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 535                                                                         PG_GETARG_INT32(1),
 536                                                                         PG_GETARG_INT32(2),
 537                                                                         false));
 538 }
 539
 540 /*
 541  * text_substr_no_len -
 542  *        Wrapper to avoid opr_sanity failure due to
 543  *        one function accepting a different number of args.
 544  */
 545 Datum
 546 text_substr_no_len(PG_FUNCTION_ARGS)
 547 {
 548         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 549                                                                         PG_GETARG_INT32(1),
 550                                                                         -1, true));
 551 }
 552
 553 /*
 554  * text_substring -
 555  *      Does the real work for text_substr() and text_substr_no_len()
 556  *      This is broken out so it can be called directly by other string processing
 557  *      functions.
 558  */
 559 static text *
 560 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 561 {
 562         int32           eml = pg_database_encoding_max_length();
 563         int32           S = start;              /* start position */
 564         int32           S1;                             /* adjusted start position */
 565         int32           L1;                             /* adjusted substring length */
 566
 567         /* life is easy if the encoding max length is 1 */
 568         if (eml == 1)
 569         {
 570                 S1 = Max(S, 1);
 571
 572                 if (length_not_specified)               /* special case - get length to
 573                                                                                  * end of string */
 574                         L1 = -1;
 575                 else
 576                 {
 577                         /* end position */
 578                         int                     E = S + length;
 579
 580                         /*
 581                          * A negative value for L is the only way for the end position
 582                          * to be before the start. SQL99 says to throw an error.
 583                          */
 584                         if (E < S)
 585                                 elog(ERROR, "negative substring length not allowed");
 586
 587                         /*
 588                          * A zero or negative value for the end position can happen if
 589                          * the start was negative or one. SQL99 says to return a
 590                          * zero-length string.
 591                          */
 592                         if (E < 1)
 593                                 return PG_STR_GET_TEXT("");
 594
 595                         L1 = E - S1;
 596                 }
 597
 598                 /*
 599                  * If the start position is past the end of the string, SQL99 says
 600                  * to return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will
 601                  * do that for us. Convert to zero-based starting position
 602                  */
 603                 return DatumGetTextPSlice(str, S1 - 1, L1);
 604         }
 605         else if (eml > 1)
 606         {
 607                 /*
 608                  * When encoding max length is > 1, we can't get LC without
 609                  * detoasting, so we'll grab a conservatively large slice now and
 610                  * go back later to do the right thing
 611                  */
 612                 int32           slice_start;
 613                 int32           slice_size;
 614                 int32           slice_strlen;
 615                 text       *slice;
 616                 int32           E1;
 617                 int32           i;
 618                 char       *p;
 619                 char       *s;
 620                 text       *ret;
 621
 622                 /*
 623                  * if S is past the end of the string, the tuple toaster will
 624                  * return a zero-length string to us
 625                  */
 626                 S1 = Max(S, 1);
 627
 628                 /*
 629                  * We need to start at position zero because there is no way to
 630                  * know in advance which byte offset corresponds to the supplied
 631                  * start position.
 632                  */
 633                 slice_start = 0;
 634
 635                 if (length_not_specified)               /* special case - get length to
 636                                                                                  * end of string */
 637                         slice_size = L1 = -1;
 638                 else
 639                 {
 640                         int                     E = S + length;
 641
 642                         /*
 643                          * A negative value for L is the only way for the end position
 644                          * to be before the start. SQL99 says to throw an error.
 645                          */
 646                         if (E < S)
 647                                 elog(ERROR, "negative substring length not allowed");
 648
 649                         /*
 650                          * A zero or negative value for the end position can happen if
 651                          * the start was negative or one. SQL99 says to return a
 652                          * zero-length string.
 653                          */
 654                         if (E < 1)
 655                                 return PG_STR_GET_TEXT("");
 656
 657                         /*
 658                          * if E is past the end of the string, the tuple toaster will
 659                          * truncate the length for us
 660                          */
 661                         L1 = E - S1;
 662
 663                         /*
 664                          * Total slice size in bytes can't be any longer than the
 665                          * start position plus substring length times the encoding max
 666                          * length.
 667                          */
 668                         slice_size = (S1 + L1) * eml;
 669                 }
 670                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
 671
 672                 /* see if we got back an empty string */
 673                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
 674                         return PG_STR_GET_TEXT("");
 675
 676                 /* Now we can get the actual length of the slice in MB characters */
 677                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
 678
 679                 /*
 680                  * Check that the start position wasn't > slice_strlen. If so,
 681                  * SQL99 says to return a zero-length string.
 682                  */
 683                 if (S1 > slice_strlen)
 684                         return PG_STR_GET_TEXT("");
 685
 686                 /*
 687                  * Adjust L1 and E1 now that we know the slice string length.
 688                  * Again remember that S1 is one based, and slice_start is zero
 689                  * based.
 690                  */
 691                 if (L1 > -1)
 692                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 693                 else
 694                         E1 = slice_start + 1 + slice_strlen;
 695
 696                 /*
 697                  * Find the start position in the slice; remember S1 is not zero
 698                  * based
 699                  */
 700                 p = VARDATA(slice);
 701                 for (i = 0; i < S1 - 1; i++)
 702                         p += pg_mblen(p);
 703
 704                 /* hang onto a pointer to our start position */
 705                 s = p;
 706
 707                 /*
 708                  * Count the actual bytes used by the substring of the requested
 709                  * length.
 710                  */
 711                 for (i = S1; i < E1; i++)
 712                         p += pg_mblen(p);
 713
 714                 ret = (text *) palloc(VARHDRSZ + (p - s));
 715                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
 716                 memcpy(VARDATA(ret), s, (p - s));
 717
 718                 return ret;
 719         }
 720         else
 721                 elog(ERROR, "Invalid backend encoding; encoding max length "
 722                          "is less than one.");
 723
 724         /* not reached: suppress compiler warning */
 725         return PG_STR_GET_TEXT("");
 726 }
 727
 728 /*
 729  * textpos -
 730  *        Return the position of the specified substring.
 731  *        Implements the SQL92 POSITION() function.
 732  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 733  * - thomas 1997-07-27
 734  */
 735 Datum
 736 textpos(PG_FUNCTION_ARGS)
 737 {
 738         PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
 739 }
 740
 741 /*
 742  * text_position -
 743  *      Does the real work for textpos()
 744  *      This is broken out so it can be called directly by other string processing
 745  *      functions.
 746  */
 747 static int32
 748 text_position(Datum str, Datum search_str, int matchnum)
 749 {
 750         int                     eml = pg_database_encoding_max_length();
 751         text       *t1 = DatumGetTextP(str);
 752         text       *t2 = DatumGetTextP(search_str);
 753         int                     match = 0,
 754                                 pos = 0,
 755                                 p = 0,
 756                                 px,
 757                                 len1,
 758                                 len2;
 759
 760         if (matchnum == 0)
 761                 return 0;                               /* result for 0th match */
 762
 763         if (VARSIZE(t2) <= VARHDRSZ)
 764                 PG_RETURN_INT32(1);             /* result for empty pattern */
 765
 766         len1 = (VARSIZE(t1) - VARHDRSZ);
 767         len2 = (VARSIZE(t2) - VARHDRSZ);
 768
 769         /* no use in searching str past point where search_str will fit */
 770         px = (len1 - len2);
 771
 772         if (eml == 1)                           /* simple case - single byte encoding */
 773         {
 774                 char       *p1,
 775                                    *p2;
 776
 777                 p1 = VARDATA(t1);
 778                 p2 = VARDATA(t2);
 779
 780                 for (p = 0; p <= px; p++)
 781                 {
 782                         if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
 783                         {
 784                                 if (++match == matchnum)
 785                                 {
 786                                         pos = p + 1;
 787                                         break;
 788                                 }
 789                         }
 790                         p1++;
 791                 }
 792         }
 793         else if (eml > 1)                       /* not as simple - multibyte encoding */
 794         {
 795                 pg_wchar   *p1,
 796                                    *p2,
 797                                    *ps1,
 798                                    *ps2;
 799
 800                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 801                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
 802                 len1 = pg_wchar_strlen(p1);
 803                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 804                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
 805                 len2 = pg_wchar_strlen(p2);
 806
 807                 for (p = 0; p <= px; p++)
 808                 {
 809                         if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
 810                         {
 811                                 if (++match == matchnum)
 812                                 {
 813                                         pos = p + 1;
 814                                         break;
 815                                 }
 816                         }
 817                         p1++;
 818                 }
 819
 820                 pfree(ps1);
 821                 pfree(ps2);
 822         }
 823         else
 824                 elog(ERROR, "Invalid backend encoding; encoding max length "
 825                          "is less than one.");
 826
 827         PG_RETURN_INT32(pos);
 828 }
 829
 830 /* varstr_cmp()
 831  * Comparison function for text strings with given lengths.
 832  * Includes locale support, but must copy strings to temporary memory
 833  *      to allow null-termination for inputs to strcoll().
 834  * Returns -1, 0 or 1
 835  */
 836 int
 837 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 838 {
 839         int                     result;
 840
 841         /*
 842          * Unfortunately, there is no strncoll(), so in the non-C locale case
 843          * we have to do some memory copying.  This turns out to be
 844          * significantly slower, so we optimize the case where LC_COLLATE is
 845          * C.  We also try to optimize relatively-short strings by avoiding
 846          * palloc/pfree overhead.
 847          */
 848 #define STACKBUFLEN             1024
 849
 850         if (!lc_collate_is_c())
 851         {
 852                 char    a1buf[STACKBUFLEN];
 853                 char    a2buf[STACKBUFLEN];
 854                 char   *a1p,
 855                            *a2p;
 856
 857                 if (len1 >= STACKBUFLEN)
 858                         a1p = (char *) palloc(len1 + 1);
 859                 else
 860                         a1p = a1buf;
 861                 if (len2 >= STACKBUFLEN)
 862                         a2p = (char *) palloc(len2 + 1);
 863                 else
 864                         a2p = a2buf;
 865
 866                 memcpy(a1p, arg1, len1);
 867                 a1p[len1] = '\0';
 868                 memcpy(a2p, arg2, len2);
 869                 a2p[len2] = '\0';
 870
 871                 result = strcoll(a1p, a2p);
 872
 873                 if (len1 >= STACKBUFLEN)
 874                         pfree(a1p);
 875                 if (len2 >= STACKBUFLEN)
 876                         pfree(a2p);
 877         }
 878         else
 879         {
 880                 result = strncmp(arg1, arg2, Min(len1, len2));
 881                 if ((result == 0) && (len1 != len2))
 882                         result = (len1 < len2) ? -1 : 1;
 883         }
 884
 885         return result;
 886 }
 887
 888
 889 /* text_cmp()
 890  * Internal comparison function for text strings.
 891  * Returns -1, 0 or 1
 892  */
 893 static int
 894 text_cmp(text *arg1, text *arg2)
 895 {
 896         char       *a1p,
 897                            *a2p;
 898         int                     len1,
 899                                 len2;
 900
 901         a1p = VARDATA(arg1);
 902         a2p = VARDATA(arg2);
 903
 904         len1 = VARSIZE(arg1) - VARHDRSZ;
 905         len2 = VARSIZE(arg2) - VARHDRSZ;
 906
 907         return varstr_cmp(a1p, len1, a2p, len2);
 908 }
 909
 910 /*
 911  * Comparison functions for text strings.
 912  *
 913  * Note: btree indexes need these routines not to leak memory; therefore,
 914  * be careful to free working copies of toasted datums.  Most places don't
 915  * need to be so careful.
 916  */
 917
 918 Datum
 919 texteq(PG_FUNCTION_ARGS)
 920 {
 921         text       *arg1 = PG_GETARG_TEXT_P(0);
 922         text       *arg2 = PG_GETARG_TEXT_P(1);
 923         bool            result;
 924
 925         /* fast path for different-length inputs */
 926         if (VARSIZE(arg1) != VARSIZE(arg2))
 927                 result = false;
 928         else
 929                 result = (text_cmp(arg1, arg2) == 0);
 930
 931         PG_FREE_IF_COPY(arg1, 0);
 932         PG_FREE_IF_COPY(arg2, 1);
 933
 934         PG_RETURN_BOOL(result);
 935 }
 936
 937 Datum
 938 textne(PG_FUNCTION_ARGS)
 939 {
 940         text       *arg1 = PG_GETARG_TEXT_P(0);
 941         text       *arg2 = PG_GETARG_TEXT_P(1);
 942         bool            result;
 943
 944         /* fast path for different-length inputs */
 945         if (VARSIZE(arg1) != VARSIZE(arg2))
 946                 result = true;
 947         else
 948                 result = (text_cmp(arg1, arg2) != 0);
 949
 950         PG_FREE_IF_COPY(arg1, 0);
 951         PG_FREE_IF_COPY(arg2, 1);
 952
 953         PG_RETURN_BOOL(result);
 954 }
 955
 956 Datum
 957 text_lt(PG_FUNCTION_ARGS)
 958 {
 959         text       *arg1 = PG_GETARG_TEXT_P(0);
 960         text       *arg2 = PG_GETARG_TEXT_P(1);
 961         bool            result;
 962
 963         result = (text_cmp(arg1, arg2) < 0);
 964
 965         PG_FREE_IF_COPY(arg1, 0);
 966         PG_FREE_IF_COPY(arg2, 1);
 967
 968         PG_RETURN_BOOL(result);
 969 }
 970
 971 Datum
 972 text_le(PG_FUNCTION_ARGS)
 973 {
 974         text       *arg1 = PG_GETARG_TEXT_P(0);
 975         text       *arg2 = PG_GETARG_TEXT_P(1);
 976         bool            result;
 977
 978         result = (text_cmp(arg1, arg2) <= 0);
 979
 980         PG_FREE_IF_COPY(arg1, 0);
 981         PG_FREE_IF_COPY(arg2, 1);
 982
 983         PG_RETURN_BOOL(result);
 984 }
 985
 986 Datum
 987 text_gt(PG_FUNCTION_ARGS)
 988 {
 989         text       *arg1 = PG_GETARG_TEXT_P(0);
 990         text       *arg2 = PG_GETARG_TEXT_P(1);
 991         bool            result;
 992
 993         result = (text_cmp(arg1, arg2) > 0);
 994
 995         PG_FREE_IF_COPY(arg1, 0);
 996         PG_FREE_IF_COPY(arg2, 1);
 997
 998         PG_RETURN_BOOL(result);
 999 }
1000
1001 Datum
1002 text_ge(PG_FUNCTION_ARGS)
1003 {
1004         text       *arg1 = PG_GETARG_TEXT_P(0);
1005         text       *arg2 = PG_GETARG_TEXT_P(1);
1006         bool            result;
1007
1008         result = (text_cmp(arg1, arg2) >= 0);
1009
1010         PG_FREE_IF_COPY(arg1, 0);
1011         PG_FREE_IF_COPY(arg2, 1);
1012
1013         PG_RETURN_BOOL(result);
1014 }
1015
1016 Datum
1017 bttextcmp(PG_FUNCTION_ARGS)
1018 {
1019         text       *arg1 = PG_GETARG_TEXT_P(0);
1020         text       *arg2 = PG_GETARG_TEXT_P(1);
1021         int32           result;
1022
1023         result = text_cmp(arg1, arg2);
1024
1025         PG_FREE_IF_COPY(arg1, 0);
1026         PG_FREE_IF_COPY(arg2, 1);
1027
1028         PG_RETURN_INT32(result);
1029 }
1030
1031
1032 Datum
1033 text_larger(PG_FUNCTION_ARGS)
1034 {
1035         text       *arg1 = PG_GETARG_TEXT_P(0);
1036         text       *arg2 = PG_GETARG_TEXT_P(1);
1037         text       *result;
1038
1039         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1040
1041         PG_RETURN_TEXT_P(result);
1042 }
1043
1044 Datum
1045 text_smaller(PG_FUNCTION_ARGS)
1046 {
1047         text       *arg1 = PG_GETARG_TEXT_P(0);
1048         text       *arg2 = PG_GETARG_TEXT_P(1);
1049         text       *result;
1050
1051         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1052
1053         PG_RETURN_TEXT_P(result);
1054 }
1055
1056
1057 /*
1058  * The following operators support character-by-character comparison
1059  * of text data types, to allow building indexes suitable for LIKE
1060  * clauses.
1061  */
1062
1063 static int
1064 internal_text_pattern_compare(text *arg1, text *arg2)
1065 {
1066         int result;
1067
1068         result = memcmp(VARDATA(arg1), VARDATA(arg2),
1069                                         Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1070         if (result != 0)
1071                 return result;
1072         else if (VARSIZE(arg1) < VARSIZE(arg2))
1073                 return -1;
1074         else if (VARSIZE(arg1) > VARSIZE(arg2))
1075                 return 1;
1076         else
1077                 return 0;
1078 }
1079
1080
1081 Datum
1082 text_pattern_lt(PG_FUNCTION_ARGS)
1083 {
1084         text       *arg1 = PG_GETARG_TEXT_P(0);
1085         text       *arg2 = PG_GETARG_TEXT_P(1);
1086         int                     result;
1087
1088         result = internal_text_pattern_compare(arg1, arg2);
1089
1090         PG_FREE_IF_COPY(arg1, 0);
1091         PG_FREE_IF_COPY(arg2, 1);
1092
1093         PG_RETURN_BOOL(result < 0);
1094 }
1095
1096
1097 Datum
1098 text_pattern_le(PG_FUNCTION_ARGS)
1099 {
1100         text       *arg1 = PG_GETARG_TEXT_P(0);
1101         text       *arg2 = PG_GETARG_TEXT_P(1);
1102         int                     result;
1103
1104         result = internal_text_pattern_compare(arg1, arg2);
1105
1106         PG_FREE_IF_COPY(arg1, 0);
1107         PG_FREE_IF_COPY(arg2, 1);
1108
1109         PG_RETURN_BOOL(result <= 0);
1110 }
1111
1112
1113 Datum
1114 text_pattern_eq(PG_FUNCTION_ARGS)
1115 {
1116         text       *arg1 = PG_GETARG_TEXT_P(0);
1117         text       *arg2 = PG_GETARG_TEXT_P(1);
1118         int                     result;
1119
1120         if (VARSIZE(arg1) != VARSIZE(arg2))
1121                 result = 1;
1122         else
1123                 result = internal_text_pattern_compare(arg1, arg2);
1124
1125         PG_FREE_IF_COPY(arg1, 0);
1126         PG_FREE_IF_COPY(arg2, 1);
1127
1128         PG_RETURN_BOOL(result == 0);
1129 }
1130
1131
1132 Datum
1133 text_pattern_ge(PG_FUNCTION_ARGS)
1134 {
1135         text       *arg1 = PG_GETARG_TEXT_P(0);
1136         text       *arg2 = PG_GETARG_TEXT_P(1);
1137         int                     result;
1138
1139         result = internal_text_pattern_compare(arg1, arg2);
1140
1141         PG_FREE_IF_COPY(arg1, 0);
1142         PG_FREE_IF_COPY(arg2, 1);
1143
1144         PG_RETURN_BOOL(result >= 0);
1145 }
1146
1147
1148 Datum
1149 text_pattern_gt(PG_FUNCTION_ARGS)
1150 {
1151         text       *arg1 = PG_GETARG_TEXT_P(0);
1152         text       *arg2 = PG_GETARG_TEXT_P(1);
1153         int                     result;
1154
1155         result = internal_text_pattern_compare(arg1, arg2);
1156
1157         PG_FREE_IF_COPY(arg1, 0);
1158         PG_FREE_IF_COPY(arg2, 1);
1159
1160         PG_RETURN_BOOL(result > 0);
1161 }
1162
1163
1164 Datum
1165 text_pattern_ne(PG_FUNCTION_ARGS)
1166 {
1167         text       *arg1 = PG_GETARG_TEXT_P(0);
1168         text       *arg2 = PG_GETARG_TEXT_P(1);
1169         int                     result;
1170
1171         if (VARSIZE(arg1) != VARSIZE(arg2))
1172                 result = 1;
1173         else
1174                 result = internal_text_pattern_compare(arg1, arg2);
1175
1176         PG_FREE_IF_COPY(arg1, 0);
1177         PG_FREE_IF_COPY(arg2, 1);
1178
1179         PG_RETURN_BOOL(result != 0);
1180 }
1181
1182
1183 Datum
1184 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1185 {
1186         text       *arg1 = PG_GETARG_TEXT_P(0);
1187         text       *arg2 = PG_GETARG_TEXT_P(1);
1188         int                     result;
1189
1190         result = internal_text_pattern_compare(arg1, arg2);
1191
1192         PG_FREE_IF_COPY(arg1, 0);
1193         PG_FREE_IF_COPY(arg2, 1);
1194
1195         PG_RETURN_INT32(result);
1196 }
1197
1198
1199 /*-------------------------------------------------------------
1200  * byteaoctetlen
1201  *
1202  * get the number of bytes contained in an instance of type 'bytea'
1203  *-------------------------------------------------------------
1204  */
1205 Datum
1206 byteaoctetlen(PG_FUNCTION_ARGS)
1207 {
1208         PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
1209 }
1210
1211 /*
1212  * byteacat -
1213  *        takes two bytea* and returns a bytea* that is the concatenation of
1214  *        the two.
1215  *
1216  * Cloned from textcat and modified as required.
1217  */
1218 Datum
1219 byteacat(PG_FUNCTION_ARGS)
1220 {
1221         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1222         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1223         int                     len1,
1224                                 len2,
1225                                 len;
1226         bytea      *result;
1227         char       *ptr;
1228
1229         len1 = (VARSIZE(t1) - VARHDRSZ);
1230         if (len1 < 0)
1231                 len1 = 0;
1232
1233         len2 = (VARSIZE(t2) - VARHDRSZ);
1234         if (len2 < 0)
1235                 len2 = 0;
1236
1237         len = len1 + len2 + VARHDRSZ;
1238         result = (bytea *) palloc(len);
1239
1240         /* Set size of result string... */
1241         VARATT_SIZEP(result) = len;
1242
1243         /* Fill data field of result string... */
1244         ptr = VARDATA(result);
1245         if (len1 > 0)
1246                 memcpy(ptr, VARDATA(t1), len1);
1247         if (len2 > 0)
1248                 memcpy(ptr + len1, VARDATA(t2), len2);
1249
1250         PG_RETURN_BYTEA_P(result);
1251 }
1252
1253 #define PG_STR_GET_BYTEA(str_) \
1254         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1255 /*
1256  * bytea_substr()
1257  * Return a substring starting at the specified position.
1258  * Cloned from text_substr and modified as required.
1259  *
1260  * Input:
1261  *      - string
1262  *      - starting position (is one-based)
1263  *      - string length (optional)
1264  *
1265  * If the starting position is zero or less, then return from the start of the string
1266  * adjusting the length to be consistent with the "negative start" per SQL92.
1267  * If the length is less than zero, an ERROR is thrown. If no third argument
1268  * (length) is provided, the length to the end of the string is assumed.
1269  */
1270 Datum
1271 bytea_substr(PG_FUNCTION_ARGS)
1272 {
1273         int                     S = PG_GETARG_INT32(1); /* start position */
1274         int                     S1;                             /* adjusted start position */
1275         int                     L1;                             /* adjusted substring length */
1276
1277         S1 = Max(S, 1);
1278
1279         if (fcinfo->nargs == 2)
1280         {
1281                 /*
1282                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1283                  * everything to the end of the string if we pass it a negative
1284                  * value for length.
1285                  */
1286                 L1 = -1;
1287         }
1288         else
1289         {
1290                 /* end position */
1291                 int                     E = S + PG_GETARG_INT32(2);
1292
1293                 /*
1294                  * A negative value for L is the only way for the end position to
1295                  * be before the start. SQL99 says to throw an error.
1296                  */
1297                 if (E < S)
1298                         elog(ERROR, "negative substring length not allowed");
1299
1300                 /*
1301                  * A zero or negative value for the end position can happen if the
1302                  * start was negative or one. SQL99 says to return a zero-length
1303                  * string.
1304                  */
1305                 if (E < 1)
1306                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1307
1308                 L1 = E - S1;
1309         }
1310
1311         /*
1312          * If the start position is past the end of the string, SQL99 says to
1313          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
1314          * that for us. Convert to zero-based starting position
1315          */
1316         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1317 }
1318
1319 /*
1320  * bytea_substr_no_len -
1321  *        Wrapper to avoid opr_sanity failure due to
1322  *        one function accepting a different number of args.
1323  */
1324 Datum
1325 bytea_substr_no_len(PG_FUNCTION_ARGS)
1326 {
1327         return bytea_substr(fcinfo);
1328 }
1329
1330 /*
1331  * byteapos -
1332  *        Return the position of the specified substring.
1333  *        Implements the SQL92 POSITION() function.
1334  * Cloned from textpos and modified as required.
1335  */
1336 Datum
1337 byteapos(PG_FUNCTION_ARGS)
1338 {
1339         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1340         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1341         int                     pos;
1342         int                     px,
1343                                 p;
1344         int                     len1,
1345                                 len2;
1346         char       *p1,
1347                            *p2;
1348
1349         if (VARSIZE(t2) <= VARHDRSZ)
1350                 PG_RETURN_INT32(1);             /* result for empty pattern */
1351
1352         len1 = (VARSIZE(t1) - VARHDRSZ);
1353         len2 = (VARSIZE(t2) - VARHDRSZ);
1354
1355         p1 = VARDATA(t1);
1356         p2 = VARDATA(t2);
1357
1358         pos = 0;
1359         px = (len1 - len2);
1360         for (p = 0; p <= px; p++)
1361         {
1362                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1363                 {
1364                         pos = p + 1;
1365                         break;
1366                 };
1367                 p1++;
1368         };
1369
1370         PG_RETURN_INT32(pos);
1371 }
1372
1373 /*-------------------------------------------------------------
1374  * byteaGetByte
1375  *
1376  * this routine treats "bytea" as an array of bytes.
1377  * It returns the Nth byte (a number between 0 and 255).
1378  *-------------------------------------------------------------
1379  */
1380 Datum
1381 byteaGetByte(PG_FUNCTION_ARGS)
1382 {
1383         bytea      *v = PG_GETARG_BYTEA_P(0);
1384         int32           n = PG_GETARG_INT32(1);
1385         int                     len;
1386         int                     byte;
1387
1388         len = VARSIZE(v) - VARHDRSZ;
1389
1390         if (n < 0 || n >= len)
1391                 elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
1392                          n, len - 1);
1393
1394         byte = ((unsigned char *) VARDATA(v))[n];
1395
1396         PG_RETURN_INT32(byte);
1397 }
1398
1399 /*-------------------------------------------------------------
1400  * byteaGetBit
1401  *
1402  * This routine treats a "bytea" type like an array of bits.
1403  * It returns the value of the Nth bit (0 or 1).
1404  *
1405  *-------------------------------------------------------------
1406  */
1407 Datum
1408 byteaGetBit(PG_FUNCTION_ARGS)
1409 {
1410         bytea      *v = PG_GETARG_BYTEA_P(0);
1411         int32           n = PG_GETARG_INT32(1);
1412         int                     byteNo,
1413                                 bitNo;
1414         int                     len;
1415         int                     byte;
1416
1417         len = VARSIZE(v) - VARHDRSZ;
1418
1419         if (n < 0 || n >= len * 8)
1420                 elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
1421                          n, len * 8 - 1);
1422
1423         byteNo = n / 8;
1424         bitNo = n % 8;
1425
1426         byte = ((unsigned char *) VARDATA(v))[byteNo];
1427
1428         if (byte & (1 << bitNo))
1429                 PG_RETURN_INT32(1);
1430         else
1431                 PG_RETURN_INT32(0);
1432 }
1433
1434 /*-------------------------------------------------------------
1435  * byteaSetByte
1436  *
1437  * Given an instance of type 'bytea' creates a new one with
1438  * the Nth byte set to the given value.
1439  *
1440  *-------------------------------------------------------------
1441  */
1442 Datum
1443 byteaSetByte(PG_FUNCTION_ARGS)
1444 {
1445         bytea      *v = PG_GETARG_BYTEA_P(0);
1446         int32           n = PG_GETARG_INT32(1);
1447         int32           newByte = PG_GETARG_INT32(2);
1448         int                     len;
1449         bytea      *res;
1450
1451         len = VARSIZE(v) - VARHDRSZ;
1452
1453         if (n < 0 || n >= len)
1454                 elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
1455                          n, len - 1);
1456
1457         /*
1458          * Make a copy of the original varlena.
1459          */
1460         res = (bytea *) palloc(VARSIZE(v));
1461         memcpy((char *) res, (char *) v, VARSIZE(v));
1462
1463         /*
1464          * Now set the byte.
1465          */
1466         ((unsigned char *) VARDATA(res))[n] = newByte;
1467
1468         PG_RETURN_BYTEA_P(res);
1469 }
1470
1471 /*-------------------------------------------------------------
1472  * byteaSetBit
1473  *
1474  * Given an instance of type 'bytea' creates a new one with
1475  * the Nth bit set to the given value.
1476  *
1477  *-------------------------------------------------------------
1478  */
1479 Datum
1480 byteaSetBit(PG_FUNCTION_ARGS)
1481 {
1482         bytea      *v = PG_GETARG_BYTEA_P(0);
1483         int32           n = PG_GETARG_INT32(1);
1484         int32           newBit = PG_GETARG_INT32(2);
1485         bytea      *res;
1486         int                     len;
1487         int                     oldByte,
1488                                 newByte;
1489         int                     byteNo,
1490                                 bitNo;
1491
1492         len = VARSIZE(v) - VARHDRSZ;
1493
1494         if (n < 0 || n >= len * 8)
1495                 elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
1496                          n, len * 8 - 1);
1497
1498         byteNo = n / 8;
1499         bitNo = n % 8;
1500
1501         /*
1502          * sanity check!
1503          */
1504         if (newBit != 0 && newBit != 1)
1505                 elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
1506
1507         /*
1508          * Make a copy of the original varlena.
1509          */
1510         res = (bytea *) palloc(VARSIZE(v));
1511         memcpy((char *) res, (char *) v, VARSIZE(v));
1512
1513         /*
1514          * Update the byte.
1515          */
1516         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1517
1518         if (newBit == 0)
1519                 newByte = oldByte & (~(1 << bitNo));
1520         else
1521                 newByte = oldByte | (1 << bitNo);
1522
1523         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1524
1525         PG_RETURN_BYTEA_P(res);
1526 }
1527
1528
1529 /* text_name()
1530  * Converts a text type to a Name type.
1531  */
1532 Datum
1533 text_name(PG_FUNCTION_ARGS)
1534 {
1535         text       *s = PG_GETARG_TEXT_P(0);
1536         Name            result;
1537         int                     len;
1538
1539         len = VARSIZE(s) - VARHDRSZ;
1540
1541         /* Truncate oversize input */
1542         if (len >= NAMEDATALEN)
1543                 len = NAMEDATALEN - 1;
1544
1545 #ifdef STRINGDEBUG
1546         printf("text- convert string length %d (%d) ->%d\n",
1547                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1548 #endif
1549
1550         result = (Name) palloc(NAMEDATALEN);
1551         memcpy(NameStr(*result), VARDATA(s), len);
1552
1553         /* now null pad to full length... */
1554         while (len < NAMEDATALEN)
1555         {
1556                 *(NameStr(*result) + len) = '\0';
1557                 len++;
1558         }
1559
1560         PG_RETURN_NAME(result);
1561 }
1562
1563 /* name_text()
1564  * Converts a Name type to a text type.
1565  */
1566 Datum
1567 name_text(PG_FUNCTION_ARGS)
1568 {
1569         Name            s = PG_GETARG_NAME(0);
1570         text       *result;
1571         int                     len;
1572
1573         len = strlen(NameStr(*s));
1574
1575 #ifdef STRINGDEBUG
1576         printf("text- convert string length %d (%d) ->%d\n",
1577                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1578 #endif
1579
1580         result = palloc(VARHDRSZ + len);
1581         VARATT_SIZEP(result) = VARHDRSZ + len;
1582         memcpy(VARDATA(result), NameStr(*s), len);
1583
1584         PG_RETURN_TEXT_P(result);
1585 }
1586
1587
1588 /*
1589  * textToQualifiedNameList - convert a text object to list of names
1590  *
1591  * This implements the input parsing needed by nextval() and other
1592  * functions that take a text parameter representing a qualified name.
1593  * We split the name at dots, downcase if not double-quoted, and
1594  * truncate names if they're too long.
1595  */
1596 List *
1597 textToQualifiedNameList(text *textval, const char *caller)
1598 {
1599         char       *rawname;
1600         List       *result = NIL;
1601         List       *namelist;
1602         List       *l;
1603
1604         /* Convert to C string (handles possible detoasting). */
1605         /* Note we rely on being able to modify rawname below. */
1606         rawname = DatumGetCString(DirectFunctionCall1(textout,
1607                                                                                           PointerGetDatum(textval)));
1608
1609         if (!SplitIdentifierString(rawname, '.', &namelist))
1610                 elog(ERROR, "%s: invalid name syntax", caller);
1611
1612         if (namelist == NIL)
1613                 elog(ERROR, "%s: invalid name syntax", caller);
1614
1615         foreach(l, namelist)
1616         {
1617                 char       *curname = (char *) lfirst(l);
1618
1619                 result = lappend(result, makeString(pstrdup(curname)));
1620         }
1621
1622         pfree(rawname);
1623         freeList(namelist);
1624
1625         return result;
1626 }
1627
1628 /*
1629  * SplitIdentifierString --- parse a string containing identifiers
1630  *
1631  * This is the guts of textToQualifiedNameList, and is exported for use in
1632  * other situations such as parsing GUC variables.      In the GUC case, it's
1633  * important to avoid memory leaks, so the API is designed to minimize the
1634  * amount of stuff that needs to be allocated and freed.
1635  *
1636  * Inputs:
1637  *      rawstring: the input string; must be overwritable!      On return, it's
1638  *                         been modified to contain the separated identifiers.
1639  *      separator: the separator punctuation expected between identifiers
1640  *                         (typically '.' or ',').      Whitespace may also appear around
1641  *                         identifiers.
1642  * Outputs:
1643  *      namelist: filled with a palloc'd list of pointers to identifiers within
1644  *                        rawstring.  Caller should freeList() this even on error return.
1645  *
1646  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1647  *
1648  * Note that an empty string is considered okay here, though not in
1649  * textToQualifiedNameList.
1650  */
1651 bool
1652 SplitIdentifierString(char *rawstring, char separator,
1653                                           List **namelist)
1654 {
1655         char       *nextp = rawstring;
1656         bool            done = false;
1657
1658         *namelist = NIL;
1659
1660         while (isspace((unsigned char) *nextp))
1661                 nextp++;                                /* skip leading whitespace */
1662
1663         if (*nextp == '\0')
1664                 return true;                    /* allow empty string */
1665
1666         /* At the top of the loop, we are at start of a new identifier. */
1667         do
1668         {
1669                 char       *curname;
1670                 char       *endp;
1671                 int                     curlen;
1672
1673                 if (*nextp == '\"')
1674                 {
1675                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1676                         curname = nextp + 1;
1677                         for (;;)
1678                         {
1679                                 endp = strchr(nextp + 1, '\"');
1680                                 if (endp == NULL)
1681                                         return false;           /* mismatched quotes */
1682                                 if (endp[1] != '\"')
1683                                         break;          /* found end of quoted name */
1684                                 /* Collapse adjacent quotes into one quote, and look again */
1685                                 memmove(endp, endp + 1, strlen(endp));
1686                                 nextp = endp;
1687                         }
1688                         /* endp now points at the terminating quote */
1689                         nextp = endp + 1;
1690                 }
1691                 else
1692                 {
1693                         /* Unquoted name --- extends to separator or whitespace */
1694                         curname = nextp;
1695                         while (*nextp && *nextp != separator &&
1696                                    !isspace((unsigned char) *nextp))
1697                         {
1698                                 /*
1699                                  * It's important that this match the identifier
1700                                  * downcasing code used by backend/parser/scan.l.
1701                                  */
1702                                 if (isupper((unsigned char) *nextp))
1703                                         *nextp = tolower((unsigned char) *nextp);
1704                                 nextp++;
1705                         }
1706                         endp = nextp;
1707                         if (curname == nextp)
1708                                 return false;   /* empty unquoted name not allowed */
1709                 }
1710
1711                 while (isspace((unsigned char) *nextp))
1712                         nextp++;                        /* skip trailing whitespace */
1713
1714                 if (*nextp == separator)
1715                 {
1716                         nextp++;
1717                         while (isspace((unsigned char) *nextp))
1718                                 nextp++;                /* skip leading whitespace for next */
1719                         /* we expect another name, so done remains false */
1720                 }
1721                 else if (*nextp == '\0')
1722                         done = true;
1723                 else
1724                         return false;           /* invalid syntax */
1725
1726                 /* Now safe to overwrite separator with a null */
1727                 *endp = '\0';
1728
1729                 /* Truncate name if it's overlength; again, should match scan.l */
1730                 curlen = strlen(curname);
1731                 if (curlen >= NAMEDATALEN)
1732                 {
1733                         curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1734                         curname[curlen] = '\0';
1735                 }
1736
1737                 /*
1738                  * Finished isolating current name --- add it to list
1739                  */
1740                 *namelist = lappend(*namelist, curname);
1741
1742                 /* Loop back if we didn't reach end of string */
1743         } while (!done);
1744
1745         return true;
1746 }
1747
1748
1749 /*****************************************************************************
1750  *      Comparison Functions used for bytea
1751  *
1752  * Note: btree indexes need these routines not to leak memory; therefore,
1753  * be careful to free working copies of toasted datums.  Most places don't
1754  * need to be so careful.
1755  *****************************************************************************/
1756
1757 Datum
1758 byteaeq(PG_FUNCTION_ARGS)
1759 {
1760         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1761         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1762         int                     len1,
1763                                 len2;
1764         bool            result;
1765
1766         len1 = VARSIZE(arg1) - VARHDRSZ;
1767         len2 = VARSIZE(arg2) - VARHDRSZ;
1768
1769         /* fast path for different-length inputs */
1770         if (len1 != len2)
1771                 result = false;
1772         else
1773                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1774
1775         PG_FREE_IF_COPY(arg1, 0);
1776         PG_FREE_IF_COPY(arg2, 1);
1777
1778         PG_RETURN_BOOL(result);
1779 }
1780
1781 Datum
1782 byteane(PG_FUNCTION_ARGS)
1783 {
1784         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1785         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1786         int                     len1,
1787                                 len2;
1788         bool            result;
1789
1790         len1 = VARSIZE(arg1) - VARHDRSZ;
1791         len2 = VARSIZE(arg2) - VARHDRSZ;
1792
1793         /* fast path for different-length inputs */
1794         if (len1 != len2)
1795                 result = true;
1796         else
1797                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1798
1799         PG_FREE_IF_COPY(arg1, 0);
1800         PG_FREE_IF_COPY(arg2, 1);
1801
1802         PG_RETURN_BOOL(result);
1803 }
1804
1805 Datum
1806 bytealt(PG_FUNCTION_ARGS)
1807 {
1808         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1809         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1810         int                     len1,
1811                                 len2;
1812         int                     cmp;
1813
1814         len1 = VARSIZE(arg1) - VARHDRSZ;
1815         len2 = VARSIZE(arg2) - VARHDRSZ;
1816
1817         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1818
1819         PG_FREE_IF_COPY(arg1, 0);
1820         PG_FREE_IF_COPY(arg2, 1);
1821
1822         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1823 }
1824
1825 Datum
1826 byteale(PG_FUNCTION_ARGS)
1827 {
1828         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1829         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1830         int                     len1,
1831                                 len2;
1832         int                     cmp;
1833
1834         len1 = VARSIZE(arg1) - VARHDRSZ;
1835         len2 = VARSIZE(arg2) - VARHDRSZ;
1836
1837         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1838
1839         PG_FREE_IF_COPY(arg1, 0);
1840         PG_FREE_IF_COPY(arg2, 1);
1841
1842         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1843 }
1844
1845 Datum
1846 byteagt(PG_FUNCTION_ARGS)
1847 {
1848         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1849         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1850         int                     len1,
1851                                 len2;
1852         int                     cmp;
1853
1854         len1 = VARSIZE(arg1) - VARHDRSZ;
1855         len2 = VARSIZE(arg2) - VARHDRSZ;
1856
1857         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1858
1859         PG_FREE_IF_COPY(arg1, 0);
1860         PG_FREE_IF_COPY(arg2, 1);
1861
1862         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1863 }
1864
1865 Datum
1866 byteage(PG_FUNCTION_ARGS)
1867 {
1868         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1869         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1870         int                     len1,
1871                                 len2;
1872         int                     cmp;
1873
1874         len1 = VARSIZE(arg1) - VARHDRSZ;
1875         len2 = VARSIZE(arg2) - VARHDRSZ;
1876
1877         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1878
1879         PG_FREE_IF_COPY(arg1, 0);
1880         PG_FREE_IF_COPY(arg2, 1);
1881
1882         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1883 }
1884
1885 Datum
1886 byteacmp(PG_FUNCTION_ARGS)
1887 {
1888         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1889         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1890         int                     len1,
1891                                 len2;
1892         int                     cmp;
1893
1894         len1 = VARSIZE(arg1) - VARHDRSZ;
1895         len2 = VARSIZE(arg2) - VARHDRSZ;
1896
1897         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1898         if ((cmp == 0) && (len1 != len2))
1899                 cmp = (len1 < len2) ? -1 : 1;
1900
1901         PG_FREE_IF_COPY(arg1, 0);
1902         PG_FREE_IF_COPY(arg2, 1);
1903
1904         PG_RETURN_INT32(cmp);
1905 }
1906
1907 /*
1908  * replace_text
1909  * replace all occurrences of 'old_sub_str' in 'orig_str'
1910  * with 'new_sub_str' to form 'new_str'
1911  *
1912  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1913  * otherwise returns 'new_str'
1914  */
1915 Datum
1916 replace_text(PG_FUNCTION_ARGS)
1917 {
1918         text       *left_text;
1919         text       *right_text;
1920         text       *buf_text;
1921         text       *ret_text;
1922         int                     curr_posn;
1923         text       *src_text = PG_GETARG_TEXT_P(0);
1924         int                     src_text_len = TEXTLEN(src_text);
1925         text       *from_sub_text = PG_GETARG_TEXT_P(1);
1926         int                     from_sub_text_len = TEXTLEN(from_sub_text);
1927         text       *to_sub_text = PG_GETARG_TEXT_P(2);
1928         char       *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1929         StringInfo      str = makeStringInfo();
1930
1931         if (src_text_len == 0 || from_sub_text_len == 0)
1932                 PG_RETURN_TEXT_P(src_text);
1933
1934         buf_text = TEXTDUP(src_text);
1935         curr_posn = TEXTPOS(buf_text, from_sub_text);
1936
1937         while (curr_posn > 0)
1938         {
1939                 left_text = LEFT(buf_text, from_sub_text);
1940                 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1941
1942                 appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
1943                 appendStringInfoString(str, to_sub_str);
1944
1945                 pfree(buf_text);
1946                 pfree(left_text);
1947                 buf_text = right_text;
1948                 curr_posn = TEXTPOS(buf_text, from_sub_text);
1949         }
1950
1951         appendStringInfoString(str, PG_TEXT_GET_STR(buf_text));
1952         pfree(buf_text);
1953
1954         ret_text = PG_STR_GET_TEXT(str->data);
1955         pfree(str->data);
1956         pfree(str);
1957
1958         PG_RETURN_TEXT_P(ret_text);
1959 }
1960
1961 /*
1962  * split_text
1963  * parse input string
1964  * return ord item (1 based)
1965  * based on provided field separator
1966  */
1967 Datum
1968 split_text(PG_FUNCTION_ARGS)
1969 {
1970         text       *inputstring = PG_GETARG_TEXT_P(0);
1971         int                     inputstring_len = TEXTLEN(inputstring);
1972         text       *fldsep = PG_GETARG_TEXT_P(1);
1973         int                     fldsep_len = TEXTLEN(fldsep);
1974         int                     fldnum = PG_GETARG_INT32(2);
1975         int                     start_posn = 0;
1976         int                     end_posn = 0;
1977         text       *result_text;
1978
1979         /* return empty string for empty input string */
1980         if (inputstring_len < 1)
1981                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1982
1983         /* empty field separator */
1984         if (fldsep_len < 1)
1985         {
1986                 if (fldnum == 1)                /* first field - just return the input
1987                                                                  * string */
1988                         PG_RETURN_TEXT_P(inputstring);
1989                 else                                    /* otherwise return an empty string */
1990                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1991         }
1992
1993         /* field number is 1 based */
1994         if (fldnum < 1)
1995                 elog(ERROR, "field position must be > 0");
1996
1997         start_posn = text_position(PointerGetDatum(inputstring),
1998                                                            PointerGetDatum(fldsep),
1999                                                            fldnum - 1);
2000         end_posn = text_position(PointerGetDatum(inputstring),
2001                                                          PointerGetDatum(fldsep),
2002                                                          fldnum);
2003
2004         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2005         {
2006                 if (fldnum == 1)                /* first field - just return the input
2007                                                                  * string */
2008                         PG_RETURN_TEXT_P(inputstring);
2009                 else                                    /* otherwise return an empty string */
2010                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2011         }
2012         else if ((start_posn != 0) && (end_posn == 0))
2013         {
2014                 /* last field requested */
2015                 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
2016                 PG_RETURN_TEXT_P(result_text);
2017         }
2018         else if ((start_posn == 0) && (end_posn != 0))
2019         {
2020                 /* first field requested */
2021                 result_text = LEFT(inputstring, fldsep);
2022                 PG_RETURN_TEXT_P(result_text);
2023         }
2024         else
2025         {
2026                 /* prior to last field requested */
2027                 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
2028                 PG_RETURN_TEXT_P(result_text);
2029         }
2030 }
2031
2032 /*
2033  * text_to_array
2034  * parse input string
2035  * return text array of elements
2036  * based on provided field separator
2037  */
2038 Datum
2039 text_to_array(PG_FUNCTION_ARGS)
2040 {
2041         text       *inputstring = PG_GETARG_TEXT_P(0);
2042         int                     inputstring_len = TEXTLEN(inputstring);
2043         text       *fldsep = PG_GETARG_TEXT_P(1);
2044         int                     fldsep_len = TEXTLEN(fldsep);
2045         int                     fldnum;
2046         int                     start_posn = 0;
2047         int                     end_posn = 0;
2048         text       *result_text = NULL;
2049         ArrayBuildState *astate = NULL;
2050         MemoryContext oldcontext = CurrentMemoryContext;
2051
2052         /* return NULL for empty input string */
2053         if (inputstring_len < 1)
2054                 PG_RETURN_NULL();
2055
2056         /* empty field separator
2057          * return one element, 1D, array using the input string */
2058         if (fldsep_len < 1)
2059                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2060                                                           CStringGetDatum(inputstring), 1));
2061
2062         /* start with end position holding the initial start position */
2063         end_posn = 0;
2064         for (fldnum=1;;fldnum++)        /* field number is 1 based */
2065         {
2066                 Datum   dvalue;
2067                 bool    disnull = false;
2068
2069                 start_posn = end_posn;
2070                 end_posn = text_position(PointerGetDatum(inputstring),
2071                                                                  PointerGetDatum(fldsep),
2072                                                                  fldnum);
2073
2074                 if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
2075                 {
2076                         if (fldnum == 1)
2077                         {
2078                                 /* first element
2079                                  * return one element, 1D, array using the input string */
2080                                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2081                                                                           CStringGetDatum(inputstring), 1));
2082                         }
2083                         else
2084                         {
2085                                 /* otherwise create array and exit */
2086                                 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, oldcontext));
2087                         }
2088                 }
2089                 else if ((start_posn != 0) && (end_posn == 0))
2090                 {
2091                         /* last field requested */
2092                         result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
2093                 }
2094                 else if ((start_posn == 0) && (end_posn != 0))
2095                 {
2096                         /* first field requested */
2097                         result_text = LEFT(inputstring, fldsep);
2098                 }
2099                 else
2100                 {
2101                         /* prior to last field requested */
2102                         result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
2103                 }
2104
2105                 /* stash away current value */
2106                 dvalue = PointerGetDatum(result_text);
2107                 astate = accumArrayResult(astate, dvalue,
2108                                                                   disnull, TEXTOID, oldcontext);
2109
2110         }
2111
2112         /* never reached -- keep compiler quiet */
2113         PG_RETURN_NULL();
2114 }
2115
2116 /*
2117  * array_to_text
2118  * concatenate Cstring representation of input array elements
2119  * using provided field separator
2120  */
2121 Datum
2122 array_to_text(PG_FUNCTION_ARGS)
2123 {
2124         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2125         char       *fldsep = PG_TEXTARG_GET_STR(1);
2126         int                     nitems, *dims, ndims;
2127         char       *p;
2128         Oid                     element_type;
2129         int                     typlen;
2130         bool            typbyval;
2131         char            typdelim;
2132         Oid                     typoutput,
2133                                 typelem;
2134         FmgrInfo        outputproc;
2135         char            typalign;
2136         StringInfo      result_str = makeStringInfo();
2137         int                     i;
2138         ArrayMetaState *my_extra;
2139
2140         p = ARR_DATA_PTR(v);
2141         ndims = ARR_NDIM(v);
2142         dims = ARR_DIMS(v);
2143         nitems = ArrayGetNItems(ndims, dims);
2144
2145         /* if there are no elements, return an empty string */
2146         if (nitems == 0)
2147                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2148
2149         element_type = ARR_ELEMTYPE(v);
2150
2151         /*
2152          * We arrange to look up info about element type, including its output
2153          * conversion proc only once per series of calls, assuming the element
2154          * type doesn't change underneath us.
2155          */
2156         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2157         if (my_extra == NULL)
2158         {
2159                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2160                                                                                                          sizeof(ArrayMetaState));
2161                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2162                 my_extra->element_type = InvalidOid;
2163         }
2164
2165         if (my_extra->element_type != element_type)
2166         {
2167                 /* Get info about element type, including its output conversion proc */
2168                 get_type_metadata(element_type, IOFunc_output,
2169                                                         &typlen, &typbyval, &typdelim,
2170                                                         &typelem, &typoutput, &typalign);
2171                 fmgr_info(typoutput, &outputproc);
2172
2173                 my_extra->element_type = element_type;
2174                 my_extra->typlen = typlen;
2175                 my_extra->typbyval = typbyval;
2176                 my_extra->typdelim = typdelim;
2177                 my_extra->typelem = typelem;
2178                 my_extra->typiofunc = typoutput;
2179                 my_extra->typalign = typalign;
2180                 my_extra->proc = outputproc;
2181         }
2182         else
2183         {
2184                 typlen = my_extra->typlen;
2185                 typbyval = my_extra->typbyval;
2186                 typdelim = my_extra->typdelim;
2187                 typelem = my_extra->typelem;
2188                 typoutput = my_extra->typiofunc;
2189                 typalign = my_extra->typalign;
2190                 outputproc = my_extra->proc;
2191         }
2192
2193         for (i = 0; i < nitems; i++)
2194         {
2195                 Datum           itemvalue;
2196                 char       *value;
2197
2198                 itemvalue = fetch_att(p, typbyval, typlen);
2199
2200                 value = DatumGetCString(FunctionCall3(&outputproc,
2201                                                                                           itemvalue,
2202                                                                                           ObjectIdGetDatum(typelem),
2203                                                                                           Int32GetDatum(-1)));
2204
2205                 if (i > 0)
2206                         appendStringInfo(result_str, "%s%s", fldsep, value);
2207                 else
2208                         appendStringInfo(result_str, "%s", value);
2209
2210                 p = att_addlength(p, typlen, PointerGetDatum(p));
2211                 p = (char *) att_align(p, typalign);
2212         }
2213
2214         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2215 }
2216
2217 #define HEXBASE 16
2218 /*
2219  * Convert a int32 to a string containing a base 16 (hex) representation of
2220  * the number.
2221  */
2222 Datum
2223 to_hex32(PG_FUNCTION_ARGS)
2224 {
2225         static char digits[] = "0123456789abcdef";
2226         char            buf[32];                /* bigger than needed, but reasonable */
2227         char       *ptr;
2228         text       *result_text;
2229         int32           value = PG_GETARG_INT32(0);
2230
2231         ptr = buf + sizeof(buf) - 1;
2232         *ptr = '\0';
2233
2234         do
2235         {
2236                 *--ptr = digits[value % HEXBASE];
2237                 value /= HEXBASE;
2238         } while (ptr > buf && value);
2239
2240         result_text = PG_STR_GET_TEXT(ptr);
2241         PG_RETURN_TEXT_P(result_text);
2242 }
2243
2244 /*
2245  * Convert a int64 to a string containing a base 16 (hex) representation of
2246  * the number.
2247  */
2248 Datum
2249 to_hex64(PG_FUNCTION_ARGS)
2250 {
2251         static char digits[] = "0123456789abcdef";
2252         char            buf[32];                /* bigger than needed, but reasonable */
2253         char       *ptr;
2254         text       *result_text;
2255         int64           value = PG_GETARG_INT64(0);
2256
2257         ptr = buf + sizeof(buf) - 1;
2258         *ptr = '\0';
2259
2260         do
2261         {
2262                 *--ptr = digits[value % HEXBASE];
2263                 value /= HEXBASE;
2264         } while (ptr > buf && value);
2265
2266         result_text = PG_STR_GET_TEXT(ptr);
2267         PG_RETURN_TEXT_P(result_text);
2268 }
2269
2270 /*
2271  * Create an md5 hash of a text string and return it as hex
2272  *
2273  * md5 produces a 16 byte (128 bit) hash; double it for hex
2274  */
2275 #define MD5_HASH_LEN  32
2276
2277 Datum
2278 md5_text(PG_FUNCTION_ARGS)
2279 {
2280         char       *buff = PG_TEXT_GET_STR(PG_GETARG_TEXT_P(0));
2281         size_t          len = strlen(buff);
2282         char       *hexsum;
2283         text       *result_text;
2284
2285         /* leave room for the terminating '\0' */
2286         hexsum = (char *) palloc(MD5_HASH_LEN + 1);
2287
2288         /* get the hash result */
2289         md5_hash((void *) buff, len, hexsum);
2290
2291         /* convert to text and return it */
2292         result_text = PG_STR_GET_TEXT(hexsum);
2293         PG_RETURN_TEXT_P(result_text);
2294 }