granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.94 2002/12/06 05:20:17 momjian Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18
  19 #include "mb/pg_wchar.h"
  20 #include "miscadmin.h"
  21 #include "access/tuptoaster.h"
  22 #include "lib/stringinfo.h"
  23 #include "utils/builtins.h"
  24 #include "utils/pg_locale.h"
  25
  26 extern bool md5_hash(const void *buff, size_t len, char *hexsum);
  27
  28 typedef struct varlena unknown;
  29
  30 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  31 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  32 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  33 #define PG_TEXTARG_GET_STR(arg_) \
  34         DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
  35 #define PG_TEXT_GET_STR(textp_) \
  36         DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
  37 #define PG_STR_GET_TEXT(str_) \
  38         DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
  39 #define TEXTLEN(textp) \
  40         text_length(PointerGetDatum(textp))
  41 #define TEXTPOS(buf_text, from_sub_text) \
  42         text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
  43 #define TEXTDUP(textp) \
  44         DatumGetTextPCopy(PointerGetDatum(textp))
  45 #define LEFT(buf_text, from_sub_text) \
  46         text_substring(PointerGetDatum(buf_text), \
  47                                         1, \
  48                                         TEXTPOS(buf_text, from_sub_text) - 1, false)
  49 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
  50         text_substring(PointerGetDatum(buf_text), \
  51                                         TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
  52                                         -1, true)
  53
  54 static int      text_cmp(text *arg1, text *arg2);
  55 static int32 text_length(Datum str);
  56 static int32 text_position(Datum str, Datum search_str, int matchnum);
  57 static text *text_substring(Datum str,
  58                            int32 start,
  59                            int32 length,
  60                            bool length_not_specified);
  61
  62
  63 /*****************************************************************************
  64  *       USER I/O ROUTINES                                                                                                               *
  65  *****************************************************************************/
  66
  67
/*
 * VAL maps a digit character to its numeric value; DIG maps a small
 * numeric value back to the corresponding digit character.
 */
#define VAL(CH)                 ((CH) - '0')
#define DIG(VAL)                ((VAL) + '0')
  70
  71 /*
  72  *              byteain                 - converts from printable representation of byte array
  73  *
  74  *              Non-printable characters must be passed as '\nnn' (octal) and are
  75  *              converted to internal form.  '\' must be passed as '\\'.
  76  *              elog(ERROR, ...) if bad form.
  77  *
  78  *              BUGS:
  79  *                              The input is scaned twice.
  80  *                              The error checking of input is minimal.
  81  */
  82 Datum
  83 byteain(PG_FUNCTION_ARGS)
  84 {
  85         char       *inputText = PG_GETARG_CSTRING(0);
  86         char       *tp;
  87         char       *rp;
  88         int                     byte;
  89         bytea      *result;
  90
  91         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
  92         {
  93                 if (tp[0] != '\\')
  94                         tp++;
  95                 else if ((tp[0] == '\\') &&
  96                                  (tp[1] >= '0' && tp[1] <= '3') &&
  97                                  (tp[2] >= '0' && tp[2] <= '7') &&
  98                                  (tp[3] >= '0' && tp[3] <= '7'))
  99                         tp += 4;
 100                 else if ((tp[0] == '\\') &&
 101                                  (tp[1] == '\\'))
 102                         tp += 2;
 103                 else
 104                 {
 105                         /*
 106                          * one backslash, not followed by 0 or ### valid octal
 107                          */
 108                         elog(ERROR, "Bad input string for type bytea");
 109                 }
 110         }
 111
 112         byte += VARHDRSZ;
 113         result = (bytea *) palloc(byte);
 114         result->vl_len = byte;          /* set varlena length */
 115
 116         tp = inputText;
 117         rp = result->vl_dat;
 118         while (*tp != '\0')
 119         {
 120                 if (tp[0] != '\\')
 121                         *rp++ = *tp++;
 122                 else if ((tp[0] == '\\') &&
 123                                  (tp[1] >= '0' && tp[1] <= '3') &&
 124                                  (tp[2] >= '0' && tp[2] <= '7') &&
 125                                  (tp[3] >= '0' && tp[3] <= '7'))
 126                 {
 127                         byte = VAL(tp[1]);
 128                         byte <<= 3;
 129                         byte += VAL(tp[2]);
 130                         byte <<= 3;
 131                         *rp++ = byte + VAL(tp[3]);
 132                         tp += 4;
 133                 }
 134                 else if ((tp[0] == '\\') &&
 135                                  (tp[1] == '\\'))
 136                 {
 137                         *rp++ = '\\';
 138                         tp += 2;
 139                 }
 140                 else
 141                 {
 142                         /*
 143                          * We should never get here. The first pass should not allow
 144                          * it.
 145                          */
 146                         elog(ERROR, "Bad input string for type bytea");
 147                 }
 148         }
 149
 150         PG_RETURN_BYTEA_P(result);
 151 }
 152
 153 /*
 154  *              byteaout                - converts to printable representation of byte array
 155  *
 156  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 157  *              '\\'.
 158  *
 159  *              NULL vlena should be an error--returning string with NULL for now.
 160  */
 161 Datum
 162 byteaout(PG_FUNCTION_ARGS)
 163 {
 164         bytea      *vlena = PG_GETARG_BYTEA_P(0);
 165         char       *result;
 166         char       *vp;
 167         char       *rp;
 168         int                     val;                    /* holds unprintable chars */
 169         int                     i;
 170         int                     len;
 171
 172         len = 1;                                        /* empty string has 1 char */
 173         vp = vlena->vl_dat;
 174         for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
 175         {
 176                 if (*vp == '\\')
 177                         len += 2;
 178                 else if (isprint((unsigned char) *vp))
 179                         len++;
 180                 else
 181                         len += 4;
 182         }
 183         rp = result = (char *) palloc(len);
 184         vp = vlena->vl_dat;
 185         for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
 186         {
 187                 if (*vp == '\\')
 188                 {
 189                         *rp++ = '\\';
 190                         *rp++ = '\\';
 191                 }
 192                 else if (isprint((unsigned char) *vp))
 193                         *rp++ = *vp;
 194                 else
 195                 {
 196                         val = *vp;
 197                         rp[0] = '\\';
 198                         rp[3] = DIG(val & 07);
 199                         val >>= 3;
 200                         rp[2] = DIG(val & 07);
 201                         val >>= 3;
 202                         rp[1] = DIG(val & 03);
 203                         rp += 4;
 204                 }
 205         }
 206         *rp = '\0';
 207         PG_RETURN_CSTRING(result);
 208 }
 209
 210
 211 /*
 212  *              textin                  - converts "..." to internal representation
 213  */
 214 Datum
 215 textin(PG_FUNCTION_ARGS)
 216 {
 217         char       *inputText = PG_GETARG_CSTRING(0);
 218         text       *result;
 219         int                     len;
 220
 221         char       *ermsg;
 222
 223         len = strlen(inputText) + VARHDRSZ;
 224
 225         if ((ermsg = pg_verifymbstr(inputText, len - VARHDRSZ)))
 226                 elog(ERROR, "%s", ermsg);
 227
 228         result = (text *) palloc(len);
 229         VARATT_SIZEP(result) = len;
 230
 231         memcpy(VARDATA(result), inputText, len - VARHDRSZ);
 232
 233 #ifdef CYR_RECODE
 234         convertstr(VARDATA(result), len - VARHDRSZ, 0);
 235 #endif
 236
 237         PG_RETURN_TEXT_P(result);
 238 }
 239
 240 /*
 241  *              textout                 - converts internal representation to "..."
 242  */
 243 Datum
 244 textout(PG_FUNCTION_ARGS)
 245 {
 246         text       *t = PG_GETARG_TEXT_P(0);
 247         int                     len;
 248         char       *result;
 249
 250         len = VARSIZE(t) - VARHDRSZ;
 251         result = (char *) palloc(len + 1);
 252         memcpy(result, VARDATA(t), len);
 253         result[len] = '\0';
 254
 255 #ifdef CYR_RECODE
 256         convertstr(result, len, 1);
 257 #endif
 258
 259         PG_RETURN_CSTRING(result);
 260 }
 261
 262
 263 /*
 264  *              unknownin                       - converts "..." to internal representation
 265  */
 266 Datum
 267 unknownin(PG_FUNCTION_ARGS)
 268 {
 269         char       *inputStr = PG_GETARG_CSTRING(0);
 270         unknown    *result;
 271         int                     len;
 272
 273         len = strlen(inputStr) + VARHDRSZ;
 274
 275         result = (unknown *) palloc(len);
 276         VARATT_SIZEP(result) = len;
 277
 278         memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
 279
 280         PG_RETURN_UNKNOWN_P(result);
 281 }
 282
 283
 284 /*
 285  *              unknownout                      - converts internal representation to "..."
 286  */
 287 Datum
 288 unknownout(PG_FUNCTION_ARGS)
 289 {
 290         unknown    *t = PG_GETARG_UNKNOWN_P(0);
 291         int                     len;
 292         char       *result;
 293
 294         len = VARSIZE(t) - VARHDRSZ;
 295         result = (char *) palloc(len + 1);
 296         memcpy(result, VARDATA(t), len);
 297         result[len] = '\0';
 298
 299         PG_RETURN_CSTRING(result);
 300 }
 301
 302
 303 /* ========== PUBLIC ROUTINES ========== */
 304
 305 /*
 306  * textlen -
 307  *        returns the logical length of a text*
 308  *         (which is less than the VARSIZE of the text*)
 309  */
 310 Datum
 311 textlen(PG_FUNCTION_ARGS)
 312 {
 313         PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
 314 }
 315
 316 /*
 317  * text_length -
 318  *      Does the real work for textlen()
 319  *      This is broken out so it can be called directly by other string processing
 320  *      functions.
 321  */
 322 static int32
 323 text_length(Datum str)
 324 {
 325         /* fastpath when max encoding length is one */
 326         if (pg_database_encoding_max_length() == 1)
 327                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 328
 329         if (pg_database_encoding_max_length() > 1)
 330         {
 331                 text       *t = DatumGetTextP(str);
 332
 333                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
 334                                                                                          VARSIZE(t) - VARHDRSZ));
 335         }
 336
 337         /* should never get here */
 338         elog(ERROR, "Invalid backend encoding; encoding max length "
 339                  "is less than one.");
 340
 341         /* not reached: suppress compiler warning */
 342         return 0;
 343 }
 344
 345 /*
 346  * textoctetlen -
 347  *        returns the physical length of a text*
 348  *         (which is less than the VARSIZE of the text*)
 349  */
 350 Datum
 351 textoctetlen(PG_FUNCTION_ARGS)
 352 {
 353         PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
 354 }
 355
 356 /*
 357  * textcat -
 358  *        takes two text* and returns a text* that is the concatenation of
 359  *        the two.
 360  *
 361  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 362  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 363  * Allocate space for output in all cases.
 364  * XXX - thomas 1997-07-10
 365  */
 366 Datum
 367 textcat(PG_FUNCTION_ARGS)
 368 {
 369         text       *t1 = PG_GETARG_TEXT_P(0);
 370         text       *t2 = PG_GETARG_TEXT_P(1);
 371         int                     len1,
 372                                 len2,
 373                                 len;
 374         text       *result;
 375         char       *ptr;
 376
 377         len1 = (VARSIZE(t1) - VARHDRSZ);
 378         if (len1 < 0)
 379                 len1 = 0;
 380
 381         len2 = (VARSIZE(t2) - VARHDRSZ);
 382         if (len2 < 0)
 383                 len2 = 0;
 384
 385         len = len1 + len2 + VARHDRSZ;
 386         result = (text *) palloc(len);
 387
 388         /* Set size of result string... */
 389         VARATT_SIZEP(result) = len;
 390
 391         /* Fill data field of result string... */
 392         ptr = VARDATA(result);
 393         if (len1 > 0)
 394                 memcpy(ptr, VARDATA(t1), len1);
 395         if (len2 > 0)
 396                 memcpy(ptr + len1, VARDATA(t2), len2);
 397
 398         PG_RETURN_TEXT_P(result);
 399 }
 400
 401 /*
 402  * text_substr()
 403  * Return a substring starting at the specified position.
 404  * - thomas 1997-12-31
 405  *
 406  * Input:
 407  *      - string
 408  *      - starting position (is one-based)
 409  *      - string length
 410  *
 411  * If the starting position is zero or less, then return from the start of the string
 412  *      adjusting the length to be consistent with the "negative start" per SQL92.
 413  * If the length is less than zero, return the remaining string.
 414  *
 415  * Note that the arguments operate on octet length,
 416  *      so not aware of multibyte character sets.
 417  *
 418  * Added multibyte support.
 419  * - Tatsuo Ishii 1998-4-21
 420  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 421  * Formerly returned the entire string; now returns a portion.
 422  * - Thomas Lockhart 1998-12-10
 423  * Now uses faster TOAST-slicing interface
 424  * - John Gray 2002-02-22
 425  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 426  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 427  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 428  * S > LC and < LC + 4 sometimes garbage characters are returned.
 429  * - Joe Conway 2002-08-10
 430  */
 431 Datum
 432 text_substr(PG_FUNCTION_ARGS)
 433 {
 434         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 435                                                                         PG_GETARG_INT32(1),
 436                                                                         PG_GETARG_INT32(2),
 437                                                                         false));
 438 }
 439
 440 /*
 441  * text_substr_no_len -
 442  *        Wrapper to avoid opr_sanity failure due to
 443  *        one function accepting a different number of args.
 444  */
 445 Datum
 446 text_substr_no_len(PG_FUNCTION_ARGS)
 447 {
 448         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 449                                                                         PG_GETARG_INT32(1),
 450                                                                         -1, true));
 451 }
 452
 453 /*
 454  * text_substring -
 455  *      Does the real work for text_substr() and text_substr_no_len()
 456  *      This is broken out so it can be called directly by other string processing
 457  *      functions.
 458  */
 459 static text *
 460 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 461 {
 462         int32           eml = pg_database_encoding_max_length();
 463         int32           S = start;              /* start position */
 464         int32           S1;                             /* adjusted start position */
 465         int32           L1;                             /* adjusted substring length */
 466
 467         /* life is easy if the encoding max length is 1 */
 468         if (eml == 1)
 469         {
 470                 S1 = Max(S, 1);
 471
 472                 if (length_not_specified)               /* special case - get length to
 473                                                                                  * end of string */
 474                         L1 = -1;
 475                 else
 476                 {
 477                         /* end position */
 478                         int                     E = S + length;
 479
 480                         /*
 481                          * A negative value for L is the only way for the end position
 482                          * to be before the start. SQL99 says to throw an error.
 483                          */
 484                         if (E < S)
 485                                 elog(ERROR, "negative substring length not allowed");
 486
 487                         /*
 488                          * A zero or negative value for the end position can happen if
 489                          * the start was negative or one. SQL99 says to return a
 490                          * zero-length string.
 491                          */
 492                         if (E < 1)
 493                                 return PG_STR_GET_TEXT("");
 494
 495                         L1 = E - S1;
 496                 }
 497
 498                 /*
 499                  * If the start position is past the end of the string, SQL99 says
 500                  * to return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will
 501                  * do that for us. Convert to zero-based starting position
 502                  */
 503                 return DatumGetTextPSlice(str, S1 - 1, L1);
 504         }
 505         else if (eml > 1)
 506         {
 507                 /*
 508                  * When encoding max length is > 1, we can't get LC without
 509                  * detoasting, so we'll grab a conservatively large slice now and
 510                  * go back later to do the right thing
 511                  */
 512                 int32           slice_start;
 513                 int32           slice_size;
 514                 int32           slice_strlen;
 515                 text       *slice;
 516                 int32           E1;
 517                 int32           i;
 518                 char       *p;
 519                 char       *s;
 520                 text       *ret;
 521
 522                 /*
 523                  * if S is past the end of the string, the tuple toaster will
 524                  * return a zero-length string to us
 525                  */
 526                 S1 = Max(S, 1);
 527
 528                 /*
 529                  * We need to start at position zero because there is no way to
 530                  * know in advance which byte offset corresponds to the supplied
 531                  * start position.
 532                  */
 533                 slice_start = 0;
 534
 535                 if (length_not_specified)               /* special case - get length to
 536                                                                                  * end of string */
 537                         slice_size = L1 = -1;
 538                 else
 539                 {
 540                         int                     E = S + length;
 541
 542                         /*
 543                          * A negative value for L is the only way for the end position
 544                          * to be before the start. SQL99 says to throw an error.
 545                          */
 546                         if (E < S)
 547                                 elog(ERROR, "negative substring length not allowed");
 548
 549                         /*
 550                          * A zero or negative value for the end position can happen if
 551                          * the start was negative or one. SQL99 says to return a
 552                          * zero-length string.
 553                          */
 554                         if (E < 1)
 555                                 return PG_STR_GET_TEXT("");
 556
 557                         /*
 558                          * if E is past the end of the string, the tuple toaster will
 559                          * truncate the length for us
 560                          */
 561                         L1 = E - S1;
 562
 563                         /*
 564                          * Total slice size in bytes can't be any longer than the
 565                          * start position plus substring length times the encoding max
 566                          * length.
 567                          */
 568                         slice_size = (S1 + L1) * eml;
 569                 }
 570                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
 571
 572                 /* see if we got back an empty string */
 573                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
 574                         return PG_STR_GET_TEXT("");
 575
 576                 /* Now we can get the actual length of the slice in MB characters */
 577                 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
 578
 579                 /*
 580                  * Check that the start position wasn't > slice_strlen. If so,
 581                  * SQL99 says to return a zero-length string.
 582                  */
 583                 if (S1 > slice_strlen)
 584                         return PG_STR_GET_TEXT("");
 585
 586                 /*
 587                  * Adjust L1 and E1 now that we know the slice string length.
 588                  * Again remember that S1 is one based, and slice_start is zero
 589                  * based.
 590                  */
 591                 if (L1 > -1)
 592                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 593                 else
 594                         E1 = slice_start + 1 + slice_strlen;
 595
 596                 /*
 597                  * Find the start position in the slice; remember S1 is not zero
 598                  * based
 599                  */
 600                 p = VARDATA(slice);
 601                 for (i = 0; i < S1 - 1; i++)
 602                         p += pg_mblen(p);
 603
 604                 /* hang onto a pointer to our start position */
 605                 s = p;
 606
 607                 /*
 608                  * Count the actual bytes used by the substring of the requested
 609                  * length.
 610                  */
 611                 for (i = S1; i < E1; i++)
 612                         p += pg_mblen(p);
 613
 614                 ret = (text *) palloc(VARHDRSZ + (p - s));
 615                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
 616                 memcpy(VARDATA(ret), s, (p - s));
 617
 618                 return ret;
 619         }
 620         else
 621                 elog(ERROR, "Invalid backend encoding; encoding max length "
 622                          "is less than one.");
 623
 624         /* not reached: suppress compiler warning */
 625         return PG_STR_GET_TEXT("");
 626 }
 627
 628 /*
 629  * textpos -
 630  *        Return the position of the specified substring.
 631  *        Implements the SQL92 POSITION() function.
 632  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 633  * - thomas 1997-07-27
 634  */
 635 Datum
 636 textpos(PG_FUNCTION_ARGS)
 637 {
 638         PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
 639 }
 640
 641 /*
 642  * text_position -
 643  *      Does the real work for textpos()
 644  *      This is broken out so it can be called directly by other string processing
 645  *      functions.
 646  */
 647 static int32
 648 text_position(Datum str, Datum search_str, int matchnum)
 649 {
 650         int                     eml = pg_database_encoding_max_length();
 651         text       *t1 = DatumGetTextP(str);
 652         text       *t2 = DatumGetTextP(search_str);
 653         int                     match = 0,
 654                                 pos = 0,
 655                                 p = 0,
 656                                 px,
 657                                 len1,
 658                                 len2;
 659
 660         if (matchnum == 0)
 661                 return 0;                               /* result for 0th match */
 662
 663         if (VARSIZE(t2) <= VARHDRSZ)
 664                 PG_RETURN_INT32(1);             /* result for empty pattern */
 665
 666         len1 = (VARSIZE(t1) - VARHDRSZ);
 667         len2 = (VARSIZE(t2) - VARHDRSZ);
 668
 669         /* no use in searching str past point where search_str will fit */
 670         px = (len1 - len2);
 671
 672         if (eml == 1)                           /* simple case - single byte encoding */
 673         {
 674                 char       *p1,
 675                                    *p2;
 676
 677                 p1 = VARDATA(t1);
 678                 p2 = VARDATA(t2);
 679
 680                 for (p = 0; p <= px; p++)
 681                 {
 682                         if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
 683                         {
 684                                 if (++match == matchnum)
 685                                 {
 686                                         pos = p + 1;
 687                                         break;
 688                                 }
 689                         }
 690                         p1++;
 691                 }
 692         }
 693         else if (eml > 1)                       /* not as simple - multibyte encoding */
 694         {
 695                 pg_wchar   *p1,
 696                                    *p2,
 697                                    *ps1,
 698                                    *ps2;
 699
 700                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 701                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
 702                 len1 = pg_wchar_strlen(p1);
 703                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 704                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
 705                 len2 = pg_wchar_strlen(p2);
 706
 707                 for (p = 0; p <= px; p++)
 708                 {
 709                         if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
 710                         {
 711                                 if (++match == matchnum)
 712                                 {
 713                                         pos = p + 1;
 714                                         break;
 715                                 }
 716                         }
 717                         p1++;
 718                 }
 719
 720                 pfree(ps1);
 721                 pfree(ps2);
 722         }
 723         else
 724                 elog(ERROR, "Invalid backend encoding; encoding max length "
 725                          "is less than one.");
 726
 727         PG_RETURN_INT32(pos);
 728 }
 729
 730 /* varstr_cmp()
 731  * Comparison function for text strings with given lengths.
 732  * Includes locale support, but must copy strings to temporary memory
 733  *      to allow null-termination for inputs to strcoll().
 734  * Returns -1, 0 or 1
 735  */
 736 int
 737 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 738 {
 739         int                     result;
 740
 741         /*
 742          * Unfortunately, there is no strncoll(), so in the non-C locale case
 743          * we have to do some memory copying.  This turns out to be
 744          * significantly slower, so we optimize the case where LC_COLLATE is
 745          * C.  We also try to optimize relatively-short strings by avoiding
 746          * palloc/pfree overhead.
 747          */
 748 #define STACKBUFLEN             1024
 749
 750         if (!lc_collate_is_c())
 751         {
 752                 char    a1buf[STACKBUFLEN];
 753                 char    a2buf[STACKBUFLEN];
 754                 char   *a1p,
 755                            *a2p;
 756
 757                 if (len1 >= STACKBUFLEN)
 758                         a1p = (char *) palloc(len1 + 1);
 759                 else
 760                         a1p = a1buf;
 761                 if (len2 >= STACKBUFLEN)
 762                         a2p = (char *) palloc(len2 + 1);
 763                 else
 764                         a2p = a2buf;
 765
 766                 memcpy(a1p, arg1, len1);
 767                 a1p[len1] = '\0';
 768                 memcpy(a2p, arg2, len2);
 769                 a2p[len2] = '\0';
 770
 771                 result = strcoll(a1p, a2p);
 772
 773                 if (len1 >= STACKBUFLEN)
 774                         pfree(a1p);
 775                 if (len2 >= STACKBUFLEN)
 776                         pfree(a2p);
 777         }
 778         else
 779         {
 780                 result = strncmp(arg1, arg2, Min(len1, len2));
 781                 if ((result == 0) && (len1 != len2))
 782                         result = (len1 < len2) ? -1 : 1;
 783         }
 784
 785         return result;
 786 }
 787
 788
 789 /* text_cmp()
 790  * Internal comparison function for text strings.
 791  * Returns -1, 0 or 1
 792  */
 793 static int
 794 text_cmp(text *arg1, text *arg2)
 795 {
 796         char       *a1p,
 797                            *a2p;
 798         int                     len1,
 799                                 len2;
 800
 801         a1p = VARDATA(arg1);
 802         a2p = VARDATA(arg2);
 803
 804         len1 = VARSIZE(arg1) - VARHDRSZ;
 805         len2 = VARSIZE(arg2) - VARHDRSZ;
 806
 807         return varstr_cmp(a1p, len1, a2p, len2);
 808 }
 809
 810 /*
 811  * Comparison functions for text strings.
 812  *
 813  * Note: btree indexes need these routines not to leak memory; therefore,
 814  * be careful to free working copies of toasted datums.  Most places don't
 815  * need to be so careful.
 816  */
 817
 818 Datum
 819 texteq(PG_FUNCTION_ARGS)
 820 {
 821         text       *arg1 = PG_GETARG_TEXT_P(0);
 822         text       *arg2 = PG_GETARG_TEXT_P(1);
 823         bool            result;
 824
 825         /* fast path for different-length inputs */
 826         if (VARSIZE(arg1) != VARSIZE(arg2))
 827                 result = false;
 828         else
 829                 result = (text_cmp(arg1, arg2) == 0);
 830
 831         PG_FREE_IF_COPY(arg1, 0);
 832         PG_FREE_IF_COPY(arg2, 1);
 833
 834         PG_RETURN_BOOL(result);
 835 }
 836
 837 Datum
 838 textne(PG_FUNCTION_ARGS)
 839 {
 840         text       *arg1 = PG_GETARG_TEXT_P(0);
 841         text       *arg2 = PG_GETARG_TEXT_P(1);
 842         bool            result;
 843
 844         /* fast path for different-length inputs */
 845         if (VARSIZE(arg1) != VARSIZE(arg2))
 846                 result = true;
 847         else
 848                 result = (text_cmp(arg1, arg2) != 0);
 849
 850         PG_FREE_IF_COPY(arg1, 0);
 851         PG_FREE_IF_COPY(arg2, 1);
 852
 853         PG_RETURN_BOOL(result);
 854 }
 855
 856 Datum
 857 text_lt(PG_FUNCTION_ARGS)
 858 {
 859         text       *arg1 = PG_GETARG_TEXT_P(0);
 860         text       *arg2 = PG_GETARG_TEXT_P(1);
 861         bool            result;
 862
 863         result = (text_cmp(arg1, arg2) < 0);
 864
 865         PG_FREE_IF_COPY(arg1, 0);
 866         PG_FREE_IF_COPY(arg2, 1);
 867
 868         PG_RETURN_BOOL(result);
 869 }
 870
 871 Datum
 872 text_le(PG_FUNCTION_ARGS)
 873 {
 874         text       *arg1 = PG_GETARG_TEXT_P(0);
 875         text       *arg2 = PG_GETARG_TEXT_P(1);
 876         bool            result;
 877
 878         result = (text_cmp(arg1, arg2) <= 0);
 879
 880         PG_FREE_IF_COPY(arg1, 0);
 881         PG_FREE_IF_COPY(arg2, 1);
 882
 883         PG_RETURN_BOOL(result);
 884 }
 885
 886 Datum
 887 text_gt(PG_FUNCTION_ARGS)
 888 {
 889         text       *arg1 = PG_GETARG_TEXT_P(0);
 890         text       *arg2 = PG_GETARG_TEXT_P(1);
 891         bool            result;
 892
 893         result = (text_cmp(arg1, arg2) > 0);
 894
 895         PG_FREE_IF_COPY(arg1, 0);
 896         PG_FREE_IF_COPY(arg2, 1);
 897
 898         PG_RETURN_BOOL(result);
 899 }
 900
 901 Datum
 902 text_ge(PG_FUNCTION_ARGS)
 903 {
 904         text       *arg1 = PG_GETARG_TEXT_P(0);
 905         text       *arg2 = PG_GETARG_TEXT_P(1);
 906         bool            result;
 907
 908         result = (text_cmp(arg1, arg2) >= 0);
 909
 910         PG_FREE_IF_COPY(arg1, 0);
 911         PG_FREE_IF_COPY(arg2, 1);
 912
 913         PG_RETURN_BOOL(result);
 914 }
 915
 916 Datum
 917 bttextcmp(PG_FUNCTION_ARGS)
 918 {
 919         text       *arg1 = PG_GETARG_TEXT_P(0);
 920         text       *arg2 = PG_GETARG_TEXT_P(1);
 921         int32           result;
 922
 923         result = text_cmp(arg1, arg2);
 924
 925         PG_FREE_IF_COPY(arg1, 0);
 926         PG_FREE_IF_COPY(arg2, 1);
 927
 928         PG_RETURN_INT32(result);
 929 }
 930
 931
 932 Datum
 933 text_larger(PG_FUNCTION_ARGS)
 934 {
 935         text       *arg1 = PG_GETARG_TEXT_P(0);
 936         text       *arg2 = PG_GETARG_TEXT_P(1);
 937         text       *result;
 938
 939         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
 940
 941         PG_RETURN_TEXT_P(result);
 942 }
 943
 944 Datum
 945 text_smaller(PG_FUNCTION_ARGS)
 946 {
 947         text       *arg1 = PG_GETARG_TEXT_P(0);
 948         text       *arg2 = PG_GETARG_TEXT_P(1);
 949         text       *result;
 950
 951         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
 952
 953         PG_RETURN_TEXT_P(result);
 954 }
 955
 956 /*-------------------------------------------------------------
 957  * byteaoctetlen
 958  *
 959  * get the number of bytes contained in an instance of type 'bytea'
 960  *-------------------------------------------------------------
 961  */
 962 Datum
 963 byteaoctetlen(PG_FUNCTION_ARGS)
 964 {
 965         PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
 966 }
 967
 968 /*
 969  * byteacat -
 970  *        takes two bytea* and returns a bytea* that is the concatenation of
 971  *        the two.
 972  *
 973  * Cloned from textcat and modified as required.
 974  */
 975 Datum
 976 byteacat(PG_FUNCTION_ARGS)
 977 {
 978         bytea      *t1 = PG_GETARG_BYTEA_P(0);
 979         bytea      *t2 = PG_GETARG_BYTEA_P(1);
 980         int                     len1,
 981                                 len2,
 982                                 len;
 983         bytea      *result;
 984         char       *ptr;
 985
 986         len1 = (VARSIZE(t1) - VARHDRSZ);
 987         if (len1 < 0)
 988                 len1 = 0;
 989
 990         len2 = (VARSIZE(t2) - VARHDRSZ);
 991         if (len2 < 0)
 992                 len2 = 0;
 993
 994         len = len1 + len2 + VARHDRSZ;
 995         result = (bytea *) palloc(len);
 996
 997         /* Set size of result string... */
 998         VARATT_SIZEP(result) = len;
 999
1000         /* Fill data field of result string... */
1001         ptr = VARDATA(result);
1002         if (len1 > 0)
1003                 memcpy(ptr, VARDATA(t1), len1);
1004         if (len2 > 0)
1005                 memcpy(ptr + len1, VARDATA(t2), len2);
1006
1007         PG_RETURN_BYTEA_P(result);
1008 }
1009
1010 #define PG_STR_GET_BYTEA(str_) \
1011         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1012 /*
1013  * bytea_substr()
1014  * Return a substring starting at the specified position.
1015  * Cloned from text_substr and modified as required.
1016  *
1017  * Input:
1018  *      - string
1019  *      - starting position (is one-based)
1020  *      - string length (optional)
1021  *
1022  * If the starting position is zero or less, then return from the start of the string
1023  * adjusting the length to be consistent with the "negative start" per SQL92.
1024  * If the length is less than zero, an ERROR is thrown. If no third argument
1025  * (length) is provided, the length to the end of the string is assumed.
1026  */
1027 Datum
1028 bytea_substr(PG_FUNCTION_ARGS)
1029 {
1030         int                     S = PG_GETARG_INT32(1); /* start position */
1031         int                     S1;                             /* adjusted start position */
1032         int                     L1;                             /* adjusted substring length */
1033
1034         S1 = Max(S, 1);
1035
1036         if (fcinfo->nargs == 2)
1037         {
1038                 /*
1039                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1040                  * everything to the end of the string if we pass it a negative
1041                  * value for length.
1042                  */
1043                 L1 = -1;
1044         }
1045         else
1046         {
1047                 /* end position */
1048                 int                     E = S + PG_GETARG_INT32(2);
1049
1050                 /*
1051                  * A negative value for L is the only way for the end position to
1052                  * be before the start. SQL99 says to throw an error.
1053                  */
1054                 if (E < S)
1055                         elog(ERROR, "negative substring length not allowed");
1056
1057                 /*
1058                  * A zero or negative value for the end position can happen if the
1059                  * start was negative or one. SQL99 says to return a zero-length
1060                  * string.
1061                  */
1062                 if (E < 1)
1063                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1064
1065                 L1 = E - S1;
1066         }
1067
1068         /*
1069          * If the start position is past the end of the string, SQL99 says to
1070          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
1071          * that for us. Convert to zero-based starting position
1072          */
1073         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1074 }
1075
1076 /*
1077  * bytea_substr_no_len -
1078  *        Wrapper to avoid opr_sanity failure due to
1079  *        one function accepting a different number of args.
1080  */
1081 Datum
1082 bytea_substr_no_len(PG_FUNCTION_ARGS)
1083 {
1084         return bytea_substr(fcinfo);
1085 }
1086
1087 /*
1088  * byteapos -
1089  *        Return the position of the specified substring.
1090  *        Implements the SQL92 POSITION() function.
1091  * Cloned from textpos and modified as required.
1092  */
1093 Datum
1094 byteapos(PG_FUNCTION_ARGS)
1095 {
1096         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1097         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1098         int                     pos;
1099         int                     px,
1100                                 p;
1101         int                     len1,
1102                                 len2;
1103         char       *p1,
1104                            *p2;
1105
1106         if (VARSIZE(t2) <= VARHDRSZ)
1107                 PG_RETURN_INT32(1);             /* result for empty pattern */
1108
1109         len1 = (VARSIZE(t1) - VARHDRSZ);
1110         len2 = (VARSIZE(t2) - VARHDRSZ);
1111
1112         p1 = VARDATA(t1);
1113         p2 = VARDATA(t2);
1114
1115         pos = 0;
1116         px = (len1 - len2);
1117         for (p = 0; p <= px; p++)
1118         {
1119                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1120                 {
1121                         pos = p + 1;
1122                         break;
1123                 };
1124                 p1++;
1125         };
1126
1127         PG_RETURN_INT32(pos);
1128 }
1129
1130 /*-------------------------------------------------------------
1131  * byteaGetByte
1132  *
1133  * this routine treats "bytea" as an array of bytes.
1134  * It returns the Nth byte (a number between 0 and 255).
1135  *-------------------------------------------------------------
1136  */
1137 Datum
1138 byteaGetByte(PG_FUNCTION_ARGS)
1139 {
1140         bytea      *v = PG_GETARG_BYTEA_P(0);
1141         int32           n = PG_GETARG_INT32(1);
1142         int                     len;
1143         int                     byte;
1144
1145         len = VARSIZE(v) - VARHDRSZ;
1146
1147         if (n < 0 || n >= len)
1148                 elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
1149                          n, len - 1);
1150
1151         byte = ((unsigned char *) VARDATA(v))[n];
1152
1153         PG_RETURN_INT32(byte);
1154 }
1155
1156 /*-------------------------------------------------------------
1157  * byteaGetBit
1158  *
1159  * This routine treats a "bytea" type like an array of bits.
1160  * It returns the value of the Nth bit (0 or 1).
1161  *
1162  *-------------------------------------------------------------
1163  */
1164 Datum
1165 byteaGetBit(PG_FUNCTION_ARGS)
1166 {
1167         bytea      *v = PG_GETARG_BYTEA_P(0);
1168         int32           n = PG_GETARG_INT32(1);
1169         int                     byteNo,
1170                                 bitNo;
1171         int                     len;
1172         int                     byte;
1173
1174         len = VARSIZE(v) - VARHDRSZ;
1175
1176         if (n < 0 || n >= len * 8)
1177                 elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
1178                          n, len * 8 - 1);
1179
1180         byteNo = n / 8;
1181         bitNo = n % 8;
1182
1183         byte = ((unsigned char *) VARDATA(v))[byteNo];
1184
1185         if (byte & (1 << bitNo))
1186                 PG_RETURN_INT32(1);
1187         else
1188                 PG_RETURN_INT32(0);
1189 }
1190
1191 /*-------------------------------------------------------------
1192  * byteaSetByte
1193  *
1194  * Given an instance of type 'bytea' creates a new one with
1195  * the Nth byte set to the given value.
1196  *
1197  *-------------------------------------------------------------
1198  */
1199 Datum
1200 byteaSetByte(PG_FUNCTION_ARGS)
1201 {
1202         bytea      *v = PG_GETARG_BYTEA_P(0);
1203         int32           n = PG_GETARG_INT32(1);
1204         int32           newByte = PG_GETARG_INT32(2);
1205         int                     len;
1206         bytea      *res;
1207
1208         len = VARSIZE(v) - VARHDRSZ;
1209
1210         if (n < 0 || n >= len)
1211                 elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
1212                          n, len - 1);
1213
1214         /*
1215          * Make a copy of the original varlena.
1216          */
1217         res = (bytea *) palloc(VARSIZE(v));
1218         memcpy((char *) res, (char *) v, VARSIZE(v));
1219
1220         /*
1221          * Now set the byte.
1222          */
1223         ((unsigned char *) VARDATA(res))[n] = newByte;
1224
1225         PG_RETURN_BYTEA_P(res);
1226 }
1227
1228 /*-------------------------------------------------------------
1229  * byteaSetBit
1230  *
1231  * Given an instance of type 'bytea' creates a new one with
1232  * the Nth bit set to the given value.
1233  *
1234  *-------------------------------------------------------------
1235  */
1236 Datum
1237 byteaSetBit(PG_FUNCTION_ARGS)
1238 {
1239         bytea      *v = PG_GETARG_BYTEA_P(0);
1240         int32           n = PG_GETARG_INT32(1);
1241         int32           newBit = PG_GETARG_INT32(2);
1242         bytea      *res;
1243         int                     len;
1244         int                     oldByte,
1245                                 newByte;
1246         int                     byteNo,
1247                                 bitNo;
1248
1249         len = VARSIZE(v) - VARHDRSZ;
1250
1251         if (n < 0 || n >= len * 8)
1252                 elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
1253                          n, len * 8 - 1);
1254
1255         byteNo = n / 8;
1256         bitNo = n % 8;
1257
1258         /*
1259          * sanity check!
1260          */
1261         if (newBit != 0 && newBit != 1)
1262                 elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
1263
1264         /*
1265          * Make a copy of the original varlena.
1266          */
1267         res = (bytea *) palloc(VARSIZE(v));
1268         memcpy((char *) res, (char *) v, VARSIZE(v));
1269
1270         /*
1271          * Update the byte.
1272          */
1273         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1274
1275         if (newBit == 0)
1276                 newByte = oldByte & (~(1 << bitNo));
1277         else
1278                 newByte = oldByte | (1 << bitNo);
1279
1280         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1281
1282         PG_RETURN_BYTEA_P(res);
1283 }
1284
1285
1286 /* text_name()
1287  * Converts a text type to a Name type.
1288  */
1289 Datum
1290 text_name(PG_FUNCTION_ARGS)
1291 {
1292         text       *s = PG_GETARG_TEXT_P(0);
1293         Name            result;
1294         int                     len;
1295
1296         len = VARSIZE(s) - VARHDRSZ;
1297
1298         /* Truncate oversize input */
1299         if (len >= NAMEDATALEN)
1300                 len = NAMEDATALEN - 1;
1301
1302 #ifdef STRINGDEBUG
1303         printf("text- convert string length %d (%d) ->%d\n",
1304                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1305 #endif
1306
1307         result = (Name) palloc(NAMEDATALEN);
1308         memcpy(NameStr(*result), VARDATA(s), len);
1309
1310         /* now null pad to full length... */
1311         while (len < NAMEDATALEN)
1312         {
1313                 *(NameStr(*result) + len) = '\0';
1314                 len++;
1315         }
1316
1317         PG_RETURN_NAME(result);
1318 }
1319
1320 /* name_text()
1321  * Converts a Name type to a text type.
1322  */
1323 Datum
1324 name_text(PG_FUNCTION_ARGS)
1325 {
1326         Name            s = PG_GETARG_NAME(0);
1327         text       *result;
1328         int                     len;
1329
1330         len = strlen(NameStr(*s));
1331
1332 #ifdef STRINGDEBUG
1333         printf("text- convert string length %d (%d) ->%d\n",
1334                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1335 #endif
1336
1337         result = palloc(VARHDRSZ + len);
1338         VARATT_SIZEP(result) = VARHDRSZ + len;
1339         memcpy(VARDATA(result), NameStr(*s), len);
1340
1341         PG_RETURN_TEXT_P(result);
1342 }
1343
1344
1345 /*
1346  * textToQualifiedNameList - convert a text object to list of names
1347  *
1348  * This implements the input parsing needed by nextval() and other
1349  * functions that take a text parameter representing a qualified name.
1350  * We split the name at dots, downcase if not double-quoted, and
1351  * truncate names if they're too long.
1352  */
1353 List *
1354 textToQualifiedNameList(text *textval, const char *caller)
1355 {
1356         char       *rawname;
1357         List       *result = NIL;
1358         List       *namelist;
1359         List       *l;
1360
1361         /* Convert to C string (handles possible detoasting). */
1362         /* Note we rely on being able to modify rawname below. */
1363         rawname = DatumGetCString(DirectFunctionCall1(textout,
1364                                                                                           PointerGetDatum(textval)));
1365
1366         if (!SplitIdentifierString(rawname, '.', &namelist))
1367                 elog(ERROR, "%s: invalid name syntax", caller);
1368
1369         if (namelist == NIL)
1370                 elog(ERROR, "%s: invalid name syntax", caller);
1371
1372         foreach(l, namelist)
1373         {
1374                 char       *curname = (char *) lfirst(l);
1375
1376                 result = lappend(result, makeString(pstrdup(curname)));
1377         }
1378
1379         pfree(rawname);
1380         freeList(namelist);
1381
1382         return result;
1383 }
1384
1385 /*
1386  * SplitIdentifierString --- parse a string containing identifiers
1387  *
1388  * This is the guts of textToQualifiedNameList, and is exported for use in
1389  * other situations such as parsing GUC variables.      In the GUC case, it's
1390  * important to avoid memory leaks, so the API is designed to minimize the
1391  * amount of stuff that needs to be allocated and freed.
1392  *
1393  * Inputs:
1394  *      rawstring: the input string; must be overwritable!      On return, it's
1395  *                         been modified to contain the separated identifiers.
1396  *      separator: the separator punctuation expected between identifiers
1397  *                         (typically '.' or ',').      Whitespace may also appear around
1398  *                         identifiers.
1399  * Outputs:
1400  *      namelist: filled with a palloc'd list of pointers to identifiers within
1401  *                        rawstring.  Caller should freeList() this even on error return.
1402  *
1403  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1404  *
1405  * Note that an empty string is considered okay here, though not in
1406  * textToQualifiedNameList.
1407  */
1408 bool
1409 SplitIdentifierString(char *rawstring, char separator,
1410                                           List **namelist)
1411 {
1412         char       *nextp = rawstring;
1413         bool            done = false;
1414
1415         *namelist = NIL;
1416
1417         while (isspace((unsigned char) *nextp))
1418                 nextp++;                                /* skip leading whitespace */
1419
1420         if (*nextp == '\0')
1421                 return true;                    /* allow empty string */
1422
1423         /* At the top of the loop, we are at start of a new identifier. */
1424         do
1425         {
1426                 char       *curname;
1427                 char       *endp;
1428                 int                     curlen;
1429
1430                 if (*nextp == '\"')
1431                 {
1432                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1433                         curname = nextp + 1;
1434                         for (;;)
1435                         {
1436                                 endp = strchr(nextp + 1, '\"');
1437                                 if (endp == NULL)
1438                                         return false;           /* mismatched quotes */
1439                                 if (endp[1] != '\"')
1440                                         break;          /* found end of quoted name */
1441                                 /* Collapse adjacent quotes into one quote, and look again */
1442                                 memmove(endp, endp + 1, strlen(endp));
1443                                 nextp = endp;
1444                         }
1445                         /* endp now points at the terminating quote */
1446                         nextp = endp + 1;
1447                 }
1448                 else
1449                 {
1450                         /* Unquoted name --- extends to separator or whitespace */
1451                         curname = nextp;
1452                         while (*nextp && *nextp != separator &&
1453                                    !isspace((unsigned char) *nextp))
1454                         {
1455                                 /*
1456                                  * It's important that this match the identifier
1457                                  * downcasing code used by backend/parser/scan.l.
1458                                  */
1459                                 if (isupper((unsigned char) *nextp))
1460                                         *nextp = tolower((unsigned char) *nextp);
1461                                 nextp++;
1462                         }
1463                         endp = nextp;
1464                         if (curname == nextp)
1465                                 return false;   /* empty unquoted name not allowed */
1466                 }
1467
1468                 while (isspace((unsigned char) *nextp))
1469                         nextp++;                        /* skip trailing whitespace */
1470
1471                 if (*nextp == separator)
1472                 {
1473                         nextp++;
1474                         while (isspace((unsigned char) *nextp))
1475                                 nextp++;                /* skip leading whitespace for next */
1476                         /* we expect another name, so done remains false */
1477                 }
1478                 else if (*nextp == '\0')
1479                         done = true;
1480                 else
1481                         return false;           /* invalid syntax */
1482
1483                 /* Now safe to overwrite separator with a null */
1484                 *endp = '\0';
1485
1486                 /* Truncate name if it's overlength; again, should match scan.l */
1487                 curlen = strlen(curname);
1488                 if (curlen >= NAMEDATALEN)
1489                 {
1490                         curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1491                         curname[curlen] = '\0';
1492                 }
1493
1494                 /*
1495                  * Finished isolating current name --- add it to list
1496                  */
1497                 *namelist = lappend(*namelist, curname);
1498
1499                 /* Loop back if we didn't reach end of string */
1500         } while (!done);
1501
1502         return true;
1503 }
1504
1505
1506 /*****************************************************************************
1507  *      Comparison Functions used for bytea
1508  *
1509  * Note: btree indexes need these routines not to leak memory; therefore,
1510  * be careful to free working copies of toasted datums.  Most places don't
1511  * need to be so careful.
1512  *****************************************************************************/
1513
1514 Datum
1515 byteaeq(PG_FUNCTION_ARGS)
1516 {
1517         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1518         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1519         int                     len1,
1520                                 len2;
1521         bool            result;
1522
1523         len1 = VARSIZE(arg1) - VARHDRSZ;
1524         len2 = VARSIZE(arg2) - VARHDRSZ;
1525
1526         /* fast path for different-length inputs */
1527         if (len1 != len2)
1528                 result = false;
1529         else
1530                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1531
1532         PG_FREE_IF_COPY(arg1, 0);
1533         PG_FREE_IF_COPY(arg2, 1);
1534
1535         PG_RETURN_BOOL(result);
1536 }
1537
1538 Datum
1539 byteane(PG_FUNCTION_ARGS)
1540 {
1541         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1542         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1543         int                     len1,
1544                                 len2;
1545         bool            result;
1546
1547         len1 = VARSIZE(arg1) - VARHDRSZ;
1548         len2 = VARSIZE(arg2) - VARHDRSZ;
1549
1550         /* fast path for different-length inputs */
1551         if (len1 != len2)
1552                 result = true;
1553         else
1554                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1555
1556         PG_FREE_IF_COPY(arg1, 0);
1557         PG_FREE_IF_COPY(arg2, 1);
1558
1559         PG_RETURN_BOOL(result);
1560 }
1561
1562 Datum
1563 bytealt(PG_FUNCTION_ARGS)
1564 {
1565         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1566         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1567         int                     len1,
1568                                 len2;
1569         int                     cmp;
1570
1571         len1 = VARSIZE(arg1) - VARHDRSZ;
1572         len2 = VARSIZE(arg2) - VARHDRSZ;
1573
1574         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1575
1576         PG_FREE_IF_COPY(arg1, 0);
1577         PG_FREE_IF_COPY(arg2, 1);
1578
1579         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1580 }
1581
1582 Datum
1583 byteale(PG_FUNCTION_ARGS)
1584 {
1585         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1586         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1587         int                     len1,
1588                                 len2;
1589         int                     cmp;
1590
1591         len1 = VARSIZE(arg1) - VARHDRSZ;
1592         len2 = VARSIZE(arg2) - VARHDRSZ;
1593
1594         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1595
1596         PG_FREE_IF_COPY(arg1, 0);
1597         PG_FREE_IF_COPY(arg2, 1);
1598
1599         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1600 }
1601
1602 Datum
1603 byteagt(PG_FUNCTION_ARGS)
1604 {
1605         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1606         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1607         int                     len1,
1608                                 len2;
1609         int                     cmp;
1610
1611         len1 = VARSIZE(arg1) - VARHDRSZ;
1612         len2 = VARSIZE(arg2) - VARHDRSZ;
1613
1614         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1615
1616         PG_FREE_IF_COPY(arg1, 0);
1617         PG_FREE_IF_COPY(arg2, 1);
1618
1619         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1620 }
1621
1622 Datum
1623 byteage(PG_FUNCTION_ARGS)
1624 {
1625         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1626         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1627         int                     len1,
1628                                 len2;
1629         int                     cmp;
1630
1631         len1 = VARSIZE(arg1) - VARHDRSZ;
1632         len2 = VARSIZE(arg2) - VARHDRSZ;
1633
1634         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1635
1636         PG_FREE_IF_COPY(arg1, 0);
1637         PG_FREE_IF_COPY(arg2, 1);
1638
1639         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1640 }
1641
1642 Datum
1643 byteacmp(PG_FUNCTION_ARGS)
1644 {
1645         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1646         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1647         int                     len1,
1648                                 len2;
1649         int                     cmp;
1650
1651         len1 = VARSIZE(arg1) - VARHDRSZ;
1652         len2 = VARSIZE(arg2) - VARHDRSZ;
1653
1654         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1655         if ((cmp == 0) && (len1 != len2))
1656                 cmp = (len1 < len2) ? -1 : 1;
1657
1658         PG_FREE_IF_COPY(arg1, 0);
1659         PG_FREE_IF_COPY(arg2, 1);
1660
1661         PG_RETURN_INT32(cmp);
1662 }
1663
1664 /*
1665  * replace_text
1666  * replace all occurences of 'old_sub_str' in 'orig_str'
1667  * with 'new_sub_str' to form 'new_str'
1668  *
1669  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1670  * otherwise returns 'new_str'
1671  */
1672 Datum
1673 replace_text(PG_FUNCTION_ARGS)
1674 {
1675         text       *left_text;
1676         text       *right_text;
1677         text       *buf_text;
1678         text       *ret_text;
1679         int                     curr_posn;
1680         text       *src_text = PG_GETARG_TEXT_P(0);
1681         int                     src_text_len = TEXTLEN(src_text);
1682         text       *from_sub_text = PG_GETARG_TEXT_P(1);
1683         int                     from_sub_text_len = TEXTLEN(from_sub_text);
1684         text       *to_sub_text = PG_GETARG_TEXT_P(2);
1685         char       *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1686         StringInfo      str = makeStringInfo();
1687
1688         if (src_text_len == 0 || from_sub_text_len == 0)
1689                 PG_RETURN_TEXT_P(src_text);
1690
1691         buf_text = TEXTDUP(src_text);
1692         curr_posn = TEXTPOS(buf_text, from_sub_text);
1693
1694         while (curr_posn > 0)
1695         {
1696                 left_text = LEFT(buf_text, from_sub_text);
1697                 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1698
1699                 appendStringInfo(str, PG_TEXT_GET_STR(left_text));
1700                 appendStringInfo(str, to_sub_str);
1701
1702                 pfree(buf_text);
1703                 pfree(left_text);
1704                 buf_text = right_text;
1705                 curr_posn = TEXTPOS(buf_text, from_sub_text);
1706         }
1707
1708         appendStringInfo(str, PG_TEXT_GET_STR(buf_text));
1709         pfree(buf_text);
1710
1711         ret_text = PG_STR_GET_TEXT(str->data);
1712         pfree(str->data);
1713         pfree(str);
1714
1715         PG_RETURN_TEXT_P(ret_text);
1716 }
1717
1718 /*
1719  * split_text
1720  * parse input string
1721  * return ord item (1 based)
1722  * based on provided field separator
1723  */
1724 Datum
1725 split_text(PG_FUNCTION_ARGS)
1726 {
1727         text       *inputstring = PG_GETARG_TEXT_P(0);
1728         int                     inputstring_len = TEXTLEN(inputstring);
1729         text       *fldsep = PG_GETARG_TEXT_P(1);
1730         int                     fldsep_len = TEXTLEN(fldsep);
1731         int                     fldnum = PG_GETARG_INT32(2);
1732         int                     start_posn = 0;
1733         int                     end_posn = 0;
1734         text       *result_text;
1735
1736         /* return empty string for empty input string */
1737         if (inputstring_len < 1)
1738                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1739
1740         /* empty field separator */
1741         if (fldsep_len < 1)
1742         {
1743                 if (fldnum == 1)                /* first field - just return the input
1744                                                                  * string */
1745                         PG_RETURN_TEXT_P(inputstring);
1746                 else
1747 /* otherwise return an empty string */
1748                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1749         }
1750
1751         /* field number is 1 based */
1752         if (fldnum < 1)
1753                 elog(ERROR, "field position must be > 0");
1754
1755         start_posn = text_position(PointerGetDatum(inputstring),
1756                                                            PointerGetDatum(fldsep),
1757                                                            fldnum - 1);
1758         end_posn = text_position(PointerGetDatum(inputstring),
1759                                                          PointerGetDatum(fldsep),
1760                                                          fldnum);
1761
1762         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
1763         {
1764                 if (fldnum == 1)                /* first field - just return the input
1765                                                                  * string */
1766                         PG_RETURN_TEXT_P(inputstring);
1767                 else
1768 /* otherwise return an empty string */
1769                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1770         }
1771         else if ((start_posn != 0) && (end_posn == 0))
1772         {
1773                 /* last field requested */
1774                 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
1775                 PG_RETURN_TEXT_P(result_text);
1776         }
1777         else if ((start_posn == 0) && (end_posn != 0))
1778         {
1779                 /* first field requested */
1780                 result_text = LEFT(inputstring, fldsep);
1781                 PG_RETURN_TEXT_P(result_text);
1782         }
1783         else
1784         {
1785                 /* prior to last field requested */
1786                 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
1787                 PG_RETURN_TEXT_P(result_text);
1788         }
1789 }
1790
1791 #define HEXBASE 16
1792 /*
1793  * Convert a int32 to a string containing a base 16 (hex) representation of
1794  * the number.
1795  */
1796 Datum
1797 to_hex32(PG_FUNCTION_ARGS)
1798 {
1799         static char digits[] = "0123456789abcdef";
1800         char            buf[32];                /* bigger than needed, but reasonable */
1801         char       *ptr;
1802         text       *result_text;
1803         int32           value = PG_GETARG_INT32(0);
1804
1805         ptr = buf + sizeof(buf) - 1;
1806         *ptr = '\0';
1807
1808         do
1809         {
1810                 *--ptr = digits[value % HEXBASE];
1811                 value /= HEXBASE;
1812         } while (ptr > buf && value);
1813
1814         result_text = PG_STR_GET_TEXT(ptr);
1815         PG_RETURN_TEXT_P(result_text);
1816 }
1817
1818 /*
1819  * Convert a int64 to a string containing a base 16 (hex) representation of
1820  * the number.
1821  */
1822 Datum
1823 to_hex64(PG_FUNCTION_ARGS)
1824 {
1825         static char digits[] = "0123456789abcdef";
1826         char            buf[32];                /* bigger than needed, but reasonable */
1827         char       *ptr;
1828         text       *result_text;
1829         int64           value = PG_GETARG_INT64(0);
1830
1831         ptr = buf + sizeof(buf) - 1;
1832         *ptr = '\0';
1833
1834         do
1835         {
1836                 *--ptr = digits[value % HEXBASE];
1837                 value /= HEXBASE;
1838         } while (ptr > buf && value);
1839
1840         result_text = PG_STR_GET_TEXT(ptr);
1841         PG_RETURN_TEXT_P(result_text);
1842 }
1843
1844 /*
1845  * Create an md5 hash of a text string and return it as hex
1846  *
1847  * md5 produces a 16 byte (128 bit) hash; double it for hex
1848  */
1849 #define MD5_HASH_LEN  32
1850
1851 Datum
1852 md5_text(PG_FUNCTION_ARGS)
1853 {
1854         char       *buff = PG_TEXT_GET_STR(PG_GETARG_TEXT_P(0));
1855         size_t          len = strlen(buff);
1856         char       *hexsum;
1857         text       *result_text;
1858
1859         /* leave room for the terminating '\0' */
1860         hexsum = (char *) palloc(MD5_HASH_LEN + 1);
1861
1862         /* get the hash result */
1863         md5_hash((void *) buff, len, hexsum);
1864
1865         /* convert to text and return it */
1866         result_text = PG_STR_GET_TEXT(hexsum);
1867         PG_RETURN_TEXT_P(result_text);
1868 }