granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.89 2002/08/28 20:46:24 momjian Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
#include "postgres.h"

#include <ctype.h>
#include <limits.h>

#include "access/tuptoaster.h"
#include "lib/stringinfo.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/pg_locale.h"
  25
  26
  27 typedef struct varlena unknown;
  28
  29 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  30 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  31 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  32 #define PG_TEXTARG_GET_STR(arg_) \
  33     DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
  34 #define PG_TEXT_GET_STR(textp_) \
  35     DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
  36 #define PG_STR_GET_TEXT(str_) \
  37     DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
  38 #define TEXTLEN(textp) \
  39         text_length(PointerGetDatum(textp))
  40 #define TEXTPOS(buf_text, from_sub_text) \
  41         text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
  42 #define TEXTDUP(textp) \
  43         DatumGetTextPCopy(PointerGetDatum(textp))
  44 #define LEFT(buf_text, from_sub_text) \
  45         text_substring(PointerGetDatum(buf_text), \
  46                                         1, \
  47                                         TEXTPOS(buf_text, from_sub_text) - 1, false)
  48 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
  49         text_substring(PointerGetDatum(buf_text), \
  50                                         TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
  51                                         -1, true)
  52
  53 static int      text_cmp(text *arg1, text *arg2);
  54 static int32 text_length(Datum str);
  55 static int32 text_position(Datum str, Datum search_str, int matchnum);
  56 static text *text_substring(Datum str,
  57                                                         int32 start,
  58                                                         int32 length,
  59                                                         bool length_not_specified);
  60
  61
  62 /*****************************************************************************
  63  *       USER I/O ROUTINES                                                                                                               *
  64  *****************************************************************************/
  65
  66
  67 #define VAL(CH)                 ((CH) - '0')
  68 #define DIG(VAL)                ((VAL) + '0')
  69
  70 /*
  71  *              byteain                 - converts from printable representation of byte array
  72  *
  73  *              Non-printable characters must be passed as '\nnn' (octal) and are
  74  *              converted to internal form.  '\' must be passed as '\\'.
  75  *              elog(ERROR, ...) if bad form.
  76  *
  77  *              BUGS:
  78  *                              The input is scaned twice.
  79  *                              The error checking of input is minimal.
  80  */
  81 Datum
  82 byteain(PG_FUNCTION_ARGS)
  83 {
  84         char       *inputText = PG_GETARG_CSTRING(0);
  85         char       *tp;
  86         char       *rp;
  87         int                     byte;
  88         bytea      *result;
  89
  90         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
  91         {
  92                 if (tp[0] != '\\')
  93                         tp++;
  94                 else if ((tp[0] == '\\') &&
  95                                  (tp[1] >= '0' && tp[1] <= '3') &&
  96                                  (tp[2] >= '0' && tp[2] <= '7') &&
  97                                  (tp[3] >= '0' && tp[3] <= '7'))
  98                         tp += 4;
  99                 else if ((tp[0] == '\\') &&
 100                                  (tp[1] == '\\'))
 101                         tp += 2;
 102                 else
 103                 {
 104                         /*
 105                          * one backslash, not followed by 0 or ### valid octal
 106                          */
 107                         elog(ERROR, "Bad input string for type bytea");
 108                 }
 109         }
 110
 111         byte += VARHDRSZ;
 112         result = (bytea *) palloc(byte);
 113         result->vl_len = byte;          /* set varlena length */
 114
 115         tp = inputText;
 116         rp = result->vl_dat;
 117         while (*tp != '\0')
 118         {
 119                 if (tp[0] != '\\')
 120                         *rp++ = *tp++;
 121                 else if ((tp[0] == '\\') &&
 122                                  (tp[1] >= '0' && tp[1] <= '3') &&
 123                                  (tp[2] >= '0' && tp[2] <= '7') &&
 124                                  (tp[3] >= '0' && tp[3] <= '7'))
 125                 {
 126                         byte = VAL(tp[1]);
 127                         byte <<= 3;
 128                         byte += VAL(tp[2]);
 129                         byte <<= 3;
 130                         *rp++ = byte + VAL(tp[3]);
 131                         tp += 4;
 132                 }
 133                 else if ((tp[0] == '\\') &&
 134                                  (tp[1] == '\\'))
 135                 {
 136                         *rp++ = '\\';
 137                         tp += 2;
 138                 }
 139                 else
 140                 {
 141                         /*
 142                          * We should never get here. The first pass should not allow
 143                          * it.
 144                          */
 145                         elog(ERROR, "Bad input string for type bytea");
 146                 }
 147         }
 148
 149         PG_RETURN_BYTEA_P(result);
 150 }
 151
 152 /*
 153  *              byteaout                - converts to printable representation of byte array
 154  *
 155  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 156  *              '\\'.
 157  *
 158  *              NULL vlena should be an error--returning string with NULL for now.
 159  */
 160 Datum
 161 byteaout(PG_FUNCTION_ARGS)
 162 {
 163         bytea      *vlena = PG_GETARG_BYTEA_P(0);
 164         char       *result;
 165         char       *vp;
 166         char       *rp;
 167         int                     val;                    /* holds unprintable chars */
 168         int                     i;
 169         int                     len;
 170
 171         len = 1;                                        /* empty string has 1 char */
 172         vp = vlena->vl_dat;
 173         for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
 174         {
 175                 if (*vp == '\\')
 176                         len += 2;
 177                 else if (isprint((unsigned char) *vp))
 178                         len++;
 179                 else
 180                         len += 4;
 181         }
 182         rp = result = (char *) palloc(len);
 183         vp = vlena->vl_dat;
 184         for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
 185         {
 186                 if (*vp == '\\')
 187                 {
 188                         *rp++ = '\\';
 189                         *rp++ = '\\';
 190                 }
 191                 else if (isprint((unsigned char) *vp))
 192                         *rp++ = *vp;
 193                 else
 194                 {
 195                         val = *vp;
 196                         rp[0] = '\\';
 197                         rp[3] = DIG(val & 07);
 198                         val >>= 3;
 199                         rp[2] = DIG(val & 07);
 200                         val >>= 3;
 201                         rp[1] = DIG(val & 03);
 202                         rp += 4;
 203                 }
 204         }
 205         *rp = '\0';
 206         PG_RETURN_CSTRING(result);
 207 }
 208
 209
 210 /*
 211  *              textin                  - converts "..." to internal representation
 212  */
 213 Datum
 214 textin(PG_FUNCTION_ARGS)
 215 {
 216         char       *inputText = PG_GETARG_CSTRING(0);
 217         text       *result;
 218         int                     len;
 219
 220 #ifdef MULTIBYTE
 221         char       *ermsg;
 222 #endif
 223
 224         len = strlen(inputText) + VARHDRSZ;
 225
 226 #ifdef MULTIBYTE
 227         if ((ermsg = pg_verifymbstr(inputText, len - VARHDRSZ)))
 228                 elog(ERROR, "%s", ermsg);
 229 #endif
 230
 231         result = (text *) palloc(len);
 232         VARATT_SIZEP(result) = len;
 233
 234         memcpy(VARDATA(result), inputText, len - VARHDRSZ);
 235
 236 #ifdef CYR_RECODE
 237         convertstr(VARDATA(result), len - VARHDRSZ, 0);
 238 #endif
 239
 240         PG_RETURN_TEXT_P(result);
 241 }
 242
 243 /*
 244  *              textout                 - converts internal representation to "..."
 245  */
 246 Datum
 247 textout(PG_FUNCTION_ARGS)
 248 {
 249         text       *t = PG_GETARG_TEXT_P(0);
 250         int                     len;
 251         char       *result;
 252
 253         len = VARSIZE(t) - VARHDRSZ;
 254         result = (char *) palloc(len + 1);
 255         memcpy(result, VARDATA(t), len);
 256         result[len] = '\0';
 257
 258 #ifdef CYR_RECODE
 259         convertstr(result, len, 1);
 260 #endif
 261
 262         PG_RETURN_CSTRING(result);
 263 }
 264
 265
 266 /*
 267  *              unknownin                       - converts "..." to internal representation
 268  */
 269 Datum
 270 unknownin(PG_FUNCTION_ARGS)
 271 {
 272         char       *inputStr = PG_GETARG_CSTRING(0);
 273         unknown    *result;
 274         int                     len;
 275
 276         len = strlen(inputStr) + VARHDRSZ;
 277
 278         result = (unknown *) palloc(len);
 279         VARATT_SIZEP(result) = len;
 280
 281         memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
 282
 283         PG_RETURN_UNKNOWN_P(result);
 284 }
 285
 286
 287 /*
 288  *              unknownout                      - converts internal representation to "..."
 289  */
 290 Datum
 291 unknownout(PG_FUNCTION_ARGS)
 292 {
 293         unknown    *t = PG_GETARG_UNKNOWN_P(0);
 294         int                     len;
 295         char       *result;
 296
 297         len = VARSIZE(t) - VARHDRSZ;
 298         result = (char *) palloc(len + 1);
 299         memcpy(result, VARDATA(t), len);
 300         result[len] = '\0';
 301
 302         PG_RETURN_CSTRING(result);
 303 }
 304
 305
 306 /* ========== PUBLIC ROUTINES ========== */
 307
 308 /*
 309  * textlen -
 310  *        returns the logical length of a text*
 311  *         (which is less than the VARSIZE of the text*)
 312  */
 313 Datum
 314 textlen(PG_FUNCTION_ARGS)
 315 {
 316         PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
 317 }
 318
 319 /*
 320  * text_length -
 321  *      Does the real work for textlen()
 322  *      This is broken out so it can be called directly by other string processing
 323  *      functions.
 324  */
 325 static int32
 326 text_length(Datum str)
 327 {
 328         /* fastpath when max encoding length is one */
 329         if (pg_database_encoding_max_length() == 1)
 330                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 331
 332         if (pg_database_encoding_max_length() > 1)
 333         {
 334                 text       *t = DatumGetTextP(str);
 335
 336                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
 337                                                                          VARSIZE(t) - VARHDRSZ));
 338         }
 339
 340         /* should never get here */
 341         elog(ERROR, "Invalid backend encoding; encoding max length "
 342                                 "is less than one.");
 343
 344         /* not reached: suppress compiler warning */
 345         return 0;
 346 }
 347
 348 /*
 349  * textoctetlen -
 350  *        returns the physical length of a text*
 351  *         (which is less than the VARSIZE of the text*)
 352  */
 353 Datum
 354 textoctetlen(PG_FUNCTION_ARGS)
 355 {
 356         PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
 357 }
 358
 359 /*
 360  * textcat -
 361  *        takes two text* and returns a text* that is the concatenation of
 362  *        the two.
 363  *
 364  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 365  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 366  * Allocate space for output in all cases.
 367  * XXX - thomas 1997-07-10
 368  */
 369 Datum
 370 textcat(PG_FUNCTION_ARGS)
 371 {
 372         text       *t1 = PG_GETARG_TEXT_P(0);
 373         text       *t2 = PG_GETARG_TEXT_P(1);
 374         int                     len1,
 375                                 len2,
 376                                 len;
 377         text       *result;
 378         char       *ptr;
 379
 380         len1 = (VARSIZE(t1) - VARHDRSZ);
 381         if (len1 < 0)
 382                 len1 = 0;
 383
 384         len2 = (VARSIZE(t2) - VARHDRSZ);
 385         if (len2 < 0)
 386                 len2 = 0;
 387
 388         len = len1 + len2 + VARHDRSZ;
 389         result = (text *) palloc(len);
 390
 391         /* Set size of result string... */
 392         VARATT_SIZEP(result) = len;
 393
 394         /* Fill data field of result string... */
 395         ptr = VARDATA(result);
 396         if (len1 > 0)
 397                 memcpy(ptr, VARDATA(t1), len1);
 398         if (len2 > 0)
 399                 memcpy(ptr + len1, VARDATA(t2), len2);
 400
 401         PG_RETURN_TEXT_P(result);
 402 }
 403
 404 /*
 405  * text_substr()
 406  * Return a substring starting at the specified position.
 407  * - thomas 1997-12-31
 408  *
 409  * Input:
 410  *      - string
 411  *      - starting position (is one-based)
 412  *      - string length
 413  *
 414  * If the starting position is zero or less, then return from the start of the string
 415  *      adjusting the length to be consistent with the "negative start" per SQL92.
 416  * If the length is less than zero, return the remaining string.
 417  *
 418  * Note that the arguments operate on octet length,
 419  *      so not aware of multi-byte character sets.
 420  *
 421  * Added multi-byte support.
 422  * - Tatsuo Ishii 1998-4-21
 423  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 424  * Formerly returned the entire string; now returns a portion.
 425  * - Thomas Lockhart 1998-12-10
 426  * Now uses faster TOAST-slicing interface
 427  * - John Gray 2002-02-22
 428  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 429  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 430  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 431  * S > LC and < LC + 4 sometimes garbage characters are returned.
 432  * - Joe Conway 2002-08-10
 433  */
 434 Datum
 435 text_substr(PG_FUNCTION_ARGS)
 436 {
 437         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 438                                                                         PG_GETARG_INT32(1),
 439                                                                         PG_GETARG_INT32(2),
 440                                                                         false));
 441 }
 442
 443 /*
 444  * text_substr_no_len -
 445  *        Wrapper to avoid opr_sanity failure due to
 446  *        one function accepting a different number of args.
 447  */
 448 Datum
 449 text_substr_no_len(PG_FUNCTION_ARGS)
 450 {
 451         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 452                                                                         PG_GETARG_INT32(1),
 453                                                                         -1, true));
 454 }
 455
 456 /*
 457  * text_substring -
 458  *      Does the real work for text_substr() and text_substr_no_len()
 459  *      This is broken out so it can be called directly by other string processing
 460  *      functions.
 461  */
 462 static text*
 463 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 464 {
 465         int32           eml = pg_database_encoding_max_length();
 466         int32           S = start;                              /* start position */
 467         int32           S1;                                             /* adjusted start position */
 468         int32           L1;                                             /* adjusted substring length */
 469
 470         /* life is easy if the encoding max length is 1 */
 471         if (eml == 1)
 472         {
 473                 S1 = Max(S, 1);
 474
 475                 if (length_not_specified)       /* special case - get length to end of string */
 476                         L1 = -1;
 477                 else
 478                 {
 479                         /* end position */
 480                         int     E = S + length;
 481
 482                         /*
 483                          * A negative value for L is the only way for the end position
 484                          * to be before the start. SQL99 says to throw an error.
 485                          */
 486                         if (E < S)
 487                                 elog(ERROR, "negative substring length not allowed");
 488
 489                         /*
 490                          * A zero or negative value for the end position can happen if the start
 491                          * was negative or one. SQL99 says to return a zero-length string.
 492                          */
 493                         if (E < 1)
 494                                 return PG_STR_GET_TEXT("");
 495
 496                         L1 = E - S1;
 497                 }
 498
 499                 /*
 500                  * If the start position is past the end of the string,
 501                  * SQL99 says to return a zero-length string --
 502                  * PG_GETARG_TEXT_P_SLICE() will do that for us.
 503                  * Convert to zero-based starting position
 504                  */
 505                 return DatumGetTextPSlice(str, S1 - 1, L1);
 506         }
 507         else if (eml > 1)
 508         {
 509                 /*
 510                  * When encoding max length is > 1, we can't get LC without
 511                  * detoasting, so we'll grab a conservatively large slice
 512                  * now and go back later to do the right thing
 513                  */
 514                 int32           slice_start;
 515                 int32           slice_size;
 516                 int32           slice_strlen;
 517                 text            *slice;
 518                 int32           E1;
 519                 int32           i;
 520                 char       *p;
 521                 char       *s;
 522                 text       *ret;
 523
 524                 /*
 525                  * if S is past the end of the string, the tuple toaster
 526                  * will return a zero-length string to us
 527                  */
 528                 S1 = Max(S, 1);
 529
 530                 /*
 531                  * We need to start at position zero because there is no
 532                  * way to know in advance which byte offset corresponds to
 533                  * the supplied start position.
 534                  */
 535                 slice_start = 0;
 536
 537                 if (length_not_specified)       /* special case - get length to end of string */
 538                         slice_size = L1 = -1;
 539                 else
 540                 {
 541                         int     E = S + length;
 542
 543                         /*
 544                          * A negative value for L is the only way for the end position
 545                          * to be before the start. SQL99 says to throw an error.
 546                          */
 547                         if (E < S)
 548                                 elog(ERROR, "negative substring length not allowed");
 549
 550                         /*
 551                          * A zero or negative value for the end position can happen if the start
 552                          * was negative or one. SQL99 says to return a zero-length string.
 553                          */
 554                         if (E < 1)
 555                                 return PG_STR_GET_TEXT("");
 556
 557                         /*
 558                          * if E is past the end of the string, the tuple toaster
 559                          * will truncate the length for us
 560                          */
 561                         L1 = E - S1;
 562
 563                         /*
 564                          * Total slice size in bytes can't be any longer than the start
 565                          * position plus substring length times the encoding max length.
 566                          */
 567                         slice_size = (S1 + L1) * eml;
 568                 }
 569                 slice = DatumGetTextPSlice(str, slice_start, slice_size);
 570
 571                 /* see if we got back an empty string */
 572                 if ((VARSIZE(slice) - VARHDRSZ) == 0)
 573                         return PG_STR_GET_TEXT("");
 574
 575                 /* Now we can get the actual length of the slice in MB characters */
 576                 slice_strlen = pg_mbstrlen_with_len (VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
 577
 578                 /* Check that the start position wasn't > slice_strlen. If so,
 579                  * SQL99 says to return a zero-length string.
 580                  */
 581                 if (S1 > slice_strlen)
 582                         return PG_STR_GET_TEXT("");
 583
 584                 /*
 585                  * Adjust L1 and E1 now that we know the slice string length.
 586                  * Again remember that S1 is one based, and slice_start is zero based.
 587                  */
 588                 if (L1 > -1)
 589                         E1 = Min(S1 + L1 , slice_start + 1 + slice_strlen);
 590                 else
 591                         E1 = slice_start + 1 + slice_strlen;
 592
 593                 /*
 594                  * Find the start position in the slice;
 595                  * remember S1 is not zero based
 596                  */
 597                 p = VARDATA(slice);
 598                 for (i = 0; i < S1 - 1; i++)
 599                         p += pg_mblen(p);
 600
 601                 /* hang onto a pointer to our start position */
 602                 s = p;
 603
 604                 /*
 605                  * Count the actual bytes used by the substring of
 606                  * the requested length.
 607                  */
 608                 for (i = S1; i < E1; i++)
 609                         p += pg_mblen(p);
 610
 611                 ret = (text *) palloc(VARHDRSZ + (p - s));
 612                 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
 613                 memcpy(VARDATA(ret), s, (p - s));
 614
 615                 return ret;
 616         }
 617         else
 618                 elog(ERROR, "Invalid backend encoding; encoding max length "
 619                                         "is less than one.");
 620
 621         /* not reached: suppress compiler warning */
 622         return PG_STR_GET_TEXT("");
 623 }
 624
 625 /*
 626  * textpos -
 627  *        Return the position of the specified substring.
 628  *        Implements the SQL92 POSITION() function.
 629  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 630  * - thomas 1997-07-27
 631  *
 632  * Added multi-byte support.
 633  * - Tatsuo Ishii 1998-4-21
 634  */
 635 Datum
 636 textpos(PG_FUNCTION_ARGS)
 637 {
 638         PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
 639 }
 640
 641 /*
 642  * text_position -
 643  *      Does the real work for textpos()
 644  *      This is broken out so it can be called directly by other string processing
 645  *      functions.
 646  */
 647 static int32
 648 text_position(Datum str, Datum search_str, int matchnum)
 649 {
 650         int                     eml = pg_database_encoding_max_length();
 651         text       *t1 = DatumGetTextP(str);
 652         text       *t2 = DatumGetTextP(search_str);
 653         int                     match = 0,
 654                                 pos = 0,
 655                                 p = 0,
 656                                 px,
 657                                 len1,
 658                                 len2;
 659
 660         if(matchnum == 0)
 661                 return 0;               /* result for 0th match */
 662
 663         if (VARSIZE(t2) <= VARHDRSZ)
 664                 PG_RETURN_INT32(1);             /* result for empty pattern */
 665
 666         len1 = (VARSIZE(t1) - VARHDRSZ);
 667         len2 = (VARSIZE(t2) - VARHDRSZ);
 668
 669         /* no use in searching str past point where search_str will fit */
 670         px = (len1 - len2);
 671
 672         if (eml == 1)   /* simple case - single byte encoding */
 673         {
 674                 char   *p1,
 675                            *p2;
 676
 677                 p1 = VARDATA(t1);
 678                 p2 = VARDATA(t2);
 679
 680                 for (p = 0; p <= px; p++)
 681                 {
 682                         if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
 683                         {
 684                                 if (++match == matchnum)
 685                                 {
 686                                         pos = p + 1;
 687                                         break;
 688                                 }
 689                         }
 690                         p1++;
 691                 }
 692         }
 693         else if (eml > 1)       /* not as simple - multibyte encoding */
 694         {
 695                 pg_wchar   *p1,
 696                                    *p2,
 697                                    *ps1,
 698                                    *ps2;
 699
 700                 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 701                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
 702                 len1 = pg_wchar_strlen(p1);
 703                 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 704                 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
 705                 len2 = pg_wchar_strlen(p2);
 706
 707                 for (p = 0; p <= px; p++)
 708                 {
 709                         if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
 710                         {
 711                                 if (++match == matchnum)
 712                                 {
 713                                         pos = p + 1;
 714                                         break;
 715                                 }
 716                         }
 717                         p1++;
 718                 }
 719
 720                 pfree(ps1);
 721                 pfree(ps2);
 722         }
 723         else
 724                 elog(ERROR, "Invalid backend encoding; encoding max length "
 725                                         "is less than one.");
 726
 727         PG_RETURN_INT32(pos);
 728 }
 729
 730 /* varstr_cmp()
 731  * Comparison function for text strings with given lengths.
 732  * Includes locale support, but must copy strings to temporary memory
 733  *      to allow null-termination for inputs to strcoll().
 734  * Returns -1, 0 or 1
 735  */
 736 int
 737 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 738 {
 739         int                     result;
 740         char       *a1p,
 741                            *a2p;
 742
 743         /*
 744          * Unfortunately, there is no strncoll(), so in the non-C locale
 745          * case we have to do some memory copying.  This turns out to be
 746          * significantly slower, so we optimize the case where LC_COLLATE
 747          * is C.
 748          */
 749         if (!lc_collate_is_c())
 750         {
 751                 a1p = (char *) palloc(len1 + 1);
 752                 a2p = (char *) palloc(len2 + 1);
 753
 754                 memcpy(a1p, arg1, len1);
 755                 *(a1p + len1) = '\0';
 756                 memcpy(a2p, arg2, len2);
 757                 *(a2p + len2) = '\0';
 758
 759                 result = strcoll(a1p, a2p);
 760
 761                 pfree(a1p);
 762                 pfree(a2p);
 763         }
 764         else
 765         {
 766                 a1p = arg1;
 767                 a2p = arg2;
 768
 769                 result = strncmp(a1p, a2p, Min(len1, len2));
 770                 if ((result == 0) && (len1 != len2))
 771                         result = (len1 < len2) ? -1 : 1;
 772         }
 773
 774         return result;
 775 }
 776
 777
 778 /* text_cmp()
 779  * Internal comparison function for text strings.
 780  * Returns -1, 0 or 1
 781  */
 782 static int
 783 text_cmp(text *arg1, text *arg2)
 784 {
 785         char       *a1p,
 786                            *a2p;
 787         int                     len1,
 788                                 len2;
 789
 790         a1p = VARDATA(arg1);
 791         a2p = VARDATA(arg2);
 792
 793         len1 = VARSIZE(arg1) - VARHDRSZ;
 794         len2 = VARSIZE(arg2) - VARHDRSZ;
 795
 796         return varstr_cmp(a1p, len1, a2p, len2);
 797 }
 798
 799 /*
 800  * Comparison functions for text strings.
 801  *
 802  * Note: btree indexes need these routines not to leak memory; therefore,
 803  * be careful to free working copies of toasted datums.  Most places don't
 804  * need to be so careful.
 805  */
 806
 807 Datum
 808 texteq(PG_FUNCTION_ARGS)
 809 {
 810         text       *arg1 = PG_GETARG_TEXT_P(0);
 811         text       *arg2 = PG_GETARG_TEXT_P(1);
 812         bool            result;
 813
 814         /* fast path for different-length inputs */
 815         if (VARSIZE(arg1) != VARSIZE(arg2))
 816                 result = false;
 817         else
 818                 result = (text_cmp(arg1, arg2) == 0);
 819
 820         PG_FREE_IF_COPY(arg1, 0);
 821         PG_FREE_IF_COPY(arg2, 1);
 822
 823         PG_RETURN_BOOL(result);
 824 }
 825
 826 Datum
 827 textne(PG_FUNCTION_ARGS)
 828 {
 829         text       *arg1 = PG_GETARG_TEXT_P(0);
 830         text       *arg2 = PG_GETARG_TEXT_P(1);
 831         bool            result;
 832
 833         /* fast path for different-length inputs */
 834         if (VARSIZE(arg1) != VARSIZE(arg2))
 835                 result = true;
 836         else
 837                 result = (text_cmp(arg1, arg2) != 0);
 838
 839         PG_FREE_IF_COPY(arg1, 0);
 840         PG_FREE_IF_COPY(arg2, 1);
 841
 842         PG_RETURN_BOOL(result);
 843 }
 844
 845 Datum
 846 text_lt(PG_FUNCTION_ARGS)
 847 {
 848         text       *arg1 = PG_GETARG_TEXT_P(0);
 849         text       *arg2 = PG_GETARG_TEXT_P(1);
 850         bool            result;
 851
 852         result = (text_cmp(arg1, arg2) < 0);
 853
 854         PG_FREE_IF_COPY(arg1, 0);
 855         PG_FREE_IF_COPY(arg2, 1);
 856
 857         PG_RETURN_BOOL(result);
 858 }
 859
 860 Datum
 861 text_le(PG_FUNCTION_ARGS)
 862 {
 863         text       *arg1 = PG_GETARG_TEXT_P(0);
 864         text       *arg2 = PG_GETARG_TEXT_P(1);
 865         bool            result;
 866
 867         result = (text_cmp(arg1, arg2) <= 0);
 868
 869         PG_FREE_IF_COPY(arg1, 0);
 870         PG_FREE_IF_COPY(arg2, 1);
 871
 872         PG_RETURN_BOOL(result);
 873 }
 874
 875 Datum
 876 text_gt(PG_FUNCTION_ARGS)
 877 {
 878         text       *arg1 = PG_GETARG_TEXT_P(0);
 879         text       *arg2 = PG_GETARG_TEXT_P(1);
 880         bool            result;
 881
 882         result = (text_cmp(arg1, arg2) > 0);
 883
 884         PG_FREE_IF_COPY(arg1, 0);
 885         PG_FREE_IF_COPY(arg2, 1);
 886
 887         PG_RETURN_BOOL(result);
 888 }
 889
 890 Datum
 891 text_ge(PG_FUNCTION_ARGS)
 892 {
 893         text       *arg1 = PG_GETARG_TEXT_P(0);
 894         text       *arg2 = PG_GETARG_TEXT_P(1);
 895         bool            result;
 896
 897         result = (text_cmp(arg1, arg2) >= 0);
 898
 899         PG_FREE_IF_COPY(arg1, 0);
 900         PG_FREE_IF_COPY(arg2, 1);
 901
 902         PG_RETURN_BOOL(result);
 903 }
 904
 905 Datum
 906 bttextcmp(PG_FUNCTION_ARGS)
 907 {
 908         text       *arg1 = PG_GETARG_TEXT_P(0);
 909         text       *arg2 = PG_GETARG_TEXT_P(1);
 910         int32           result;
 911
 912         result = text_cmp(arg1, arg2);
 913
 914         PG_FREE_IF_COPY(arg1, 0);
 915         PG_FREE_IF_COPY(arg2, 1);
 916
 917         PG_RETURN_INT32(result);
 918 }
 919
 920
 921 Datum
 922 text_larger(PG_FUNCTION_ARGS)
 923 {
 924         text       *arg1 = PG_GETARG_TEXT_P(0);
 925         text       *arg2 = PG_GETARG_TEXT_P(1);
 926         text       *result;
 927
 928         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
 929
 930         PG_RETURN_TEXT_P(result);
 931 }
 932
 933 Datum
 934 text_smaller(PG_FUNCTION_ARGS)
 935 {
 936         text       *arg1 = PG_GETARG_TEXT_P(0);
 937         text       *arg2 = PG_GETARG_TEXT_P(1);
 938         text       *result;
 939
 940         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
 941
 942         PG_RETURN_TEXT_P(result);
 943 }
 944
 945 /*-------------------------------------------------------------
 946  * byteaoctetlen
 947  *
 948  * get the number of bytes contained in an instance of type 'bytea'
 949  *-------------------------------------------------------------
 950  */
 951 Datum
 952 byteaoctetlen(PG_FUNCTION_ARGS)
 953 {
 954         PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
 955 }
 956
 957 /*
 958  * byteacat -
 959  *        takes two bytea* and returns a bytea* that is the concatenation of
 960  *        the two.
 961  *
 962  * Cloned from textcat and modified as required.
 963  */
 964 Datum
 965 byteacat(PG_FUNCTION_ARGS)
 966 {
 967         bytea      *t1 = PG_GETARG_BYTEA_P(0);
 968         bytea      *t2 = PG_GETARG_BYTEA_P(1);
 969         int                     len1,
 970                                 len2,
 971                                 len;
 972         bytea      *result;
 973         char       *ptr;
 974
 975         len1 = (VARSIZE(t1) - VARHDRSZ);
 976         if (len1 < 0)
 977                 len1 = 0;
 978
 979         len2 = (VARSIZE(t2) - VARHDRSZ);
 980         if (len2 < 0)
 981                 len2 = 0;
 982
 983         len = len1 + len2 + VARHDRSZ;
 984         result = (bytea *) palloc(len);
 985
 986         /* Set size of result string... */
 987         VARATT_SIZEP(result) = len;
 988
 989         /* Fill data field of result string... */
 990         ptr = VARDATA(result);
 991         if (len1 > 0)
 992                 memcpy(ptr, VARDATA(t1), len1);
 993         if (len2 > 0)
 994                 memcpy(ptr + len1, VARDATA(t2), len2);
 995
 996         PG_RETURN_BYTEA_P(result);
 997 }
 998
 999 #define PG_STR_GET_BYTEA(str_) \
1000     DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1001 /*
1002  * bytea_substr()
1003  * Return a substring starting at the specified position.
1004  * Cloned from text_substr and modified as required.
1005  *
1006  * Input:
1007  *      - string
1008  *      - starting position (is one-based)
1009  *      - string length (optional)
1010  *
1011  * If the starting position is zero or less, then return from the start of the string
1012  * adjusting the length to be consistent with the "negative start" per SQL92.
1013  * If the length is less than zero, an ERROR is thrown. If no third argument
1014  * (length) is provided, the length to the end of the string is assumed.
1015  */
1016 Datum
1017 bytea_substr(PG_FUNCTION_ARGS)
1018 {
1019         int             S = PG_GETARG_INT32(1); /* start position */
1020         int             S1;                                             /* adjusted start position */
1021         int             L1;                                             /* adjusted substring length */
1022
1023         S1 = Max(S, 1);
1024
1025         if (fcinfo->nargs == 2)
1026         {
1027                 /*
1028                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE()
1029                  * grabs everything to the end of the string if we pass it
1030                  * a negative value for length.
1031                  */
1032                 L1 = -1;
1033         }
1034         else
1035         {
1036                 /* end position */
1037                 int     E = S + PG_GETARG_INT32(2);
1038
1039                 /*
1040                  * A negative value for L is the only way for the end position
1041                  * to be before the start. SQL99 says to throw an error.
1042                  */
1043                 if (E < S)
1044                         elog(ERROR, "negative substring length not allowed");
1045
1046                 /*
1047                  * A zero or negative value for the end position can happen if the start
1048                  * was negative or one. SQL99 says to return a zero-length string.
1049                  */
1050                 if (E < 1)
1051                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1052
1053                 L1 = E - S1;
1054         }
1055
1056         /*
1057          * If the start position is past the end of the string,
1058          * SQL99 says to return a zero-length string --
1059          * PG_GETARG_TEXT_P_SLICE() will do that for us.
1060          * Convert to zero-based starting position
1061          */
1062         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, S1 - 1, L1));
1063 }
1064
1065 /*
1066  * bytea_substr_no_len -
1067  *        Wrapper to avoid opr_sanity failure due to
1068  *        one function accepting a different number of args.
1069  */
1070 Datum
1071 bytea_substr_no_len(PG_FUNCTION_ARGS)
1072 {
1073         return bytea_substr(fcinfo);
1074 }
1075
1076 /*
1077  * byteapos -
1078  *        Return the position of the specified substring.
1079  *        Implements the SQL92 POSITION() function.
1080  * Cloned from textpos and modified as required.
1081  */
1082 Datum
1083 byteapos(PG_FUNCTION_ARGS)
1084 {
1085         bytea      *t1 = PG_GETARG_BYTEA_P(0);
1086         bytea      *t2 = PG_GETARG_BYTEA_P(1);
1087         int                     pos;
1088         int                     px,
1089                                 p;
1090         int                     len1,
1091                                 len2;
1092         char       *p1,
1093                            *p2;
1094
1095         if (VARSIZE(t2) <= VARHDRSZ)
1096                 PG_RETURN_INT32(1);             /* result for empty pattern */
1097
1098         len1 = (VARSIZE(t1) - VARHDRSZ);
1099         len2 = (VARSIZE(t2) - VARHDRSZ);
1100
1101         p1 = VARDATA(t1);
1102         p2 = VARDATA(t2);
1103
1104         pos = 0;
1105         px = (len1 - len2);
1106         for (p = 0; p <= px; p++)
1107         {
1108                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1109                 {
1110                         pos = p + 1;
1111                         break;
1112                 };
1113                 p1++;
1114         };
1115
1116         PG_RETURN_INT32(pos);
1117 }
1118
1119 /*-------------------------------------------------------------
1120  * byteaGetByte
1121  *
1122  * this routine treats "bytea" as an array of bytes.
1123  * It returns the Nth byte (a number between 0 and 255).
1124  *-------------------------------------------------------------
1125  */
1126 Datum
1127 byteaGetByte(PG_FUNCTION_ARGS)
1128 {
1129         bytea      *v = PG_GETARG_BYTEA_P(0);
1130         int32           n = PG_GETARG_INT32(1);
1131         int                     len;
1132         int                     byte;
1133
1134         len = VARSIZE(v) - VARHDRSZ;
1135
1136         if (n < 0 || n >= len)
1137                 elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
1138                          n, len - 1);
1139
1140         byte = ((unsigned char *) VARDATA(v))[n];
1141
1142         PG_RETURN_INT32(byte);
1143 }
1144
1145 /*-------------------------------------------------------------
1146  * byteaGetBit
1147  *
1148  * This routine treats a "bytea" type like an array of bits.
1149  * It returns the value of the Nth bit (0 or 1).
1150  *
1151  *-------------------------------------------------------------
1152  */
1153 Datum
1154 byteaGetBit(PG_FUNCTION_ARGS)
1155 {
1156         bytea      *v = PG_GETARG_BYTEA_P(0);
1157         int32           n = PG_GETARG_INT32(1);
1158         int                     byteNo,
1159                                 bitNo;
1160         int                     len;
1161         int                     byte;
1162
1163         len = VARSIZE(v) - VARHDRSZ;
1164
1165         if (n < 0 || n >= len * 8)
1166                 elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
1167                          n, len * 8 - 1);
1168
1169         byteNo = n / 8;
1170         bitNo = n % 8;
1171
1172         byte = ((unsigned char *) VARDATA(v))[byteNo];
1173
1174         if (byte & (1 << bitNo))
1175                 PG_RETURN_INT32(1);
1176         else
1177                 PG_RETURN_INT32(0);
1178 }
1179
1180 /*-------------------------------------------------------------
1181  * byteaSetByte
1182  *
1183  * Given an instance of type 'bytea' creates a new one with
1184  * the Nth byte set to the given value.
1185  *
1186  *-------------------------------------------------------------
1187  */
1188 Datum
1189 byteaSetByte(PG_FUNCTION_ARGS)
1190 {
1191         bytea      *v = PG_GETARG_BYTEA_P(0);
1192         int32           n = PG_GETARG_INT32(1);
1193         int32           newByte = PG_GETARG_INT32(2);
1194         int                     len;
1195         bytea      *res;
1196
1197         len = VARSIZE(v) - VARHDRSZ;
1198
1199         if (n < 0 || n >= len)
1200                 elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
1201                          n, len - 1);
1202
1203         /*
1204          * Make a copy of the original varlena.
1205          */
1206         res = (bytea *) palloc(VARSIZE(v));
1207         memcpy((char *) res, (char *) v, VARSIZE(v));
1208
1209         /*
1210          * Now set the byte.
1211          */
1212         ((unsigned char *) VARDATA(res))[n] = newByte;
1213
1214         PG_RETURN_BYTEA_P(res);
1215 }
1216
1217 /*-------------------------------------------------------------
1218  * byteaSetBit
1219  *
1220  * Given an instance of type 'bytea' creates a new one with
1221  * the Nth bit set to the given value.
1222  *
1223  *-------------------------------------------------------------
1224  */
1225 Datum
1226 byteaSetBit(PG_FUNCTION_ARGS)
1227 {
1228         bytea      *v = PG_GETARG_BYTEA_P(0);
1229         int32           n = PG_GETARG_INT32(1);
1230         int32           newBit = PG_GETARG_INT32(2);
1231         bytea      *res;
1232         int                     len;
1233         int                     oldByte,
1234                                 newByte;
1235         int                     byteNo,
1236                                 bitNo;
1237
1238         len = VARSIZE(v) - VARHDRSZ;
1239
1240         if (n < 0 || n >= len * 8)
1241                 elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
1242                          n, len * 8 - 1);
1243
1244         byteNo = n / 8;
1245         bitNo = n % 8;
1246
1247         /*
1248          * sanity check!
1249          */
1250         if (newBit != 0 && newBit != 1)
1251                 elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
1252
1253         /*
1254          * Make a copy of the original varlena.
1255          */
1256         res = (bytea *) palloc(VARSIZE(v));
1257         memcpy((char *) res, (char *) v, VARSIZE(v));
1258
1259         /*
1260          * Update the byte.
1261          */
1262         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1263
1264         if (newBit == 0)
1265                 newByte = oldByte & (~(1 << bitNo));
1266         else
1267                 newByte = oldByte | (1 << bitNo);
1268
1269         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1270
1271         PG_RETURN_BYTEA_P(res);
1272 }
1273
1274
1275 /* text_name()
1276  * Converts a text type to a Name type.
1277  */
1278 Datum
1279 text_name(PG_FUNCTION_ARGS)
1280 {
1281         text       *s = PG_GETARG_TEXT_P(0);
1282         Name            result;
1283         int                     len;
1284
1285         len = VARSIZE(s) - VARHDRSZ;
1286
1287         /* Truncate oversize input */
1288         if (len >= NAMEDATALEN)
1289                 len = NAMEDATALEN - 1;
1290
1291 #ifdef STRINGDEBUG
1292         printf("text- convert string length %d (%d) ->%d\n",
1293                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1294 #endif
1295
1296         result = (Name) palloc(NAMEDATALEN);
1297         memcpy(NameStr(*result), VARDATA(s), len);
1298
1299         /* now null pad to full length... */
1300         while (len < NAMEDATALEN)
1301         {
1302                 *(NameStr(*result) + len) = '\0';
1303                 len++;
1304         }
1305
1306         PG_RETURN_NAME(result);
1307 }
1308
1309 /* name_text()
1310  * Converts a Name type to a text type.
1311  */
1312 Datum
1313 name_text(PG_FUNCTION_ARGS)
1314 {
1315         Name            s = PG_GETARG_NAME(0);
1316         text       *result;
1317         int                     len;
1318
1319         len = strlen(NameStr(*s));
1320
1321 #ifdef STRINGDEBUG
1322         printf("text- convert string length %d (%d) ->%d\n",
1323                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1324 #endif
1325
1326         result = palloc(VARHDRSZ + len);
1327         VARATT_SIZEP(result) = VARHDRSZ + len;
1328         memcpy(VARDATA(result), NameStr(*s), len);
1329
1330         PG_RETURN_TEXT_P(result);
1331 }
1332
1333
1334 /*
1335  * textToQualifiedNameList - convert a text object to list of names
1336  *
1337  * This implements the input parsing needed by nextval() and other
1338  * functions that take a text parameter representing a qualified name.
1339  * We split the name at dots, downcase if not double-quoted, and
1340  * truncate names if they're too long.
1341  */
1342 List *
1343 textToQualifiedNameList(text *textval, const char *caller)
1344 {
1345         char       *rawname;
1346         List       *result = NIL;
1347         List       *namelist;
1348         List       *l;
1349
1350         /* Convert to C string (handles possible detoasting). */
1351         /* Note we rely on being able to modify rawname below. */
1352         rawname = DatumGetCString(DirectFunctionCall1(textout,
1353                                                                                                   PointerGetDatum(textval)));
1354
1355         if (!SplitIdentifierString(rawname, '.', &namelist))
1356                 elog(ERROR, "%s: invalid name syntax", caller);
1357
1358         if (namelist == NIL)
1359                 elog(ERROR, "%s: invalid name syntax", caller);
1360
1361         foreach(l, namelist)
1362         {
1363                 char   *curname = (char *) lfirst(l);
1364
1365                 result = lappend(result, makeString(pstrdup(curname)));
1366         }
1367
1368         pfree(rawname);
1369         freeList(namelist);
1370
1371         return result;
1372 }
1373
1374 /*
1375  * SplitIdentifierString --- parse a string containing identifiers
1376  *
1377  * This is the guts of textToQualifiedNameList, and is exported for use in
1378  * other situations such as parsing GUC variables.  In the GUC case, it's
1379  * important to avoid memory leaks, so the API is designed to minimize the
1380  * amount of stuff that needs to be allocated and freed.
1381  *
1382  * Inputs:
1383  *      rawstring: the input string; must be overwritable!  On return, it's
1384  *                         been modified to contain the separated identifiers.
1385  *      separator: the separator punctuation expected between identifiers
1386  *                         (typically '.' or ',').  Whitespace may also appear around
1387  *                         identifiers.
1388  * Outputs:
1389  *      namelist: filled with a palloc'd list of pointers to identifiers within
1390  *                        rawstring.  Caller should freeList() this even on error return.
1391  *
1392  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1393  *
1394  * Note that an empty string is considered okay here, though not in
1395  * textToQualifiedNameList.
1396  */
1397 bool
1398 SplitIdentifierString(char *rawstring, char separator,
1399                                           List **namelist)
1400 {
1401         char       *nextp = rawstring;
1402         bool            done = false;
1403
1404         *namelist = NIL;
1405
1406         while (isspace((unsigned char) *nextp))
1407                 nextp++;                                /* skip leading whitespace */
1408
1409         if (*nextp == '\0')
1410                 return true;                    /* allow empty string */
1411
1412         /* At the top of the loop, we are at start of a new identifier. */
1413         do
1414         {
1415                 char       *curname;
1416                 char       *endp;
1417                 int                     curlen;
1418
1419                 if (*nextp == '\"')
1420                 {
1421                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1422                         curname = nextp + 1;
1423                         for (;;)
1424                         {
1425                                 endp = strchr(nextp + 1, '\"');
1426                                 if (endp == NULL)
1427                                         return false; /* mismatched quotes */
1428                                 if (endp[1] != '\"')
1429                                         break;          /* found end of quoted name */
1430                                 /* Collapse adjacent quotes into one quote, and look again */
1431                                 memmove(endp, endp+1, strlen(endp));
1432                                 nextp = endp;
1433                         }
1434                         /* endp now points at the terminating quote */
1435                         nextp = endp + 1;
1436                 }
1437                 else
1438                 {
1439                         /* Unquoted name --- extends to separator or whitespace */
1440                         curname = nextp;
1441                         while (*nextp && *nextp != separator &&
1442                                    !isspace((unsigned char) *nextp))
1443                         {
1444                                 /*
1445                                  * It's important that this match the identifier downcasing
1446                                  * code used by backend/parser/scan.l.
1447                                  */
1448                                 if (isupper((unsigned char) *nextp))
1449                                         *nextp = tolower((unsigned char) *nextp);
1450                                 nextp++;
1451                         }
1452                         endp = nextp;
1453                         if (curname == nextp)
1454                                 return false;   /* empty unquoted name not allowed */
1455                 }
1456
1457                 while (isspace((unsigned char) *nextp))
1458                         nextp++;                        /* skip trailing whitespace */
1459
1460                 if (*nextp == separator)
1461                 {
1462                         nextp++;
1463                         while (isspace((unsigned char) *nextp))
1464                                 nextp++;                /* skip leading whitespace for next */
1465                         /* we expect another name, so done remains false */
1466                 }
1467                 else if (*nextp == '\0')
1468                         done = true;
1469                 else
1470                         return false;           /* invalid syntax */
1471
1472                 /* Now safe to overwrite separator with a null */
1473                 *endp = '\0';
1474
1475                 /* Truncate name if it's overlength; again, should match scan.l */
1476                 curlen = strlen(curname);
1477                 if (curlen >= NAMEDATALEN)
1478                 {
1479 #ifdef MULTIBYTE
1480                         curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1481                         curname[curlen] = '\0';
1482 #else
1483                         curname[NAMEDATALEN - 1] = '\0';
1484 #endif
1485                 }
1486
1487                 /*
1488                  * Finished isolating current name --- add it to list
1489                  */
1490                 *namelist = lappend(*namelist, curname);
1491
1492                 /* Loop back if we didn't reach end of string */
1493         } while (!done);
1494
1495         return true;
1496 }
1497
1498
1499 /*****************************************************************************
1500  *      Comparison Functions used for bytea
1501  *
1502  * Note: btree indexes need these routines not to leak memory; therefore,
1503  * be careful to free working copies of toasted datums.  Most places don't
1504  * need to be so careful.
1505  *****************************************************************************/
1506
1507 Datum
1508 byteaeq(PG_FUNCTION_ARGS)
1509 {
1510         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1511         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1512         int                     len1,
1513                                 len2;
1514         bool            result;
1515
1516         len1 = VARSIZE(arg1) - VARHDRSZ;
1517         len2 = VARSIZE(arg2) - VARHDRSZ;
1518
1519         /* fast path for different-length inputs */
1520         if (len1 != len2)
1521                 result = false;
1522         else
1523                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1524
1525         PG_FREE_IF_COPY(arg1, 0);
1526         PG_FREE_IF_COPY(arg2, 1);
1527
1528         PG_RETURN_BOOL(result);
1529 }
1530
1531 Datum
1532 byteane(PG_FUNCTION_ARGS)
1533 {
1534         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1535         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1536         int                     len1,
1537                                 len2;
1538         bool            result;
1539
1540         len1 = VARSIZE(arg1) - VARHDRSZ;
1541         len2 = VARSIZE(arg2) - VARHDRSZ;
1542
1543         /* fast path for different-length inputs */
1544         if (len1 != len2)
1545                 result = true;
1546         else
1547                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1548
1549         PG_FREE_IF_COPY(arg1, 0);
1550         PG_FREE_IF_COPY(arg2, 1);
1551
1552         PG_RETURN_BOOL(result);
1553 }
1554
1555 Datum
1556 bytealt(PG_FUNCTION_ARGS)
1557 {
1558         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1559         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1560         int                     len1,
1561                                 len2;
1562         int                     cmp;
1563
1564         len1 = VARSIZE(arg1) - VARHDRSZ;
1565         len2 = VARSIZE(arg2) - VARHDRSZ;
1566
1567         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1568
1569         PG_FREE_IF_COPY(arg1, 0);
1570         PG_FREE_IF_COPY(arg2, 1);
1571
1572         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1573 }
1574
1575 Datum
1576 byteale(PG_FUNCTION_ARGS)
1577 {
1578         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1579         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1580         int                     len1,
1581                                 len2;
1582         int                     cmp;
1583
1584         len1 = VARSIZE(arg1) - VARHDRSZ;
1585         len2 = VARSIZE(arg2) - VARHDRSZ;
1586
1587         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1588
1589         PG_FREE_IF_COPY(arg1, 0);
1590         PG_FREE_IF_COPY(arg2, 1);
1591
1592         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1593 }
1594
1595 Datum
1596 byteagt(PG_FUNCTION_ARGS)
1597 {
1598         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1599         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1600         int                     len1,
1601                                 len2;
1602         int                     cmp;
1603
1604         len1 = VARSIZE(arg1) - VARHDRSZ;
1605         len2 = VARSIZE(arg2) - VARHDRSZ;
1606
1607         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1608
1609         PG_FREE_IF_COPY(arg1, 0);
1610         PG_FREE_IF_COPY(arg2, 1);
1611
1612         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1613 }
1614
1615 Datum
1616 byteage(PG_FUNCTION_ARGS)
1617 {
1618         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1619         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1620         int                     len1,
1621                                 len2;
1622         int                     cmp;
1623
1624         len1 = VARSIZE(arg1) - VARHDRSZ;
1625         len2 = VARSIZE(arg2) - VARHDRSZ;
1626
1627         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1628
1629         PG_FREE_IF_COPY(arg1, 0);
1630         PG_FREE_IF_COPY(arg2, 1);
1631
1632         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1633 }
1634
1635 Datum
1636 byteacmp(PG_FUNCTION_ARGS)
1637 {
1638         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1639         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1640         int                     len1,
1641                                 len2;
1642         int                     cmp;
1643
1644         len1 = VARSIZE(arg1) - VARHDRSZ;
1645         len2 = VARSIZE(arg2) - VARHDRSZ;
1646
1647         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1648         if ((cmp == 0) && (len1 != len2))
1649                 cmp = (len1 < len2) ? -1 : 1;
1650
1651         PG_FREE_IF_COPY(arg1, 0);
1652         PG_FREE_IF_COPY(arg2, 1);
1653
1654         PG_RETURN_INT32(cmp);
1655 }
1656
1657 /*
1658  * replace_text
1659  * replace all occurences of 'old_sub_str' in 'orig_str'
1660  * with 'new_sub_str' to form 'new_str'
1661  *
1662  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1663  * otherwise returns 'new_str'
1664  */
1665 Datum
1666 replace_text(PG_FUNCTION_ARGS)
1667 {
1668         text            *left_text;
1669         text            *right_text;
1670         text            *buf_text;
1671         text            *ret_text;
1672         int                     curr_posn;
1673         text            *src_text = PG_GETARG_TEXT_P(0);
1674         int                     src_text_len = TEXTLEN(src_text);
1675         text            *from_sub_text = PG_GETARG_TEXT_P(1);
1676         int                     from_sub_text_len = TEXTLEN(from_sub_text);
1677         text            *to_sub_text = PG_GETARG_TEXT_P(2);
1678         char            *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1679         StringInfo      str = makeStringInfo();
1680
1681         if (src_text_len == 0 || from_sub_text_len == 0)
1682                 PG_RETURN_TEXT_P(src_text);
1683
1684         buf_text = TEXTDUP(src_text);
1685         curr_posn = TEXTPOS(buf_text, from_sub_text);
1686
1687         while (curr_posn > 0)
1688         {
1689                 left_text = LEFT(buf_text, from_sub_text);
1690                 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1691
1692                 appendStringInfo(str, PG_TEXT_GET_STR(left_text));
1693                 appendStringInfo(str, to_sub_str);
1694
1695                 pfree(buf_text);
1696                 pfree(left_text);
1697                 buf_text = right_text;
1698                 curr_posn = TEXTPOS(buf_text, from_sub_text);
1699         }
1700
1701         appendStringInfo(str, PG_TEXT_GET_STR(buf_text));
1702         pfree(buf_text);
1703
1704         ret_text = PG_STR_GET_TEXT(str->data);
1705         pfree(str->data);
1706         pfree(str);
1707
1708         PG_RETURN_TEXT_P(ret_text);
1709 }
1710
1711 /*
1712  * split_text
1713  * parse input string
1714  * return ord item (1 based)
1715  * based on provided field separator
1716  */
1717 Datum
1718 split_text(PG_FUNCTION_ARGS)
1719 {
1720         text       *inputstring = PG_GETARG_TEXT_P(0);
1721         int                     inputstring_len = TEXTLEN(inputstring);
1722         text       *fldsep = PG_GETARG_TEXT_P(1);
1723         int                     fldsep_len = TEXTLEN(fldsep);
1724         int                     fldnum = PG_GETARG_INT32(2);
1725         int                     start_posn = 0;
1726         int                     end_posn = 0;
1727         text            *result_text;
1728
1729         /* return empty string for empty input string */
1730         if (inputstring_len < 1)
1731                 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1732
1733         /* empty field separator */
1734         if (fldsep_len < 1)
1735         {
1736                 if (fldnum == 1)        /* first field - just return the input string */
1737                         PG_RETURN_TEXT_P(inputstring);
1738                 else                            /* otherwise return an empty string */
1739                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1740         }
1741
1742         /* field number is 1 based */
1743         if (fldnum < 1)
1744                 elog(ERROR, "field position must be > 0");
1745
1746         start_posn = text_position(PointerGetDatum(inputstring),
1747                                                                 PointerGetDatum(fldsep),
1748                                                                 fldnum - 1);
1749         end_posn = text_position(PointerGetDatum(inputstring),
1750                                                                 PointerGetDatum(fldsep),
1751                                                                 fldnum);
1752
1753         if ((start_posn == 0) && (end_posn == 0))       /* fldsep not found */
1754         {
1755                 if (fldnum == 1)        /* first field - just return the input string */
1756                         PG_RETURN_TEXT_P(inputstring);
1757                 else                            /* otherwise return an empty string */
1758                         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1759         }
1760         else if ((start_posn != 0) && (end_posn == 0))
1761         {
1762                 /* last field requested */
1763                 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
1764                 PG_RETURN_TEXT_P(result_text);
1765         }
1766         else if ((start_posn == 0) && (end_posn != 0))
1767         {
1768                 /* first field requested */
1769                 result_text = LEFT(inputstring, fldsep);
1770                 PG_RETURN_TEXT_P(result_text);
1771         }
1772         else
1773         {
1774                 /* prior to last field requested */
1775                 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
1776                 PG_RETURN_TEXT_P(result_text);
1777         }
1778 }
1779
1780 #define HEXBASE 16
1781 /*
1782  * Convert a int32 to a string containing a base 16 (hex) representation of
1783  * the number.
1784  */
1785 Datum
1786 to_hex32(PG_FUNCTION_ARGS)
1787 {
1788         static char             digits[] = "0123456789abcdef";
1789         char                    buf[32];        /* bigger than needed, but reasonable */
1790         char               *ptr;
1791         text               *result_text;
1792         int32                   value = PG_GETARG_INT32(0);
1793
1794         ptr = buf + sizeof(buf) - 1;
1795         *ptr = '\0';
1796
1797         do
1798         {
1799                 *--ptr = digits[value % HEXBASE];
1800                 value /= HEXBASE;
1801         } while (ptr > buf && value);
1802
1803         result_text = PG_STR_GET_TEXT(ptr);
1804         PG_RETURN_TEXT_P(result_text);
1805 }
1806
1807 /*
1808  * Convert a int64 to a string containing a base 16 (hex) representation of
1809  * the number.
1810  */
1811 Datum
1812 to_hex64(PG_FUNCTION_ARGS)
1813 {
1814         static char             digits[] = "0123456789abcdef";
1815         char                    buf[32];        /* bigger than needed, but reasonable */
1816         char                    *ptr;
1817         text                    *result_text;
1818         int64                   value = PG_GETARG_INT64(0);
1819
1820         ptr = buf + sizeof(buf) - 1;
1821         *ptr = '\0';
1822
1823         do
1824         {
1825                 *--ptr = digits[value % HEXBASE];
1826                 value /= HEXBASE;
1827         } while (ptr > buf && value);
1828
1829         result_text = PG_STR_GET_TEXT(ptr);
1830         PG_RETURN_TEXT_P(result_text);
1831 }
1832