granicus.if.org Git - postgresql/blob - src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.83 2002/04/15 07:54:37 ishii Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
#include "postgres.h"

#include <ctype.h>
#include <limits.h>

#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/pg_locale.h"
  23
  24 static int      text_cmp(text *arg1, text *arg2);
  25
  26
  27 /*****************************************************************************
  28  *       USER I/O ROUTINES                                                                                                               *
  29  *****************************************************************************/
  30
  31
  32 #define VAL(CH)                 ((CH) - '0')
  33 #define DIG(VAL)                ((VAL) + '0')
  34
  35 /*
  36  *              byteain                 - converts from printable representation of byte array
  37  *
  38  *              Non-printable characters must be passed as '\nnn' (octal) and are
  39  *              converted to internal form.  '\' must be passed as '\\'.
  40  *              elog(ERROR, ...) if bad form.
  41  *
  42  *              BUGS:
  43  *                              The input is scaned twice.
  44  *                              The error checking of input is minimal.
  45  */
  46 Datum
  47 byteain(PG_FUNCTION_ARGS)
  48 {
  49         char       *inputText = PG_GETARG_CSTRING(0);
  50         char       *tp;
  51         char       *rp;
  52         int                     byte;
  53         bytea      *result;
  54
  55         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
  56         {
  57                 if (tp[0] != '\\')
  58                         tp++;
  59                 else if ((tp[0] == '\\') &&
  60                                  (tp[1] >= '0' && tp[1] <= '3') &&
  61                                  (tp[2] >= '0' && tp[2] <= '7') &&
  62                                  (tp[3] >= '0' && tp[3] <= '7'))
  63                         tp += 4;
  64                 else if ((tp[0] == '\\') &&
  65                                  (tp[1] == '\\'))
  66                         tp += 2;
  67                 else
  68                 {
  69                         /*
  70                          * one backslash, not followed by 0 or ### valid octal
  71                          */
  72                         elog(ERROR, "Bad input string for type bytea");
  73                 }
  74         }
  75
  76         byte += VARHDRSZ;
  77         result = (bytea *) palloc(byte);
  78         result->vl_len = byte;          /* set varlena length */
  79
  80         tp = inputText;
  81         rp = result->vl_dat;
  82         while (*tp != '\0')
  83         {
  84                 if (tp[0] != '\\')
  85                         *rp++ = *tp++;
  86                 else if ((tp[0] == '\\') &&
  87                                  (tp[1] >= '0' && tp[1] <= '3') &&
  88                                  (tp[2] >= '0' && tp[2] <= '7') &&
  89                                  (tp[3] >= '0' && tp[3] <= '7'))
  90                 {
  91                         byte = VAL(tp[1]);
  92                         byte <<= 3;
  93                         byte += VAL(tp[2]);
  94                         byte <<= 3;
  95                         *rp++ = byte + VAL(tp[3]);
  96                         tp += 4;
  97                 }
  98                 else if ((tp[0] == '\\') &&
  99                                  (tp[1] == '\\'))
 100                 {
 101                         *rp++ = '\\';
 102                         tp += 2;
 103                 }
 104                 else
 105                 {
 106                         /*
 107                          * We should never get here. The first pass should not allow
 108                          * it.
 109                          */
 110                         elog(ERROR, "Bad input string for type bytea");
 111                 }
 112         }
 113
 114         PG_RETURN_BYTEA_P(result);
 115 }
 116
 117 /*
 118  *              byteaout                - converts to printable representation of byte array
 119  *
 120  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 121  *              '\\'.
 122  *
 123  *              NULL vlena should be an error--returning string with NULL for now.
 124  */
 125 Datum
 126 byteaout(PG_FUNCTION_ARGS)
 127 {
 128         bytea      *vlena = PG_GETARG_BYTEA_P(0);
 129         char       *result;
 130         char       *vp;
 131         char       *rp;
 132         int                     val;                    /* holds unprintable chars */
 133         int                     i;
 134         int                     len;
 135
 136         len = 1;                                        /* empty string has 1 char */
 137         vp = vlena->vl_dat;
 138         for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
 139         {
 140                 if (*vp == '\\')
 141                         len += 2;
 142                 else if (isprint((unsigned char) *vp))
 143                         len++;
 144                 else
 145                         len += 4;
 146         }
 147         rp = result = (char *) palloc(len);
 148         vp = vlena->vl_dat;
 149         for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
 150         {
 151                 if (*vp == '\\')
 152                 {
 153                         *rp++ = '\\';
 154                         *rp++ = '\\';
 155                 }
 156                 else if (isprint((unsigned char) *vp))
 157                         *rp++ = *vp;
 158                 else
 159                 {
 160                         val = *vp;
 161                         rp[0] = '\\';
 162                         rp[3] = DIG(val & 07);
 163                         val >>= 3;
 164                         rp[2] = DIG(val & 07);
 165                         val >>= 3;
 166                         rp[1] = DIG(val & 03);
 167                         rp += 4;
 168                 }
 169         }
 170         *rp = '\0';
 171         PG_RETURN_CSTRING(result);
 172 }
 173
 174
 175 /*
 176  *              textin                  - converts "..." to internal representation
 177  */
 178 Datum
 179 textin(PG_FUNCTION_ARGS)
 180 {
 181         char       *inputText = PG_GETARG_CSTRING(0);
 182         text       *result;
 183         int                     len;
 184
 185 #ifdef MULTIBYTE
 186         char       *ermsg;
 187 #endif
 188
 189         len = strlen(inputText) + VARHDRSZ;
 190
 191 #ifdef MULTIBYTE
 192         if ((ermsg = pg_verifymbstr(inputText, len - VARHDRSZ)))
 193                 elog(ERROR, "%s", ermsg);
 194 #endif
 195
 196         result = (text *) palloc(len);
 197         VARATT_SIZEP(result) = len;
 198
 199         memcpy(VARDATA(result), inputText, len - VARHDRSZ);
 200
 201 #ifdef CYR_RECODE
 202         convertstr(VARDATA(result), len - VARHDRSZ, 0);
 203 #endif
 204
 205         PG_RETURN_TEXT_P(result);
 206 }
 207
 208 /*
 209  *              textout                 - converts internal representation to "..."
 210  */
 211 Datum
 212 textout(PG_FUNCTION_ARGS)
 213 {
 214         text       *t = PG_GETARG_TEXT_P(0);
 215         int                     len;
 216         char       *result;
 217
 218         len = VARSIZE(t) - VARHDRSZ;
 219         result = (char *) palloc(len + 1);
 220         memcpy(result, VARDATA(t), len);
 221         result[len] = '\0';
 222
 223 #ifdef CYR_RECODE
 224         convertstr(result, len, 1);
 225 #endif
 226
 227         PG_RETURN_CSTRING(result);
 228 }
 229
 230
 231 /* ========== PUBLIC ROUTINES ========== */
 232
 233 /*
 234  * textlen -
 235  *        returns the logical length of a text*
 236  *         (which is less than the VARSIZE of the text*)
 237  */
 238 Datum
 239 textlen(PG_FUNCTION_ARGS)
 240 {
 241         text       *t = PG_GETARG_TEXT_P(0);
 242
 243 #ifdef MULTIBYTE
 244         /* optimization for single byte encoding */
 245         if (pg_database_encoding_max_length() <= 1)
 246                 PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ);
 247
 248         PG_RETURN_INT32(
 249                 pg_mbstrlen_with_len(VARDATA(t), VARSIZE(t) - VARHDRSZ)
 250                 );
 251 #else
 252         PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ);
 253 #endif
 254 }
 255
 256 /*
 257  * textoctetlen -
 258  *        returns the physical length of a text*
 259  *         (which is less than the VARSIZE of the text*)
 260  */
 261 Datum
 262 textoctetlen(PG_FUNCTION_ARGS)
 263 {
 264         text    *arg = PG_GETARG_TEXT_P(0);
 265
 266         PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
 267 }
 268
 269 /*
 270  * textcat -
 271  *        takes two text* and returns a text* that is the concatenation of
 272  *        the two.
 273  *
 274  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 275  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 276  * Allocate space for output in all cases.
 277  * XXX - thomas 1997-07-10
 278  */
 279 Datum
 280 textcat(PG_FUNCTION_ARGS)
 281 {
 282         text       *t1 = PG_GETARG_TEXT_P(0);
 283         text       *t2 = PG_GETARG_TEXT_P(1);
 284         int                     len1,
 285                                 len2,
 286                                 len;
 287         text       *result;
 288         char       *ptr;
 289
 290         len1 = (VARSIZE(t1) - VARHDRSZ);
 291         if (len1 < 0)
 292                 len1 = 0;
 293
 294         len2 = (VARSIZE(t2) - VARHDRSZ);
 295         if (len2 < 0)
 296                 len2 = 0;
 297
 298         len = len1 + len2 + VARHDRSZ;
 299         result = (text *) palloc(len);
 300
 301         /* Set size of result string... */
 302         VARATT_SIZEP(result) = len;
 303
 304         /* Fill data field of result string... */
 305         ptr = VARDATA(result);
 306         if (len1 > 0)
 307                 memcpy(ptr, VARDATA(t1), len1);
 308         if (len2 > 0)
 309                 memcpy(ptr + len1, VARDATA(t2), len2);
 310
 311         PG_RETURN_TEXT_P(result);
 312 }
 313
 314 /*
 315  * text_substr()
 316  * Return a substring starting at the specified position.
 317  * - thomas 1997-12-31
 318  *
 319  * Input:
 320  *      - string
 321  *      - starting position (is one-based)
 322  *      - string length
 323  *
 324  * If the starting position is zero or less, then return from the start of the string
 325  *      adjusting the length to be consistent with the "negative start" per SQL92.
 326  * If the length is less than zero, return the remaining string.
 327  *
 328  * Note that the arguments operate on octet length,
 329  *      so not aware of multi-byte character sets.
 330  *
 331  * Added multi-byte support.
 332  * - Tatsuo Ishii 1998-4-21
 333  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 334  * Formerly returned the entire string; now returns a portion.
 335  * - Thomas Lockhart 1998-12-10
 336  * Now uses faster TOAST-slicing interface
 337  * - John Gray 2002-02-22
 338  */
 339 Datum
 340 text_substr(PG_FUNCTION_ARGS)
 341 {
 342         text       *string;
 343         int32           m = PG_GETARG_INT32(1);
 344         int32           n = PG_GETARG_INT32(2);
 345         int32       sm;
 346         int32       sn;
 347         int         eml = 1;
 348 #ifdef MULTIBYTE
 349         int                     i;
 350         int                     len;
 351         text       *ret;
 352         char       *p;
 353 #endif
 354
 355         /*
 356          * starting position before the start of the string? then offset into
 357          * the string per SQL92 spec...
 358          */
 359         if (m < 1)
 360         {
 361                 n += (m - 1);
 362                 m = 1;
 363         }
 364         /* Check for m > octet length is made in TOAST access routine */
 365
 366         /* m will now become a zero-based starting position */
 367         sm = m - 1;
 368         sn = n;
 369
 370 #ifdef MULTIBYTE
 371         eml = pg_database_encoding_max_length ();
 372
 373         if (eml > 1)
 374         {
 375                 sm = 0;
 376                 if (n > -1)
 377                         sn = (m + n) * eml + 3; /* +3 to avoid mb characters overhanging slice end */
 378                 else
 379                         sn = n;         /* n < 0 is special-cased by heap_tuple_untoast_attr_slice */
 380         }
 381 #endif
 382
 383         string = PG_GETARG_TEXT_P_SLICE (0, sm, sn);
 384
 385         if (eml == 1)
 386         {
 387                 PG_RETURN_TEXT_P (string);
 388         }
 389 #ifndef MULTIBYTE
 390         PG_RETURN_NULL();   /* notreached: suppress compiler warning */
 391 #endif
 392 #ifdef MULTIBYTE
 393         if (n > -1)
 394                 len = pg_mbstrlen_with_len (VARDATA (string), sn - 3);
 395         else    /* n < 0 is special-cased; need full string length */
 396                 len = pg_mbstrlen_with_len (VARDATA (string), VARSIZE(string)-VARHDRSZ);
 397
 398         if (m > len)
 399         {
 400                 m = 1;
 401                 n = 0;
 402         }
 403         m--;
 404         if (((m + n) > len) || (n < 0))
 405                 n = (len - m);
 406
 407         p = VARDATA(string);
 408         for (i = 0; i < m; i++)
 409                 p += pg_mblen(p);
 410         m = p - VARDATA(string);
 411         for (i = 0; i < n; i++)
 412                 p += pg_mblen(p);
 413         n = p - (VARDATA(string) + m);
 414
 415         ret = (text *) palloc(VARHDRSZ + n);
 416         VARATT_SIZEP(ret) = VARHDRSZ + n;
 417
 418         memcpy(VARDATA(ret), VARDATA(string) + m, n);
 419
 420         PG_RETURN_TEXT_P(ret);
 421 #endif
 422 }
 423
 424 /*
 425  * textpos -
 426  *        Return the position of the specified substring.
 427  *        Implements the SQL92 POSITION() function.
 428  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 429  * - thomas 1997-07-27
 430  *
 431  * Added multi-byte support.
 432  * - Tatsuo Ishii 1998-4-21
 433  */
 434 Datum
 435 textpos(PG_FUNCTION_ARGS)
 436 {
 437         text       *t1 = PG_GETARG_TEXT_P(0);
 438         text       *t2 = PG_GETARG_TEXT_P(1);
 439         int                     pos;
 440         int                     px,
 441                                 p;
 442         int                     len1,
 443                                 len2;
 444         pg_wchar   *p1,
 445                            *p2;
 446
 447 #ifdef MULTIBYTE
 448         pg_wchar   *ps1,
 449                            *ps2;
 450 #endif
 451
 452         if (VARSIZE(t2) <= VARHDRSZ)
 453                 PG_RETURN_INT32(1);             /* result for empty pattern */
 454
 455         len1 = (VARSIZE(t1) - VARHDRSZ);
 456         len2 = (VARSIZE(t2) - VARHDRSZ);
 457 #ifdef MULTIBYTE
 458         ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 459         (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
 460         len1 = pg_wchar_strlen(p1);
 461         ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 462         (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
 463         len2 = pg_wchar_strlen(p2);
 464 #else
 465         p1 = VARDATA(t1);
 466         p2 = VARDATA(t2);
 467 #endif
 468         pos = 0;
 469         px = (len1 - len2);
 470         for (p = 0; p <= px; p++)
 471         {
 472 #ifdef MULTIBYTE
 473                 if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
 474 #else
 475                 if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
 476 #endif
 477                 {
 478                         pos = p + 1;
 479                         break;
 480                 };
 481                 p1++;
 482         };
 483 #ifdef MULTIBYTE
 484         pfree(ps1);
 485         pfree(ps2);
 486 #endif
 487         PG_RETURN_INT32(pos);
 488 }
 489
 490 /* varstr_cmp()
 491  * Comparison function for text strings with given lengths.
 492  * Includes locale support, but must copy strings to temporary memory
 493  *      to allow null-termination for inputs to strcoll().
 494  * Returns -1, 0 or 1
 495  */
 496 int
 497 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
 498 {
 499         int                     result;
 500         char       *a1p,
 501                            *a2p;
 502
 503         /*
 504          * Unfortunately, there is no strncoll(), so in the non-C locale
 505          * case we have to do some memory copying.  This turns out to be
 506          * significantly slower, so we optimize the case were LC_COLLATE
 507          * is C.
 508          */
 509         if (!lc_collate_is_c())
 510         {
 511                 a1p = (char *) palloc(len1 + 1);
 512                 a2p = (char *) palloc(len2 + 1);
 513
 514                 memcpy(a1p, arg1, len1);
 515                 *(a1p + len1) = '\0';
 516                 memcpy(a2p, arg2, len2);
 517                 *(a2p + len2) = '\0';
 518
 519                 result = strcoll(a1p, a2p);
 520
 521                 pfree(a1p);
 522                 pfree(a2p);
 523         }
 524         else
 525         {
 526                 a1p = arg1;
 527                 a2p = arg2;
 528
 529                 result = strncmp(a1p, a2p, Min(len1, len2));
 530                 if ((result == 0) && (len1 != len2))
 531                         result = (len1 < len2) ? -1 : 1;
 532         }
 533
 534         return result;
 535 }
 536
 537
 538 /* text_cmp()
 539  * Internal comparison function for text strings.
 540  * Returns -1, 0 or 1
 541  */
 542 static int
 543 text_cmp(text *arg1, text *arg2)
 544 {
 545         char       *a1p,
 546                            *a2p;
 547         int                     len1,
 548                                 len2;
 549
 550         a1p = VARDATA(arg1);
 551         a2p = VARDATA(arg2);
 552
 553         len1 = VARSIZE(arg1) - VARHDRSZ;
 554         len2 = VARSIZE(arg2) - VARHDRSZ;
 555
 556         return varstr_cmp(a1p, len1, a2p, len2);
 557 }
 558
 559 /*
 560  * Comparison functions for text strings.
 561  *
 562  * Note: btree indexes need these routines not to leak memory; therefore,
 563  * be careful to free working copies of toasted datums.  Most places don't
 564  * need to be so careful.
 565  */
 566
 567 Datum
 568 texteq(PG_FUNCTION_ARGS)
 569 {
 570         text       *arg1 = PG_GETARG_TEXT_P(0);
 571         text       *arg2 = PG_GETARG_TEXT_P(1);
 572         bool            result;
 573
 574         /* fast path for different-length inputs */
 575         if (VARSIZE(arg1) != VARSIZE(arg2))
 576                 result = false;
 577         else
 578                 result = (text_cmp(arg1, arg2) == 0);
 579
 580         PG_FREE_IF_COPY(arg1, 0);
 581         PG_FREE_IF_COPY(arg2, 1);
 582
 583         PG_RETURN_BOOL(result);
 584 }
 585
 586 Datum
 587 textne(PG_FUNCTION_ARGS)
 588 {
 589         text       *arg1 = PG_GETARG_TEXT_P(0);
 590         text       *arg2 = PG_GETARG_TEXT_P(1);
 591         bool            result;
 592
 593         /* fast path for different-length inputs */
 594         if (VARSIZE(arg1) != VARSIZE(arg2))
 595                 result = true;
 596         else
 597                 result = (text_cmp(arg1, arg2) != 0);
 598
 599         PG_FREE_IF_COPY(arg1, 0);
 600         PG_FREE_IF_COPY(arg2, 1);
 601
 602         PG_RETURN_BOOL(result);
 603 }
 604
 605 Datum
 606 text_lt(PG_FUNCTION_ARGS)
 607 {
 608         text       *arg1 = PG_GETARG_TEXT_P(0);
 609         text       *arg2 = PG_GETARG_TEXT_P(1);
 610         bool            result;
 611
 612         result = (text_cmp(arg1, arg2) < 0);
 613
 614         PG_FREE_IF_COPY(arg1, 0);
 615         PG_FREE_IF_COPY(arg2, 1);
 616
 617         PG_RETURN_BOOL(result);
 618 }
 619
 620 Datum
 621 text_le(PG_FUNCTION_ARGS)
 622 {
 623         text       *arg1 = PG_GETARG_TEXT_P(0);
 624         text       *arg2 = PG_GETARG_TEXT_P(1);
 625         bool            result;
 626
 627         result = (text_cmp(arg1, arg2) <= 0);
 628
 629         PG_FREE_IF_COPY(arg1, 0);
 630         PG_FREE_IF_COPY(arg2, 1);
 631
 632         PG_RETURN_BOOL(result);
 633 }
 634
 635 Datum
 636 text_gt(PG_FUNCTION_ARGS)
 637 {
 638         text       *arg1 = PG_GETARG_TEXT_P(0);
 639         text       *arg2 = PG_GETARG_TEXT_P(1);
 640         bool            result;
 641
 642         result = (text_cmp(arg1, arg2) > 0);
 643
 644         PG_FREE_IF_COPY(arg1, 0);
 645         PG_FREE_IF_COPY(arg2, 1);
 646
 647         PG_RETURN_BOOL(result);
 648 }
 649
 650 Datum
 651 text_ge(PG_FUNCTION_ARGS)
 652 {
 653         text       *arg1 = PG_GETARG_TEXT_P(0);
 654         text       *arg2 = PG_GETARG_TEXT_P(1);
 655         bool            result;
 656
 657         result = (text_cmp(arg1, arg2) >= 0);
 658
 659         PG_FREE_IF_COPY(arg1, 0);
 660         PG_FREE_IF_COPY(arg2, 1);
 661
 662         PG_RETURN_BOOL(result);
 663 }
 664
 665 Datum
 666 bttextcmp(PG_FUNCTION_ARGS)
 667 {
 668         text       *arg1 = PG_GETARG_TEXT_P(0);
 669         text       *arg2 = PG_GETARG_TEXT_P(1);
 670         int32           result;
 671
 672         result = text_cmp(arg1, arg2);
 673
 674         PG_FREE_IF_COPY(arg1, 0);
 675         PG_FREE_IF_COPY(arg2, 1);
 676
 677         PG_RETURN_INT32(result);
 678 }
 679
 680
 681 Datum
 682 text_larger(PG_FUNCTION_ARGS)
 683 {
 684         text       *arg1 = PG_GETARG_TEXT_P(0);
 685         text       *arg2 = PG_GETARG_TEXT_P(1);
 686         text       *result;
 687
 688         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
 689
 690         PG_RETURN_TEXT_P(result);
 691 }
 692
 693 Datum
 694 text_smaller(PG_FUNCTION_ARGS)
 695 {
 696         text       *arg1 = PG_GETARG_TEXT_P(0);
 697         text       *arg2 = PG_GETARG_TEXT_P(1);
 698         text       *result;
 699
 700         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
 701
 702         PG_RETURN_TEXT_P(result);
 703 }
 704
 705 /*-------------------------------------------------------------
 706  * byteaoctetlen
 707  *
 708  * get the number of bytes contained in an instance of type 'bytea'
 709  *-------------------------------------------------------------
 710  */
 711 Datum
 712 byteaoctetlen(PG_FUNCTION_ARGS)
 713 {
 714         bytea      *v = PG_GETARG_BYTEA_P(0);
 715
 716         PG_RETURN_INT32(VARSIZE(v) - VARHDRSZ);
 717 }
 718
 719 /*
 720  * byteacat -
 721  *        takes two bytea* and returns a bytea* that is the concatenation of
 722  *        the two.
 723  *
 724  * Cloned from textcat and modified as required.
 725  */
 726 Datum
 727 byteacat(PG_FUNCTION_ARGS)
 728 {
 729         bytea      *t1 = PG_GETARG_BYTEA_P(0);
 730         bytea      *t2 = PG_GETARG_BYTEA_P(1);
 731         int                     len1,
 732                                 len2,
 733                                 len;
 734         bytea      *result;
 735         char       *ptr;
 736
 737         len1 = (VARSIZE(t1) - VARHDRSZ);
 738         if (len1 < 0)
 739                 len1 = 0;
 740
 741         len2 = (VARSIZE(t2) - VARHDRSZ);
 742         if (len2 < 0)
 743                 len2 = 0;
 744
 745         len = len1 + len2 + VARHDRSZ;
 746         result = (bytea *) palloc(len);
 747
 748         /* Set size of result string... */
 749         VARATT_SIZEP(result) = len;
 750
 751         /* Fill data field of result string... */
 752         ptr = VARDATA(result);
 753         if (len1 > 0)
 754                 memcpy(ptr, VARDATA(t1), len1);
 755         if (len2 > 0)
 756                 memcpy(ptr + len1, VARDATA(t2), len2);
 757
 758         PG_RETURN_BYTEA_P(result);
 759 }
 760
 761 /*
 762  * bytea_substr()
 763  * Return a substring starting at the specified position.
 764  * Cloned from text_substr and modified as required.
 765  *
 766  * Input:
 767  *      - string
 768  *      - starting position (is one-based)
 769  *      - string length
 770  *
 771  * If the starting position is zero or less, then return from the start of the string
 772  * adjusting the length to be consistent with the "negative start" per SQL92.
 773  * If the length is less than zero, return the remaining string.
 774  *
 775  */
 776 Datum
 777 bytea_substr(PG_FUNCTION_ARGS)
 778 {
 779         int32           m = PG_GETARG_INT32(1);
 780         int32           n = PG_GETARG_INT32(2);
 781
 782         /*
 783          * starting position before the start of the string? then offset into
 784          * the string per SQL92 spec...
 785          */
 786         if (m < 1)
 787         {
 788                 n += (m - 1);
 789                 m = 1;
 790         }
 791
 792         /* m will now become a zero-based starting position */
 793         m--;
 794
 795         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, m, n));
 796 }
 797
 798 /*
 799  * byteapos -
 800  *        Return the position of the specified substring.
 801  *        Implements the SQL92 POSITION() function.
 802  * Cloned from textpos and modified as required.
 803  */
 804 Datum
 805 byteapos(PG_FUNCTION_ARGS)
 806 {
 807         bytea      *t1 = PG_GETARG_BYTEA_P(0);
 808         bytea      *t2 = PG_GETARG_BYTEA_P(1);
 809         int                     pos;
 810         int                     px,
 811                                 p;
 812         int                     len1,
 813                                 len2;
 814         char       *p1,
 815                            *p2;
 816
 817         if (VARSIZE(t2) <= VARHDRSZ)
 818                 PG_RETURN_INT32(1);             /* result for empty pattern */
 819
 820         len1 = (VARSIZE(t1) - VARHDRSZ);
 821         len2 = (VARSIZE(t2) - VARHDRSZ);
 822
 823         p1 = VARDATA(t1);
 824         p2 = VARDATA(t2);
 825
 826         pos = 0;
 827         px = (len1 - len2);
 828         for (p = 0; p <= px; p++)
 829         {
 830                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
 831                 {
 832                         pos = p + 1;
 833                         break;
 834                 };
 835                 p1++;
 836         };
 837
 838         PG_RETURN_INT32(pos);
 839 }
 840
 841 /*-------------------------------------------------------------
 842  * byteaGetByte
 843  *
 844  * this routine treats "bytea" as an array of bytes.
 845  * It returns the Nth byte (a number between 0 and 255).
 846  *-------------------------------------------------------------
 847  */
 848 Datum
 849 byteaGetByte(PG_FUNCTION_ARGS)
 850 {
 851         bytea      *v = PG_GETARG_BYTEA_P(0);
 852         int32           n = PG_GETARG_INT32(1);
 853         int                     len;
 854         int                     byte;
 855
 856         len = VARSIZE(v) - VARHDRSZ;
 857
 858         if (n < 0 || n >= len)
 859                 elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
 860                          n, len - 1);
 861
 862         byte = ((unsigned char *) VARDATA(v))[n];
 863
 864         PG_RETURN_INT32(byte);
 865 }
 866
 867 /*-------------------------------------------------------------
 868  * byteaGetBit
 869  *
 870  * This routine treats a "bytea" type like an array of bits.
 871  * It returns the value of the Nth bit (0 or 1).
 872  *
 873  *-------------------------------------------------------------
 874  */
 875 Datum
 876 byteaGetBit(PG_FUNCTION_ARGS)
 877 {
 878         bytea      *v = PG_GETARG_BYTEA_P(0);
 879         int32           n = PG_GETARG_INT32(1);
 880         int                     byteNo,
 881                                 bitNo;
 882         int                     len;
 883         int                     byte;
 884
 885         len = VARSIZE(v) - VARHDRSZ;
 886
 887         if (n < 0 || n >= len * 8)
 888                 elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
 889                          n, len * 8 - 1);
 890
 891         byteNo = n / 8;
 892         bitNo = n % 8;
 893
 894         byte = ((unsigned char *) VARDATA(v))[byteNo];
 895
 896         if (byte & (1 << bitNo))
 897                 PG_RETURN_INT32(1);
 898         else
 899                 PG_RETURN_INT32(0);
 900 }
 901
 902 /*-------------------------------------------------------------
 903  * byteaSetByte
 904  *
 905  * Given an instance of type 'bytea' creates a new one with
 906  * the Nth byte set to the given value.
 907  *
 908  *-------------------------------------------------------------
 909  */
 910 Datum
 911 byteaSetByte(PG_FUNCTION_ARGS)
 912 {
 913         bytea      *v = PG_GETARG_BYTEA_P(0);
 914         int32           n = PG_GETARG_INT32(1);
 915         int32           newByte = PG_GETARG_INT32(2);
 916         int                     len;
 917         bytea      *res;
 918
 919         len = VARSIZE(v) - VARHDRSZ;
 920
 921         if (n < 0 || n >= len)
 922                 elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
 923                          n, len - 1);
 924
 925         /*
 926          * Make a copy of the original varlena.
 927          */
 928         res = (bytea *) palloc(VARSIZE(v));
 929         memcpy((char *) res, (char *) v, VARSIZE(v));
 930
 931         /*
 932          * Now set the byte.
 933          */
 934         ((unsigned char *) VARDATA(res))[n] = newByte;
 935
 936         PG_RETURN_BYTEA_P(res);
 937 }
 938
 939 /*-------------------------------------------------------------
 940  * byteaSetBit
 941  *
 942  * Given an instance of type 'bytea' creates a new one with
 943  * the Nth bit set to the given value.
 944  *
 945  *-------------------------------------------------------------
 946  */
 947 Datum
 948 byteaSetBit(PG_FUNCTION_ARGS)
 949 {
 950         bytea      *v = PG_GETARG_BYTEA_P(0);
 951         int32           n = PG_GETARG_INT32(1);
 952         int32           newBit = PG_GETARG_INT32(2);
 953         bytea      *res;
 954         int                     len;
 955         int                     oldByte,
 956                                 newByte;
 957         int                     byteNo,
 958                                 bitNo;
 959
 960         len = VARSIZE(v) - VARHDRSZ;
 961
 962         if (n < 0 || n >= len * 8)
 963                 elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
 964                          n, len * 8 - 1);
 965
 966         byteNo = n / 8;
 967         bitNo = n % 8;
 968
 969         /*
 970          * sanity check!
 971          */
 972         if (newBit != 0 && newBit != 1)
 973                 elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
 974
 975         /*
 976          * Make a copy of the original varlena.
 977          */
 978         res = (bytea *) palloc(VARSIZE(v));
 979         memcpy((char *) res, (char *) v, VARSIZE(v));
 980
 981         /*
 982          * Update the byte.
 983          */
 984         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
 985
 986         if (newBit == 0)
 987                 newByte = oldByte & (~(1 << bitNo));
 988         else
 989                 newByte = oldByte | (1 << bitNo);
 990
 991         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
 992
 993         PG_RETURN_BYTEA_P(res);
 994 }
 995
 996
 997 /* text_name()
 998  * Converts a text type to a Name type.
 999  */
1000 Datum
1001 text_name(PG_FUNCTION_ARGS)
1002 {
1003         text       *s = PG_GETARG_TEXT_P(0);
1004         Name            result;
1005         int                     len;
1006
1007         len = VARSIZE(s) - VARHDRSZ;
1008
1009         /* Truncate oversize input */
1010         if (len >= NAMEDATALEN)
1011                 len = NAMEDATALEN - 1;
1012
1013 #ifdef STRINGDEBUG
1014         printf("text- convert string length %d (%d) ->%d\n",
1015                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1016 #endif
1017
1018         result = (Name) palloc(NAMEDATALEN);
1019         memcpy(NameStr(*result), VARDATA(s), len);
1020
1021         /* now null pad to full length... */
1022         while (len < NAMEDATALEN)
1023         {
1024                 *(NameStr(*result) + len) = '\0';
1025                 len++;
1026         }
1027
1028         PG_RETURN_NAME(result);
1029 }
1030
1031 /* name_text()
1032  * Converts a Name type to a text type.
1033  */
1034 Datum
1035 name_text(PG_FUNCTION_ARGS)
1036 {
1037         Name            s = PG_GETARG_NAME(0);
1038         text       *result;
1039         int                     len;
1040
1041         len = strlen(NameStr(*s));
1042
1043 #ifdef STRINGDEBUG
1044         printf("text- convert string length %d (%d) ->%d\n",
1045                    VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1046 #endif
1047
1048         result = palloc(VARHDRSZ + len);
1049         VARATT_SIZEP(result) = VARHDRSZ + len;
1050         memcpy(VARDATA(result), NameStr(*s), len);
1051
1052         PG_RETURN_TEXT_P(result);
1053 }
1054
1055
1056 /*
1057  * textToQualifiedNameList - convert a text object to list of names
1058  *
1059  * This implements the input parsing needed by nextval() and other
1060  * functions that take a text parameter representing a qualified name.
1061  * We split the name at dots, downcase if not double-quoted, and
1062  * truncate names if they're too long.
1063  */
1064 List *
1065 textToQualifiedNameList(text *textval, const char *caller)
1066 {
1067         char       *rawname;
1068         List       *result = NIL;
1069         List       *namelist;
1070         List       *l;
1071
1072         /* Convert to C string (handles possible detoasting). */
1073         /* Note we rely on being able to modify rawname below. */
1074         rawname = DatumGetCString(DirectFunctionCall1(textout,
1075                                                                                                   PointerGetDatum(textval)));
1076
1077         if (!SplitIdentifierString(rawname, '.', &namelist))
1078                 elog(ERROR, "%s: invalid name syntax", caller);
1079
1080         if (namelist == NIL)
1081                 elog(ERROR, "%s: invalid name syntax", caller);
1082
1083         foreach(l, namelist)
1084         {
1085                 char   *curname = (char *) lfirst(l);
1086
1087                 result = lappend(result, makeString(pstrdup(curname)));
1088         }
1089
1090         pfree(rawname);
1091         freeList(namelist);
1092
1093         return result;
1094 }
1095
1096 /*
1097  * SplitIdentifierString --- parse a string containing identifiers
1098  *
1099  * This is the guts of textToQualifiedNameList, and is exported for use in
1100  * other situations such as parsing GUC variables.  In the GUC case, it's
1101  * important to avoid memory leaks, so the API is designed to minimize the
1102  * amount of stuff that needs to be allocated and freed.
1103  *
1104  * Inputs:
1105  *      rawstring: the input string; must be overwritable!  On return, it's
1106  *                         been modified to contain the separated identifiers.
1107  *      separator: the separator punctuation expected between identifiers
1108  *                         (typically '.' or ',').  Whitespace may also appear around
1109  *                         identifiers.
1110  * Outputs:
1111  *      namelist: filled with a palloc'd list of pointers to identifiers within
1112  *                        rawstring.  Caller should freeList() this even on error return.
1113  *
1114  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1115  *
1116  * Note that an empty string is considered okay here, though not in
1117  * textToQualifiedNameList.
1118  */
1119 bool
1120 SplitIdentifierString(char *rawstring, char separator,
1121                                           List **namelist)
1122 {
1123         char       *nextp = rawstring;
1124         bool            done = false;
1125
1126         *namelist = NIL;
1127
1128         while (isspace((unsigned char) *nextp))
1129                 nextp++;                                /* skip leading whitespace */
1130
1131         if (*nextp == '\0')
1132                 return true;                    /* allow empty string */
1133
1134         /* At the top of the loop, we are at start of a new identifier. */
1135         do
1136         {
1137                 char       *curname;
1138                 char       *endp;
1139                 int                     curlen;
1140
1141                 if (*nextp == '\"')
1142                 {
1143                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
1144                         curname = nextp + 1;
1145                         for (;;)
1146                         {
1147                                 endp = strchr(nextp + 1, '\"');
1148                                 if (endp == NULL)
1149                                         return false; /* mismatched quotes */
1150                                 if (endp[1] != '\"')
1151                                         break;          /* found end of quoted name */
1152                                 /* Collapse adjacent quotes into one quote, and look again */
1153                                 memmove(endp, endp+1, strlen(endp));
1154                                 nextp = endp;
1155                         }
1156                         /* endp now points at the terminating quote */
1157                         nextp = endp + 1;
1158                 }
1159                 else
1160                 {
1161                         /* Unquoted name --- extends to separator or whitespace */
1162                         curname = nextp;
1163                         while (*nextp && *nextp != separator &&
1164                                    !isspace((unsigned char) *nextp))
1165                         {
1166                                 /*
1167                                  * It's important that this match the identifier downcasing
1168                                  * code used by backend/parser/scan.l.
1169                                  */
1170                                 if (isupper((unsigned char) *nextp))
1171                                         *nextp = tolower((unsigned char) *nextp);
1172                                 nextp++;
1173                         }
1174                         endp = nextp;
1175                         if (curname == nextp)
1176                                 return false;   /* empty unquoted name not allowed */
1177                 }
1178
1179                 while (isspace((unsigned char) *nextp))
1180                         nextp++;                        /* skip trailing whitespace */
1181
1182                 if (*nextp == separator)
1183                 {
1184                         nextp++;
1185                         while (isspace((unsigned char) *nextp))
1186                                 nextp++;                /* skip leading whitespace for next */
1187                         /* we expect another name, so done remains false */
1188                 }
1189                 else if (*nextp == '\0')
1190                         done = true;
1191                 else
1192                         return false;           /* invalid syntax */
1193
1194                 /* Now safe to overwrite separator with a null */
1195                 *endp = '\0';
1196
1197                 /* Truncate name if it's overlength; again, should match scan.l */
1198                 curlen = strlen(curname);
1199                 if (curlen >= NAMEDATALEN)
1200                 {
1201 #ifdef MULTIBYTE
1202                         curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1203                         curname[curlen] = '\0';
1204 #else
1205                         curname[NAMEDATALEN - 1] = '\0';
1206 #endif
1207                 }
1208
1209                 /*
1210                  * Finished isolating current name --- add it to list
1211                  */
1212                 *namelist = lappend(*namelist, curname);
1213
1214                 /* Loop back if we didn't reach end of string */
1215         } while (!done);
1216
1217         return true;
1218 }
1219
1220
1221 /*****************************************************************************
1222  *      Comparison Functions used for bytea
1223  *
1224  * Note: btree indexes need these routines not to leak memory; therefore,
1225  * be careful to free working copies of toasted datums.  Most places don't
1226  * need to be so careful.
1227  *****************************************************************************/
1228
1229 Datum
1230 byteaeq(PG_FUNCTION_ARGS)
1231 {
1232         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1233         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1234         int                     len1,
1235                                 len2;
1236         bool            result;
1237
1238         len1 = VARSIZE(arg1) - VARHDRSZ;
1239         len2 = VARSIZE(arg2) - VARHDRSZ;
1240
1241         /* fast path for different-length inputs */
1242         if (len1 != len2)
1243                 result = false;
1244         else
1245                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1246
1247         PG_FREE_IF_COPY(arg1, 0);
1248         PG_FREE_IF_COPY(arg2, 1);
1249
1250         PG_RETURN_BOOL(result);
1251 }
1252
1253 Datum
1254 byteane(PG_FUNCTION_ARGS)
1255 {
1256         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1257         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1258         int                     len1,
1259                                 len2;
1260         bool            result;
1261
1262         len1 = VARSIZE(arg1) - VARHDRSZ;
1263         len2 = VARSIZE(arg2) - VARHDRSZ;
1264
1265         /* fast path for different-length inputs */
1266         if (len1 != len2)
1267                 result = true;
1268         else
1269                 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1270
1271         PG_FREE_IF_COPY(arg1, 0);
1272         PG_FREE_IF_COPY(arg2, 1);
1273
1274         PG_RETURN_BOOL(result);
1275 }
1276
1277 Datum
1278 bytealt(PG_FUNCTION_ARGS)
1279 {
1280         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1281         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1282         int                     len1,
1283                                 len2;
1284         int                     cmp;
1285
1286         len1 = VARSIZE(arg1) - VARHDRSZ;
1287         len2 = VARSIZE(arg2) - VARHDRSZ;
1288
1289         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1290
1291         PG_FREE_IF_COPY(arg1, 0);
1292         PG_FREE_IF_COPY(arg2, 1);
1293
1294         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1295 }
1296
1297 Datum
1298 byteale(PG_FUNCTION_ARGS)
1299 {
1300         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1301         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1302         int                     len1,
1303                                 len2;
1304         int                     cmp;
1305
1306         len1 = VARSIZE(arg1) - VARHDRSZ;
1307         len2 = VARSIZE(arg2) - VARHDRSZ;
1308
1309         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1310
1311         PG_FREE_IF_COPY(arg1, 0);
1312         PG_FREE_IF_COPY(arg2, 1);
1313
1314         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1315 }
1316
1317 Datum
1318 byteagt(PG_FUNCTION_ARGS)
1319 {
1320         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1321         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1322         int                     len1,
1323                                 len2;
1324         int                     cmp;
1325
1326         len1 = VARSIZE(arg1) - VARHDRSZ;
1327         len2 = VARSIZE(arg2) - VARHDRSZ;
1328
1329         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1330
1331         PG_FREE_IF_COPY(arg1, 0);
1332         PG_FREE_IF_COPY(arg2, 1);
1333
1334         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1335 }
1336
1337 Datum
1338 byteage(PG_FUNCTION_ARGS)
1339 {
1340         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1341         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1342         int                     len1,
1343                                 len2;
1344         int                     cmp;
1345
1346         len1 = VARSIZE(arg1) - VARHDRSZ;
1347         len2 = VARSIZE(arg2) - VARHDRSZ;
1348
1349         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1350
1351         PG_FREE_IF_COPY(arg1, 0);
1352         PG_FREE_IF_COPY(arg2, 1);
1353
1354         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1355 }
1356
1357 Datum
1358 byteacmp(PG_FUNCTION_ARGS)
1359 {
1360         bytea      *arg1 = PG_GETARG_BYTEA_P(0);
1361         bytea      *arg2 = PG_GETARG_BYTEA_P(1);
1362         int                     len1,
1363                                 len2;
1364         int                     cmp;
1365
1366         len1 = VARSIZE(arg1) - VARHDRSZ;
1367         len2 = VARSIZE(arg2) - VARHDRSZ;
1368
1369         cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1370         if ((cmp == 0) && (len1 != len2))
1371                 cmp = (len1 < len2) ? -1 : 1;
1372
1373         PG_FREE_IF_COPY(arg1, 0);
1374         PG_FREE_IF_COPY(arg2, 1);
1375
1376         PG_RETURN_INT32(cmp);
1377 }