1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.60 2000/06/14 18:17:45 petere Exp $
13 *-------------------------------------------------------------------------
19 #include "mb/pg_wchar.h"
20 #include "utils/builtins.h"
22 static int text_cmp(text *arg1, text *arg2);
25 /*****************************************************************************
27 *****************************************************************************/
/* VAL(CH): numeric value of the digit character CH, i.e. CH - '0'. */
30 #define VAL(CH) ((CH) - '0')
/*
 * DIG(VAL): digit character for the small integer VAL, i.e. VAL + '0'.
 * The parameter merely shares its name with the VAL() macro above.
 */
31 #define DIG(VAL) ((VAL) + '0')
34 * byteain - converts from printable representation of byte array
36 * Non-printable characters must be passed as '\nnn' (octal) and are
37 * converted to internal form. '\' must be passed as '\\'.
38 * elog(ERROR, ...) if bad form.
41 * The input is scanned twice.
42 * The error checking of input is minimal.
/*
 * byteain: parse the printable representation of a byte array into a
 * newly palloc'd bytea.
 *
 * inputText - NUL-terminated input string; NULL is rejected.
 *
 * Raises elog(ERROR, "Bad input string for type bytea") for a NULL input
 * or for a sequence that fails the three-digit check below.
 */
45 byteain(char *inputText)
52 if (inputText == NULL)
53 elog(ERROR, "Bad input string for type bytea");
/* First scan: `byte` is incremented once per loop iteration. */
55 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
/*
 * Three consecutive isdigit() tests, each consuming one character.
 * NOTE(review): only isdigit() is tested, so '8' and '9' appear to be
 * accepted although the escape is documented as octal -- confirm.
 * NOTE(review): a plain char is cast straight to int; if tp is char *,
 * going through unsigned char would avoid negative arguments -- confirm.
 */
60 else if (!isdigit((int) *tp++) ||
61 !isdigit((int) *tp++) ||
62 !isdigit((int) *tp++))
63 elog(ERROR, "Bad input string for type bytea");
/*
 * `byte` is used both as the allocation size and as the stored length.
 * NOTE(review): confirm it already includes the varlena header here; no
 * adjustment is visible between the counting loop and this point.
 */
67 result = (bytea *) palloc(byte);
68 result->vl_len = byte; /* varlena? */
/*
 * True for an ordinary character, or for a backslash that is immediately
 * followed by another backslash (tp is advanced past the first one).
 */
71 if (*tp != '\\' || *++tp == '\\')
/*
 * `byte` is reused as a scratch value here: the stored output byte is
 * `byte` plus the value of the current digit character.
 */
79 *rp++ = byte + VAL(*tp++);
85 * byteaout - converts to printable representation of byte array
87 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
90 * NULL vlena should be an error--returning string with NULL for now.
/*
 * byteaout: build the printable string for a bytea value.
 *
 * vlena - the value to print.
 *
 * Returns a palloc'd character buffer.
 */
93 byteaout(bytea *vlena)
98 int val; /* holds unprintable chars */
/*
 * Two-byte buffer; presumably the placeholder returned for a NULL input
 * mentioned in the header comment -- confirm.
 */
104 result = (char *) palloc(2);
110 len = 1; /* empty string has 1 char */
/* First walk over the data bytes (vl_len minus the header size). */
111 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
114 else if (isascii((int) *vp) && isprint((int) *vp))
/* Output buffer sized by the `len` accumulated above. */
118 rp = result = (char *) palloc(len);
/* Second walk over the same number of bytes, this time writing via rp. */
120 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--)
127 else if (isascii((int) *vp) && isprint((int) *vp))
/*
 * Digits are stored while stepping rp backwards; each is the low three
 * bits of val (presumably val is shifted between the two stores).
 */
134 *rp-- = DIG(val & 07);
136 *rp-- = DIG(val & 07);
147 * textin - converts "..." to internal representation
/*
 * textin: copy a NUL-terminated C string into a newly palloc'd text value.
 *
 * inputText - the source string; a NULL pointer is checked for explicitly.
 */
150 textin(char *inputText)
155 if (inputText == NULL)
/* Total size is the string's byte count (no terminator) plus the header. */
158 len = strlen(inputText) + VARHDRSZ;
159 result = (text *) palloc(len);
160 VARSIZE(result) = len;
/* Only the payload bytes are copied; the trailing NUL is not stored. */
162 memmove(VARDATA(result), inputText, len - VARHDRSZ);
/*
 * NOTE(review): convertstr() is defined elsewhere; the final argument 0
 * presumably selects the input-side conversion -- confirm.
 */
165 convertstr(VARDATA(result), len - VARHDRSZ, 0);
172 * textout - converts internal representation to "..."
/*
 * Two-byte buffer; presumably used for a special case handled before the
 * normal path below -- confirm.
 */
182 result = (char *) palloc(2);
/* Payload length in bytes: total size minus the varlena header. */
187 len = VARSIZE(vlena) - VARHDRSZ;
/* One extra byte beyond the payload, presumably for a NUL terminator. */
188 result = (char *) palloc(len + 1);
189 memmove(result, VARDATA(vlena), len);
/*
 * NOTE(review): convertstr() is defined elsewhere; the final argument 1
 * presumably selects the output-side conversion -- confirm.
 */
193 convertstr(result, len, 1);
200 /* ========== PUBLIC ROUTINES ========== */
204 * returns the logical length of a text*
205 * (which is less than the VARSIZE of the text*)
/* Guard against an invalid pointer before touching the header. */
218 if (!PointerIsValid(t))
/* Payload length in bytes: total size minus the varlena header. */
224 l = VARSIZE(t) - VARHDRSZ;
/*
 * NOTE(review): this recomputes the expression already stored in `l`;
 * the two statements presumably belong to alternative build paths.
 */
234 return VARSIZE(t) - VARHDRSZ;
241 * returns the physical length of a text*
242 * (which is less than the VARSIZE of the text*)
/*
 * textoctetlen: number of payload bytes in a text value.
 *
 * t - the text value; an invalid pointer is checked for explicitly.
 *
 * Returns VARSIZE(t) minus the varlena header size.
 */
245 textoctetlen(text *t)
247 if (!PointerIsValid(t))
250 return VARSIZE(t) - VARHDRSZ;
251 } /* textoctetlen() */
255 * takes two text* and returns a text* that is the concatenation of
258 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
259 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
260 * Allocate space for output in all cases.
261 * XXX - thomas 1997-07-10
/*
 * textcat: concatenate two text values into a newly palloc'd text.
 *
 * t1, t2 - the operands; invalid pointers are checked for explicitly.
 *
 * The result holds len1 bytes of t1 followed by len2 bytes of t2.
 */
264 textcat(text *t1, text *t2)
272 if (!PointerIsValid(t1) || !PointerIsValid(t2))
275 len1 = (VARSIZE(t1) - VARHDRSZ);
/* Loops while the last counted byte of t1 is NUL (presumably trimming it). */
278 while (len1 > 0 && VARDATA(t1)[len1 - 1] == '\0')
281 len2 = (VARSIZE(t2) - VARHDRSZ);
/* Same trailing-NUL test for t2. */
284 while (len2 > 0 && VARDATA(t2)[len2 - 1] == '\0')
/* Result size: both payload lengths plus one varlena header. */
287 len = len1 + len2 + VARHDRSZ;
288 result = palloc(len);
290 /* Set size of result string... */
291 VARSIZE(result) = len;
293 /* Fill data field of result string... */
294 ptr = VARDATA(result);
296 memcpy(ptr, VARDATA(t1), len1);
/* t2's bytes go directly after t1's. */
298 memcpy(ptr + len1, VARDATA(t2), len2);
305 * Return a substring starting at the specified position.
306 * - thomas 1997-12-31
310 * - starting position (is one-based)
313 * If the starting position is zero or less, then return from the start of the string
314 * adjusting the length to be consistent with the "negative start" per SQL92.
315 * If the length is less than zero, return the remaining string.
317 * Note that the arguments operate on octet length,
318 * so not aware of multi-byte character sets.
320 * Added multi-byte support.
321 * - Tatsuo Ishii 1998-4-21
322 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
323 * Formerly returned the entire string; now returns a portion.
324 * - Thomas Lockhart 1998-12-10
/*
 * text_substr: return a newly palloc'd text holding part of the input.
 *
 * Argument 0 - the source text.
 * Argument 1 - m, the start position (one-based per the header comment).
 * Argument 2 - n, the requested length.
 */
327 text_substr(PG_FUNCTION_ARGS)
329 text *string = PG_GETARG_TEXT_P(0);
330 int32 m = PG_GETARG_INT32(1);
331 int32 n = PG_GETARG_INT32(2);
/* Payload length in bytes. */
339 len = VARSIZE(string) - VARHDRSZ;
/*
 * Replaces the byte length with the result of pg_mbstrlen_with_len();
 * presumably only compiled in multi-byte builds -- confirm.
 */
341 len = pg_mbstrlen_with_len(VARDATA(string), len);
344 /* starting position after the end of the string? */
352 * starting position before the start of the string? then offset into
353 * the string per SQL92 spec...
361 /* m will now become a zero-based starting position */
/*
 * NOTE(review): m + n is evaluated in int32 arithmetic before the
 * comparison; a very large n could overflow -- confirm callers cannot
 * pass such values.
 */
363 if (((m + n) > len) || (n < 0))
/*
 * After the first loop m is rewritten as the byte offset of p from the
 * start of the data; after the second, n becomes the byte distance from
 * that offset to p.
 */
368 for (i = 0; i < m; i++)
370 m = p - VARDATA(string);
371 for (i = 0; i < n; i++)
373 n = p - (VARDATA(string) + m);
/* Allocate header plus n payload bytes and copy them from offset m. */
376 ret = (text *) palloc(VARHDRSZ + n);
377 VARSIZE(ret) = VARHDRSZ + n;
379 memcpy(VARDATA(ret), VARDATA(string) + m, n);
381 PG_RETURN_TEXT_P(ret);
386 * Return the position of the specified substring.
387 * Implements the SQL92 POSITION() function.
388 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
389 * - thomas 1997-07-27
391 * Added multi-byte support.
392 * - Tatsuo Ishii 1998-4-21
/*
 * textpos: locate t2 within t1 (SQL92 POSITION, per the header comment).
 *
 * t1 - the string searched in.
 * t2 - the substring searched for.
 */
395 textpos(text *t1, text *t2)
411 if (!PointerIsValid(t1) || !PointerIsValid(t2))
/*
 * NOTE(review): everywhere else in this file the payload length is
 * VARSIZE() - VARHDRSZ, so VARSIZE(t2) includes the header and this test
 * looks unreachable for a valid value. An empty-pattern check would
 * compare against VARHDRSZ -- confirm intent.
 */
414 if (VARSIZE(t2) <= 0)
417 len1 = (VARSIZE(t1) - VARHDRSZ);
418 len2 = (VARSIZE(t2) - VARHDRSZ);
/*
 * Both inputs are converted into pg_wchar buffers with room for one extra
 * element; len1/len2 are then replaced by pg_wchar_strlen() results.
 */
420 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
421 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
422 len1 = pg_wchar_strlen(p1);
423 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
424 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
425 len2 = pg_wchar_strlen(p2);
/* Try each candidate start position from 0 through px inclusive. */
432 for (p = 0; p <= px; p++)
/*
 * The next two tests are the same check written with pg_wchar_strncmp()
 * and with strncmp(); presumably only one is compiled in a given build.
 * The first-element comparison short-circuits the full comparison.
 */
435 if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
437 if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
453 * texteq - returns 1 iff arguments are equal
454 * textne - returns 1 iff arguments are not equal
/*
 * texteq: equality test for two text values.
 *
 * arg1, arg2 - the operands; NULL pointers are checked for explicitly.
 */
457 texteq(text *arg1, text *arg2)
463 if (arg1 == NULL || arg2 == NULL)
/* Stores arg1's total length in `len` and compares it with arg2's. */
465 if ((len = arg1->vl_len) != arg2->vl_len)
471 * Varlenas are stored as the total size (data + size variable)
472 * followed by the data. Use VARHDRSZ instead of explicit sizeof() -
477 if (*a1p++ != *a2p++) /* element-wise comparison, advancing both cursors */
/* textne: logical negation of texteq() for the same two arguments. */
483 textne(text *arg1, text *arg2)
485 return (bool) !texteq(arg1, arg2);
489 * Comparison function for text strings with given lengths.
490 * Includes locale support, but must copy strings to temporary memory
491 * to allow null-termination for inputs to strcoll().
/*
 * varstr_cmp: compare two counted (not NUL-terminated) byte strings.
 *
 * arg1, len1 - first string and its length in bytes.
 * arg2, len2 - second string and its length in bytes.
 */
495 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
/*
 * strcoll() needs NUL-terminated input, so each string is copied into a
 * buffer one byte longer than its length and terminated there.
 */
502 a1p = (unsigned char *) palloc(len1 + 1);
503 a2p = (unsigned char *) palloc(len2 + 1);
505 memcpy(a1p, arg1, len1);
506 *(a1p + len1) = '\0';
507 memcpy(a2p, arg2, len2);
508 *(a2p + len2) = '\0';
510 result = strcoll(a1p, a2p);
/*
 * Alternative comparison (presumably the non-locale build): compare the
 * common prefix, then let the shorter string sort first on a tie.
 */
520 result = strncmp(a1p, a2p, Min(len1, len2));
521 if ((result == 0) && (len1 != len2))
522 result = (len1 < len2) ? -1 : 1;
530 * Comparison function for text strings.
531 * Includes locale support, but must copy strings to temporary memory
532 * to allow null-termination for inputs to strcoll().
533 * XXX HACK code for textlen() indicates that there can be embedded nulls
534 * but it appears that most routines (incl. this one) assume not! - tgl 97/04/07
/*
 * text_cmp: three-way comparison of two text values.
 *
 * arg1, arg2 - the operands; NULL pointers are checked for explicitly.
 *
 * For non-NULL inputs, returns whatever varstr_cmp() returns for the two
 * payloads.
 */
538 text_cmp(text *arg1, text *arg2)
545 if (arg1 == NULL || arg2 == NULL)
/* Payload lengths in bytes (total size minus header). */
551 len1 = VARSIZE(arg1) - VARHDRSZ;
552 len2 = VARSIZE(arg2) - VARHDRSZ;
554 return varstr_cmp(a1p, len1, a2p, len2);
558 * Comparison function for text strings.
/* text_lt: true when text_cmp() reports arg1 sorts before arg2. */
561 text_lt(text *arg1, text *arg2)
563 return (bool) (text_cmp(arg1, arg2) < 0);
567 * Comparison function for text strings.
/* text_le: true when text_cmp() reports arg1 sorts before or equal to arg2. */
570 text_le(text *arg1, text *arg2)
572 return (bool) (text_cmp(arg1, arg2) <= 0);
/* text_gt: defined as the negation of text_le() on the same arguments. */
576 text_gt(text *arg1, text *arg2)
578 return (bool) !text_le(arg1, arg2);
/* text_ge: defined as the negation of text_lt() on the same arguments. */
582 text_ge(text *arg1, text *arg2)
584 return (bool) !text_lt(arg1, arg2);
/*
 * text_larger: return a palloc'd copy of whichever argument compares
 * greater under text_cmp().
 */
588 text_larger(text *arg1, text *arg2)
/* When the two compare equal (result <= 0), arg2 is the one selected. */
593 temp = ((text_cmp(arg1, arg2) <= 0) ? arg2 : arg1);
/* Copy the whole varlena, header included, into fresh storage. */
597 result = (text *) palloc(VARSIZE(temp));
598 memmove((char *) result, (char *) temp, VARSIZE(temp));
/*
 * text_smaller: return a palloc'd copy of whichever argument compares
 * smaller under text_cmp().
 */
604 text_smaller(text *arg1, text *arg2)
/* When the two compare equal (result not > 0), arg1 is the one selected. */
609 temp = ((text_cmp(arg1, arg2) > 0) ? arg2 : arg1);
/* Copy the whole varlena, header included, into fresh storage. */
613 result = (text *) palloc(VARSIZE(temp));
614 memmove((char *) result, (char *) temp, VARSIZE(temp));
619 /*-------------------------------------------------------------
622 * get the number of bytes contained in an instance of type 'bytea'
623 *-------------------------------------------------------------
/*
 * byteaoctetlen: number of payload bytes in a bytea value.
 *
 * v - the value; an invalid pointer is checked for explicitly.
 *
 * Returns VARSIZE(v) minus the varlena header size.
 */
626 byteaoctetlen(bytea *v)
628 if (!PointerIsValid(v))
631 return VARSIZE(v) - VARHDRSZ;
634 /*-------------------------------------------------------------
637 * this routine treats "bytea" as an array of bytes.
638 * It returns the Nth byte (a number between 0 and 255).
639 *-------------------------------------------------------------
/*
 * byteaGetByte: fetch one byte of a bytea as an integer.
 *
 * Argument 0 - the bytea value.
 * Argument 1 - n, zero-based byte index.
 *
 * Raises elog(ERROR) when n is negative or not less than the payload
 * length.
 */
642 byteaGetByte(PG_FUNCTION_ARGS)
644 bytea *v = PG_GETARG_BYTEA_P(0);
645 int32 n = PG_GETARG_INT32(1);
/* Payload length in bytes. */
649 len = VARSIZE(v) - VARHDRSZ;
651 if (n < 0 || n >= len)
652 elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
/* Read through unsigned char so the fetched value is never negative. */
655 byte = ((unsigned char *) VARDATA(v))[n];
657 PG_RETURN_INT32(byte);
660 /*-------------------------------------------------------------
663 * This routine treats a "bytea" type like an array of bits.
664 * It returns the value of the Nth bit (0 or 1).
666 *-------------------------------------------------------------
/*
 * byteaGetBit: fetch one bit of a bytea.
 *
 * Argument 0 - the bytea value.
 * Argument 1 - n, zero-based bit index.
 *
 * Raises elog(ERROR) when n is negative or not less than eight times the
 * payload length.
 */
669 byteaGetBit(PG_FUNCTION_ARGS)
671 bytea *v = PG_GETARG_BYTEA_P(0);
672 int32 n = PG_GETARG_INT32(1);
/* Payload length in bytes. */
678 len = VARSIZE(v) - VARHDRSZ;
/* NOTE(review): confirm len * 8 cannot overflow for the sizes allowed. */
680 if (n < 0 || n >= len * 8)
681 elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
/* byteNo and bitNo are presumably derived from n as n / 8 and n % 8. */
687 byte = ((unsigned char *) VARDATA(v))[byteNo];
/* Test the single bit at position bitNo within the fetched byte. */
689 if (byte & (1 << bitNo))
695 /*-------------------------------------------------------------
698 * Given an instance of type 'bytea' creates a new one with
699 * the Nth byte set to the given value.
701 *-------------------------------------------------------------
/*
 * byteaSetByte: return a copy of a bytea with one byte replaced.
 *
 * Argument 0 - the original bytea (copied, then the copy is modified).
 * Argument 1 - n, zero-based byte index.
 * Argument 2 - newByte, the replacement value.
 *
 * Raises elog(ERROR) when n is negative or not less than the payload
 * length.
 */
704 byteaSetByte(PG_FUNCTION_ARGS)
706 bytea *v = PG_GETARG_BYTEA_P(0);
707 int32 n = PG_GETARG_INT32(1);
708 int32 newByte = PG_GETARG_INT32(2);
/* Payload length in bytes. */
712 len = VARSIZE(v) - VARHDRSZ;
714 if (n < 0 || n >= len)
715 elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
719 * Make a copy of the original varlena.
721 res = (bytea *) palloc(VARSIZE(v));
722 memcpy((char *) res, (char *) v, VARSIZE(v));
/*
 * NOTE(review): newByte is an int32 stored into an unsigned char, so it is
 * narrowed silently; no range check on it is visible -- confirm this is
 * intended.
 */
727 ((unsigned char *) VARDATA(res))[n] = newByte;
729 PG_RETURN_BYTEA_P(res);
732 /*-------------------------------------------------------------
735 * Given an instance of type 'bytea' creates a new one with
736 * the Nth bit set to the given value.
738 *-------------------------------------------------------------
/*
 * byteaSetBit: return a copy of a bytea with one bit set or cleared.
 *
 * Argument 0 - the original bytea (copied, then the copy is modified).
 * Argument 1 - n, zero-based bit index.
 * Argument 2 - newBit, which must be 0 or 1.
 *
 * Raises elog(ERROR) when n is out of range or newBit is neither 0 nor 1.
 */
741 byteaSetBit(PG_FUNCTION_ARGS)
743 bytea *v = PG_GETARG_BYTEA_P(0);
744 int32 n = PG_GETARG_INT32(1);
745 int32 newBit = PG_GETARG_INT32(2);
/* Payload length in bytes. */
753 len = VARSIZE(v) - VARHDRSZ;
/* NOTE(review): confirm len * 8 cannot overflow for the sizes allowed. */
755 if (n < 0 || n >= len * 8)
756 elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
765 if (newBit != 0 && newBit != 1)
766 elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
769 * Make a copy of the original varlena.
771 res = (bytea *) palloc(VARSIZE(v));
772 memcpy((char *) res, (char *) v, VARSIZE(v));
/* byteNo and bitNo are presumably derived from n as n / 8 and n % 8. */
777 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
/* Clear the target bit ... */
780 newByte = oldByte & (~(1 << bitNo));
/* ... or set it; presumably chosen by the value of newBit. */
782 newByte = oldByte | (1 << bitNo);
784 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
786 PG_RETURN_BYTEA_P(res);
791 * Converts a text() type to a NameData type.
/* Payload length of the input text in bytes. */
802 len = VARSIZE(s) - VARHDRSZ;
/* Over-long inputs are detected here (presumably clamped to NAMEDATALEN). */
803 if (len > NAMEDATALEN)
/* Diagnostic output; presumably compiled only in a debug build. */
807 printf("text- convert string length %d (%d) ->%d\n",
808 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
811 result = palloc(NAMEDATALEN);
/*
 * NOTE(review): the copy is bounded by NAMEDATALEN rather than by len,
 * while text payloads in this file are stored without a NUL terminator.
 * Confirm StrNCpy cannot read past the end of a short payload.
 */
812 StrNCpy(NameStr(*result), VARDATA(s), NAMEDATALEN);
814 /* now null pad to full length... */
815 while (len < NAMEDATALEN)
817 *(NameStr(*result) + len) = '\0';
825 * Converts a NameData type to a text type.
/*
 * name_text: convert a NameData into a newly palloc'd text value.
 *
 * s - the name to convert; its length is taken with strlen().
 */
828 name_text(NameData *s)
836 len = strlen(NameStr(*s));
/*
 * NOTE(review): this diagnostic applies VARSIZE() to a NameData pointer,
 * which looks copied from the text-to-name direction; presumably it is
 * compiled only in a debug build -- confirm or remove.
 */
839 printf("text- convert string length %d (%d) ->%d\n",
840 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
/* Header plus exactly len payload bytes; no terminator is stored. */
843 result = palloc(VARHDRSZ + len);
/* len equals strlen() of the source, so this copies the characters only. */
844 strncpy(VARDATA(result), NameStr(*s), len);
845 VARSIZE(result) = len + VARHDRSZ;