1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.70 2001/05/03 19:00:36 tgl Exp $
13 *-------------------------------------------------------------------------
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
/*
 * Forward declaration of the file-local text comparison helper.  Its
 * definition appears further down in this file; it returns an int that
 * the callers in this file compare against zero.
 */
23 static int text_cmp(text *arg1, text *arg2);
26 /*****************************************************************************
28 *****************************************************************************/
/* VAL maps a digit character to its numeric value by subtracting the zero digit character. */
31 #define VAL(CH) ((CH) - '0')
/* DIG is the inverse of VAL: it maps a small number to the corresponding digit character. */
32 #define DIG(VAL) ((VAL) + '0')
35 * byteain - converts from printable representation of byte array
37 * Non-printable characters must be passed as '\nnn' (octal) and are
38 * converted to internal form. '\' must be passed as '\\'.
39 * elog(ERROR, ...) if bad form.
42 * The input is scanned twice.
43 * The error checking of input is minimal.
/*
 * byteain: builds a bytea value from the C-string argument 0 and returns it.
 *
 * NOTE(review): this excerpt omits several lines of the function (return
 * type, braces, local declarations and parts of both loops).  The comments
 * below describe only the statements that are visible.
 */
46 byteain(PG_FUNCTION_ARGS)
48 char *inputText = PG_GETARG_CSTRING(0);
/* First scan of the input: byte is incremented once per loop iteration. */
54 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
/*
 * The next three characters must all be digits, otherwise the input is
 * rejected with an error.
 * NOTE(review): isdigit() also accepts 8 and 9, which are not octal
 * digits -- confirm whether accepting them is intended.
 */
60 else if (!isdigit((unsigned char) *tp++) ||
61 !isdigit((unsigned char) *tp++) ||
62 !isdigit((unsigned char) *tp++))
63 elog(ERROR, "Bad input string for type bytea");
/*
 * Allocate the result and record its length.
 * NOTE(review): byte is used both as the allocation size and as vl_len;
 * presumably the varlena header size was added to it on a line that is
 * not shown here -- verify.
 */
68 result = (bytea *) palloc(byte);
69 result->vl_len = byte; /* set varlena length */
/*
 * This condition holds when the current character is not a backslash, or
 * when it is a backslash immediately followed by a second backslash (the
 * pre-increment moves tp past the first one).
 */
75 if (*tp != '\\' || *++tp == '\\')
/* Store the value accumulated in byte plus the value of the digit at tp. */
83 *rp++ = byte + VAL(*tp++);
87 PG_RETURN_BYTEA_P(result);
91 * byteaout - converts to printable representation of byte array
93 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
96 * NULL vlena should be an error--returning string with NULL for now.
/*
 * byteaout: produces a palloc-allocated C string from the bytea argument 0.
 *
 * NOTE(review): this excerpt omits several lines of the function (return
 * type, braces, most local declarations and the loop bodies).  The
 * comments below describe only the statements that are visible.
 */
99 byteaout(PG_FUNCTION_ARGS)
101 bytea *vlena = PG_GETARG_BYTEA_P(0);
105 int val; /* holds unprintable chars */
/* Sizing: len starts at 1 and is later used as the allocation size. */
109 len = 1; /* empty string has 1 char */
/* Visit every data byte: the count is the stored length minus the header size. */
111 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
/* Printable bytes get their own branch (branch body not visible here). */
115 else if (isprint((unsigned char) *vp))
/* Allocate the output buffer; rp is the write cursor into it. */
120 rp = result = (char *) palloc(len);
/* Second walk over the same data bytes, this time producing output. */
122 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
129 else if (isprint((unsigned char) *vp))
/*
 * Three digit characters are written at rp[3], rp[2] and rp[1] using masks
 * of 3, 3 and 2 bits.
 * NOTE(review): presumably val is shifted right by three bits between
 * these stores and rp[0] receives a backslash; those lines are not
 * visible -- confirm.
 */
135 rp[3] = DIG(val & 07);
137 rp[2] = DIG(val & 07);
139 rp[1] = DIG(val & 03);
144 PG_RETURN_CSTRING(result);
149 * textin - converts "..." to internal representation
/*
 * textin: builds a text value from the C-string argument 0.
 *
 * NOTE(review): return type, braces and local declarations are not
 * visible in this excerpt.
 */
152 textin(PG_FUNCTION_ARGS)
154 char *inputText = PG_GETARG_CSTRING(0);
/* Total size is the string length plus the varlena header. */
158 len = strlen(inputText) + VARHDRSZ;
159 result = (text *) palloc(len);
160 VARATT_SIZEP(result) = len;
/* Copy only the characters; the terminating NUL is not copied. */
162 memcpy(VARDATA(result), inputText, len - VARHDRSZ);
/*
 * NOTE(review): convertstr() is not defined in this excerpt; presumably
 * it recodes the data in place and may be conditionally compiled --
 * confirm.
 */
165 convertstr(VARDATA(result), len - VARHDRSZ, 0);
168 PG_RETURN_TEXT_P(result);
172 * textout - converts internal representation to "..."
/*
 * textout: produces a palloc-allocated C string from the text argument 0.
 *
 * NOTE(review): return type, braces and local declarations are not
 * visible in this excerpt.
 */
175 textout(PG_FUNCTION_ARGS)
177 text *t = PG_GETARG_TEXT_P(0);
/* Data length is the total size minus the varlena header. */
181 len = VARSIZE(t) - VARHDRSZ;
/*
 * One byte more than the data is allocated.
 * NOTE(review): presumably for a terminating NUL written on a line not
 * shown here -- confirm.
 */
182 result = (char *) palloc(len + 1);
183 memcpy(result, VARDATA(t), len);
/*
 * NOTE(review): convertstr() is not defined in this excerpt; presumably
 * it recodes the data in place and may be conditionally compiled --
 * confirm.
 */
187 convertstr(result, len, 1);
190 PG_RETURN_CSTRING(result);
194 /* ========== PUBLIC ROUTINES ========== */
198 * returns the logical length of a text*
199 * (which is less than the VARSIZE of the text*)
/*
 * textlen: returns a length for the text argument 0 as an int32.
 *
 * NOTE(review): two return statements are visible; presumably they belong
 * to alternative conditionally compiled branches whose directives, along
 * with the computation of len, are not visible in this excerpt -- confirm.
 */
202 textlen(PG_FUNCTION_ARGS)
204 text *t = PG_GETARG_TEXT_P(0);
/* Byte count of the data: total size minus the varlena header. */
214 l = VARSIZE(t) - VARHDRSZ;
222 PG_RETURN_INT32(len);
/* This return yields the data byte count directly. */
224 PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ);
230 * returns the physical length of a text*
231 * (which is less than the VARSIZE of the text*)
233 * XXX is it actually appropriate to return the compressed length
234 * when the value is compressed? It's not at all clear to me that
235 * this is what SQL92 has in mind ...
/*
 * textoctetlen: returns a byte length for argument 0 as an int32, working
 * on the raw varlena pointer obtained with PG_GETARG_RAW_VARLENA_P.
 *
 * NOTE(review): return type and braces are not visible in this excerpt.
 */
238 textoctetlen(PG_FUNCTION_ARGS)
240 struct varattrib *t = (struct varattrib *) PG_GETARG_RAW_VARLENA_P(0);
/* Value is not flagged external: report its size minus the header. */
242 if (!VARATT_IS_EXTERNAL(t))
243 PG_RETURN_INT32(VARATT_SIZE(t) - VARHDRSZ);
/* Otherwise report the va_extsize field of the external descriptor. */
245 PG_RETURN_INT32(t->va_content.va_external.va_extsize);
250 * takes two text* and returns a text* that is the concatenation of
253 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
254 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
255 * Allocate space for output in all cases.
256 * XXX - thomas 1997-07-10
/*
 * textcat: returns a newly allocated text holding the data of argument 0
 * followed by the data of argument 1.
 *
 * NOTE(review): return type, braces, local declarations and any
 * statements between the visible ones are missing from this excerpt.
 */
259 textcat(PG_FUNCTION_ARGS)
261 text *t1 = PG_GETARG_TEXT_P(0);
262 text *t2 = PG_GETARG_TEXT_P(1);
/* Data byte counts of the two inputs (total size minus header). */
269 len1 = (VARSIZE(t1) - VARHDRSZ);
273 len2 = (VARSIZE(t2) - VARHDRSZ);
/* The result needs both data parts plus a single header. */
277 len = len1 + len2 + VARHDRSZ;
278 result = (text *) palloc(len);
280 /* Set size of result string... */
281 VARATT_SIZEP(result) = len;
283 /* Fill data field of result string... */
284 ptr = VARDATA(result);
/* First input goes at the start of the data area. */
286 memcpy(ptr, VARDATA(t1), len1);
/* Second input goes right after the first. */
288 memcpy(ptr + len1, VARDATA(t2), len2);
290 PG_RETURN_TEXT_P(result);
295 * Return a substring starting at the specified position.
296 * - thomas 1997-12-31
300 * - starting position (is one-based)
303 * If the starting position is zero or less, then return from the start of the string
304 * adjusting the length to be consistent with the "negative start" per SQL92.
305 * If the length is less than zero, return the remaining string.
307 * Note that the arguments operate on octet length,
308 * so not aware of multi-byte character sets.
310 * Added multi-byte support.
311 * - Tatsuo Ishii 1998-4-21
312 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
313 * Formerly returned the entire string; now returns a portion.
314 * - Thomas Lockhart 1998-12-10
/*
 * text_substr: returns a newly allocated text holding a portion of
 * argument 0, selected by the int32 arguments 1 (m) and 2 (n).
 *
 * NOTE(review): many lines of this function are missing from the excerpt
 * (return type, braces, local declarations, the range adjustments and the
 * loop bodies).  The comments below describe only the visible statements.
 */
317 text_substr(PG_FUNCTION_ARGS)
319 text *string = PG_GETARG_TEXT_P(0);
320 int32 m = PG_GETARG_INT32(1);
321 int32 n = PG_GETARG_INT32(2);
/* Data byte count of the input. */
331 len = VARSIZE(string) - VARHDRSZ;
/*
 * len is replaced by the result of pg_mbstrlen_with_len().
 * NOTE(review): presumably a character count, computed only in a
 * multibyte build -- confirm.
 */
333 len = pg_mbstrlen_with_len(VARDATA(string), len);
336 /* starting position after the end of the string? */
344 * starting position before the start of the string? then offset into
345 * the string per SQL92 spec...
353 /* m will now become a zero-based starting position */
/*
 * Detect a request that runs past the end or has a negative length.
 * NOTE(review): m + n is an int32 addition and could overflow for extreme
 * arguments -- confirm whether that is guarded elsewhere.
 */
355 if (((m + n) > len) || (n < 0))
/* Step m times (loop body not visible), then turn m into a byte offset. */
360 for (i = 0; i < m; i++)
362 m = p - VARDATA(string);
/* Step n more times, then turn n into a byte length measured from that offset. */
363 for (i = 0; i < n; i++)
365 n = p - (VARDATA(string) + m);
/* Build the result: header plus n data bytes copied from offset m. */
368 ret = (text *) palloc(VARHDRSZ + n);
369 VARATT_SIZEP(ret) = VARHDRSZ + n;
371 memcpy(VARDATA(ret), VARDATA(string) + m, n);
373 PG_RETURN_TEXT_P(ret);
378 * Return the position of the specified substring.
379 * Implements the SQL92 POSITION() function.
380 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
381 * - thomas 1997-07-27
383 * Added multi-byte support.
384 * - Tatsuo Ishii 1998-4-21
/*
 * textpos: returns an int32 position computed from searching argument 0
 * (t1) for argument 1 (t2).
 *
 * NOTE(review): return type, braces, local declarations, the computation
 * of px and pos, and any preprocessor conditionals are missing from this
 * excerpt.  The comments below describe only the visible statements.
 */
387 textpos(PG_FUNCTION_ARGS)
389 text *t1 = PG_GETARG_TEXT_P(0);
390 text *t2 = PG_GETARG_TEXT_P(1);
/* A pattern with no data bytes yields 1 immediately. */
405 if (VARSIZE(t2) <= VARHDRSZ)
406 PG_RETURN_INT32(1); /* result for empty pattern */
/* Data byte counts of both inputs. */
408 len1 = (VARSIZE(t1) - VARHDRSZ);
409 len2 = (VARSIZE(t2) - VARHDRSZ);
/*
 * Convert t1 into a pg_wchar buffer sized for len1 + 1 elements, then
 * replace len1 with the length of the converted string.  ps1 keeps the
 * start of the buffer.
 */
411 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
412 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
413 len1 = pg_wchar_strlen(p1);
/* Same conversion for t2. */
414 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
415 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
416 len2 = pg_wchar_strlen(p2);
/* Try each candidate start index from 0 through px inclusive. */
423 for (p = 0; p <= px; p++)
/*
 * Match test: compare the first element first, then len2 elements.
 * NOTE(review): two variants are visible, one using pg_wchar_strncmp and
 * one using strncmp; presumably they are alternatives selected by a
 * conditional that is not visible here -- confirm.
 */
426 if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
428 if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
440 PG_RETURN_INT32(pos);
444 * Comparison function for text strings with given lengths.
445 * Includes locale support, but must copy strings to temporary memory
446 * to allow null-termination for inputs to strcoll().
/*
 * varstr_cmp: compares two character buffers given with explicit lengths.
 *
 * NOTE(review): return type, braces, local declarations, the return
 * statement and any preprocessor conditionals are missing from this
 * excerpt.  Two comparison strategies are visible (strcoll and strncmp);
 * presumably only one is compiled in, depending on locale support --
 * confirm.
 */
450 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
/* Make NUL-terminated working copies, one byte longer than each input. */
457 a1p = (char *) palloc(len1 + 1);
458 a2p = (char *) palloc(len2 + 1);
460 memcpy(a1p, arg1, len1);
461 *(a1p + len1) = '\0';
462 memcpy(a2p, arg2, len2);
463 *(a2p + len2) = '\0';
/* Compare the terminated copies with strcoll. */
465 result = strcoll(a1p, a2p);
/* Compare only the common prefix length of the two inputs. */
475 result = strncmp(a1p, a2p, Min(len1, len2));
/* Equal prefixes but different lengths: the shorter input sorts first. */
476 if ((result == 0) && (len1 != len2))
477 result = (len1 < len2) ? -1 : 1;
485 * Internal comparison function for text strings.
/*
 * text_cmp: compares two text values by handing their data and data
 * lengths to varstr_cmp, and returns that result.
 *
 * NOTE(review): return type, braces, local declarations and the
 * assignments of a1p and a2p are not visible in this excerpt.
 */
489 text_cmp(text *arg1, text *arg2)
/* Data byte counts: total size minus the varlena header. */
499 len1 = VARSIZE(arg1) - VARHDRSZ;
500 len2 = VARSIZE(arg2) - VARHDRSZ;
502 return varstr_cmp(a1p, len1, a2p, len2);
506 * Comparison functions for text strings.
508 * Note: btree indexes need these routines not to leak memory; therefore,
509 * be careful to free working copies of toasted datums. Most places don't
510 * need to be so careful.
/*
 * texteq: returns a boolean telling whether the two text arguments
 * compare equal.
 *
 * NOTE(review): return type, braces, the declaration of result and the
 * statements taken on the fast path are not visible in this excerpt.
 */
514 texteq(PG_FUNCTION_ARGS)
516 text *arg1 = PG_GETARG_TEXT_P(0);
517 text *arg2 = PG_GETARG_TEXT_P(1);
520 /* fast path for different-length inputs */
521 if (VARSIZE(arg1) != VARSIZE(arg2))
/* General case: equal when text_cmp reports zero. */
524 result = (text_cmp(arg1, arg2) == 0);
/* Release the argument values if they are copies, so nothing is leaked. */
526 PG_FREE_IF_COPY(arg1, 0);
527 PG_FREE_IF_COPY(arg2, 1);
529 PG_RETURN_BOOL(result);
/*
 * textne: returns a boolean telling whether the two text arguments
 * compare unequal.
 *
 * NOTE(review): return type, braces, the declaration of result and the
 * statements taken on the fast path are not visible in this excerpt.
 */
533 textne(PG_FUNCTION_ARGS)
535 text *arg1 = PG_GETARG_TEXT_P(0);
536 text *arg2 = PG_GETARG_TEXT_P(1);
539 /* fast path for different-length inputs */
540 if (VARSIZE(arg1) != VARSIZE(arg2))
/* General case: unequal when text_cmp reports a nonzero value. */
543 result = (text_cmp(arg1, arg2) != 0);
/* Release the argument values if they are copies, so nothing is leaked. */
545 PG_FREE_IF_COPY(arg1, 0);
546 PG_FREE_IF_COPY(arg2, 1);
548 PG_RETURN_BOOL(result);
/*
 * text_lt: returns true when argument 0 sorts before argument 1
 * according to text_cmp.
 *
 * NOTE(review): return type, braces and the declaration of result are
 * not visible in this excerpt.
 */
552 text_lt(PG_FUNCTION_ARGS)
554 text *arg1 = PG_GETARG_TEXT_P(0);
555 text *arg2 = PG_GETARG_TEXT_P(1);
558 result = (text_cmp(arg1, arg2) < 0);
/* Release the argument values if they are copies, so nothing is leaked. */
560 PG_FREE_IF_COPY(arg1, 0);
561 PG_FREE_IF_COPY(arg2, 1);
563 PG_RETURN_BOOL(result);
/*
 * text_le: returns true when argument 0 sorts before or equal to
 * argument 1 according to text_cmp.
 *
 * NOTE(review): return type, braces and the declaration of result are
 * not visible in this excerpt.
 */
567 text_le(PG_FUNCTION_ARGS)
569 text *arg1 = PG_GETARG_TEXT_P(0);
570 text *arg2 = PG_GETARG_TEXT_P(1);
573 result = (text_cmp(arg1, arg2) <= 0);
/* Release the argument values if they are copies, so nothing is leaked. */
575 PG_FREE_IF_COPY(arg1, 0);
576 PG_FREE_IF_COPY(arg2, 1);
578 PG_RETURN_BOOL(result);
/*
 * text_gt: returns true when argument 0 sorts after argument 1
 * according to text_cmp.
 *
 * NOTE(review): return type, braces and the declaration of result are
 * not visible in this excerpt.
 */
582 text_gt(PG_FUNCTION_ARGS)
584 text *arg1 = PG_GETARG_TEXT_P(0);
585 text *arg2 = PG_GETARG_TEXT_P(1);
588 result = (text_cmp(arg1, arg2) > 0);
/* Release the argument values if they are copies, so nothing is leaked. */
590 PG_FREE_IF_COPY(arg1, 0);
591 PG_FREE_IF_COPY(arg2, 1);
593 PG_RETURN_BOOL(result);
/*
 * text_ge: returns true when argument 0 sorts after or equal to
 * argument 1 according to text_cmp.
 *
 * NOTE(review): return type, braces and the declaration of result are
 * not visible in this excerpt.
 */
597 text_ge(PG_FUNCTION_ARGS)
599 text *arg1 = PG_GETARG_TEXT_P(0);
600 text *arg2 = PG_GETARG_TEXT_P(1);
603 result = (text_cmp(arg1, arg2) >= 0);
/* Release the argument values if they are copies, so nothing is leaked. */
605 PG_FREE_IF_COPY(arg1, 0);
606 PG_FREE_IF_COPY(arg2, 1);
608 PG_RETURN_BOOL(result);
/*
 * bttextcmp: returns the raw text_cmp result for the two text arguments
 * as an int32, rather than reducing it to a boolean.
 *
 * NOTE(review): return type, braces and the declaration of result are
 * not visible in this excerpt.
 */
612 bttextcmp(PG_FUNCTION_ARGS)
614 text *arg1 = PG_GETARG_TEXT_P(0);
615 text *arg2 = PG_GETARG_TEXT_P(1);
618 result = text_cmp(arg1, arg2);
/* Release the argument values if they are copies, so nothing is leaked. */
620 PG_FREE_IF_COPY(arg1, 0);
621 PG_FREE_IF_COPY(arg2, 1);
623 PG_RETURN_INT32(result);
/*
 * text_larger: returns whichever of the two text arguments sorts later
 * according to text_cmp.  The returned pointer is one of the argument
 * values themselves, so they are not freed here.
 *
 * NOTE(review): return type, braces and the declaration of result are
 * not visible in this excerpt.
 */
628 text_larger(PG_FUNCTION_ARGS)
630 text *arg1 = PG_GETARG_TEXT_P(0);
631 text *arg2 = PG_GETARG_TEXT_P(1);
/* arg1 wins only on a strictly positive comparison; ties yield arg2. */
634 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
636 PG_RETURN_TEXT_P(result);
/*
 * text_smaller: returns whichever of the two text arguments sorts earlier
 * according to text_cmp.  The returned pointer is one of the argument
 * values themselves, so they are not freed here.
 *
 * NOTE(review): return type, braces and the declaration of result are
 * not visible in this excerpt.
 */
640 text_smaller(PG_FUNCTION_ARGS)
642 text *arg1 = PG_GETARG_TEXT_P(0);
643 text *arg2 = PG_GETARG_TEXT_P(1);
/* arg1 wins only on a strictly negative comparison; ties yield arg2. */
646 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
648 PG_RETURN_TEXT_P(result);
651 /*-------------------------------------------------------------
654 * get the number of bytes contained in an instance of type 'bytea'
655 *-------------------------------------------------------------
/*
 * byteaoctetlen: returns the number of data bytes in the bytea argument 0
 * as an int32.
 *
 * NOTE(review): return type and braces are not visible in this excerpt.
 */
658 byteaoctetlen(PG_FUNCTION_ARGS)
660 bytea *v = PG_GETARG_BYTEA_P(0);
/* Data byte count: total size minus the varlena header. */
662 PG_RETURN_INT32(VARSIZE(v) - VARHDRSZ);
665 /*-------------------------------------------------------------
668 * this routine treats "bytea" as an array of bytes.
669 * It returns the Nth byte (a number between 0 and 255).
670 *-------------------------------------------------------------
/*
 * byteaGetByte: returns, as an int32, the byte at zero-based index n
 * (argument 1) of the bytea argument 0.  An index outside the data
 * raises an error.
 *
 * NOTE(review): return type, braces, local declarations and the argument
 * list of the elog call are not visible in this excerpt.
 */
673 byteaGetByte(PG_FUNCTION_ARGS)
675 bytea *v = PG_GETARG_BYTEA_P(0);
676 int32 n = PG_GETARG_INT32(1);
/* Data byte count: total size minus the varlena header. */
680 len = VARSIZE(v) - VARHDRSZ;
/* Valid indexes are 0 through len - 1. */
682 if (n < 0 || n >= len)
683 elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
/* Read through an unsigned char pointer so the value is never negative. */
686 byte = ((unsigned char *) VARDATA(v))[n];
688 PG_RETURN_INT32(byte);
691 /*-------------------------------------------------------------
694 * This routine treats a "bytea" type like an array of bits.
695 * It returns the value of the Nth bit (0 or 1).
697 *-------------------------------------------------------------
/*
 * byteaGetBit: tests the bit at zero-based bit index n (argument 1) of
 * the bytea argument 0.  An index outside the data raises an error.
 *
 * NOTE(review): return type, braces, local declarations, the computation
 * of byteNo and bitNo, the elog arguments and the return statements are
 * not visible in this excerpt.
 */
700 byteaGetBit(PG_FUNCTION_ARGS)
702 bytea *v = PG_GETARG_BYTEA_P(0);
703 int32 n = PG_GETARG_INT32(1);
/* Data byte count: total size minus the varlena header. */
709 len = VARSIZE(v) - VARHDRSZ;
/* Valid bit indexes are 0 through len * 8 - 1. */
711 if (n < 0 || n >= len * 8)
712 elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
/* Fetch the byte that holds the requested bit. */
718 byte = ((unsigned char *) VARDATA(v))[byteNo];
/* Test bit number bitNo within that byte. */
720 if (byte & (1 << bitNo))
726 /*-------------------------------------------------------------
729 * Given an instance of type 'bytea' creates a new one with
730 * the Nth byte set to the given value.
732 *-------------------------------------------------------------
/*
 * byteaSetByte: returns a copy of the bytea argument 0 in which the byte
 * at zero-based index n (argument 1) is replaced by newByte (argument 2).
 * An index outside the data raises an error.
 *
 * NOTE(review): return type, braces, local declarations and the elog
 * arguments are not visible in this excerpt.
 */
735 byteaSetByte(PG_FUNCTION_ARGS)
737 bytea *v = PG_GETARG_BYTEA_P(0);
738 int32 n = PG_GETARG_INT32(1);
739 int32 newByte = PG_GETARG_INT32(2);
/* Data byte count: total size minus the varlena header. */
743 len = VARSIZE(v) - VARHDRSZ;
/* Valid indexes are 0 through len - 1. */
745 if (n < 0 || n >= len)
746 elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
750 * Make a copy of the original varlena.
752 res = (bytea *) palloc(VARSIZE(v));
753 memcpy((char *) res, (char *) v, VARSIZE(v));
/* Store into the copy; the int32 value is narrowed to one unsigned char. */
758 ((unsigned char *) VARDATA(res))[n] = newByte;
760 PG_RETURN_BYTEA_P(res);
763 /*-------------------------------------------------------------
766 * Given an instance of type 'bytea' creates a new one with
767 * the Nth bit set to the given value.
769 *-------------------------------------------------------------
/*
 * byteaSetBit: returns a copy of the bytea argument 0 in which the bit at
 * zero-based bit index n (argument 1) is set according to newBit
 * (argument 2).  An out-of-range index, or a newBit other than 0 or 1,
 * raises an error.
 *
 * NOTE(review): return type, braces, local declarations, the computation
 * of byteNo and bitNo, the elog arguments and the condition choosing
 * between the two newByte assignments are not visible in this excerpt.
 */
772 byteaSetBit(PG_FUNCTION_ARGS)
774 bytea *v = PG_GETARG_BYTEA_P(0);
775 int32 n = PG_GETARG_INT32(1);
776 int32 newBit = PG_GETARG_INT32(2);
/* Data byte count: total size minus the varlena header. */
784 len = VARSIZE(v) - VARHDRSZ;
/* Valid bit indexes are 0 through len * 8 - 1. */
786 if (n < 0 || n >= len * 8)
787 elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
/* Only the values 0 and 1 are accepted for the new bit. */
796 if (newBit != 0 && newBit != 1)
797 elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
800 * Make a copy of the original varlena.
802 res = (bytea *) palloc(VARSIZE(v));
803 memcpy((char *) res, (char *) v, VARSIZE(v));
/* Read the byte that holds the target bit from the copy. */
808 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
/* This form clears bit bitNo. */
811 newByte = oldByte & (~(1 << bitNo));
/* This form sets bit bitNo. */
813 newByte = oldByte | (1 << bitNo);
/* Write the updated byte back into the copy. */
815 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
817 PG_RETURN_BYTEA_P(res);
822 * Converts a text type to a Name type.
/*
 * text_name: builds a Name from the text argument 0, keeping at most
 * NAMEDATALEN - 1 data bytes and padding the rest with NUL bytes.
 *
 * NOTE(review): return type, braces, local declarations and the body of
 * the padding loop beyond the visible store are not visible in this
 * excerpt.
 */
825 text_name(PG_FUNCTION_ARGS)
827 text *s = PG_GETARG_TEXT_P(0);
/* Data byte count: total size minus the varlena header. */
831 len = VARSIZE(s) - VARHDRSZ;
833 /* Truncate oversize input */
834 if (len >= NAMEDATALEN)
835 len = NAMEDATALEN - 1;
/*
 * Trace output written with printf.
 * NOTE(review): presumably debug-only code under a conditional that is
 * not visible here -- confirm.
 */
838 printf("text- convert string length %d (%d) ->%d\n",
839 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
/* The result always occupies NAMEDATALEN bytes. */
842 result = (Name) palloc(NAMEDATALEN);
843 memcpy(NameStr(*result), VARDATA(s), len);
845 /* now null pad to full length... */
846 while (len < NAMEDATALEN)
848 *(NameStr(*result) + len) = '\0';
852 PG_RETURN_NAME(result);
856 * Converts a Name type to a text type.
/*
 * name_text: builds a text value from the Name argument 0, using the
 * strlen of the name as the data length.
 *
 * NOTE(review): return type, braces and local declarations are not
 * visible in this excerpt.
 */
859 name_text(PG_FUNCTION_ARGS)
861 Name s = PG_GETARG_NAME(0);
865 len = strlen(NameStr(*s));
/*
 * Trace output written with printf.
 * NOTE(review): s is a Name here, yet VARSIZE is applied to it; this
 * looks copied from text_name.  Presumably debug-only code under a
 * conditional that is not visible here -- confirm before enabling.
 */
868 printf("text- convert string length %d (%d) ->%d\n",
869 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
/* Result is a header plus len data bytes, with no terminating NUL. */
872 result = palloc(VARHDRSZ + len);
873 VARATT_SIZEP(result) = VARHDRSZ + len;
874 memcpy(VARDATA(result), NameStr(*s), len);
876 PG_RETURN_TEXT_P(result);