1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.69 2001/03/22 03:59:55 momjian Exp $
13 *-------------------------------------------------------------------------
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
23 static int text_cmp(text *arg1, text *arg2);
26 /*****************************************************************************
28 *****************************************************************************/
31 #define VAL(CH) ((CH) - '0')
32 #define DIG(VAL) ((VAL) + '0')
35 * byteain - converts from printable representation of byte array
37 * Non-printable characters must be passed as '\nnn' (octal) and are
38 * converted to internal form. '\' must be passed as '\\'.
39 * elog(ERROR, ...) if bad form.
42 * The input is scanned twice.
43 * The error checking of input is minimal.
46 byteain(PG_FUNCTION_ARGS)
48 char *inputText = PG_GETARG_CSTRING(0);
54 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
60 else if (!isdigit((unsigned char) *tp++) ||
61 !isdigit((unsigned char) *tp++) ||
62 !isdigit((unsigned char) *tp++))
63 elog(ERROR, "Bad input string for type bytea");
68 result = (bytea *) palloc(byte);
69 result->vl_len = byte; /* set varlena length */
75 if (*tp != '\\' || *++tp == '\\')
83 *rp++ = byte + VAL(*tp++);
87 PG_RETURN_BYTEA_P(result);
91 * byteaout - converts to printable representation of byte array
93 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
96 * NULL vlena should be an error--returning string with NULL for now.
99 byteaout(PG_FUNCTION_ARGS)
101 bytea *vlena = PG_GETARG_BYTEA_P(0);
105 int val; /* holds unprintable chars */
109 len = 1; /* empty string has 1 char */
111 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
115 else if (isprint((unsigned char) *vp))
120 rp = result = (char *) palloc(len);
122 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
129 else if (isprint((unsigned char) *vp))
135 rp[3] = DIG(val & 07);
137 rp[2] = DIG(val & 07);
139 rp[1] = DIG(val & 03);
144 PG_RETURN_CSTRING(result);
149 * textin - converts "..." to internal representation
152 textin(PG_FUNCTION_ARGS)
154 char *inputText = PG_GETARG_CSTRING(0);
158 len = strlen(inputText) + VARHDRSZ;
159 result = (text *) palloc(len);
160 VARATT_SIZEP(result) = len;
162 memcpy(VARDATA(result), inputText, len - VARHDRSZ);
165 convertstr(VARDATA(result), len - VARHDRSZ, 0);
168 PG_RETURN_TEXT_P(result);
172 * textout - converts internal representation to "..."
175 textout(PG_FUNCTION_ARGS)
177 text *t = PG_GETARG_TEXT_P(0);
181 len = VARSIZE(t) - VARHDRSZ;
182 result = (char *) palloc(len + 1);
183 memcpy(result, VARDATA(t), len);
187 convertstr(result, len, 1);
190 PG_RETURN_CSTRING(result);
194 /* ========== PUBLIC ROUTINES ========== */
198 * returns the logical length of a text*
199 * (which is less than the VARSIZE of the text*)
202 textlen(PG_FUNCTION_ARGS)
204 text *t = PG_GETARG_TEXT_P(0);
214 l = VARSIZE(t) - VARHDRSZ;
222 PG_RETURN_INT32(len);
224 PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ);
230 * returns the physical length of a text*
231 * (which is less than the VARSIZE of the text*)
233 * XXX is it actually appropriate to return the compressed length
234 * when the value is compressed? It's not at all clear to me that
235 * this is what SQL92 has in mind ...
238 textoctetlen(PG_FUNCTION_ARGS)
240 struct varattrib *t = (struct varattrib *) PG_GETARG_RAW_VARLENA_P(0);
242 if (!VARATT_IS_EXTERNAL(t))
243 PG_RETURN_INT32(VARATT_SIZE(t) - VARHDRSZ);
245 PG_RETURN_INT32(t->va_content.va_external.va_extsize);
250 * takes two text* and returns a text* that is the concatenation of
253 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
254 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
255 * Allocate space for output in all cases.
256 * XXX - thomas 1997-07-10
259 textcat(PG_FUNCTION_ARGS)
261 text *t1 = PG_GETARG_TEXT_P(0);
262 text *t2 = PG_GETARG_TEXT_P(1);
269 len1 = (VARSIZE(t1) - VARHDRSZ);
273 len2 = (VARSIZE(t2) - VARHDRSZ);
277 len = len1 + len2 + VARHDRSZ;
278 result = (text *) palloc(len);
280 /* Set size of result string... */
281 VARATT_SIZEP(result) = len;
283 /* Fill data field of result string... */
284 ptr = VARDATA(result);
286 memcpy(ptr, VARDATA(t1), len1);
288 memcpy(ptr + len1, VARDATA(t2), len2);
290 PG_RETURN_TEXT_P(result);
295 * Return a substring starting at the specified position.
296 * - thomas 1997-12-31
300 * - starting position (is one-based)
303 * If the starting position is zero or less, then return from the start of the string
304 * adjusting the length to be consistent with the "negative start" per SQL92.
305 * If the length is less than zero, return the remaining string.
307 * Note that the arguments operate on octet length,
308 * so not aware of multi-byte character sets.
310 * Added multi-byte support.
311 * - Tatsuo Ishii 1998-4-21
312 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
313 * Formerly returned the entire string; now returns a portion.
314 * - Thomas Lockhart 1998-12-10
317 text_substr(PG_FUNCTION_ARGS)
319 text *string = PG_GETARG_TEXT_P(0);
320 int32 m = PG_GETARG_INT32(1);
321 int32 n = PG_GETARG_INT32(2);
331 len = VARSIZE(string) - VARHDRSZ;
333 len = pg_mbstrlen_with_len(VARDATA(string), len);
336 /* starting position after the end of the string? */
344 * starting position before the start of the string? then offset into
345 * the string per SQL92 spec...
353 /* m will now become a zero-based starting position */
355 if (((m + n) > len) || (n < 0))
360 for (i = 0; i < m; i++)
362 m = p - VARDATA(string);
363 for (i = 0; i < n; i++)
365 n = p - (VARDATA(string) + m);
368 ret = (text *) palloc(VARHDRSZ + n);
369 VARATT_SIZEP(ret) = VARHDRSZ + n;
371 memcpy(VARDATA(ret), VARDATA(string) + m, n);
373 PG_RETURN_TEXT_P(ret);
378 * Return the position of the specified substring.
379 * Implements the SQL92 POSITION() function.
380 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
381 * - thomas 1997-07-27
383 * Added multi-byte support.
384 * - Tatsuo Ishii 1998-4-21
387 textpos(PG_FUNCTION_ARGS)
389 text *t1 = PG_GETARG_TEXT_P(0);
390 text *t2 = PG_GETARG_TEXT_P(1);
405 if (VARSIZE(t2) <= VARHDRSZ)
406 PG_RETURN_INT32(1); /* result for empty pattern */
408 len1 = (VARSIZE(t1) - VARHDRSZ);
409 len2 = (VARSIZE(t2) - VARHDRSZ);
411 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
412 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
413 len1 = pg_wchar_strlen(p1);
414 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
415 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
416 len2 = pg_wchar_strlen(p2);
423 for (p = 0; p <= px; p++)
426 if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
428 if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
440 PG_RETURN_INT32(pos);
444 * texteq - returns true iff arguments are equal
445 * textne - returns true iff arguments are not equal
447 * Note: btree indexes need these routines not to leak memory; therefore,
448 * be careful to free working copies of toasted datums. Most places don't
449 * need to be so careful.
452 texteq(PG_FUNCTION_ARGS)
454 text *arg1 = PG_GETARG_TEXT_P(0);
455 text *arg2 = PG_GETARG_TEXT_P(1);
458 if (VARSIZE(arg1) != VARSIZE(arg2))
466 len = VARSIZE(arg1) - VARHDRSZ;
471 result = (memcmp(a1p, a2p, len) == 0);
474 PG_FREE_IF_COPY(arg1, 0);
475 PG_FREE_IF_COPY(arg2, 1);
477 PG_RETURN_BOOL(result);
481 textne(PG_FUNCTION_ARGS)
483 text *arg1 = PG_GETARG_TEXT_P(0);
484 text *arg2 = PG_GETARG_TEXT_P(1);
487 if (VARSIZE(arg1) != VARSIZE(arg2))
495 len = VARSIZE(arg1) - VARHDRSZ;
500 result = (memcmp(a1p, a2p, len) != 0);
503 PG_FREE_IF_COPY(arg1, 0);
504 PG_FREE_IF_COPY(arg2, 1);
506 PG_RETURN_BOOL(result);
510 * Comparison function for text strings with given lengths.
511 * Includes locale support, but must copy strings to temporary memory
512 * to allow null-termination for inputs to strcoll().
516 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
523 a1p = (unsigned char *) palloc(len1 + 1);
524 a2p = (unsigned char *) palloc(len2 + 1);
526 memcpy(a1p, arg1, len1);
527 *(a1p + len1) = '\0';
528 memcpy(a2p, arg2, len2);
529 *(a2p + len2) = '\0';
531 result = strcoll(a1p, a2p);
541 result = strncmp(a1p, a2p, Min(len1, len2));
542 if ((result == 0) && (len1 != len2))
543 result = (len1 < len2) ? -1 : 1;
551 * Comparison function for text strings.
552 * Includes locale support, but must copy strings to temporary memory
553 * to allow null-termination for inputs to strcoll().
554 * XXX HACK code for textlen() indicates that there can be embedded nulls
555 * but it appears that most routines (incl. this one) assume not! - tgl 97/04/07
559 text_cmp(text *arg1, text *arg2)
569 len1 = VARSIZE(arg1) - VARHDRSZ;
570 len2 = VARSIZE(arg2) - VARHDRSZ;
572 return varstr_cmp(a1p, len1, a2p, len2);
576 * Comparison functions for text strings.
578 * Note: btree indexes need these routines not to leak memory; therefore,
579 * be careful to free working copies of toasted datums. Most places don't
580 * need to be so careful.
584 text_lt(PG_FUNCTION_ARGS)
586 text *arg1 = PG_GETARG_TEXT_P(0);
587 text *arg2 = PG_GETARG_TEXT_P(1);
590 result = (text_cmp(arg1, arg2) < 0);
592 PG_FREE_IF_COPY(arg1, 0);
593 PG_FREE_IF_COPY(arg2, 1);
595 PG_RETURN_BOOL(result);
599 text_le(PG_FUNCTION_ARGS)
601 text *arg1 = PG_GETARG_TEXT_P(0);
602 text *arg2 = PG_GETARG_TEXT_P(1);
605 result = (text_cmp(arg1, arg2) <= 0);
607 PG_FREE_IF_COPY(arg1, 0);
608 PG_FREE_IF_COPY(arg2, 1);
610 PG_RETURN_BOOL(result);
614 text_gt(PG_FUNCTION_ARGS)
616 text *arg1 = PG_GETARG_TEXT_P(0);
617 text *arg2 = PG_GETARG_TEXT_P(1);
620 result = (text_cmp(arg1, arg2) > 0);
622 PG_FREE_IF_COPY(arg1, 0);
623 PG_FREE_IF_COPY(arg2, 1);
625 PG_RETURN_BOOL(result);
629 text_ge(PG_FUNCTION_ARGS)
631 text *arg1 = PG_GETARG_TEXT_P(0);
632 text *arg2 = PG_GETARG_TEXT_P(1);
635 result = (text_cmp(arg1, arg2) >= 0);
637 PG_FREE_IF_COPY(arg1, 0);
638 PG_FREE_IF_COPY(arg2, 1);
640 PG_RETURN_BOOL(result);
644 text_larger(PG_FUNCTION_ARGS)
646 text *arg1 = PG_GETARG_TEXT_P(0);
647 text *arg2 = PG_GETARG_TEXT_P(1);
650 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
652 PG_RETURN_TEXT_P(result);
656 text_smaller(PG_FUNCTION_ARGS)
658 text *arg1 = PG_GETARG_TEXT_P(0);
659 text *arg2 = PG_GETARG_TEXT_P(1);
662 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
664 PG_RETURN_TEXT_P(result);
667 /*-------------------------------------------------------------
670 * get the number of bytes contained in an instance of type 'bytea'
671 *-------------------------------------------------------------
674 byteaoctetlen(PG_FUNCTION_ARGS)
676 bytea *v = PG_GETARG_BYTEA_P(0);
678 PG_RETURN_INT32(VARSIZE(v) - VARHDRSZ);
681 /*-------------------------------------------------------------
684 * this routine treats "bytea" as an array of bytes.
685 * It returns the Nth byte (a number between 0 and 255).
686 *-------------------------------------------------------------
689 byteaGetByte(PG_FUNCTION_ARGS)
691 bytea *v = PG_GETARG_BYTEA_P(0);
692 int32 n = PG_GETARG_INT32(1);
696 len = VARSIZE(v) - VARHDRSZ;
698 if (n < 0 || n >= len)
699 elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
702 byte = ((unsigned char *) VARDATA(v))[n];
704 PG_RETURN_INT32(byte);
707 /*-------------------------------------------------------------
710 * This routine treats a "bytea" type like an array of bits.
711 * It returns the value of the Nth bit (0 or 1).
713 *-------------------------------------------------------------
716 byteaGetBit(PG_FUNCTION_ARGS)
718 bytea *v = PG_GETARG_BYTEA_P(0);
719 int32 n = PG_GETARG_INT32(1);
725 len = VARSIZE(v) - VARHDRSZ;
727 if (n < 0 || n >= len * 8)
728 elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
734 byte = ((unsigned char *) VARDATA(v))[byteNo];
736 if (byte & (1 << bitNo))
742 /*-------------------------------------------------------------
745 * Given an instance of type 'bytea' creates a new one with
746 * the Nth byte set to the given value.
748 *-------------------------------------------------------------
751 byteaSetByte(PG_FUNCTION_ARGS)
753 bytea *v = PG_GETARG_BYTEA_P(0);
754 int32 n = PG_GETARG_INT32(1);
755 int32 newByte = PG_GETARG_INT32(2);
759 len = VARSIZE(v) - VARHDRSZ;
761 if (n < 0 || n >= len)
762 elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
766 * Make a copy of the original varlena.
768 res = (bytea *) palloc(VARSIZE(v));
769 memcpy((char *) res, (char *) v, VARSIZE(v));
774 ((unsigned char *) VARDATA(res))[n] = newByte;
776 PG_RETURN_BYTEA_P(res);
779 /*-------------------------------------------------------------
782 * Given an instance of type 'bytea' creates a new one with
783 * the Nth bit set to the given value.
785 *-------------------------------------------------------------
788 byteaSetBit(PG_FUNCTION_ARGS)
790 bytea *v = PG_GETARG_BYTEA_P(0);
791 int32 n = PG_GETARG_INT32(1);
792 int32 newBit = PG_GETARG_INT32(2);
800 len = VARSIZE(v) - VARHDRSZ;
802 if (n < 0 || n >= len * 8)
803 elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
812 if (newBit != 0 && newBit != 1)
813 elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
816 * Make a copy of the original varlena.
818 res = (bytea *) palloc(VARSIZE(v));
819 memcpy((char *) res, (char *) v, VARSIZE(v));
824 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
827 newByte = oldByte & (~(1 << bitNo));
829 newByte = oldByte | (1 << bitNo);
831 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
833 PG_RETURN_BYTEA_P(res);
838 * Converts a text type to a Name type.
841 text_name(PG_FUNCTION_ARGS)
843 text *s = PG_GETARG_TEXT_P(0);
847 len = VARSIZE(s) - VARHDRSZ;
849 /* Truncate oversize input */
850 if (len >= NAMEDATALEN)
851 len = NAMEDATALEN - 1;
854 printf("text- convert string length %d (%d) ->%d\n",
855 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
858 result = (Name) palloc(NAMEDATALEN);
859 memcpy(NameStr(*result), VARDATA(s), len);
861 /* now null pad to full length... */
862 while (len < NAMEDATALEN)
864 *(NameStr(*result) + len) = '\0';
868 PG_RETURN_NAME(result);
872 * Converts a Name type to a text type.
875 name_text(PG_FUNCTION_ARGS)
877 Name s = PG_GETARG_NAME(0);
881 len = strlen(NameStr(*s));
884 printf("text- convert string length %d (%d) ->%d\n",
885 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
888 result = palloc(VARHDRSZ + len);
889 VARATT_SIZEP(result) = VARHDRSZ + len;
890 memcpy(VARDATA(result), NameStr(*s), len);
892 PG_RETURN_TEXT_P(result);