1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.54 1999/11/07 23:08:24 momjian Exp $
12 *-------------------------------------------------------------------------
18 #include "mb/pg_wchar.h"
19 #include "utils/builtins.h"
21 static int text_cmp(text *arg1, text *arg2);
24 /*****************************************************************************
26 *****************************************************************************/
29 #define VAL(CH) ((CH) - '0')
30 #define DIG(VAL) ((VAL) + '0')
33 * byteain - converts from printable representation of byte array
35 * Non-printable characters must be passed as '\nnn' (octal) and are
36 * converted to internal form. '\' must be passed as '\\'.
37 * elog(ERROR, ...) if bad form.
40 * The input is scanned twice.
41 * The error checking of input is minimal.
44 byteain(char *inputText)
51 if (inputText == NULL)
52 elog(ERROR, "Bad input string for type bytea");
54 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
59 else if (!isdigit(*tp++) ||
62 elog(ERROR, "Bad input string for type bytea");
66 result = (text *) palloc(byte);
67 result->vl_len = byte; /* varlena? */
70 if (*tp != '\\' || *++tp == '\\')
78 *rp++ = byte + VAL(*tp++);
84 * byteaout - converts to printable representation of byte array
86 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
89 * NULL vlena should be an error--returning string with NULL for now.
98 int val; /* holds unprintable chars */
104 result = (char *) palloc(2);
110 len = 1; /* empty string has 1 char */
111 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
114 else if (isascii(*vp) && isprint(*vp))
118 rp = result = (char *) palloc(len);
120 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--)
127 else if (isascii(*vp) && isprint(*vp))
134 *rp-- = DIG(val & 07);
136 *rp-- = DIG(val & 07);
147 * textin - converts "..." to internal representation
150 textin(char *inputText)
155 if (inputText == NULL)
158 len = strlen(inputText) + VARHDRSZ;
159 result = (text *) palloc(len);
160 VARSIZE(result) = len;
162 memmove(VARDATA(result), inputText, len - VARHDRSZ);
165 convertstr(VARDATA(result), len - VARHDRSZ, 0);
172 * textout - converts internal representation to "..."
182 result = (char *) palloc(2);
187 len = VARSIZE(vlena) - VARHDRSZ;
188 result = (char *) palloc(len + 1);
189 memmove(result, VARDATA(vlena), len);
193 convertstr(result, len, 1);
200 /* ========== PUBLIC ROUTINES ========== */
204 * returns the logical length of a text*
205 * (which is less than the VARSIZE of the text*)
218 if (!PointerIsValid(t))
219 elog(ERROR, "Null input to textlen");
224 l = VARSIZE(t) - VARHDRSZ;
234 return VARSIZE(t) - VARHDRSZ;
241 * returns the physical length of a text*
242 * (which is the VARSIZE of the text* minus the VARHDRSZ header)
245 textoctetlen(text *t)
247 if (!PointerIsValid(t))
248 elog(ERROR, "Null input to textoctetlen");
250 return VARSIZE(t) - VARHDRSZ;
252 } /* textoctetlen() */
256 * takes two text* and returns a text* that is the concatenation of
259 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
260 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
261 * Allocate space for output in all cases.
262 * XXX - thomas 1997-07-10
265 textcat(text *t1, text *t2)
273 if (!PointerIsValid(t1) || !PointerIsValid(t2))
276 len1 = (VARSIZE(t1) - VARHDRSZ);
279 while (len1 > 0 && VARDATA(t1)[len1 - 1] == '\0')
282 len2 = (VARSIZE(t2) - VARHDRSZ);
285 while (len2 > 0 && VARDATA(t2)[len2 - 1] == '\0')
288 len = len1 + len2 + VARHDRSZ;
289 result = palloc(len);
291 /* Set size of result string... */
292 VARSIZE(result) = len;
294 /* Fill data field of result string... */
295 ptr = VARDATA(result);
297 memcpy(ptr, VARDATA(t1), len1);
299 memcpy(ptr + len1, VARDATA(t2), len2);
306 * Return a substring starting at the specified position.
307 * - thomas 1997-12-31
311 * - starting position (is one-based)
314 * If the starting position is zero or less, then return from the start of the string
315 * adjusting the length to be consistent with the "negative start" per SQL92.
316 * If the length is less than zero, return the remaining string.
318 * Note that the arguments operate on octet length,
319 * so not aware of multi-byte character sets.
321 * Added multi-byte support.
322 * - Tatsuo Ishii 1998-4-21
323 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
324 * Formerly returned the entire string; now returns a portion.
325 * - Thomas Lockhart 1998-12-10
328 text_substr(text *string, int32 m, int32 n)
339 if (string == (text *) NULL)
342 len = VARSIZE(string) - VARHDRSZ;
344 len = pg_mbstrlen_with_len(VARDATA(string), len);
347 /* starting position after the end of the string? */
355 * starting position before the start of the string? then offset into
356 * the string per SQL92 spec...
364 /* m will now become a zero-based starting position */
366 if (((m + n) > len) || (n < 0))
371 for (i = 0; i < m; i++)
373 m = p - VARDATA(string);
374 for (i = 0; i < n; i++)
376 n = p - (VARDATA(string) + m);
378 ret = (text *) palloc(VARHDRSZ + n);
379 VARSIZE(ret) = VARHDRSZ + n;
381 memcpy(VARDATA(ret), VARDATA(string) + m, n);
384 } /* text_substr() */
388 * Return the position of the specified substring.
389 * Implements the SQL92 POSITION() function.
390 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
391 * - thomas 1997-07-27
393 * Added multi-byte support.
394 * - Tatsuo Ishii 1998-4-21
397 textpos(text *t1, text *t2)
413 if (!PointerIsValid(t1) || !PointerIsValid(t2))
416 if (VARSIZE(t2) <= 0)
419 len1 = (VARSIZE(t1) - VARHDRSZ);
420 len2 = (VARSIZE(t2) - VARHDRSZ);
422 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
423 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
424 len1 = pg_wchar_strlen(p1);
425 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
426 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
427 len2 = pg_wchar_strlen(p2);
434 for (p = 0; p <= px; p++)
437 if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
439 if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
455 * texteq - returns 1 iff arguments are equal
456 * textne - returns 1 iff arguments are not equal
459 texteq(text *arg1, text *arg2)
465 if (arg1 == NULL || arg2 == NULL)
467 if ((len = arg1->vl_len) != arg2->vl_len)
473 * Varlenas are stored as the total size (data + size variable)
474 * followed by the data. Use VARHDRSZ instead of explicit sizeof() -
479 if (*a1p++ != *a2p++)
485 textne(text *arg1, text *arg2)
487 return (bool) !texteq(arg1, arg2);
491 * Comparison function for text strings with given lengths.
492 * Includes locale support, but must copy strings to temporary memory
493 * to allow null-termination for inputs to strcoll().
497 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
504 a1p = (unsigned char *) palloc(len1 + 1);
505 a2p = (unsigned char *) palloc(len2 + 1);
507 memcpy(a1p, arg1, len1);
508 *(a1p + len1) = '\0';
509 memcpy(a2p, arg2, len2);
510 *(a2p + len2) = '\0';
512 result = strcoll(a1p, a2p);
522 result = strncmp(a1p, a2p, Min(len1, len2));
523 if ((result == 0) && (len1 != len2))
524 result = (len1 < len2) ? -1 : 1;
532 * Comparison function for text strings.
533 * Includes locale support, but must copy strings to temporary memory
534 * to allow null-termination for inputs to strcoll().
535 * XXX HACK code for textlen() indicates that there can be embedded nulls
536 * but it appears that most routines (incl. this one) assume not! - tgl 97/04/07
540 text_cmp(text *arg1, text *arg2)
547 if (arg1 == NULL || arg2 == NULL)
553 len1 = VARSIZE(arg1) - VARHDRSZ;
554 len2 = VARSIZE(arg2) - VARHDRSZ;
556 return varstr_cmp(a1p, len1, a2p, len2);
560 * Comparison function for text strings.
563 text_lt(text *arg1, text *arg2)
565 return (bool) (text_cmp(arg1, arg2) < 0);
569 * Comparison function for text strings.
572 text_le(text *arg1, text *arg2)
574 return (bool) (text_cmp(arg1, arg2) <= 0);
578 text_gt(text *arg1, text *arg2)
580 return (bool) !text_le(arg1, arg2);
584 text_ge(text *arg1, text *arg2)
586 return (bool) !text_lt(arg1, arg2);
590 text_larger(text *arg1, text *arg2)
595 temp = ((text_cmp(arg1, arg2) <= 0) ? arg2 : arg1);
599 result = (text *) palloc(VARSIZE(temp));
600 memmove((char *) result, (char *) temp, VARSIZE(temp));
606 text_smaller(text *arg1, text *arg2)
611 temp = ((text_cmp(arg1, arg2) > 0) ? arg2 : arg1);
615 result = (text *) palloc(VARSIZE(temp));
616 memmove((char *) result, (char *) temp, VARSIZE(temp));
621 /*-------------------------------------------------------------
624 * get the number of bytes contained in an instance of type 'bytea'
625 *-------------------------------------------------------------
628 byteaGetSize(text *v)
632 len = v->vl_len - sizeof(v->vl_len);
637 /*-------------------------------------------------------------
640 * this routine treats "bytea" as an array of bytes.
641 * It returns the Nth byte (a number between 0 and 255) or
642 * it dies if the length of this array is less than n.
643 *-------------------------------------------------------------
646 byteaGetByte(text *v, int32 n)
651 len = byteaGetSize(v);
655 elog(ERROR, "byteaGetByte: index (=%d) out of range [0..%d]",
659 byte = (unsigned char) (v->vl_dat[n]);
666 /*-------------------------------------------------------------
669 * This routine treats a "bytea" type like an array of bits.
670 * It returns the value of the Nth bit (0 or 1).
671 * If 'n' is out of range, it dies!
673 *-------------------------------------------------------------
676 byteaGetBit(text *v, int32 n)
685 byte = byteaGetByte(v, byteNo);
687 if (byte & (1 << bitNo))
693 /*-------------------------------------------------------------
696 * Given an instance of type 'bytea' creates a new one with
697 * the Nth byte set to the given value.
699 *-------------------------------------------------------------
702 byteaSetByte(text *v, int32 n, int32 newByte)
707 len = byteaGetSize(v);
712 "byteaSetByte: index (=%d) out of range [0..%d]",
717 * Make a copy of the original varlena.
719 res = (text *) palloc(VARSIZE(v));
722 elog(ERROR, "byteaSetByte: Out of memory (%d bytes requested)",
725 memmove((char *) res, (char *) v, VARSIZE(v));
730 res->vl_dat[n] = newByte;
735 /*-------------------------------------------------------------
738 * Given an instance of type 'bytea' creates a new one with
739 * the Nth bit set to the given value.
741 *-------------------------------------------------------------
744 byteaSetBit(text *v, int32 n, int32 newBit)
755 if (newBit != 0 && newBit != 1)
756 elog(ERROR, "byteaSetByte: new bit must be 0 or 1");
759 * get the byte where the bit we want is stored.
763 oldByte = byteaGetByte(v, byteNo);
766 * calculate the new value for that byte
769 newByte = oldByte & (~(1 << bitNo));
771 newByte = oldByte | (1 << bitNo);
774 * NOTE: 'byteaSetByte' creates a copy of 'v' & sets the byte.
776 res = byteaSetByte(v, byteNo, newByte);
783 * Converts a text() type to a NameData type.
794 len = VARSIZE(s) - VARHDRSZ;
795 if (len > NAMEDATALEN)
799 printf("text- convert string length %d (%d) ->%d\n",
800 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
803 result = palloc(NAMEDATALEN);
804 StrNCpy(NameStr(*result), VARDATA(s), NAMEDATALEN);
806 /* now null pad to full length... */
807 while (len < NAMEDATALEN)
809 *(NameStr(*result) + len) = '\0';
817 * Converts a NameData type to a text type.
820 name_text(NameData *s)
828 len = strlen(NameStr(*s));
831 printf("text- convert string length %d (%d) ->%d\n",
832 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
835 result = palloc(VARHDRSZ + len);
836 strncpy(VARDATA(result), NameStr(*s), len);
837 VARSIZE(result) = len + VARHDRSZ;