1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.89 2002/08/28 20:46:24 momjian Exp $
13 *-------------------------------------------------------------------------
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "access/tuptoaster.h"
22 #include "lib/stringinfo.h"
23 #include "utils/builtins.h"
24 #include "utils/pg_locale.h"
27 typedef struct varlena unknown;
29 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
30 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
31 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
32 #define PG_TEXTARG_GET_STR(arg_) \
33 DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
34 #define PG_TEXT_GET_STR(textp_) \
35 DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
36 #define PG_STR_GET_TEXT(str_) \
37 DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
38 #define TEXTLEN(textp) \
39 text_length(PointerGetDatum(textp))
40 #define TEXTPOS(buf_text, from_sub_text) \
41 text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
42 #define TEXTDUP(textp) \
43 DatumGetTextPCopy(PointerGetDatum(textp))
44 #define LEFT(buf_text, from_sub_text) \
45 text_substring(PointerGetDatum(buf_text), \
47 TEXTPOS(buf_text, from_sub_text) - 1, false)
48 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
49 text_substring(PointerGetDatum(buf_text), \
50 TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
53 static int text_cmp(text *arg1, text *arg2);
54 static int32 text_length(Datum str);
55 static int32 text_position(Datum str, Datum search_str, int matchnum);
56 static text *text_substring(Datum str,
59 bool length_not_specified);
62 /*****************************************************************************
64 *****************************************************************************/
67 #define VAL(CH) ((CH) - '0')
68 #define DIG(VAL) ((VAL) + '0')
71 * byteain - converts from printable representation of byte array
73 * Non-printable characters must be passed as '\nnn' (octal) and are
74 * converted to internal form. '\' must be passed as '\\'.
75 * elog(ERROR, ...) if bad form.
78 * The input is scanned twice.
79 * The error checking of input is minimal.
82 byteain(PG_FUNCTION_ARGS)
84 char *inputText = PG_GETARG_CSTRING(0);
90 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
94 else if ((tp[0] == '\\') &&
95 (tp[1] >= '0' && tp[1] <= '3') &&
96 (tp[2] >= '0' && tp[2] <= '7') &&
97 (tp[3] >= '0' && tp[3] <= '7'))
99 else if ((tp[0] == '\\') &&
105 * one backslash, not followed by 0 or ### valid octal
107 elog(ERROR, "Bad input string for type bytea");
112 result = (bytea *) palloc(byte);
113 result->vl_len = byte; /* set varlena length */
121 else if ((tp[0] == '\\') &&
122 (tp[1] >= '0' && tp[1] <= '3') &&
123 (tp[2] >= '0' && tp[2] <= '7') &&
124 (tp[3] >= '0' && tp[3] <= '7'))
130 *rp++ = byte + VAL(tp[3]);
133 else if ((tp[0] == '\\') &&
142 * We should never get here. The first pass should not allow
145 elog(ERROR, "Bad input string for type bytea");
149 PG_RETURN_BYTEA_P(result);
153 * byteaout - converts to printable representation of byte array
155 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
158 * NULL vlena should be an error--returning string with NULL for now.
161 byteaout(PG_FUNCTION_ARGS)
163 bytea *vlena = PG_GETARG_BYTEA_P(0);
167 int val; /* holds unprintable chars */
171 len = 1; /* empty string has 1 char */
173 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
177 else if (isprint((unsigned char) *vp))
182 rp = result = (char *) palloc(len);
184 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
191 else if (isprint((unsigned char) *vp))
197 rp[3] = DIG(val & 07);
199 rp[2] = DIG(val & 07);
201 rp[1] = DIG(val & 03);
206 PG_RETURN_CSTRING(result);
211 * textin - converts "..." to internal representation
214 textin(PG_FUNCTION_ARGS)
216 char *inputText = PG_GETARG_CSTRING(0);
224 len = strlen(inputText) + VARHDRSZ;
227 if ((ermsg = pg_verifymbstr(inputText, len - VARHDRSZ)))
228 elog(ERROR, "%s", ermsg);
231 result = (text *) palloc(len);
232 VARATT_SIZEP(result) = len;
234 memcpy(VARDATA(result), inputText, len - VARHDRSZ);
237 convertstr(VARDATA(result), len - VARHDRSZ, 0);
240 PG_RETURN_TEXT_P(result);
244 * textout - converts internal representation to "..."
247 textout(PG_FUNCTION_ARGS)
249 text *t = PG_GETARG_TEXT_P(0);
253 len = VARSIZE(t) - VARHDRSZ;
254 result = (char *) palloc(len + 1);
255 memcpy(result, VARDATA(t), len);
259 convertstr(result, len, 1);
262 PG_RETURN_CSTRING(result);
267 * unknownin - converts "..." to internal representation
270 unknownin(PG_FUNCTION_ARGS)
272 char *inputStr = PG_GETARG_CSTRING(0);
276 len = strlen(inputStr) + VARHDRSZ;
278 result = (unknown *) palloc(len);
279 VARATT_SIZEP(result) = len;
281 memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
283 PG_RETURN_UNKNOWN_P(result);
288 * unknownout - converts internal representation to "..."
291 unknownout(PG_FUNCTION_ARGS)
293 unknown *t = PG_GETARG_UNKNOWN_P(0);
297 len = VARSIZE(t) - VARHDRSZ;
298 result = (char *) palloc(len + 1);
299 memcpy(result, VARDATA(t), len);
302 PG_RETURN_CSTRING(result);
306 /* ========== PUBLIC ROUTINES ========== */
310 * returns the logical length of a text*
311 * (which is less than the VARSIZE of the text*)
314 textlen(PG_FUNCTION_ARGS)
316 PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
321 * Does the real work for textlen()
322 * This is broken out so it can be called directly by other string processing
326 text_length(Datum str)
328 /* fastpath when max encoding length is one */
329 if (pg_database_encoding_max_length() == 1)
330 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
332 if (pg_database_encoding_max_length() > 1)
334 text *t = DatumGetTextP(str);
336 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
337 VARSIZE(t) - VARHDRSZ));
340 /* should never get here */
341 elog(ERROR, "Invalid backend encoding; encoding max length "
342 "is less than one.");
344 /* not reached: suppress compiler warning */
350 * returns the physical length of a text*
351 * (which is less than the VARSIZE of the text*)
354 textoctetlen(PG_FUNCTION_ARGS)
356 PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
361 * takes two text* and returns a text* that is the concatenation of
364 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
365 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
366 * Allocate space for output in all cases.
367 * XXX - thomas 1997-07-10
370 textcat(PG_FUNCTION_ARGS)
372 text *t1 = PG_GETARG_TEXT_P(0);
373 text *t2 = PG_GETARG_TEXT_P(1);
380 len1 = (VARSIZE(t1) - VARHDRSZ);
384 len2 = (VARSIZE(t2) - VARHDRSZ);
388 len = len1 + len2 + VARHDRSZ;
389 result = (text *) palloc(len);
391 /* Set size of result string... */
392 VARATT_SIZEP(result) = len;
394 /* Fill data field of result string... */
395 ptr = VARDATA(result);
397 memcpy(ptr, VARDATA(t1), len1);
399 memcpy(ptr + len1, VARDATA(t2), len2);
401 PG_RETURN_TEXT_P(result);
406 * Return a substring starting at the specified position.
407 * - thomas 1997-12-31
411 * - starting position (is one-based)
414 * If the starting position is zero or less, then return from the start of the string
415 * adjusting the length to be consistent with the "negative start" per SQL92.
416 * If the length is less than zero, return the remaining string.
418 * Note that the arguments operate on octet length,
419 * so not aware of multi-byte character sets.
421 * Added multi-byte support.
422 * - Tatsuo Ishii 1998-4-21
423 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
424 * Formerly returned the entire string; now returns a portion.
425 * - Thomas Lockhart 1998-12-10
426 * Now uses faster TOAST-slicing interface
427 * - John Gray 2002-02-22
428 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
429 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
430 * error; if E < 1, return '', not entire string). Fixed MB related bug when
431 * S > LC and < LC + 4 sometimes garbage characters are returned.
432 * - Joe Conway 2002-08-10
435 text_substr(PG_FUNCTION_ARGS)
437 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
444 * text_substr_no_len -
445 * Wrapper to avoid opr_sanity failure due to
446 * one function accepting a different number of args.
449 text_substr_no_len(PG_FUNCTION_ARGS)
451 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
458 * Does the real work for text_substr() and text_substr_no_len()
459 * This is broken out so it can be called directly by other string processing
463 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
465 int32 eml = pg_database_encoding_max_length();
466 int32 S = start; /* start position */
467 int32 S1; /* adjusted start position */
468 int32 L1; /* adjusted substring length */
470 /* life is easy if the encoding max length is 1 */
475 if (length_not_specified) /* special case - get length to end of string */
483 * A negative value for L is the only way for the end position
484 * to be before the start. SQL99 says to throw an error.
487 elog(ERROR, "negative substring length not allowed");
490 * A zero or negative value for the end position can happen if the start
491 * was negative or one. SQL99 says to return a zero-length string.
494 return PG_STR_GET_TEXT("");
500 * If the start position is past the end of the string,
501 * SQL99 says to return a zero-length string --
502 * DatumGetTextPSlice() will do that for us.
503 * Convert to zero-based starting position
505 return DatumGetTextPSlice(str, S1 - 1, L1);
510 * When encoding max length is > 1, we can't get LC without
511 * detoasting, so we'll grab a conservatively large slice
512 * now and go back later to do the right thing
525 * if S is past the end of the string, the tuple toaster
526 * will return a zero-length string to us
531 * We need to start at position zero because there is no
532 * way to know in advance which byte offset corresponds to
533 * the supplied start position.
537 if (length_not_specified) /* special case - get length to end of string */
538 slice_size = L1 = -1;
544 * A negative value for L is the only way for the end position
545 * to be before the start. SQL99 says to throw an error.
548 elog(ERROR, "negative substring length not allowed");
551 * A zero or negative value for the end position can happen if the start
552 * was negative or one. SQL99 says to return a zero-length string.
555 return PG_STR_GET_TEXT("");
558 * if E is past the end of the string, the tuple toaster
559 * will truncate the length for us
564 * Total slice size in bytes can't be any longer than the start
565 * position plus substring length times the encoding max length.
567 slice_size = (S1 + L1) * eml;
569 slice = DatumGetTextPSlice(str, slice_start, slice_size);
571 /* see if we got back an empty string */
572 if ((VARSIZE(slice) - VARHDRSZ) == 0)
573 return PG_STR_GET_TEXT("");
575 /* Now we can get the actual length of the slice in MB characters */
576 slice_strlen = pg_mbstrlen_with_len (VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
578 /* Check that the start position wasn't > slice_strlen. If so,
579 * SQL99 says to return a zero-length string.
581 if (S1 > slice_strlen)
582 return PG_STR_GET_TEXT("");
585 * Adjust L1 and E1 now that we know the slice string length.
586 * Again remember that S1 is one based, and slice_start is zero based.
589 E1 = Min(S1 + L1 , slice_start + 1 + slice_strlen);
591 E1 = slice_start + 1 + slice_strlen;
594 * Find the start position in the slice;
595 * remember S1 is not zero based
598 for (i = 0; i < S1 - 1; i++)
601 /* hang onto a pointer to our start position */
605 * Count the actual bytes used by the substring of
606 * the requested length.
608 for (i = S1; i < E1; i++)
611 ret = (text *) palloc(VARHDRSZ + (p - s));
612 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
613 memcpy(VARDATA(ret), s, (p - s));
618 elog(ERROR, "Invalid backend encoding; encoding max length "
619 "is less than one.");
621 /* not reached: suppress compiler warning */
622 return PG_STR_GET_TEXT("");
627 * Return the position of the specified substring.
628 * Implements the SQL92 POSITION() function.
629 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
630 * - thomas 1997-07-27
632 * Added multi-byte support.
633 * - Tatsuo Ishii 1998-4-21
636 textpos(PG_FUNCTION_ARGS)
638 PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
643 * Does the real work for textpos()
644 * This is broken out so it can be called directly by other string processing
648 text_position(Datum str, Datum search_str, int matchnum)
650 int eml = pg_database_encoding_max_length();
651 text *t1 = DatumGetTextP(str);
652 text *t2 = DatumGetTextP(search_str);
661 return 0; /* result for 0th match */
663 if (VARSIZE(t2) <= VARHDRSZ)
664 PG_RETURN_INT32(1); /* result for empty pattern */
666 len1 = (VARSIZE(t1) - VARHDRSZ);
667 len2 = (VARSIZE(t2) - VARHDRSZ);
669 /* no use in searching str past point where search_str will fit */
672 if (eml == 1) /* simple case - single byte encoding */
680 for (p = 0; p <= px; p++)
682 if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
684 if (++match == matchnum)
693 else if (eml > 1) /* not as simple - multibyte encoding */
700 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
701 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
702 len1 = pg_wchar_strlen(p1);
703 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
704 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
705 len2 = pg_wchar_strlen(p2);
707 for (p = 0; p <= px; p++)
709 if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
711 if (++match == matchnum)
724 elog(ERROR, "Invalid backend encoding; encoding max length "
725 "is less than one.");
727 PG_RETURN_INT32(pos);
731 * Comparison function for text strings with given lengths.
732 * Includes locale support, but must copy strings to temporary memory
733 * to allow null-termination for inputs to strcoll().
737 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
744 * Unfortunately, there is no strncoll(), so in the non-C locale
745 * case we have to do some memory copying. This turns out to be
746 * significantly slower, so we optimize the case where LC_COLLATE
749 if (!lc_collate_is_c())
751 a1p = (char *) palloc(len1 + 1);
752 a2p = (char *) palloc(len2 + 1);
754 memcpy(a1p, arg1, len1);
755 *(a1p + len1) = '\0';
756 memcpy(a2p, arg2, len2);
757 *(a2p + len2) = '\0';
759 result = strcoll(a1p, a2p);
769 result = strncmp(a1p, a2p, Min(len1, len2));
770 if ((result == 0) && (len1 != len2))
771 result = (len1 < len2) ? -1 : 1;
779 * Internal comparison function for text strings.
783 text_cmp(text *arg1, text *arg2)
793 len1 = VARSIZE(arg1) - VARHDRSZ;
794 len2 = VARSIZE(arg2) - VARHDRSZ;
796 return varstr_cmp(a1p, len1, a2p, len2);
800 * Comparison functions for text strings.
802 * Note: btree indexes need these routines not to leak memory; therefore,
803 * be careful to free working copies of toasted datums. Most places don't
804 * need to be so careful.
808 texteq(PG_FUNCTION_ARGS)
810 text *arg1 = PG_GETARG_TEXT_P(0);
811 text *arg2 = PG_GETARG_TEXT_P(1);
814 /* fast path for different-length inputs */
815 if (VARSIZE(arg1) != VARSIZE(arg2))
818 result = (text_cmp(arg1, arg2) == 0);
820 PG_FREE_IF_COPY(arg1, 0);
821 PG_FREE_IF_COPY(arg2, 1);
823 PG_RETURN_BOOL(result);
827 textne(PG_FUNCTION_ARGS)
829 text *arg1 = PG_GETARG_TEXT_P(0);
830 text *arg2 = PG_GETARG_TEXT_P(1);
833 /* fast path for different-length inputs */
834 if (VARSIZE(arg1) != VARSIZE(arg2))
837 result = (text_cmp(arg1, arg2) != 0);
839 PG_FREE_IF_COPY(arg1, 0);
840 PG_FREE_IF_COPY(arg2, 1);
842 PG_RETURN_BOOL(result);
846 text_lt(PG_FUNCTION_ARGS)
848 text *arg1 = PG_GETARG_TEXT_P(0);
849 text *arg2 = PG_GETARG_TEXT_P(1);
852 result = (text_cmp(arg1, arg2) < 0);
854 PG_FREE_IF_COPY(arg1, 0);
855 PG_FREE_IF_COPY(arg2, 1);
857 PG_RETURN_BOOL(result);
861 text_le(PG_FUNCTION_ARGS)
863 text *arg1 = PG_GETARG_TEXT_P(0);
864 text *arg2 = PG_GETARG_TEXT_P(1);
867 result = (text_cmp(arg1, arg2) <= 0);
869 PG_FREE_IF_COPY(arg1, 0);
870 PG_FREE_IF_COPY(arg2, 1);
872 PG_RETURN_BOOL(result);
876 text_gt(PG_FUNCTION_ARGS)
878 text *arg1 = PG_GETARG_TEXT_P(0);
879 text *arg2 = PG_GETARG_TEXT_P(1);
882 result = (text_cmp(arg1, arg2) > 0);
884 PG_FREE_IF_COPY(arg1, 0);
885 PG_FREE_IF_COPY(arg2, 1);
887 PG_RETURN_BOOL(result);
891 text_ge(PG_FUNCTION_ARGS)
893 text *arg1 = PG_GETARG_TEXT_P(0);
894 text *arg2 = PG_GETARG_TEXT_P(1);
897 result = (text_cmp(arg1, arg2) >= 0);
899 PG_FREE_IF_COPY(arg1, 0);
900 PG_FREE_IF_COPY(arg2, 1);
902 PG_RETURN_BOOL(result);
906 bttextcmp(PG_FUNCTION_ARGS)
908 text *arg1 = PG_GETARG_TEXT_P(0);
909 text *arg2 = PG_GETARG_TEXT_P(1);
912 result = text_cmp(arg1, arg2);
914 PG_FREE_IF_COPY(arg1, 0);
915 PG_FREE_IF_COPY(arg2, 1);
917 PG_RETURN_INT32(result);
922 text_larger(PG_FUNCTION_ARGS)
924 text *arg1 = PG_GETARG_TEXT_P(0);
925 text *arg2 = PG_GETARG_TEXT_P(1);
928 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
930 PG_RETURN_TEXT_P(result);
934 text_smaller(PG_FUNCTION_ARGS)
936 text *arg1 = PG_GETARG_TEXT_P(0);
937 text *arg2 = PG_GETARG_TEXT_P(1);
940 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
942 PG_RETURN_TEXT_P(result);
945 /*-------------------------------------------------------------
948 * get the number of bytes contained in an instance of type 'bytea'
949 *-------------------------------------------------------------
952 byteaoctetlen(PG_FUNCTION_ARGS)
954 PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
959 * takes two bytea* and returns a bytea* that is the concatenation of
962 * Cloned from textcat and modified as required.
965 byteacat(PG_FUNCTION_ARGS)
967 bytea *t1 = PG_GETARG_BYTEA_P(0);
968 bytea *t2 = PG_GETARG_BYTEA_P(1);
975 len1 = (VARSIZE(t1) - VARHDRSZ);
979 len2 = (VARSIZE(t2) - VARHDRSZ);
983 len = len1 + len2 + VARHDRSZ;
984 result = (bytea *) palloc(len);
986 /* Set size of result string... */
987 VARATT_SIZEP(result) = len;
989 /* Fill data field of result string... */
990 ptr = VARDATA(result);
992 memcpy(ptr, VARDATA(t1), len1);
994 memcpy(ptr + len1, VARDATA(t2), len2);
996 PG_RETURN_BYTEA_P(result);
999 #define PG_STR_GET_BYTEA(str_) \
1000 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1003 * Return a substring starting at the specified position.
1004 * Cloned from text_substr and modified as required.
1008 * - starting position (is one-based)
1009 * - string length (optional)
1011 * If the starting position is zero or less, then return from the start of the string
1012 * adjusting the length to be consistent with the "negative start" per SQL92.
1013 * If the length is less than zero, an ERROR is thrown. If no third argument
1014 * (length) is provided, the length to the end of the string is assumed.
1017 bytea_substr(PG_FUNCTION_ARGS)
1019 int S = PG_GETARG_INT32(1); /* start position */
1020 int S1; /* adjusted start position */
1021 int L1; /* adjusted substring length */
1025 if (fcinfo->nargs == 2)
1028 * Not passed a length - PG_GETARG_BYTEA_P_SLICE()
1029 * grabs everything to the end of the string if we pass it
1030 * a negative value for length.
1037 int E = S + PG_GETARG_INT32(2);
1040 * A negative value for L is the only way for the end position
1041 * to be before the start. SQL99 says to throw an error.
1044 elog(ERROR, "negative substring length not allowed");
1047 * A zero or negative value for the end position can happen if the start
1048 * was negative or one. SQL99 says to return a zero-length string.
1051 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1057 * If the start position is past the end of the string,
1058 * SQL99 says to return a zero-length string --
1059 * PG_GETARG_BYTEA_P_SLICE() will do that for us.
1060 * Convert to zero-based starting position
1062 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, S1 - 1, L1));
1066 * bytea_substr_no_len -
1067 * Wrapper to avoid opr_sanity failure due to
1068 * one function accepting a different number of args.
1071 bytea_substr_no_len(PG_FUNCTION_ARGS)
1073 return bytea_substr(fcinfo);
1078 * Return the position of the specified substring.
1079 * Implements the SQL92 POSITION() function.
1080 * Cloned from textpos and modified as required.
1083 byteapos(PG_FUNCTION_ARGS)
1085 bytea *t1 = PG_GETARG_BYTEA_P(0);
1086 bytea *t2 = PG_GETARG_BYTEA_P(1);
1095 if (VARSIZE(t2) <= VARHDRSZ)
1096 PG_RETURN_INT32(1); /* result for empty pattern */
1098 len1 = (VARSIZE(t1) - VARHDRSZ);
1099 len2 = (VARSIZE(t2) - VARHDRSZ);
1106 for (p = 0; p <= px; p++)
1108 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1116 PG_RETURN_INT32(pos);
1119 /*-------------------------------------------------------------
1122 * this routine treats "bytea" as an array of bytes.
1123 * It returns the Nth byte (a number between 0 and 255).
1124 *-------------------------------------------------------------
1127 byteaGetByte(PG_FUNCTION_ARGS)
1129 bytea *v = PG_GETARG_BYTEA_P(0);
1130 int32 n = PG_GETARG_INT32(1);
1134 len = VARSIZE(v) - VARHDRSZ;
1136 if (n < 0 || n >= len)
1137 elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
1140 byte = ((unsigned char *) VARDATA(v))[n];
1142 PG_RETURN_INT32(byte);
1145 /*-------------------------------------------------------------
1148 * This routine treats a "bytea" type like an array of bits.
1149 * It returns the value of the Nth bit (0 or 1).
1151 *-------------------------------------------------------------
1154 byteaGetBit(PG_FUNCTION_ARGS)
1156 bytea *v = PG_GETARG_BYTEA_P(0);
1157 int32 n = PG_GETARG_INT32(1);
1163 len = VARSIZE(v) - VARHDRSZ;
1165 if (n < 0 || n >= len * 8)
1166 elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
1172 byte = ((unsigned char *) VARDATA(v))[byteNo];
1174 if (byte & (1 << bitNo))
1180 /*-------------------------------------------------------------
1183 * Given an instance of type 'bytea' creates a new one with
1184 * the Nth byte set to the given value.
1186 *-------------------------------------------------------------
1189 byteaSetByte(PG_FUNCTION_ARGS)
1191 bytea *v = PG_GETARG_BYTEA_P(0);
1192 int32 n = PG_GETARG_INT32(1);
1193 int32 newByte = PG_GETARG_INT32(2);
1197 len = VARSIZE(v) - VARHDRSZ;
1199 if (n < 0 || n >= len)
1200 elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
1204 * Make a copy of the original varlena.
1206 res = (bytea *) palloc(VARSIZE(v));
1207 memcpy((char *) res, (char *) v, VARSIZE(v));
1212 ((unsigned char *) VARDATA(res))[n] = newByte;
1214 PG_RETURN_BYTEA_P(res);
1217 /*-------------------------------------------------------------
1220 * Given an instance of type 'bytea' creates a new one with
1221 * the Nth bit set to the given value.
1223 *-------------------------------------------------------------
1226 byteaSetBit(PG_FUNCTION_ARGS)
1228 bytea *v = PG_GETARG_BYTEA_P(0);
1229 int32 n = PG_GETARG_INT32(1);
1230 int32 newBit = PG_GETARG_INT32(2);
1238 len = VARSIZE(v) - VARHDRSZ;
1240 if (n < 0 || n >= len * 8)
1241 elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
1250 if (newBit != 0 && newBit != 1)
1251 elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
1254 * Make a copy of the original varlena.
1256 res = (bytea *) palloc(VARSIZE(v));
1257 memcpy((char *) res, (char *) v, VARSIZE(v));
1262 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1265 newByte = oldByte & (~(1 << bitNo));
1267 newByte = oldByte | (1 << bitNo);
1269 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1271 PG_RETURN_BYTEA_P(res);
1276 * Converts a text type to a Name type.
1279 text_name(PG_FUNCTION_ARGS)
1281 text *s = PG_GETARG_TEXT_P(0);
1285 len = VARSIZE(s) - VARHDRSZ;
1287 /* Truncate oversize input */
1288 if (len >= NAMEDATALEN)
1289 len = NAMEDATALEN - 1;
1292 printf("text- convert string length %d (%d) ->%d\n",
1293 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1296 result = (Name) palloc(NAMEDATALEN);
1297 memcpy(NameStr(*result), VARDATA(s), len);
1299 /* now null pad to full length... */
1300 while (len < NAMEDATALEN)
1302 *(NameStr(*result) + len) = '\0';
1306 PG_RETURN_NAME(result);
1310 * Converts a Name type to a text type.
1313 name_text(PG_FUNCTION_ARGS)
1315 Name s = PG_GETARG_NAME(0);
1319 len = strlen(NameStr(*s));
1322 printf("text- convert string length %d (%d) ->%d\n",
1323 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1326 result = palloc(VARHDRSZ + len);
1327 VARATT_SIZEP(result) = VARHDRSZ + len;
1328 memcpy(VARDATA(result), NameStr(*s), len);
1330 PG_RETURN_TEXT_P(result);
1335 * textToQualifiedNameList - convert a text object to list of names
1337 * This implements the input parsing needed by nextval() and other
1338 * functions that take a text parameter representing a qualified name.
1339 * We split the name at dots, downcase if not double-quoted, and
1340 * truncate names if they're too long.
1343 textToQualifiedNameList(text *textval, const char *caller)
1350 /* Convert to C string (handles possible detoasting). */
1351 /* Note we rely on being able to modify rawname below. */
1352 rawname = DatumGetCString(DirectFunctionCall1(textout,
1353 PointerGetDatum(textval)));
1355 if (!SplitIdentifierString(rawname, '.', &namelist))
1356 elog(ERROR, "%s: invalid name syntax", caller);
1358 if (namelist == NIL)
1359 elog(ERROR, "%s: invalid name syntax", caller);
1361 foreach(l, namelist)
1363 char *curname = (char *) lfirst(l);
1365 result = lappend(result, makeString(pstrdup(curname)));
1375 * SplitIdentifierString --- parse a string containing identifiers
1377 * This is the guts of textToQualifiedNameList, and is exported for use in
1378 * other situations such as parsing GUC variables. In the GUC case, it's
1379 * important to avoid memory leaks, so the API is designed to minimize the
1380 * amount of stuff that needs to be allocated and freed.
1383 * rawstring: the input string; must be overwritable! On return, it's
1384 * been modified to contain the separated identifiers.
1385 * separator: the separator punctuation expected between identifiers
1386 * (typically '.' or ','). Whitespace may also appear around
1389 * namelist: filled with a palloc'd list of pointers to identifiers within
1390 * rawstring. Caller should freeList() this even on error return.
1392 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1394 * Note that an empty string is considered okay here, though not in
1395 * textToQualifiedNameList.
1398 SplitIdentifierString(char *rawstring, char separator,
1401 char *nextp = rawstring;
1406 while (isspace((unsigned char) *nextp))
1407 nextp++; /* skip leading whitespace */
1410 return true; /* allow empty string */
1412 /* At the top of the loop, we are at start of a new identifier. */
1421 /* Quoted name --- collapse quote-quote pairs, no downcasing */
1422 curname = nextp + 1;
1425 endp = strchr(nextp + 1, '\"');
1427 return false; /* mismatched quotes */
1428 if (endp[1] != '\"')
1429 break; /* found end of quoted name */
1430 /* Collapse adjacent quotes into one quote, and look again */
1431 memmove(endp, endp+1, strlen(endp));
1434 /* endp now points at the terminating quote */
1439 /* Unquoted name --- extends to separator or whitespace */
1441 while (*nextp && *nextp != separator &&
1442 !isspace((unsigned char) *nextp))
1445 * It's important that this match the identifier downcasing
1446 * code used by backend/parser/scan.l.
1448 if (isupper((unsigned char) *nextp))
1449 *nextp = tolower((unsigned char) *nextp);
1453 if (curname == nextp)
1454 return false; /* empty unquoted name not allowed */
1457 while (isspace((unsigned char) *nextp))
1458 nextp++; /* skip trailing whitespace */
1460 if (*nextp == separator)
1463 while (isspace((unsigned char) *nextp))
1464 nextp++; /* skip leading whitespace for next */
1465 /* we expect another name, so done remains false */
1467 else if (*nextp == '\0')
1470 return false; /* invalid syntax */
1472 /* Now safe to overwrite separator with a null */
1475 /* Truncate name if it's overlength; again, should match scan.l */
1476 curlen = strlen(curname);
1477 if (curlen >= NAMEDATALEN)
1480 curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1481 curname[curlen] = '\0';
1483 curname[NAMEDATALEN - 1] = '\0';
1488 * Finished isolating current name --- add it to list
1490 *namelist = lappend(*namelist, curname);
1492 /* Loop back if we didn't reach end of string */
1499 /*****************************************************************************
1500 * Comparison Functions used for bytea
1502 * Note: btree indexes need these routines not to leak memory; therefore,
1503 * be careful to free working copies of toasted datums. Most places don't
1504 * need to be so careful.
1505 *****************************************************************************/
1508 byteaeq(PG_FUNCTION_ARGS)
1510 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1511 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1516 len1 = VARSIZE(arg1) - VARHDRSZ;
1517 len2 = VARSIZE(arg2) - VARHDRSZ;
1519 /* fast path for different-length inputs */
1523 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1525 PG_FREE_IF_COPY(arg1, 0);
1526 PG_FREE_IF_COPY(arg2, 1);
1528 PG_RETURN_BOOL(result);
1532 byteane(PG_FUNCTION_ARGS)
1534 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1535 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1540 len1 = VARSIZE(arg1) - VARHDRSZ;
1541 len2 = VARSIZE(arg2) - VARHDRSZ;
1543 /* fast path for different-length inputs */
1547 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1549 PG_FREE_IF_COPY(arg1, 0);
1550 PG_FREE_IF_COPY(arg2, 1);
1552 PG_RETURN_BOOL(result);
1556 bytealt(PG_FUNCTION_ARGS)
1558 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1559 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1564 len1 = VARSIZE(arg1) - VARHDRSZ;
1565 len2 = VARSIZE(arg2) - VARHDRSZ;
1567 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1569 PG_FREE_IF_COPY(arg1, 0);
1570 PG_FREE_IF_COPY(arg2, 1);
1572 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1576 byteale(PG_FUNCTION_ARGS)
1578 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1579 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1584 len1 = VARSIZE(arg1) - VARHDRSZ;
1585 len2 = VARSIZE(arg2) - VARHDRSZ;
1587 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1589 PG_FREE_IF_COPY(arg1, 0);
1590 PG_FREE_IF_COPY(arg2, 1);
1592 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1596 byteagt(PG_FUNCTION_ARGS)
1598 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1599 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1604 len1 = VARSIZE(arg1) - VARHDRSZ;
1605 len2 = VARSIZE(arg2) - VARHDRSZ;
1607 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1609 PG_FREE_IF_COPY(arg1, 0);
1610 PG_FREE_IF_COPY(arg2, 1);
1612 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1616 byteage(PG_FUNCTION_ARGS)
1618 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1619 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1624 len1 = VARSIZE(arg1) - VARHDRSZ;
1625 len2 = VARSIZE(arg2) - VARHDRSZ;
1627 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1629 PG_FREE_IF_COPY(arg1, 0);
1630 PG_FREE_IF_COPY(arg2, 1);
1632 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1636 byteacmp(PG_FUNCTION_ARGS)
1638 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1639 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1644 len1 = VARSIZE(arg1) - VARHDRSZ;
1645 len2 = VARSIZE(arg2) - VARHDRSZ;
1647 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1648 if ((cmp == 0) && (len1 != len2))
1649 cmp = (len1 < len2) ? -1 : 1;
1651 PG_FREE_IF_COPY(arg1, 0);
1652 PG_FREE_IF_COPY(arg2, 1);
1654 PG_RETURN_INT32(cmp);
1659 * replace all occurrences of 'old_sub_str' in 'orig_str'
1660 * with 'new_sub_str' to form 'new_str'
1662 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1663 * otherwise returns 'new_str'
1666 replace_text(PG_FUNCTION_ARGS)
1673 text *src_text = PG_GETARG_TEXT_P(0);
1674 int src_text_len = TEXTLEN(src_text);
1675 text *from_sub_text = PG_GETARG_TEXT_P(1);
1676 int from_sub_text_len = TEXTLEN(from_sub_text);
1677 text *to_sub_text = PG_GETARG_TEXT_P(2);
1678 char *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1679 StringInfo str = makeStringInfo();
1681 if (src_text_len == 0 || from_sub_text_len == 0)
1682 PG_RETURN_TEXT_P(src_text);
1684 buf_text = TEXTDUP(src_text);
1685 curr_posn = TEXTPOS(buf_text, from_sub_text);
1687 while (curr_posn > 0)
1689 left_text = LEFT(buf_text, from_sub_text);
1690 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1692 appendStringInfo(str, PG_TEXT_GET_STR(left_text));
1693 appendStringInfo(str, to_sub_str);
1697 buf_text = right_text;
1698 curr_posn = TEXTPOS(buf_text, from_sub_text);
1701 appendStringInfo(str, PG_TEXT_GET_STR(buf_text));
1704 ret_text = PG_STR_GET_TEXT(str->data);
1708 PG_RETURN_TEXT_P(ret_text);
1713 * parse input string
1714 * return ord item (1 based)
1715 * based on provided field separator
1718 split_text(PG_FUNCTION_ARGS)
1720 text *inputstring = PG_GETARG_TEXT_P(0);
1721 int inputstring_len = TEXTLEN(inputstring);
1722 text *fldsep = PG_GETARG_TEXT_P(1);
1723 int fldsep_len = TEXTLEN(fldsep);
1724 int fldnum = PG_GETARG_INT32(2);
1729 /* return empty string for empty input string */
1730 if (inputstring_len < 1)
1731 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1733 /* empty field separator */
1736 if (fldnum == 1) /* first field - just return the input string */
1737 PG_RETURN_TEXT_P(inputstring);
1738 else /* otherwise return an empty string */
1739 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1742 /* field number is 1 based */
1744 elog(ERROR, "field position must be > 0");
1746 start_posn = text_position(PointerGetDatum(inputstring),
1747 PointerGetDatum(fldsep),
1749 end_posn = text_position(PointerGetDatum(inputstring),
1750 PointerGetDatum(fldsep),
1753 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
1755 if (fldnum == 1) /* first field - just return the input string */
1756 PG_RETURN_TEXT_P(inputstring);
1757 else /* otherwise return an empty string */
1758 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1760 else if ((start_posn != 0) && (end_posn == 0))
1762 /* last field requested */
1763 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
1764 PG_RETURN_TEXT_P(result_text);
1766 else if ((start_posn == 0) && (end_posn != 0))
1768 /* first field requested */
1769 result_text = LEFT(inputstring, fldsep);
1770 PG_RETURN_TEXT_P(result_text);
1774 /* prior to last field requested */
1775 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
1776 PG_RETURN_TEXT_P(result_text);
1782 * Convert an int32 to a string containing a base 16 (hex) representation of
1786 to_hex32(PG_FUNCTION_ARGS)
1788 static char digits[] = "0123456789abcdef";
1789 char buf[32]; /* bigger than needed, but reasonable */
1792 int32 value = PG_GETARG_INT32(0);
1794 ptr = buf + sizeof(buf) - 1;
1799 *--ptr = digits[value % HEXBASE];
1801 } while (ptr > buf && value);
1803 result_text = PG_STR_GET_TEXT(ptr);
1804 PG_RETURN_TEXT_P(result_text);
1808 * Convert an int64 to a string containing a base 16 (hex) representation of
1812 to_hex64(PG_FUNCTION_ARGS)
1814 static char digits[] = "0123456789abcdef";
1815 char buf[32]; /* bigger than needed, but reasonable */
1818 int64 value = PG_GETARG_INT64(0);
1820 ptr = buf + sizeof(buf) - 1;
1825 *--ptr = digits[value % HEXBASE];
1827 } while (ptr > buf && value);
1829 result_text = PG_STR_GET_TEXT(ptr);
1830 PG_RETURN_TEXT_P(result_text);