1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.145 2006/03/05 15:58:44 momjian Exp $
13 *-------------------------------------------------------------------------
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/crypt.h"
23 #include "libpq/pqformat.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "regex/regex.h"
28 #include "utils/array.h"
29 #include "utils/builtins.h"
30 #include "utils/lsyscache.h"
31 #include "utils/pg_locale.h"
34 typedef struct varlena unknown;
36 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
37 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
38 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
39 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
40 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
42 #define PG_TEXTARG_GET_STR(arg_) \
43 DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
44 #define PG_TEXT_GET_STR(textp_) \
45 DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
46 #define PG_STR_GET_TEXT(str_) \
47 DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
48 #define TEXTLEN(textp) \
49 text_length(PointerGetDatum(textp))
50 #define TEXTPOS(buf_text, from_sub_text) \
51 text_position(buf_text, from_sub_text, 1)
52 #define LEFT(buf_text, from_sub_text) \
53 text_substring(PointerGetDatum(buf_text), \
55 TEXTPOS(buf_text, from_sub_text) - 1, false)
57 static int text_cmp(text *arg1, text *arg2);
58 static int32 text_length(Datum str);
59 static int32 text_position(text *t1, text *t2, int matchnum);
60 static text *text_substring(Datum str,
63 bool length_not_specified);
65 static void appendStringInfoText(StringInfo str, const text *t);
68 /*****************************************************************************
70 *****************************************************************************/
73 #define VAL(CH) ((CH) - '0')
74 #define DIG(VAL) ((VAL) + '0')
77 * byteain - converts from printable representation of byte array
79 * Non-printable characters must be passed as '\nnn' (octal) and are
80 * converted to internal form. '\' must be passed as '\\'.
81 * ereport(ERROR, ...) if bad form.
84 * The input is scanned twice.
85 * The error checking of input is minimal.
88 byteain(PG_FUNCTION_ARGS)
90 char *inputText = PG_GETARG_CSTRING(0);
96 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
100 else if ((tp[0] == '\\') &&
101 (tp[1] >= '0' && tp[1] <= '3') &&
102 (tp[2] >= '0' && tp[2] <= '7') &&
103 (tp[3] >= '0' && tp[3] <= '7'))
105 else if ((tp[0] == '\\') &&
111 * one backslash, not followed by 0 or ### valid octal
114 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
115 errmsg("invalid input syntax for type bytea")));
120 result = (bytea *) palloc(byte);
121 VARATT_SIZEP(result) = byte; /* set varlena length */
124 rp = VARDATA(result);
129 else if ((tp[0] == '\\') &&
130 (tp[1] >= '0' && tp[1] <= '3') &&
131 (tp[2] >= '0' && tp[2] <= '7') &&
132 (tp[3] >= '0' && tp[3] <= '7'))
138 *rp++ = byte + VAL(tp[3]);
141 else if ((tp[0] == '\\') &&
150 * We should never get here. The first pass should not allow it.
153 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
154 errmsg("invalid input syntax for type bytea")));
158 PG_RETURN_BYTEA_P(result);
162 * byteaout - converts to printable representation of byte array
164 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
167 * NULL vlena should be an error--returning string with NULL for now.
170 byteaout(PG_FUNCTION_ARGS)
172 bytea *vlena = PG_GETARG_BYTEA_P(0);
176 int val; /* holds unprintable chars */
180 len = 1; /* empty string has 1 char */
182 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
186 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
191 rp = result = (char *) palloc(len);
193 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
200 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
204 rp[3] = DIG(val & 07);
206 rp[2] = DIG(val & 07);
208 rp[1] = DIG(val & 03);
215 PG_RETURN_CSTRING(result);
219 * bytearecv - converts external binary format to bytea
222 bytearecv(PG_FUNCTION_ARGS)
224 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
228 nbytes = buf->len - buf->cursor;
229 result = (bytea *) palloc(nbytes + VARHDRSZ);
230 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
231 pq_copymsgbytes(buf, VARDATA(result), nbytes);
232 PG_RETURN_BYTEA_P(result);
236 * byteasend - converts bytea to binary format
238 * This is a special case: just copy the input...
241 byteasend(PG_FUNCTION_ARGS)
243 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
245 PG_RETURN_BYTEA_P(vlena);
250 * textin - converts "..." to internal representation
253 textin(PG_FUNCTION_ARGS)
255 char *inputText = PG_GETARG_CSTRING(0);
259 /* verify encoding */
260 len = strlen(inputText);
261 pg_verifymbstr(inputText, len, false);
263 result = (text *) palloc(len + VARHDRSZ);
264 VARATT_SIZEP(result) = len + VARHDRSZ;
266 memcpy(VARDATA(result), inputText, len);
268 PG_RETURN_TEXT_P(result);
272 * textout - converts internal representation to "..."
275 textout(PG_FUNCTION_ARGS)
277 text *t = PG_GETARG_TEXT_P(0);
281 len = VARSIZE(t) - VARHDRSZ;
282 result = (char *) palloc(len + 1);
283 memcpy(result, VARDATA(t), len);
286 PG_RETURN_CSTRING(result);
290 * textrecv - converts external binary format to text
293 textrecv(PG_FUNCTION_ARGS)
295 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
300 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
302 /* verify encoding */
303 pg_verifymbstr(str, nbytes, false);
305 result = (text *) palloc(nbytes + VARHDRSZ);
306 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
307 memcpy(VARDATA(result), str, nbytes);
309 PG_RETURN_TEXT_P(result);
313 * textsend - converts text to binary format
316 textsend(PG_FUNCTION_ARGS)
318 text *t = PG_GETARG_TEXT_P(0);
321 pq_begintypsend(&buf);
322 pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
323 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
328 * unknownin - converts "..." to internal representation
331 unknownin(PG_FUNCTION_ARGS)
333 char *str = PG_GETARG_CSTRING(0);
335 /* representation is same as cstring */
336 PG_RETURN_CSTRING(pstrdup(str));
340 * unknownout - converts internal representation to "..."
343 unknownout(PG_FUNCTION_ARGS)
345 /* representation is same as cstring */
346 char *str = PG_GETARG_CSTRING(0);
348 PG_RETURN_CSTRING(pstrdup(str));
352 * unknownrecv - converts external binary format to unknown
355 unknownrecv(PG_FUNCTION_ARGS)
357 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
361 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
362 /* representation is same as cstring */
363 PG_RETURN_CSTRING(str);
367 * unknownsend - converts unknown to binary format
370 unknownsend(PG_FUNCTION_ARGS)
372 /* representation is same as cstring */
373 char *str = PG_GETARG_CSTRING(0);
376 pq_begintypsend(&buf);
377 pq_sendtext(&buf, str, strlen(str));
378 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
382 /* ========== PUBLIC ROUTINES ========== */
386 * returns the logical length of a text*
387 * (which is less than the VARSIZE of the text*)
390 textlen(PG_FUNCTION_ARGS)
392 Datum str = PG_GETARG_DATUM(0);
394 /* try to avoid decompressing argument */
395 PG_RETURN_INT32(text_length(str));
400 * Does the real work for textlen()
402 * This is broken out so it can be called directly by other string processing
403 * functions. Note that the argument is passed as a Datum, to indicate that
404 * it may still be in compressed form. We can avoid decompressing it at all
408 text_length(Datum str)
410 /* fastpath when max encoding length is one */
411 if (pg_database_encoding_max_length() == 1)
412 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
415 text *t = DatumGetTextP(str);
417 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
418 VARSIZE(t) - VARHDRSZ));
424 * returns the physical length of a text*
425 * (which is less than the VARSIZE of the text*)
428 textoctetlen(PG_FUNCTION_ARGS)
430 Datum str = PG_GETARG_DATUM(0);
432 /* We need not detoast the input at all */
433 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
438 * takes two text* and returns a text* that is the concatenation of
441 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
442 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
443 * Allocate space for output in all cases.
444 * XXX - thomas 1997-07-10
447 textcat(PG_FUNCTION_ARGS)
449 text *t1 = PG_GETARG_TEXT_P(0);
450 text *t2 = PG_GETARG_TEXT_P(1);
457 len1 = VARSIZE(t1) - VARHDRSZ;
461 len2 = VARSIZE(t2) - VARHDRSZ;
465 len = len1 + len2 + VARHDRSZ;
466 result = (text *) palloc(len);
468 /* Set size of result string... */
469 VARATT_SIZEP(result) = len;
471 /* Fill data field of result string... */
472 ptr = VARDATA(result);
474 memcpy(ptr, VARDATA(t1), len1);
476 memcpy(ptr + len1, VARDATA(t2), len2);
478 PG_RETURN_TEXT_P(result);
483 * Return a substring starting at the specified position.
484 * - thomas 1997-12-31
488 * - starting position (is one-based)
491 * If the starting position is zero or less, then return from the start of the string
492 * adjusting the length to be consistent with the "negative start" per SQL92.
493 * If the length is less than zero, an ERROR is thrown; if no length is given, the rest of the string is returned.
495 * Added multibyte support.
496 * - Tatsuo Ishii 1998-4-21
497 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
498 * Formerly returned the entire string; now returns a portion.
499 * - Thomas Lockhart 1998-12-10
500 * Now uses faster TOAST-slicing interface
501 * - John Gray 2002-02-22
502 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
503 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
504 * error; if E < 1, return '', not entire string). Fixed MB related bug when
505 * S > LC and < LC + 4 sometimes garbage characters are returned.
506 * - Joe Conway 2002-08-10
509 text_substr(PG_FUNCTION_ARGS)
511 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
518 * text_substr_no_len -
519 * Wrapper to avoid opr_sanity failure due to
520 * one function accepting a different number of args.
523 text_substr_no_len(PG_FUNCTION_ARGS)
525 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
532 * Does the real work for text_substr() and text_substr_no_len()
534 * This is broken out so it can be called directly by other string processing
535 * functions. Note that the argument is passed as a Datum, to indicate that
536 * it may still be in compressed/toasted form. We can avoid detoasting all
537 * of it in some cases.
540 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
542 int32 eml = pg_database_encoding_max_length();
543 int32 S = start; /* start position */
544 int32 S1; /* adjusted start position */
545 int32 L1; /* adjusted substring length */
547 /* life is easy if the encoding max length is 1 */
552 if (length_not_specified) /* special case - get length to end of
561 * A negative value for L is the only way for the end position to
562 * be before the start. SQL99 says to throw an error.
566 (errcode(ERRCODE_SUBSTRING_ERROR),
567 errmsg("negative substring length not allowed")));
570 * A zero or negative value for the end position can happen if the
571 * start was negative or one. SQL99 says to return a zero-length
575 return PG_STR_GET_TEXT("");
581 * If the start position is past the end of the string, SQL99 says to
582 * return a zero-length string -- DatumGetTextPSlice() will do
583 * that for us. Convert to zero-based starting position
585 return DatumGetTextPSlice(str, S1 - 1, L1);
590 * When encoding max length is > 1, we can't get LC without
591 * detoasting, so we'll grab a conservatively large slice now and go
592 * back later to do the right thing
605 * if S is past the end of the string, the tuple toaster will return a
606 * zero-length string to us
611 * We need to start at position zero because there is no way to know
612 * in advance which byte offset corresponds to the supplied start
617 if (length_not_specified) /* special case - get length to end of
619 slice_size = L1 = -1;
625 * A negative value for L is the only way for the end position to
626 * be before the start. SQL99 says to throw an error.
630 (errcode(ERRCODE_SUBSTRING_ERROR),
631 errmsg("negative substring length not allowed")));
634 * A zero or negative value for the end position can happen if the
635 * start was negative or one. SQL99 says to return a zero-length
639 return PG_STR_GET_TEXT("");
642 * if E is past the end of the string, the tuple toaster will
643 * truncate the length for us
648 * Total slice size in bytes can't be any longer than the start
649 * position plus substring length times the encoding max length.
651 slice_size = (S1 + L1) * eml;
653 slice = DatumGetTextPSlice(str, slice_start, slice_size);
655 /* see if we got back an empty string */
656 if ((VARSIZE(slice) - VARHDRSZ) == 0)
657 return PG_STR_GET_TEXT("");
659 /* Now we can get the actual length of the slice in MB characters */
660 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
663 * Check that the start position wasn't > slice_strlen. If so, SQL99
664 * says to return a zero-length string.
666 if (S1 > slice_strlen)
667 return PG_STR_GET_TEXT("");
670 * Adjust L1 and E1 now that we know the slice string length. Again
671 * remember that S1 is one based, and slice_start is zero based.
674 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
676 E1 = slice_start + 1 + slice_strlen;
679 * Find the start position in the slice; remember S1 is not zero based
682 for (i = 0; i < S1 - 1; i++)
685 /* hang onto a pointer to our start position */
689 * Count the actual bytes used by the substring of the requested
692 for (i = S1; i < E1; i++)
695 ret = (text *) palloc(VARHDRSZ + (p - s));
696 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
697 memcpy(VARDATA(ret), s, (p - s));
702 elog(ERROR, "invalid backend encoding: encoding max length < 1");
704 /* not reached: suppress compiler warning */
710 * Return the position of the specified substring.
711 * Implements the SQL92 POSITION() function.
712 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
713 * - thomas 1997-07-27
716 textpos(PG_FUNCTION_ARGS)
718 text *str = PG_GETARG_TEXT_P(0);
719 text *search_str = PG_GETARG_TEXT_P(1);
721 PG_RETURN_INT32(text_position(str, search_str, 1));
726 * Does the real work for textpos()
729 * t1 - string to be searched
730 * t2 - pattern to match within t1
731 * matchnum - number of the match to be found (1 is the first match)
733 * Character index of the first matched char, starting from 1,
736 * This is broken out so it can be called directly by other string processing
740 text_position(text *t1, text *t2, int matchnum)
750 return 0; /* result for 0th match */
752 if (VARSIZE(t2) <= VARHDRSZ)
753 return 1; /* result for empty pattern */
755 len1 = VARSIZE(t1) - VARHDRSZ;
756 len2 = VARSIZE(t2) - VARHDRSZ;
758 if (pg_database_encoding_max_length() == 1)
760 /* simple case - single byte encoding */
767 /* no use in searching str past point where search_str will fit */
770 for (p = 0; p <= px; p++)
772 if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
774 if (++match == matchnum)
785 /* not as simple - multibyte encoding */
791 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
792 (void) pg_mb2wchar_with_len(VARDATA(t1), p1, len1);
793 len1 = pg_wchar_strlen(p1);
794 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
795 (void) pg_mb2wchar_with_len(VARDATA(t2), p2, len2);
796 len2 = pg_wchar_strlen(p2);
798 /* no use in searching str past point where search_str will fit */
801 for (p = 0; p <= px; p++)
803 if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
805 if (++match == matchnum)
822 * Comparison function for text strings with given lengths.
823 * Includes locale support, but must copy strings to temporary memory
824 * to allow null-termination for inputs to strcoll().
828 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
833 * Unfortunately, there is no strncoll(), so in the non-C locale case we
834 * have to do some memory copying. This turns out to be significantly
835 * slower, so we optimize the case where LC_COLLATE is C. We also try to
836 * optimize relatively-short strings by avoiding palloc/pfree overhead.
838 if (lc_collate_is_c())
840 result = strncmp(arg1, arg2, Min(len1, len2));
841 if ((result == 0) && (len1 != len2))
842 result = (len1 < len2) ? -1 : 1;
846 #define STACKBUFLEN 1024
848 char a1buf[STACKBUFLEN];
849 char a2buf[STACKBUFLEN];
854 /* Win32 does not have UTF-8, so we need to map to UTF-16 */
855 if (GetDatabaseEncoding() == PG_UTF8)
861 if (len1 >= STACKBUFLEN / 2)
863 a1len = len1 * 2 + 2;
871 if (len2 >= STACKBUFLEN / 2)
873 a2len = len2 * 2 + 2;
882 /* stupid Microsloth API does not work for zero-length input */
887 r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
888 (LPWSTR) a1p, a1len / 2);
891 (errmsg("could not convert string to UTF-16: error %lu",
894 ((LPWSTR) a1p)[r] = 0;
900 r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
901 (LPWSTR) a2p, a2len / 2);
904 (errmsg("could not convert string to UTF-16: error %lu",
907 ((LPWSTR) a2p)[r] = 0;
910 result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
911 if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
914 (errmsg("could not compare Unicode strings: %m")));
925 if (len1 >= STACKBUFLEN)
926 a1p = (char *) palloc(len1 + 1);
929 if (len2 >= STACKBUFLEN)
930 a2p = (char *) palloc(len2 + 1);
934 memcpy(a1p, arg1, len1);
936 memcpy(a2p, arg2, len2);
939 result = strcoll(a1p, a2p);
942 * In some locales strcoll() can claim that nonidentical strings are
943 * equal. Believing that would be bad news for a number of reasons,
944 * so we follow Perl's lead and sort "equal" strings according to
948 result = strcmp(a1p, a2p);
961 * Internal comparison function for text strings.
965 text_cmp(text *arg1, text *arg2)
975 len1 = VARSIZE(arg1) - VARHDRSZ;
976 len2 = VARSIZE(arg2) - VARHDRSZ;
978 return varstr_cmp(a1p, len1, a2p, len2);
982 * Comparison functions for text strings.
984 * Note: btree indexes need these routines not to leak memory; therefore,
985 * be careful to free working copies of toasted datums. Most places don't
986 * need to be so careful.
990 texteq(PG_FUNCTION_ARGS)
992 text *arg1 = PG_GETARG_TEXT_P(0);
993 text *arg2 = PG_GETARG_TEXT_P(1);
997 * Since we only care about equality or not-equality, we can avoid all
998 * the expense of strcoll() here, and just do bitwise comparison.
1000 if (VARSIZE(arg1) != VARSIZE(arg2))
1003 result = (strncmp(VARDATA(arg1), VARDATA(arg2),
1004 VARSIZE(arg1) - VARHDRSZ) == 0);
1006 PG_FREE_IF_COPY(arg1, 0);
1007 PG_FREE_IF_COPY(arg2, 1);
1009 PG_RETURN_BOOL(result);
1013 textne(PG_FUNCTION_ARGS)
1015 text *arg1 = PG_GETARG_TEXT_P(0);
1016 text *arg2 = PG_GETARG_TEXT_P(1);
1020 * Since we only care about equality or not-equality, we can avoid all
1021 * the expense of strcoll() here, and just do bitwise comparison.
1023 if (VARSIZE(arg1) != VARSIZE(arg2))
1026 result = (strncmp(VARDATA(arg1), VARDATA(arg2),
1027 VARSIZE(arg1) - VARHDRSZ) != 0);
1029 PG_FREE_IF_COPY(arg1, 0);
1030 PG_FREE_IF_COPY(arg2, 1);
1032 PG_RETURN_BOOL(result);
1036 text_lt(PG_FUNCTION_ARGS)
1038 text *arg1 = PG_GETARG_TEXT_P(0);
1039 text *arg2 = PG_GETARG_TEXT_P(1);
1042 result = (text_cmp(arg1, arg2) < 0);
1044 PG_FREE_IF_COPY(arg1, 0);
1045 PG_FREE_IF_COPY(arg2, 1);
1047 PG_RETURN_BOOL(result);
1051 text_le(PG_FUNCTION_ARGS)
1053 text *arg1 = PG_GETARG_TEXT_P(0);
1054 text *arg2 = PG_GETARG_TEXT_P(1);
1057 result = (text_cmp(arg1, arg2) <= 0);
1059 PG_FREE_IF_COPY(arg1, 0);
1060 PG_FREE_IF_COPY(arg2, 1);
1062 PG_RETURN_BOOL(result);
1066 text_gt(PG_FUNCTION_ARGS)
1068 text *arg1 = PG_GETARG_TEXT_P(0);
1069 text *arg2 = PG_GETARG_TEXT_P(1);
1072 result = (text_cmp(arg1, arg2) > 0);
1074 PG_FREE_IF_COPY(arg1, 0);
1075 PG_FREE_IF_COPY(arg2, 1);
1077 PG_RETURN_BOOL(result);
1081 text_ge(PG_FUNCTION_ARGS)
1083 text *arg1 = PG_GETARG_TEXT_P(0);
1084 text *arg2 = PG_GETARG_TEXT_P(1);
1087 result = (text_cmp(arg1, arg2) >= 0);
1089 PG_FREE_IF_COPY(arg1, 0);
1090 PG_FREE_IF_COPY(arg2, 1);
1092 PG_RETURN_BOOL(result);
1096 bttextcmp(PG_FUNCTION_ARGS)
1098 text *arg1 = PG_GETARG_TEXT_P(0);
1099 text *arg2 = PG_GETARG_TEXT_P(1);
1102 result = text_cmp(arg1, arg2);
1104 PG_FREE_IF_COPY(arg1, 0);
1105 PG_FREE_IF_COPY(arg2, 1);
1107 PG_RETURN_INT32(result);
1112 text_larger(PG_FUNCTION_ARGS)
1114 text *arg1 = PG_GETARG_TEXT_P(0);
1115 text *arg2 = PG_GETARG_TEXT_P(1);
1118 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1120 PG_RETURN_TEXT_P(result);
1124 text_smaller(PG_FUNCTION_ARGS)
1126 text *arg1 = PG_GETARG_TEXT_P(0);
1127 text *arg2 = PG_GETARG_TEXT_P(1);
1130 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1132 PG_RETURN_TEXT_P(result);
1137 * The following operators support character-by-character comparison
1138 * of text data types, to allow building indexes suitable for LIKE
1143 internal_text_pattern_compare(text *arg1, text *arg2)
1147 result = memcmp(VARDATA(arg1), VARDATA(arg2),
1148 Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1151 else if (VARSIZE(arg1) < VARSIZE(arg2))
1153 else if (VARSIZE(arg1) > VARSIZE(arg2))
1161 text_pattern_lt(PG_FUNCTION_ARGS)
1163 text *arg1 = PG_GETARG_TEXT_P(0);
1164 text *arg2 = PG_GETARG_TEXT_P(1);
1167 result = internal_text_pattern_compare(arg1, arg2);
1169 PG_FREE_IF_COPY(arg1, 0);
1170 PG_FREE_IF_COPY(arg2, 1);
1172 PG_RETURN_BOOL(result < 0);
1177 text_pattern_le(PG_FUNCTION_ARGS)
1179 text *arg1 = PG_GETARG_TEXT_P(0);
1180 text *arg2 = PG_GETARG_TEXT_P(1);
1183 result = internal_text_pattern_compare(arg1, arg2);
1185 PG_FREE_IF_COPY(arg1, 0);
1186 PG_FREE_IF_COPY(arg2, 1);
1188 PG_RETURN_BOOL(result <= 0);
1193 text_pattern_eq(PG_FUNCTION_ARGS)
1195 text *arg1 = PG_GETARG_TEXT_P(0);
1196 text *arg2 = PG_GETARG_TEXT_P(1);
1199 if (VARSIZE(arg1) != VARSIZE(arg2))
1202 result = internal_text_pattern_compare(arg1, arg2);
1204 PG_FREE_IF_COPY(arg1, 0);
1205 PG_FREE_IF_COPY(arg2, 1);
1207 PG_RETURN_BOOL(result == 0);
1212 text_pattern_ge(PG_FUNCTION_ARGS)
1214 text *arg1 = PG_GETARG_TEXT_P(0);
1215 text *arg2 = PG_GETARG_TEXT_P(1);
1218 result = internal_text_pattern_compare(arg1, arg2);
1220 PG_FREE_IF_COPY(arg1, 0);
1221 PG_FREE_IF_COPY(arg2, 1);
1223 PG_RETURN_BOOL(result >= 0);
1228 text_pattern_gt(PG_FUNCTION_ARGS)
1230 text *arg1 = PG_GETARG_TEXT_P(0);
1231 text *arg2 = PG_GETARG_TEXT_P(1);
1234 result = internal_text_pattern_compare(arg1, arg2);
1236 PG_FREE_IF_COPY(arg1, 0);
1237 PG_FREE_IF_COPY(arg2, 1);
1239 PG_RETURN_BOOL(result > 0);
1244 text_pattern_ne(PG_FUNCTION_ARGS)
1246 text *arg1 = PG_GETARG_TEXT_P(0);
1247 text *arg2 = PG_GETARG_TEXT_P(1);
1250 if (VARSIZE(arg1) != VARSIZE(arg2))
1253 result = internal_text_pattern_compare(arg1, arg2);
1255 PG_FREE_IF_COPY(arg1, 0);
1256 PG_FREE_IF_COPY(arg2, 1);
1258 PG_RETURN_BOOL(result != 0);
1263 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1265 text *arg1 = PG_GETARG_TEXT_P(0);
1266 text *arg2 = PG_GETARG_TEXT_P(1);
1269 result = internal_text_pattern_compare(arg1, arg2);
1271 PG_FREE_IF_COPY(arg1, 0);
1272 PG_FREE_IF_COPY(arg2, 1);
1274 PG_RETURN_INT32(result);
1278 /*-------------------------------------------------------------
1281 * get the number of bytes contained in an instance of type 'bytea'
1282 *-------------------------------------------------------------
1285 byteaoctetlen(PG_FUNCTION_ARGS)
1287 Datum str = PG_GETARG_DATUM(0);
1289 /* We need not detoast the input at all */
1290 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1295 * takes two bytea* and returns a bytea* that is the concatenation of
1298 * Cloned from textcat and modified as required.
1301 byteacat(PG_FUNCTION_ARGS)
1303 bytea *t1 = PG_GETARG_BYTEA_P(0);
1304 bytea *t2 = PG_GETARG_BYTEA_P(1);
1311 len1 = VARSIZE(t1) - VARHDRSZ;
1315 len2 = VARSIZE(t2) - VARHDRSZ;
1319 len = len1 + len2 + VARHDRSZ;
1320 result = (bytea *) palloc(len);
1322 /* Set size of result string... */
1323 VARATT_SIZEP(result) = len;
1325 /* Fill data field of result string... */
1326 ptr = VARDATA(result);
1328 memcpy(ptr, VARDATA(t1), len1);
1330 memcpy(ptr + len1, VARDATA(t2), len2);
1332 PG_RETURN_BYTEA_P(result);
1335 #define PG_STR_GET_BYTEA(str_) \
1336 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1339 * Return a substring starting at the specified position.
1340 * Cloned from text_substr and modified as required.
1344 * - starting position (is one-based)
1345 * - string length (optional)
1347 * If the starting position is zero or less, then return from the start of the string
1348 * adjusting the length to be consistent with the "negative start" per SQL92.
1349 * If the length is less than zero, an ERROR is thrown. If no third argument
1350 * (length) is provided, the length to the end of the string is assumed.
1353 bytea_substr(PG_FUNCTION_ARGS)
1355 int S = PG_GETARG_INT32(1); /* start position */
1356 int S1; /* adjusted start position */
1357 int L1; /* adjusted substring length */
1361 if (fcinfo->nargs == 2)
1364 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1365 * the end of the string if we pass it a negative value for length.
1372 int E = S + PG_GETARG_INT32(2);
1375 * A negative value for L is the only way for the end position to be
1376 * before the start. SQL99 says to throw an error.
1380 (errcode(ERRCODE_SUBSTRING_ERROR),
1381 errmsg("negative substring length not allowed")));
1384 * A zero or negative value for the end position can happen if the
1385 * start was negative or one. SQL99 says to return a zero-length
1389 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1395 * If the start position is past the end of the string, SQL99 says to
1396 * return a zero-length string -- PG_GETARG_BYTEA_P_SLICE() will do that
1397 * for us. Convert to zero-based starting position
1399 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1403 * bytea_substr_no_len -
1404 * Wrapper to avoid opr_sanity failure due to
1405 * one function accepting a different number of args.
1408 bytea_substr_no_len(PG_FUNCTION_ARGS)
1410 return bytea_substr(fcinfo);
1415 * Return the position of the specified substring.
1416 * Implements the SQL92 POSITION() function.
1417 * Cloned from textpos and modified as required.
1420 byteapos(PG_FUNCTION_ARGS)
1422 bytea *t1 = PG_GETARG_BYTEA_P(0);
1423 bytea *t2 = PG_GETARG_BYTEA_P(1);
1432 if (VARSIZE(t2) <= VARHDRSZ)
1433 PG_RETURN_INT32(1); /* result for empty pattern */
1435 len1 = VARSIZE(t1) - VARHDRSZ;
1436 len2 = VARSIZE(t2) - VARHDRSZ;
1443 for (p = 0; p <= px; p++)
1445 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1453 PG_RETURN_INT32(pos);
1456 /*-------------------------------------------------------------
1459 * this routine treats "bytea" as an array of bytes.
1460 * It returns the Nth byte (a number between 0 and 255).
1461 *-------------------------------------------------------------
1464 byteaGetByte(PG_FUNCTION_ARGS)
1466 bytea *v = PG_GETARG_BYTEA_P(0);
1467 int32 n = PG_GETARG_INT32(1);
1471 len = VARSIZE(v) - VARHDRSZ;
1473 if (n < 0 || n >= len)
1475 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1476 errmsg("index %d out of valid range, 0..%d",
1479 byte = ((unsigned char *) VARDATA(v))[n];
1481 PG_RETURN_INT32(byte);
1484 /*-------------------------------------------------------------
1487 * This routine treats a "bytea" type like an array of bits.
1488 * It returns the value of the Nth bit (0 or 1).
1490 *-------------------------------------------------------------
1493 byteaGetBit(PG_FUNCTION_ARGS)
1495 bytea *v = PG_GETARG_BYTEA_P(0);
1496 int32 n = PG_GETARG_INT32(1);
1502 len = VARSIZE(v) - VARHDRSZ;
1504 if (n < 0 || n >= len * 8)
1506 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1507 errmsg("index %d out of valid range, 0..%d",
1513 byte = ((unsigned char *) VARDATA(v))[byteNo];
1515 if (byte & (1 << bitNo))
1521 /*-------------------------------------------------------------
1524 * Given an instance of type 'bytea' creates a new one with
1525 * the Nth byte set to the given value.
1527 *-------------------------------------------------------------
1530 byteaSetByte(PG_FUNCTION_ARGS)
1532 bytea *v = PG_GETARG_BYTEA_P(0);
1533 int32 n = PG_GETARG_INT32(1);
1534 int32 newByte = PG_GETARG_INT32(2);
1538 len = VARSIZE(v) - VARHDRSZ;
1540 if (n < 0 || n >= len)
1542 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1543 errmsg("index %d out of valid range, 0..%d",
1547 * Make a copy of the original varlena.
1549 res = (bytea *) palloc(VARSIZE(v));
1550 memcpy((char *) res, (char *) v, VARSIZE(v));
1555 ((unsigned char *) VARDATA(res))[n] = newByte;
1557 PG_RETURN_BYTEA_P(res);
1560 /*-------------------------------------------------------------
1563 * Given an instance of type 'bytea' creates a new one with
1564 * the Nth bit set to the given value.
1566 *-------------------------------------------------------------
1569 byteaSetBit(PG_FUNCTION_ARGS)
1571 bytea *v = PG_GETARG_BYTEA_P(0);
1572 int32 n = PG_GETARG_INT32(1);
1573 int32 newBit = PG_GETARG_INT32(2);
1581 len = VARSIZE(v) - VARHDRSZ;
1583 if (n < 0 || n >= len * 8)
1585 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1586 errmsg("index %d out of valid range, 0..%d",
1595 if (newBit != 0 && newBit != 1)
1597 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1598 errmsg("new bit must be 0 or 1")));
1601 * Make a copy of the original varlena.
1603 res = (bytea *) palloc(VARSIZE(v));
1604 memcpy((char *) res, (char *) v, VARSIZE(v));
1609 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1612 newByte = oldByte & (~(1 << bitNo));
1614 newByte = oldByte | (1 << bitNo);
1616 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1618 PG_RETURN_BYTEA_P(res);
1623 * Converts a text type to a Name type.
1626 text_name(PG_FUNCTION_ARGS)
1628 text *s = PG_GETARG_TEXT_P(0);
1632 len = VARSIZE(s) - VARHDRSZ;
1634 /* Truncate oversize input */
1635 if (len >= NAMEDATALEN)
1636 len = NAMEDATALEN - 1;
1639 printf("text- convert string length %d (%d) ->%d\n",
1640 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1643 result = (Name) palloc(NAMEDATALEN);
1644 memcpy(NameStr(*result), VARDATA(s), len);
1646 /* now null pad to full length... */
1647 while (len < NAMEDATALEN)
1649 *(NameStr(*result) + len) = '\0';
1653 PG_RETURN_NAME(result);
1657 * Converts a Name type to a text type.
1660 name_text(PG_FUNCTION_ARGS)
1662 Name s = PG_GETARG_NAME(0);
1666 len = strlen(NameStr(*s));
1669 printf("text- convert string length %d (%d) ->%d\n",
1670 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1673 result = palloc(VARHDRSZ + len);
1674 VARATT_SIZEP(result) = VARHDRSZ + len;
1675 memcpy(VARDATA(result), NameStr(*s), len);
1677 PG_RETURN_TEXT_P(result);
1682 * textToQualifiedNameList - convert a text object to list of names
1684 * This implements the input parsing needed by nextval() and other
1685 * functions that take a text parameter representing a qualified name.
1686 * We split the name at dots, downcase if not double-quoted, and
1687 * truncate names if they're too long.
/*
 * textToQualifiedNameList: turn a text value holding a dotted name into a
 * list of String nodes, one per name component.
 *
 * textval is converted to a C string through textout, split in place by
 * SplitIdentifierString using a dot as separator, and each resulting
 * identifier is copied with pstrdup and wrapped with makeString.
 *
 * An error with ERRCODE_INVALID_NAME (invalid name syntax) is reported
 * when the split fails or when it produces no names at all.  The temporary
 * list of raw pointers is released with list_free before returning.
 */
1690 textToQualifiedNameList(text *textval)
1697 /* Convert to C string (handles possible detoasting). */
1698 /* Note we rely on being able to modify rawname below. */
1699 rawname = DatumGetCString(DirectFunctionCall1(textout,
1700 PointerGetDatum(textval)));
1702 if (!SplitIdentifierString(rawname, '.', &namelist))
1704 (errcode(ERRCODE_INVALID_NAME),
1705 errmsg("invalid name syntax")));
1707 if (namelist == NIL)
1709 (errcode(ERRCODE_INVALID_NAME),
1710 errmsg("invalid name syntax")));
1712 foreach(l, namelist)
1714 char *curname = (char *) lfirst(l);
/* curname comes from the list filled by the split of rawname; store a separate copy */
1716 result = lappend(result, makeString(pstrdup(curname)));
1720 list_free(namelist);
1726 * SplitIdentifierString --- parse a string containing identifiers
1728 * This is the guts of textToQualifiedNameList, and is exported for use in
1729 * other situations such as parsing GUC variables. In the GUC case, it's
1730 * important to avoid memory leaks, so the API is designed to minimize the
1731 * amount of stuff that needs to be allocated and freed.
1734 * rawstring: the input string; must be overwritable! On return, it's
1735 * been modified to contain the separated identifiers.
1736 * separator: the separator punctuation expected between identifiers
1737 * (typically '.' or ','). Whitespace may also appear around
1740 * namelist: filled with a palloc'd list of pointers to identifiers within
1741 * rawstring. Caller should list_free() this even on error return.
1743 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1745 * Note that an empty string is considered okay here, though not in
1746 * textToQualifiedNameList.
/*
 * SplitIdentifierString: split rawstring in place into identifiers and
 * append a pointer to each one to the list referenced by namelist.
 *
 * Returns true on success, including for an empty string.  Returns false
 * for mismatched double quotes, for an empty unquoted name, and when an
 * identifier is followed by something other than the separator or the
 * end of the string.
 *
 * Quoted names have doubled quote characters collapsed and are not
 * downcased.  Unquoted names run up to the separator or whitespace and are
 * downcased through downcase_truncate_identifier, with the result copied
 * back over the original bytes.  Every name is finally passed to
 * truncate_identifier.  Whitespace is skipped before and after each name.
 */
1749 SplitIdentifierString(char *rawstring, char separator,
1752 char *nextp = rawstring;
1757 while (isspace((unsigned char) *nextp))
1758 nextp++; /* skip leading whitespace */
1761 return true; /* allow empty string */
1763 /* At the top of the loop, we are at start of a new identifier. */
1771 /* Quoted name --- collapse quote-quote pairs, no downcasing */
1772 curname = nextp + 1;
1775 endp = strchr(nextp + 1, '\"');
1777 return false; /* mismatched quotes */
1778 if (endp[1] != '\"')
1779 break; /* found end of quoted name */
1780 /* Collapse adjacent quotes into one quote, and look again */
/* strlen(endp) bytes starting at endp + 1 include the terminating NUL, so the tail stays terminated */
1781 memmove(endp, endp + 1, strlen(endp));
1784 /* endp now points at the terminating quote */
1789 /* Unquoted name --- extends to separator or whitespace */
1794 while (*nextp && *nextp != separator &&
1795 !isspace((unsigned char) *nextp))
1798 if (curname == nextp)
1799 return false; /* empty unquoted name not allowed */
1802 * Downcase the identifier, using same code as main lexer does.
1804 * XXX because we want to overwrite the input in-place, we cannot
1805 * support a downcasing transformation that increases the string
1806 * length. This is not a problem given the current implementation
1807 * of downcase_truncate_identifier, but we'll probably have to do
1808 * something about this someday.
1810 len = endp - curname;
1811 downname = downcase_truncate_identifier(curname, len, false);
1812 Assert(strlen(downname) <= len);
1813 strncpy(curname, downname, len);
1817 while (isspace((unsigned char) *nextp))
1818 nextp++; /* skip trailing whitespace */
1820 if (*nextp == separator)
1823 while (isspace((unsigned char) *nextp))
1824 nextp++; /* skip leading whitespace for next */
1825 /* we expect another name, so done remains false */
1827 else if (*nextp == '\0')
1830 return false; /* invalid syntax */
1832 /* Now safe to overwrite separator with a null */
1835 /* Truncate name if it's overlength */
1836 truncate_identifier(curname, strlen(curname), false);
1839 * Finished isolating current name --- add it to list
1841 *namelist = lappend(*namelist, curname);
1843 /* Loop back if we didn't reach end of string */
1850 /*****************************************************************************
1851 * Comparison Functions used for bytea
1853 * Note: btree indexes need these routines not to leak memory; therefore,
1854 * be careful to free working copies of toasted datums. Most places don't
1855 * need to be so careful.
1856 *****************************************************************************/
/*
 * byteaeq: equality test for two bytea arguments.  Payload lengths are
 * taken from the varlena sizes; the contents are compared with memcmp
 * over len1 bytes and the result is true when memcmp reports no
 * difference.  Returns a bool.
 */
1859 byteaeq(PG_FUNCTION_ARGS)
1861 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1862 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1867 len1 = VARSIZE(arg1) - VARHDRSZ;
1868 len2 = VARSIZE(arg2) - VARHDRSZ;
1870 /* fast path for different-length inputs */
1874 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
/* release working copies of the arguments, if any were made */
1876 PG_FREE_IF_COPY(arg1, 0);
1877 PG_FREE_IF_COPY(arg2, 1);
1879 PG_RETURN_BOOL(result);
/*
 * byteane: inequality test for two bytea arguments.  Mirrors byteaeq: the
 * contents are compared with memcmp over len1 bytes and the result is
 * true when memcmp reports a difference.  Returns a bool.
 */
1883 byteane(PG_FUNCTION_ARGS)
1885 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1886 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1891 len1 = VARSIZE(arg1) - VARHDRSZ;
1892 len2 = VARSIZE(arg2) - VARHDRSZ;
1894 /* fast path for different-length inputs */
1898 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
/* release working copies of the arguments, if any were made */
1900 PG_FREE_IF_COPY(arg1, 0);
1901 PG_FREE_IF_COPY(arg2, 1);
1903 PG_RETURN_BOOL(result);
/*
 * bytealt: less-than test for two bytea arguments.  The common prefix
 * (the shorter of the two lengths) is compared with memcmp; when the
 * prefixes are equal the shorter value sorts first.  Returns true when
 * arg1 sorts strictly before arg2.
 */
1907 bytealt(PG_FUNCTION_ARGS)
1909 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1910 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1915 len1 = VARSIZE(arg1) - VARHDRSZ;
1916 len2 = VARSIZE(arg2) - VARHDRSZ;
1918 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* the lengths were saved above, so the copies can be released before the final test */
1920 PG_FREE_IF_COPY(arg1, 0);
1921 PG_FREE_IF_COPY(arg2, 1);
1923 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
/*
 * byteale: less-than-or-equal test for two bytea arguments.  The common
 * prefix is compared with memcmp; when the prefixes are equal the result
 * depends on the lengths.  Returns true when arg1 sorts before arg2 or
 * the two values are equal.
 */
1927 byteale(PG_FUNCTION_ARGS)
1929 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1930 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1935 len1 = VARSIZE(arg1) - VARHDRSZ;
1936 len2 = VARSIZE(arg2) - VARHDRSZ;
1938 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* the lengths were saved above, so the copies can be released before the final test */
1940 PG_FREE_IF_COPY(arg1, 0);
1941 PG_FREE_IF_COPY(arg2, 1);
1943 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
/*
 * byteagt: greater-than test for two bytea arguments.  The common prefix
 * is compared with memcmp; when the prefixes are equal the longer value
 * sorts last.  Returns true when arg1 sorts strictly after arg2.
 */
1947 byteagt(PG_FUNCTION_ARGS)
1949 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1950 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1955 len1 = VARSIZE(arg1) - VARHDRSZ;
1956 len2 = VARSIZE(arg2) - VARHDRSZ;
1958 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* the lengths were saved above, so the copies can be released before the final test */
1960 PG_FREE_IF_COPY(arg1, 0);
1961 PG_FREE_IF_COPY(arg2, 1);
1963 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
/*
 * byteage: greater-than-or-equal test for two bytea arguments.  The
 * common prefix is compared with memcmp; when the prefixes are equal the
 * result depends on the lengths.  Returns true when arg1 sorts after arg2
 * or the two values are equal.
 */
1967 byteage(PG_FUNCTION_ARGS)
1969 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1970 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1975 len1 = VARSIZE(arg1) - VARHDRSZ;
1976 len2 = VARSIZE(arg2) - VARHDRSZ;
1978 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* the lengths were saved above, so the copies can be released before the final test */
1980 PG_FREE_IF_COPY(arg1, 0);
1981 PG_FREE_IF_COPY(arg2, 1);
1983 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
/*
 * byteacmp: three-way comparison of two bytea arguments, returned as an
 * int32.  The common prefix is compared with memcmp.  When the prefixes
 * are equal and the lengths differ, the result is -1 if arg1 is shorter
 * and 1 if it is longer.  Otherwise the memcmp result is returned as is,
 * so callers should rely only on its sign.
 */
1987 byteacmp(PG_FUNCTION_ARGS)
1989 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1990 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1995 len1 = VARSIZE(arg1) - VARHDRSZ;
1996 len2 = VARSIZE(arg2) - VARHDRSZ;
1998 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* equal prefixes: the shorter value sorts first */
1999 if ((cmp == 0) && (len1 != len2))
2000 cmp = (len1 < len2) ? -1 : 1;
2002 PG_FREE_IF_COPY(arg1, 0);
2003 PG_FREE_IF_COPY(arg2, 1);
2005 PG_RETURN_INT32(cmp);
2009 * appendStringInfoText
2011 * Append a text to str.
2012 * Like appendStringInfoString(str, PG_TEXT_GET_STR(s)) but faster.
/*
 * appendStringInfoText: append the payload bytes of t (its varlena size
 * minus the header) to str by a single appendBinaryStringInfo call.
 * NOTE(review): reads VARDATA and VARSIZE directly, so t is presumably
 * expected to be detoasted already -- confirm against callers.
 */
2015 appendStringInfoText(StringInfo str, const text *t)
2017 appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
2022 * replace all occurrences of 'old_sub_str' in 'orig_str'
2023 * with 'new_sub_str' to form 'new_str'
2025 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2026 * otherwise returns 'new_str'
/*
 * replace_text: arguments are the source text (0), the substring to look
 * for (1) and its replacement (2).
 *
 * The source text is returned unchanged when it or the search substring
 * has zero length, and also when the substring does not occur.  Otherwise
 * the result is built in a StringInfo: for each match the text before the
 * match and then the replacement are appended, and the search continues
 * in the text that follows the match.  Whatever remains after the last
 * match is appended at the end, and the buffer contents are converted to
 * a text value through PG_STR_GET_TEXT.
 */
2029 replace_text(PG_FUNCTION_ARGS)
2031 text *src_text = PG_GETARG_TEXT_P(0);
2032 text *from_sub_text = PG_GETARG_TEXT_P(1);
2033 text *to_sub_text = PG_GETARG_TEXT_P(2);
2034 int src_text_len = TEXTLEN(src_text);
2035 int from_sub_text_len = TEXTLEN(from_sub_text);
2043 if (src_text_len == 0 || from_sub_text_len == 0)
2044 PG_RETURN_TEXT_P(src_text);
/* TEXTPOS expands to text_position with match number 1, i.e. the first occurrence */
2046 curr_posn = TEXTPOS(src_text, from_sub_text);
2048 /* When the from_sub_text is not found, there is nothing to do. */
2050 PG_RETURN_TEXT_P(src_text);
2052 initStringInfo(&str);
2053 buf_text = src_text;
2055 while (curr_posn > 0)
/* left_text: everything before the match; right_text: everything after it */
2057 left_text = text_substring(PointerGetDatum(buf_text),
2058 1, curr_posn - 1, false);
2059 right_text = text_substring(PointerGetDatum(buf_text),
2060 curr_posn + from_sub_text_len, -1, true);
2062 appendStringInfoText(&str, left_text);
2063 appendStringInfoText(&str, to_sub_text);
2065 if (buf_text != src_text)
/* continue searching in the remainder only */
2068 buf_text = right_text;
2069 curr_posn = TEXTPOS(buf_text, from_sub_text);
2072 appendStringInfoText(&str, buf_text);
2073 if (buf_text != src_text)
2076 ret_text = PG_STR_GET_TEXT(str.data);
2079 PG_RETURN_TEXT_P(ret_text);
2083 * check_replace_text_has_escape_char
2085 * check whether replace_text contains escape char.
/*
 * check_replace_text_has_escape_char: walk the payload bytes of
 * replace_text from start to end.  When the database encoding has a
 * maximum character length of 1 the scan advances one byte at a time;
 * otherwise it advances by pg_mblen so that only character boundaries
 * are examined.
 */
2088 check_replace_text_has_escape_char(const text *replace_text)
2090 const char *p = VARDATA(replace_text);
2091 const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2093 if (pg_database_encoding_max_length() == 1)
2095 for (; p < p_end; p++)
2103 for (; p < p_end; p += pg_mblen(p))
2114 * appendStringInfoRegexpSubstr
2116 * Append replace_text to str, substituting regexp back references for
/*
 * appendStringInfoRegexpSubstr: append replace_text to str while expanding
 * backslash escapes.
 *
 * str          - output buffer
 * replace_text - replacement template
 * pmatch       - match offsets from the regexp search; entry 0 is the
 *                whole match, and a digit 1 to 9 after a backslash picks
 *                the entry used as a back reference
 * src_text     - the text the offsets refer to
 *
 * Literal runs between backslashes are copied unchanged.  A doubled
 * backslash produces one backslash.  A backslash at the very end of the
 * input, or one followed by an unexpected character, is copied as
 * ordinary data.  When a start and end offset have been selected and
 * neither is -1, the matching piece of src_text is fetched with
 * text_substring and appended.
 */
2120 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2121 regmatch_t *pmatch, text *src_text)
2123 const char *p = VARDATA(replace_text);
2124 const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2125 int eml = pg_database_encoding_max_length();
2129 const char *chunk_start = p;
2133 /* Find next escape char. */
2136 for (; p < p_end && *p != '\\'; p++)
2141 for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2145 /* Copy the text we just scanned over, if any. */
2146 if (p > chunk_start)
2147 appendBinaryStringInfo(str, chunk_start, p - chunk_start);
2149 /* Done if at end of string, else advance over escape char. */
2156 /* Escape at very end of input. Treat same as unexpected char */
2157 appendStringInfoChar(str, '\\');
2161 if (*p >= '1' && *p <= '9')
2163 /* Use the back reference of regexp. */
2166 so = pmatch[idx].rm_so;
2167 eo = pmatch[idx].rm_eo;
2172 /* Use the entire matched string. */
2173 so = pmatch[0].rm_so;
2174 eo = pmatch[0].rm_eo;
2177 else if (*p == '\\')
2179 /* \\ means transfer one \ to output. */
2180 appendStringInfoChar(str, '\\');
2187 * If escape char is not followed by any expected char, just treat
2188 * it as ordinary data to copy. (XXX would it be better to throw
2191 appendStringInfoChar(str, '\\');
2195 if (so != -1 && eo != -1)
2198 * Copy the text that is back reference of regexp. Because so and
2199 * eo are counted in characters not bytes, it's easiest to use
2200 * text_substring to pull out the correct chunk of text.
2204 append_text = text_substring(PointerGetDatum(src_text),
2205 so + 1, (eo - so), false);
2206 appendStringInfoText(str, append_text);
/* Number of regmatch_t slots used by replace_text_regexp: slot 0 is the whole match, slots 1 to 9 serve back references. */
2212 #define REGEXP_REPLACE_BACKREF_CNT 10
2215 * replace_text_regexp
2217 * Replace the text in src_text that matches regexp with replace_text.
2219 * Note: to avoid having to include regex.h in builtins.h, we declare
2220 * the regexp argument as void *, but really it's regex_t *.
/*
 * replace_text_regexp: build a new text value from src_text in which
 * matches of the compiled regexp are replaced by replace_text.
 *
 * src_text     - input text
 * regexp       - compiled pattern, passed as void pointer and cast to
 *                regex_t pointer inside
 * replace_text - replacement; back references are expanded only when
 *                check_replace_text_has_escape_char reports an escape
 * glob         - global option; when off only the first match is replaced
 *
 * The input is converted to wide characters once, then searched
 * repeatedly with pg_regexec starting at search_start.  For every match
 * the text between the previous position and the match start is
 * appended, followed by the replacement, and both search_start and
 * data_pos move to the end of the match.  A zero-width match is detected
 * by equal start and end offsets.  After the loop the text to the right
 * of the last match is appended.  A regexec failure other than
 * REG_NOMATCH is reported with ERRCODE_INVALID_REGULAR_EXPRESSION.
 */
2223 replace_text_regexp(text *src_text, void *regexp,
2224 text *replace_text, bool glob)
2227 regex_t *re = (regex_t *) regexp;
2228 int src_text_len = VARSIZE(src_text) - VARHDRSZ;
2230 regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
2237 initStringInfo(&buf);
2239 /* Convert data string to wide characters. */
/* room for one wide character per input byte, plus one extra slot */
2240 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2241 data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
2243 /* Check whether replace_text has escape char. */
2244 have_escape = check_replace_text_has_escape_char(replace_text);
2246 for (search_start = data_pos = 0; search_start <= data_len;)
2250 regexec_result = pg_regexec(re,
2254 NULL, /* no details */
2255 REGEXP_REPLACE_BACKREF_CNT,
2259 if (regexec_result == REG_NOMATCH)
2262 if (regexec_result != REG_OKAY)
2266 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2268 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2269 errmsg("regular expression failed: %s", errMsg)));
2273 * Copy the text to the left of the match position. Because we are
2274 * working with character not byte indexes, it's easiest to use
2275 * text_substring to pull out the needed data.
2277 if (pmatch[0].rm_so - data_pos > 0)
2281 left_text = text_substring(PointerGetDatum(src_text),
2283 pmatch[0].rm_so - data_pos,
2285 appendStringInfoText(&buf, left_text);
2290 * Copy the replace_text. Process back references when the
2291 * replace_text has escape characters.
2294 appendStringInfoRegexpSubstr(&buf, replace_text, pmatch, src_text);
2296 appendStringInfoText(&buf, replace_text);
2298 search_start = data_pos = pmatch[0].rm_eo;
2301 * When global option is off, replace the first instance only.
2307 * Search from next character when the matching text is zero width.
2309 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2314 * Copy the text to the right of the last match.
2316 if (data_pos < data_len)
2320 right_text = text_substring(PointerGetDatum(src_text),
2321 data_pos + 1, -1, true);
2322 appendStringInfoText(&buf, right_text);
2326 ret_text = PG_STR_GET_TEXT(buf.data);
2335 * parse input string
2336 * return ord item (1 based)
2337 * based on provided field separator
/*
 * split_text: arguments are the input string (0), the field separator (1)
 * and a 1-based field number (2).  Returns one field as text.
 *
 * An error with ERRCODE_INVALID_PARAMETER_VALUE carries the message that
 * the field position must be greater than zero.  An empty input string
 * yields an empty string.  With an empty separator, or when the separator
 * does not occur, the whole input is returned for the first field and an
 * empty string for any other field.
 *
 * Otherwise start_posn is the position of separator occurrence number
 * fldnum - 1 and end_posn that of occurrence number fldnum, with 0
 * meaning no such occurrence.  The field is the text before the first
 * separator, the text after the last separator, or the text between two
 * separators, depending on which of the two positions is zero.
 */
2340 split_text(PG_FUNCTION_ARGS)
2342 text *inputstring = PG_GETARG_TEXT_P(0);
2343 text *fldsep = PG_GETARG_TEXT_P(1);
2344 int fldnum = PG_GETARG_INT32(2);
2345 int inputstring_len = TEXTLEN(inputstring);
2346 int fldsep_len = TEXTLEN(fldsep);
2351 /* field number is 1 based */
2354 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2355 errmsg("field position must be greater than zero")));
2357 /* return empty string for empty input string */
2358 if (inputstring_len < 1)
2359 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2361 /* empty field separator */
2364 /* if first field, return input string, else empty string */
2366 PG_RETURN_TEXT_P(inputstring);
2368 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
/* locate the separators that bound the requested field */
2371 start_posn = text_position(inputstring, fldsep, fldnum - 1);
2372 end_posn = text_position(inputstring, fldsep, fldnum);
2374 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2376 /* if first field, return input string, else empty string */
2378 PG_RETURN_TEXT_P(inputstring);
2380 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2382 else if (start_posn == 0)
2384 /* first field requested */
2385 result_text = LEFT(inputstring, fldsep);
2386 PG_RETURN_TEXT_P(result_text);
2388 else if (end_posn == 0)
2390 /* last field requested */
2391 result_text = text_substring(PointerGetDatum(inputstring),
2392 start_posn + fldsep_len,
2394 PG_RETURN_TEXT_P(result_text);
2398 /* interior field requested */
/* starts just past the leading separator; length is the gap between the two separators */
2399 result_text = text_substring(PointerGetDatum(inputstring),
2400 start_posn + fldsep_len,
2401 end_posn - start_posn - fldsep_len,
2403 PG_RETURN_TEXT_P(result_text);
2409 * parse input string
2410 * return text array of elements
2411 * based on provided field separator
/*
 * text_to_array: arguments are the input string (0) and the field
 * separator (1).  Returns an array with element type TEXTOID.
 *
 * With an empty separator, or when the separator never occurs, the
 * result is a one-element array built by create_singleton_array from the
 * input string.  Otherwise the fields are extracted one at a time, using
 * the previous end position as the next start position, and collected
 * with accumArrayResult; the final array is produced by makeArrayResult
 * in CurrentMemoryContext once both positions come back as zero.
 *
 * NOTE(review): the create_singleton_array calls wrap inputstring, a text
 * pointer, with CStringGetDatum; PointerGetDatum would state the intent
 * more accurately -- confirm that both macros are plain casts.
 */
2414 text_to_array(PG_FUNCTION_ARGS)
2416 text *inputstring = PG_GETARG_TEXT_P(0);
2417 text *fldsep = PG_GETARG_TEXT_P(1);
2418 int inputstring_len = TEXTLEN(inputstring);
2419 int fldsep_len = TEXTLEN(fldsep);
2424 ArrayBuildState *astate = NULL;
2426 /* return NULL for empty input string */
2427 if (inputstring_len < 1)
2431 * empty field separator return one element, 1D, array using the input
2435 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2436 CStringGetDatum(inputstring), 1));
2438 /* start with end position holding the initial start position */
2440 for (fldnum = 1;; fldnum++) /* field number is 1 based */
2443 bool disnull = false;
2445 start_posn = end_posn;
2446 end_posn = text_position(inputstring, fldsep, fldnum);
2448 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2453 * first element return one element, 1D, array using the input
2456 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2457 CStringGetDatum(inputstring), 1));
2461 /* otherwise create array and exit */
2462 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2463 CurrentMemoryContext));
2466 else if (start_posn == 0)
2468 /* first field requested */
2469 result_text = LEFT(inputstring, fldsep);
2471 else if (end_posn == 0)
2473 /* last field requested */
2474 result_text = text_substring(PointerGetDatum(inputstring),
2475 start_posn + fldsep_len,
2480 /* interior field requested */
2481 result_text = text_substring(PointerGetDatum(inputstring),
2482 start_posn + fldsep_len,
2483 end_posn - start_posn - fldsep_len,
2487 /* stash away current value */
2488 dvalue = PointerGetDatum(result_text);
2489 astate = accumArrayResult(astate, dvalue,
2491 CurrentMemoryContext);
2494 /* never reached -- keep compiler quiet */
2500 * concatenate Cstring representation of input array elements
2501 * using provided field separator
/*
 * array_to_text: arguments are an array (0) and a field separator (1),
 * the latter converted to a C string.  Returns text.
 *
 * An array without elements yields an empty string.  Otherwise each
 * element is fetched with fetch_att, converted to a C string by the
 * output function of the element type, and appended to a StringInfo,
 * either preceded by the separator or on its own.  Elements whose bit in
 * the null bitmap is clear are ignored.
 *
 * Type information (typlen, typbyval, typalign and the output function)
 * is kept in an ArrayMetaState stored in fn_extra and allocated in
 * fn_mcxt.  A fresh state has its element_type set to the bitwise
 * complement of the real one, so the comparison that follows cannot match
 * and the lookup is performed; afterwards the lookup is repeated only
 * when the element type differs from the cached one.
 *
 * The data pointer is advanced past each element with att_addlength and
 * realigned with att_align.  The bitmask is checked against 0x100, the
 * value it reaches after covering the eight bits of one bitmap byte.
 */
2504 array_to_text(PG_FUNCTION_ARGS)
2506 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
2507 char *fldsep = PG_TEXTARG_GET_STR(1);
2516 bool printed = false;
2521 ArrayMetaState *my_extra;
2523 ndims = ARR_NDIM(v);
2525 nitems = ArrayGetNItems(ndims, dims);
2527 /* if there are no elements, return an empty string */
2529 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2531 element_type = ARR_ELEMTYPE(v);
2532 initStringInfo(&buf);
2535 * We arrange to look up info about element type, including its output
2536 * conversion proc, only once per series of calls, assuming the element
2537 * type doesn't change underneath us.
2539 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2540 if (my_extra == NULL)
2542 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2543 sizeof(ArrayMetaState));
2544 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2545 my_extra->element_type = ~element_type;
2548 if (my_extra->element_type != element_type)
2551 * Get info about element type, including its output conversion proc
2553 get_type_io_data(element_type, IOFunc_output,
2554 &my_extra->typlen, &my_extra->typbyval,
2555 &my_extra->typalign, &my_extra->typdelim,
2556 &my_extra->typioparam, &my_extra->typiofunc);
2557 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2558 fcinfo->flinfo->fn_mcxt);
2559 my_extra->element_type = element_type;
2561 typlen = my_extra->typlen;
2562 typbyval = my_extra->typbyval;
2563 typalign = my_extra->typalign;
2565 p = ARR_DATA_PTR(v);
2566 bitmap = ARR_NULLBITMAP(v);
2569 for (i = 0; i < nitems; i++)
2574 /* Get source element, checking for NULL */
2575 if (bitmap && (*bitmap & bitmask) == 0)
2577 /* we ignore nulls */
2581 itemvalue = fetch_att(p, typbyval, typlen);
2583 value = DatumGetCString(FunctionCall1(&my_extra->proc,
2587 appendStringInfo(&buf, "%s%s", fldsep, value);
2589 appendStringInfoString(&buf, value);
2592 p = att_addlength(p, typlen, PointerGetDatum(p));
2593 p = (char *) att_align(p, typalign);
2596 /* advance bitmap pointer if any */
2600 if (bitmask == 0x100)
2608 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(buf.data));
2613 * Convert an int32 to a string containing a base 16 (hex) representation of
/*
 * to_hex32: format an int32 argument as lowercase hexadecimal text.
 *
 * The argument is reinterpreted as uint32, so negative inputs are shown
 * as their unsigned 32-bit value.  Digits are written backwards from the
 * end of a local buffer, least significant first; the loop tests its
 * condition after each digit, so a zero input still yields one digit.
 * It stops when the value is used up or the start of the buffer is
 * reached.  The text result is built from the position of the last digit
 * written.
 */
2617 to_hex32(PG_FUNCTION_ARGS)
2619 uint32 value = (uint32) PG_GETARG_INT32(0);
2622 const char *digits = "0123456789abcdef";
2623 char buf[32]; /* bigger than needed, but reasonable */
/* start at the last byte of the buffer and work towards the front */
2625 ptr = buf + sizeof(buf) - 1;
2630 *--ptr = digits[value % HEXBASE];
2632 } while (ptr > buf && value);
2634 result_text = PG_STR_GET_TEXT(ptr);
2635 PG_RETURN_TEXT_P(result_text);
2639 * Convert an int64 to a string containing a base 16 (hex) representation of
/*
 * to_hex64: format an int64 argument as lowercase hexadecimal text.
 *
 * The argument is reinterpreted as uint64, so negative inputs are shown
 * as their unsigned 64-bit value.  Digits are written backwards from the
 * end of a local buffer, least significant first; the loop tests its
 * condition after each digit, so a zero input still yields one digit.
 * It stops when the value is used up or the start of the buffer is
 * reached.  The text result is built from the position of the last digit
 * written.
 */
2643 to_hex64(PG_FUNCTION_ARGS)
2645 uint64 value = (uint64) PG_GETARG_INT64(0);
2648 const char *digits = "0123456789abcdef";
2649 char buf[32]; /* bigger than needed, but reasonable */
/* start at the last byte of the buffer and work towards the front */
2651 ptr = buf + sizeof(buf) - 1;
2656 *--ptr = digits[value % HEXBASE];
2658 } while (ptr > buf && value);
2660 result_text = PG_STR_GET_TEXT(ptr);
2661 PG_RETURN_TEXT_P(result_text);
2665 * Create an md5 hash of a text string and return it as hex
2667 * md5 produces a 16 byte (128 bit) hash; double it for hex
/* Length in characters of the hex form of an md5 digest; the hexsum buffers are declared one byte larger. */
2669 #define MD5_HASH_LEN 32
/*
 * md5_text: compute the md5 hash of a text argument and return it as text.
 *
 * pg_md5_hash is given the payload bytes and their length and writes the
 * hex form of the hash into hexsum, a buffer of MD5_HASH_LEN + 1 bytes.
 * A false return from pg_md5_hash is reported with ERRCODE_OUT_OF_MEMORY.
 * The buffer is then converted to a text value.
 */
2672 md5_text(PG_FUNCTION_ARGS)
2674 text *in_text = PG_GETARG_TEXT_P(0);
2676 char hexsum[MD5_HASH_LEN + 1];
2679 /* Calculate the length of the buffer using varlena metadata */
2680 len = VARSIZE(in_text) - VARHDRSZ;
2682 /* get the hash result */
2683 if (pg_md5_hash(VARDATA(in_text), len, hexsum) == false)
2685 (errcode(ERRCODE_OUT_OF_MEMORY),
2686 errmsg("out of memory")));
2688 /* convert to text and return it */
2689 result_text = PG_STR_GET_TEXT(hexsum);
2690 PG_RETURN_TEXT_P(result_text);
2694 * Create an md5 hash of a bytea field and return it as a hex string:
2695 * 16-byte md5 digest is represented in 32 hex characters.
/*
 * md5_bytea: compute the md5 hash of a bytea argument and return it as
 * text.  Same steps as md5_text: the payload bytes and their length go to
 * pg_md5_hash, which fills hexsum; a false return is reported with
 * ERRCODE_OUT_OF_MEMORY; the buffer is converted to a text value.
 */
2698 md5_bytea(PG_FUNCTION_ARGS)
2700 bytea *in = PG_GETARG_BYTEA_P(0);
2702 char hexsum[MD5_HASH_LEN + 1];
/* payload size in bytes: total varlena size minus the header */
2705 len = VARSIZE(in) - VARHDRSZ;
2706 if (pg_md5_hash(VARDATA(in), len, hexsum) == false)
2708 (errcode(ERRCODE_OUT_OF_MEMORY),
2709 errmsg("out of memory")));
2711 result_text = PG_STR_GET_TEXT(hexsum);
2712 PG_RETURN_TEXT_P(result_text);
2716 * Return the size of a datum, possibly compressed
2718 * Works on any data type
/*
 * pg_column_size: return, as int32, the size of the datum passed as
 * argument 0.
 *
 * On the first call the type of the argument expression is looked up with
 * get_fn_expr_argtype and its typlen with get_typlen; a typlen of zero is
 * reported as a cache lookup failure.  The typlen is stored as an int in
 * memory allocated from fn_mcxt and hung on fn_extra, and is read back
 * from there whenever the function runs.
 *
 * The size is then chosen by typlen: toast_datum_size is used for varlena
 * values, strlen plus one when typlen is -2, and the remaining case is
 * the ordinary fixed-width type.
 */
2721 pg_column_size(PG_FUNCTION_ARGS)
2723 Datum value = PG_GETARG_DATUM(0);
2727 /* On first call, get the input type's typlen, and save at *fn_extra */
2728 if (fcinfo->flinfo->fn_extra == NULL)
2730 /* Lookup the datatype of the supplied argument */
2731 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
2733 typlen = get_typlen(argtypeid);
2734 if (typlen == 0) /* should not happen */
2735 elog(ERROR, "cache lookup failed for type %u", argtypeid);
2737 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2739 *((int *) fcinfo->flinfo->fn_extra) = typlen;
2742 typlen = *((int *) fcinfo->flinfo->fn_extra);
2746 /* varlena type, possibly toasted */
2747 result = toast_datum_size(value);
2749 else if (typlen == -2)
/* the extra byte accounts for the string terminator */
2752 result = strlen(DatumGetCString(value)) + 1;
2756 /* ordinary fixed-width type */
2760 PG_RETURN_INT32(result);