1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.138 2005/10/18 20:38:58 tgl Exp $
13 *-------------------------------------------------------------------------
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/crypt.h"
23 #include "libpq/pqformat.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "regex/regex.h"
28 #include "utils/array.h"
29 #include "utils/builtins.h"
30 #include "utils/lsyscache.h"
31 #include "utils/pg_locale.h"
/*
 * The unknown type is declared as an alias for struct varlena.  The macros
 * below follow the usual argument-macro pattern: detoast a Datum into an
 * unknown pointer (optionally as a copy), fetch argument n that way, and
 * return an unknown pointer through PG_RETURN_POINTER.
 */
34 typedef struct varlena unknown;
36 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
37 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
38 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
39 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
40 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
/*
 * Convenience macros for working with text values:
 *   PG_TEXTARG_GET_STR - run textout on function argument arg_ and yield
 *                        the resulting C string
 *   PG_TEXT_GET_STR    - same, starting from a text pointer
 *   PG_STR_GET_TEXT    - run textin on a C string and yield a text pointer
 *   TEXTLEN            - text_length of a text pointer
 *   TEXTPOS            - text_position of the first match (matchnum is 1)
 *   LEFT               - text_substring with a length of TEXTPOS - 1
 * NOTE(review): one argument line of LEFT is not visible in this excerpt.
 */
42 #define PG_TEXTARG_GET_STR(arg_) \
43 DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
44 #define PG_TEXT_GET_STR(textp_) \
45 DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
46 #define PG_STR_GET_TEXT(str_) \
47 DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
48 #define TEXTLEN(textp) \
49 text_length(PointerGetDatum(textp))
50 #define TEXTPOS(buf_text, from_sub_text) \
51 text_position(buf_text, from_sub_text, 1)
52 #define LEFT(buf_text, from_sub_text) \
53 text_substring(PointerGetDatum(buf_text), \
55 TEXTPOS(buf_text, from_sub_text) - 1, false)
/*
 * Prototypes for file-local (static) helpers.  text_cmp, text_length,
 * text_position and text_substring are defined further down in this file;
 * the definition of appendStringInfoText is not within the visible excerpt.
 */
57 static int text_cmp(text *arg1, text *arg2);
58 static int32 text_length(Datum str);
59 static int32 text_position(text *t1, text *t2, int matchnum);
60 static text *text_substring(Datum str,
63 bool length_not_specified);
65 static void appendStringInfoText(StringInfo str, const text *t);
68 /*****************************************************************************
70 *****************************************************************************/
/*
 * VAL maps an ASCII digit character to its numeric value;
 * DIG maps a single-digit numeric value back to its ASCII character.
 */
73 #define VAL(CH) ((CH) - '0')
74 #define DIG(VAL) ((VAL) + '0')
77 * byteain - converts from printable representation of byte array
79 * Non-printable characters must be passed as '\nnn' (octal) and are
80 * converted to internal form. '\' must be passed as '\\'.
81 * ereport(ERROR, ...) if bad form.
84 * The input is scanned twice.
85 * The error checking of input is minimal.
/*
 * Input function for bytea, working in two passes over the C-string
 * argument.
 *
 * Pass 1 walks the input counting output bytes in the variable byte.  A
 * backslash followed by three octal digits (first digit 0-3, the others
 * 0-7) is one of the accepted forms; input that matches no accepted form
 * raises ERRCODE_INVALID_TEXT_REPRESENTATION.
 *
 * Pass 2 decodes into VARDATA(result).  Its final error branch repeats the
 * same error but is not expected to be reachable, since pass 1 has already
 * validated the input.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt; confirm that byte includes the varlena header size by the time
 * it is passed to palloc and stored as the varlena length.
 */
88 byteain(PG_FUNCTION_ARGS)
90 char *inputText = PG_GETARG_CSTRING(0);
96 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
100 else if ((tp[0] == '\\') &&
101 (tp[1] >= '0' && tp[1] <= '3') &&
102 (tp[2] >= '0' && tp[2] <= '7') &&
103 (tp[3] >= '0' && tp[3] <= '7'))
105 else if ((tp[0] == '\\') &&
111 * one backslash, not followed by 0 or ### valid octal
114 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
115 errmsg("invalid input syntax for type bytea")));
120 result = (bytea *) palloc(byte);
121 VARATT_SIZEP(result) = byte; /* set varlena length */
124 rp = VARDATA(result);
129 else if ((tp[0] == '\\') &&
130 (tp[1] >= '0' && tp[1] <= '3') &&
131 (tp[2] >= '0' && tp[2] <= '7') &&
132 (tp[3] >= '0' && tp[3] <= '7'))
138 *rp++ = byte + VAL(tp[3]);
141 else if ((tp[0] == '\\') &&
150 * We should never get here. The first pass should not allow it.
153 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
154 errmsg("invalid input syntax for type bytea")));
158 PG_RETURN_BYTEA_P(result);
162 * byteaout - converts to printable representation of byte array
164 * Non-printable characters are inserted as '\nnn' (octal) and '\' as '\\'.
167 * NULL vlena should be an error--returning string with NULL for now.
/*
 * Output function for bytea, working in two passes over the detoasted
 * argument.
 *
 * Pass 1 computes the output length, starting from 1 so that an empty
 * value still yields a one-character (terminator-only) string.  Pass 2
 * writes into a buffer of exactly that size.  Bytes below 0x20 or above
 * 0x7e take the escaped path, where three octal digits derived from val
 * are stored in rp[1] through rp[3], least significant digit last.
 *
 * NOTE(review): the branch bodies for the other byte classes are not
 * visible in this excerpt.
 */
170 byteaout(PG_FUNCTION_ARGS)
172 bytea *vlena = PG_GETARG_BYTEA_P(0);
176 int val; /* holds unprintable chars */
180 len = 1; /* empty string has 1 char */
182 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
186 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
191 rp = result = (char *) palloc(len);
193 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
200 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
204 rp[3] = DIG(val & 07);
206 rp[2] = DIG(val & 07);
208 rp[1] = DIG(val & 03);
215 PG_RETURN_CSTRING(result);
219 * bytearecv - converts external binary format to bytea
/*
 * Binary receive function for bytea.  Everything left in the message
 * buffer (buf->len - buf->cursor bytes) is copied with pq_copymsgbytes
 * into a newly allocated bytea whose size is nbytes + VARHDRSZ.
 */
222 bytearecv(PG_FUNCTION_ARGS)
224 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
228 nbytes = buf->len - buf->cursor;
229 result = (bytea *) palloc(nbytes + VARHDRSZ);
230 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
231 pq_copymsgbytes(buf, VARDATA(result), nbytes);
232 PG_RETURN_BYTEA_P(result);
236 * byteasend - converts bytea to binary format
238 * This is a special case: just copy the input...
/*
 * Binary send function for bytea.  The argument is fetched with the _COPY
 * variant of the argument macro and that copy is returned unchanged.
 */
241 byteasend(PG_FUNCTION_ARGS)
243 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
245 PG_RETURN_BYTEA_P(vlena);
250 * textin - converts "..." to internal representation
/*
 * Input function for text.  The C-string argument is measured with strlen,
 * checked with pg_verifymbstr, and its len bytes (without the terminator)
 * are copied into a new text value of size len + VARHDRSZ.
 */
253 textin(PG_FUNCTION_ARGS)
255 char *inputText = PG_GETARG_CSTRING(0);
259 /* verify encoding */
260 len = strlen(inputText);
261 pg_verifymbstr(inputText, len, false);
263 result = (text *) palloc(len + VARHDRSZ);
264 VARATT_SIZEP(result) = len + VARHDRSZ;
266 memcpy(VARDATA(result), inputText, len);
268 PG_RETURN_TEXT_P(result);
272 * textout - converts internal representation to "..."
/*
 * Output function for text.  Allocates len + 1 bytes and copies the len
 * payload bytes of the detoasted argument into them.
 * NOTE(review): the statement that stores the terminating NUL in the extra
 * byte is not visible in this excerpt.
 */
275 textout(PG_FUNCTION_ARGS)
277 text *t = PG_GETARG_TEXT_P(0);
281 len = VARSIZE(t) - VARHDRSZ;
282 result = (char *) palloc(len + 1);
283 memcpy(result, VARDATA(t), len);
286 PG_RETURN_CSTRING(result);
290 * textrecv - converts external binary format to text
/*
 * Binary receive function for text.  The remainder of the message buffer
 * is fetched with pq_getmsgtext, which also reports the resulting byte
 * count in nbytes; the string is checked with pg_verifymbstr and copied
 * into a new text value of size nbytes + VARHDRSZ.
 */
293 textrecv(PG_FUNCTION_ARGS)
295 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
300 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
302 /* verify encoding */
303 pg_verifymbstr(str, nbytes, false);
305 result = (text *) palloc(nbytes + VARHDRSZ);
306 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
307 memcpy(VARDATA(result), str, nbytes);
309 PG_RETURN_TEXT_P(result);
313 * textsend - converts text to binary format
/*
 * Binary send function for text.  The payload bytes of the detoasted
 * argument are appended with pq_sendtext to a message started by
 * pq_begintypsend, and the bytea produced by pq_endtypsend is returned.
 */
316 textsend(PG_FUNCTION_ARGS)
318 text *t = PG_GETARG_TEXT_P(0);
321 pq_begintypsend(&buf);
322 pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
323 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
328 * unknownin - converts "..." to internal representation
/*
 * Input function for the unknown type: returns a pstrdup copy of the
 * C-string argument.
 */
331 unknownin(PG_FUNCTION_ARGS)
333 char *str = PG_GETARG_CSTRING(0);
335 /* representation is same as cstring */
336 PG_RETURN_CSTRING(pstrdup(str));
340 * unknownout - converts internal representation to "..."
/*
 * Output function for the unknown type: the argument is read as a C string
 * and a pstrdup copy of it is returned.
 */
343 unknownout(PG_FUNCTION_ARGS)
345 /* representation is same as cstring */
346 char *str = PG_GETARG_CSTRING(0);
348 PG_RETURN_CSTRING(pstrdup(str));
352 * unknownrecv - converts external binary format to unknown
/*
 * Binary receive function for the unknown type.  The remainder of the
 * message buffer is fetched with pq_getmsgtext and the resulting string is
 * returned directly; nbytes is filled in by that call.
 */
355 unknownrecv(PG_FUNCTION_ARGS)
357 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
361 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
362 /* representation is same as cstring */
363 PG_RETURN_CSTRING(str);
367 * unknownsend - converts unknown to binary format
/*
 * Binary send function for the unknown type.  The argument is read as a
 * C string; strlen(str) bytes of it are appended with pq_sendtext to a
 * message started by pq_begintypsend, and the bytea produced by
 * pq_endtypsend is returned.
 */
370 unknownsend(PG_FUNCTION_ARGS)
372 /* representation is same as cstring */
373 char *str = PG_GETARG_CSTRING(0);
376 pq_begintypsend(&buf);
377 pq_sendtext(&buf, str, strlen(str));
378 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
382 /* ========== PUBLIC ROUTINES ========== */
386 * returns the logical length of a text*
387 * (which is less than the VARSIZE of the text*)
/*
 * SQL-callable wrapper around text_length.  The argument is fetched as a
 * raw Datum (not detoasted here) and handed to text_length.
 */
390 textlen(PG_FUNCTION_ARGS)
392 Datum str = PG_GETARG_DATUM(0);
394 /* try to avoid decompressing argument */
395 PG_RETURN_INT32(text_length(str));
400 * Does the real work for textlen()
402 * This is broken out so it can be called directly by other string processing
403 * functions. Note that the argument is passed as a Datum, to indicate that
404 * it may still be in compressed form. We can avoid decompressing it at all in some cases.
/*
 * Character length of a text Datum.
 *
 * When the database encoding has a maximum character length of 1, the
 * answer is toast_raw_datum_size(str) - VARHDRSZ and the value is not
 * detoasted.  Otherwise the value is detoasted and its characters are
 * counted with pg_mbstrlen_with_len.
 *
 * NOTE(review): this helper is declared static int32 in the prototypes at
 * the top of the file, yet it returns through PG_RETURN_INT32, the macro
 * the SQL-callable functions in this file use; confirm that is intended.
 */
408 text_length(Datum str)
410 /* fastpath when max encoding length is one */
411 if (pg_database_encoding_max_length() == 1)
412 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
415 text *t = DatumGetTextP(str);
417 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
418 VARSIZE(t) - VARHDRSZ));
424 * returns the physical length of a text*
425 * (which is less than the VARSIZE of the text*)
/*
 * Byte length of a text value: toast_raw_datum_size(str) - VARHDRSZ,
 * computed on the raw Datum without detoasting it.
 */
428 textoctetlen(PG_FUNCTION_ARGS)
430 Datum str = PG_GETARG_DATUM(0);
432 /* We need not detoast the input at all */
433 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
438 * takes two text* and returns a text* that is the concatenation of the two.
441 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
442 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
443 * Allocate space for output in all cases.
444 * XXX - thomas 1997-07-10
/*
 * Concatenates two text values.  len1 and len2 are the payload sizes of
 * the detoasted arguments; the result is allocated with
 * len1 + len2 + VARHDRSZ bytes, its size field is set to that total, and
 * the payload of t1 followed by the payload of t2 is copied in.
 * NOTE(review): some lines between the visible statements are not shown
 * in this excerpt.
 */
447 textcat(PG_FUNCTION_ARGS)
449 text *t1 = PG_GETARG_TEXT_P(0);
450 text *t2 = PG_GETARG_TEXT_P(1);
457 len1 = VARSIZE(t1) - VARHDRSZ;
461 len2 = VARSIZE(t2) - VARHDRSZ;
465 len = len1 + len2 + VARHDRSZ;
466 result = (text *) palloc(len);
468 /* Set size of result string... */
469 VARATT_SIZEP(result) = len;
471 /* Fill data field of result string... */
472 ptr = VARDATA(result);
474 memcpy(ptr, VARDATA(t1), len1);
476 memcpy(ptr + len1, VARDATA(t2), len2);
478 PG_RETURN_TEXT_P(result);
483 * Return a substring starting at the specified position.
484 * - thomas 1997-12-31
488 * - starting position (is one-based)
491 * If the starting position is zero or less, then return from the start of the string
492 * adjusting the length to be consistent with the "negative start" per SQL92.
493 * If the length is less than zero, an error is raised; if no length is given, the remaining string is returned.
495 * Added multibyte support.
496 * - Tatsuo Ishii 1998-4-21
497 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
498 * Formerly returned the entire string; now returns a portion.
499 * - Thomas Lockhart 1998-12-10
500 * Now uses faster TOAST-slicing interface
501 * - John Gray 2002-02-22
502 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
503 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
504 * error; if E < 1, return '', not entire string). Fixed MB related bug when
505 * S > LC and < LC + 4 sometimes garbage characters are returned.
506 * - Joe Conway 2002-08-10
/*
 * SQL-callable wrapper: passes argument 0 as a raw Datum to text_substring
 * and returns its result.
 * NOTE(review): the remaining call arguments are on lines not visible in
 * this excerpt.
 */
509 text_substr(PG_FUNCTION_ARGS)
511 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
518 * text_substr_no_len -
519 * Wrapper to avoid opr_sanity failure due to
520 * one function accepting a different number of args.
/*
 * SQL-callable wrapper for the form without a length argument: passes
 * argument 0 as a raw Datum to text_substring and returns its result.
 * NOTE(review): the remaining call arguments are on lines not visible in
 * this excerpt.
 */
523 text_substr_no_len(PG_FUNCTION_ARGS)
525 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
532 * Does the real work for text_substr() and text_substr_no_len()
534 * This is broken out so it can be called directly by other string processing
535 * functions. Note that the argument is passed as a Datum, to indicate that
536 * it may still be in compressed/toasted form. We can avoid detoasting all
537 * of it in some cases.
/*
 * Worker for the substring functions.
 *
 * str                   - text value as a Datum, possibly still toasted
 * start                 - one-based start position
 * length                - requested length
 * length_not_specified  - true when the caller wants everything to the end
 *
 * Two strategies are used, chosen by the maximum character length of the
 * database encoding (eml):
 *
 *  - eml equal to 1: byte offsets equal character offsets, so the result
 *    is fetched directly with DatumGetTextPSlice(str, S1 - 1, L1).
 *
 *  - eml greater than 1: a slice of at most (S1 + L1) * eml bytes is
 *    fetched, its characters are counted with pg_mbstrlen_with_len, the
 *    start position is located by stepping over S1 - 1 characters, the
 *    bytes of the requested characters are counted, and they are copied
 *    into a new text value.
 *
 * Both strategies raise ERRCODE_SUBSTRING_ERROR for a negative length and
 * return an empty text value when the end position is not positive.  Any
 * other eml value ends in elog(ERROR).
 *
 * NOTE(review): S1, L1 and eml are int32 and no overflow check is visible
 * around (S1 + L1) * eml; many lines of this function are not visible in
 * this excerpt, so confirm before relying on that.
 */
540 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
542 int32 eml = pg_database_encoding_max_length();
543 int32 S = start; /* start position */
544 int32 S1; /* adjusted start position */
545 int32 L1; /* adjusted substring length */
547 /* life is easy if the encoding max length is 1 */
552 if (length_not_specified) /* special case - get length to end of
561 * A negative value for L is the only way for the end position to
562 * be before the start. SQL99 says to throw an error.
566 (errcode(ERRCODE_SUBSTRING_ERROR),
567 errmsg("negative substring length not allowed")));
570 * A zero or negative value for the end position can happen if the
571 * start was negative or one. SQL99 says to return a zero-length
575 return PG_STR_GET_TEXT("");
581 * If the start position is past the end of the string, SQL99 says to
582 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
583 * that for us. Convert to zero-based starting position
585 return DatumGetTextPSlice(str, S1 - 1, L1);
590 * When encoding max length is > 1, we can't get LC without
591 * detoasting, so we'll grab a conservatively large slice now and go
592 * back later to do the right thing
605 * if S is past the end of the string, the tuple toaster will return a
606 * zero-length string to us
611 * We need to start at position zero because there is no way to know
612 * in advance which byte offset corresponds to the supplied start
617 if (length_not_specified) /* special case - get length to end of
619 slice_size = L1 = -1;
625 * A negative value for L is the only way for the end position to
626 * be before the start. SQL99 says to throw an error.
630 (errcode(ERRCODE_SUBSTRING_ERROR),
631 errmsg("negative substring length not allowed")));
634 * A zero or negative value for the end position can happen if the
635 * start was negative or one. SQL99 says to return a zero-length
639 return PG_STR_GET_TEXT("");
642 * if E is past the end of the string, the tuple toaster will
643 * truncate the length for us
648 * Total slice size in bytes can't be any longer than the start
649 * position plus substring length times the encoding max length.
651 slice_size = (S1 + L1) * eml;
653 slice = DatumGetTextPSlice(str, slice_start, slice_size);
655 /* see if we got back an empty string */
656 if ((VARSIZE(slice) - VARHDRSZ) == 0)
657 return PG_STR_GET_TEXT("");
659 /* Now we can get the actual length of the slice in MB characters */
660 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
663 * Check that the start position wasn't > slice_strlen. If so, SQL99
664 * says to return a zero-length string.
666 if (S1 > slice_strlen)
667 return PG_STR_GET_TEXT("");
670 * Adjust L1 and E1 now that we know the slice string length. Again
671 * remember that S1 is one based, and slice_start is zero based.
674 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
676 E1 = slice_start + 1 + slice_strlen;
679 * Find the start position in the slice; remember S1 is not zero based
682 for (i = 0; i < S1 - 1; i++)
685 /* hang onto a pointer to our start position */
689 * Count the actual bytes used by the substring of the requested
692 for (i = S1; i < E1; i++)
695 ret = (text *) palloc(VARHDRSZ + (p - s));
696 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
697 memcpy(VARDATA(ret), s, (p - s));
702 elog(ERROR, "invalid backend encoding: encoding max length < 1");
704 /* not reached: suppress compiler warning */
710 * Return the position of the specified substring.
711 * Implements the SQL92 POSITION() function.
712 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
713 * - thomas 1997-07-27
/*
 * SQL-callable wrapper: detoasts both arguments and returns
 * text_position(str, search_str, 1), i.e. the position of the first match.
 */
716 textpos(PG_FUNCTION_ARGS)
718 text *str = PG_GETARG_TEXT_P(0);
719 text *search_str = PG_GETARG_TEXT_P(1);
721 PG_RETURN_INT32(text_position(str, search_str, 1));
726 * Does the real work for textpos()
729 * t1 - string to be searched
730 * t2 - pattern to match within t1
731 * matchnum - number of the match to be found (1 is the first match)
733 * Character index of the first matched char, starting from 1,
736 * This is broken out so it can be called directly by other string processing functions.
/*
 * Worker for textpos: finds occurrence number matchnum of t2 inside t1.
 *
 * Early exits: one path returns 0 (annotated as the result for the 0th
 * match), and an empty t2 returns 1.
 *
 * With a single-byte encoding the payloads are scanned bytewise, using a
 * first-byte check followed by strncmp over len2 bytes.  Otherwise both
 * payloads are converted to pg_wchar arrays with pg_mb2wchar_with_len,
 * their lengths are recomputed in characters, and the same scan is done
 * with pg_wchar_strncmp.  In both scans the match counter is incremented
 * on every hit and compared with matchnum.
 *
 * NOTE(review): the computation of px, the loop bodies and the return
 * statements after the loops are not visible in this excerpt.
 */
740 text_position(text *t1, text *t2, int matchnum)
750 return 0; /* result for 0th match */
752 if (VARSIZE(t2) <= VARHDRSZ)
753 return 1; /* result for empty pattern */
755 len1 = VARSIZE(t1) - VARHDRSZ;
756 len2 = VARSIZE(t2) - VARHDRSZ;
758 if (pg_database_encoding_max_length() == 1)
760 /* simple case - single byte encoding */
767 /* no use in searching str past point where search_str will fit */
770 for (p = 0; p <= px; p++)
772 if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
774 if (++match == matchnum)
785 /* not as simple - multibyte encoding */
791 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
792 (void) pg_mb2wchar_with_len(VARDATA(t1), p1, len1);
793 len1 = pg_wchar_strlen(p1);
794 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
795 (void) pg_mb2wchar_with_len(VARDATA(t2), p2, len2);
796 len2 = pg_wchar_strlen(p2);
798 /* no use in searching str past point where search_str will fit */
801 for (p = 0; p <= px; p++)
803 if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
805 if (++match == matchnum)
822 * Comparison function for text strings with given lengths.
823 * Includes locale support, but must copy strings to temporary memory
824 * to allow null-termination for inputs to strcoll().
/*
 * Compares two byte strings given by pointer and length.
 *
 * When lc_collate_is_c() holds, the common prefix is compared with strncmp
 * and equal prefixes are ordered by length (shorter sorts first).
 *
 * Otherwise the inputs are copied into buffers and compared with strcoll.
 * Stack buffers of STACKBUFLEN bytes are declared for this; an input whose
 * length is at least STACKBUFLEN gets a palloc buffer of len + 1 bytes
 * instead.
 *
 * A further path applies when the database encoding is PG_UTF8 (per the
 * inline comment, for Win32): both inputs are converted with
 * MultiByteToWideChar, terminated, and compared with wcscoll.  Conversion
 * and comparison failures are reported through errmsg.
 *
 * NOTE(review): buffer termination, freeing of palloc buffers and the
 * preprocessor conditionals around the Win32 path are not visible in this
 * excerpt.
 */
828 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
833 * Unfortunately, there is no strncoll(), so in the non-C locale case we
834 * have to do some memory copying. This turns out to be significantly
835 * slower, so we optimize the case where LC_COLLATE is C. We also try to
836 * optimize relatively-short strings by avoiding palloc/pfree overhead.
838 if (lc_collate_is_c())
840 result = strncmp(arg1, arg2, Min(len1, len2));
841 if ((result == 0) && (len1 != len2))
842 result = (len1 < len2) ? -1 : 1;
846 #define STACKBUFLEN 1024
848 char a1buf[STACKBUFLEN];
849 char a2buf[STACKBUFLEN];
854 /* Win32 does not have UTF-8, so we need to map to UTF-16 */
855 if (GetDatabaseEncoding() == PG_UTF8)
861 if (len1 >= STACKBUFLEN / 2)
863 a1len = len1 * 2 + 2;
871 if (len2 >= STACKBUFLEN / 2)
873 a2len = len2 * 2 + 2;
882 /* stupid Microsloth API does not work for zero-length input */
887 r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
888 (LPWSTR) a1p, a1len / 2);
891 (errmsg("could not convert string to UTF16: %lu",
894 ((LPWSTR) a1p)[r] = 0;
900 r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
901 (LPWSTR) a2p, a2len / 2);
904 (errmsg("could not convert string to UTF16: %lu",
907 ((LPWSTR) a2p)[r] = 0;
910 result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
911 if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
914 (errmsg("could not compare unicode strings: %d",
926 if (len1 >= STACKBUFLEN)
927 a1p = (char *) palloc(len1 + 1);
930 if (len2 >= STACKBUFLEN)
931 a2p = (char *) palloc(len2 + 1);
935 memcpy(a1p, arg1, len1);
937 memcpy(a2p, arg2, len2);
940 result = strcoll(a1p, a2p);
953 * Internal comparison function for text strings.
/*
 * Compares two detoasted text values by handing their payload lengths and
 * data pointers to varstr_cmp and returning its result.
 * NOTE(review): the assignments of a1p and a2p are not visible in this
 * excerpt.
 */
957 text_cmp(text *arg1, text *arg2)
967 len1 = VARSIZE(arg1) - VARHDRSZ;
968 len2 = VARSIZE(arg2) - VARHDRSZ;
970 return varstr_cmp(a1p, len1, a2p, len2);
974 * Comparison functions for text strings.
976 * Note: btree indexes need these routines not to leak memory; therefore,
977 * be careful to free working copies of toasted datums. Most places don't
978 * need to be so careful.
/*
 * Equality operator for text.  Values of different VARSIZE take a fast
 * path (its body is not visible in this excerpt); otherwise the result is
 * text_cmp(arg1, arg2) == 0.  Both arguments go through PG_FREE_IF_COPY
 * before returning.
 */
982 texteq(PG_FUNCTION_ARGS)
984 text *arg1 = PG_GETARG_TEXT_P(0);
985 text *arg2 = PG_GETARG_TEXT_P(1);
988 /* fast path for different-length inputs */
989 if (VARSIZE(arg1) != VARSIZE(arg2))
992 result = (text_cmp(arg1, arg2) == 0);
994 PG_FREE_IF_COPY(arg1, 0);
995 PG_FREE_IF_COPY(arg2, 1);
997 PG_RETURN_BOOL(result);
/*
 * Inequality operator for text.  Values of different VARSIZE take a fast
 * path (its body is not visible in this excerpt); otherwise the result is
 * text_cmp(arg1, arg2) != 0.  Both arguments go through PG_FREE_IF_COPY
 * before returning.
 */
1001 textne(PG_FUNCTION_ARGS)
1003 text *arg1 = PG_GETARG_TEXT_P(0);
1004 text *arg2 = PG_GETARG_TEXT_P(1);
1007 /* fast path for different-length inputs */
1008 if (VARSIZE(arg1) != VARSIZE(arg2))
1011 result = (text_cmp(arg1, arg2) != 0);
1013 PG_FREE_IF_COPY(arg1, 0);
1014 PG_FREE_IF_COPY(arg2, 1);
1016 PG_RETURN_BOOL(result);
/*
 * Less-than operator for text: true when text_cmp(arg1, arg2) < 0.
 * Both arguments go through PG_FREE_IF_COPY before returning.
 */
1020 text_lt(PG_FUNCTION_ARGS)
1022 text *arg1 = PG_GETARG_TEXT_P(0);
1023 text *arg2 = PG_GETARG_TEXT_P(1);
1026 result = (text_cmp(arg1, arg2) < 0);
1028 PG_FREE_IF_COPY(arg1, 0);
1029 PG_FREE_IF_COPY(arg2, 1);
1031 PG_RETURN_BOOL(result);
/*
 * Less-or-equal operator for text: true when text_cmp(arg1, arg2) <= 0.
 * Both arguments go through PG_FREE_IF_COPY before returning.
 */
1035 text_le(PG_FUNCTION_ARGS)
1037 text *arg1 = PG_GETARG_TEXT_P(0);
1038 text *arg2 = PG_GETARG_TEXT_P(1);
1041 result = (text_cmp(arg1, arg2) <= 0);
1043 PG_FREE_IF_COPY(arg1, 0);
1044 PG_FREE_IF_COPY(arg2, 1);
1046 PG_RETURN_BOOL(result);
/*
 * Greater-than operator for text: true when text_cmp(arg1, arg2) > 0.
 * Both arguments go through PG_FREE_IF_COPY before returning.
 */
1050 text_gt(PG_FUNCTION_ARGS)
1052 text *arg1 = PG_GETARG_TEXT_P(0);
1053 text *arg2 = PG_GETARG_TEXT_P(1);
1056 result = (text_cmp(arg1, arg2) > 0);
1058 PG_FREE_IF_COPY(arg1, 0);
1059 PG_FREE_IF_COPY(arg2, 1);
1061 PG_RETURN_BOOL(result);
/*
 * Greater-or-equal operator for text: true when text_cmp(arg1, arg2) >= 0.
 * Both arguments go through PG_FREE_IF_COPY before returning.
 */
1065 text_ge(PG_FUNCTION_ARGS)
1067 text *arg1 = PG_GETARG_TEXT_P(0);
1068 text *arg2 = PG_GETARG_TEXT_P(1);
1071 result = (text_cmp(arg1, arg2) >= 0);
1073 PG_FREE_IF_COPY(arg1, 0);
1074 PG_FREE_IF_COPY(arg2, 1);
1076 PG_RETURN_BOOL(result);
/*
 * Three-way comparison for text: returns the integer result of
 * text_cmp(arg1, arg2).  Both arguments go through PG_FREE_IF_COPY before
 * returning.
 */
1080 bttextcmp(PG_FUNCTION_ARGS)
1082 text *arg1 = PG_GETARG_TEXT_P(0);
1083 text *arg2 = PG_GETARG_TEXT_P(1);
1086 result = text_cmp(arg1, arg2);
1088 PG_FREE_IF_COPY(arg1, 0);
1089 PG_FREE_IF_COPY(arg2, 1);
1091 PG_RETURN_INT32(result);
/*
 * Returns arg1 when text_cmp(arg1, arg2) > 0, otherwise arg2.  The chosen
 * argument pointer itself is returned, and no PG_FREE_IF_COPY call appears
 * in the visible lines.
 */
1096 text_larger(PG_FUNCTION_ARGS)
1098 text *arg1 = PG_GETARG_TEXT_P(0);
1099 text *arg2 = PG_GETARG_TEXT_P(1);
1102 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1104 PG_RETURN_TEXT_P(result);
/*
 * Returns arg1 when text_cmp(arg1, arg2) < 0, otherwise arg2.  The chosen
 * argument pointer itself is returned, and no PG_FREE_IF_COPY call appears
 * in the visible lines.
 */
1108 text_smaller(PG_FUNCTION_ARGS)
1110 text *arg1 = PG_GETARG_TEXT_P(0);
1111 text *arg2 = PG_GETARG_TEXT_P(1);
1114 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1116 PG_RETURN_TEXT_P(result);
1121 * The following operators support character-by-character comparison
1122 * of text data types, to allow building indexes suitable for LIKE
/*
 * Bytewise comparison of two text values.  memcmp is applied to the
 * payloads over the length of the shorter one; the VARSIZE comparisons
 * that follow order values of different length.
 * NOTE(review): the branch bodies and the return statement are not visible
 * in this excerpt.
 */
1127 internal_text_pattern_compare(text *arg1, text *arg2)
1131 result = memcmp(VARDATA(arg1), VARDATA(arg2),
1132 Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1135 else if (VARSIZE(arg1) < VARSIZE(arg2))
1137 else if (VARSIZE(arg1) > VARSIZE(arg2))
/*
 * Less-than operator based on internal_text_pattern_compare: true when the
 * comparison result is negative.  Both arguments go through
 * PG_FREE_IF_COPY before returning.
 */
1145 text_pattern_lt(PG_FUNCTION_ARGS)
1147 text *arg1 = PG_GETARG_TEXT_P(0);
1148 text *arg2 = PG_GETARG_TEXT_P(1);
1151 result = internal_text_pattern_compare(arg1, arg2);
1153 PG_FREE_IF_COPY(arg1, 0);
1154 PG_FREE_IF_COPY(arg2, 1);
1156 PG_RETURN_BOOL(result < 0);
/*
 * Less-or-equal operator based on internal_text_pattern_compare: true when
 * the comparison result is zero or negative.  Both arguments go through
 * PG_FREE_IF_COPY before returning.
 */
1161 text_pattern_le(PG_FUNCTION_ARGS)
1163 text *arg1 = PG_GETARG_TEXT_P(0);
1164 text *arg2 = PG_GETARG_TEXT_P(1);
1167 result = internal_text_pattern_compare(arg1, arg2);
1169 PG_FREE_IF_COPY(arg1, 0);
1170 PG_FREE_IF_COPY(arg2, 1);
1172 PG_RETURN_BOOL(result <= 0);
/*
 * Equality operator based on internal_text_pattern_compare.  Values of
 * different VARSIZE take a separate branch (its body is not visible in
 * this excerpt); otherwise result comes from the comparison.  The function
 * returns result == 0 after passing both arguments to PG_FREE_IF_COPY.
 */
1177 text_pattern_eq(PG_FUNCTION_ARGS)
1179 text *arg1 = PG_GETARG_TEXT_P(0);
1180 text *arg2 = PG_GETARG_TEXT_P(1);
1183 if (VARSIZE(arg1) != VARSIZE(arg2))
1186 result = internal_text_pattern_compare(arg1, arg2);
1188 PG_FREE_IF_COPY(arg1, 0);
1189 PG_FREE_IF_COPY(arg2, 1);
1191 PG_RETURN_BOOL(result == 0);
/*
 * Greater-or-equal operator based on internal_text_pattern_compare: true
 * when the comparison result is zero or positive.  Both arguments go
 * through PG_FREE_IF_COPY before returning.
 */
1196 text_pattern_ge(PG_FUNCTION_ARGS)
1198 text *arg1 = PG_GETARG_TEXT_P(0);
1199 text *arg2 = PG_GETARG_TEXT_P(1);
1202 result = internal_text_pattern_compare(arg1, arg2);
1204 PG_FREE_IF_COPY(arg1, 0);
1205 PG_FREE_IF_COPY(arg2, 1);
1207 PG_RETURN_BOOL(result >= 0);
/*
 * Greater-than operator based on internal_text_pattern_compare: true when
 * the comparison result is positive.  Both arguments go through
 * PG_FREE_IF_COPY before returning.
 */
1212 text_pattern_gt(PG_FUNCTION_ARGS)
1214 text *arg1 = PG_GETARG_TEXT_P(0);
1215 text *arg2 = PG_GETARG_TEXT_P(1);
1218 result = internal_text_pattern_compare(arg1, arg2);
1220 PG_FREE_IF_COPY(arg1, 0);
1221 PG_FREE_IF_COPY(arg2, 1);
1223 PG_RETURN_BOOL(result > 0);
/*
 * Inequality operator based on internal_text_pattern_compare.  Values of
 * different VARSIZE take a separate branch (its body is not visible in
 * this excerpt); otherwise result comes from the comparison.  The function
 * returns result != 0 after passing both arguments to PG_FREE_IF_COPY.
 */
1228 text_pattern_ne(PG_FUNCTION_ARGS)
1230 text *arg1 = PG_GETARG_TEXT_P(0);
1231 text *arg2 = PG_GETARG_TEXT_P(1);
1234 if (VARSIZE(arg1) != VARSIZE(arg2))
1237 result = internal_text_pattern_compare(arg1, arg2);
1239 PG_FREE_IF_COPY(arg1, 0);
1240 PG_FREE_IF_COPY(arg2, 1);
1242 PG_RETURN_BOOL(result != 0);
/*
 * Three-way comparison based on internal_text_pattern_compare: returns its
 * integer result.  Both arguments go through PG_FREE_IF_COPY before
 * returning.
 */
1247 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1249 text *arg1 = PG_GETARG_TEXT_P(0);
1250 text *arg2 = PG_GETARG_TEXT_P(1);
1253 result = internal_text_pattern_compare(arg1, arg2);
1255 PG_FREE_IF_COPY(arg1, 0);
1256 PG_FREE_IF_COPY(arg2, 1);
1258 PG_RETURN_INT32(result);
1262 /*-------------------------------------------------------------
1265 * get the number of bytes contained in an instance of type 'bytea'
1266 *-------------------------------------------------------------
/*
 * Byte length of a bytea value: toast_raw_datum_size(str) - VARHDRSZ,
 * computed on the raw Datum without detoasting it.
 */
1269 byteaoctetlen(PG_FUNCTION_ARGS)
1271 Datum str = PG_GETARG_DATUM(0);
1273 /* We need not detoast the input at all */
1274 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1279 * takes two bytea* and returns a bytea* that is the concatenation of the two.
1282 * Cloned from textcat and modified as required.
/*
 * Concatenates two bytea values.  len1 and len2 are the payload sizes of
 * the detoasted arguments; the result is allocated with
 * len1 + len2 + VARHDRSZ bytes, its size field is set to that total, and
 * the payload of t1 followed by the payload of t2 is copied in.
 * NOTE(review): some lines between the visible statements are not shown
 * in this excerpt.
 */
1285 byteacat(PG_FUNCTION_ARGS)
1287 bytea *t1 = PG_GETARG_BYTEA_P(0);
1288 bytea *t2 = PG_GETARG_BYTEA_P(1);
1295 len1 = VARSIZE(t1) - VARHDRSZ;
1299 len2 = VARSIZE(t2) - VARHDRSZ;
1303 len = len1 + len2 + VARHDRSZ;
1304 result = (bytea *) palloc(len);
1306 /* Set size of result string... */
1307 VARATT_SIZEP(result) = len;
1309 /* Fill data field of result string... */
1310 ptr = VARDATA(result);
1312 memcpy(ptr, VARDATA(t1), len1);
1314 memcpy(ptr + len1, VARDATA(t2), len2);
1316 PG_RETURN_BYTEA_P(result);
/*
 * PG_STR_GET_BYTEA runs byteain on a C string and yields the resulting
 * bytea pointer.
 */
1319 #define PG_STR_GET_BYTEA(str_) \
1320 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1323 * Return a substring starting at the specified position.
1324 * Cloned from text_substr and modified as required.
1328 * - starting position (is one-based)
1329 * - string length (optional)
1331 * If the starting position is zero or less, then return from the start of the string
1332 * adjusting the length to be consistent with the "negative start" per SQL92.
1333 * If the length is less than zero, an ERROR is thrown. If no third argument
1334 * (length) is provided, the length to the end of the string is assumed.
/*
 * Substring for bytea.  Argument 1 is the one-based start position S.
 * When called with exactly two arguments no length was supplied; otherwise
 * the end position is computed as E = S + argument 2.  A negative length
 * raises ERRCODE_SUBSTRING_ERROR, and one path returns an empty bytea
 * built with PG_STR_GET_BYTEA.  The normal result is the slice
 * PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1), i.e. with a zero-based offset.
 *
 * NOTE(review): S and E are plain int and no overflow check is visible on
 * S + argument 2; the lines that assign S1 and L1 are not visible in this
 * excerpt.
 */
1337 bytea_substr(PG_FUNCTION_ARGS)
1339 int S = PG_GETARG_INT32(1); /* start position */
1340 int S1; /* adjusted start position */
1341 int L1; /* adjusted substring length */
1345 if (fcinfo->nargs == 2)
1348 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1349 * the end of the string if we pass it a negative value for length.
1356 int E = S + PG_GETARG_INT32(2);
1359 * A negative value for L is the only way for the end position to be
1360 * before the start. SQL99 says to throw an error.
1364 (errcode(ERRCODE_SUBSTRING_ERROR),
1365 errmsg("negative substring length not allowed")));
1368 * A zero or negative value for the end position can happen if the
1369 * start was negative or one. SQL99 says to return a zero-length
1373 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1379 * If the start position is past the end of the string, SQL99 says to
1380 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
1381 * for us. Convert to zero-based starting position
1383 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1387 * bytea_substr_no_len -
1388 * Wrapper to avoid opr_sanity failure due to
1389 * one function accepting a different number of args.
/*
 * Two-argument entry point: forwards the call information unchanged to
 * bytea_substr, which distinguishes the two forms by fcinfo->nargs.
 */
1392 bytea_substr_no_len(PG_FUNCTION_ARGS)
1394 return bytea_substr(fcinfo);
1399 * Return the position of the specified substring.
1400 * Implements the SQL92 POSITION() function.
1401 * Cloned from textpos and modified as required.
/*
 * Position of t2 within t1 for bytea values.  An empty t2 returns 1.
 * Otherwise candidate offsets 0 through px are scanned, using a first-byte
 * check followed by memcmp over len2 bytes, and pos is returned.
 * NOTE(review): the computation of px, the loop body and the
 * initialisation of pos are not visible in this excerpt.
 */
1404 byteapos(PG_FUNCTION_ARGS)
1406 bytea *t1 = PG_GETARG_BYTEA_P(0);
1407 bytea *t2 = PG_GETARG_BYTEA_P(1);
1416 if (VARSIZE(t2) <= VARHDRSZ)
1417 PG_RETURN_INT32(1); /* result for empty pattern */
1419 len1 = VARSIZE(t1) - VARHDRSZ;
1420 len2 = VARSIZE(t2) - VARHDRSZ;
1427 for (p = 0; p <= px; p++)
1429 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1437 PG_RETURN_INT32(pos);
1440 /*-------------------------------------------------------------
1443 * this routine treats "bytea" as an array of bytes.
1444 * It returns the Nth byte (a number between 0 and 255).
1445 *-------------------------------------------------------------
/*
 * Returns byte number n (zero-based) of a bytea value, read as unsigned
 * char.  An index outside 0 .. len - 1 raises
 * ERRCODE_ARRAY_SUBSCRIPT_ERROR.
 */
1448 byteaGetByte(PG_FUNCTION_ARGS)
1450 bytea *v = PG_GETARG_BYTEA_P(0);
1451 int32 n = PG_GETARG_INT32(1);
1455 len = VARSIZE(v) - VARHDRSZ;
1457 if (n < 0 || n >= len)
1459 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1460 errmsg("index %d out of valid range, 0..%d",
1463 byte = ((unsigned char *) VARDATA(v))[n];
1465 PG_RETURN_INT32(byte);
1468 /*-------------------------------------------------------------
1471 * This routine treats a "bytea" type like an array of bits.
1472 * It returns the value of the Nth bit (0 or 1).
1474 *-------------------------------------------------------------
/*
 * Tests bit number n (zero-based) of a bytea value.  An index outside
 * 0 .. len * 8 - 1 raises ERRCODE_ARRAY_SUBSCRIPT_ERROR.  The byte at
 * byteNo is fetched as unsigned char and tested against the mask
 * 1 << bitNo.
 * NOTE(review): the computation of byteNo and bitNo and the return
 * statements are not visible in this excerpt.  len * 8 is evaluated in the
 * type of len (declaration not visible), which could overflow for very
 * large values; confirm.
 */
1477 byteaGetBit(PG_FUNCTION_ARGS)
1479 bytea *v = PG_GETARG_BYTEA_P(0);
1480 int32 n = PG_GETARG_INT32(1);
1486 len = VARSIZE(v) - VARHDRSZ;
1488 if (n < 0 || n >= len * 8)
1490 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1491 errmsg("index %d out of valid range, 0..%d",
1497 byte = ((unsigned char *) VARDATA(v))[byteNo];
1499 if (byte & (1 << bitNo))
1505 /*-------------------------------------------------------------
1508 * Given an instance of type 'bytea' creates a new one with
1509 * the Nth byte set to the given value.
1511 *-------------------------------------------------------------
/*
 * Returns a copy of a bytea value with byte number n (zero-based) replaced
 * by newByte.  An index outside 0 .. len - 1 raises
 * ERRCODE_ARRAY_SUBSCRIPT_ERROR.  The input is left untouched: a copy of
 * VARSIZE(v) bytes is allocated and modified instead.  newByte is an int32
 * stored into an unsigned char slot, so only its low-order bits are kept.
 */
1514 byteaSetByte(PG_FUNCTION_ARGS)
1516 bytea *v = PG_GETARG_BYTEA_P(0);
1517 int32 n = PG_GETARG_INT32(1);
1518 int32 newByte = PG_GETARG_INT32(2);
1522 len = VARSIZE(v) - VARHDRSZ;
1524 if (n < 0 || n >= len)
1526 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1527 errmsg("index %d out of valid range, 0..%d",
1531 * Make a copy of the original varlena.
1533 res = (bytea *) palloc(VARSIZE(v));
1534 memcpy((char *) res, (char *) v, VARSIZE(v));
1539 ((unsigned char *) VARDATA(res))[n] = newByte;
1541 PG_RETURN_BYTEA_P(res);
1544 /*-------------------------------------------------------------
1547 * Given an instance of type 'bytea' creates a new one with
1548 * the Nth bit set to the given value.
1550 *-------------------------------------------------------------
/*
 * Returns a copy of a bytea value with bit number n (zero-based) set to
 * newBit.  An index outside 0 .. len * 8 - 1 raises
 * ERRCODE_ARRAY_SUBSCRIPT_ERROR, and a newBit other than 0 or 1 raises
 * ERRCODE_INVALID_PARAMETER_VALUE.  The input is left untouched: a copy of
 * VARSIZE(v) bytes is allocated, and the byte at byteNo is rewritten with
 * the bit either cleared (and-not mask) or set (or mask).
 * NOTE(review): the computation of byteNo and bitNo and the condition that
 * selects between clearing and setting are not visible in this excerpt.
 * len * 8 is evaluated in the type of len (declaration not visible), which
 * could overflow for very large values; confirm.
 */
1553 byteaSetBit(PG_FUNCTION_ARGS)
1555 bytea *v = PG_GETARG_BYTEA_P(0);
1556 int32 n = PG_GETARG_INT32(1);
1557 int32 newBit = PG_GETARG_INT32(2);
1565 len = VARSIZE(v) - VARHDRSZ;
1567 if (n < 0 || n >= len * 8)
1569 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1570 errmsg("index %d out of valid range, 0..%d",
1579 if (newBit != 0 && newBit != 1)
1581 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1582 errmsg("new bit must be 0 or 1")));
1585 * Make a copy of the original varlena.
1587 res = (bytea *) palloc(VARSIZE(v));
1588 memcpy((char *) res, (char *) v, VARSIZE(v));
1593 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1596 newByte = oldByte & (~(1 << bitNo));
1598 newByte = oldByte | (1 << bitNo);
1600 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1602 PG_RETURN_BYTEA_P(res);
1607 * Converts a text type to a Name type.
/*
 * Converts a text value to a Name.  The payload length is capped at
 * NAMEDATALEN - 1 bytes; a NAMEDATALEN-byte result is allocated, the
 * (possibly truncated) payload is copied in, and the remainder is filled
 * with NUL bytes up to NAMEDATALEN.
 * NOTE(review): the printf below looks like debugging output, presumably
 * guarded by a preprocessor conditional that is not visible in this
 * excerpt; the increment of len inside the padding loop is also not
 * visible.
 */
1610 text_name(PG_FUNCTION_ARGS)
1612 text *s = PG_GETARG_TEXT_P(0);
1616 len = VARSIZE(s) - VARHDRSZ;
1618 /* Truncate oversize input */
1619 if (len >= NAMEDATALEN)
1620 len = NAMEDATALEN - 1;
1623 printf("text- convert string length %d (%d) ->%d\n",
1624 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1627 result = (Name) palloc(NAMEDATALEN);
1628 memcpy(NameStr(*result), VARDATA(s), len);
1630 /* now null pad to full length... */
1631 while (len < NAMEDATALEN)
1633 *(NameStr(*result) + len) = '\0';
1637 PG_RETURN_NAME(result);
1641 * Converts a Name type to a text type.
/*
 * Converts a Name to a text value.  The length is taken with strlen on the
 * name string, and that many bytes are copied into a new text value of
 * size VARHDRSZ + len.
 * NOTE(review): the printf below applies VARSIZE to s, which is a Name in
 * this function rather than a varlena; presumably the statement is
 * debugging output compiled only under a conditional that is not visible
 * in this excerpt - confirm before enabling it.
 */
1644 name_text(PG_FUNCTION_ARGS)
1646 Name s = PG_GETARG_NAME(0);
1650 len = strlen(NameStr(*s));
1653 printf("text- convert string length %d (%d) ->%d\n",
1654 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1657 result = palloc(VARHDRSZ + len);
1658 VARATT_SIZEP(result) = VARHDRSZ + len;
1659 memcpy(VARDATA(result), NameStr(*s), len);
1661 PG_RETURN_TEXT_P(result);
1666 * textToQualifiedNameList - convert a text object to list of names
1668 * This implements the input parsing needed by nextval() and other
1669 * functions that take a text parameter representing a qualified name.
1670 * We split the name at dots, downcase if not double-quoted, and
1671 * truncate names if they're too long.
/*
 * Converts a text value holding a possibly qualified name into a list of
 * string nodes.
 *
 * The value is first turned into a modifiable C string with textout, then
 * split at dots by SplitIdentifierString.  A split failure or an empty
 * resulting list raises ERRCODE_INVALID_NAME.  Each identifier is copied
 * with pstrdup and appended to the result as a makeString node; the
 * intermediate list is released with list_free.
 * NOTE(review): the initialisation of result and the return statement are
 * not visible in this excerpt.
 */
1674 textToQualifiedNameList(text *textval)
1681 /* Convert to C string (handles possible detoasting). */
1682 /* Note we rely on being able to modify rawname below. */
1683 rawname = DatumGetCString(DirectFunctionCall1(textout,
1684 PointerGetDatum(textval)));
1686 if (!SplitIdentifierString(rawname, '.', &namelist))
1688 (errcode(ERRCODE_INVALID_NAME),
1689 errmsg("invalid name syntax")));
1691 if (namelist == NIL)
1693 (errcode(ERRCODE_INVALID_NAME),
1694 errmsg("invalid name syntax")));
1696 foreach(l, namelist)
1698 char *curname = (char *) lfirst(l);
1700 result = lappend(result, makeString(pstrdup(curname)));
1704 list_free(namelist);
1710 * SplitIdentifierString --- parse a string containing identifiers
1712 * This is the guts of textToQualifiedNameList, and is exported for use in
1713 * other situations such as parsing GUC variables. In the GUC case, it's
1714 * important to avoid memory leaks, so the API is designed to minimize the
1715 * amount of stuff that needs to be allocated and freed.
1718 * rawstring: the input string; must be overwritable! On return, it's
1719 * been modified to contain the separated identifiers.
1720 * separator: the separator punctuation expected between identifiers
1721 * (typically '.' or ','). Whitespace may also appear around
1724 * namelist: filled with a palloc'd list of pointers to identifiers within
1725 * rawstring. Caller should list_free() this even on error return.
1727 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1729 * Note that an empty string is considered okay here, though not in
1730 * textToQualifiedNameList.
1733 SplitIdentifierString(char *rawstring, char separator,
1736 char *nextp = rawstring;
1741 while (isspace((unsigned char) *nextp))
1742 nextp++; /* skip leading whitespace */
1745 return true; /* allow empty string */
1747 /* At the top of the loop, we are at start of a new identifier. */
1755 /* Quoted name --- collapse quote-quote pairs, no downcasing */
1756 curname = nextp + 1;
1759 endp = strchr(nextp + 1, '\"');
1761 return false; /* mismatched quotes */
1762 if (endp[1] != '\"')
1763 break; /* found end of quoted name */
1764 /* Collapse adjacent quotes into one quote, and look again */
1765 memmove(endp, endp + 1, strlen(endp));
1768 /* endp now points at the terminating quote */
1773 /* Unquoted name --- extends to separator or whitespace */
1778 while (*nextp && *nextp != separator &&
1779 !isspace((unsigned char) *nextp))
1782 if (curname == nextp)
1783 return false; /* empty unquoted name not allowed */
1786 * Downcase the identifier, using same code as main lexer does.
1788 * XXX because we want to overwrite the input in-place, we cannot
1789 * support a downcasing transformation that increases the string
1790 * length. This is not a problem given the current implementation
1791 * of downcase_truncate_identifier, but we'll probably have to do
1792 * something about this someday.
1794 len = endp - curname;
1795 downname = downcase_truncate_identifier(curname, len, false);
1796 Assert(strlen(downname) <= len);
1797 strncpy(curname, downname, len);
1801 while (isspace((unsigned char) *nextp))
1802 nextp++; /* skip trailing whitespace */
1804 if (*nextp == separator)
1807 while (isspace((unsigned char) *nextp))
1808 nextp++; /* skip leading whitespace for next */
1809 /* we expect another name, so done remains false */
1811 else if (*nextp == '\0')
1814 return false; /* invalid syntax */
1816 /* Now safe to overwrite separator with a null */
1819 /* Truncate name if it's overlength */
1820 truncate_identifier(curname, strlen(curname), false);
1823 * Finished isolating current name --- add it to list
1825 *namelist = lappend(*namelist, curname);
1827 /* Loop back if we didn't reach end of string */
1834 /*****************************************************************************
1835 * Comparison Functions used for bytea
1837 * Note: btree indexes need these routines not to leak memory; therefore,
1838 * be careful to free working copies of toasted datums. Most places don't
1839 * need to be so careful.
1840 *****************************************************************************/
1843 byteaeq(PG_FUNCTION_ARGS)
1845 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1846 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1851 len1 = VARSIZE(arg1) - VARHDRSZ;
1852 len2 = VARSIZE(arg2) - VARHDRSZ;
1854 /* fast path for different-length inputs */
1858 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1860 PG_FREE_IF_COPY(arg1, 0);
1861 PG_FREE_IF_COPY(arg2, 1);
1863 PG_RETURN_BOOL(result);
1867 byteane(PG_FUNCTION_ARGS)
1869 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1870 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1875 len1 = VARSIZE(arg1) - VARHDRSZ;
1876 len2 = VARSIZE(arg2) - VARHDRSZ;
1878 /* fast path for different-length inputs */
1882 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1884 PG_FREE_IF_COPY(arg1, 0);
1885 PG_FREE_IF_COPY(arg2, 1);
1887 PG_RETURN_BOOL(result);
1891 bytealt(PG_FUNCTION_ARGS)
1893 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1894 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1899 len1 = VARSIZE(arg1) - VARHDRSZ;
1900 len2 = VARSIZE(arg2) - VARHDRSZ;
1902 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1904 PG_FREE_IF_COPY(arg1, 0);
1905 PG_FREE_IF_COPY(arg2, 1);
1907 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1911 byteale(PG_FUNCTION_ARGS)
1913 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1914 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1919 len1 = VARSIZE(arg1) - VARHDRSZ;
1920 len2 = VARSIZE(arg2) - VARHDRSZ;
1922 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1924 PG_FREE_IF_COPY(arg1, 0);
1925 PG_FREE_IF_COPY(arg2, 1);
1927 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1931 byteagt(PG_FUNCTION_ARGS)
1933 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1934 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1939 len1 = VARSIZE(arg1) - VARHDRSZ;
1940 len2 = VARSIZE(arg2) - VARHDRSZ;
1942 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1944 PG_FREE_IF_COPY(arg1, 0);
1945 PG_FREE_IF_COPY(arg2, 1);
1947 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1951 byteage(PG_FUNCTION_ARGS)
1953 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1954 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1959 len1 = VARSIZE(arg1) - VARHDRSZ;
1960 len2 = VARSIZE(arg2) - VARHDRSZ;
1962 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1964 PG_FREE_IF_COPY(arg1, 0);
1965 PG_FREE_IF_COPY(arg2, 1);
1967 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1971 byteacmp(PG_FUNCTION_ARGS)
1973 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1974 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1979 len1 = VARSIZE(arg1) - VARHDRSZ;
1980 len2 = VARSIZE(arg2) - VARHDRSZ;
1982 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1983 if ((cmp == 0) && (len1 != len2))
1984 cmp = (len1 < len2) ? -1 : 1;
1986 PG_FREE_IF_COPY(arg1, 0);
1987 PG_FREE_IF_COPY(arg2, 1);
1989 PG_RETURN_INT32(cmp);
1993 * appendStringInfoText
1995 * Append a text to str.
1996 * Like appendStringInfoString(str, PG_TEXT_GET_STR(s)) but faster.
1999 appendStringInfoText(StringInfo str, const text *t)
2001 appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
2006 * replace all occurrences of 'old_sub_str' in 'orig_str'
2007 * with 'new_sub_str' to form 'new_str'
2009 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2010 * otherwise returns 'new_str'
2013 replace_text(PG_FUNCTION_ARGS)
2015 text *src_text = PG_GETARG_TEXT_P(0);
2016 text *from_sub_text = PG_GETARG_TEXT_P(1);
2017 text *to_sub_text = PG_GETARG_TEXT_P(2);
2018 int src_text_len = TEXTLEN(src_text);
2019 int from_sub_text_len = TEXTLEN(from_sub_text);
2027 if (src_text_len == 0 || from_sub_text_len == 0)
2028 PG_RETURN_TEXT_P(src_text);
2030 curr_posn = TEXTPOS(src_text, from_sub_text);
2032 /* When the from_sub_text is not found, there is nothing to do. */
2034 PG_RETURN_TEXT_P(src_text);
2036 str = makeStringInfo();
2037 buf_text = src_text;
2039 while (curr_posn > 0)
2041 left_text = text_substring(PointerGetDatum(buf_text),
2042 1, curr_posn - 1, false);
2043 right_text = text_substring(PointerGetDatum(buf_text),
2044 curr_posn + from_sub_text_len, -1, true);
2046 appendStringInfoText(str, left_text);
2047 appendStringInfoText(str, to_sub_text);
2049 if (buf_text != src_text)
2052 buf_text = right_text;
2053 curr_posn = TEXTPOS(buf_text, from_sub_text);
2056 appendStringInfoText(str, buf_text);
2057 if (buf_text != src_text)
2060 ret_text = PG_STR_GET_TEXT(str->data);
2064 PG_RETURN_TEXT_P(ret_text);
2068 * check_replace_text_has_escape_char
2070 * check whether replace_text contains escape char.
2073 check_replace_text_has_escape_char(const text *replace_text)
2075 const char *p = VARDATA(replace_text);
2076 const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2078 if (pg_database_encoding_max_length() == 1)
2080 for (; p < p_end; p++)
2088 for (; p < p_end; p += pg_mblen(p))
2099 * appendStringInfoRegexpSubstr
2101 * Append replace_text to str, substituting regexp back references for
2105 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2106 regmatch_t *pmatch, text *src_text)
2108 const char *p = VARDATA(replace_text);
2109 const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2110 int eml = pg_database_encoding_max_length();
2114 const char *chunk_start = p;
2118 /* Find next escape char. */
2121 for (; p < p_end && *p != '\\'; p++)
2126 for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2130 /* Copy the text we just scanned over, if any. */
2131 if (p > chunk_start)
2132 appendBinaryStringInfo(str, chunk_start, p - chunk_start);
2134 /* Done if at end of string, else advance over escape char. */
2141 /* Escape at very end of input. Treat same as unexpected char */
2142 appendStringInfoChar(str, '\\');
2146 if (*p >= '1' && *p <= '9')
2148 /* Use the back reference of regexp. */
2151 so = pmatch[idx].rm_so;
2152 eo = pmatch[idx].rm_eo;
2157 /* Use the entire matched string. */
2158 so = pmatch[0].rm_so;
2159 eo = pmatch[0].rm_eo;
2162 else if (*p == '\\')
2164 /* \\ means transfer one \ to output. */
2165 appendStringInfoChar(str, '\\');
2172 * If escape char is not followed by any expected char,
2173 * just treat it as ordinary data to copy. (XXX would it be
2174 * better to throw an error?)
2176 appendStringInfoChar(str, '\\');
2180 if (so != -1 && eo != -1)
2183 * Copy the text that is back reference of regexp. Because so and
2184 * eo are counted in characters not bytes, it's easiest to use
2185 * text_substring to pull out the correct chunk of text.
2189 append_text = text_substring(PointerGetDatum(src_text),
2190 so + 1, (eo - so), false);
2191 appendStringInfoText(str, append_text);
2197 #define REGEXP_REPLACE_BACKREF_CNT 10
2200 * replace_text_regexp
2202 * replace text that matches to regexp in src_text to replace_text.
2204 * Note: to avoid having to include regex.h in builtins.h, we declare
2205 * the regexp argument as void *, but really it's regex_t *.
2208 replace_text_regexp(text *src_text, void *regexp,
2209 text *replace_text, bool glob)
2212 regex_t *re = (regex_t *) regexp;
2213 int src_text_len = VARSIZE(src_text) - VARHDRSZ;
2214 StringInfo str = makeStringInfo();
2216 regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
2223 /* Convert data string to wide characters. */
2224 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2225 data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
2227 /* Check whether replace_text has escape char. */
2228 have_escape = check_replace_text_has_escape_char(replace_text);
2230 for (search_start = data_pos = 0; search_start <= data_len;)
2232 regexec_result = pg_regexec(re,
2236 NULL, /* no details */
2237 REGEXP_REPLACE_BACKREF_CNT,
2241 if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
2246 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2248 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2249 errmsg("regular expression failed: %s", errMsg)));
2252 if (regexec_result == REG_NOMATCH)
2256 * Copy the text to the left of the match position. Because we
2257 * are working with character not byte indexes, it's easiest to
2258 * use text_substring to pull out the needed data.
2260 if (pmatch[0].rm_so - data_pos > 0)
2264 left_text = text_substring(PointerGetDatum(src_text),
2266 pmatch[0].rm_so - data_pos,
2268 appendStringInfoText(str, left_text);
2273 * Copy the replace_text. Process back references when the
2274 * replace_text has escape characters.
2277 appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
2279 appendStringInfoText(str, replace_text);
2281 search_start = data_pos = pmatch[0].rm_eo;
2284 * When global option is off, replace the first instance only.
2290 * Search from next character when the matching text is zero width.
2292 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2297 * Copy the text to the right of the last match.
2299 if (data_pos < data_len)
2303 right_text = text_substring(PointerGetDatum(src_text),
2304 data_pos + 1, -1, true);
2305 appendStringInfoText(str, right_text);
2309 ret_text = PG_STR_GET_TEXT(str->data);
2319 * parse input string
2320 * return ord item (1 based)
2321 * based on provided field separator
2324 split_text(PG_FUNCTION_ARGS)
2326 text *inputstring = PG_GETARG_TEXT_P(0);
2327 text *fldsep = PG_GETARG_TEXT_P(1);
2328 int fldnum = PG_GETARG_INT32(2);
2329 int inputstring_len = TEXTLEN(inputstring);
2330 int fldsep_len = TEXTLEN(fldsep);
2335 /* field number is 1 based */
2338 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2339 errmsg("field position must be greater than zero")));
2341 /* return empty string for empty input string */
2342 if (inputstring_len < 1)
2343 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2345 /* empty field separator */
2348 /* if first field, return input string, else empty string */
2350 PG_RETURN_TEXT_P(inputstring);
2352 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2355 start_posn = text_position(inputstring, fldsep, fldnum - 1);
2356 end_posn = text_position(inputstring, fldsep, fldnum);
2358 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2360 /* if first field, return input string, else empty string */
2362 PG_RETURN_TEXT_P(inputstring);
2364 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2366 else if (start_posn == 0)
2368 /* first field requested */
2369 result_text = LEFT(inputstring, fldsep);
2370 PG_RETURN_TEXT_P(result_text);
2372 else if (end_posn == 0)
2374 /* last field requested */
2375 result_text = text_substring(PointerGetDatum(inputstring),
2376 start_posn + fldsep_len,
2378 PG_RETURN_TEXT_P(result_text);
2382 /* interior field requested */
2383 result_text = text_substring(PointerGetDatum(inputstring),
2384 start_posn + fldsep_len,
2385 end_posn - start_posn - fldsep_len,
2387 PG_RETURN_TEXT_P(result_text);
2393 * parse input string
2394 * return text array of elements
2395 * based on provided field separator
2398 text_to_array(PG_FUNCTION_ARGS)
2400 text *inputstring = PG_GETARG_TEXT_P(0);
2401 text *fldsep = PG_GETARG_TEXT_P(1);
2402 int inputstring_len = TEXTLEN(inputstring);
2403 int fldsep_len = TEXTLEN(fldsep);
2408 ArrayBuildState *astate = NULL;
2410 /* return NULL for empty input string */
2411 if (inputstring_len < 1)
2415 * empty field separator return one element, 1D, array using the input
2419 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2420 CStringGetDatum(inputstring), 1));
2422 /* start with end position holding the initial start position */
2424 for (fldnum = 1;; fldnum++) /* field number is 1 based */
2427 bool disnull = false;
2429 start_posn = end_posn;
2430 end_posn = text_position(inputstring, fldsep, fldnum);
2432 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2437 * first element return one element, 1D, array using the input
2440 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2441 CStringGetDatum(inputstring), 1));
2445 /* otherwise create array and exit */
2446 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2447 CurrentMemoryContext));
2450 else if (start_posn == 0)
2452 /* first field requested */
2453 result_text = LEFT(inputstring, fldsep);
2455 else if (end_posn == 0)
2457 /* last field requested */
2458 result_text = text_substring(PointerGetDatum(inputstring),
2459 start_posn + fldsep_len,
2464 /* interior field requested */
2465 result_text = text_substring(PointerGetDatum(inputstring),
2466 start_posn + fldsep_len,
2467 end_posn - start_posn - fldsep_len,
2471 /* stash away current value */
2472 dvalue = PointerGetDatum(result_text);
2473 astate = accumArrayResult(astate, dvalue,
2475 CurrentMemoryContext);
2478 /* never reached -- keep compiler quiet */
2484 * concatenate Cstring representation of input array elements
2485 * using provided field separator
2488 array_to_text(PG_FUNCTION_ARGS)
2490 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
2491 char *fldsep = PG_TEXTARG_GET_STR(1);
2500 StringInfo result_str = makeStringInfo();
2502 ArrayMetaState *my_extra;
2504 p = ARR_DATA_PTR(v);
2505 ndims = ARR_NDIM(v);
2507 nitems = ArrayGetNItems(ndims, dims);
2509 /* if there are no elements, return an empty string */
2511 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2513 element_type = ARR_ELEMTYPE(v);
2516 * We arrange to look up info about element type, including its output
2517 * conversion proc, only once per series of calls, assuming the element
2518 * type doesn't change underneath us.
2520 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2521 if (my_extra == NULL)
2523 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2524 sizeof(ArrayMetaState));
2525 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2526 my_extra->element_type = InvalidOid;
2529 if (my_extra->element_type != element_type)
2532 * Get info about element type, including its output conversion proc
2534 get_type_io_data(element_type, IOFunc_output,
2535 &my_extra->typlen, &my_extra->typbyval,
2536 &my_extra->typalign, &my_extra->typdelim,
2537 &my_extra->typioparam, &my_extra->typiofunc);
2538 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2539 fcinfo->flinfo->fn_mcxt);
2540 my_extra->element_type = element_type;
2542 typlen = my_extra->typlen;
2543 typbyval = my_extra->typbyval;
2544 typalign = my_extra->typalign;
2546 for (i = 0; i < nitems; i++)
2551 itemvalue = fetch_att(p, typbyval, typlen);
2553 value = DatumGetCString(FunctionCall1(&my_extra->proc,
2557 appendStringInfo(result_str, "%s%s", fldsep, value);
2559 appendStringInfoString(result_str, value);
2561 p = att_addlength(p, typlen, PointerGetDatum(p));
2562 p = (char *) att_align(p, typalign);
2565 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2570 * Convert a int32 to a string containing a base 16 (hex) representation of
2574 to_hex32(PG_FUNCTION_ARGS)
2576 uint32 value = (uint32) PG_GETARG_INT32(0);
2579 const char *digits = "0123456789abcdef";
2580 char buf[32]; /* bigger than needed, but reasonable */
2582 ptr = buf + sizeof(buf) - 1;
2587 *--ptr = digits[value % HEXBASE];
2589 } while (ptr > buf && value);
2591 result_text = PG_STR_GET_TEXT(ptr);
2592 PG_RETURN_TEXT_P(result_text);
2596 * Convert a int64 to a string containing a base 16 (hex) representation of
2600 to_hex64(PG_FUNCTION_ARGS)
2602 uint64 value = (uint64) PG_GETARG_INT64(0);
2605 const char *digits = "0123456789abcdef";
2606 char buf[32]; /* bigger than needed, but reasonable */
2608 ptr = buf + sizeof(buf) - 1;
2613 *--ptr = digits[value % HEXBASE];
2615 } while (ptr > buf && value);
2617 result_text = PG_STR_GET_TEXT(ptr);
2618 PG_RETURN_TEXT_P(result_text);
2622 * Create an md5 hash of a text string and return it as hex
2624 * md5 produces a 16 byte (128 bit) hash; double it for hex
2626 #define MD5_HASH_LEN 32
2629 md5_text(PG_FUNCTION_ARGS)
2631 text *in_text = PG_GETARG_TEXT_P(0);
2633 char hexsum[MD5_HASH_LEN + 1];
2636 /* Calculate the length of the buffer using varlena metadata */
2637 len = VARSIZE(in_text) - VARHDRSZ;
2639 /* get the hash result */
2640 if (pg_md5_hash(VARDATA(in_text), len, hexsum) == false)
2642 (errcode(ERRCODE_OUT_OF_MEMORY),
2643 errmsg("out of memory")));
2645 /* convert to text and return it */
2646 result_text = PG_STR_GET_TEXT(hexsum);
2647 PG_RETURN_TEXT_P(result_text);
2651 * Create an md5 hash of a bytea field and return it as a hex string:
2652 * 16-byte md5 digest is represented in 32 hex characters.
2655 md5_bytea(PG_FUNCTION_ARGS)
2657 bytea *in = PG_GETARG_BYTEA_P(0);
2659 char hexsum[MD5_HASH_LEN + 1];
2662 len = VARSIZE(in) - VARHDRSZ;
2663 if (pg_md5_hash(VARDATA(in), len, hexsum) == false)
2665 (errcode(ERRCODE_OUT_OF_MEMORY),
2666 errmsg("out of memory")));
2668 result_text = PG_STR_GET_TEXT(hexsum);
2669 PG_RETURN_TEXT_P(result_text);
2673 * Return the size of a datum, possibly compressed
2675 * Works on any data type
2678 pg_column_size(PG_FUNCTION_ARGS)
2680 Datum value = PG_GETARG_DATUM(0);
2684 /* On first call, get the input type's typlen, and save at *fn_extra */
2685 if (fcinfo->flinfo->fn_extra == NULL)
2687 /* Lookup the datatype of the supplied argument */
2688 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
2690 typlen = get_typlen(argtypeid);
2691 if (typlen == 0) /* should not happen */
2692 elog(ERROR, "cache lookup failed for type %u", argtypeid);
2694 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2696 *((int *) fcinfo->flinfo->fn_extra) = typlen;
2699 typlen = *((int *) fcinfo->flinfo->fn_extra);
2703 /* varlena type, possibly toasted */
2704 result = toast_datum_size(value);
2706 else if (typlen == -2)
2709 result = strlen(DatumGetCString(value)) + 1;
2713 /* ordinary fixed-width type */
2717 PG_RETURN_INT32(result);