1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.141 2005/11/22 18:17:23 momjian Exp $
13 *-------------------------------------------------------------------------
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/crypt.h"
23 #include "libpq/pqformat.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "regex/regex.h"
28 #include "utils/array.h"
29 #include "utils/builtins.h"
30 #include "utils/lsyscache.h"
31 #include "utils/pg_locale.h"
34 typedef struct varlena unknown;
36 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
37 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
38 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
39 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
40 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
42 #define PG_TEXTARG_GET_STR(arg_) \
43 DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
44 #define PG_TEXT_GET_STR(textp_) \
45 DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
46 #define PG_STR_GET_TEXT(str_) \
47 DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
48 #define TEXTLEN(textp) \
49 text_length(PointerGetDatum(textp))
50 #define TEXTPOS(buf_text, from_sub_text) \
51 text_position(buf_text, from_sub_text, 1)
52 #define LEFT(buf_text, from_sub_text) \
53 text_substring(PointerGetDatum(buf_text), \
55 TEXTPOS(buf_text, from_sub_text) - 1, false)
57 static int text_cmp(text *arg1, text *arg2);
58 static int32 text_length(Datum str);
59 static int32 text_position(text *t1, text *t2, int matchnum);
60 static text *text_substring(Datum str,
63 bool length_not_specified);
65 static void appendStringInfoText(StringInfo str, const text *t);
68 /*****************************************************************************
70 *****************************************************************************/
73 #define VAL(CH) ((CH) - '0')
74 #define DIG(VAL) ((VAL) + '0')
77 * byteain - converts from printable representation of byte array
79 * Non-printable characters must be passed as '\nnn' (octal) and are
80 * converted to internal form. '\' must be passed as '\\'.
81 * ereport(ERROR, ...) if bad form.
84 * The input is scanned twice.
85 * The error checking of input is minimal.
88 byteain(PG_FUNCTION_ARGS)
90 char *inputText = PG_GETARG_CSTRING(0);
96 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
100 else if ((tp[0] == '\\') &&
101 (tp[1] >= '0' && tp[1] <= '3') &&
102 (tp[2] >= '0' && tp[2] <= '7') &&
103 (tp[3] >= '0' && tp[3] <= '7'))
105 else if ((tp[0] == '\\') &&
111 * one backslash, not followed by 0 or ### valid octal
114 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
115 errmsg("invalid input syntax for type bytea")));
120 result = (bytea *) palloc(byte);
121 VARATT_SIZEP(result) = byte; /* set varlena length */
124 rp = VARDATA(result);
129 else if ((tp[0] == '\\') &&
130 (tp[1] >= '0' && tp[1] <= '3') &&
131 (tp[2] >= '0' && tp[2] <= '7') &&
132 (tp[3] >= '0' && tp[3] <= '7'))
138 *rp++ = byte + VAL(tp[3]);
141 else if ((tp[0] == '\\') &&
150 * We should never get here. The first pass should not allow it.
153 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
154 errmsg("invalid input syntax for type bytea")));
158 PG_RETURN_BYTEA_P(result);
162 * byteaout - converts to printable representation of byte array
164 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
167 * NULL vlena should be an error--returning string with NULL for now.
170 byteaout(PG_FUNCTION_ARGS)
172 bytea *vlena = PG_GETARG_BYTEA_P(0);
176 int val; /* holds unprintable chars */
180 len = 1; /* empty string has 1 char */
182 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
186 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
191 rp = result = (char *) palloc(len);
193 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
200 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
204 rp[3] = DIG(val & 07);
206 rp[2] = DIG(val & 07);
208 rp[1] = DIG(val & 03);
215 PG_RETURN_CSTRING(result);
219 * bytearecv - converts external binary format to bytea
222 bytearecv(PG_FUNCTION_ARGS)
224 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
/* take every byte from the current cursor position to the end of the buffer */
228 nbytes = buf->len - buf->cursor;
/* the result holds the payload plus the varlena header */
229 result = (bytea *) palloc(nbytes + VARHDRSZ);
/* the stored size counts the header as well as the payload */
230 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
/* pq_copymsgbytes is handed the result's data area and nbytes as the count */
231 pq_copymsgbytes(buf, VARDATA(result), nbytes);
232 PG_RETURN_BYTEA_P(result);
236 * byteasend - converts bytea to binary format
238 * This is a special case: just copy the input...
241 byteasend(PG_FUNCTION_ARGS)
243 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
245 PG_RETURN_BYTEA_P(vlena);
250 * textin - converts "..." to internal representation
253 textin(PG_FUNCTION_ARGS)
255 char *inputText = PG_GETARG_CSTRING(0);
259 /* verify encoding */
260 len = strlen(inputText);
261 pg_verifymbstr(inputText, len, false);
/* size the result as header plus exactly len payload bytes (no room for a NUL) */
263 result = (text *) palloc(len + VARHDRSZ);
/* the stored size counts the header as well as the payload */
264 VARATT_SIZEP(result) = len + VARHDRSZ;
/* copy only the len bytes measured by strlen(); the input's terminator is left behind */
266 memcpy(VARDATA(result), inputText, len);
268 PG_RETURN_TEXT_P(result);
272 * textout - converts internal representation to "..."
275 textout(PG_FUNCTION_ARGS)
277 text *t = PG_GETARG_TEXT_P(0);
/* payload length = total size minus the header */
281 len = VARSIZE(t) - VARHDRSZ;
/*
 * One byte more than the payload is allocated -- presumably for a
 * terminating NUL written on a line not visible here; TODO confirm.
 */
282 result = (char *) palloc(len + 1);
283 memcpy(result, VARDATA(t), len);
286 PG_RETURN_CSTRING(result);
290 * textrecv - converts external binary format to text
293 textrecv(PG_FUNCTION_ARGS)
295 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
300 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
302 /* verify encoding */
303 pg_verifymbstr(str, nbytes, false);
305 result = (text *) palloc(nbytes + VARHDRSZ);
306 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
307 memcpy(VARDATA(result), str, nbytes);
309 PG_RETURN_TEXT_P(result);
313 * textsend - converts text to binary format
316 textsend(PG_FUNCTION_ARGS)
318 text *t = PG_GETARG_TEXT_P(0);
321 pq_begintypsend(&buf);
322 pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
323 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
328 * unknownin - converts "..." to internal representation
331 unknownin(PG_FUNCTION_ARGS)
333 char *str = PG_GETARG_CSTRING(0);
335 /* representation is same as cstring */
336 PG_RETURN_CSTRING(pstrdup(str));
340 * unknownout - converts internal representation to "..."
343 unknownout(PG_FUNCTION_ARGS)
345 /* representation is same as cstring */
346 char *str = PG_GETARG_CSTRING(0);
348 PG_RETURN_CSTRING(pstrdup(str));
352 * unknownrecv - converts external binary format to unknown
355 unknownrecv(PG_FUNCTION_ARGS)
357 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
361 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
362 /* representation is same as cstring */
363 PG_RETURN_CSTRING(str);
367 * unknownsend - converts unknown to binary format
370 unknownsend(PG_FUNCTION_ARGS)
372 /* representation is same as cstring */
373 char *str = PG_GETARG_CSTRING(0);
376 pq_begintypsend(&buf);
377 pq_sendtext(&buf, str, strlen(str));
378 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
382 /* ========== PUBLIC ROUTINES ========== */
386 * returns the logical length of a text*
387 * (which is less than the VARSIZE of the text*)
390 textlen(PG_FUNCTION_ARGS)
392 Datum str = PG_GETARG_DATUM(0);
394 /* try to avoid decompressing argument */
395 PG_RETURN_INT32(text_length(str));
400 * Does the real work for textlen()
402 * This is broken out so it can be called directly by other string processing
403 * functions. Note that the argument is passed as a Datum, to indicate that
404 * it may still be in compressed form. We can avoid decompressing it at all
/*
 * NOTE(review): the forward declaration in this file is
 * "static int32 text_length(Datum str)", yet the body returns through
 * PG_RETURN_INT32, which is the macro the PG_FUNCTION_ARGS entry points use.
 * A plain "return" looks like the better fit -- confirm against fmgr.h.
 */
408 text_length(Datum str)
410 /* fastpath when max encoding length is one */
411 if (pg_database_encoding_max_length() == 1)
412 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
/*
 * Otherwise the value itself is fetched with DatumGetTextP() and its
 * characters are counted over the payload bytes.
 */
415 text *t = DatumGetTextP(str);
417 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
418 VARSIZE(t) - VARHDRSZ));
424 * returns the physical length of a text*
425 * (which is less than the VARSIZE of the text*)
428 textoctetlen(PG_FUNCTION_ARGS)
430 Datum str = PG_GETARG_DATUM(0);
432 /* We need not detoast the input at all */
433 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
438 * takes two text* and returns a text* that is the concatenation of
441 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
442 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
443 * Allocate space for output in all cases.
444 * XXX - thomas 1997-07-10
447 textcat(PG_FUNCTION_ARGS)
449 text *t1 = PG_GETARG_TEXT_P(0);
450 text *t2 = PG_GETARG_TEXT_P(1);
/* payload lengths: each excludes its own varlena header */
457 len1 = VARSIZE(t1) - VARHDRSZ;
461 len2 = VARSIZE(t2) - VARHDRSZ;
/*
 * The result needs both payloads but only one header.
 * NOTE(review): no overflow check on len1 + len2 is visible here -- confirm
 * whether the allocator's size limit is being relied on.
 */
465 len = len1 + len2 + VARHDRSZ;
466 result = (text *) palloc(len);
468 /* Set size of result string... */
469 VARATT_SIZEP(result) = len;
471 /* Fill data field of result string... */
472 ptr = VARDATA(result);
/* t1's bytes come first ... */
474 memcpy(ptr, VARDATA(t1), len1);
/* ... and t2's bytes follow immediately after them */
476 memcpy(ptr + len1, VARDATA(t2), len2);
478 PG_RETURN_TEXT_P(result);
483 * Return a substring starting at the specified position.
484 * - thomas 1997-12-31
488 * - starting position (is one-based)
491 * If the starting position is zero or less, then return from the start of the string
492 * adjusting the length to be consistent with the "negative start" per SQL92.
493 * If the length is less than zero, an error is raised; if no length is given, the remaining string is returned.
495 * Added multibyte support.
496 * - Tatsuo Ishii 1998-4-21
497 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
498 * Formerly returned the entire string; now returns a portion.
499 * - Thomas Lockhart 1998-12-10
500 * Now uses faster TOAST-slicing interface
501 * - John Gray 2002-02-22
502 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
503 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
504 * error; if E < 1, return '', not entire string). Fixed MB related bug when
505 * S > LC and < LC + 4 sometimes garbage characters are returned.
506 * - Joe Conway 2002-08-10
509 text_substr(PG_FUNCTION_ARGS)
511 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
518 * text_substr_no_len -
519 * Wrapper to avoid opr_sanity failure due to
520 * one function accepting a different number of args.
523 text_substr_no_len(PG_FUNCTION_ARGS)
525 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
532 * Does the real work for text_substr() and text_substr_no_len()
534 * This is broken out so it can be called directly by other string processing
535 * functions. Note that the argument is passed as a Datum, to indicate that
536 * it may still be in compressed/toasted form. We can avoid detoasting all
537 * of it in some cases.
540 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
542 int32 eml = pg_database_encoding_max_length();
543 int32 S = start; /* start position */
544 int32 S1; /* adjusted start position */
545 int32 L1; /* adjusted substring length */
547 /* life is easy if the encoding max length is 1 */
552 if (length_not_specified) /* special case - get length to end of
561 * A negative value for L is the only way for the end position to
562 * be before the start. SQL99 says to throw an error.
566 (errcode(ERRCODE_SUBSTRING_ERROR),
567 errmsg("negative substring length not allowed")));
570 * A zero or negative value for the end position can happen if the
571 * start was negative or one. SQL99 says to return a zero-length
575 return PG_STR_GET_TEXT("");
581 * If the start position is past the end of the string, SQL99 says to
582 * return a zero-length string -- DatumGetTextPSlice() will do
583 * that for us. Convert to zero-based starting position.
585 return DatumGetTextPSlice(str, S1 - 1, L1);
590 * When encoding max length is > 1, we can't get LC without
591 * detoasting, so we'll grab a conservatively large slice now and go
592 * back later to do the right thing
605 * if S is past the end of the string, the tuple toaster will return a
606 * zero-length string to us
611 * We need to start at position zero because there is no way to know
612 * in advance which byte offset corresponds to the supplied start
617 if (length_not_specified) /* special case - get length to end of
619 slice_size = L1 = -1;
625 * A negative value for L is the only way for the end position to
626 * be before the start. SQL99 says to throw an error.
630 (errcode(ERRCODE_SUBSTRING_ERROR),
631 errmsg("negative substring length not allowed")));
634 * A zero or negative value for the end position can happen if the
635 * start was negative or one. SQL99 says to return a zero-length
639 return PG_STR_GET_TEXT("");
642 * if E is past the end of the string, the tuple toaster will
643 * truncate the length for us
648 * Total slice size in bytes can't be any longer than the start
649 * position plus substring length times the encoding max length.
651 slice_size = (S1 + L1) * eml;
653 slice = DatumGetTextPSlice(str, slice_start, slice_size);
655 /* see if we got back an empty string */
656 if ((VARSIZE(slice) - VARHDRSZ) == 0)
657 return PG_STR_GET_TEXT("");
659 /* Now we can get the actual length of the slice in MB characters */
660 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
663 * Check that the start position wasn't > slice_strlen. If so, SQL99
664 * says to return a zero-length string.
666 if (S1 > slice_strlen)
667 return PG_STR_GET_TEXT("");
670 * Adjust L1 and E1 now that we know the slice string length. Again
671 * remember that S1 is one based, and slice_start is zero based.
674 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
676 E1 = slice_start + 1 + slice_strlen;
679 * Find the start position in the slice; remember S1 is not zero based
682 for (i = 0; i < S1 - 1; i++)
685 /* hang onto a pointer to our start position */
689 * Count the actual bytes used by the substring of the requested
692 for (i = S1; i < E1; i++)
695 ret = (text *) palloc(VARHDRSZ + (p - s));
696 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
697 memcpy(VARDATA(ret), s, (p - s));
702 elog(ERROR, "invalid backend encoding: encoding max length < 1");
704 /* not reached: suppress compiler warning */
710 * Return the position of the specified substring.
711 * Implements the SQL92 POSITION() function.
712 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
713 * - thomas 1997-07-27
716 textpos(PG_FUNCTION_ARGS)
718 text *str = PG_GETARG_TEXT_P(0);
719 text *search_str = PG_GETARG_TEXT_P(1);
721 PG_RETURN_INT32(text_position(str, search_str, 1));
726 * Does the real work for textpos()
729 * t1 - string to be searched
730 * t2 - pattern to match within t1
731 * matchnum - number of the match to be found (1 is the first match)
733 * Character index of the first matched char, starting from 1,
736 * This is broken out so it can be called directly by other string processing
740 text_position(text *t1, text *t2, int matchnum)
750 return 0; /* result for 0th match */
752 if (VARSIZE(t2) <= VARHDRSZ)
753 return 1; /* result for empty pattern */
755 len1 = VARSIZE(t1) - VARHDRSZ;
756 len2 = VARSIZE(t2) - VARHDRSZ;
758 if (pg_database_encoding_max_length() == 1)
760 /* simple case - single byte encoding */
767 /* no use in searching str past point where search_str will fit */
770 for (p = 0; p <= px; p++)
772 if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
774 if (++match == matchnum)
785 /* not as simple - multibyte encoding */
791 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
792 (void) pg_mb2wchar_with_len(VARDATA(t1), p1, len1);
793 len1 = pg_wchar_strlen(p1);
794 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
795 (void) pg_mb2wchar_with_len(VARDATA(t2), p2, len2);
796 len2 = pg_wchar_strlen(p2);
798 /* no use in searching str past point where search_str will fit */
801 for (p = 0; p <= px; p++)
803 if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
805 if (++match == matchnum)
822 * Comparison function for text strings with given lengths.
823 * Includes locale support, but must copy strings to temporary memory
824 * to allow null-termination for inputs to strcoll().
828 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
833 * Unfortunately, there is no strncoll(), so in the non-C locale case we
834 * have to do some memory copying. This turns out to be significantly
835 * slower, so we optimize the case where LC_COLLATE is C. We also try to
836 * optimize relatively-short strings by avoiding palloc/pfree overhead.
838 if (lc_collate_is_c())
840 result = strncmp(arg1, arg2, Min(len1, len2));
841 if ((result == 0) && (len1 != len2))
842 result = (len1 < len2) ? -1 : 1;
846 #define STACKBUFLEN 1024
848 char a1buf[STACKBUFLEN];
849 char a2buf[STACKBUFLEN];
854 /* Win32 does not have UTF-8, so we need to map to UTF-16 */
855 if (GetDatabaseEncoding() == PG_UTF8)
861 if (len1 >= STACKBUFLEN / 2)
863 a1len = len1 * 2 + 2;
871 if (len2 >= STACKBUFLEN / 2)
873 a2len = len2 * 2 + 2;
882 /* the Win32 conversion API does not work for zero-length input */
887 r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
888 (LPWSTR) a1p, a1len / 2);
891 (errmsg("could not convert string to UTF-16: error %lu",
894 ((LPWSTR) a1p)[r] = 0;
900 r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
901 (LPWSTR) a2p, a2len / 2);
904 (errmsg("could not convert string to UTF-16: error %lu",
907 ((LPWSTR) a2p)[r] = 0;
910 result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
911 if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
914 (errmsg("could not compare Unicode strings: %m")));
925 if (len1 >= STACKBUFLEN)
926 a1p = (char *) palloc(len1 + 1);
929 if (len2 >= STACKBUFLEN)
930 a2p = (char *) palloc(len2 + 1);
934 memcpy(a1p, arg1, len1);
936 memcpy(a2p, arg2, len2);
939 result = strcoll(a1p, a2p);
952 * Internal comparison function for text strings.
956 text_cmp(text *arg1, text *arg2)
966 len1 = VARSIZE(arg1) - VARHDRSZ;
967 len2 = VARSIZE(arg2) - VARHDRSZ;
969 return varstr_cmp(a1p, len1, a2p, len2);
973 * Comparison functions for text strings.
975 * Note: btree indexes need these routines not to leak memory; therefore,
976 * be careful to free working copies of toasted datums. Most places don't
977 * need to be so careful.
981 texteq(PG_FUNCTION_ARGS)
983 text *arg1 = PG_GETARG_TEXT_P(0);
984 text *arg2 = PG_GETARG_TEXT_P(1);
987 /* fast path for different-length inputs */
988 if (VARSIZE(arg1) != VARSIZE(arg2))
991 result = (text_cmp(arg1, arg2) == 0);
/*
 * Free the argument copies (if any were made) before returning; the note
 * heading this group of comparison functions explains that btree indexes
 * need these routines not to leak memory.
 */
993 PG_FREE_IF_COPY(arg1, 0);
994 PG_FREE_IF_COPY(arg2, 1);
996 PG_RETURN_BOOL(result);
1000 textne(PG_FUNCTION_ARGS)
1002 text *arg1 = PG_GETARG_TEXT_P(0);
1003 text *arg2 = PG_GETARG_TEXT_P(1);
1006 /* fast path for different-length inputs */
1007 if (VARSIZE(arg1) != VARSIZE(arg2))
1010 result = (text_cmp(arg1, arg2) != 0);
1012 PG_FREE_IF_COPY(arg1, 0);
1013 PG_FREE_IF_COPY(arg2, 1);
1015 PG_RETURN_BOOL(result);
1019 text_lt(PG_FUNCTION_ARGS)
1021 text *arg1 = PG_GETARG_TEXT_P(0);
1022 text *arg2 = PG_GETARG_TEXT_P(1);
1025 result = (text_cmp(arg1, arg2) < 0);
1027 PG_FREE_IF_COPY(arg1, 0);
1028 PG_FREE_IF_COPY(arg2, 1);
1030 PG_RETURN_BOOL(result);
1034 text_le(PG_FUNCTION_ARGS)
1036 text *arg1 = PG_GETARG_TEXT_P(0);
1037 text *arg2 = PG_GETARG_TEXT_P(1);
1040 result = (text_cmp(arg1, arg2) <= 0);
1042 PG_FREE_IF_COPY(arg1, 0);
1043 PG_FREE_IF_COPY(arg2, 1);
1045 PG_RETURN_BOOL(result);
1049 text_gt(PG_FUNCTION_ARGS)
1051 text *arg1 = PG_GETARG_TEXT_P(0);
1052 text *arg2 = PG_GETARG_TEXT_P(1);
1055 result = (text_cmp(arg1, arg2) > 0);
1057 PG_FREE_IF_COPY(arg1, 0);
1058 PG_FREE_IF_COPY(arg2, 1);
1060 PG_RETURN_BOOL(result);
1064 text_ge(PG_FUNCTION_ARGS)
1066 text *arg1 = PG_GETARG_TEXT_P(0);
1067 text *arg2 = PG_GETARG_TEXT_P(1);
1070 result = (text_cmp(arg1, arg2) >= 0);
1072 PG_FREE_IF_COPY(arg1, 0);
1073 PG_FREE_IF_COPY(arg2, 1);
1075 PG_RETURN_BOOL(result);
1079 bttextcmp(PG_FUNCTION_ARGS)
1081 text *arg1 = PG_GETARG_TEXT_P(0);
1082 text *arg2 = PG_GETARG_TEXT_P(1);
1085 result = text_cmp(arg1, arg2);
1087 PG_FREE_IF_COPY(arg1, 0);
1088 PG_FREE_IF_COPY(arg2, 1);
1090 PG_RETURN_INT32(result);
1095 text_larger(PG_FUNCTION_ARGS)
1097 text *arg1 = PG_GETARG_TEXT_P(0);
1098 text *arg2 = PG_GETARG_TEXT_P(1);
1101 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1103 PG_RETURN_TEXT_P(result);
1107 text_smaller(PG_FUNCTION_ARGS)
1109 text *arg1 = PG_GETARG_TEXT_P(0);
1110 text *arg2 = PG_GETARG_TEXT_P(1);
1113 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1115 PG_RETURN_TEXT_P(result);
1120 * The following operators support character-by-character comparison
1121 * of text data types, to allow building indexes suitable for LIKE
1126 internal_text_pattern_compare(text *arg1, text *arg2)
/*
 * Byte-wise memcmp() over the common prefix: the count is the smaller of
 * the two VARSIZEs minus the header, i.e. the shorter payload length.
 */
1130 result = memcmp(VARDATA(arg1), VARDATA(arg2),
1131 Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
/*
 * The remaining branches compare the two total sizes; their bodies are on
 * lines not visible here.
 */
1134 else if (VARSIZE(arg1) < VARSIZE(arg2))
1136 else if (VARSIZE(arg1) > VARSIZE(arg2))
1144 text_pattern_lt(PG_FUNCTION_ARGS)
1146 text *arg1 = PG_GETARG_TEXT_P(0);
1147 text *arg2 = PG_GETARG_TEXT_P(1);
1150 result = internal_text_pattern_compare(arg1, arg2);
1152 PG_FREE_IF_COPY(arg1, 0);
1153 PG_FREE_IF_COPY(arg2, 1);
1155 PG_RETURN_BOOL(result < 0);
1160 text_pattern_le(PG_FUNCTION_ARGS)
1162 text *arg1 = PG_GETARG_TEXT_P(0);
1163 text *arg2 = PG_GETARG_TEXT_P(1);
1166 result = internal_text_pattern_compare(arg1, arg2);
1168 PG_FREE_IF_COPY(arg1, 0);
1169 PG_FREE_IF_COPY(arg2, 1);
1171 PG_RETURN_BOOL(result <= 0);
1176 text_pattern_eq(PG_FUNCTION_ARGS)
1178 text *arg1 = PG_GETARG_TEXT_P(0);
1179 text *arg2 = PG_GETARG_TEXT_P(1);
1182 if (VARSIZE(arg1) != VARSIZE(arg2))
1185 result = internal_text_pattern_compare(arg1, arg2);
1187 PG_FREE_IF_COPY(arg1, 0);
1188 PG_FREE_IF_COPY(arg2, 1);
1190 PG_RETURN_BOOL(result == 0);
1195 text_pattern_ge(PG_FUNCTION_ARGS)
1197 text *arg1 = PG_GETARG_TEXT_P(0);
1198 text *arg2 = PG_GETARG_TEXT_P(1);
1201 result = internal_text_pattern_compare(arg1, arg2);
1203 PG_FREE_IF_COPY(arg1, 0);
1204 PG_FREE_IF_COPY(arg2, 1);
1206 PG_RETURN_BOOL(result >= 0);
1211 text_pattern_gt(PG_FUNCTION_ARGS)
1213 text *arg1 = PG_GETARG_TEXT_P(0);
1214 text *arg2 = PG_GETARG_TEXT_P(1);
1217 result = internal_text_pattern_compare(arg1, arg2);
1219 PG_FREE_IF_COPY(arg1, 0);
1220 PG_FREE_IF_COPY(arg2, 1);
1222 PG_RETURN_BOOL(result > 0);
1227 text_pattern_ne(PG_FUNCTION_ARGS)
1229 text *arg1 = PG_GETARG_TEXT_P(0);
1230 text *arg2 = PG_GETARG_TEXT_P(1);
1233 if (VARSIZE(arg1) != VARSIZE(arg2))
1236 result = internal_text_pattern_compare(arg1, arg2);
1238 PG_FREE_IF_COPY(arg1, 0);
1239 PG_FREE_IF_COPY(arg2, 1);
1241 PG_RETURN_BOOL(result != 0);
1246 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1248 text *arg1 = PG_GETARG_TEXT_P(0);
1249 text *arg2 = PG_GETARG_TEXT_P(1);
1252 result = internal_text_pattern_compare(arg1, arg2);
1254 PG_FREE_IF_COPY(arg1, 0);
1255 PG_FREE_IF_COPY(arg2, 1);
1257 PG_RETURN_INT32(result);
1261 /*-------------------------------------------------------------
1264 * get the number of bytes contained in an instance of type 'bytea'
1265 *-------------------------------------------------------------
1268 byteaoctetlen(PG_FUNCTION_ARGS)
1270 Datum str = PG_GETARG_DATUM(0);
1272 /* We need not detoast the input at all */
1273 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1278 * takes two bytea* and returns a bytea* that is the concatenation of
1281 * Cloned from textcat and modified as required.
1284 byteacat(PG_FUNCTION_ARGS)
1286 bytea *t1 = PG_GETARG_BYTEA_P(0);
1287 bytea *t2 = PG_GETARG_BYTEA_P(1);
1294 len1 = VARSIZE(t1) - VARHDRSZ;
1298 len2 = VARSIZE(t2) - VARHDRSZ;
1302 len = len1 + len2 + VARHDRSZ;
1303 result = (bytea *) palloc(len);
1305 /* Set size of result string... */
1306 VARATT_SIZEP(result) = len;
1308 /* Fill data field of result string... */
1309 ptr = VARDATA(result);
1311 memcpy(ptr, VARDATA(t1), len1);
1313 memcpy(ptr + len1, VARDATA(t2), len2);
1315 PG_RETURN_BYTEA_P(result);
1318 #define PG_STR_GET_BYTEA(str_) \
1319 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1322 * Return a substring starting at the specified position.
1323 * Cloned from text_substr and modified as required.
1327 * - starting position (is one-based)
1328 * - string length (optional)
1330 * If the starting position is zero or less, then return from the start of the string
1331 * adjusting the length to be consistent with the "negative start" per SQL92.
1332 * If the length is less than zero, an ERROR is thrown. If no third argument
1333 * (length) is provided, the length to the end of the string is assumed.
1336 bytea_substr(PG_FUNCTION_ARGS)
1338 int S = PG_GETARG_INT32(1); /* start position */
1339 int S1; /* adjusted start position */
1340 int L1; /* adjusted substring length */
1344 if (fcinfo->nargs == 2)
1347 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1348 * the end of the string if we pass it a negative value for length.
1355 int E = S + PG_GETARG_INT32(2);
1358 * A negative value for L is the only way for the end position to be
1359 * before the start. SQL99 says to throw an error.
1363 (errcode(ERRCODE_SUBSTRING_ERROR),
1364 errmsg("negative substring length not allowed")));
1367 * A zero or negative value for the end position can happen if the
1368 * start was negative or one. SQL99 says to return a zero-length
1372 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1378 * If the start position is past the end of the string, SQL99 says to
1379 * return a zero-length string -- PG_GETARG_BYTEA_P_SLICE() will do that
1380 * for us. Convert to zero-based starting position.
1382 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1386 * bytea_substr_no_len -
1387 * Wrapper to avoid opr_sanity failure due to
1388 * one function accepting a different number of args.
1391 bytea_substr_no_len(PG_FUNCTION_ARGS)
1393 return bytea_substr(fcinfo);
1398 * Return the position of the specified substring.
1399 * Implements the SQL92 POSITION() function.
1400 * Cloned from textpos and modified as required.
1403 byteapos(PG_FUNCTION_ARGS)
1405 bytea *t1 = PG_GETARG_BYTEA_P(0);
1406 bytea *t2 = PG_GETARG_BYTEA_P(1);
1415 if (VARSIZE(t2) <= VARHDRSZ)
1416 PG_RETURN_INT32(1); /* result for empty pattern */
1418 len1 = VARSIZE(t1) - VARHDRSZ;
1419 len2 = VARSIZE(t2) - VARHDRSZ;
1426 for (p = 0; p <= px; p++)
1428 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1436 PG_RETURN_INT32(pos);
1439 /*-------------------------------------------------------------
1442 * this routine treats "bytea" as an array of bytes.
1443 * It returns the Nth byte (a number between 0 and 255).
1444 *-------------------------------------------------------------
1447 byteaGetByte(PG_FUNCTION_ARGS)
1449 bytea *v = PG_GETARG_BYTEA_P(0);
1450 int32 n = PG_GETARG_INT32(1);
1454 len = VARSIZE(v) - VARHDRSZ;
1456 if (n < 0 || n >= len)
1458 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1459 errmsg("index %d out of valid range, 0..%d",
1462 byte = ((unsigned char *) VARDATA(v))[n];
1464 PG_RETURN_INT32(byte);
1467 /*-------------------------------------------------------------
1470 * This routine treats a "bytea" type like an array of bits.
1471 * It returns the value of the Nth bit (0 or 1).
1473 *-------------------------------------------------------------
1476 byteaGetBit(PG_FUNCTION_ARGS)
1478 bytea *v = PG_GETARG_BYTEA_P(0);
1479 int32 n = PG_GETARG_INT32(1);
/* len is the payload size in bytes (header excluded) */
1485 len = VARSIZE(v) - VARHDRSZ;
/*
 * n is a bit index, so the exclusive upper bound is len * 8.
 * NOTE(review): len * 8 could overflow for very large values if len is a
 * 32-bit int -- its declaration is not visible here; confirm.
 */
1487 if (n < 0 || n >= len * 8)
1489 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1490 errmsg("index %d out of valid range, 0..%d",
/*
 * Read the selected byte as unsigned; byteNo and bitNo are computed on
 * lines not visible here.
 */
1496 byte = ((unsigned char *) VARDATA(v))[byteNo];
/* test the single bit at position bitNo within that byte */
1498 if (byte & (1 << bitNo))
1504 /*-------------------------------------------------------------
1507 * Given an instance of type 'bytea' creates a new one with
1508 * the Nth byte set to the given value.
1510 *-------------------------------------------------------------
1513 byteaSetByte(PG_FUNCTION_ARGS)
1515 bytea *v = PG_GETARG_BYTEA_P(0);
1516 int32 n = PG_GETARG_INT32(1);
1517 int32 newByte = PG_GETARG_INT32(2);
1521 len = VARSIZE(v) - VARHDRSZ;
1523 if (n < 0 || n >= len)
1525 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1526 errmsg("index %d out of valid range, 0..%d",
1530 * Make a copy of the original varlena.
1532 res = (bytea *) palloc(VARSIZE(v));
1533 memcpy((char *) res, (char *) v, VARSIZE(v));
1538 ((unsigned char *) VARDATA(res))[n] = newByte;
1540 PG_RETURN_BYTEA_P(res);
1543 /*-------------------------------------------------------------
1546 * Given an instance of type 'bytea' creates a new one with
1547 * the Nth bit set to the given value.
1549 *-------------------------------------------------------------
1552 byteaSetBit(PG_FUNCTION_ARGS)
1554 bytea *v = PG_GETARG_BYTEA_P(0);
1555 int32 n = PG_GETARG_INT32(1);
1556 int32 newBit = PG_GETARG_INT32(2);
1564 len = VARSIZE(v) - VARHDRSZ;
1566 if (n < 0 || n >= len * 8)
1568 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1569 errmsg("index %d out of valid range, 0..%d",
1578 if (newBit != 0 && newBit != 1)
1580 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1581 errmsg("new bit must be 0 or 1")));
1584 * Make a copy of the original varlena.
1586 res = (bytea *) palloc(VARSIZE(v));
1587 memcpy((char *) res, (char *) v, VARSIZE(v));
1592 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1595 newByte = oldByte & (~(1 << bitNo));
1597 newByte = oldByte | (1 << bitNo);
1599 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1601 PG_RETURN_BYTEA_P(res);
1606 * Converts a text type to a Name type.
1609 text_name(PG_FUNCTION_ARGS)
1611 text *s = PG_GETARG_TEXT_P(0);
/* payload length in bytes (header excluded) */
1615 len = VARSIZE(s) - VARHDRSZ;
1617 /* Truncate oversize input */
/*
 * Capping at NAMEDATALEN - 1 leaves at least one byte of the NAMEDATALEN
 * buffer free for the padding written below.
 * NOTE(review): the cut is made at a byte count; in a multibyte encoding
 * this could split a character -- confirm whether that matters here.
 */
1618 if (len >= NAMEDATALEN)
1619 len = NAMEDATALEN - 1;
/* debug output; presumably guarded by a conditional not visible here -- TODO confirm */
1622 printf("text- convert string length %d (%d) ->%d\n",
1623 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
/* the result is always a full NAMEDATALEN-byte buffer, whatever the input length */
1626 result = (Name) palloc(NAMEDATALEN);
1627 memcpy(NameStr(*result), VARDATA(s), len);
1629 /* now null pad to full length... */
1630 while (len < NAMEDATALEN)
1632 *(NameStr(*result) + len) = '\0';
1636 PG_RETURN_NAME(result);
1640 * Converts a Name type to a text type.
1643 name_text(PG_FUNCTION_ARGS)
1645 Name s = PG_GETARG_NAME(0);
1649 len = strlen(NameStr(*s));
1652 printf("text- convert string length %d (%d) ->%d\n",
1653 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1656 result = palloc(VARHDRSZ + len);
1657 VARATT_SIZEP(result) = VARHDRSZ + len;
1658 memcpy(VARDATA(result), NameStr(*s), len);
1660 PG_RETURN_TEXT_P(result);
1665 * textToQualifiedNameList - convert a text object to list of names
1667 * This implements the input parsing needed by nextval() and other
1668 * functions that take a text parameter representing a qualified name.
1669 * We split the name at dots, downcase if not double-quoted, and
1670 * truncate names if they're too long.
1673 textToQualifiedNameList(text *textval)
1680 /* Convert to C string (handles possible detoasting). */
1681 /* Note we rely on being able to modify rawname below. */
1682 rawname = DatumGetCString(DirectFunctionCall1(textout,
1683 PointerGetDatum(textval)));
1685 if (!SplitIdentifierString(rawname, '.', &namelist))
1687 (errcode(ERRCODE_INVALID_NAME),
1688 errmsg("invalid name syntax")));
1690 if (namelist == NIL)
1692 (errcode(ERRCODE_INVALID_NAME),
1693 errmsg("invalid name syntax")));
1695 foreach(l, namelist)
1697 char *curname = (char *) lfirst(l);
1699 result = lappend(result, makeString(pstrdup(curname)));
1703 list_free(namelist);
1709 * SplitIdentifierString --- parse a string containing identifiers
1711 * This is the guts of textToQualifiedNameList, and is exported for use in
1712 * other situations such as parsing GUC variables. In the GUC case, it's
1713 * important to avoid memory leaks, so the API is designed to minimize the
1714 * amount of stuff that needs to be allocated and freed.
1717 * rawstring: the input string; must be overwritable! On return, it's
1718 * been modified to contain the separated identifiers.
1719 * separator: the separator punctuation expected between identifiers
1720 * (typically '.' or ','). Whitespace may also appear around
1723 * namelist: filled with a palloc'd list of pointers to identifiers within
1724 * rawstring. Caller should list_free() this even on error return.
1726 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1728 * Note that an empty string is considered okay here, though not in
1729 * textToQualifiedNameList.
/*
 * SplitIdentifierString: walk rawstring in place, isolating one identifier
 * at a time and appending a pointer to it to *namelist.  Returns true on
 * success (an empty string counts as success) and false on a syntax error
 * such as mismatched quotes or an empty unquoted name.
 */
1732 SplitIdentifierString(char *rawstring, char separator,
1735 char *nextp = rawstring;
1740 while (isspace((unsigned char) *nextp))
1741 nextp++; /* skip leading whitespace */
1744 return true; /* allow empty string */
1746 /* At the top of the loop, we are at start of a new identifier. */
1754 /* Quoted name --- collapse quote-quote pairs, no downcasing */
1755 curname = nextp + 1;
/* find the next double quote after the current scan position */
1758 endp = strchr(nextp + 1, '\"');
1760 return false; /* mismatched quotes */
1761 if (endp[1] != '\"')
1762 break; /* found end of quoted name */
1763 /* Collapse adjacent quotes into one quote, and look again */
/* the strlen(endp) bytes starting at endp + 1 include the terminating NUL */
1764 memmove(endp, endp + 1, strlen(endp));
1767 /* endp now points at the terminating quote */
1772 /* Unquoted name --- extends to separator or whitespace */
1777 while (*nextp && *nextp != separator &&
1778 !isspace((unsigned char) *nextp))
1781 if (curname == nextp)
1782 return false; /* empty unquoted name not allowed */
1785 * Downcase the identifier, using same code as main lexer does.
1787 * XXX because we want to overwrite the input in-place, we cannot
1788 * support a downcasing transformation that increases the string
1789 * length. This is not a problem given the current implementation
1790 * of downcase_truncate_identifier, but we'll probably have to do
1791 * something about this someday.
1793 len = endp - curname;
1794 downname = downcase_truncate_identifier(curname, len, false);
1795 Assert(strlen(downname) <= len);
/* strncpy writes exactly len bytes (NUL-padding a shorter downname), staying inside the identifier */
1796 strncpy(curname, downname, len);
1800 while (isspace((unsigned char) *nextp))
1801 nextp++; /* skip trailing whitespace */
1803 if (*nextp == separator)
1806 while (isspace((unsigned char) *nextp))
1807 nextp++; /* skip leading whitespace for next */
1808 /* we expect another name, so done remains false */
1810 else if (*nextp == '\0')
1813 return false; /* invalid syntax */
1815 /* Now safe to overwrite separator with a null */
1818 /* Truncate name if it's overlength */
1819 truncate_identifier(curname, strlen(curname), false);
1822 * Finished isolating current name --- add it to list
1824 *namelist = lappend(*namelist, curname);
1826 /* Loop back if we didn't reach end of string */
1833 /*****************************************************************************
1834 * Comparison Functions used for bytea
1836 * Note: btree indexes need these routines not to leak memory; therefore,
1837 * be careful to free working copies of toasted datums. Most places don't
1838 * need to be so careful.
1839 *****************************************************************************/
/*
 * byteaeq: equality test for two bytea values.  The result is true when
 * memcmp finds the data bytes identical.
 */
1842 byteaeq(PG_FUNCTION_ARGS)
1844 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1845 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* data lengths, not counting the varlena header */
1850 len1 = VARSIZE(arg1) - VARHDRSZ;
1851 len2 = VARSIZE(arg2) - VARHDRSZ;
1853 /* fast path for different-length inputs */
/* NOTE(review): this len1-byte memcmp presumably runs only when len1 == len2 -- confirm */
1857 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
/* release any detoasted working copies so repeated index comparisons do not leak */
1859 PG_FREE_IF_COPY(arg1, 0);
1860 PG_FREE_IF_COPY(arg2, 1);
1862 PG_RETURN_BOOL(result);
/*
 * byteane: inequality test for two bytea values.  The result is true when
 * memcmp finds a difference in the data bytes.
 */
1866 byteane(PG_FUNCTION_ARGS)
1868 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1869 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* data lengths, not counting the varlena header */
1874 len1 = VARSIZE(arg1) - VARHDRSZ;
1875 len2 = VARSIZE(arg2) - VARHDRSZ;
1877 /* fast path for different-length inputs */
/* NOTE(review): this len1-byte memcmp presumably runs only when len1 == len2 -- confirm */
1881 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
/* release any detoasted working copies before returning */
1883 PG_FREE_IF_COPY(arg1, 0);
1884 PG_FREE_IF_COPY(arg2, 1);
1886 PG_RETURN_BOOL(result);
/*
 * bytealt: "less than" for bytea.  The common prefix is compared with
 * memcmp; if the prefixes tie, the shorter input is the smaller one.
 */
1890 bytealt(PG_FUNCTION_ARGS)
1892 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1893 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* data lengths, not counting the varlena header */
1898 len1 = VARSIZE(arg1) - VARHDRSZ;
1899 len2 = VARSIZE(arg2) - VARHDRSZ;
/* compare only the bytes that both inputs have */
1901 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* cmp, len1 and len2 are already captured, so the working copies can be freed */
1903 PG_FREE_IF_COPY(arg1, 0);
1904 PG_FREE_IF_COPY(arg2, 1);
1906 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
/*
 * byteale: "less than or equal" for bytea.  True when the common prefix
 * compares lower, or when it ties and arg1 is not longer than arg2.
 */
1910 byteale(PG_FUNCTION_ARGS)
1912 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1913 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* data lengths, not counting the varlena header */
1918 len1 = VARSIZE(arg1) - VARHDRSZ;
1919 len2 = VARSIZE(arg2) - VARHDRSZ;
/* compare only the bytes that both inputs have */
1921 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* the inputs are not read again below, so free any working copies */
1923 PG_FREE_IF_COPY(arg1, 0);
1924 PG_FREE_IF_COPY(arg2, 1);
1926 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
/*
 * byteagt: "greater than" for bytea.  True when the common prefix compares
 * higher, or when it ties and arg1 is the longer input.
 */
1930 byteagt(PG_FUNCTION_ARGS)
1932 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1933 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* data lengths, not counting the varlena header */
1938 len1 = VARSIZE(arg1) - VARHDRSZ;
1939 len2 = VARSIZE(arg2) - VARHDRSZ;
/* compare only the bytes that both inputs have */
1941 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* the inputs are not read again below, so free any working copies */
1943 PG_FREE_IF_COPY(arg1, 0);
1944 PG_FREE_IF_COPY(arg2, 1);
1946 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
/*
 * byteage: "greater than or equal" for bytea.  True when the common prefix
 * compares higher, or when it ties and arg1 is not shorter than arg2.
 */
1950 byteage(PG_FUNCTION_ARGS)
1952 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1953 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* data lengths, not counting the varlena header */
1958 len1 = VARSIZE(arg1) - VARHDRSZ;
1959 len2 = VARSIZE(arg2) - VARHDRSZ;
/* compare only the bytes that both inputs have */
1961 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* the inputs are not read again below, so free any working copies */
1963 PG_FREE_IF_COPY(arg1, 0);
1964 PG_FREE_IF_COPY(arg2, 1);
1966 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
/*
 * byteacmp: three-way comparison of two bytea values, returned as an int32.
 * A nonzero memcmp result over the common prefix is returned unchanged, so
 * only its sign is meaningful; a tie on the prefix is broken by length.
 */
1970 byteacmp(PG_FUNCTION_ARGS)
1972 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1973 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* data lengths, not counting the varlena header */
1978 len1 = VARSIZE(arg1) - VARHDRSZ;
1979 len2 = VARSIZE(arg2) - VARHDRSZ;
/* compare only the bytes that both inputs have */
1981 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* equal prefixes: the shorter input sorts first */
1982 if ((cmp == 0) && (len1 != len2))
1983 cmp = (len1 < len2) ? -1 : 1;
/* release any detoasted working copies before returning */
1985 PG_FREE_IF_COPY(arg1, 0);
1986 PG_FREE_IF_COPY(arg2, 1);
1988 PG_RETURN_INT32(cmp);
1992 * appendStringInfoText
1994 * Append a text to str.
1995 * Like appendStringInfoString(str, PG_TEXT_GET_STR(s)) but faster.
/*
 * appendStringInfoText: append the data bytes of t (VARSIZE minus the
 * header) to str as binary data, without converting t to a C string first.
 */
1998 appendStringInfoText(StringInfo str, const text *t)
2000 appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
2005 * replace all occurrences of 'old_sub_str' in 'orig_str'
2006 * with 'new_sub_str' to form 'new_str'
2008 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2009 * otherwise returns 'new_str'
/*
 * replace_text: SQL-callable replacement of every occurrence of argument 1
 * (from_sub_text) within argument 0 (src_text) by argument 2 (to_sub_text).
 * The source text is returned as-is when it or the search string is empty,
 * or when the search string does not occur.
 */
2012 replace_text(PG_FUNCTION_ARGS)
2014 text *src_text = PG_GETARG_TEXT_P(0);
2015 text *from_sub_text = PG_GETARG_TEXT_P(1);
2016 text *to_sub_text = PG_GETARG_TEXT_P(2);
2017 int src_text_len = TEXTLEN(src_text);
2018 int from_sub_text_len = TEXTLEN(from_sub_text);
/* nothing can be replaced in, or searched for with, an empty string */
2026 if (src_text_len == 0 || from_sub_text_len == 0)
2027 PG_RETURN_TEXT_P(src_text);
/* position of the first occurrence, as reported by text_position (matchnum 1) */
2029 curr_posn = TEXTPOS(src_text, from_sub_text);
2031 /* When the from_sub_text is not found, there is nothing to do. */
2033 PG_RETURN_TEXT_P(src_text);
/* output accumulates in str; buf_text is the not-yet-scanned remainder */
2035 str = makeStringInfo();
2036 buf_text = src_text;
2038 while (curr_posn > 0)
/* left_text: the curr_posn - 1 characters in front of the match */
2040 left_text = text_substring(PointerGetDatum(buf_text),
2041 1, curr_posn - 1, false);
/* NOTE(review): the trailing (-1, true) presumably means "through end of string" -- confirm */
2042 right_text = text_substring(PointerGetDatum(buf_text),
2043 curr_posn + from_sub_text_len, -1, true);
/* emit the text before the match followed by the replacement */
2045 appendStringInfoText(str, left_text);
2046 appendStringInfoText(str, to_sub_text);
/* buf_text differs from src_text once it holds a right_text from an earlier pass */
2048 if (buf_text != src_text)
/* continue the search in what follows the match */
2051 buf_text = right_text;
2052 curr_posn = TEXTPOS(buf_text, from_sub_text);
/* the remainder after the last match is copied unchanged */
2055 appendStringInfoText(str, buf_text);
2056 if (buf_text != src_text)
/* convert the accumulated C string to text through textin (PG_STR_GET_TEXT) */
2059 ret_text = PG_STR_GET_TEXT(str->data);
2063 PG_RETURN_TEXT_P(ret_text);
2067 * check_replace_text_has_escape_char
2069 * check whether replace_text contains escape char.
/*
 * check_replace_text_has_escape_char: scan the data bytes of replace_text,
 * from VARDATA up to the end of the value.
 * NOTE(review): presumably reports whether a backslash is present; the test
 * itself should be confirmed against the loop bodies.
 */
2072 check_replace_text_has_escape_char(const text *replace_text)
2074 const char *p = VARDATA(replace_text);
2075 const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
/* single-byte encodings are walked one byte at a time */
2077 if (pg_database_encoding_max_length() == 1)
2079 for (; p < p_end; p++)
/* this loop instead steps by the byte length of each character */
2087 for (; p < p_end; p += pg_mblen(p))
2098 * appendStringInfoRegexpSubstr
2100 * Append replace_text to str, substituting regexp back references for
/*
 * appendStringInfoRegexpSubstr: copy replace_text into str, handling each
 * backslash escape it contains.  A back-reference escape is resolved through
 * pmatch[] and the referenced text is taken from src_text; a doubled
 * backslash emits one backslash; any other escape is copied as a literal
 * backslash.
 */
2104 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2105 regmatch_t *pmatch, text *src_text)
2107 const char *p = VARDATA(replace_text);
2108 const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
/* eml: maximum bytes per character in the database encoding */
2109 int eml = pg_database_encoding_max_length();
/* chunk_start marks the beginning of the literal run currently being scanned */
2113 const char *chunk_start = p;
2117 /* Find next escape char. */
2120 for (; p < p_end && *p != '\\'; p++)
2125 for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2129 /* Copy the text we just scanned over, if any. */
2130 if (p > chunk_start)
2131 appendBinaryStringInfo(str, chunk_start, p - chunk_start);
2133 /* Done if at end of string, else advance over escape char. */
2140 /* Escape at very end of input. Treat same as unexpected char */
2141 appendStringInfoChar(str, '\\');
/* a digit 1-9 after the backslash selects a sub-match */
2145 if (*p >= '1' && *p <= '9')
2147 /* Use the back reference of regexp. */
2150 so = pmatch[idx].rm_so;
2151 eo = pmatch[idx].rm_eo;
2156 /* Use the entire matched string. */
2157 so = pmatch[0].rm_so;
2158 eo = pmatch[0].rm_eo;
2161 else if (*p == '\\')
2163 /* \\ means transfer one \ to output. */
2164 appendStringInfoChar(str, '\\');
2171 * If escape char is not followed by any expected char, just treat
2172 * it as ordinary data to copy. (XXX would it be better to throw
2175 appendStringInfoChar(str, '\\');
2179 if (so != -1 && eo != -1)
2182 * Copy the text that is back reference of regexp. Because so and
2183 * eo are counted in characters not bytes, it's easiest to use
2184 * text_substring to pull out the correct chunk of text.
2188 append_text = text_substring(PointerGetDatum(src_text),
2189 so + 1, (eo - so), false);
2190 appendStringInfoText(str, append_text);
/* Number of regmatch_t slots in the pmatch[] array that replace_text_regexp hands to pg_regexec. */
2196 #define REGEXP_REPLACE_BACKREF_CNT 10
2199 * replace_text_regexp
2201 * Replace the text in src_text that matches regexp with replace_text.
2203 * Note: to avoid having to include regex.h in builtins.h, we declare
2204 * the regexp argument as void *, but really it's regex_t *.
/*
 * replace_text_regexp: build a new text in which matches of the compiled
 * regexp (passed as void *, used as regex_t *) in src_text are replaced by
 * replace_text.  Matching runs over a wide-character copy of the source, so
 * match offsets are character positions; text_substring is used to pull the
 * corresponding pieces out of src_text.  A regex result other than
 * REG_OKAY or REG_NOMATCH is reported as
 * ERRCODE_INVALID_REGULAR_EXPRESSION.
 */
2207 replace_text_regexp(text *src_text, void *regexp,
2208 text *replace_text, bool glob)
2211 regex_t *re = (regex_t *) regexp;
2212 int src_text_len = VARSIZE(src_text) - VARHDRSZ;
2213 StringInfo str = makeStringInfo();
2215 regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
2222 /* Convert data string to wide characters. */
/* room for src_text_len + 1 wide characters; presumably one per input byte at most, plus a terminator */
2223 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2224 data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
2226 /* Check whether replace_text has escape char. */
2227 have_escape = check_replace_text_has_escape_char(replace_text);
/* the loop still runs when search_start == data_len, i.e. at the very end of the data */
2229 for (search_start = data_pos = 0; search_start <= data_len;)
2231 regexec_result = pg_regexec(re,
2235 NULL, /* no details */
2236 REGEXP_REPLACE_BACKREF_CNT,
/* anything other than "matched" or "no match" is a regex failure */
2240 if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
2245 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2247 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2248 errmsg("regular expression failed: %s", errMsg)));
2251 if (regexec_result == REG_NOMATCH)
2255 * Copy the text to the left of the match position. Because we are
2256 * working with character not byte indexes, it's easiest to use
2257 * text_substring to pull out the needed data.
2259 if (pmatch[0].rm_so - data_pos > 0)
2263 left_text = text_substring(PointerGetDatum(src_text),
2265 pmatch[0].rm_so - data_pos,
2267 appendStringInfoText(str, left_text);
2272 * Copy the replace_text. Process back references when the
2273 * replace_text has escape characters.
2276 appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
2278 appendStringInfoText(str, replace_text);
/* both the next search and the next copy start at the end of this match */
2280 search_start = data_pos = pmatch[0].rm_eo;
2283 * When global option is off, replace the first instance only.
2289 * Search from next character when the matching text is zero width.
2291 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2296 * Copy the text to the right of the last match.
2298 if (data_pos < data_len)
2302 right_text = text_substring(PointerGetDatum(src_text),
2303 data_pos + 1, -1, true);
2304 appendStringInfoText(str, right_text);
/* convert the accumulated C string to text through textin (PG_STR_GET_TEXT) */
2308 ret_text = PG_STR_GET_TEXT(str->data);
2318 * parse input string
2319 * return ord item (1 based)
2320 * based on provided field separator
/*
 * split_text: SQL-callable; returns field number fldnum (argument 2,
 * 1-based) of inputstring (argument 0) when split on fldsep (argument 1).
 * An empty input yields an empty string.  A field position that is not
 * greater than zero is reported as ERRCODE_INVALID_PARAMETER_VALUE.
 */
2323 split_text(PG_FUNCTION_ARGS)
2325 text *inputstring = PG_GETARG_TEXT_P(0);
2326 text *fldsep = PG_GETARG_TEXT_P(1);
2327 int fldnum = PG_GETARG_INT32(2);
2328 int inputstring_len = TEXTLEN(inputstring);
2329 int fldsep_len = TEXTLEN(fldsep);
2334 /* field number is 1 based */
2337 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2338 errmsg("field position must be greater than zero")));
2340 /* return empty string for empty input string */
2341 if (inputstring_len < 1)
2342 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2344 /* empty field separator */
2347 /* if first field, return input string, else empty string */
2349 PG_RETURN_TEXT_P(inputstring);
2351 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
/*
 * start_posn is the position of separator occurrence fldnum - 1 and
 * end_posn that of occurrence fldnum; a value of 0 is treated below as
 * "no such occurrence".
 */
2354 start_posn = text_position(inputstring, fldsep, fldnum - 1);
2355 end_posn = text_position(inputstring, fldsep, fldnum);
2357 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2359 /* if first field, return input string, else empty string */
2361 PG_RETURN_TEXT_P(inputstring);
2363 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2365 else if (start_posn == 0)
2367 /* first field requested */
/* LEFT() substrings inputstring with a length of (first separator position - 1) */
2368 result_text = LEFT(inputstring, fldsep);
2369 PG_RETURN_TEXT_P(result_text);
2371 else if (end_posn == 0)
2373 /* last field requested */
2374 result_text = text_substring(PointerGetDatum(inputstring),
2375 start_posn + fldsep_len,
2377 PG_RETURN_TEXT_P(result_text);
2381 /* interior field requested */
2382 result_text = text_substring(PointerGetDatum(inputstring),
2383 start_posn + fldsep_len,
2384 end_posn - start_posn - fldsep_len,
2386 PG_RETURN_TEXT_P(result_text);
2392 * parse input string
2393 * return text array of elements
2394 * based on provided field separator
/*
 * text_to_array: SQL-callable; splits inputstring (argument 0) on fldsep
 * (argument 1) and returns the pieces as a text array built with
 * accumArrayResult / makeArrayResult.  When the separator never occurs the
 * whole input is returned as a one-element array.
 * NOTE(review): inputstring is a text *, yet the singleton-array calls wrap
 * it with CStringGetDatum rather than PointerGetDatum -- confirm intended.
 */
2397 text_to_array(PG_FUNCTION_ARGS)
2399 text *inputstring = PG_GETARG_TEXT_P(0);
2400 text *fldsep = PG_GETARG_TEXT_P(1);
2401 int inputstring_len = TEXTLEN(inputstring);
2402 int fldsep_len = TEXTLEN(fldsep);
/* the accumulator starts out NULL; accumArrayResult below returns the state to keep using */
2407 ArrayBuildState *astate = NULL;
2409 /* return NULL for empty input string */
2410 if (inputstring_len < 1)
2414 * empty field separator return one element, 1D, array using the input
2418 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2419 CStringGetDatum(inputstring), 1));
2421 /* start with end position holding the initial start position */
2423 for (fldnum = 1;; fldnum++) /* field number is 1 based */
2426 bool disnull = false;
/* the previous separator position becomes this field's start; then look up separator number fldnum */
2428 start_posn = end_posn;
2429 end_posn = text_position(inputstring, fldsep, fldnum);
2431 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2436 * first element return one element, 1D, array using the input
2439 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2440 CStringGetDatum(inputstring), 1));
2444 /* otherwise create array and exit */
2445 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2446 CurrentMemoryContext));
2449 else if (start_posn == 0)
2451 /* first field requested */
2452 result_text = LEFT(inputstring, fldsep);
2454 else if (end_posn == 0)
2456 /* last field requested */
2457 result_text = text_substring(PointerGetDatum(inputstring),
2458 start_posn + fldsep_len,
2463 /* interior field requested */
2464 result_text = text_substring(PointerGetDatum(inputstring),
2465 start_posn + fldsep_len,
2466 end_posn - start_posn - fldsep_len,
2470 /* stash away current value */
2471 dvalue = PointerGetDatum(result_text);
2472 astate = accumArrayResult(astate, dvalue,
2474 CurrentMemoryContext);
2477 /* never reached -- keep compiler quiet */
2483 * concatenate Cstring representation of input array elements
2484 * using provided field separator
/*
 * array_to_text: SQL-callable; converts each non-null element of the array
 * (argument 0) to its C-string output form and joins the results with
 * fldsep (argument 1).  Null elements are ignored.  Element-type I/O data
 * is cached in fn_extra and refreshed only when the element type differs
 * from the cached one.
 */
2487 array_to_text(PG_FUNCTION_ARGS)
2489 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
/* the separator is converted to a C string once, up front */
2490 char *fldsep = PG_TEXTARG_GET_STR(1);
2498 StringInfo result_str = makeStringInfo();
2499 bool printed = false;
2504 ArrayMetaState *my_extra;
2506 ndims = ARR_NDIM(v);
2508 nitems = ArrayGetNItems(ndims, dims);
2510 /* if there are no elements, return an empty string */
2512 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2514 element_type = ARR_ELEMTYPE(v);
2517 * We arrange to look up info about element type, including its output
2518 * conversion proc, only once per series of calls, assuming the element
2519 * type doesn't change underneath us.
2521 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2522 if (my_extra == NULL)
2524 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2525 sizeof(ArrayMetaState));
2526 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
/* the bitwise complement can never equal element_type, so the lookup below is forced */
2527 my_extra->element_type = ~element_type;
2530 if (my_extra->element_type != element_type)
2533 * Get info about element type, including its output conversion proc
2535 get_type_io_data(element_type, IOFunc_output,
2536 &my_extra->typlen, &my_extra->typbyval,
2537 &my_extra->typalign, &my_extra->typdelim,
2538 &my_extra->typioparam, &my_extra->typiofunc);
2539 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2540 fcinfo->flinfo->fn_mcxt);
/* remember which element type the cached data describes */
2541 my_extra->element_type = element_type;
/* copy the cached storage attributes into locals for the element walk */
2543 typlen = my_extra->typlen;
2544 typbyval = my_extra->typbyval;
2545 typalign = my_extra->typalign;
/* p walks the element data; bitmap is the null bitmap and is tested for NULL before use */
2547 p = ARR_DATA_PTR(v);
2548 bitmap = ARR_NULLBITMAP(v);
2551 for (i = 0; i < nitems; i++)
2556 /* Get source element, checking for NULL */
2557 if (bitmap && (*bitmap & bitmask) == 0)
2559 /* we ignore nulls */
2563 itemvalue = fetch_att(p, typbyval, typlen);
2565 value = DatumGetCString(FunctionCall1(&my_extra->proc,
/* NOTE(review): the separator-prefixed form is presumably used once an element has been printed -- confirm */
2569 appendStringInfo(result_str, "%s%s", fldsep, value);
2571 appendStringInfoString(result_str, value);
/* step p past this element, then round up to the element type's alignment */
2574 p = att_addlength(p, typlen, PointerGetDatum(p));
2575 p = (char *) att_align(p, typalign);
2578 /* advance bitmap pointer if any */
/* 0x100 means bitmask has moved past the eighth bit of the current bitmap byte */
2582 if (bitmask == 0x100)
2590 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2595 * Convert an int32 to a string containing a base 16 (hex) representation of
/*
 * to_hex32: SQL-callable; renders the int32 argument as lowercase
 * hexadecimal text.  The argument is cast to uint32 first, so a negative
 * input is rendered from its unsigned 32-bit value.
 */
2599 to_hex32(PG_FUNCTION_ARGS)
2601 uint32 value = (uint32) PG_GETARG_INT32(0);
2604 const char *digits = "0123456789abcdef";
2605 char buf[32]; /* bigger than needed, but reasonable */
/* digits are written backwards, starting from the last byte of buf */
2607 ptr = buf + sizeof(buf) - 1;
/* value % HEXBASE selects the character for the current low-order digit */
2612 *--ptr = digits[value % HEXBASE];
/* stop once value is zero or the start of buf has been reached */
2614 } while (ptr > buf && value);
/* ptr now addresses the most significant digit produced */
2616 result_text = PG_STR_GET_TEXT(ptr);
2617 PG_RETURN_TEXT_P(result_text);
2621 * Convert an int64 to a string containing a base 16 (hex) representation of
/*
 * to_hex64: SQL-callable; renders the int64 argument as lowercase
 * hexadecimal text.  The argument is cast to uint64 first, so a negative
 * input is rendered from its unsigned 64-bit value.
 */
2625 to_hex64(PG_FUNCTION_ARGS)
2627 uint64 value = (uint64) PG_GETARG_INT64(0);
2630 const char *digits = "0123456789abcdef";
2631 char buf[32]; /* bigger than needed, but reasonable */
/* digits are written backwards, starting from the last byte of buf */
2633 ptr = buf + sizeof(buf) - 1;
/* value % HEXBASE selects the character for the current low-order digit */
2638 *--ptr = digits[value % HEXBASE];
/* stop once value is zero or the start of buf has been reached */
2640 } while (ptr > buf && value);
/* ptr now addresses the most significant digit produced */
2642 result_text = PG_STR_GET_TEXT(ptr);
2643 PG_RETURN_TEXT_P(result_text);
2647 * Create an md5 hash of a text string and return it as hex
2649 * md5 produces a 16 byte (128 bit) hash; double it for hex
/* Length in characters of the hex digest; md5_text and md5_bytea size their buffers as MD5_HASH_LEN + 1. */
2651 #define MD5_HASH_LEN 32
/*
 * md5_text: SQL-callable; hashes the data bytes of the text argument with
 * pg_md5_hash and returns the hex digest as text.  A false return from
 * pg_md5_hash is reported as ERRCODE_OUT_OF_MEMORY.
 */
2654 md5_text(PG_FUNCTION_ARGS)
2656 text *in_text = PG_GETARG_TEXT_P(0);
/* one extra byte beyond the MD5_HASH_LEN hex characters */
2658 char hexsum[MD5_HASH_LEN + 1];
2661 /* Calculate the length of the buffer using varlena metadata */
2662 len = VARSIZE(in_text) - VARHDRSZ;
2664 /* get the hash result */
2665 if (pg_md5_hash(VARDATA(in_text), len, hexsum) == false)
2667 (errcode(ERRCODE_OUT_OF_MEMORY),
2668 errmsg("out of memory")));
2670 /* convert to text and return it */
2671 result_text = PG_STR_GET_TEXT(hexsum);
2672 PG_RETURN_TEXT_P(result_text);
2676 * Create an md5 hash of a bytea field and return it as a hex string:
2677 * 16-byte md5 digest is represented in 32 hex characters.
/*
 * md5_bytea: SQL-callable; hashes the data bytes of the bytea argument with
 * pg_md5_hash and returns the hex digest as text.  A false return from
 * pg_md5_hash is reported as ERRCODE_OUT_OF_MEMORY.
 */
2680 md5_bytea(PG_FUNCTION_ARGS)
2682 bytea *in = PG_GETARG_BYTEA_P(0);
/* one extra byte beyond the MD5_HASH_LEN hex characters */
2684 char hexsum[MD5_HASH_LEN + 1];
/* hash only the payload, not the varlena header */
2687 len = VARSIZE(in) - VARHDRSZ;
2688 if (pg_md5_hash(VARDATA(in), len, hexsum) == false)
2690 (errcode(ERRCODE_OUT_OF_MEMORY),
2691 errmsg("out of memory")));
/* hexsum is handed to textin as a C string to build the result */
2693 result_text = PG_STR_GET_TEXT(hexsum);
2694 PG_RETURN_TEXT_P(result_text);
2698 * Return the size of a datum, possibly compressed
2700 * Works on any data type
/*
 * pg_column_size: SQL-callable; returns the size of the argument datum as
 * an int32.  The argument type's typlen is looked up on the first call and
 * cached in fn_extra; the size is then computed according to whether the
 * type is varlena, a C string (typlen -2) or fixed-width.
 */
2703 pg_column_size(PG_FUNCTION_ARGS)
2705 Datum value = PG_GETARG_DATUM(0);
2709 /* On first call, get the input type's typlen, and save at *fn_extra */
2710 if (fcinfo->flinfo->fn_extra == NULL)
2712 /* Lookup the datatype of the supplied argument */
2713 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
2715 typlen = get_typlen(argtypeid);
2716 if (typlen == 0) /* should not happen */
2717 elog(ERROR, "cache lookup failed for type %u", argtypeid);
2719 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2721 *((int *) fcinfo->flinfo->fn_extra) = typlen;
/* read the cached typlen back from fn_extra */
2724 typlen = *((int *) fcinfo->flinfo->fn_extra);
2728 /* varlena type, possibly toasted */
2729 result = toast_datum_size(value);
2731 else if (typlen == -2)
/* C string: its size is the string length plus the terminating NUL */
2734 result = strlen(DatumGetCString(value)) + 1;
2738 /* ordinary fixed-width type */
2742 PG_RETURN_INT32(result);