1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.129 2005/07/21 04:41:43 momjian Exp $
13 *-------------------------------------------------------------------------
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/crypt.h"
23 #include "libpq/pqformat.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "utils/array.h"
28 #include "utils/builtins.h"
29 #include "utils/lsyscache.h"
30 #include "utils/pg_locale.h"
31 #include "regex/regex.h"
34 typedef struct varlena unknown;
36 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
37 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
38 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
39 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
40 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
42 #define PG_TEXTARG_GET_STR(arg_) \
43 DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
44 #define PG_TEXT_GET_STR(textp_) \
45 DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
46 #define PG_STR_GET_TEXT(str_) \
47 DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
48 #define TEXTLEN(textp) \
49 text_length(PointerGetDatum(textp))
50 #define TEXTPOS(buf_text, from_sub_text) \
51 text_position(buf_text, from_sub_text, 1)
52 #define LEFT(buf_text, from_sub_text) \
53 text_substring(PointerGetDatum(buf_text), \
55 TEXTPOS(buf_text, from_sub_text) - 1, false)
57 static int text_cmp(text *arg1, text *arg2);
58 static int32 text_length(Datum str);
59 static int32 text_position(text *t1, text *t2, int matchnum);
60 static text *text_substring(Datum str,
63 bool length_not_specified);
65 static void appendStringInfoText(StringInfo str, const text *t);
68 /*****************************************************************************
70 *****************************************************************************/
73 #define VAL(CH) ((CH) - '0')
74 #define DIG(VAL) ((VAL) + '0')
77 * byteain - converts from printable representation of byte array
79 * Non-printable characters must be passed as '\nnn' (octal) and are
80 * converted to internal form. '\' must be passed as '\\'.
81 * ereport(ERROR, ...) if bad form.
84 * The input is scanned twice.
85 * The error checking of input is minimal.
88 byteain(PG_FUNCTION_ARGS)
90 char *inputText = PG_GETARG_CSTRING(0);
96 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
100 else if ((tp[0] == '\\') &&
101 (tp[1] >= '0' && tp[1] <= '3') &&
102 (tp[2] >= '0' && tp[2] <= '7') &&
103 (tp[3] >= '0' && tp[3] <= '7'))
105 else if ((tp[0] == '\\') &&
111 * one backslash, not followed by 0 or ### valid octal
114 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
115 errmsg("invalid input syntax for type bytea")));
120 result = (bytea *) palloc(byte);
121 VARATT_SIZEP(result) = byte; /* set varlena length */
124 rp = VARDATA(result);
129 else if ((tp[0] == '\\') &&
130 (tp[1] >= '0' && tp[1] <= '3') &&
131 (tp[2] >= '0' && tp[2] <= '7') &&
132 (tp[3] >= '0' && tp[3] <= '7'))
138 *rp++ = byte + VAL(tp[3]);
141 else if ((tp[0] == '\\') &&
150 * We should never get here. The first pass should not allow
154 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
155 errmsg("invalid input syntax for type bytea")));
159 PG_RETURN_BYTEA_P(result);
163 * byteaout - converts to printable representation of byte array
165 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
168 * NULL vlena should be an error--returning string with NULL for now.
171 byteaout(PG_FUNCTION_ARGS)
173 bytea *vlena = PG_GETARG_BYTEA_P(0);
177 int val; /* holds unprintable chars */
181 len = 1; /* empty string has 1 char */
183 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
187 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
192 rp = result = (char *) palloc(len);
194 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
201 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
205 rp[3] = DIG(val & 07);
207 rp[2] = DIG(val & 07);
209 rp[1] = DIG(val & 03);
216 PG_RETURN_CSTRING(result);
220 * bytearecv - converts external binary format to bytea
223 bytearecv(PG_FUNCTION_ARGS)
225 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
229 nbytes = buf->len - buf->cursor;
230 result = (bytea *) palloc(nbytes + VARHDRSZ);
231 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
232 pq_copymsgbytes(buf, VARDATA(result), nbytes);
233 PG_RETURN_BYTEA_P(result);
237 * byteasend - converts bytea to binary format
239 * This is a special case: just copy the input...
242 byteasend(PG_FUNCTION_ARGS)
244 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
246 PG_RETURN_BYTEA_P(vlena);
251 * textin - converts "..." to internal representation
254 textin(PG_FUNCTION_ARGS)
256 char *inputText = PG_GETARG_CSTRING(0);
260 /* verify encoding */
261 len = strlen(inputText);
262 pg_verifymbstr(inputText, len, false);
264 result = (text *) palloc(len + VARHDRSZ);
265 VARATT_SIZEP(result) = len + VARHDRSZ;
267 memcpy(VARDATA(result), inputText, len);
269 PG_RETURN_TEXT_P(result);
273 * textout - converts internal representation to "..."
276 textout(PG_FUNCTION_ARGS)
278 text *t = PG_GETARG_TEXT_P(0);
282 len = VARSIZE(t) - VARHDRSZ;
283 result = (char *) palloc(len + 1);
284 memcpy(result, VARDATA(t), len);
287 PG_RETURN_CSTRING(result);
291 * textrecv - converts external binary format to text
294 textrecv(PG_FUNCTION_ARGS)
296 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
301 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
303 /* verify encoding */
304 pg_verifymbstr(str, nbytes, false);
306 result = (text *) palloc(nbytes + VARHDRSZ);
307 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
308 memcpy(VARDATA(result), str, nbytes);
310 PG_RETURN_TEXT_P(result);
314 * textsend - converts text to binary format
317 textsend(PG_FUNCTION_ARGS)
319 text *t = PG_GETARG_TEXT_P(0);
322 pq_begintypsend(&buf);
323 pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
324 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
329 * unknownin - converts "..." to internal representation
332 unknownin(PG_FUNCTION_ARGS)
334 char *str = PG_GETARG_CSTRING(0);
336 /* representation is same as cstring */
337 PG_RETURN_CSTRING(pstrdup(str));
341 * unknownout - converts internal representation to "..."
344 unknownout(PG_FUNCTION_ARGS)
346 /* representation is same as cstring */
347 char *str = PG_GETARG_CSTRING(0);
349 PG_RETURN_CSTRING(pstrdup(str));
353 * unknownrecv - converts external binary format to unknown
356 unknownrecv(PG_FUNCTION_ARGS)
358 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
362 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
363 /* representation is same as cstring */
364 PG_RETURN_CSTRING(str);
368 * unknownsend - converts unknown to binary format
371 unknownsend(PG_FUNCTION_ARGS)
373 /* representation is same as cstring */
374 char *str = PG_GETARG_CSTRING(0);
377 pq_begintypsend(&buf);
378 pq_sendtext(&buf, str, strlen(str));
379 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
383 /* ========== PUBLIC ROUTINES ========== */
387 * returns the logical length of a text*
388 * (which is less than the VARSIZE of the text*)
391 textlen(PG_FUNCTION_ARGS)
393 Datum str = PG_GETARG_DATUM(0);
395 /* try to avoid decompressing argument */
396 PG_RETURN_INT32(text_length(str));
401 * Does the real work for textlen()
403 * This is broken out so it can be called directly by other string processing
404 * functions. Note that the argument is passed as a Datum, to indicate that
405 * it may still be in compressed form. We can avoid decompressing it at all
409 text_length(Datum str)
411 /* fastpath when max encoding length is one */
412 if (pg_database_encoding_max_length() == 1)
413 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
416 text *t = DatumGetTextP(str);
418 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
419 VARSIZE(t) - VARHDRSZ));
425 * returns the physical length of a text*
426 * (which is less than the VARSIZE of the text*)
429 textoctetlen(PG_FUNCTION_ARGS)
431 Datum str = PG_GETARG_DATUM(0);
433 /* We need not detoast the input at all */
434 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
439 * takes two text* and returns a text* that is the concatenation of
442 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
443 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
444 * Allocate space for output in all cases.
445 * XXX - thomas 1997-07-10
448 textcat(PG_FUNCTION_ARGS)
450 text *t1 = PG_GETARG_TEXT_P(0);
451 text *t2 = PG_GETARG_TEXT_P(1);
458 len1 = VARSIZE(t1) - VARHDRSZ;
462 len2 = VARSIZE(t2) - VARHDRSZ;
466 len = len1 + len2 + VARHDRSZ;
467 result = (text *) palloc(len);
469 /* Set size of result string... */
470 VARATT_SIZEP(result) = len;
472 /* Fill data field of result string... */
473 ptr = VARDATA(result);
475 memcpy(ptr, VARDATA(t1), len1);
477 memcpy(ptr + len1, VARDATA(t2), len2);
479 PG_RETURN_TEXT_P(result);
484 * Return a substring starting at the specified position.
485 * - thomas 1997-12-31
489 * - starting position (is one-based)
492 * If the starting position is zero or less, then return from the start of the string
493 * adjusting the length to be consistent with the "negative start" per SQL92.
494 * If the length is less than zero, an ERROR is thrown. If no length is specified, the substring runs to the end of the string.
496 * Added multibyte support.
497 * - Tatsuo Ishii 1998-4-21
498 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
499 * Formerly returned the entire string; now returns a portion.
500 * - Thomas Lockhart 1998-12-10
501 * Now uses faster TOAST-slicing interface
502 * - John Gray 2002-02-22
503 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
504 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
505 * error; if E < 1, return '', not entire string). Fixed MB related bug when
506 * S > LC and < LC + 4 sometimes garbage characters are returned.
507 * - Joe Conway 2002-08-10
510 text_substr(PG_FUNCTION_ARGS)
512 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
519 * text_substr_no_len -
520 * Wrapper to avoid opr_sanity failure due to
521 * one function accepting a different number of args.
524 text_substr_no_len(PG_FUNCTION_ARGS)
526 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
533 * Does the real work for text_substr() and text_substr_no_len()
535 * This is broken out so it can be called directly by other string processing
536 * functions. Note that the argument is passed as a Datum, to indicate that
537 * it may still be in compressed/toasted form. We can avoid detoasting all
538 * of it in some cases.
541 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
543 int32 eml = pg_database_encoding_max_length();
544 int32 S = start; /* start position */
545 int32 S1; /* adjusted start position */
546 int32 L1; /* adjusted substring length */
548 /* life is easy if the encoding max length is 1 */
553 if (length_not_specified) /* special case - get length to
562 * A negative value for L is the only way for the end position
563 * to be before the start. SQL99 says to throw an error.
567 (errcode(ERRCODE_SUBSTRING_ERROR),
568 errmsg("negative substring length not allowed")));
571 * A zero or negative value for the end position can happen if
572 * the start was negative or one. SQL99 says to return a
573 * zero-length string.
576 return PG_STR_GET_TEXT("");
582 * If the start position is past the end of the string, SQL99 says
583 * to return a zero-length string -- DatumGetTextPSlice() will
584 * do that for us. Convert to zero-based starting position
586 return DatumGetTextPSlice(str, S1 - 1, L1);
591 * When encoding max length is > 1, we can't get LC without
592 * detoasting, so we'll grab a conservatively large slice now and
593 * go back later to do the right thing
606 * if S is past the end of the string, the tuple toaster will
607 * return a zero-length string to us
612 * We need to start at position zero because there is no way to
613 * know in advance which byte offset corresponds to the supplied
618 if (length_not_specified) /* special case - get length to
620 slice_size = L1 = -1;
626 * A negative value for L is the only way for the end position
627 * to be before the start. SQL99 says to throw an error.
631 (errcode(ERRCODE_SUBSTRING_ERROR),
632 errmsg("negative substring length not allowed")));
635 * A zero or negative value for the end position can happen if
636 * the start was negative or one. SQL99 says to return a
637 * zero-length string.
640 return PG_STR_GET_TEXT("");
643 * if E is past the end of the string, the tuple toaster will
644 * truncate the length for us
649 * Total slice size in bytes can't be any longer than the
650 * start position plus substring length times the encoding max
653 slice_size = (S1 + L1) * eml;
655 slice = DatumGetTextPSlice(str, slice_start, slice_size);
657 /* see if we got back an empty string */
658 if ((VARSIZE(slice) - VARHDRSZ) == 0)
659 return PG_STR_GET_TEXT("");
661 /* Now we can get the actual length of the slice in MB characters */
662 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
665 * Check that the start position wasn't > slice_strlen. If so,
666 * SQL99 says to return a zero-length string.
668 if (S1 > slice_strlen)
669 return PG_STR_GET_TEXT("");
672 * Adjust L1 and E1 now that we know the slice string length.
673 * Again remember that S1 is one based, and slice_start is zero
677 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
679 E1 = slice_start + 1 + slice_strlen;
682 * Find the start position in the slice; remember S1 is not zero
686 for (i = 0; i < S1 - 1; i++)
689 /* hang onto a pointer to our start position */
693 * Count the actual bytes used by the substring of the requested
696 for (i = S1; i < E1; i++)
699 ret = (text *) palloc(VARHDRSZ + (p - s));
700 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
701 memcpy(VARDATA(ret), s, (p - s));
706 elog(ERROR, "invalid backend encoding: encoding max length < 1");
708 /* not reached: suppress compiler warning */
714 * Return the position of the specified substring.
715 * Implements the SQL92 POSITION() function.
716 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
717 * - thomas 1997-07-27
720 textpos(PG_FUNCTION_ARGS)
722 text *str = PG_GETARG_TEXT_P(0);
723 text *search_str = PG_GETARG_TEXT_P(1);
725 PG_RETURN_INT32(text_position(str, search_str, 1));
730 * Does the real work for textpos()
733 * t1 - string to be searched
734 * t2 - pattern to match within t1
735 * matchnum - number of the match to be found (1 is the first match)
737 * Character index of the first matched char, starting from 1,
740 * This is broken out so it can be called directly by other string processing
744 text_position(text *t1, text *t2, int matchnum)
754 return 0; /* result for 0th match */
756 if (VARSIZE(t2) <= VARHDRSZ)
757 return 1; /* result for empty pattern */
759 len1 = VARSIZE(t1) - VARHDRSZ;
760 len2 = VARSIZE(t2) - VARHDRSZ;
762 if (pg_database_encoding_max_length() == 1)
764 /* simple case - single byte encoding */
771 /* no use in searching str past point where search_str will fit */
774 for (p = 0; p <= px; p++)
776 if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
778 if (++match == matchnum)
789 /* not as simple - multibyte encoding */
795 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
796 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
797 len1 = pg_wchar_strlen(p1);
798 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
799 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
800 len2 = pg_wchar_strlen(p2);
802 /* no use in searching str past point where search_str will fit */
805 for (p = 0; p <= px; p++)
807 if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
809 if (++match == matchnum)
826 * Comparison function for text strings with given lengths.
827 * Includes locale support, but must copy strings to temporary memory
828 * to allow null-termination for inputs to strcoll().
832 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
837 * Unfortunately, there is no strncoll(), so in the non-C locale case
838 * we have to do some memory copying. This turns out to be
839 * significantly slower, so we optimize the case where LC_COLLATE is
840 * C. We also try to optimize relatively-short strings by avoiding
841 * palloc/pfree overhead.
843 #define STACKBUFLEN 1024
845 if (!lc_collate_is_c())
847 char a1buf[STACKBUFLEN];
848 char a2buf[STACKBUFLEN];
852 if (len1 >= STACKBUFLEN)
853 a1p = (char *) palloc(len1 + 1);
856 if (len2 >= STACKBUFLEN)
857 a2p = (char *) palloc(len2 + 1);
861 memcpy(a1p, arg1, len1);
863 memcpy(a2p, arg2, len2);
866 result = strcoll(a1p, a2p);
868 if (len1 >= STACKBUFLEN)
870 if (len2 >= STACKBUFLEN)
875 result = strncmp(arg1, arg2, Min(len1, len2));
876 if ((result == 0) && (len1 != len2))
877 result = (len1 < len2) ? -1 : 1;
885 * Internal comparison function for text strings.
889 text_cmp(text *arg1, text *arg2)
899 len1 = VARSIZE(arg1) - VARHDRSZ;
900 len2 = VARSIZE(arg2) - VARHDRSZ;
902 return varstr_cmp(a1p, len1, a2p, len2);
906 * Comparison functions for text strings.
908 * Note: btree indexes need these routines not to leak memory; therefore,
909 * be careful to free working copies of toasted datums. Most places don't
910 * need to be so careful.
914 texteq(PG_FUNCTION_ARGS)
916 text *arg1 = PG_GETARG_TEXT_P(0);
917 text *arg2 = PG_GETARG_TEXT_P(1);
920 /* fast path for different-length inputs */
921 if (VARSIZE(arg1) != VARSIZE(arg2))
924 result = (text_cmp(arg1, arg2) == 0);
926 PG_FREE_IF_COPY(arg1, 0);
927 PG_FREE_IF_COPY(arg2, 1);
929 PG_RETURN_BOOL(result);
933 textne(PG_FUNCTION_ARGS)
935 text *arg1 = PG_GETARG_TEXT_P(0);
936 text *arg2 = PG_GETARG_TEXT_P(1);
939 /* fast path for different-length inputs */
940 if (VARSIZE(arg1) != VARSIZE(arg2))
943 result = (text_cmp(arg1, arg2) != 0);
945 PG_FREE_IF_COPY(arg1, 0);
946 PG_FREE_IF_COPY(arg2, 1);
948 PG_RETURN_BOOL(result);
952 text_lt(PG_FUNCTION_ARGS)
954 text *arg1 = PG_GETARG_TEXT_P(0);
955 text *arg2 = PG_GETARG_TEXT_P(1);
958 result = (text_cmp(arg1, arg2) < 0);
960 PG_FREE_IF_COPY(arg1, 0);
961 PG_FREE_IF_COPY(arg2, 1);
963 PG_RETURN_BOOL(result);
967 text_le(PG_FUNCTION_ARGS)
969 text *arg1 = PG_GETARG_TEXT_P(0);
970 text *arg2 = PG_GETARG_TEXT_P(1);
973 result = (text_cmp(arg1, arg2) <= 0);
975 PG_FREE_IF_COPY(arg1, 0);
976 PG_FREE_IF_COPY(arg2, 1);
978 PG_RETURN_BOOL(result);
982 text_gt(PG_FUNCTION_ARGS)
984 text *arg1 = PG_GETARG_TEXT_P(0);
985 text *arg2 = PG_GETARG_TEXT_P(1);
988 result = (text_cmp(arg1, arg2) > 0);
990 PG_FREE_IF_COPY(arg1, 0);
991 PG_FREE_IF_COPY(arg2, 1);
993 PG_RETURN_BOOL(result);
997 text_ge(PG_FUNCTION_ARGS)
999 text *arg1 = PG_GETARG_TEXT_P(0);
1000 text *arg2 = PG_GETARG_TEXT_P(1);
1003 result = (text_cmp(arg1, arg2) >= 0);
1005 PG_FREE_IF_COPY(arg1, 0);
1006 PG_FREE_IF_COPY(arg2, 1);
1008 PG_RETURN_BOOL(result);
1012 bttextcmp(PG_FUNCTION_ARGS)
1014 text *arg1 = PG_GETARG_TEXT_P(0);
1015 text *arg2 = PG_GETARG_TEXT_P(1);
1018 result = text_cmp(arg1, arg2);
1020 PG_FREE_IF_COPY(arg1, 0);
1021 PG_FREE_IF_COPY(arg2, 1);
1023 PG_RETURN_INT32(result);
1028 text_larger(PG_FUNCTION_ARGS)
1030 text *arg1 = PG_GETARG_TEXT_P(0);
1031 text *arg2 = PG_GETARG_TEXT_P(1);
1034 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1036 PG_RETURN_TEXT_P(result);
1040 text_smaller(PG_FUNCTION_ARGS)
1042 text *arg1 = PG_GETARG_TEXT_P(0);
1043 text *arg2 = PG_GETARG_TEXT_P(1);
1046 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1048 PG_RETURN_TEXT_P(result);
1053 * The following operators support character-by-character comparison
1054 * of text data types, to allow building indexes suitable for LIKE
1059 internal_text_pattern_compare(text *arg1, text *arg2)
1063 result = memcmp(VARDATA(arg1), VARDATA(arg2),
1064 Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1067 else if (VARSIZE(arg1) < VARSIZE(arg2))
1069 else if (VARSIZE(arg1) > VARSIZE(arg2))
1077 text_pattern_lt(PG_FUNCTION_ARGS)
1079 text *arg1 = PG_GETARG_TEXT_P(0);
1080 text *arg2 = PG_GETARG_TEXT_P(1);
1083 result = internal_text_pattern_compare(arg1, arg2);
1085 PG_FREE_IF_COPY(arg1, 0);
1086 PG_FREE_IF_COPY(arg2, 1);
1088 PG_RETURN_BOOL(result < 0);
1093 text_pattern_le(PG_FUNCTION_ARGS)
1095 text *arg1 = PG_GETARG_TEXT_P(0);
1096 text *arg2 = PG_GETARG_TEXT_P(1);
1099 result = internal_text_pattern_compare(arg1, arg2);
1101 PG_FREE_IF_COPY(arg1, 0);
1102 PG_FREE_IF_COPY(arg2, 1);
1104 PG_RETURN_BOOL(result <= 0);
1109 text_pattern_eq(PG_FUNCTION_ARGS)
1111 text *arg1 = PG_GETARG_TEXT_P(0);
1112 text *arg2 = PG_GETARG_TEXT_P(1);
1115 if (VARSIZE(arg1) != VARSIZE(arg2))
1118 result = internal_text_pattern_compare(arg1, arg2);
1120 PG_FREE_IF_COPY(arg1, 0);
1121 PG_FREE_IF_COPY(arg2, 1);
1123 PG_RETURN_BOOL(result == 0);
1128 text_pattern_ge(PG_FUNCTION_ARGS)
1130 text *arg1 = PG_GETARG_TEXT_P(0);
1131 text *arg2 = PG_GETARG_TEXT_P(1);
1134 result = internal_text_pattern_compare(arg1, arg2);
1136 PG_FREE_IF_COPY(arg1, 0);
1137 PG_FREE_IF_COPY(arg2, 1);
1139 PG_RETURN_BOOL(result >= 0);
1144 text_pattern_gt(PG_FUNCTION_ARGS)
1146 text *arg1 = PG_GETARG_TEXT_P(0);
1147 text *arg2 = PG_GETARG_TEXT_P(1);
1150 result = internal_text_pattern_compare(arg1, arg2);
1152 PG_FREE_IF_COPY(arg1, 0);
1153 PG_FREE_IF_COPY(arg2, 1);
1155 PG_RETURN_BOOL(result > 0);
1160 text_pattern_ne(PG_FUNCTION_ARGS)
1162 text *arg1 = PG_GETARG_TEXT_P(0);
1163 text *arg2 = PG_GETARG_TEXT_P(1);
1166 if (VARSIZE(arg1) != VARSIZE(arg2))
1169 result = internal_text_pattern_compare(arg1, arg2);
1171 PG_FREE_IF_COPY(arg1, 0);
1172 PG_FREE_IF_COPY(arg2, 1);
1174 PG_RETURN_BOOL(result != 0);
1179 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1181 text *arg1 = PG_GETARG_TEXT_P(0);
1182 text *arg2 = PG_GETARG_TEXT_P(1);
1185 result = internal_text_pattern_compare(arg1, arg2);
1187 PG_FREE_IF_COPY(arg1, 0);
1188 PG_FREE_IF_COPY(arg2, 1);
1190 PG_RETURN_INT32(result);
1194 /*-------------------------------------------------------------
1197 * get the number of bytes contained in an instance of type 'bytea'
1198 *-------------------------------------------------------------
1201 byteaoctetlen(PG_FUNCTION_ARGS)
1203 Datum str = PG_GETARG_DATUM(0);
1205 /* We need not detoast the input at all */
1206 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1211 * takes two bytea* and returns a bytea* that is the concatenation of
1214 * Cloned from textcat and modified as required.
1217 byteacat(PG_FUNCTION_ARGS)
1219 bytea *t1 = PG_GETARG_BYTEA_P(0);
1220 bytea *t2 = PG_GETARG_BYTEA_P(1);
1227 len1 = VARSIZE(t1) - VARHDRSZ;
1231 len2 = VARSIZE(t2) - VARHDRSZ;
1235 len = len1 + len2 + VARHDRSZ;
1236 result = (bytea *) palloc(len);
1238 /* Set size of result string... */
1239 VARATT_SIZEP(result) = len;
1241 /* Fill data field of result string... */
1242 ptr = VARDATA(result);
1244 memcpy(ptr, VARDATA(t1), len1);
1246 memcpy(ptr + len1, VARDATA(t2), len2);
1248 PG_RETURN_BYTEA_P(result);
1251 #define PG_STR_GET_BYTEA(str_) \
1252 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1255 * Return a substring starting at the specified position.
1256 * Cloned from text_substr and modified as required.
1260 * - starting position (is one-based)
1261 * - string length (optional)
1263 * If the starting position is zero or less, then return from the start of the string
1264 * adjusting the length to be consistent with the "negative start" per SQL92.
1265 * If the length is less than zero, an ERROR is thrown. If no third argument
1266 * (length) is provided, the length to the end of the string is assumed.
1269 bytea_substr(PG_FUNCTION_ARGS)
1271 int S = PG_GETARG_INT32(1); /* start position */
1272 int S1; /* adjusted start position */
1273 int L1; /* adjusted substring length */
1277 if (fcinfo->nargs == 2)
1280 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1281 * everything to the end of the string if we pass it a negative
1289 int E = S + PG_GETARG_INT32(2);
1292 * A negative value for L is the only way for the end position to
1293 * be before the start. SQL99 says to throw an error.
1297 (errcode(ERRCODE_SUBSTRING_ERROR),
1298 errmsg("negative substring length not allowed")));
1301 * A zero or negative value for the end position can happen if the
1302 * start was negative or one. SQL99 says to return a zero-length
1306 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1312 * If the start position is past the end of the string, SQL99 says to
1313 * return a zero-length string -- PG_GETARG_BYTEA_P_SLICE() will do
1314 * that for us. Convert to zero-based starting position
1316 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1320 * bytea_substr_no_len -
1321 * Wrapper to avoid opr_sanity failure due to
1322 * one function accepting a different number of args.
1325 bytea_substr_no_len(PG_FUNCTION_ARGS)
1327 return bytea_substr(fcinfo);
1332 * Return the position of the specified substring.
1333 * Implements the SQL92 POSITION() function.
1334 * Cloned from textpos and modified as required.
1337 byteapos(PG_FUNCTION_ARGS)
1339 bytea *t1 = PG_GETARG_BYTEA_P(0);
1340 bytea *t2 = PG_GETARG_BYTEA_P(1);
1349 if (VARSIZE(t2) <= VARHDRSZ)
1350 PG_RETURN_INT32(1); /* result for empty pattern */
1352 len1 = VARSIZE(t1) - VARHDRSZ;
1353 len2 = VARSIZE(t2) - VARHDRSZ;
1360 for (p = 0; p <= px; p++)
1362 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1370 PG_RETURN_INT32(pos);
1373 /*-------------------------------------------------------------
1376 * this routine treats "bytea" as an array of bytes.
1377 * It returns the Nth byte (a number between 0 and 255).
1378 *-------------------------------------------------------------
1381 byteaGetByte(PG_FUNCTION_ARGS)
1383 bytea *v = PG_GETARG_BYTEA_P(0);
1384 int32 n = PG_GETARG_INT32(1);
1388 len = VARSIZE(v) - VARHDRSZ;
1390 if (n < 0 || n >= len)
1392 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1393 errmsg("index %d out of valid range, 0..%d",
1396 byte = ((unsigned char *) VARDATA(v))[n];
1398 PG_RETURN_INT32(byte);
1401 /*-------------------------------------------------------------
1404 * This routine treats a "bytea" type like an array of bits.
1405 * It returns the value of the Nth bit (0 or 1).
1407 *-------------------------------------------------------------
1410 byteaGetBit(PG_FUNCTION_ARGS)
1412 bytea *v = PG_GETARG_BYTEA_P(0);
1413 int32 n = PG_GETARG_INT32(1);
1419 len = VARSIZE(v) - VARHDRSZ;
1421 if (n < 0 || n >= len * 8)
1423 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1424 errmsg("index %d out of valid range, 0..%d",
1430 byte = ((unsigned char *) VARDATA(v))[byteNo];
1432 if (byte & (1 << bitNo))
1438 /*-------------------------------------------------------------
1441 * Given an instance of type 'bytea' creates a new one with
1442 * the Nth byte set to the given value.
1444 *-------------------------------------------------------------
1447 byteaSetByte(PG_FUNCTION_ARGS)
1449 bytea *v = PG_GETARG_BYTEA_P(0);
1450 int32 n = PG_GETARG_INT32(1);
1451 int32 newByte = PG_GETARG_INT32(2);
1455 len = VARSIZE(v) - VARHDRSZ;
1457 if (n < 0 || n >= len)
1459 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1460 errmsg("index %d out of valid range, 0..%d",
1464 * Make a copy of the original varlena.
1466 res = (bytea *) palloc(VARSIZE(v));
1467 memcpy((char *) res, (char *) v, VARSIZE(v));
1472 ((unsigned char *) VARDATA(res))[n] = newByte;
1474 PG_RETURN_BYTEA_P(res);
1477 /*-------------------------------------------------------------
1480 * Given an instance of type 'bytea' creates a new one with
1481 * the Nth bit set to the given value.
1483 *-------------------------------------------------------------
1486 byteaSetBit(PG_FUNCTION_ARGS)
1488 bytea *v = PG_GETARG_BYTEA_P(0);
1489 int32 n = PG_GETARG_INT32(1);
1490 int32 newBit = PG_GETARG_INT32(2);
1498 len = VARSIZE(v) - VARHDRSZ;
1500 if (n < 0 || n >= len * 8)
1502 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1503 errmsg("index %d out of valid range, 0..%d",
1512 if (newBit != 0 && newBit != 1)
1514 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1515 errmsg("new bit must be 0 or 1")));
1518 * Make a copy of the original varlena.
1520 res = (bytea *) palloc(VARSIZE(v));
1521 memcpy((char *) res, (char *) v, VARSIZE(v));
1526 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1529 newByte = oldByte & (~(1 << bitNo));
1531 newByte = oldByte | (1 << bitNo);
1533 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1535 PG_RETURN_BYTEA_P(res);
1540 * Converts a text type to a Name type.
1543 text_name(PG_FUNCTION_ARGS)
1545 text *s = PG_GETARG_TEXT_P(0);
1549 len = VARSIZE(s) - VARHDRSZ;
1551 /* Truncate oversize input */
1552 if (len >= NAMEDATALEN)
1553 len = NAMEDATALEN - 1;
1556 printf("text- convert string length %d (%d) ->%d\n",
1557 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1560 result = (Name) palloc(NAMEDATALEN);
1561 memcpy(NameStr(*result), VARDATA(s), len);
1563 /* now null pad to full length... */
1564 while (len < NAMEDATALEN)
1566 *(NameStr(*result) + len) = '\0';
1570 PG_RETURN_NAME(result);
1574 * Converts a Name type to a text type.
1577 name_text(PG_FUNCTION_ARGS)
1579 Name s = PG_GETARG_NAME(0);
1583 len = strlen(NameStr(*s));
1586 printf("text- convert string length %d (%d) ->%d\n",
1587 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1590 result = palloc(VARHDRSZ + len);
1591 VARATT_SIZEP(result) = VARHDRSZ + len;
1592 memcpy(VARDATA(result), NameStr(*s), len);
1594 PG_RETURN_TEXT_P(result);
1599 * textToQualifiedNameList - convert a text object to list of names
1601 * This implements the input parsing needed by nextval() and other
1602 * functions that take a text parameter representing a qualified name.
1603 * We split the name at dots, downcase if not double-quoted, and
1604 * truncate names if they're too long.
1607 textToQualifiedNameList(text *textval)
1614 /* Convert to C string (handles possible detoasting). */
1615 /* Note we rely on being able to modify rawname below. */
1616 rawname = DatumGetCString(DirectFunctionCall1(textout,
1617 PointerGetDatum(textval)));
1619 if (!SplitIdentifierString(rawname, '.', &namelist))
1621 (errcode(ERRCODE_INVALID_NAME),
1622 errmsg("invalid name syntax")));
1624 if (namelist == NIL)
1626 (errcode(ERRCODE_INVALID_NAME),
1627 errmsg("invalid name syntax")));
1629 foreach(l, namelist)
1631 char *curname = (char *) lfirst(l);
1633 result = lappend(result, makeString(pstrdup(curname)));
1637 list_free(namelist);
1643 * SplitIdentifierString --- parse a string containing identifiers
1645 * This is the guts of textToQualifiedNameList, and is exported for use in
1646 * other situations such as parsing GUC variables. In the GUC case, it's
1647 * important to avoid memory leaks, so the API is designed to minimize the
1648 * amount of stuff that needs to be allocated and freed.
1651 * rawstring: the input string; must be overwritable! On return, it's
1652 * been modified to contain the separated identifiers.
1653 * separator: the separator punctuation expected between identifiers
1654 * (typically '.' or ','). Whitespace may also appear around
1657 * namelist: filled with a palloc'd list of pointers to identifiers within
1658 * rawstring. Caller should list_free() this even on error return.
1660 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1662 * Note that an empty string is considered okay here, though not in
1663 * textToQualifiedNameList.
/*
 * Implementation overview.  The scan works destructively on rawstring and
 * every pointer appended to *namelist points into rawstring itself.
 *
 * - Leading whitespace is skipped first; the early return of true is the
 *   empty-string case mentioned in the inline comment.
 * - Quoted name: curname starts just past the opening double quote.  strchr
 *   finds the next double quote; a doubled quote is collapsed by a memmove
 *   that shifts the rest of the string (strlen(endp) bytes, which includes
 *   the terminating NUL) left by one, and the search repeats.  A missing
 *   closing quote returns false.  Quoted names are not downcased.
 * - Unquoted name: runs until the separator, whitespace or end of string.
 *   An empty unquoted name returns false.  The name is downcased through
 *   downcase_truncate_identifier and copied back over the input with
 *   strncpy(curname, downname, len); the Assert documents the requirement
 *   that the downcased form is no longer than the original.
 * - After the name, trailing whitespace is skipped.  A separator means
 *   another name must follow; end of string ends the scan; anything else
 *   returns false.
 * - The byte following the name is then overwritten with a NUL (per the
 *   inline comment), truncate_identifier shortens an overlength name, and
 *   curname is appended to *namelist.
 *
 * NOTE(review): the third parameter (namelist), the local declarations, the
 * loop keywords, the advancing of nextp and the final return are not visible
 * in this listing; the description above covers only the visible lines.
 */
1666 SplitIdentifierString(char *rawstring, char separator,
1669 char *nextp = rawstring;
1674 while (isspace((unsigned char) *nextp))
1675 nextp++; /* skip leading whitespace */
1678 return true; /* allow empty string */
1680 /* At the top of the loop, we are at start of a new identifier. */
1688 /* Quoted name --- collapse quote-quote pairs, no downcasing */
1689 curname = nextp + 1;
1692 endp = strchr(nextp + 1, '\"');
1694 return false; /* mismatched quotes */
1695 if (endp[1] != '\"')
1696 break; /* found end of quoted name */
1697 /* Collapse adjacent quotes into one quote, and look again */
1698 memmove(endp, endp + 1, strlen(endp));
1701 /* endp now points at the terminating quote */
1706 /* Unquoted name --- extends to separator or whitespace */
1711 while (*nextp && *nextp != separator &&
1712 !isspace((unsigned char) *nextp))
1715 if (curname == nextp)
1716 return false; /* empty unquoted name not allowed */
1719 * Downcase the identifier, using same code as main lexer
1722 * XXX because we want to overwrite the input in-place, we cannot
1723 * support a downcasing transformation that increases the
1724 * string length. This is not a problem given the current
1725 * implementation of downcase_truncate_identifier, but we'll
1726 * probably have to do something about this someday.
1728 len = endp - curname;
1729 downname = downcase_truncate_identifier(curname, len, false);
1730 Assert(strlen(downname) <= len);
1731 strncpy(curname, downname, len);
1735 while (isspace((unsigned char) *nextp))
1736 nextp++; /* skip trailing whitespace */
1738 if (*nextp == separator)
1741 while (isspace((unsigned char) *nextp))
1742 nextp++; /* skip leading whitespace for next */
1743 /* we expect another name, so done remains false */
1745 else if (*nextp == '\0')
1748 return false; /* invalid syntax */
1750 /* Now safe to overwrite separator with a null */
1753 /* Truncate name if it's overlength */
1754 truncate_identifier(curname, strlen(curname), false);
1757 * Finished isolating current name --- add it to list
1759 *namelist = lappend(*namelist, curname);
1761 /* Loop back if we didn't reach end of string */
1768 /*****************************************************************************
1769 * Comparison Functions used for bytea
1771 * Note: btree indexes need these routines not to leak memory; therefore,
1772 * be careful to free working copies of toasted datums. Most places don't
1773 * need to be so careful.
1774 *****************************************************************************/
/*
 * byteaeq: SQL-callable equality test for two bytea values.
 *
 * Arguments 0 and 1 are fetched with PG_GETARG_BYTEA_P.  The payload length
 * of each is VARSIZE minus VARHDRSZ.  The visible comparison is a memcmp
 * over len1 bytes; the result is true when it reports no difference.
 *
 * NOTE(review): the length test that the fast-path comment refers to is not
 * visible in this listing; presumably unequal lengths yield false without
 * calling memcmp -- confirm.
 *
 * Both arguments go through PG_FREE_IF_COPY before the boolean is returned,
 * in line with the no-leak requirement stated in the section header.
 */
1777 byteaeq(PG_FUNCTION_ARGS)
1779 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1780 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1785 len1 = VARSIZE(arg1) - VARHDRSZ;
1786 len2 = VARSIZE(arg2) - VARHDRSZ;
1788 /* fast path for different-length inputs */
1792 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1794 PG_FREE_IF_COPY(arg1, 0);
1795 PG_FREE_IF_COPY(arg2, 1);
1797 PG_RETURN_BOOL(result);
/*
 * byteane: SQL-callable inequality test for two bytea values.
 *
 * Mirrors byteaeq: payload lengths are VARSIZE minus VARHDRSZ, and the
 * visible comparison is a memcmp over len1 bytes whose result is true when
 * a difference is found.
 *
 * NOTE(review): the length test behind the fast-path comment is not visible
 * in this listing; presumably unequal lengths yield true directly -- confirm.
 *
 * Both arguments go through PG_FREE_IF_COPY before the boolean is returned.
 */
1801 byteane(PG_FUNCTION_ARGS)
1803 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1804 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1809 len1 = VARSIZE(arg1) - VARHDRSZ;
1810 len2 = VARSIZE(arg2) - VARHDRSZ;
1812 /* fast path for different-length inputs */
1816 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1818 PG_FREE_IF_COPY(arg1, 0);
1819 PG_FREE_IF_COPY(arg2, 1);
1821 PG_RETURN_BOOL(result);
/*
 * bytealt: SQL-callable less-than test for two bytea values.
 *
 * The payloads are compared with memcmp over the shorter of the two lengths.
 * The result is true when that comparison is negative, or when the common
 * prefix is identical and the first value is strictly shorter.
 *
 * Both arguments go through PG_FREE_IF_COPY before the result is computed
 * from the saved cmp, len1 and len2 values.
 */
1825 bytealt(PG_FUNCTION_ARGS)
1827 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1828 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1833 len1 = VARSIZE(arg1) - VARHDRSZ;
1834 len2 = VARSIZE(arg2) - VARHDRSZ;
1836 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1838 PG_FREE_IF_COPY(arg1, 0);
1839 PG_FREE_IF_COPY(arg2, 1);
1841 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
/*
 * byteale: SQL-callable less-than-or-equal test for two bytea values.
 *
 * The payloads are compared with memcmp over the shorter of the two lengths.
 * The result is true when that comparison is negative, or when the common
 * prefix is identical and the first value is not longer than the second.
 *
 * Both arguments go through PG_FREE_IF_COPY before the boolean is returned.
 */
1845 byteale(PG_FUNCTION_ARGS)
1847 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1848 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1853 len1 = VARSIZE(arg1) - VARHDRSZ;
1854 len2 = VARSIZE(arg2) - VARHDRSZ;
1856 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1858 PG_FREE_IF_COPY(arg1, 0);
1859 PG_FREE_IF_COPY(arg2, 1);
1861 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
/*
 * byteagt: SQL-callable greater-than test for two bytea values.
 *
 * The payloads are compared with memcmp over the shorter of the two lengths.
 * The result is true when that comparison is positive, or when the common
 * prefix is identical and the first value is strictly longer.
 *
 * Both arguments go through PG_FREE_IF_COPY before the boolean is returned.
 */
1865 byteagt(PG_FUNCTION_ARGS)
1867 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1868 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1873 len1 = VARSIZE(arg1) - VARHDRSZ;
1874 len2 = VARSIZE(arg2) - VARHDRSZ;
1876 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1878 PG_FREE_IF_COPY(arg1, 0);
1879 PG_FREE_IF_COPY(arg2, 1);
1881 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
/*
 * byteage: SQL-callable greater-than-or-equal test for two bytea values.
 *
 * The payloads are compared with memcmp over the shorter of the two lengths.
 * The result is true when that comparison is positive, or when the common
 * prefix is identical and the first value is not shorter than the second.
 *
 * Both arguments go through PG_FREE_IF_COPY before the boolean is returned.
 */
1885 byteage(PG_FUNCTION_ARGS)
1887 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1888 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1893 len1 = VARSIZE(arg1) - VARHDRSZ;
1894 len2 = VARSIZE(arg2) - VARHDRSZ;
1896 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1898 PG_FREE_IF_COPY(arg1, 0);
1899 PG_FREE_IF_COPY(arg2, 1);
1901 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
/*
 * byteacmp: SQL-callable three-way comparison of two bytea values.
 *
 * The payloads are compared with memcmp over the shorter of the two lengths.
 * A nonzero memcmp result is returned as is.  When the common prefix is
 * identical but the lengths differ, the result is -1 if the first value is
 * shorter and 1 otherwise.  Equal payloads of equal length give 0.
 *
 * Both arguments go through PG_FREE_IF_COPY before the int32 is returned.
 */
1905 byteacmp(PG_FUNCTION_ARGS)
1907 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1908 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1913 len1 = VARSIZE(arg1) - VARHDRSZ;
1914 len2 = VARSIZE(arg2) - VARHDRSZ;
1916 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1917 if ((cmp == 0) && (len1 != len2))
1918 cmp = (len1 < len2) ? -1 : 1;
1920 PG_FREE_IF_COPY(arg1, 0);
1921 PG_FREE_IF_COPY(arg2, 1);
1923 PG_RETURN_INT32(cmp);
1927 * appendStringInfoText
1929 * Append a text to str.
1930 * Like appendStringInfoString(str, PG_TEXT_GET_STR(t)) but faster.
/*
 * str: destination buffer.
 * t:   text value whose payload is appended.
 *
 * The payload (VARSIZE minus VARHDRSZ bytes starting at VARDATA) is handed
 * straight to appendBinaryStringInfo, so no intermediate C string is built.
 */
1933 appendStringInfoText(StringInfo str, const text *t)
1935 appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
1940 * replace all occurrences of 'old_sub_str' in 'orig_str'
1941 * with 'new_sub_str' to form 'new_str'
1943 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1944 * otherwise returns 'new_str'
/*
 * replace_text: SQL-callable substring replacement.
 *
 * Argument 0 is the source text, argument 1 the substring to look for and
 * argument 2 its replacement.  Lengths are taken with TEXTLEN and match
 * positions with TEXTPOS (first occurrence, 1-based; 0 means not found).
 *
 * Visible flow:
 * - If the source or the search string is empty, the source is returned
 *   unchanged.  The second early return of the source is the not-found case
 *   described by the inline comment.
 * - Otherwise a StringInfo accumulates the output.  On each iteration the
 *   part of buf_text before the match is appended, followed by the
 *   replacement; buf_text then becomes the remainder after the match and is
 *   searched again.
 * - After the loop the unmatched remainder is appended and the buffer is
 *   converted to text with PG_STR_GET_TEXT.
 *
 * NOTE(review): the statements guarded by the two tests of buf_text against
 * src_text are not visible in this listing; presumably they free
 * intermediate remainders while leaving the caller-owned source alone --
 * confirm.
 */
1947 replace_text(PG_FUNCTION_ARGS)
1949 text *src_text = PG_GETARG_TEXT_P(0);
1950 text *from_sub_text = PG_GETARG_TEXT_P(1);
1951 text *to_sub_text = PG_GETARG_TEXT_P(2);
1952 int src_text_len = TEXTLEN(src_text);
1953 int from_sub_text_len = TEXTLEN(from_sub_text);
1961 if (src_text_len == 0 || from_sub_text_len == 0)
1962 PG_RETURN_TEXT_P(src_text);
1964 curr_posn = TEXTPOS(src_text, from_sub_text);
1966 /* When the from_sub_text is not found, there is nothing to do. */
1968 PG_RETURN_TEXT_P(src_text);
1970 str = makeStringInfo();
1971 buf_text = src_text;
1973 while (curr_posn > 0)
1975 left_text = text_substring(PointerGetDatum(buf_text),
1976 1, curr_posn - 1, false);
1977 right_text = text_substring(PointerGetDatum(buf_text),
1978 curr_posn + from_sub_text_len, -1, true);
1980 appendStringInfoText(str, left_text);
1981 appendStringInfoText(str, to_sub_text);
1983 if (buf_text != src_text)
1986 buf_text = right_text;
1987 curr_posn = TEXTPOS(buf_text, from_sub_text);
1990 appendStringInfoText(str, buf_text);
1991 if (buf_text != src_text)
1994 ret_text = PG_STR_GET_TEXT(str->data);
1998 PG_RETURN_TEXT_P(ret_text);
2002 * check_replace_text_has_escape_char
2003 * check whether replace_text has escape char.
/*
 * replace_text: the replacement string to inspect.
 *
 * Scans the payload from VARDATA up to its end and returns true as soon as
 * a backslash is seen.  When the database encoding has a maximum character
 * length of one byte the scan advances byte by byte; otherwise it advances
 * by pg_mblen so that only the first byte of each character is tested.
 *
 * NOTE(review): the final return is not visible in this listing; presumably
 * false is returned when no backslash is found -- confirm.
 */
2006 check_replace_text_has_escape_char(const text *replace_text)
2008 const char *p = VARDATA(replace_text);
2009 const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2011 if (pg_database_encoding_max_length() == 1)
2013 for (; p < p_end; p++)
2014 if (*p == '\\') return true;
2018 for (; p < p_end; p += pg_mblen(p))
2019 if (*p == '\\') return true;
2026 * appendStringInfoRegexpSubstr
2027 * append replace_text to str, expanding back references of regexp.
/*
 * str:          output buffer that receives the expanded replacement.
 * replace_text: replacement string, possibly containing backslash escapes.
 * pmatch:       match offsets; entry 0 is used for the entire match and
 *               entries indexed by idx for the digit escapes.
 * src_text:     the text that was matched; referenced substrings are copied
 *               from it.
 *
 * Visible flow: scan replace_text for the next backslash (byte-wise or by
 * pg_mblen, as in check_replace_text_has_escape_char), append the literal
 * run that precedes it, then look at the character after the backslash.  A
 * digit from 1 to 9 selects pmatch[idx]; another visible branch selects
 * pmatch[0].  When both offsets differ from -1 the referenced part of
 * src_text is appended.  The so + 1 start passed to text_substring turns the
 * zero-based match offset into the one-based position used elsewhere in
 * this file.
 *
 * NOTE(review): the loop keyword, the computation of ch_cnt and idx, the
 * condition selecting the whole-match branch and the handling of other
 * escaped characters are not visible in this listing.
 */
2030 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2031 regmatch_t *pmatch, text *src_text)
2033 const char *p = VARDATA(replace_text);
2034 const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
2036 int eml = pg_database_encoding_max_length();
2038 int substr_start = 1;
2046 /* Find escape char. */
2050 for (; p < p_end && *p != '\\'; p++)
2055 for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2060 * Copy the text when there is a text in the left of escape char
2061 * or escape char is not found.
2065 text *append_text = text_substring(PointerGetDatum(replace_text),
2066 substr_start, ch_cnt, false);
2067 appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
2070 substr_start += ch_cnt + 1;
2072 if (p >= p_end) /* When escape char is not found. */
2075 /* See the next character of escape char. */
2079 if (*p >= '1' && *p <= '9')
2081 /* Use the back reference of regexp. */
2083 so = pmatch[idx].rm_so;
2084 eo = pmatch[idx].rm_eo;
2090 /* Use the entire matched string. */
2091 so = pmatch[0].rm_so;
2092 eo = pmatch[0].rm_eo;
2097 if (so != -1 && eo != -1)
2099 /* Copy the text that is back reference of regexp. */
2100 text *append_text = text_substring(PointerGetDatum(src_text),
2101 so + 1, (eo - so), false);
2102 appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
/*
 * Number of regmatch_t slots used by replace_text_regexp: it sizes the
 * pmatch array and is the match count passed to pg_regexec.  Slot 0 is read
 * as the whole match and the digit escapes 1 to 9 index the remaining slots.
 */
2108 #define REGEXP_REPLACE_BACKREF_CNT 10
2111 * replace_text_regexp
2112 * replace text in src_text that matches the regexp with replace_text.
/*
 * replace_text_regexp: regular-expression replacement.
 *
 * Argument 0: source text.
 * Argument 1: pointer to an already compiled regex_t.
 * Argument 2: replacement text, which may contain backslash escapes.
 * Argument 3: global flag; per the inline comment, when it is off only the
 *             first match is replaced.
 *
 * Visible flow:
 * - The source is converted to pg_wchar in a palloc-ed buffer sized from
 *   the byte length plus one; data_len is the resulting character count.
 * - Each iteration runs pg_regexec from search_start.  A result other than
 *   REG_OKAY or REG_NOMATCH is reported with
 *   ERRCODE_INVALID_REGULAR_EXPRESSION, using the text from pg_regerror.
 * - On a match, any source text between data_pos and the match start is
 *   appended, followed by the replacement, which goes through
 *   appendStringInfoRegexpSubstr when it contains an escape and is appended
 *   verbatim otherwise.  Both search_start and data_pos move to the end of
 *   the match.
 * - After the loop, source text to the right of data_pos is appended and
 *   the buffer is converted to text with PG_STR_GET_TEXT.
 *
 * NOTE(review): the statements taken on REG_NOMATCH, when the global flag is
 * off, and for a zero-width match are not visible in this listing; the
 * inline comments say a zero-width match makes the search resume from the
 * next character.
 */
2115 replace_text_regexp(PG_FUNCTION_ARGS)
2118 text *src_text = PG_GETARG_TEXT_P(0);
2119 int src_text_len = VARSIZE(src_text) - VARHDRSZ;
2120 regex_t *re = (regex_t *)PG_GETARG_POINTER(1);
2121 text *replace_text = PG_GETARG_TEXT_P(2);
2122 bool global = PG_GETARG_BOOL(3);
2123 StringInfo str = makeStringInfo();
2125 regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
2132 /* Convert data string to wide characters. */
2133 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2134 data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
2136 /* Check whether replace_text has escape char. */
2137 have_escape = check_replace_text_has_escape_char(replace_text);
2139 for (search_start = data_pos = 0; search_start <= data_len;)
2141 regexec_result = pg_regexec(re,
2145 NULL, /* no details */
2146 REGEXP_REPLACE_BACKREF_CNT,
2150 if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
2155 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2157 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2158 errmsg("regular expression failed: %s", errMsg)));
2161 if (regexec_result == REG_NOMATCH)
2165 * Copy the text when there is a text in the left of matched position.
2167 if (pmatch[0].rm_so - data_pos > 0)
2169 text *left_text = text_substring(PointerGetDatum(src_text),
2171 pmatch[0].rm_so - data_pos, false);
2172 appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
2177 * Copy the replace_text. Process back references when the
2178 * replace_text has escape characters.
2181 appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
2183 appendStringInfoString(str, PG_TEXT_GET_STR(replace_text));
2185 search_start = data_pos = pmatch[0].rm_eo;
2188 * When global option is off, replace the first instance only.
2194 * Search from next character when the matching text is zero width.
2196 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2201 * Copy the text when there is a text at the right of last matched
2202 * or regexp is not matched.
2204 if (data_pos < data_len)
2206 text *right_text = text_substring(PointerGetDatum(src_text),
2207 data_pos + 1, -1, true);
2208 appendStringInfoString(str, PG_TEXT_GET_STR(right_text));
2212 ret_text = PG_STR_GET_TEXT(str->data);
2217 PG_RETURN_TEXT_P(ret_text);
2222 * parse input string
2223 * return ord item (1 based)
2224 * based on provided field separator
/*
 * split_text: SQL-callable field extraction.
 *
 * Argument 0: input string.
 * Argument 1: field separator.
 * Argument 2: one-based field number.
 *
 * Visible flow:
 * - An invalid field number is reported with
 *   ERRCODE_INVALID_PARAMETER_VALUE (the guarding test is not visible here).
 * - An empty input yields an empty string.  For an empty separator the
 *   inline comment states: first field gives the input, any other field an
 *   empty string.
 * - start_posn is the position of separator occurrence fldnum - 1 and
 *   end_posn that of occurrence fldnum, with 0 meaning not found.
 * - Both zero: separator absent; same rule as for an empty separator.
 * - Only start_posn zero: the first field, taken with the LEFT macro.
 * - Only end_posn zero: the last field, starting just past the separator.
 * - Otherwise an interior field, spanning the text between the two
 *   separator occurrences.
 *
 * NOTE(review): the tests on fldnum and on fldsep_len, and the trailing
 * arguments of the last two text_substring calls, are not visible in this
 * listing.
 */
2227 split_text(PG_FUNCTION_ARGS)
2229 text *inputstring = PG_GETARG_TEXT_P(0);
2230 text *fldsep = PG_GETARG_TEXT_P(1);
2231 int fldnum = PG_GETARG_INT32(2);
2232 int inputstring_len = TEXTLEN(inputstring);
2233 int fldsep_len = TEXTLEN(fldsep);
2238 /* field number is 1 based */
2241 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2242 errmsg("field position must be greater than zero")));
2244 /* return empty string for empty input string */
2245 if (inputstring_len < 1)
2246 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2248 /* empty field separator */
2251 /* if first field, return input string, else empty string */
2253 PG_RETURN_TEXT_P(inputstring);
2255 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2258 start_posn = text_position(inputstring, fldsep, fldnum - 1);
2259 end_posn = text_position(inputstring, fldsep, fldnum);
2261 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2263 /* if first field, return input string, else empty string */
2265 PG_RETURN_TEXT_P(inputstring);
2267 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2269 else if (start_posn == 0)
2271 /* first field requested */
2272 result_text = LEFT(inputstring, fldsep);
2273 PG_RETURN_TEXT_P(result_text);
2275 else if (end_posn == 0)
2277 /* last field requested */
2278 result_text = text_substring(PointerGetDatum(inputstring),
2279 start_posn + fldsep_len,
2281 PG_RETURN_TEXT_P(result_text);
2285 /* interior field requested */
2286 result_text = text_substring(PointerGetDatum(inputstring),
2287 start_posn + fldsep_len,
2288 end_posn - start_posn - fldsep_len,
2290 PG_RETURN_TEXT_P(result_text);
2296 * parse input string
2297 * return text array of elements
2298 * based on provided field separator
/*
 * text_to_array: SQL-callable split of a string into a text array.
 *
 * Argument 0: input string.
 * Argument 1: field separator.
 *
 * Visible flow:
 * - Per the inline comment, an empty input returns NULL.
 * - An empty separator, or a separator that never occurs, gives a
 *   one-element, one-dimensional array built by create_singleton_array from
 *   the whole input.
 * - Otherwise fields are extracted one at a time with the same three cases
 *   as split_text (first, last, interior) and accumulated with
 *   accumArrayResult in CurrentMemoryContext.  Once no further separator is
 *   found after at least one field, makeArrayResult builds the result.
 *
 * NOTE(review): inputstring is a text pointer but is wrapped with
 * CStringGetDatum in both create_singleton_array calls; presumably this acts
 * only as a pointer-to-Datum conversion, so PointerGetDatum would state the
 * intent better -- confirm.
 *
 * NOTE(review): several guarding tests, the initial value of end_posn and
 * the trailing arguments of some calls are not visible in this listing.
 */
2301 text_to_array(PG_FUNCTION_ARGS)
2303 text *inputstring = PG_GETARG_TEXT_P(0);
2304 text *fldsep = PG_GETARG_TEXT_P(1);
2305 int inputstring_len = TEXTLEN(inputstring);
2306 int fldsep_len = TEXTLEN(fldsep);
2311 ArrayBuildState *astate = NULL;
2313 /* return NULL for empty input string */
2314 if (inputstring_len < 1)
2318 * empty field separator return one element, 1D, array using the input
2322 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2323 CStringGetDatum(inputstring), 1));
2325 /* start with end position holding the initial start position */
2327 for (fldnum = 1;; fldnum++) /* field number is 1 based */
2330 bool disnull = false;
2332 start_posn = end_posn;
2333 end_posn = text_position(inputstring, fldsep, fldnum);
2335 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2340 * first element return one element, 1D, array using the
2343 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2344 CStringGetDatum(inputstring), 1));
2348 /* otherwise create array and exit */
2349 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2350 CurrentMemoryContext));
2353 else if (start_posn == 0)
2355 /* first field requested */
2356 result_text = LEFT(inputstring, fldsep);
2358 else if (end_posn == 0)
2360 /* last field requested */
2361 result_text = text_substring(PointerGetDatum(inputstring),
2362 start_posn + fldsep_len,
2367 /* interior field requested */
2368 result_text = text_substring(PointerGetDatum(inputstring),
2369 start_posn + fldsep_len,
2370 end_posn - start_posn - fldsep_len,
2374 /* stash away current value */
2375 dvalue = PointerGetDatum(result_text);
2376 astate = accumArrayResult(astate, dvalue,
2378 CurrentMemoryContext);
2381 /* never reached -- keep compiler quiet */
2387 * concatenate Cstring representation of input array elements
2388 * using provided field separator
/*
 * array_to_text: SQL-callable join of array elements into one text value.
 *
 * Argument 0: the array.
 * Argument 1: field separator, converted to a C string up front.
 *
 * Visible flow:
 * - Per the inline comment, an array with no elements yields an empty
 *   string.
 * - Element type information is cached in fn_extra as an ArrayMetaState
 *   allocated in fn_mcxt.  It is refreshed through get_type_io_data and
 *   fmgr_info_cxt whenever the cached element_type differs from the current
 *   one; InvalidOid marks a freshly allocated cache.
 * - Each element is fetched with fetch_att and converted by the cached
 *   output function.  One visible append puts the separator before the
 *   value and the other appends the value alone.  The data pointer is then
 *   advanced with att_addlength and realigned with att_align.
 * - The accumulated C string is converted to text with PG_STR_GET_TEXT.
 *
 * NOTE(review): the assignment of dims, the test choosing between the two
 * append forms (presumably first element versus later ones) and the
 * remaining arguments of the output function call are not visible in this
 * listing.
 */
2391 array_to_text(PG_FUNCTION_ARGS)
2393 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
2394 char *fldsep = PG_TEXTARG_GET_STR(1);
2403 StringInfo result_str = makeStringInfo();
2405 ArrayMetaState *my_extra;
2407 p = ARR_DATA_PTR(v);
2408 ndims = ARR_NDIM(v);
2410 nitems = ArrayGetNItems(ndims, dims);
2412 /* if there are no elements, return an empty string */
2414 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2416 element_type = ARR_ELEMTYPE(v);
2419 * We arrange to look up info about element type, including its output
2420 * conversion proc, only once per series of calls, assuming the
2421 * element type doesn't change underneath us.
2423 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2424 if (my_extra == NULL)
2426 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2427 sizeof(ArrayMetaState));
2428 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2429 my_extra->element_type = InvalidOid;
2432 if (my_extra->element_type != element_type)
2435 * Get info about element type, including its output conversion
2438 get_type_io_data(element_type, IOFunc_output,
2439 &my_extra->typlen, &my_extra->typbyval,
2440 &my_extra->typalign, &my_extra->typdelim,
2441 &my_extra->typioparam, &my_extra->typiofunc);
2442 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2443 fcinfo->flinfo->fn_mcxt);
2444 my_extra->element_type = element_type;
2446 typlen = my_extra->typlen;
2447 typbyval = my_extra->typbyval;
2448 typalign = my_extra->typalign;
2450 for (i = 0; i < nitems; i++)
2455 itemvalue = fetch_att(p, typbyval, typlen);
2457 value = DatumGetCString(FunctionCall1(&my_extra->proc,
2461 appendStringInfo(result_str, "%s%s", fldsep, value);
2463 appendStringInfoString(result_str, value);
2465 p = att_addlength(p, typlen, PointerGetDatum(p));
2466 p = (char *) att_align(p, typalign);
2469 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2474 * Convert an int32 to a string containing its base 16 (hex) representation.
/*
 * to_hex32: SQL-callable hexadecimal formatting of an int32.
 *
 * The argument is cast to uint32, so a negative input is converted modulo
 * 2^32 before formatting.  Digits are produced from the least significant
 * end and stored backwards into buf, starting from its last byte, using the
 * lowercase digits table.  The closing while of a do-while loop is visible,
 * so at least one digit is always emitted.  The string starting at ptr is
 * converted to text with PG_STR_GET_TEXT.
 *
 * NOTE(review): HEXBASE, the store of the terminating NUL and the statement
 * that reduces value on each iteration are not visible in this listing;
 * presumably HEXBASE is 16 to match the 16-entry digits table -- confirm.
 */
2478 to_hex32(PG_FUNCTION_ARGS)
2480 uint32 value = (uint32) PG_GETARG_INT32(0);
2483 const char *digits = "0123456789abcdef";
2484 char buf[32]; /* bigger than needed, but reasonable */
2486 ptr = buf + sizeof(buf) - 1;
2491 *--ptr = digits[value % HEXBASE];
2493 } while (ptr > buf && value);
2495 result_text = PG_STR_GET_TEXT(ptr);
2496 PG_RETURN_TEXT_P(result_text);
2500 * Convert an int64 to a string containing its base 16 (hex) representation.
/*
 * to_hex64: SQL-callable hexadecimal formatting of an int64.
 *
 * Same scheme as to_hex32 with a uint64 working value: the argument is cast
 * to unsigned, digits are stored backwards into buf from its last byte
 * using the lowercase digits table, and the string starting at ptr is
 * converted to text with PG_STR_GET_TEXT.  The visible closing while of a
 * do-while loop means at least one digit is always emitted.
 *
 * NOTE(review): HEXBASE, the store of the terminating NUL and the statement
 * that reduces value on each iteration are not visible in this listing.
 */
2504 to_hex64(PG_FUNCTION_ARGS)
2506 uint64 value = (uint64) PG_GETARG_INT64(0);
2509 const char *digits = "0123456789abcdef";
2510 char buf[32]; /* bigger than needed, but reasonable */
2512 ptr = buf + sizeof(buf) - 1;
2517 *--ptr = digits[value % HEXBASE];
2519 } while (ptr > buf && value);
2521 result_text = PG_STR_GET_TEXT(ptr);
2522 PG_RETURN_TEXT_P(result_text);
2526 * Create an md5 hash of a text string and return it as hex
2528 * md5 produces a 16 byte (128 bit) hash; double it for hex
/*
 * Length in characters of the hex-encoded digest.  The hexsum buffers in
 * md5_text and md5_bytea are declared one byte larger than this.
 */
2530 #define MD5_HASH_LEN 32
/*
 * md5_text: SQL-callable MD5 of a text value, returned as hex text.
 *
 * The payload bytes (VARSIZE minus VARHDRSZ, starting at VARDATA) are passed
 * to md5_hash, which writes into hexsum.  A false result from md5_hash is
 * reported with ERRCODE_OUT_OF_MEMORY.  The hexsum buffer is then converted
 * to text with PG_STR_GET_TEXT, which treats it as a C string.
 */
2533 md5_text(PG_FUNCTION_ARGS)
2535 text *in_text = PG_GETARG_TEXT_P(0);
2537 char hexsum[MD5_HASH_LEN + 1];
2540 /* Calculate the length of the buffer using varlena metadata */
2541 len = VARSIZE(in_text) - VARHDRSZ;
2543 /* get the hash result */
2544 if (md5_hash(VARDATA(in_text), len, hexsum) == false)
2546 (errcode(ERRCODE_OUT_OF_MEMORY),
2547 errmsg("out of memory")));
2549 /* convert to text and return it */
2550 result_text = PG_STR_GET_TEXT(hexsum);
2551 PG_RETURN_TEXT_P(result_text);
2555 * Create an md5 hash of a bytea field and return it as a hex string:
2556 * 16-byte md5 digest is represented in 32 hex characters.
/*
 * md5_bytea: SQL-callable MD5 of a bytea value, returned as hex text.
 *
 * Same steps as md5_text: the payload bytes are passed to md5_hash, a false
 * result is reported with ERRCODE_OUT_OF_MEMORY, and the hexsum buffer is
 * converted to text with PG_STR_GET_TEXT.
 */
2559 md5_bytea(PG_FUNCTION_ARGS)
2561 bytea *in = PG_GETARG_BYTEA_P(0);
2563 char hexsum[MD5_HASH_LEN + 1];
2566 len = VARSIZE(in) - VARHDRSZ;
2567 if (md5_hash(VARDATA(in), len, hexsum) == false)
2569 (errcode(ERRCODE_OUT_OF_MEMORY),
2570 errmsg("out of memory")));
2572 result_text = PG_STR_GET_TEXT(hexsum);
2573 PG_RETURN_TEXT_P(result_text);
2577 * Return the length of a datum, possibly compressed
/*
 * pg_column_size: SQL-callable size of the argument datum.
 *
 * On the first call the argument type is looked up with get_fn_expr_argtype
 * and its length with get_typlen.  That length is stored as an int in
 * fn_extra, allocated in fn_mcxt, so later calls through the same flinfo
 * skip the lookup.  A failed lookup is reported with elog(ERROR).
 *
 * When the cached length is not -1 it is returned directly.  Otherwise the
 * value is treated as a varlena and the result is toast_datum_size(value)
 * minus VARHDRSZ.
 *
 * NOTE(review): the test guarding the elog call and the size argument of
 * MemoryContextAlloc are not visible in this listing.
 */
2580 pg_column_size(PG_FUNCTION_ARGS)
2582 Datum value = PG_GETARG_DATUM(0);
2585 /* fn_extra stores the fixed column length, or -1 for varlena. */
2586 if (fcinfo->flinfo->fn_extra == NULL) /* first call? */
2588 /* On the first call lookup the datatype of the supplied argument */
2589 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
2590 int typlen = get_typlen(argtypeid);
2595 /* Oid not in pg_type, should never happen. */
2596 elog(ERROR, "cache lookup failed for type %u", argtypeid);
2599 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2601 *(int *)fcinfo->flinfo->fn_extra = typlen;
2604 if (*(int *)fcinfo->flinfo->fn_extra != -1)
2605 PG_RETURN_INT32(*(int *)fcinfo->flinfo->fn_extra);
2608 result = toast_datum_size(value) - VARHDRSZ;
2609 PG_RETURN_INT32(result);