1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.110 2004/01/31 00:45:21 tgl Exp $
13 *-------------------------------------------------------------------------
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_type.h"
23 #include "lib/stringinfo.h"
24 #include "libpq/crypt.h"
25 #include "libpq/pqformat.h"
26 #include "utils/array.h"
27 #include "utils/builtins.h"
28 #include "utils/pg_locale.h"
29 #include "utils/lsyscache.h"
32 typedef struct varlena unknown;
34 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
35 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
36 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
37 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
38 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
40 #define PG_TEXTARG_GET_STR(arg_) \
41 DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
42 #define PG_TEXT_GET_STR(textp_) \
43 DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
44 #define PG_STR_GET_TEXT(str_) \
45 DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
46 #define TEXTLEN(textp) \
47 text_length(PointerGetDatum(textp))
48 #define TEXTPOS(buf_text, from_sub_text) \
49 text_position(buf_text, from_sub_text, 1)
50 #define TEXTDUP(textp) \
51 DatumGetTextPCopy(PointerGetDatum(textp))
52 #define LEFT(buf_text, from_sub_text) \
53 text_substring(PointerGetDatum(buf_text), \
55 TEXTPOS(buf_text, from_sub_text) - 1, false)
56 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
57 text_substring(PointerGetDatum(buf_text), \
58 TEXTPOS(buf_text, from_sub_text) + (from_sub_text_len), \
61 static int text_cmp(text *arg1, text *arg2);
62 static int32 text_length(Datum str);
63 static int32 text_position(text *t1, text *t2, int matchnum);
64 static text *text_substring(Datum str,
67 bool length_not_specified);
70 /*****************************************************************************
72 *****************************************************************************/
75 #define VAL(CH) ((CH) - '0')
76 #define DIG(VAL) ((VAL) + '0')
79 * byteain - converts from printable representation of byte array
81 * Non-printable characters must be passed as '\nnn' (octal) and are
82 * converted to internal form. '\' must be passed as '\\'.
83 * ereport(ERROR, ...) if bad form.
86 * The input is scanned twice.
87 * The error checking of input is minimal.
90 byteain(PG_FUNCTION_ARGS)
92 char *inputText = PG_GETARG_CSTRING(0);
98 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
102 else if ((tp[0] == '\\') &&
103 (tp[1] >= '0' && tp[1] <= '3') &&
104 (tp[2] >= '0' && tp[2] <= '7') &&
105 (tp[3] >= '0' && tp[3] <= '7'))
107 else if ((tp[0] == '\\') &&
113 * one backslash, not followed by 0 or ### valid octal
116 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
117 errmsg("invalid input syntax for type bytea")));
122 result = (bytea *) palloc(byte);
123 VARATT_SIZEP(result) = byte; /* set varlena length */
126 rp = VARDATA(result);
131 else if ((tp[0] == '\\') &&
132 (tp[1] >= '0' && tp[1] <= '3') &&
133 (tp[2] >= '0' && tp[2] <= '7') &&
134 (tp[3] >= '0' && tp[3] <= '7'))
140 *rp++ = byte + VAL(tp[3]);
143 else if ((tp[0] == '\\') &&
152 * We should never get here. The first pass should not allow
156 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
157 errmsg("invalid input syntax for type bytea")));
161 PG_RETURN_BYTEA_P(result);
165 * byteaout - converts to printable representation of byte array
167 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
170 * NULL vlena should be an error--returning string with NULL for now.
173 byteaout(PG_FUNCTION_ARGS)
175 bytea *vlena = PG_GETARG_BYTEA_P(0);
179 int val; /* holds unprintable chars */
183 len = 1; /* empty string has 1 char */
185 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
189 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
194 rp = result = (char *) palloc(len);
196 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
203 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
207 rp[3] = DIG(val & 07);
209 rp[2] = DIG(val & 07);
211 rp[1] = DIG(val & 03);
218 PG_RETURN_CSTRING(result);
222 * bytearecv - converts external binary format to bytea
225 bytearecv(PG_FUNCTION_ARGS)
227 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
231 nbytes = buf->len - buf->cursor;
232 result = (bytea *) palloc(nbytes + VARHDRSZ);
233 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
234 pq_copymsgbytes(buf, VARDATA(result), nbytes);
235 PG_RETURN_BYTEA_P(result);
239 * byteasend - converts bytea to binary format
241 * This is a special case: just copy the input...
244 byteasend(PG_FUNCTION_ARGS)
246 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
248 PG_RETURN_BYTEA_P(vlena);
253 * textin - converts "..." to internal representation
256 textin(PG_FUNCTION_ARGS)
258 char *inputText = PG_GETARG_CSTRING(0);
262 /* verify encoding */
263 len = strlen(inputText);
264 pg_verifymbstr(inputText, len, false);
266 result = (text *) palloc(len + VARHDRSZ);
267 VARATT_SIZEP(result) = len + VARHDRSZ;
269 memcpy(VARDATA(result), inputText, len);
271 PG_RETURN_TEXT_P(result);
275 * textout - converts internal representation to "..."
278 textout(PG_FUNCTION_ARGS)
280 text *t = PG_GETARG_TEXT_P(0);
284 len = VARSIZE(t) - VARHDRSZ;
285 result = (char *) palloc(len + 1);
286 memcpy(result, VARDATA(t), len);
289 PG_RETURN_CSTRING(result);
293 * textrecv - converts external binary format to text
296 textrecv(PG_FUNCTION_ARGS)
298 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
303 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
304 result = (text *) palloc(nbytes + VARHDRSZ);
305 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
306 memcpy(VARDATA(result), str, nbytes);
308 PG_RETURN_TEXT_P(result);
312 * textsend - converts text to binary format
315 textsend(PG_FUNCTION_ARGS)
317 text *t = PG_GETARG_TEXT_P(0);
320 pq_begintypsend(&buf);
321 pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
322 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
327 * unknownin - converts "..." to internal representation
330 unknownin(PG_FUNCTION_ARGS)
332 char *inputStr = PG_GETARG_CSTRING(0);
336 len = strlen(inputStr) + VARHDRSZ;
338 result = (unknown *) palloc(len);
339 VARATT_SIZEP(result) = len;
341 memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
343 PG_RETURN_UNKNOWN_P(result);
347 * unknownout - converts internal representation to "..."
350 unknownout(PG_FUNCTION_ARGS)
352 unknown *t = PG_GETARG_UNKNOWN_P(0);
356 len = VARSIZE(t) - VARHDRSZ;
357 result = (char *) palloc(len + 1);
358 memcpy(result, VARDATA(t), len);
361 PG_RETURN_CSTRING(result);
365 * unknownrecv - converts external binary format to unknown
368 unknownrecv(PG_FUNCTION_ARGS)
370 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
374 nbytes = buf->len - buf->cursor;
375 result = (unknown *) palloc(nbytes + VARHDRSZ);
376 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
377 pq_copymsgbytes(buf, VARDATA(result), nbytes);
378 PG_RETURN_UNKNOWN_P(result);
382 * unknownsend - converts unknown to binary format
384 * This is a special case: just copy the input, since it's
385 * effectively the same format as bytea
388 unknownsend(PG_FUNCTION_ARGS)
390 unknown *vlena = PG_GETARG_UNKNOWN_P_COPY(0);
392 PG_RETURN_UNKNOWN_P(vlena);
396 /* ========== PUBLIC ROUTINES ========== */
400 * returns the logical length of a text*
401 * (which is less than the VARSIZE of the text*)
404 textlen(PG_FUNCTION_ARGS)
406 Datum str = PG_GETARG_DATUM(0);
408 /* try to avoid decompressing argument */
409 PG_RETURN_INT32(text_length(str));
414 * Does the real work for textlen()
416 * This is broken out so it can be called directly by other string processing
417 * functions. Note that the argument is passed as a Datum, to indicate that
418 * it may still be in compressed form. We can avoid decompressing it at all
422 text_length(Datum str)
424 /* fastpath when max encoding length is one */
425 if (pg_database_encoding_max_length() == 1)
426 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
429 text *t = DatumGetTextP(str);
431 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
432 VARSIZE(t) - VARHDRSZ));
438 * returns the physical length of a text*
439 * (which is less than the VARSIZE of the text*)
442 textoctetlen(PG_FUNCTION_ARGS)
444 Datum str = PG_GETARG_DATUM(0);
446 /* We need not detoast the input at all */
447 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
452 * takes two text* and returns a text* that is the concatenation of
455 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
456 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
457 * Allocate space for output in all cases.
458 * XXX - thomas 1997-07-10
461 textcat(PG_FUNCTION_ARGS)
463 text *t1 = PG_GETARG_TEXT_P(0);
464 text *t2 = PG_GETARG_TEXT_P(1);
471 len1 = (VARSIZE(t1) - VARHDRSZ);
475 len2 = (VARSIZE(t2) - VARHDRSZ);
479 len = len1 + len2 + VARHDRSZ;
480 result = (text *) palloc(len);
482 /* Set size of result string... */
483 VARATT_SIZEP(result) = len;
485 /* Fill data field of result string... */
486 ptr = VARDATA(result);
488 memcpy(ptr, VARDATA(t1), len1);
490 memcpy(ptr + len1, VARDATA(t2), len2);
492 PG_RETURN_TEXT_P(result);
497 * Return a substring starting at the specified position.
498 * - thomas 1997-12-31
502 * - starting position (is one-based)
505 * If the starting position is zero or less, then return from the start of the string
506 * adjusting the length to be consistent with the "negative start" per SQL92.
507 * If the length is less than zero, an ERROR is thrown; if no length is given, return the remaining string.
509 * Added multibyte support.
510 * - Tatsuo Ishii 1998-4-21
511 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
512 * Formerly returned the entire string; now returns a portion.
513 * - Thomas Lockhart 1998-12-10
514 * Now uses faster TOAST-slicing interface
515 * - John Gray 2002-02-22
516 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
517 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
518 * error; if E < 1, return '', not entire string). Fixed MB related bug when
519 * S > LC and < LC + 4 sometimes garbage characters are returned.
520 * - Joe Conway 2002-08-10
523 text_substr(PG_FUNCTION_ARGS)
525 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
532 * text_substr_no_len -
533 * Wrapper to avoid opr_sanity failure due to
534 * one function accepting a different number of args.
537 text_substr_no_len(PG_FUNCTION_ARGS)
539 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
546 * Does the real work for text_substr() and text_substr_no_len()
548 * This is broken out so it can be called directly by other string processing
549 * functions. Note that the argument is passed as a Datum, to indicate that
550 * it may still be in compressed/toasted form. We can avoid detoasting all
551 * of it in some cases.
554 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
556 int32 eml = pg_database_encoding_max_length();
557 int32 S = start; /* start position */
558 int32 S1; /* adjusted start position */
559 int32 L1; /* adjusted substring length */
561 /* life is easy if the encoding max length is 1 */
566 if (length_not_specified) /* special case - get length to
575 * A negative value for L is the only way for the end position
576 * to be before the start. SQL99 says to throw an error.
580 (errcode(ERRCODE_SUBSTRING_ERROR),
581 errmsg("negative substring length not allowed")));
584 * A zero or negative value for the end position can happen if
585 * the start was negative or one. SQL99 says to return a
586 * zero-length string.
589 return PG_STR_GET_TEXT("");
595 * If the start position is past the end of the string, SQL99 says
596 * to return a zero-length string -- DatumGetTextPSlice() will
597 * do that for us. Convert to zero-based starting position
599 return DatumGetTextPSlice(str, S1 - 1, L1);
604 * When encoding max length is > 1, we can't get LC without
605 * detoasting, so we'll grab a conservatively large slice now and
606 * go back later to do the right thing
619 * if S is past the end of the string, the tuple toaster will
620 * return a zero-length string to us
625 * We need to start at position zero because there is no way to
626 * know in advance which byte offset corresponds to the supplied
631 if (length_not_specified) /* special case - get length to
633 slice_size = L1 = -1;
639 * A negative value for L is the only way for the end position
640 * to be before the start. SQL99 says to throw an error.
644 (errcode(ERRCODE_SUBSTRING_ERROR),
645 errmsg("negative substring length not allowed")));
648 * A zero or negative value for the end position can happen if
649 * the start was negative or one. SQL99 says to return a
650 * zero-length string.
653 return PG_STR_GET_TEXT("");
656 * if E is past the end of the string, the tuple toaster will
657 * truncate the length for us
662 * Total slice size in bytes can't be any longer than the
663 * start position plus substring length times the encoding max
666 slice_size = (S1 + L1) * eml;
668 slice = DatumGetTextPSlice(str, slice_start, slice_size);
670 /* see if we got back an empty string */
671 if ((VARSIZE(slice) - VARHDRSZ) == 0)
672 return PG_STR_GET_TEXT("");
674 /* Now we can get the actual length of the slice in MB characters */
675 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
678 * Check that the start position wasn't > slice_strlen. If so,
679 * SQL99 says to return a zero-length string.
681 if (S1 > slice_strlen)
682 return PG_STR_GET_TEXT("");
685 * Adjust L1 and E1 now that we know the slice string length.
686 * Again remember that S1 is one based, and slice_start is zero
690 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
692 E1 = slice_start + 1 + slice_strlen;
695 * Find the start position in the slice; remember S1 is not zero
699 for (i = 0; i < S1 - 1; i++)
702 /* hang onto a pointer to our start position */
706 * Count the actual bytes used by the substring of the requested
709 for (i = S1; i < E1; i++)
712 ret = (text *) palloc(VARHDRSZ + (p - s));
713 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
714 memcpy(VARDATA(ret), s, (p - s));
719 elog(ERROR, "invalid backend encoding: encoding max length < 1");
721 /* not reached: suppress compiler warning */
727 * Return the position of the specified substring.
728 * Implements the SQL92 POSITION() function.
729 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
730 * - thomas 1997-07-27
733 textpos(PG_FUNCTION_ARGS)
735 text *str = PG_GETARG_TEXT_P(0);
736 text *search_str = PG_GETARG_TEXT_P(1);
738 PG_RETURN_INT32(text_position(str, search_str, 1));
743 * Does the real work for textpos()
746 * t1 - string to be searched
747 * t2 - pattern to match within t1
748 * matchnum - number of the match to be found (1 is the first match)
750 * Character index of the first matched char, starting from 1,
753 * This is broken out so it can be called directly by other string processing
757 text_position(text *t1, text *t2, int matchnum)
767 return 0; /* result for 0th match */
769 if (VARSIZE(t2) <= VARHDRSZ)
770 return 1; /* result for empty pattern */
772 len1 = (VARSIZE(t1) - VARHDRSZ);
773 len2 = (VARSIZE(t2) - VARHDRSZ);
775 if (pg_database_encoding_max_length() == 1)
777 /* simple case - single byte encoding */
784 /* no use in searching str past point where search_str will fit */
787 for (p = 0; p <= px; p++)
789 if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
791 if (++match == matchnum)
802 /* not as simple - multibyte encoding */
808 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
809 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
810 len1 = pg_wchar_strlen(p1);
811 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
812 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
813 len2 = pg_wchar_strlen(p2);
815 /* no use in searching str past point where search_str will fit */
818 for (p = 0; p <= px; p++)
820 if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
822 if (++match == matchnum)
839 * Comparison function for text strings with given lengths.
840 * Includes locale support, but must copy strings to temporary memory
841 * to allow null-termination for inputs to strcoll().
845 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
850 * Unfortunately, there is no strncoll(), so in the non-C locale case
851 * we have to do some memory copying. This turns out to be
852 * significantly slower, so we optimize the case where LC_COLLATE is
853 * C. We also try to optimize relatively-short strings by avoiding
854 * palloc/pfree overhead.
856 #define STACKBUFLEN 1024
858 if (!lc_collate_is_c())
860 char a1buf[STACKBUFLEN];
861 char a2buf[STACKBUFLEN];
865 if (len1 >= STACKBUFLEN)
866 a1p = (char *) palloc(len1 + 1);
869 if (len2 >= STACKBUFLEN)
870 a2p = (char *) palloc(len2 + 1);
874 memcpy(a1p, arg1, len1);
876 memcpy(a2p, arg2, len2);
879 result = strcoll(a1p, a2p);
881 if (len1 >= STACKBUFLEN)
883 if (len2 >= STACKBUFLEN)
888 result = strncmp(arg1, arg2, Min(len1, len2));
889 if ((result == 0) && (len1 != len2))
890 result = (len1 < len2) ? -1 : 1;
898 * Internal comparison function for text strings.
902 text_cmp(text *arg1, text *arg2)
912 len1 = VARSIZE(arg1) - VARHDRSZ;
913 len2 = VARSIZE(arg2) - VARHDRSZ;
915 return varstr_cmp(a1p, len1, a2p, len2);
919 * Comparison functions for text strings.
921 * Note: btree indexes need these routines not to leak memory; therefore,
922 * be careful to free working copies of toasted datums. Most places don't
923 * need to be so careful.
927 texteq(PG_FUNCTION_ARGS)
929 text *arg1 = PG_GETARG_TEXT_P(0);
930 text *arg2 = PG_GETARG_TEXT_P(1);
933 /* fast path for different-length inputs */
934 if (VARSIZE(arg1) != VARSIZE(arg2))
937 result = (text_cmp(arg1, arg2) == 0);
939 PG_FREE_IF_COPY(arg1, 0);
940 PG_FREE_IF_COPY(arg2, 1);
942 PG_RETURN_BOOL(result);
946 textne(PG_FUNCTION_ARGS)
948 text *arg1 = PG_GETARG_TEXT_P(0);
949 text *arg2 = PG_GETARG_TEXT_P(1);
952 /* fast path for different-length inputs */
953 if (VARSIZE(arg1) != VARSIZE(arg2))
956 result = (text_cmp(arg1, arg2) != 0);
958 PG_FREE_IF_COPY(arg1, 0);
959 PG_FREE_IF_COPY(arg2, 1);
961 PG_RETURN_BOOL(result);
965 text_lt(PG_FUNCTION_ARGS)
967 text *arg1 = PG_GETARG_TEXT_P(0);
968 text *arg2 = PG_GETARG_TEXT_P(1);
971 result = (text_cmp(arg1, arg2) < 0);
973 PG_FREE_IF_COPY(arg1, 0);
974 PG_FREE_IF_COPY(arg2, 1);
976 PG_RETURN_BOOL(result);
980 text_le(PG_FUNCTION_ARGS)
982 text *arg1 = PG_GETARG_TEXT_P(0);
983 text *arg2 = PG_GETARG_TEXT_P(1);
986 result = (text_cmp(arg1, arg2) <= 0);
988 PG_FREE_IF_COPY(arg1, 0);
989 PG_FREE_IF_COPY(arg2, 1);
991 PG_RETURN_BOOL(result);
995 text_gt(PG_FUNCTION_ARGS)
997 text *arg1 = PG_GETARG_TEXT_P(0);
998 text *arg2 = PG_GETARG_TEXT_P(1);
1001 result = (text_cmp(arg1, arg2) > 0);
1003 PG_FREE_IF_COPY(arg1, 0);
1004 PG_FREE_IF_COPY(arg2, 1);
1006 PG_RETURN_BOOL(result);
1010 text_ge(PG_FUNCTION_ARGS)
1012 text *arg1 = PG_GETARG_TEXT_P(0);
1013 text *arg2 = PG_GETARG_TEXT_P(1);
1016 result = (text_cmp(arg1, arg2) >= 0);
1018 PG_FREE_IF_COPY(arg1, 0);
1019 PG_FREE_IF_COPY(arg2, 1);
1021 PG_RETURN_BOOL(result);
1025 bttextcmp(PG_FUNCTION_ARGS)
1027 text *arg1 = PG_GETARG_TEXT_P(0);
1028 text *arg2 = PG_GETARG_TEXT_P(1);
1031 result = text_cmp(arg1, arg2);
1033 PG_FREE_IF_COPY(arg1, 0);
1034 PG_FREE_IF_COPY(arg2, 1);
1036 PG_RETURN_INT32(result);
1041 text_larger(PG_FUNCTION_ARGS)
1043 text *arg1 = PG_GETARG_TEXT_P(0);
1044 text *arg2 = PG_GETARG_TEXT_P(1);
1047 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1049 PG_RETURN_TEXT_P(result);
1053 text_smaller(PG_FUNCTION_ARGS)
1055 text *arg1 = PG_GETARG_TEXT_P(0);
1056 text *arg2 = PG_GETARG_TEXT_P(1);
1059 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1061 PG_RETURN_TEXT_P(result);
1066 * The following operators support character-by-character comparison
1067 * of text data types, to allow building indexes suitable for LIKE
1072 internal_text_pattern_compare(text *arg1, text *arg2)
1076 result = memcmp(VARDATA(arg1), VARDATA(arg2),
1077 Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1080 else if (VARSIZE(arg1) < VARSIZE(arg2))
1082 else if (VARSIZE(arg1) > VARSIZE(arg2))
1090 text_pattern_lt(PG_FUNCTION_ARGS)
1092 text *arg1 = PG_GETARG_TEXT_P(0);
1093 text *arg2 = PG_GETARG_TEXT_P(1);
1096 result = internal_text_pattern_compare(arg1, arg2);
1098 PG_FREE_IF_COPY(arg1, 0);
1099 PG_FREE_IF_COPY(arg2, 1);
1101 PG_RETURN_BOOL(result < 0);
1106 text_pattern_le(PG_FUNCTION_ARGS)
1108 text *arg1 = PG_GETARG_TEXT_P(0);
1109 text *arg2 = PG_GETARG_TEXT_P(1);
1112 result = internal_text_pattern_compare(arg1, arg2);
1114 PG_FREE_IF_COPY(arg1, 0);
1115 PG_FREE_IF_COPY(arg2, 1);
1117 PG_RETURN_BOOL(result <= 0);
1122 text_pattern_eq(PG_FUNCTION_ARGS)
1124 text *arg1 = PG_GETARG_TEXT_P(0);
1125 text *arg2 = PG_GETARG_TEXT_P(1);
1128 if (VARSIZE(arg1) != VARSIZE(arg2))
1131 result = internal_text_pattern_compare(arg1, arg2);
1133 PG_FREE_IF_COPY(arg1, 0);
1134 PG_FREE_IF_COPY(arg2, 1);
1136 PG_RETURN_BOOL(result == 0);
1141 text_pattern_ge(PG_FUNCTION_ARGS)
1143 text *arg1 = PG_GETARG_TEXT_P(0);
1144 text *arg2 = PG_GETARG_TEXT_P(1);
1147 result = internal_text_pattern_compare(arg1, arg2);
1149 PG_FREE_IF_COPY(arg1, 0);
1150 PG_FREE_IF_COPY(arg2, 1);
1152 PG_RETURN_BOOL(result >= 0);
1157 text_pattern_gt(PG_FUNCTION_ARGS)
1159 text *arg1 = PG_GETARG_TEXT_P(0);
1160 text *arg2 = PG_GETARG_TEXT_P(1);
1163 result = internal_text_pattern_compare(arg1, arg2);
1165 PG_FREE_IF_COPY(arg1, 0);
1166 PG_FREE_IF_COPY(arg2, 1);
1168 PG_RETURN_BOOL(result > 0);
1173 text_pattern_ne(PG_FUNCTION_ARGS)
1175 text *arg1 = PG_GETARG_TEXT_P(0);
1176 text *arg2 = PG_GETARG_TEXT_P(1);
1179 if (VARSIZE(arg1) != VARSIZE(arg2))
1182 result = internal_text_pattern_compare(arg1, arg2);
1184 PG_FREE_IF_COPY(arg1, 0);
1185 PG_FREE_IF_COPY(arg2, 1);
1187 PG_RETURN_BOOL(result != 0);
1192 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1194 text *arg1 = PG_GETARG_TEXT_P(0);
1195 text *arg2 = PG_GETARG_TEXT_P(1);
1198 result = internal_text_pattern_compare(arg1, arg2);
1200 PG_FREE_IF_COPY(arg1, 0);
1201 PG_FREE_IF_COPY(arg2, 1);
1203 PG_RETURN_INT32(result);
1207 /*-------------------------------------------------------------
1210 * get the number of bytes contained in an instance of type 'bytea'
1211 *-------------------------------------------------------------
1214 byteaoctetlen(PG_FUNCTION_ARGS)
1216 Datum str = PG_GETARG_DATUM(0);
1218 /* We need not detoast the input at all */
1219 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1224 * takes two bytea* and returns a bytea* that is the concatenation of
1227 * Cloned from textcat and modified as required.
1230 byteacat(PG_FUNCTION_ARGS)
1232 bytea *t1 = PG_GETARG_BYTEA_P(0);
1233 bytea *t2 = PG_GETARG_BYTEA_P(1);
1240 len1 = (VARSIZE(t1) - VARHDRSZ);
1244 len2 = (VARSIZE(t2) - VARHDRSZ);
1248 len = len1 + len2 + VARHDRSZ;
1249 result = (bytea *) palloc(len);
1251 /* Set size of result string... */
1252 VARATT_SIZEP(result) = len;
1254 /* Fill data field of result string... */
1255 ptr = VARDATA(result);
1257 memcpy(ptr, VARDATA(t1), len1);
1259 memcpy(ptr + len1, VARDATA(t2), len2);
1261 PG_RETURN_BYTEA_P(result);
1264 #define PG_STR_GET_BYTEA(str_) \
1265 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1268 * Return a substring starting at the specified position.
1269 * Cloned from text_substr and modified as required.
1273 * - starting position (is one-based)
1274 * - string length (optional)
1276 * If the starting position is zero or less, then return from the start of the string
1277 * adjusting the length to be consistent with the "negative start" per SQL92.
1278 * If the length is less than zero, an ERROR is thrown. If no third argument
1279 * (length) is provided, the length to the end of the string is assumed.
1282 bytea_substr(PG_FUNCTION_ARGS)
1284 int S = PG_GETARG_INT32(1); /* start position */
1285 int S1; /* adjusted start position */
1286 int L1; /* adjusted substring length */
1290 if (fcinfo->nargs == 2)
1293 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1294 * everything to the end of the string if we pass it a negative
1302 int E = S + PG_GETARG_INT32(2);
1305 * A negative value for L is the only way for the end position to
1306 * be before the start. SQL99 says to throw an error.
1310 (errcode(ERRCODE_SUBSTRING_ERROR),
1311 errmsg("negative substring length not allowed")));
1314 * A zero or negative value for the end position can happen if the
1315 * start was negative or one. SQL99 says to return a zero-length
1319 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1325 * If the start position is past the end of the string, SQL99 says to
1326 * return a zero-length string -- PG_GETARG_BYTEA_P_SLICE() will do
1327 * that for us. Convert to zero-based starting position
1329 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1333 * bytea_substr_no_len -
1334 * Wrapper to avoid opr_sanity failure due to
1335 * one function accepting a different number of args.
1338 bytea_substr_no_len(PG_FUNCTION_ARGS)
1340 return bytea_substr(fcinfo);
1345 * Return the position of the specified substring.
1346 * Implements the SQL92 POSITION() function.
1347 * Cloned from textpos and modified as required.
1350 byteapos(PG_FUNCTION_ARGS)
1352 bytea *t1 = PG_GETARG_BYTEA_P(0);
1353 bytea *t2 = PG_GETARG_BYTEA_P(1);
1362 if (VARSIZE(t2) <= VARHDRSZ)
1363 PG_RETURN_INT32(1); /* result for empty pattern */
1365 len1 = (VARSIZE(t1) - VARHDRSZ);
1366 len2 = (VARSIZE(t2) - VARHDRSZ);
1373 for (p = 0; p <= px; p++)
1375 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1383 PG_RETURN_INT32(pos);
1386 /*-------------------------------------------------------------
1389 * this routine treats "bytea" as an array of bytes.
1390 * It returns the Nth byte (a number between 0 and 255).
1391 *-------------------------------------------------------------
1394 byteaGetByte(PG_FUNCTION_ARGS)
1396 bytea *v = PG_GETARG_BYTEA_P(0);
1397 int32 n = PG_GETARG_INT32(1);
1401 len = VARSIZE(v) - VARHDRSZ;
1403 if (n < 0 || n >= len)
1405 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1406 errmsg("index %d out of valid range, 0..%d",
1409 byte = ((unsigned char *) VARDATA(v))[n];
1411 PG_RETURN_INT32(byte);
1414 /*-------------------------------------------------------------
1417 * This routine treats a "bytea" type like an array of bits.
1418 * It returns the value of the Nth bit (0 or 1).
1420 *-------------------------------------------------------------
1423 byteaGetBit(PG_FUNCTION_ARGS)
1425 bytea *v = PG_GETARG_BYTEA_P(0);
1426 int32 n = PG_GETARG_INT32(1);
1432 len = VARSIZE(v) - VARHDRSZ;
1434 if (n < 0 || n >= len * 8)
1436 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1437 errmsg("index %d out of valid range, 0..%d",
1443 byte = ((unsigned char *) VARDATA(v))[byteNo];
1445 if (byte & (1 << bitNo))
1451 /*-------------------------------------------------------------
1454 * Given an instance of type 'bytea' creates a new one with
1455 * the Nth byte set to the given value.
1457 *-------------------------------------------------------------
1460 byteaSetByte(PG_FUNCTION_ARGS)
1462 bytea *v = PG_GETARG_BYTEA_P(0);
1463 int32 n = PG_GETARG_INT32(1);
1464 int32 newByte = PG_GETARG_INT32(2);
1468 len = VARSIZE(v) - VARHDRSZ;
1470 if (n < 0 || n >= len)
1472 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1473 errmsg("index %d out of valid range, 0..%d",
1477 * Make a copy of the original varlena.
1479 res = (bytea *) palloc(VARSIZE(v));
1480 memcpy((char *) res, (char *) v, VARSIZE(v));
1485 ((unsigned char *) VARDATA(res))[n] = newByte;
1487 PG_RETURN_BYTEA_P(res);
1490 /*-------------------------------------------------------------
1493 * Given an instance of type 'bytea' creates a new one with
1494 * the Nth bit set to the given value.
1496 *-------------------------------------------------------------
1499 byteaSetBit(PG_FUNCTION_ARGS)
1501 bytea *v = PG_GETARG_BYTEA_P(0);
1502 int32 n = PG_GETARG_INT32(1);
1503 int32 newBit = PG_GETARG_INT32(2);
1511 len = VARSIZE(v) - VARHDRSZ;
1513 if (n < 0 || n >= len * 8)
1515 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1516 errmsg("index %d out of valid range, 0..%d",
1525 if (newBit != 0 && newBit != 1)
1527 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1528 errmsg("new bit must be 0 or 1")));
1531 * Make a copy of the original varlena.
1533 res = (bytea *) palloc(VARSIZE(v));
1534 memcpy((char *) res, (char *) v, VARSIZE(v));
1539 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1542 newByte = oldByte & (~(1 << bitNo));
1544 newByte = oldByte | (1 << bitNo);
1546 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1548 PG_RETURN_BYTEA_P(res);
1553 * Converts a text type to a Name type.
1556 text_name(PG_FUNCTION_ARGS)
1558 text *s = PG_GETARG_TEXT_P(0);
1562 len = VARSIZE(s) - VARHDRSZ;
1564 /* Truncate oversize input */
1565 if (len >= NAMEDATALEN)
1566 len = NAMEDATALEN - 1;
1569 printf("text- convert string length %d (%d) ->%d\n",
1570 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1573 result = (Name) palloc(NAMEDATALEN);
1574 memcpy(NameStr(*result), VARDATA(s), len);
1576 /* now null pad to full length... */
1577 while (len < NAMEDATALEN)
1579 *(NameStr(*result) + len) = '\0';
1583 PG_RETURN_NAME(result);
1587 * Converts a Name type to a text type.
1590 name_text(PG_FUNCTION_ARGS)
1592 Name s = PG_GETARG_NAME(0);
1596 len = strlen(NameStr(*s));
1599 printf("text- convert string length %d (%d) ->%d\n",
1600 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1603 result = palloc(VARHDRSZ + len);
1604 VARATT_SIZEP(result) = VARHDRSZ + len;
1605 memcpy(VARDATA(result), NameStr(*s), len);
1607 PG_RETURN_TEXT_P(result);
1612 * textToQualifiedNameList - convert a text object to list of names
1614 * This implements the input parsing needed by nextval() and other
1615 * functions that take a text parameter representing a qualified name.
1616 * We split the name at dots, downcase if not double-quoted, and
1617 * truncate names if they're too long.
1620 textToQualifiedNameList(text *textval, const char *caller)
1627 /* Convert to C string (handles possible detoasting). */
1628 /* Note we rely on being able to modify rawname below. */
1629 rawname = DatumGetCString(DirectFunctionCall1(textout,
1630 PointerGetDatum(textval)));
1632 if (!SplitIdentifierString(rawname, '.', &namelist))
1634 (errcode(ERRCODE_INVALID_NAME),
1635 errmsg("invalid name syntax")));
1637 if (namelist == NIL)
1639 (errcode(ERRCODE_INVALID_NAME),
1640 errmsg("invalid name syntax")));
1642 foreach(l, namelist)
1644 char *curname = (char *) lfirst(l);
1646 result = lappend(result, makeString(pstrdup(curname)));
1656 * SplitIdentifierString --- parse a string containing identifiers
1658 * This is the guts of textToQualifiedNameList, and is exported for use in
1659 * other situations such as parsing GUC variables. In the GUC case, it's
1660 * important to avoid memory leaks, so the API is designed to minimize the
1661 * amount of stuff that needs to be allocated and freed.
1664 * rawstring: the input string; must be overwritable! On return, it's
1665 * been modified to contain the separated identifiers.
1666 * separator: the separator punctuation expected between identifiers
1667 * (typically '.' or ','). Whitespace may also appear around
1670 * namelist: filled with a palloc'd list of pointers to identifiers within
1671 * rawstring. Caller should freeList() this even on error return.
1673 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1675 * Note that an empty string is considered okay here, though not in
1676 * textToQualifiedNameList.
1679 SplitIdentifierString(char *rawstring, char separator,
1682 char *nextp = rawstring;
1687 while (isspace((unsigned char) *nextp))
1688 nextp++; /* skip leading whitespace */
1691 return true; /* allow empty string */
1693 /* At the top of the loop, we are at start of a new identifier. */
1702 /* Quoted name --- collapse quote-quote pairs, no downcasing */
1703 curname = nextp + 1;
1706 endp = strchr(nextp + 1, '\"');
1708 return false; /* mismatched quotes */
1709 if (endp[1] != '\"')
1710 break; /* found end of quoted name */
1711 /* Collapse adjacent quotes into one quote, and look again */
1712 memmove(endp, endp + 1, strlen(endp));
1715 /* endp now points at the terminating quote */
1720 /* Unquoted name --- extends to separator or whitespace */
1722 while (*nextp && *nextp != separator &&
1723 !isspace((unsigned char) *nextp))
1726 * It's important that this match the identifier
1727 * downcasing code used by backend/parser/scan.l.
1729 if (isupper((unsigned char) *nextp))
1730 *nextp = tolower((unsigned char) *nextp);
1734 if (curname == nextp)
1735 return false; /* empty unquoted name not allowed */
1738 while (isspace((unsigned char) *nextp))
1739 nextp++; /* skip trailing whitespace */
1741 if (*nextp == separator)
1744 while (isspace((unsigned char) *nextp))
1745 nextp++; /* skip leading whitespace for next */
1746 /* we expect another name, so done remains false */
1748 else if (*nextp == '\0')
1751 return false; /* invalid syntax */
1753 /* Now safe to overwrite separator with a null */
1756 /* Truncate name if it's overlength; again, should match scan.l */
1757 curlen = strlen(curname);
1758 if (curlen >= NAMEDATALEN)
1760 curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1761 curname[curlen] = '\0';
1765 * Finished isolating current name --- add it to list
1767 *namelist = lappend(*namelist, curname);
1769 /* Loop back if we didn't reach end of string */
1776 /*****************************************************************************
1777 * Comparison Functions used for bytea
1779 * Note: btree indexes need these routines not to leak memory; therefore,
1780 * be careful to free working copies of toasted datums. Most places don't
1781 * need to be so careful.
1782 *****************************************************************************/
1785 byteaeq(PG_FUNCTION_ARGS)
1787 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1788 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1793 len1 = VARSIZE(arg1) - VARHDRSZ;
1794 len2 = VARSIZE(arg2) - VARHDRSZ;
1796 /* fast path for different-length inputs */
1800 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1802 PG_FREE_IF_COPY(arg1, 0);
1803 PG_FREE_IF_COPY(arg2, 1);
1805 PG_RETURN_BOOL(result);
1809 byteane(PG_FUNCTION_ARGS)
1811 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1812 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1817 len1 = VARSIZE(arg1) - VARHDRSZ;
1818 len2 = VARSIZE(arg2) - VARHDRSZ;
1820 /* fast path for different-length inputs */
1824 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1826 PG_FREE_IF_COPY(arg1, 0);
1827 PG_FREE_IF_COPY(arg2, 1);
1829 PG_RETURN_BOOL(result);
1833 bytealt(PG_FUNCTION_ARGS)
1835 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1836 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1841 len1 = VARSIZE(arg1) - VARHDRSZ;
1842 len2 = VARSIZE(arg2) - VARHDRSZ;
1844 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1846 PG_FREE_IF_COPY(arg1, 0);
1847 PG_FREE_IF_COPY(arg2, 1);
1849 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1853 byteale(PG_FUNCTION_ARGS)
1855 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1856 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1861 len1 = VARSIZE(arg1) - VARHDRSZ;
1862 len2 = VARSIZE(arg2) - VARHDRSZ;
1864 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1866 PG_FREE_IF_COPY(arg1, 0);
1867 PG_FREE_IF_COPY(arg2, 1);
1869 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1873 byteagt(PG_FUNCTION_ARGS)
1875 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1876 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1881 len1 = VARSIZE(arg1) - VARHDRSZ;
1882 len2 = VARSIZE(arg2) - VARHDRSZ;
1884 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1886 PG_FREE_IF_COPY(arg1, 0);
1887 PG_FREE_IF_COPY(arg2, 1);
1889 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1893 byteage(PG_FUNCTION_ARGS)
1895 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1896 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1901 len1 = VARSIZE(arg1) - VARHDRSZ;
1902 len2 = VARSIZE(arg2) - VARHDRSZ;
1904 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1906 PG_FREE_IF_COPY(arg1, 0);
1907 PG_FREE_IF_COPY(arg2, 1);
1909 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1913 byteacmp(PG_FUNCTION_ARGS)
1915 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1916 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1921 len1 = VARSIZE(arg1) - VARHDRSZ;
1922 len2 = VARSIZE(arg2) - VARHDRSZ;
1924 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1925 if ((cmp == 0) && (len1 != len2))
1926 cmp = (len1 < len2) ? -1 : 1;
1928 PG_FREE_IF_COPY(arg1, 0);
1929 PG_FREE_IF_COPY(arg2, 1);
1931 PG_RETURN_INT32(cmp);
1936 * replace all occurrences of 'old_sub_str' in 'orig_str'
1937 * with 'new_sub_str' to form 'new_str'
1939 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1940 * otherwise returns 'new_str'
1943 replace_text(PG_FUNCTION_ARGS)
1945 text *src_text = PG_GETARG_TEXT_P(0);
1946 text *from_sub_text = PG_GETARG_TEXT_P(1);
1947 text *to_sub_text = PG_GETARG_TEXT_P(2);
1948 int src_text_len = TEXTLEN(src_text);
1949 int from_sub_text_len = TEXTLEN(from_sub_text);
1950 char *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1956 StringInfo str = makeStringInfo();
1958 if (src_text_len == 0 || from_sub_text_len == 0)
1959 PG_RETURN_TEXT_P(src_text);
1961 buf_text = TEXTDUP(src_text);
1962 curr_posn = TEXTPOS(buf_text, from_sub_text);
1964 while (curr_posn > 0)
1966 left_text = LEFT(buf_text, from_sub_text);
1967 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1969 appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
1970 appendStringInfoString(str, to_sub_str);
1974 buf_text = right_text;
1975 curr_posn = TEXTPOS(buf_text, from_sub_text);
1978 appendStringInfoString(str, PG_TEXT_GET_STR(buf_text));
1981 ret_text = PG_STR_GET_TEXT(str->data);
1985 PG_RETURN_TEXT_P(ret_text);
1990 * parse input string
1991 * return ord item (1 based)
1992 * based on provided field separator
1995 split_text(PG_FUNCTION_ARGS)
1997 text *inputstring = PG_GETARG_TEXT_P(0);
1998 text *fldsep = PG_GETARG_TEXT_P(1);
1999 int fldnum = PG_GETARG_INT32(2);
2000 int inputstring_len = TEXTLEN(inputstring);
2001 int fldsep_len = TEXTLEN(fldsep);
2006 /* field number is 1 based */
2009 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2010 errmsg("field position must be greater than zero")));
2012 /* return empty string for empty input string */
2013 if (inputstring_len < 1)
2014 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2016 /* empty field separator */
2019 /* if first field, return input string, else empty string */
2021 PG_RETURN_TEXT_P(inputstring);
2023 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2026 start_posn = text_position(inputstring, fldsep, fldnum - 1);
2027 end_posn = text_position(inputstring, fldsep, fldnum);
2029 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2031 /* if first field, return input string, else empty string */
2033 PG_RETURN_TEXT_P(inputstring);
2035 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2037 else if (start_posn == 0)
2039 /* first field requested */
2040 result_text = LEFT(inputstring, fldsep);
2041 PG_RETURN_TEXT_P(result_text);
2043 else if (end_posn == 0)
2045 /* last field requested */
2046 result_text = text_substring(PointerGetDatum(inputstring),
2047 start_posn + fldsep_len,
2049 PG_RETURN_TEXT_P(result_text);
2053 /* interior field requested */
2054 result_text = text_substring(PointerGetDatum(inputstring),
2055 start_posn + fldsep_len,
2056 end_posn - start_posn - fldsep_len,
2058 PG_RETURN_TEXT_P(result_text);
2064 * parse input string
2065 * return text array of elements
2066 * based on provided field separator
2069 text_to_array(PG_FUNCTION_ARGS)
2071 text *inputstring = PG_GETARG_TEXT_P(0);
2072 text *fldsep = PG_GETARG_TEXT_P(1);
2073 int inputstring_len = TEXTLEN(inputstring);
2074 int fldsep_len = TEXTLEN(fldsep);
2079 ArrayBuildState *astate = NULL;
2081 /* return NULL for empty input string */
2082 if (inputstring_len < 1)
2086 * empty field separator return one element, 1D, array using the input
2090 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2091 CStringGetDatum(inputstring), 1));
2093 /* start with end position holding the initial start position */
2095 for (fldnum = 1;; fldnum++) /* field number is 1 based */
2098 bool disnull = false;
2100 start_posn = end_posn;
2101 end_posn = text_position(inputstring, fldsep, fldnum);
2103 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2108 * first element return one element, 1D, array using the
2111 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2112 CStringGetDatum(inputstring), 1));
2116 /* otherwise create array and exit */
2117 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2118 CurrentMemoryContext));
2121 else if (start_posn == 0)
2123 /* first field requested */
2124 result_text = LEFT(inputstring, fldsep);
2126 else if (end_posn == 0)
2128 /* last field requested */
2129 result_text = text_substring(PointerGetDatum(inputstring),
2130 start_posn + fldsep_len,
2135 /* interior field requested */
2136 result_text = text_substring(PointerGetDatum(inputstring),
2137 start_posn + fldsep_len,
2138 end_posn - start_posn - fldsep_len,
2142 /* stash away current value */
2143 dvalue = PointerGetDatum(result_text);
2144 astate = accumArrayResult(astate, dvalue,
2146 CurrentMemoryContext);
2149 /* never reached -- keep compiler quiet */
2155 * concatenate Cstring representation of input array elements
2156 * using provided field separator
2159 array_to_text(PG_FUNCTION_ARGS)
2161 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
2162 char *fldsep = PG_TEXTARG_GET_STR(1);
2172 StringInfo result_str = makeStringInfo();
2174 ArrayMetaState *my_extra;
2176 p = ARR_DATA_PTR(v);
2177 ndims = ARR_NDIM(v);
2179 nitems = ArrayGetNItems(ndims, dims);
2181 /* if there are no elements, return an empty string */
2183 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2185 element_type = ARR_ELEMTYPE(v);
2188 * We arrange to look up info about element type, including its output
2189 * conversion proc, only once per series of calls, assuming the
2190 * element type doesn't change underneath us.
2192 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2193 if (my_extra == NULL)
2195 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2196 sizeof(ArrayMetaState));
2197 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2198 my_extra->element_type = InvalidOid;
2201 if (my_extra->element_type != element_type)
2204 * Get info about element type, including its output conversion
2207 get_type_io_data(element_type, IOFunc_output,
2208 &my_extra->typlen, &my_extra->typbyval,
2209 &my_extra->typalign, &my_extra->typdelim,
2210 &my_extra->typelem, &my_extra->typiofunc);
2211 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2212 fcinfo->flinfo->fn_mcxt);
2213 my_extra->element_type = element_type;
2215 typlen = my_extra->typlen;
2216 typbyval = my_extra->typbyval;
2217 typalign = my_extra->typalign;
2218 typelem = my_extra->typelem;
2220 for (i = 0; i < nitems; i++)
2225 itemvalue = fetch_att(p, typbyval, typlen);
2227 value = DatumGetCString(FunctionCall3(&my_extra->proc,
2229 ObjectIdGetDatum(typelem),
2230 Int32GetDatum(-1)));
2233 appendStringInfo(result_str, "%s%s", fldsep, value);
2235 appendStringInfo(result_str, "%s", value);
2237 p = att_addlength(p, typlen, PointerGetDatum(p));
2238 p = (char *) att_align(p, typalign);
2241 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2246 * Convert a int32 to a string containing a base 16 (hex) representation of
2250 to_hex32(PG_FUNCTION_ARGS)
2252 uint32 value = (uint32) PG_GETARG_INT32(0);
2255 const char *digits = "0123456789abcdef";
2256 char buf[32]; /* bigger than needed, but reasonable */
2258 ptr = buf + sizeof(buf) - 1;
2263 *--ptr = digits[value % HEXBASE];
2265 } while (ptr > buf && value);
2267 result_text = PG_STR_GET_TEXT(ptr);
2268 PG_RETURN_TEXT_P(result_text);
2272 * Convert a int64 to a string containing a base 16 (hex) representation of
2276 to_hex64(PG_FUNCTION_ARGS)
2278 uint64 value = (uint64) PG_GETARG_INT64(0);
2281 const char *digits = "0123456789abcdef";
2282 char buf[32]; /* bigger than needed, but reasonable */
2284 ptr = buf + sizeof(buf) - 1;
2289 *--ptr = digits[value % HEXBASE];
2291 } while (ptr > buf && value);
2293 result_text = PG_STR_GET_TEXT(ptr);
2294 PG_RETURN_TEXT_P(result_text);
2298 * Create an md5 hash of a text string and return it as hex
2300 * md5 produces a 16 byte (128 bit) hash; double it for hex
2302 #define MD5_HASH_LEN 32
2305 md5_text(PG_FUNCTION_ARGS)
2307 char *buff = PG_TEXT_GET_STR(PG_GETARG_TEXT_P(0));
2308 size_t len = strlen(buff);
2312 /* leave room for the terminating '\0' */
2313 hexsum = (char *) palloc(MD5_HASH_LEN + 1);
2315 /* get the hash result */
2316 md5_hash((void *) buff, len, hexsum);
2318 /* convert to text and return it */
2319 result_text = PG_STR_GET_TEXT(hexsum);
2320 PG_RETURN_TEXT_P(result_text);