1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.121 2005/05/20 01:29:55 neilc Exp $
13 *-------------------------------------------------------------------------
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/crypt.h"
23 #include "libpq/pqformat.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "parser/scansup.h"
27 #include "utils/array.h"
28 #include "utils/builtins.h"
29 #include "utils/lsyscache.h"
30 #include "utils/pg_locale.h"
/* The "unknown" type is declared here as a plain struct varlena. */
33 typedef struct varlena unknown;
/* Detoast a Datum (PG_DETOAST_DATUM) and view the result as an unknown pointer. */
35 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
/* Same as DatumGetUnknownP, but goes through PG_DETOAST_DATUM_COPY. */
36 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
/* Fetch function argument n as an unknown pointer. */
37 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
/* Fetch function argument n as an unknown pointer via the _COPY detoast variant. */
38 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
/* Return an unknown pointer from a function; simply expands to PG_RETURN_POINTER. */
39 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
/* Run textout on function argument number arg_ and yield the resulting C string. */
41 #define PG_TEXTARG_GET_STR(arg_) \
42 DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
/* Run textout on a text pointer and yield the resulting C string. */
43 #define PG_TEXT_GET_STR(textp_) \
44 DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
/* Run textin on a C string and yield the resulting text pointer. */
45 #define PG_STR_GET_TEXT(str_) \
46 DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
/* Length of a text pointer as computed by text_length(). */
47 #define TEXTLEN(textp) \
48 text_length(PointerGetDatum(textp))
/* text_position() of from_sub_text within buf_text, asking for match number 1 (the first match). */
49 #define TEXTPOS(buf_text, from_sub_text) \
50 text_position(buf_text, from_sub_text, 1)
/* Pass a text pointer through DatumGetTextPCopy -- presumably yields a fresh copy; confirm in fmgr.h. */
51 #define TEXTDUP(textp) \
52 DatumGetTextPCopy(PointerGetDatum(textp))
53 #define LEFT(buf_text, from_sub_text) \
54 text_substring(PointerGetDatum(buf_text), \
56 TEXTPOS(buf_text, from_sub_text) - 1, false)
57 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
58 text_substring(PointerGetDatum(buf_text), \
59 TEXTPOS(buf_text, from_sub_text) + (from_sub_text_len), \
/* Internal comparison function for text strings; hands the two data areas and lengths to varstr_cmp(). */
62 static int text_cmp(text *arg1, text *arg2);
/* Does the real work for textlen(); takes a Datum so the value may still be in compressed form. */
63 static int32 text_length(Datum str);
/* Does the real work for textpos(); matchnum selects which match to report (1 is the first match). */
64 static int32 text_position(text *t1, text *t2, int matchnum);
65 static text *text_substring(Datum str,
68 bool length_not_specified);
71 /*****************************************************************************
73 *****************************************************************************/
/*
 * Digit helpers used by the bytea octal-escape code (byteain / byteaout).
 *
 * VAL(CH)	numeric value of the digit character CH ('0' -> 0, '7' -> 7).
 * DIG(N)	digit character for the small non-negative value N (0 -> '0').
 *
 * Neither macro range-checks its argument: callers test the character
 * range (or mask the value, e.g. "val & 07") before using them.  Each
 * argument is evaluated exactly once.
 *
 * DIG's formal parameter is deliberately not named VAL, so it cannot be
 * confused with the VAL() macro defined directly above it.
 */
#define VAL(CH)		((CH) - '0')
#define DIG(N)		((N) + '0')
80 * byteain - converts from printable representation of byte array
82 * Non-printable characters must be passed as '\nnn' (octal) and are
83 * converted to internal form. '\' must be passed as '\\'.
84 * ereport(ERROR, ...) if bad form.
87 * The input is scanned twice.
88 * The error checking of input is minimal.
91 byteain(PG_FUNCTION_ARGS)
93 char *inputText = PG_GETARG_CSTRING(0);
99 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
103 else if ((tp[0] == '\\') &&
104 (tp[1] >= '0' && tp[1] <= '3') &&
105 (tp[2] >= '0' && tp[2] <= '7') &&
106 (tp[3] >= '0' && tp[3] <= '7'))
108 else if ((tp[0] == '\\') &&
114 * one backslash, not followed by 0 or ### valid octal
117 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
118 errmsg("invalid input syntax for type bytea")));
123 result = (bytea *) palloc(byte);
124 VARATT_SIZEP(result) = byte; /* set varlena length */
127 rp = VARDATA(result);
132 else if ((tp[0] == '\\') &&
133 (tp[1] >= '0' && tp[1] <= '3') &&
134 (tp[2] >= '0' && tp[2] <= '7') &&
135 (tp[3] >= '0' && tp[3] <= '7'))
141 *rp++ = byte + VAL(tp[3]);
144 else if ((tp[0] == '\\') &&
153 * We should never get here. The first pass should not allow
157 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
158 errmsg("invalid input syntax for type bytea")));
162 PG_RETURN_BYTEA_P(result);
166 * byteaout - converts to printable representation of byte array
168 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
171 * NULL vlena should be an error--returning string with NULL for now.
174 byteaout(PG_FUNCTION_ARGS)
176 bytea *vlena = PG_GETARG_BYTEA_P(0);
180 int val; /* holds unprintable chars */
184 len = 1; /* empty string has 1 char */
186 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
190 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
195 rp = result = (char *) palloc(len);
197 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
204 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
208 rp[3] = DIG(val & 07);
210 rp[2] = DIG(val & 07);
212 rp[1] = DIG(val & 03);
219 PG_RETURN_CSTRING(result);
223 * bytearecv - converts external binary format to bytea
226 bytearecv(PG_FUNCTION_ARGS)
228 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
232 nbytes = buf->len - buf->cursor;
233 result = (bytea *) palloc(nbytes + VARHDRSZ);
234 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
235 pq_copymsgbytes(buf, VARDATA(result), nbytes);
236 PG_RETURN_BYTEA_P(result);
240 * byteasend - converts bytea to binary format
242 * This is a special case: just copy the input...
245 byteasend(PG_FUNCTION_ARGS)
247 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
249 PG_RETURN_BYTEA_P(vlena);
254 * textin - converts "..." to internal representation
257 textin(PG_FUNCTION_ARGS)
259 char *inputText = PG_GETARG_CSTRING(0);
263 /* verify encoding */
264 len = strlen(inputText);
265 pg_verifymbstr(inputText, len, false);
267 result = (text *) palloc(len + VARHDRSZ);
268 VARATT_SIZEP(result) = len + VARHDRSZ;
270 memcpy(VARDATA(result), inputText, len);
272 PG_RETURN_TEXT_P(result);
276 * textout - converts internal representation to "..."
279 textout(PG_FUNCTION_ARGS)
281 text *t = PG_GETARG_TEXT_P(0);
285 len = VARSIZE(t) - VARHDRSZ;
286 result = (char *) palloc(len + 1);
287 memcpy(result, VARDATA(t), len);
290 PG_RETURN_CSTRING(result);
294 * textrecv - converts external binary format to text
297 textrecv(PG_FUNCTION_ARGS)
299 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
304 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
305 result = (text *) palloc(nbytes + VARHDRSZ);
306 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
307 memcpy(VARDATA(result), str, nbytes);
309 PG_RETURN_TEXT_P(result);
313 * textsend - converts text to binary format
316 textsend(PG_FUNCTION_ARGS)
318 text *t = PG_GETARG_TEXT_P(0);
321 pq_begintypsend(&buf);
322 pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
323 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
328 * unknownin - converts "..." to internal representation
331 unknownin(PG_FUNCTION_ARGS)
333 char *inputStr = PG_GETARG_CSTRING(0);
337 len = strlen(inputStr) + VARHDRSZ;
339 result = (unknown *) palloc(len);
340 VARATT_SIZEP(result) = len;
342 memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
344 PG_RETURN_UNKNOWN_P(result);
348 * unknownout - converts internal representation to "..."
351 unknownout(PG_FUNCTION_ARGS)
353 unknown *t = PG_GETARG_UNKNOWN_P(0);
357 len = VARSIZE(t) - VARHDRSZ;
358 result = (char *) palloc(len + 1);
359 memcpy(result, VARDATA(t), len);
362 PG_RETURN_CSTRING(result);
366 * unknownrecv - converts external binary format to unknown
369 unknownrecv(PG_FUNCTION_ARGS)
371 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
375 nbytes = buf->len - buf->cursor;
376 result = (unknown *) palloc(nbytes + VARHDRSZ);
377 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
378 pq_copymsgbytes(buf, VARDATA(result), nbytes);
379 PG_RETURN_UNKNOWN_P(result);
383 * unknownsend - converts unknown to binary format
385 * This is a special case: just copy the input, since it's
386 * effectively the same format as bytea
389 unknownsend(PG_FUNCTION_ARGS)
391 unknown *vlena = PG_GETARG_UNKNOWN_P_COPY(0);
393 PG_RETURN_UNKNOWN_P(vlena);
397 /* ========== PUBLIC ROUTINES ========== */
401 * returns the logical length of a text*
402 * (which is less than the VARSIZE of the text*)
405 textlen(PG_FUNCTION_ARGS)
407 Datum str = PG_GETARG_DATUM(0);
409 /* try to avoid decompressing argument */
410 PG_RETURN_INT32(text_length(str));
415 * Does the real work for textlen()
417 * This is broken out so it can be called directly by other string processing
418 * functions. Note that the argument is passed as a Datum, to indicate that
419 * it may still be in compressed form. We can avoid decompressing it at all
423 text_length(Datum str)
425 /* fastpath when max encoding length is one */
426 if (pg_database_encoding_max_length() == 1)
427 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
430 text *t = DatumGetTextP(str);
432 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
433 VARSIZE(t) - VARHDRSZ));
439 * returns the physical length of a text*
440 * (which is less than the VARSIZE of the text*)
443 textoctetlen(PG_FUNCTION_ARGS)
445 Datum str = PG_GETARG_DATUM(0);
447 /* We need not detoast the input at all */
448 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
453 * takes two text* and returns a text* that is the concatenation of
456 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
457 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
458 * Allocate space for output in all cases.
459 * XXX - thomas 1997-07-10
462 textcat(PG_FUNCTION_ARGS)
464 text *t1 = PG_GETARG_TEXT_P(0);
465 text *t2 = PG_GETARG_TEXT_P(1);
472 len1 = (VARSIZE(t1) - VARHDRSZ);
476 len2 = (VARSIZE(t2) - VARHDRSZ);
480 len = len1 + len2 + VARHDRSZ;
481 result = (text *) palloc(len);
483 /* Set size of result string... */
484 VARATT_SIZEP(result) = len;
486 /* Fill data field of result string... */
487 ptr = VARDATA(result);
489 memcpy(ptr, VARDATA(t1), len1);
491 memcpy(ptr + len1, VARDATA(t2), len2);
493 PG_RETURN_TEXT_P(result);
498 * Return a substring starting at the specified position.
499 * - thomas 1997-12-31
503 * - starting position (is one-based)
506 * If the starting position is zero or less, then return from the start of the string
507 * adjusting the length to be consistent with the "negative start" per SQL92.
508 * If the length is less than zero, an ERROR is thrown.
510 * Added multibyte support.
511 * - Tatsuo Ishii 1998-4-21
512 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
513 * Formerly returned the entire string; now returns a portion.
514 * - Thomas Lockhart 1998-12-10
515 * Now uses faster TOAST-slicing interface
516 * - John Gray 2002-02-22
517 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
518 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
519 * error; if E < 1, return '', not entire string). Fixed MB related bug when
520 * S > LC and < LC + 4 sometimes garbage characters are returned.
521 * - Joe Conway 2002-08-10
524 text_substr(PG_FUNCTION_ARGS)
526 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
533 * text_substr_no_len -
534 * Wrapper to avoid opr_sanity failure due to
535 * one function accepting a different number of args.
538 text_substr_no_len(PG_FUNCTION_ARGS)
540 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
547 * Does the real work for text_substr() and text_substr_no_len()
549 * This is broken out so it can be called directly by other string processing
550 * functions. Note that the argument is passed as a Datum, to indicate that
551 * it may still be in compressed/toasted form. We can avoid detoasting all
552 * of it in some cases.
555 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
557 int32 eml = pg_database_encoding_max_length();
558 int32 S = start; /* start position */
559 int32 S1; /* adjusted start position */
560 int32 L1; /* adjusted substring length */
562 /* life is easy if the encoding max length is 1 */
567 if (length_not_specified) /* special case - get length to
576 * A negative value for L is the only way for the end position
577 * to be before the start. SQL99 says to throw an error.
581 (errcode(ERRCODE_SUBSTRING_ERROR),
582 errmsg("negative substring length not allowed")));
585 * A zero or negative value for the end position can happen if
586 * the start was negative or one. SQL99 says to return a
587 * zero-length string.
590 return PG_STR_GET_TEXT("");
596 * If the start position is past the end of the string, SQL99 says
597 * to return a zero-length string -- DatumGetTextPSlice() will
598 * do that for us. Convert to zero-based starting position
600 return DatumGetTextPSlice(str, S1 - 1, L1);
605 * When encoding max length is > 1, we can't get LC without
606 * detoasting, so we'll grab a conservatively large slice now and
607 * go back later to do the right thing
620 * if S is past the end of the string, the tuple toaster will
621 * return a zero-length string to us
626 * We need to start at position zero because there is no way to
627 * know in advance which byte offset corresponds to the supplied
632 if (length_not_specified) /* special case - get length to
634 slice_size = L1 = -1;
640 * A negative value for L is the only way for the end position
641 * to be before the start. SQL99 says to throw an error.
645 (errcode(ERRCODE_SUBSTRING_ERROR),
646 errmsg("negative substring length not allowed")));
649 * A zero or negative value for the end position can happen if
650 * the start was negative or one. SQL99 says to return a
651 * zero-length string.
654 return PG_STR_GET_TEXT("");
657 * if E is past the end of the string, the tuple toaster will
658 * truncate the length for us
663 * Total slice size in bytes can't be any longer than the
664 * start position plus substring length times the encoding max
667 slice_size = (S1 + L1) * eml;
669 slice = DatumGetTextPSlice(str, slice_start, slice_size);
671 /* see if we got back an empty string */
672 if ((VARSIZE(slice) - VARHDRSZ) == 0)
673 return PG_STR_GET_TEXT("");
675 /* Now we can get the actual length of the slice in MB characters */
676 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
679 * Check that the start position wasn't > slice_strlen. If so,
680 * SQL99 says to return a zero-length string.
682 if (S1 > slice_strlen)
683 return PG_STR_GET_TEXT("");
686 * Adjust L1 and E1 now that we know the slice string length.
687 * Again remember that S1 is one based, and slice_start is zero
691 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
693 E1 = slice_start + 1 + slice_strlen;
696 * Find the start position in the slice; remember S1 is not zero
700 for (i = 0; i < S1 - 1; i++)
703 /* hang onto a pointer to our start position */
707 * Count the actual bytes used by the substring of the requested
710 for (i = S1; i < E1; i++)
713 ret = (text *) palloc(VARHDRSZ + (p - s));
714 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
715 memcpy(VARDATA(ret), s, (p - s));
720 elog(ERROR, "invalid backend encoding: encoding max length < 1");
722 /* not reached: suppress compiler warning */
728 * Return the position of the specified substring.
729 * Implements the SQL92 POSITION() function.
730 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
731 * - thomas 1997-07-27
734 textpos(PG_FUNCTION_ARGS)
736 text *str = PG_GETARG_TEXT_P(0);
737 text *search_str = PG_GETARG_TEXT_P(1);
739 PG_RETURN_INT32(text_position(str, search_str, 1));
744 * Does the real work for textpos()
747 * t1 - string to be searched
748 * t2 - pattern to match within t1
749 * matchnum - number of the match to be found (1 is the first match)
751 * Character index of the first matched char, starting from 1,
754 * This is broken out so it can be called directly by other string processing
758 text_position(text *t1, text *t2, int matchnum)
768 return 0; /* result for 0th match */
770 if (VARSIZE(t2) <= VARHDRSZ)
771 return 1; /* result for empty pattern */
773 len1 = (VARSIZE(t1) - VARHDRSZ);
774 len2 = (VARSIZE(t2) - VARHDRSZ);
776 if (pg_database_encoding_max_length() == 1)
778 /* simple case - single byte encoding */
785 /* no use in searching str past point where search_str will fit */
788 for (p = 0; p <= px; p++)
790 if ((*p1 == *p2) && (strncmp(p1, p2, len2) == 0))
792 if (++match == matchnum)
803 /* not as simple - multibyte encoding */
809 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
810 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
811 len1 = pg_wchar_strlen(p1);
812 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
813 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
814 len2 = pg_wchar_strlen(p2);
816 /* no use in searching str past point where search_str will fit */
819 for (p = 0; p <= px; p++)
821 if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, len2) == 0))
823 if (++match == matchnum)
840 * Comparison function for text strings with given lengths.
841 * Includes locale support, but must copy strings to temporary memory
842 * to allow null-termination for inputs to strcoll().
846 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
851 * Unfortunately, there is no strncoll(), so in the non-C locale case
852 * we have to do some memory copying. This turns out to be
853 * significantly slower, so we optimize the case where LC_COLLATE is
854 * C. We also try to optimize relatively-short strings by avoiding
855 * palloc/pfree overhead.
857 #define STACKBUFLEN 1024
859 if (!lc_collate_is_c())
861 char a1buf[STACKBUFLEN];
862 char a2buf[STACKBUFLEN];
866 if (len1 >= STACKBUFLEN)
867 a1p = (char *) palloc(len1 + 1);
870 if (len2 >= STACKBUFLEN)
871 a2p = (char *) palloc(len2 + 1);
875 memcpy(a1p, arg1, len1);
877 memcpy(a2p, arg2, len2);
880 result = strcoll(a1p, a2p);
882 if (len1 >= STACKBUFLEN)
884 if (len2 >= STACKBUFLEN)
889 result = strncmp(arg1, arg2, Min(len1, len2));
890 if ((result == 0) && (len1 != len2))
891 result = (len1 < len2) ? -1 : 1;
899 * Internal comparison function for text strings.
903 text_cmp(text *arg1, text *arg2)
913 len1 = VARSIZE(arg1) - VARHDRSZ;
914 len2 = VARSIZE(arg2) - VARHDRSZ;
916 return varstr_cmp(a1p, len1, a2p, len2);
920 * Comparison functions for text strings.
922 * Note: btree indexes need these routines not to leak memory; therefore,
923 * be careful to free working copies of toasted datums. Most places don't
924 * need to be so careful.
928 texteq(PG_FUNCTION_ARGS)
930 text *arg1 = PG_GETARG_TEXT_P(0);
931 text *arg2 = PG_GETARG_TEXT_P(1);
934 /* fast path for different-length inputs */
935 if (VARSIZE(arg1) != VARSIZE(arg2))
938 result = (text_cmp(arg1, arg2) == 0);
940 PG_FREE_IF_COPY(arg1, 0);
941 PG_FREE_IF_COPY(arg2, 1);
943 PG_RETURN_BOOL(result);
947 textne(PG_FUNCTION_ARGS)
949 text *arg1 = PG_GETARG_TEXT_P(0);
950 text *arg2 = PG_GETARG_TEXT_P(1);
953 /* fast path for different-length inputs */
954 if (VARSIZE(arg1) != VARSIZE(arg2))
957 result = (text_cmp(arg1, arg2) != 0);
959 PG_FREE_IF_COPY(arg1, 0);
960 PG_FREE_IF_COPY(arg2, 1);
962 PG_RETURN_BOOL(result);
966 text_lt(PG_FUNCTION_ARGS)
968 text *arg1 = PG_GETARG_TEXT_P(0);
969 text *arg2 = PG_GETARG_TEXT_P(1);
972 result = (text_cmp(arg1, arg2) < 0);
974 PG_FREE_IF_COPY(arg1, 0);
975 PG_FREE_IF_COPY(arg2, 1);
977 PG_RETURN_BOOL(result);
981 text_le(PG_FUNCTION_ARGS)
983 text *arg1 = PG_GETARG_TEXT_P(0);
984 text *arg2 = PG_GETARG_TEXT_P(1);
987 result = (text_cmp(arg1, arg2) <= 0);
989 PG_FREE_IF_COPY(arg1, 0);
990 PG_FREE_IF_COPY(arg2, 1);
992 PG_RETURN_BOOL(result);
996 text_gt(PG_FUNCTION_ARGS)
998 text *arg1 = PG_GETARG_TEXT_P(0);
999 text *arg2 = PG_GETARG_TEXT_P(1);
1002 result = (text_cmp(arg1, arg2) > 0);
1004 PG_FREE_IF_COPY(arg1, 0);
1005 PG_FREE_IF_COPY(arg2, 1);
1007 PG_RETURN_BOOL(result);
1011 text_ge(PG_FUNCTION_ARGS)
1013 text *arg1 = PG_GETARG_TEXT_P(0);
1014 text *arg2 = PG_GETARG_TEXT_P(1);
1017 result = (text_cmp(arg1, arg2) >= 0);
1019 PG_FREE_IF_COPY(arg1, 0);
1020 PG_FREE_IF_COPY(arg2, 1);
1022 PG_RETURN_BOOL(result);
1026 bttextcmp(PG_FUNCTION_ARGS)
1028 text *arg1 = PG_GETARG_TEXT_P(0);
1029 text *arg2 = PG_GETARG_TEXT_P(1);
1032 result = text_cmp(arg1, arg2);
1034 PG_FREE_IF_COPY(arg1, 0);
1035 PG_FREE_IF_COPY(arg2, 1);
1037 PG_RETURN_INT32(result);
1042 text_larger(PG_FUNCTION_ARGS)
1044 text *arg1 = PG_GETARG_TEXT_P(0);
1045 text *arg2 = PG_GETARG_TEXT_P(1);
1048 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1050 PG_RETURN_TEXT_P(result);
1054 text_smaller(PG_FUNCTION_ARGS)
1056 text *arg1 = PG_GETARG_TEXT_P(0);
1057 text *arg2 = PG_GETARG_TEXT_P(1);
1060 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1062 PG_RETURN_TEXT_P(result);
1067 * The following operators support character-by-character comparison
1068 * of text data types, to allow building indexes suitable for LIKE
1073 internal_text_pattern_compare(text *arg1, text *arg2)
1077 result = memcmp(VARDATA(arg1), VARDATA(arg2),
1078 Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1081 else if (VARSIZE(arg1) < VARSIZE(arg2))
1083 else if (VARSIZE(arg1) > VARSIZE(arg2))
1091 text_pattern_lt(PG_FUNCTION_ARGS)
1093 text *arg1 = PG_GETARG_TEXT_P(0);
1094 text *arg2 = PG_GETARG_TEXT_P(1);
1097 result = internal_text_pattern_compare(arg1, arg2);
1099 PG_FREE_IF_COPY(arg1, 0);
1100 PG_FREE_IF_COPY(arg2, 1);
1102 PG_RETURN_BOOL(result < 0);
1107 text_pattern_le(PG_FUNCTION_ARGS)
1109 text *arg1 = PG_GETARG_TEXT_P(0);
1110 text *arg2 = PG_GETARG_TEXT_P(1);
1113 result = internal_text_pattern_compare(arg1, arg2);
1115 PG_FREE_IF_COPY(arg1, 0);
1116 PG_FREE_IF_COPY(arg2, 1);
1118 PG_RETURN_BOOL(result <= 0);
1123 text_pattern_eq(PG_FUNCTION_ARGS)
1125 text *arg1 = PG_GETARG_TEXT_P(0);
1126 text *arg2 = PG_GETARG_TEXT_P(1);
1129 if (VARSIZE(arg1) != VARSIZE(arg2))
1132 result = internal_text_pattern_compare(arg1, arg2);
1134 PG_FREE_IF_COPY(arg1, 0);
1135 PG_FREE_IF_COPY(arg2, 1);
1137 PG_RETURN_BOOL(result == 0);
1142 text_pattern_ge(PG_FUNCTION_ARGS)
1144 text *arg1 = PG_GETARG_TEXT_P(0);
1145 text *arg2 = PG_GETARG_TEXT_P(1);
1148 result = internal_text_pattern_compare(arg1, arg2);
1150 PG_FREE_IF_COPY(arg1, 0);
1151 PG_FREE_IF_COPY(arg2, 1);
1153 PG_RETURN_BOOL(result >= 0);
1158 text_pattern_gt(PG_FUNCTION_ARGS)
1160 text *arg1 = PG_GETARG_TEXT_P(0);
1161 text *arg2 = PG_GETARG_TEXT_P(1);
1164 result = internal_text_pattern_compare(arg1, arg2);
1166 PG_FREE_IF_COPY(arg1, 0);
1167 PG_FREE_IF_COPY(arg2, 1);
1169 PG_RETURN_BOOL(result > 0);
1174 text_pattern_ne(PG_FUNCTION_ARGS)
1176 text *arg1 = PG_GETARG_TEXT_P(0);
1177 text *arg2 = PG_GETARG_TEXT_P(1);
1180 if (VARSIZE(arg1) != VARSIZE(arg2))
1183 result = internal_text_pattern_compare(arg1, arg2);
1185 PG_FREE_IF_COPY(arg1, 0);
1186 PG_FREE_IF_COPY(arg2, 1);
1188 PG_RETURN_BOOL(result != 0);
1193 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1195 text *arg1 = PG_GETARG_TEXT_P(0);
1196 text *arg2 = PG_GETARG_TEXT_P(1);
1199 result = internal_text_pattern_compare(arg1, arg2);
1201 PG_FREE_IF_COPY(arg1, 0);
1202 PG_FREE_IF_COPY(arg2, 1);
1204 PG_RETURN_INT32(result);
1208 /*-------------------------------------------------------------
1211 * get the number of bytes contained in an instance of type 'bytea'
1212 *-------------------------------------------------------------
1215 byteaoctetlen(PG_FUNCTION_ARGS)
1217 Datum str = PG_GETARG_DATUM(0);
1219 /* We need not detoast the input at all */
1220 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1225 * takes two bytea* and returns a bytea* that is the concatenation of
1228 * Cloned from textcat and modified as required.
1231 byteacat(PG_FUNCTION_ARGS)
1233 bytea *t1 = PG_GETARG_BYTEA_P(0);
1234 bytea *t2 = PG_GETARG_BYTEA_P(1);
1241 len1 = (VARSIZE(t1) - VARHDRSZ);
1245 len2 = (VARSIZE(t2) - VARHDRSZ);
1249 len = len1 + len2 + VARHDRSZ;
1250 result = (bytea *) palloc(len);
1252 /* Set size of result string... */
1253 VARATT_SIZEP(result) = len;
1255 /* Fill data field of result string... */
1256 ptr = VARDATA(result);
1258 memcpy(ptr, VARDATA(t1), len1);
1260 memcpy(ptr + len1, VARDATA(t2), len2);
1262 PG_RETURN_BYTEA_P(result);
/* Run byteain on a C string and yield the resulting bytea pointer (used below to build an empty bytea from ""). */
1265 #define PG_STR_GET_BYTEA(str_) \
1266 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1269 * Return a substring starting at the specified position.
1270 * Cloned from text_substr and modified as required.
1274 * - starting position (is one-based)
1275 * - string length (optional)
1277 * If the starting position is zero or less, then return from the start of the string
1278 * adjusting the length to be consistent with the "negative start" per SQL92.
1279 * If the length is less than zero, an ERROR is thrown. If no third argument
1280 * (length) is provided, the length to the end of the string is assumed.
1283 bytea_substr(PG_FUNCTION_ARGS)
1285 int S = PG_GETARG_INT32(1); /* start position */
1286 int S1; /* adjusted start position */
1287 int L1; /* adjusted substring length */
1291 if (fcinfo->nargs == 2)
1294 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1295 * everything to the end of the string if we pass it a negative
1303 int E = S + PG_GETARG_INT32(2);
1306 * A negative value for L is the only way for the end position to
1307 * be before the start. SQL99 says to throw an error.
1311 (errcode(ERRCODE_SUBSTRING_ERROR),
1312 errmsg("negative substring length not allowed")));
1315 * A zero or negative value for the end position can happen if the
1316 * start was negative or one. SQL99 says to return a zero-length
1320 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1326 * If the start position is past the end of the string, SQL99 says to
1327 * return a zero-length string -- PG_GETARG_BYTEA_P_SLICE() will do
1328 * that for us. Convert to zero-based starting position
1330 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1334 * bytea_substr_no_len -
1335 * Wrapper to avoid opr_sanity failure due to
1336 * one function accepting a different number of args.
1339 bytea_substr_no_len(PG_FUNCTION_ARGS)
1341 return bytea_substr(fcinfo);
1346 * Return the position of the specified substring.
1347 * Implements the SQL92 POSITION() function.
1348 * Cloned from textpos and modified as required.
1351 byteapos(PG_FUNCTION_ARGS)
1353 bytea *t1 = PG_GETARG_BYTEA_P(0);
1354 bytea *t2 = PG_GETARG_BYTEA_P(1);
1363 if (VARSIZE(t2) <= VARHDRSZ)
1364 PG_RETURN_INT32(1); /* result for empty pattern */
1366 len1 = (VARSIZE(t1) - VARHDRSZ);
1367 len2 = (VARSIZE(t2) - VARHDRSZ);
1374 for (p = 0; p <= px; p++)
1376 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1384 PG_RETURN_INT32(pos);
1387 /*-------------------------------------------------------------
1390 * this routine treats "bytea" as an array of bytes.
1391 * It returns the Nth byte (a number between 0 and 255).
1392 *-------------------------------------------------------------
1395 byteaGetByte(PG_FUNCTION_ARGS)
1397 bytea *v = PG_GETARG_BYTEA_P(0);
1398 int32 n = PG_GETARG_INT32(1);
1402 len = VARSIZE(v) - VARHDRSZ;
1404 if (n < 0 || n >= len)
1406 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1407 errmsg("index %d out of valid range, 0..%d",
1410 byte = ((unsigned char *) VARDATA(v))[n];
1412 PG_RETURN_INT32(byte);
1415 /*-------------------------------------------------------------
1418 * This routine treats a "bytea" type like an array of bits.
1419 * It returns the value of the Nth bit (0 or 1).
1421 *-------------------------------------------------------------
1424 byteaGetBit(PG_FUNCTION_ARGS)
1426 bytea *v = PG_GETARG_BYTEA_P(0);
1427 int32 n = PG_GETARG_INT32(1);
1433 len = VARSIZE(v) - VARHDRSZ;
1435 if (n < 0 || n >= len * 8)
1437 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1438 errmsg("index %d out of valid range, 0..%d",
1444 byte = ((unsigned char *) VARDATA(v))[byteNo];
1446 if (byte & (1 << bitNo))
1452 /*-------------------------------------------------------------
1455 * Given an instance of type 'bytea' creates a new one with
1456 * the Nth byte set to the given value.
1458 *-------------------------------------------------------------
1461 byteaSetByte(PG_FUNCTION_ARGS)
1463 bytea *v = PG_GETARG_BYTEA_P(0);
1464 int32 n = PG_GETARG_INT32(1);
1465 int32 newByte = PG_GETARG_INT32(2);
1469 len = VARSIZE(v) - VARHDRSZ;
1471 if (n < 0 || n >= len)
1473 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1474 errmsg("index %d out of valid range, 0..%d",
1478 * Make a copy of the original varlena.
1480 res = (bytea *) palloc(VARSIZE(v));
1481 memcpy((char *) res, (char *) v, VARSIZE(v));
1486 ((unsigned char *) VARDATA(res))[n] = newByte;
1488 PG_RETURN_BYTEA_P(res);
1491 /*-------------------------------------------------------------
1494 * Given an instance of type 'bytea' creates a new one with
1495 * the Nth bit set to the given value.
1497 *-------------------------------------------------------------
1500 byteaSetBit(PG_FUNCTION_ARGS)
1502 bytea *v = PG_GETARG_BYTEA_P(0);
1503 int32 n = PG_GETARG_INT32(1);
1504 int32 newBit = PG_GETARG_INT32(2);
1512 len = VARSIZE(v) - VARHDRSZ;
1514 if (n < 0 || n >= len * 8)
1516 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1517 errmsg("index %d out of valid range, 0..%d",
1526 if (newBit != 0 && newBit != 1)
1528 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1529 errmsg("new bit must be 0 or 1")));
1532 * Make a copy of the original varlena.
1534 res = (bytea *) palloc(VARSIZE(v));
1535 memcpy((char *) res, (char *) v, VARSIZE(v));
1540 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1543 newByte = oldByte & (~(1 << bitNo));
1545 newByte = oldByte | (1 << bitNo);
1547 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1549 PG_RETURN_BYTEA_P(res);
1554 * Converts a text type to a Name type.
1557 text_name(PG_FUNCTION_ARGS)
1559 text *s = PG_GETARG_TEXT_P(0);
1563 len = VARSIZE(s) - VARHDRSZ;
1565 /* Truncate oversize input */
1566 if (len >= NAMEDATALEN)
1567 len = NAMEDATALEN - 1;
1570 printf("text- convert string length %d (%d) ->%d\n",
1571 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1574 result = (Name) palloc(NAMEDATALEN);
1575 memcpy(NameStr(*result), VARDATA(s), len);
1577 /* now null pad to full length... */
1578 while (len < NAMEDATALEN)
1580 *(NameStr(*result) + len) = '\0';
1584 PG_RETURN_NAME(result);
1588 * Converts a Name type to a text type.
1591 name_text(PG_FUNCTION_ARGS)
1593 Name s = PG_GETARG_NAME(0);
1597 len = strlen(NameStr(*s));
1600 printf("text- convert string length %d (%d) ->%d\n",
1601 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1604 result = palloc(VARHDRSZ + len);
1605 VARATT_SIZEP(result) = VARHDRSZ + len;
1606 memcpy(VARDATA(result), NameStr(*s), len);
1608 PG_RETURN_TEXT_P(result);
1613 * textToQualifiedNameList - convert a text object to list of names
1615 * This implements the input parsing needed by nextval() and other
1616 * functions that take a text parameter representing a qualified name.
1617 * We split the name at dots, downcase if not double-quoted, and
1618 * truncate names if they're too long.
1621 textToQualifiedNameList(text *textval, const char *caller)
1628 /* Convert to C string (handles possible detoasting). */
1629 /* Note we rely on being able to modify rawname below. */
1630 rawname = DatumGetCString(DirectFunctionCall1(textout,
1631 PointerGetDatum(textval)));
1633 if (!SplitIdentifierString(rawname, '.', &namelist))
1635 (errcode(ERRCODE_INVALID_NAME),
1636 errmsg("invalid name syntax")));
1638 if (namelist == NIL)
1640 (errcode(ERRCODE_INVALID_NAME),
1641 errmsg("invalid name syntax")));
1643 foreach(l, namelist)
1645 char *curname = (char *) lfirst(l);
1647 result = lappend(result, makeString(pstrdup(curname)));
1651 list_free(namelist);
1657 * SplitIdentifierString --- parse a string containing identifiers
1659 * This is the guts of textToQualifiedNameList, and is exported for use in
1660 * other situations such as parsing GUC variables. In the GUC case, it's
1661 * important to avoid memory leaks, so the API is designed to minimize the
1662 * amount of stuff that needs to be allocated and freed.
1665 * rawstring: the input string; must be overwritable! On return, it's
1666 * been modified to contain the separated identifiers.
1667 * separator: the separator punctuation expected between identifiers
1668 * (typically '.' or ','). Whitespace may also appear around
1671 * namelist: filled with a palloc'd list of pointers to identifiers within
1672 * rawstring. Caller should list_free() this even on error return.
1674 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1676 * Note that an empty string is considered okay here, though not in
1677 * textToQualifiedNameList.
1680 SplitIdentifierString(char *rawstring, char separator,
1683 char *nextp = rawstring;
/* nextp is the scan cursor; rawstring itself is edited in place as identifiers are isolated. */
1688 while (isspace((unsigned char) *nextp))
1689 nextp++; /* skip leading whitespace */
1692 return true; /* allow empty string */
1694 /* At the top of the loop, we are at start of a new identifier. */
1702 /* Quoted name --- collapse quote-quote pairs, no downcasing */
1703 curname = nextp + 1;
1706 endp = strchr(nextp + 1, '\"');
1708 return false; /* mismatched quotes */
1709 if (endp[1] != '\"')
1710 break; /* found end of quoted name */
1711 /* Collapse adjacent quotes into one quote, and look again */
/* strlen(endp) equals the number of bytes from endp + 1 through the terminating NUL, so the NUL moves along with the rest of the string. */
1712 memmove(endp, endp + 1, strlen(endp));
1715 /* endp now points at the terminating quote */
1720 /* Unquoted name --- extends to separator or whitespace */
1725 while (*nextp && *nextp != separator &&
1726 !isspace((unsigned char) *nextp))
1729 if (curname == nextp)
1730 return false; /* empty unquoted name not allowed */
1733 * Downcase the identifier, using same code as main lexer
1736 * XXX because we want to overwrite the input in-place, we cannot
1737 * support a downcasing transformation that increases the
1738 * string length. This is not a problem given the current
1739 * implementation of downcase_truncate_identifier, but we'll
1740 * probably have to do something about this someday.
1742 len = endp - curname;
1743 downname = downcase_truncate_identifier(curname, len, false);
1744 Assert(strlen(downname) <= len);
/* strncpy writes exactly len bytes and therefore never touches the character at endp; that character is still examined below before it is overwritten with a null. */
1745 strncpy(curname, downname, len);
1749 while (isspace((unsigned char) *nextp))
1750 nextp++; /* skip trailing whitespace */
1752 if (*nextp == separator)
1755 while (isspace((unsigned char) *nextp))
1756 nextp++; /* skip leading whitespace for next */
1757 /* we expect another name, so done remains false */
1759 else if (*nextp == '\0')
1762 return false; /* invalid syntax */
1764 /* Now safe to overwrite separator with a null */
1767 /* Truncate name if it's overlength */
1768 truncate_identifier(curname, strlen(curname), false);
1771 * Finished isolating current name --- add it to list
/* The list entry is a pointer into rawstring, not a copy. */
1773 *namelist = lappend(*namelist, curname);
1775 /* Loop back if we didn't reach end of string */
1782 /*****************************************************************************
1783 * Comparison Functions used for bytea
1785 * Note: btree indexes need these routines not to leak memory; therefore,
1786 * be careful to free working copies of toasted datums. Most places don't
1787 * need to be so careful.
1788 *****************************************************************************/
1791 byteaeq(PG_FUNCTION_ARGS)
1793 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1794 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* Payload lengths: total varlena size minus the header size. */
1799 len1 = VARSIZE(arg1) - VARHDRSZ;
1800 len2 = VARSIZE(arg2) - VARHDRSZ;
1802 /* fast path for different-length inputs */
/* Equality is a byte-wise match over len1 bytes. NOTE(review): the length test guarding this comparison is not visible in this excerpt -- confirm len1 == len2 holds here. */
1806 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
/* Free any working copies of the arguments; the section comment explains that btree indexes need these routines not to leak. */
1808 PG_FREE_IF_COPY(arg1, 0);
1809 PG_FREE_IF_COPY(arg2, 1);
1811 PG_RETURN_BOOL(result);
1815 byteane(PG_FUNCTION_ARGS)
1817 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1818 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* Payload lengths: total varlena size minus the header size. */
1823 len1 = VARSIZE(arg1) - VARHDRSZ;
1824 len2 = VARSIZE(arg2) - VARHDRSZ;
1826 /* fast path for different-length inputs */
/* Inequality is any byte-wise difference over len1 bytes. NOTE(review): the length test guarding this comparison is not visible in this excerpt -- confirm len1 == len2 holds here. */
1830 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
/* Free any working copies of the arguments; the section comment explains that btree indexes need these routines not to leak. */
1832 PG_FREE_IF_COPY(arg1, 0);
1833 PG_FREE_IF_COPY(arg2, 1);
1835 PG_RETURN_BOOL(result);
1839 bytealt(PG_FUNCTION_ARGS)
1841 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1842 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* Payload lengths: total varlena size minus the header size. */
1847 len1 = VARSIZE(arg1) - VARHDRSZ;
1848 len2 = VARSIZE(arg2) - VARHDRSZ;
/* Compare only the common prefix; the lengths break ties in the return expression. */
1850 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* The data is no longer needed after memcmp, so working copies can be freed before the result is formed. */
1852 PG_FREE_IF_COPY(arg1, 0);
1853 PG_FREE_IF_COPY(arg2, 1);
/* True when the prefix compares lower, or the prefixes match and arg1 is strictly shorter. */
1855 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1859 byteale(PG_FUNCTION_ARGS)
1861 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1862 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* Payload lengths: total varlena size minus the header size. */
1867 len1 = VARSIZE(arg1) - VARHDRSZ;
1868 len2 = VARSIZE(arg2) - VARHDRSZ;
/* Compare only the common prefix; the lengths break ties in the return expression. */
1870 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* The data is no longer needed after memcmp, so working copies can be freed before the result is formed. */
1872 PG_FREE_IF_COPY(arg1, 0);
1873 PG_FREE_IF_COPY(arg2, 1);
/* True when the prefix compares lower, or the prefixes match and arg1 is not longer than arg2. */
1875 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1879 byteagt(PG_FUNCTION_ARGS)
1881 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1882 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* Payload lengths: total varlena size minus the header size. */
1887 len1 = VARSIZE(arg1) - VARHDRSZ;
1888 len2 = VARSIZE(arg2) - VARHDRSZ;
/* Compare only the common prefix; the lengths break ties in the return expression. */
1890 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* The data is no longer needed after memcmp, so working copies can be freed before the result is formed. */
1892 PG_FREE_IF_COPY(arg1, 0);
1893 PG_FREE_IF_COPY(arg2, 1);
/* True when the prefix compares higher, or the prefixes match and arg1 is strictly longer. */
1895 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1899 byteage(PG_FUNCTION_ARGS)
1901 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1902 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* Payload lengths: total varlena size minus the header size. */
1907 len1 = VARSIZE(arg1) - VARHDRSZ;
1908 len2 = VARSIZE(arg2) - VARHDRSZ;
/* Compare only the common prefix; the lengths break ties in the return expression. */
1910 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* The data is no longer needed after memcmp, so working copies can be freed before the result is formed. */
1912 PG_FREE_IF_COPY(arg1, 0);
1913 PG_FREE_IF_COPY(arg2, 1);
/* True when the prefix compares higher, or the prefixes match and arg1 is not shorter than arg2. */
1915 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1919 byteacmp(PG_FUNCTION_ARGS)
1921 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1922 bytea *arg2 = PG_GETARG_BYTEA_P(1);
/* Payload lengths: total varlena size minus the header size. */
1927 len1 = VARSIZE(arg1) - VARHDRSZ;
1928 len2 = VARSIZE(arg2) - VARHDRSZ;
/* Compare the common prefix first. */
1930 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
/* Equal prefixes but different lengths: the shorter value sorts first. */
1931 if ((cmp == 0) && (len1 != len2))
1932 cmp = (len1 < len2) ? -1 : 1;
1934 PG_FREE_IF_COPY(arg1, 0);
1935 PG_FREE_IF_COPY(arg2, 1);
/* cmp is memcmp's result when the prefixes differ, otherwise -1, 0 or 1 from the length test. */
1937 PG_RETURN_INT32(cmp);
1942 * replace all occurrences of 'old_sub_str' in 'orig_str'
1943 * with 'new_sub_str' to form 'new_str'
1945 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1946 * otherwise returns 'new_str'
1949 replace_text(PG_FUNCTION_ARGS)
/* Arguments: 0 = text to search, 1 = substring to look for, 2 = replacement substring. */
1951 text *src_text = PG_GETARG_TEXT_P(0);
1952 text *from_sub_text = PG_GETARG_TEXT_P(1);
1953 text *to_sub_text = PG_GETARG_TEXT_P(2);
1954 int src_text_len = TEXTLEN(src_text);
1955 int from_sub_text_len = TEXTLEN(from_sub_text);
1956 char *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
/* str accumulates the output as a C string. */
1962 StringInfo str = makeStringInfo();
/* Empty source or empty search string: hand the source back unchanged. */
1964 if (src_text_len == 0 || from_sub_text_len == 0)
1965 PG_RETURN_TEXT_P(src_text);
/* Work on a copy; buf_text is re-pointed at the unprocessed remainder on every pass. */
1967 buf_text = TEXTDUP(src_text);
1968 curr_posn = TEXTPOS(buf_text, from_sub_text);
/* TEXTPOS asks text_position for match number 1; the loop continues while that position is greater than zero. */
1970 while (curr_posn > 0)
/* Split buf_text around the match using the LEFT and RIGHT macros. */
1972 left_text = LEFT(buf_text, from_sub_text);
1973 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
/* Emit the part before the match, then the replacement in place of the match. */
1975 appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
1976 appendStringInfoString(str, to_sub_str);
/* Continue searching in what followed the match. */
1980 buf_text = right_text;
1981 curr_posn = TEXTPOS(buf_text, from_sub_text);
/* No further match: append whatever remains. */
1984 appendStringInfoString(str, PG_TEXT_GET_STR(buf_text));
1987 ret_text = PG_STR_GET_TEXT(str->data);
1991 PG_RETURN_TEXT_P(ret_text);
1996 * parse input string
1997 * return ord item (1 based)
1998 * based on provided field separator
2001 split_text(PG_FUNCTION_ARGS)
/* Arguments: 0 = input string, 1 = field separator, 2 = requested field number. */
2003 text *inputstring = PG_GETARG_TEXT_P(0);
2004 text *fldsep = PG_GETARG_TEXT_P(1);
2005 int fldnum = PG_GETARG_INT32(2);
2006 int inputstring_len = TEXTLEN(inputstring);
2007 int fldsep_len = TEXTLEN(fldsep);
2012 /* field number is 1 based */
2015 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2016 errmsg("field position must be greater than zero")));
2018 /* return empty string for empty input string */
2019 if (inputstring_len < 1)
2020 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2022 /* empty field separator */
2025 /* if first field, return input string, else empty string */
2027 PG_RETURN_TEXT_P(inputstring);
2029 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
/* start_posn is the position of separator occurrence fldnum - 1 (the one before the requested field); end_posn is occurrence fldnum (the one after it). The branches below treat 0 as "no such occurrence". */
2032 start_posn = text_position(inputstring, fldsep, fldnum - 1);
2033 end_posn = text_position(inputstring, fldsep, fldnum);
2035 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2037 /* if first field, return input string, else empty string */
2039 PG_RETURN_TEXT_P(inputstring);
2041 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2043 else if (start_posn == 0)
2045 /* first field requested */
2046 result_text = LEFT(inputstring, fldsep);
2047 PG_RETURN_TEXT_P(result_text);
2049 else if (end_posn == 0)
2051 /* last field requested */
/* The field starts just past the preceding separator. */
2052 result_text = text_substring(PointerGetDatum(inputstring),
2053 start_posn + fldsep_len,
2055 PG_RETURN_TEXT_P(result_text);
2059 /* interior field requested */
/* The field starts just past the preceding separator; its length is the distance to the next separator minus the separator's own length. */
2060 result_text = text_substring(PointerGetDatum(inputstring),
2061 start_posn + fldsep_len,
2062 end_posn - start_posn - fldsep_len,
2064 PG_RETURN_TEXT_P(result_text);
2070 * parse input string
2071 * return text array of elements
2072 * based on provided field separator
2075 text_to_array(PG_FUNCTION_ARGS)
/* Arguments: 0 = input string, 1 = field separator. */
2077 text *inputstring = PG_GETARG_TEXT_P(0);
2078 text *fldsep = PG_GETARG_TEXT_P(1);
2079 int inputstring_len = TEXTLEN(inputstring);
2080 int fldsep_len = TEXTLEN(fldsep);
/* astate stays NULL until the first field has been accumulated. */
2085 ArrayBuildState *astate = NULL;
2087 /* return NULL for empty input string */
2088 if (inputstring_len < 1)
2092 * empty field separator return one element, 1D, array using the input
2096 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2097 PointerGetDatum(inputstring), 1)); /* inputstring is a text pointer, not a C string */
2099 /* start with end position holding the initial start position */
2101 for (fldnum = 1;; fldnum++) /* field number is 1 based */
2104 bool disnull = false;
/* start_posn is the separator position found on the previous pass; end_posn is the position of separator occurrence number fldnum. The branches below treat 0 as "no such occurrence". */
2106 start_posn = end_posn;
2107 end_posn = text_position(inputstring, fldsep, fldnum);
2109 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2114 * first element return one element, 1D, array using the
2117 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2118 PointerGetDatum(inputstring), 1)); /* inputstring is a text pointer, not a C string */
2122 /* otherwise create array and exit */
2123 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2124 CurrentMemoryContext));
2127 else if (start_posn == 0)
2129 /* first field requested */
2130 result_text = LEFT(inputstring, fldsep);
2132 else if (end_posn == 0)
2134 /* last field requested */
2135 result_text = text_substring(PointerGetDatum(inputstring),
2136 start_posn + fldsep_len,
2141 /* interior field requested */
2142 result_text = text_substring(PointerGetDatum(inputstring),
2143 start_posn + fldsep_len,
2144 end_posn - start_posn - fldsep_len,
2148 /* stash away current value */
2149 dvalue = PointerGetDatum(result_text);
/* accumArrayResult returns the build state to pass to the next call. */
2150 astate = accumArrayResult(astate, dvalue,
2152 CurrentMemoryContext);
2155 /* never reached -- keep compiler quiet */
2161 * concatenate Cstring representation of input array elements
2162 * using provided field separator
2165 array_to_text(PG_FUNCTION_ARGS)
/* Arguments: 0 = the array, 1 = field separator (converted to a C string). */
2167 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
2168 char *fldsep = PG_TEXTARG_GET_STR(1);
2177 StringInfo result_str = makeStringInfo();
2179 ArrayMetaState *my_extra;
/* p walks the array's data area one element at a time in the loop below. */
2181 p = ARR_DATA_PTR(v);
2182 ndims = ARR_NDIM(v);
2184 nitems = ArrayGetNItems(ndims, dims);
2186 /* if there are no elements, return an empty string */
2188 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2190 element_type = ARR_ELEMTYPE(v);
2193 * We arrange to look up info about element type, including its output
2194 * conversion proc, only once per series of calls, assuming the
2195 * element type doesn't change underneath us.
2197 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2198 if (my_extra == NULL)
/* No cache yet: allocate it in fn_mcxt and mark it InvalidOid so the lookup below runs. */
2200 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2201 sizeof(ArrayMetaState));
2202 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2203 my_extra->element_type = InvalidOid;
2206 if (my_extra->element_type != element_type)
2209 * Get info about element type, including its output conversion
2212 get_type_io_data(element_type, IOFunc_output,
2213 &my_extra->typlen, &my_extra->typbyval,
2214 &my_extra->typalign, &my_extra->typdelim,
2215 &my_extra->typioparam, &my_extra->typiofunc);
2216 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2217 fcinfo->flinfo->fn_mcxt);
2218 my_extra->element_type = element_type;
2220 typlen = my_extra->typlen;
2221 typbyval = my_extra->typbyval;
2222 typalign = my_extra->typalign;
/* Convert each element to a C string with the cached output function and append it to result_str. */
2224 for (i = 0; i < nitems; i++)
2229 itemvalue = fetch_att(p, typbyval, typlen);
2231 value = DatumGetCString(FunctionCall1(&my_extra->proc,
/* Two append forms follow: fldsep plus value, and the bare value. NOTE(review): the condition selecting between them is not visible in this excerpt -- presumably the separator is omitted before the first element; confirm. */
2235 appendStringInfo(result_str, "%s%s", fldsep, value);
2237 appendStringInfoString(result_str, value);
/* Step p past the current element, then realign it with att_align for the next one. */
2239 p = att_addlength(p, typlen, PointerGetDatum(p));
2240 p = (char *) att_align(p, typalign);
2243 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2248 * Convert an int32 to a string containing a base 16 (hex) representation of
2252 to_hex32(PG_FUNCTION_ARGS)
/* The int32 argument is converted to uint32, so the digits are produced from the unsigned value. */
2254 uint32 value = (uint32) PG_GETARG_INT32(0);
2257 const char *digits = "0123456789abcdef";
2258 char buf[32]; /* bigger than needed, but reasonable */
/* Start at the last byte of buf; digits are stored backwards from there, least significant first. */
2260 ptr = buf + sizeof(buf) - 1;
/* NOTE(review): the terminator store and the step that reduces value are not visible in this excerpt -- confirm buf ends in '\0' and value is divided by HEXBASE on each pass. */
2265 *--ptr = digits[value % HEXBASE];
/* The do/while form emits at least one digit; the loop stops when value reaches zero or ptr reaches the start of buf. */
2267 } while (ptr > buf && value);
/* ptr addresses the most significant digit produced. */
2269 result_text = PG_STR_GET_TEXT(ptr);
2270 PG_RETURN_TEXT_P(result_text);
2274 * Convert an int64 to a string containing a base 16 (hex) representation of
2278 to_hex64(PG_FUNCTION_ARGS)
/* The int64 argument is converted to uint64, so the digits are produced from the unsigned value. */
2280 uint64 value = (uint64) PG_GETARG_INT64(0);
2283 const char *digits = "0123456789abcdef";
2284 char buf[32]; /* bigger than needed, but reasonable */
/* Start at the last byte of buf; digits are stored backwards from there, least significant first. */
2286 ptr = buf + sizeof(buf) - 1;
/* NOTE(review): the terminator store and the step that reduces value are not visible in this excerpt -- confirm buf ends in '\0' and value is divided by HEXBASE on each pass. */
2291 *--ptr = digits[value % HEXBASE];
/* The do/while form emits at least one digit; the loop stops when value reaches zero or ptr reaches the start of buf. */
2293 } while (ptr > buf && value);
/* ptr addresses the most significant digit produced. */
2295 result_text = PG_STR_GET_TEXT(ptr);
2296 PG_RETURN_TEXT_P(result_text);
2300 * Create an md5 hash of a text string and return it as hex
2302 * md5 produces a 16 byte (128 bit) hash; double it for hex
2304 #define MD5_HASH_LEN 32
2307 md5_text(PG_FUNCTION_ARGS)
2309 text *in_text = PG_GETARG_TEXT_P(0);
/* One byte more than MD5_HASH_LEN; hexsum is later handed to PG_STR_GET_TEXT as a C string. */
2311 char hexsum[MD5_HASH_LEN + 1];
2314 /* Calculate the length of the buffer using varlena metadata */
2315 len = VARSIZE(in_text) - VARHDRSZ;
2317 /* get the hash result */
/* A false return from md5_hash is reported as ERRCODE_OUT_OF_MEMORY. */
2318 if (md5_hash(VARDATA(in_text), len, hexsum) == false)
2320 (errcode(ERRCODE_OUT_OF_MEMORY),
2321 errmsg("out of memory")));
2323 /* convert to text and return it */
2324 result_text = PG_STR_GET_TEXT(hexsum);
2325 PG_RETURN_TEXT_P(result_text);
2329 * Create an md5 hash of a bytea field and return it as a hex string:
2330 * 16-byte md5 digest is represented in 32 hex characters.
2333 md5_bytea(PG_FUNCTION_ARGS)
2335 bytea *in = PG_GETARG_BYTEA_P(0);
/* One byte more than MD5_HASH_LEN; hexsum is later handed to PG_STR_GET_TEXT as a C string. */
2337 char hexsum[MD5_HASH_LEN + 1];
/* Payload length: total varlena size minus the header size. */
2340 len = VARSIZE(in) - VARHDRSZ;
/* Hash the raw payload bytes; a false return from md5_hash is reported as ERRCODE_OUT_OF_MEMORY. */
2341 if (md5_hash(VARDATA(in), len, hexsum) == false)
2343 (errcode(ERRCODE_OUT_OF_MEMORY),
2344 errmsg("out of memory")));
/* Convert the hex string to text and return it. */
2346 result_text = PG_STR_GET_TEXT(hexsum);
2347 PG_RETURN_TEXT_P(result_text);