1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.94 2002/12/06 05:20:17 momjian Exp $
13 *-------------------------------------------------------------------------
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "access/tuptoaster.h"
22 #include "lib/stringinfo.h"
23 #include "utils/builtins.h"
24 #include "utils/pg_locale.h"
26 extern bool md5_hash(const void *buff, size_t len, char *hexsum);
28 typedef struct varlena unknown;
30 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
31 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
32 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
33 #define PG_TEXTARG_GET_STR(arg_) \
34 DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
35 #define PG_TEXT_GET_STR(textp_) \
36 DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
37 #define PG_STR_GET_TEXT(str_) \
38 DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
39 #define TEXTLEN(textp) \
40 text_length(PointerGetDatum(textp))
41 #define TEXTPOS(buf_text, from_sub_text) \
42 text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
43 #define TEXTDUP(textp) \
44 DatumGetTextPCopy(PointerGetDatum(textp))
45 #define LEFT(buf_text, from_sub_text) \
46 text_substring(PointerGetDatum(buf_text), \
48 TEXTPOS(buf_text, from_sub_text) - 1, false)
49 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
50 text_substring(PointerGetDatum(buf_text), \
51 TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
54 static int text_cmp(text *arg1, text *arg2);
55 static int32 text_length(Datum str);
56 static int32 text_position(Datum str, Datum search_str, int matchnum);
57 static text *text_substring(Datum str,
60 bool length_not_specified);
63 /*****************************************************************************
65 *****************************************************************************/
68 #define VAL(CH) ((CH) - '0')
69 #define DIG(VAL) ((VAL) + '0')
72 * byteain - converts from printable representation of byte array
74 * Non-printable characters must be passed as '\nnn' (octal) and are
75 * converted to internal form. '\' must be passed as '\\'.
76 * elog(ERROR, ...) if bad form.
79 * The input is scanned twice.
80 * The error checking of input is minimal.
83 byteain(PG_FUNCTION_ARGS)
85 char *inputText = PG_GETARG_CSTRING(0);
91 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
95 else if ((tp[0] == '\\') &&
96 (tp[1] >= '0' && tp[1] <= '3') &&
97 (tp[2] >= '0' && tp[2] <= '7') &&
98 (tp[3] >= '0' && tp[3] <= '7'))
100 else if ((tp[0] == '\\') &&
106 * one backslash, not followed by 0 or ### valid octal
108 elog(ERROR, "Bad input string for type bytea");
113 result = (bytea *) palloc(byte);
114 result->vl_len = byte; /* set varlena length */
122 else if ((tp[0] == '\\') &&
123 (tp[1] >= '0' && tp[1] <= '3') &&
124 (tp[2] >= '0' && tp[2] <= '7') &&
125 (tp[3] >= '0' && tp[3] <= '7'))
131 *rp++ = byte + VAL(tp[3]);
134 else if ((tp[0] == '\\') &&
143 * We should never get here. The first pass should not allow
146 elog(ERROR, "Bad input string for type bytea");
150 PG_RETURN_BYTEA_P(result);
154 * byteaout - converts to printable representation of byte array
156 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
159 * NULL vlena should be an error--returning string with NULL for now.
162 byteaout(PG_FUNCTION_ARGS)
164 bytea *vlena = PG_GETARG_BYTEA_P(0);
168 int val; /* holds unprintable chars */
172 len = 1; /* empty string has 1 char */
174 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
178 else if (isprint((unsigned char) *vp))
183 rp = result = (char *) palloc(len);
185 for (i = vlena->vl_len - VARHDRSZ; i != 0; i--, vp++)
192 else if (isprint((unsigned char) *vp))
198 rp[3] = DIG(val & 07);
200 rp[2] = DIG(val & 07);
202 rp[1] = DIG(val & 03);
207 PG_RETURN_CSTRING(result);
212 * textin - converts "..." to internal representation
215 textin(PG_FUNCTION_ARGS)
217 char *inputText = PG_GETARG_CSTRING(0);
223 len = strlen(inputText) + VARHDRSZ;
225 if ((ermsg = pg_verifymbstr(inputText, len - VARHDRSZ)))
226 elog(ERROR, "%s", ermsg);
228 result = (text *) palloc(len);
229 VARATT_SIZEP(result) = len;
231 memcpy(VARDATA(result), inputText, len - VARHDRSZ);
234 convertstr(VARDATA(result), len - VARHDRSZ, 0);
237 PG_RETURN_TEXT_P(result);
241 * textout - converts internal representation to "..."
244 textout(PG_FUNCTION_ARGS)
246 text *t = PG_GETARG_TEXT_P(0);
250 len = VARSIZE(t) - VARHDRSZ;
251 result = (char *) palloc(len + 1);
252 memcpy(result, VARDATA(t), len);
256 convertstr(result, len, 1);
259 PG_RETURN_CSTRING(result);
264 * unknownin - converts "..." to internal representation
267 unknownin(PG_FUNCTION_ARGS)
269 char *inputStr = PG_GETARG_CSTRING(0);
273 len = strlen(inputStr) + VARHDRSZ;
275 result = (unknown *) palloc(len);
276 VARATT_SIZEP(result) = len;
278 memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
280 PG_RETURN_UNKNOWN_P(result);
285 * unknownout - converts internal representation to "..."
288 unknownout(PG_FUNCTION_ARGS)
290 unknown *t = PG_GETARG_UNKNOWN_P(0);
294 len = VARSIZE(t) - VARHDRSZ;
295 result = (char *) palloc(len + 1);
296 memcpy(result, VARDATA(t), len);
299 PG_RETURN_CSTRING(result);
303 /* ========== PUBLIC ROUTINES ========== */
307 * returns the logical length of a text*
308 * (which is less than the VARSIZE of the text*)
311 textlen(PG_FUNCTION_ARGS)
313 PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
318 * Does the real work for textlen()
319 * This is broken out so it can be called directly by other string processing
323 text_length(Datum str)
/*
 * NOTE(review): the prototype near the top of this file declares this
 * helper as "static int32", yet every visible exit uses PG_RETURN_INT32(),
 * the fmgr macro intended for Datum-returning functions.  Presumably this
 * only works because the Datum value converts back to int32 implicitly;
 * a plain "return" would match the declared type -- confirm against fmgr.h.
 */
325 /* fastpath when max encoding length is one */
/*
 * With a single-byte encoding the character count equals the byte count,
 * so the size is taken from toast_raw_datum_size() on the Datum itself,
 * without the DatumGetTextP() conversion used in the multibyte branch.
 */
326 if (pg_database_encoding_max_length() == 1)
327 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
/*
 * Multibyte encoding: convert the Datum to a text pointer and count
 * characters over its data area (VARSIZE minus the header).
 */
329 if (pg_database_encoding_max_length() > 1)
331 text *t = DatumGetTextP(str);
333 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
334 VARSIZE(t) - VARHDRSZ));
337 /* should never get here */
338 elog(ERROR, "Invalid backend encoding; encoding max length "
339 "is less than one.");
341 /* not reached: suppress compiler warning */
347 * returns the physical length of a text*
348 * (which is less than the VARSIZE of the text*)
351 textoctetlen(PG_FUNCTION_ARGS)
353 PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
358 * takes two text* and returns a text* that is the concatenation of
361 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
362 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
363 * Allocate space for output in all cases.
364 * XXX - thomas 1997-07-10
367 textcat(PG_FUNCTION_ARGS)
369 text *t1 = PG_GETARG_TEXT_P(0);
370 text *t2 = PG_GETARG_TEXT_P(1);
377 len1 = (VARSIZE(t1) - VARHDRSZ);
381 len2 = (VARSIZE(t2) - VARHDRSZ);
385 len = len1 + len2 + VARHDRSZ;
386 result = (text *) palloc(len);
388 /* Set size of result string... */
389 VARATT_SIZEP(result) = len;
391 /* Fill data field of result string... */
392 ptr = VARDATA(result);
394 memcpy(ptr, VARDATA(t1), len1);
396 memcpy(ptr + len1, VARDATA(t2), len2);
398 PG_RETURN_TEXT_P(result);
403 * Return a substring starting at the specified position.
404 * - thomas 1997-12-31
408 * - starting position (is one-based)
411 * If the starting position is zero or less, then return from the start of the string
412 * adjusting the length to be consistent with the "negative start" per SQL92.
413 * If the length is less than zero, an ERROR is thrown. If no length is given, the rest of the string is returned.
415 * Note that the arguments operate on octet length,
416 * so not aware of multibyte character sets.
418 * Added multibyte support.
419 * - Tatsuo Ishii 1998-4-21
420 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
421 * Formerly returned the entire string; now returns a portion.
422 * - Thomas Lockhart 1998-12-10
423 * Now uses faster TOAST-slicing interface
424 * - John Gray 2002-02-22
425 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
426 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
427 * error; if E < 1, return '', not entire string). Fixed MB related bug when
428 * S > LC and < LC + 4 sometimes garbage characters are returned.
429 * - Joe Conway 2002-08-10
432 text_substr(PG_FUNCTION_ARGS)
434 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
441 * text_substr_no_len -
442 * Wrapper to avoid opr_sanity failure due to
443 * one function accepting a different number of args.
446 text_substr_no_len(PG_FUNCTION_ARGS)
448 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
455 * Does the real work for text_substr() and text_substr_no_len()
456 * This is broken out so it can be called directly by other string processing
460 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
462 int32 eml = pg_database_encoding_max_length();
463 int32 S = start; /* start position */
464 int32 S1; /* adjusted start position */
465 int32 L1; /* adjusted substring length */
467 /* life is easy if the encoding max length is 1 */
472 if (length_not_specified) /* special case - get length to
481 * A negative value for L is the only way for the end position
482 * to be before the start. SQL99 says to throw an error.
485 elog(ERROR, "negative substring length not allowed");
488 * A zero or negative value for the end position can happen if
489 * the start was negative or one. SQL99 says to return a
490 * zero-length string.
493 return PG_STR_GET_TEXT("");
499 * If the start position is past the end of the string, SQL99 says
500 * to return a zero-length string -- DatumGetTextPSlice() will
501 * do that for us. Convert to zero-based starting position
503 return DatumGetTextPSlice(str, S1 - 1, L1);
508 * When encoding max length is > 1, we can't get LC without
509 * detoasting, so we'll grab a conservatively large slice now and
510 * go back later to do the right thing
523 * if S is past the end of the string, the tuple toaster will
524 * return a zero-length string to us
529 * We need to start at position zero because there is no way to
530 * know in advance which byte offset corresponds to the supplied
535 if (length_not_specified) /* special case - get length to
537 slice_size = L1 = -1;
543 * A negative value for L is the only way for the end position
544 * to be before the start. SQL99 says to throw an error.
547 elog(ERROR, "negative substring length not allowed");
550 * A zero or negative value for the end position can happen if
551 * the start was negative or one. SQL99 says to return a
552 * zero-length string.
555 return PG_STR_GET_TEXT("");
558 * if E is past the end of the string, the tuple toaster will
559 * truncate the length for us
564 * Total slice size in bytes can't be any longer than the
565 * start position plus substring length times the encoding max
568 slice_size = (S1 + L1) * eml;
570 slice = DatumGetTextPSlice(str, slice_start, slice_size);
572 /* see if we got back an empty string */
573 if ((VARSIZE(slice) - VARHDRSZ) == 0)
574 return PG_STR_GET_TEXT("");
576 /* Now we can get the actual length of the slice in MB characters */
577 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
580 * Check that the start position wasn't > slice_strlen. If so,
581 * SQL99 says to return a zero-length string.
583 if (S1 > slice_strlen)
584 return PG_STR_GET_TEXT("");
587 * Adjust L1 and E1 now that we know the slice string length.
588 * Again remember that S1 is one based, and slice_start is zero
592 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
594 E1 = slice_start + 1 + slice_strlen;
597 * Find the start position in the slice; remember S1 is not zero
601 for (i = 0; i < S1 - 1; i++)
604 /* hang onto a pointer to our start position */
608 * Count the actual bytes used by the substring of the requested
611 for (i = S1; i < E1; i++)
614 ret = (text *) palloc(VARHDRSZ + (p - s));
615 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
616 memcpy(VARDATA(ret), s, (p - s));
621 elog(ERROR, "Invalid backend encoding; encoding max length "
622 "is less than one.");
624 /* not reached: suppress compiler warning */
625 return PG_STR_GET_TEXT("");
630 * Return the position of the specified substring.
631 * Implements the SQL92 POSITION() function.
632 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
633 * - thomas 1997-07-27
636 textpos(PG_FUNCTION_ARGS)
638 PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
643 * Does the real work for textpos()
644 * This is broken out so it can be called directly by other string processing
648 text_position(Datum str, Datum search_str, int matchnum)
/*
 * NOTE(review): this helper is declared "static int32" in the prototype
 * near the top of the file, but it mixes a plain "return 0" with
 * PG_RETURN_INT32() on its other exits.  Using one return style that
 * matches the declared type would be cleaner -- confirm against fmgr.h.
 */
650 int eml = pg_database_encoding_max_length();
651 text *t1 = DatumGetTextP(str);
652 text *t2 = DatumGetTextP(search_str);
661 return 0; /* result for 0th match */
663 if (VARSIZE(t2) <= VARHDRSZ)
664 PG_RETURN_INT32(1); /* result for empty pattern */
/* byte lengths of both strings, varlena header excluded */
666 len1 = (VARSIZE(t1) - VARHDRSZ);
667 len2 = (VARSIZE(t2) - VARHDRSZ);
669 /* no use in searching str past point where search_str will fit */
672 if (eml == 1) /* simple case - single byte encoding */
680 for (p = 0; p <= px; p++)
/* cheap first-character test before the full len2-byte comparison */
682 if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
/* count matches until the matchnum-th one is reached */
684 if (++match == matchnum)
693 else if (eml > 1) /* not as simple - multibyte encoding */
/*
 * Convert both strings to pg_wchar arrays.  Each array is sized from the
 * byte length plus one element, and len1/len2 are then overwritten with
 * the character counts of the converted strings.
 */
700 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
701 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
702 len1 = pg_wchar_strlen(p1);
703 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
704 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
705 len2 = pg_wchar_strlen(p2);
/*
 * NOTE(review): the assignment of px is not visible in this view.  If it
 * is computed from the byte lengths before len1/len2 are converted to
 * character counts above, this loop bound is in bytes rather than
 * characters and may scan past the end of the converted string -- confirm.
 */
707 for (p = 0; p <= px; p++)
709 if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
711 if (++match == matchnum)
724 elog(ERROR, "Invalid backend encoding; encoding max length "
725 "is less than one.");
727 PG_RETURN_INT32(pos);
731 * Comparison function for text strings with given lengths.
732 * Includes locale support, but must copy strings to temporary memory
733 * to allow null-termination for inputs to strcoll().
737 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
742 * Unfortunately, there is no strncoll(), so in the non-C locale case
743 * we have to do some memory copying. This turns out to be
744 * significantly slower, so we optimize the case where LC_COLLATE is
745 * C. We also try to optimize relatively-short strings by avoiding
746 * palloc/pfree overhead.
748 #define STACKBUFLEN 1024
750 if (!lc_collate_is_c())
752 char a1buf[STACKBUFLEN];
753 char a2buf[STACKBUFLEN];
757 if (len1 >= STACKBUFLEN)
758 a1p = (char *) palloc(len1 + 1);
761 if (len2 >= STACKBUFLEN)
762 a2p = (char *) palloc(len2 + 1);
766 memcpy(a1p, arg1, len1);
768 memcpy(a2p, arg2, len2);
771 result = strcoll(a1p, a2p);
773 if (len1 >= STACKBUFLEN)
775 if (len2 >= STACKBUFLEN)
780 result = strncmp(arg1, arg2, Min(len1, len2));
781 if ((result == 0) && (len1 != len2))
782 result = (len1 < len2) ? -1 : 1;
790 * Internal comparison function for text strings.
794 text_cmp(text *arg1, text *arg2)
804 len1 = VARSIZE(arg1) - VARHDRSZ;
805 len2 = VARSIZE(arg2) - VARHDRSZ;
807 return varstr_cmp(a1p, len1, a2p, len2);
811 * Comparison functions for text strings.
813 * Note: btree indexes need these routines not to leak memory; therefore,
814 * be careful to free working copies of toasted datums. Most places don't
815 * need to be so careful.
819 texteq(PG_FUNCTION_ARGS)
821 text *arg1 = PG_GETARG_TEXT_P(0);
822 text *arg2 = PG_GETARG_TEXT_P(1);
825 /* fast path for different-length inputs */
826 if (VARSIZE(arg1) != VARSIZE(arg2))
829 result = (text_cmp(arg1, arg2) == 0);
831 PG_FREE_IF_COPY(arg1, 0);
832 PG_FREE_IF_COPY(arg2, 1);
834 PG_RETURN_BOOL(result);
838 textne(PG_FUNCTION_ARGS)
840 text *arg1 = PG_GETARG_TEXT_P(0);
841 text *arg2 = PG_GETARG_TEXT_P(1);
844 /* fast path for different-length inputs */
845 if (VARSIZE(arg1) != VARSIZE(arg2))
848 result = (text_cmp(arg1, arg2) != 0);
850 PG_FREE_IF_COPY(arg1, 0);
851 PG_FREE_IF_COPY(arg2, 1);
853 PG_RETURN_BOOL(result);
857 text_lt(PG_FUNCTION_ARGS)
859 text *arg1 = PG_GETARG_TEXT_P(0);
860 text *arg2 = PG_GETARG_TEXT_P(1);
863 result = (text_cmp(arg1, arg2) < 0);
865 PG_FREE_IF_COPY(arg1, 0);
866 PG_FREE_IF_COPY(arg2, 1);
868 PG_RETURN_BOOL(result);
872 text_le(PG_FUNCTION_ARGS)
874 text *arg1 = PG_GETARG_TEXT_P(0);
875 text *arg2 = PG_GETARG_TEXT_P(1);
878 result = (text_cmp(arg1, arg2) <= 0);
880 PG_FREE_IF_COPY(arg1, 0);
881 PG_FREE_IF_COPY(arg2, 1);
883 PG_RETURN_BOOL(result);
887 text_gt(PG_FUNCTION_ARGS)
889 text *arg1 = PG_GETARG_TEXT_P(0);
890 text *arg2 = PG_GETARG_TEXT_P(1);
893 result = (text_cmp(arg1, arg2) > 0);
895 PG_FREE_IF_COPY(arg1, 0);
896 PG_FREE_IF_COPY(arg2, 1);
898 PG_RETURN_BOOL(result);
902 text_ge(PG_FUNCTION_ARGS)
904 text *arg1 = PG_GETARG_TEXT_P(0);
905 text *arg2 = PG_GETARG_TEXT_P(1);
908 result = (text_cmp(arg1, arg2) >= 0);
910 PG_FREE_IF_COPY(arg1, 0);
911 PG_FREE_IF_COPY(arg2, 1);
913 PG_RETURN_BOOL(result);
917 bttextcmp(PG_FUNCTION_ARGS)
919 text *arg1 = PG_GETARG_TEXT_P(0);
920 text *arg2 = PG_GETARG_TEXT_P(1);
923 result = text_cmp(arg1, arg2);
925 PG_FREE_IF_COPY(arg1, 0);
926 PG_FREE_IF_COPY(arg2, 1);
928 PG_RETURN_INT32(result);
933 text_larger(PG_FUNCTION_ARGS)
935 text *arg1 = PG_GETARG_TEXT_P(0);
936 text *arg2 = PG_GETARG_TEXT_P(1);
939 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
941 PG_RETURN_TEXT_P(result);
945 text_smaller(PG_FUNCTION_ARGS)
947 text *arg1 = PG_GETARG_TEXT_P(0);
948 text *arg2 = PG_GETARG_TEXT_P(1);
951 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
953 PG_RETURN_TEXT_P(result);
956 /*-------------------------------------------------------------
959 * get the number of bytes contained in an instance of type 'bytea'
960 *-------------------------------------------------------------
963 byteaoctetlen(PG_FUNCTION_ARGS)
965 PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
970 * takes two bytea* and returns a bytea* that is the concatenation of
973 * Cloned from textcat and modified as required.
976 byteacat(PG_FUNCTION_ARGS)
978 bytea *t1 = PG_GETARG_BYTEA_P(0);
979 bytea *t2 = PG_GETARG_BYTEA_P(1);
986 len1 = (VARSIZE(t1) - VARHDRSZ);
990 len2 = (VARSIZE(t2) - VARHDRSZ);
994 len = len1 + len2 + VARHDRSZ;
995 result = (bytea *) palloc(len);
997 /* Set size of result string... */
998 VARATT_SIZEP(result) = len;
1000 /* Fill data field of result string... */
1001 ptr = VARDATA(result);
1003 memcpy(ptr, VARDATA(t1), len1);
1005 memcpy(ptr + len1, VARDATA(t2), len2);
1007 PG_RETURN_BYTEA_P(result);
1010 #define PG_STR_GET_BYTEA(str_) \
1011 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1014 * Return a substring starting at the specified position.
1015 * Cloned from text_substr and modified as required.
1019 * - starting position (is one-based)
1020 * - string length (optional)
1022 * If the starting position is zero or less, then return from the start of the string
1023 * adjusting the length to be consistent with the "negative start" per SQL92.
1024 * If the length is less than zero, an ERROR is thrown. If no third argument
1025 * (length) is provided, the length to the end of the string is assumed.
1028 bytea_substr(PG_FUNCTION_ARGS)
1030 int S = PG_GETARG_INT32(1); /* start position */
1031 int S1; /* adjusted start position */
1032 int L1; /* adjusted substring length */
1036 if (fcinfo->nargs == 2)
1039 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1040 * everything to the end of the string if we pass it a negative
1048 int E = S + PG_GETARG_INT32(2);
1051 * A negative value for L is the only way for the end position to
1052 * be before the start. SQL99 says to throw an error.
1055 elog(ERROR, "negative substring length not allowed");
1058 * A zero or negative value for the end position can happen if the
1059 * start was negative or one. SQL99 says to return a zero-length
1063 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1069 * If the start position is past the end of the string, SQL99 says to
1070 * return a zero-length string -- PG_GETARG_BYTEA_P_SLICE() will do
1071 * that for us. Convert to zero-based starting position
1073 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1077 * bytea_substr_no_len -
1078 * Wrapper to avoid opr_sanity failure due to
1079 * one function accepting a different number of args.
1082 bytea_substr_no_len(PG_FUNCTION_ARGS)
1084 return bytea_substr(fcinfo);
1089 * Return the position of the specified substring.
1090 * Implements the SQL92 POSITION() function.
1091 * Cloned from textpos and modified as required.
1094 byteapos(PG_FUNCTION_ARGS)
1096 bytea *t1 = PG_GETARG_BYTEA_P(0);
1097 bytea *t2 = PG_GETARG_BYTEA_P(1);
1106 if (VARSIZE(t2) <= VARHDRSZ)
1107 PG_RETURN_INT32(1); /* result for empty pattern */
1109 len1 = (VARSIZE(t1) - VARHDRSZ);
1110 len2 = (VARSIZE(t2) - VARHDRSZ);
1117 for (p = 0; p <= px; p++)
1119 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1127 PG_RETURN_INT32(pos);
1130 /*-------------------------------------------------------------
1133 * this routine treats "bytea" as an array of bytes.
1134 * It returns the Nth byte (a number between 0 and 255).
1135 *-------------------------------------------------------------
1138 byteaGetByte(PG_FUNCTION_ARGS)
1140 bytea *v = PG_GETARG_BYTEA_P(0);
1141 int32 n = PG_GETARG_INT32(1);
1145 len = VARSIZE(v) - VARHDRSZ;
1147 if (n < 0 || n >= len)
1148 elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
1151 byte = ((unsigned char *) VARDATA(v))[n];
1153 PG_RETURN_INT32(byte);
1156 /*-------------------------------------------------------------
1159 * This routine treats a "bytea" type like an array of bits.
1160 * It returns the value of the Nth bit (0 or 1).
1162 *-------------------------------------------------------------
1165 byteaGetBit(PG_FUNCTION_ARGS)
1167 bytea *v = PG_GETARG_BYTEA_P(0);
1168 int32 n = PG_GETARG_INT32(1);
1174 len = VARSIZE(v) - VARHDRSZ;
1176 if (n < 0 || n >= len * 8)
1177 elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
1183 byte = ((unsigned char *) VARDATA(v))[byteNo];
1185 if (byte & (1 << bitNo))
1191 /*-------------------------------------------------------------
1194 * Given an instance of type 'bytea' creates a new one with
1195 * the Nth byte set to the given value.
1197 *-------------------------------------------------------------
1200 byteaSetByte(PG_FUNCTION_ARGS)
1202 bytea *v = PG_GETARG_BYTEA_P(0);
1203 int32 n = PG_GETARG_INT32(1);
1204 int32 newByte = PG_GETARG_INT32(2);
1208 len = VARSIZE(v) - VARHDRSZ;
1210 if (n < 0 || n >= len)
1211 elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
1215 * Make a copy of the original varlena.
1217 res = (bytea *) palloc(VARSIZE(v));
1218 memcpy((char *) res, (char *) v, VARSIZE(v));
1223 ((unsigned char *) VARDATA(res))[n] = newByte;
1225 PG_RETURN_BYTEA_P(res);
1228 /*-------------------------------------------------------------
1231 * Given an instance of type 'bytea' creates a new one with
1232 * the Nth bit set to the given value.
1234 *-------------------------------------------------------------
1237 byteaSetBit(PG_FUNCTION_ARGS)
1239 bytea *v = PG_GETARG_BYTEA_P(0);
1240 int32 n = PG_GETARG_INT32(1);
1241 int32 newBit = PG_GETARG_INT32(2);
1249 len = VARSIZE(v) - VARHDRSZ;
1251 if (n < 0 || n >= len * 8)
1252 elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
1261 if (newBit != 0 && newBit != 1)
1262 elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
1265 * Make a copy of the original varlena.
1267 res = (bytea *) palloc(VARSIZE(v));
1268 memcpy((char *) res, (char *) v, VARSIZE(v));
1273 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1276 newByte = oldByte & (~(1 << bitNo));
1278 newByte = oldByte | (1 << bitNo);
1280 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1282 PG_RETURN_BYTEA_P(res);
1287 * Converts a text type to a Name type.
1290 text_name(PG_FUNCTION_ARGS)
1292 text *s = PG_GETARG_TEXT_P(0);
1296 len = VARSIZE(s) - VARHDRSZ;
1298 /* Truncate oversize input */
1299 if (len >= NAMEDATALEN)
1300 len = NAMEDATALEN - 1;
1303 printf("text- convert string length %d (%d) ->%d\n",
1304 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1307 result = (Name) palloc(NAMEDATALEN);
1308 memcpy(NameStr(*result), VARDATA(s), len);
1310 /* now null pad to full length... */
1311 while (len < NAMEDATALEN)
1313 *(NameStr(*result) + len) = '\0';
1317 PG_RETURN_NAME(result);
1321 * Converts a Name type to a text type.
1324 name_text(PG_FUNCTION_ARGS)
1326 Name s = PG_GETARG_NAME(0);
1330 len = strlen(NameStr(*s));
1333 printf("text- convert string length %d (%d) ->%d\n",
1334 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1337 result = palloc(VARHDRSZ + len);
1338 VARATT_SIZEP(result) = VARHDRSZ + len;
1339 memcpy(VARDATA(result), NameStr(*s), len);
1341 PG_RETURN_TEXT_P(result);
1346 * textToQualifiedNameList - convert a text object to list of names
1348 * This implements the input parsing needed by nextval() and other
1349 * functions that take a text parameter representing a qualified name.
1350 * We split the name at dots, downcase if not double-quoted, and
1351 * truncate names if they're too long.
1354 textToQualifiedNameList(text *textval, const char *caller)
1361 /* Convert to C string (handles possible detoasting). */
1362 /* Note we rely on being able to modify rawname below. */
1363 rawname = DatumGetCString(DirectFunctionCall1(textout,
1364 PointerGetDatum(textval)));
1366 if (!SplitIdentifierString(rawname, '.', &namelist))
1367 elog(ERROR, "%s: invalid name syntax", caller);
1369 if (namelist == NIL)
1370 elog(ERROR, "%s: invalid name syntax", caller);
1372 foreach(l, namelist)
1374 char *curname = (char *) lfirst(l);
1376 result = lappend(result, makeString(pstrdup(curname)));
1386 * SplitIdentifierString --- parse a string containing identifiers
1388 * This is the guts of textToQualifiedNameList, and is exported for use in
1389 * other situations such as parsing GUC variables. In the GUC case, it's
1390 * important to avoid memory leaks, so the API is designed to minimize the
1391 * amount of stuff that needs to be allocated and freed.
1394 * rawstring: the input string; must be overwritable! On return, it's
1395 * been modified to contain the separated identifiers.
1396 * separator: the separator punctuation expected between identifiers
1397 * (typically '.' or ','). Whitespace may also appear around
1400 * namelist: filled with a palloc'd list of pointers to identifiers within
1401 * rawstring. Caller should freeList() this even on error return.
1403 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1405 * Note that an empty string is considered okay here, though not in
1406 * textToQualifiedNameList.
1409 SplitIdentifierString(char *rawstring, char separator,
1412 char *nextp = rawstring;
1417 while (isspace((unsigned char) *nextp))
1418 nextp++; /* skip leading whitespace */
1421 return true; /* allow empty string */
1423 /* At the top of the loop, we are at start of a new identifier. */
1432 /* Quoted name --- collapse quote-quote pairs, no downcasing */
1433 curname = nextp + 1;
1436 endp = strchr(nextp + 1, '\"');
1438 return false; /* mismatched quotes */
1439 if (endp[1] != '\"')
1440 break; /* found end of quoted name */
1441 /* Collapse adjacent quotes into one quote, and look again */
1442 memmove(endp, endp + 1, strlen(endp));
1445 /* endp now points at the terminating quote */
1450 /* Unquoted name --- extends to separator or whitespace */
1452 while (*nextp && *nextp != separator &&
1453 !isspace((unsigned char) *nextp))
1456 * It's important that this match the identifier
1457 * downcasing code used by backend/parser/scan.l.
1459 if (isupper((unsigned char) *nextp))
1460 *nextp = tolower((unsigned char) *nextp);
1464 if (curname == nextp)
1465 return false; /* empty unquoted name not allowed */
1468 while (isspace((unsigned char) *nextp))
1469 nextp++; /* skip trailing whitespace */
1471 if (*nextp == separator)
1474 while (isspace((unsigned char) *nextp))
1475 nextp++; /* skip leading whitespace for next */
1476 /* we expect another name, so done remains false */
1478 else if (*nextp == '\0')
1481 return false; /* invalid syntax */
1483 /* Now safe to overwrite separator with a null */
1486 /* Truncate name if it's overlength; again, should match scan.l */
1487 curlen = strlen(curname);
1488 if (curlen >= NAMEDATALEN)
1490 curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1491 curname[curlen] = '\0';
1495 * Finished isolating current name --- add it to list
1497 *namelist = lappend(*namelist, curname);
1499 /* Loop back if we didn't reach end of string */
1506 /*****************************************************************************
1507 * Comparison Functions used for bytea
1509 * Note: btree indexes need these routines not to leak memory; therefore,
1510 * be careful to free working copies of toasted datums. Most places don't
1511 * need to be so careful.
1512 *****************************************************************************/
1515 byteaeq(PG_FUNCTION_ARGS)
1517 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1518 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1523 len1 = VARSIZE(arg1) - VARHDRSZ;
1524 len2 = VARSIZE(arg2) - VARHDRSZ;
1526 /* fast path for different-length inputs */
1530 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1532 PG_FREE_IF_COPY(arg1, 0);
1533 PG_FREE_IF_COPY(arg2, 1);
1535 PG_RETURN_BOOL(result);
1539 byteane(PG_FUNCTION_ARGS)
1541 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1542 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1547 len1 = VARSIZE(arg1) - VARHDRSZ;
1548 len2 = VARSIZE(arg2) - VARHDRSZ;
1550 /* fast path for different-length inputs */
1554 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1556 PG_FREE_IF_COPY(arg1, 0);
1557 PG_FREE_IF_COPY(arg2, 1);
1559 PG_RETURN_BOOL(result);
1563 bytealt(PG_FUNCTION_ARGS)
1565 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1566 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1571 len1 = VARSIZE(arg1) - VARHDRSZ;
1572 len2 = VARSIZE(arg2) - VARHDRSZ;
1574 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1576 PG_FREE_IF_COPY(arg1, 0);
1577 PG_FREE_IF_COPY(arg2, 1);
1579 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1583 byteale(PG_FUNCTION_ARGS)
1585 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1586 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1591 len1 = VARSIZE(arg1) - VARHDRSZ;
1592 len2 = VARSIZE(arg2) - VARHDRSZ;
1594 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1596 PG_FREE_IF_COPY(arg1, 0);
1597 PG_FREE_IF_COPY(arg2, 1);
1599 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1603 byteagt(PG_FUNCTION_ARGS)
1605 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1606 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1611 len1 = VARSIZE(arg1) - VARHDRSZ;
1612 len2 = VARSIZE(arg2) - VARHDRSZ;
1614 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1616 PG_FREE_IF_COPY(arg1, 0);
1617 PG_FREE_IF_COPY(arg2, 1);
1619 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1623 byteage(PG_FUNCTION_ARGS)
1625 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1626 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1631 len1 = VARSIZE(arg1) - VARHDRSZ;
1632 len2 = VARSIZE(arg2) - VARHDRSZ;
1634 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1636 PG_FREE_IF_COPY(arg1, 0);
1637 PG_FREE_IF_COPY(arg2, 1);
1639 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1643 byteacmp(PG_FUNCTION_ARGS)
1645 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1646 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1651 len1 = VARSIZE(arg1) - VARHDRSZ;
1652 len2 = VARSIZE(arg2) - VARHDRSZ;
1654 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1655 if ((cmp == 0) && (len1 != len2))
1656 cmp = (len1 < len2) ? -1 : 1;
1658 PG_FREE_IF_COPY(arg1, 0);
1659 PG_FREE_IF_COPY(arg2, 1);
1661 PG_RETURN_INT32(cmp);
1666 * replace all occurrences of 'old_sub_str' in 'orig_str'
1667 * with 'new_sub_str' to form 'new_str'
1669 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1670 * otherwise returns 'new_str'
1673 replace_text(PG_FUNCTION_ARGS)
1680 text *src_text = PG_GETARG_TEXT_P(0);
1681 int src_text_len = TEXTLEN(src_text);
1682 text *from_sub_text = PG_GETARG_TEXT_P(1);
1683 int from_sub_text_len = TEXTLEN(from_sub_text);
1684 text *to_sub_text = PG_GETARG_TEXT_P(2);
1685 char *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1686 StringInfo str = makeStringInfo();
1688 if (src_text_len == 0 || from_sub_text_len == 0)
1689 PG_RETURN_TEXT_P(src_text);
1691 buf_text = TEXTDUP(src_text);
1692 curr_posn = TEXTPOS(buf_text, from_sub_text);
1694 while (curr_posn > 0)
1696 left_text = LEFT(buf_text, from_sub_text);
1697 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1699 appendStringInfo(str, PG_TEXT_GET_STR(left_text));
1700 appendStringInfo(str, to_sub_str);
1704 buf_text = right_text;
1705 curr_posn = TEXTPOS(buf_text, from_sub_text);
1708 appendStringInfo(str, PG_TEXT_GET_STR(buf_text));
1711 ret_text = PG_STR_GET_TEXT(str->data);
1715 PG_RETURN_TEXT_P(ret_text);
1720 * parse input string
1721 * return ord item (1 based)
1722 * based on provided field separator
1725 split_text(PG_FUNCTION_ARGS)
1727 text *inputstring = PG_GETARG_TEXT_P(0);
1728 int inputstring_len = TEXTLEN(inputstring);
1729 text *fldsep = PG_GETARG_TEXT_P(1);
1730 int fldsep_len = TEXTLEN(fldsep);
1731 int fldnum = PG_GETARG_INT32(2);
1736 /* return empty string for empty input string */
1737 if (inputstring_len < 1)
1738 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1740 /* empty field separator */
1743 if (fldnum == 1) /* first field - just return the input
1745 PG_RETURN_TEXT_P(inputstring);
1747 /* otherwise return an empty string */
1748 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1751 /* field number is 1 based */
1753 elog(ERROR, "field position must be > 0");
1755 start_posn = text_position(PointerGetDatum(inputstring),
1756 PointerGetDatum(fldsep),
1758 end_posn = text_position(PointerGetDatum(inputstring),
1759 PointerGetDatum(fldsep),
1762 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
1764 if (fldnum == 1) /* first field - just return the input
1766 PG_RETURN_TEXT_P(inputstring);
1768 /* otherwise return an empty string */
1769 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1771 else if ((start_posn != 0) && (end_posn == 0))
1773 /* last field requested */
1774 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
1775 PG_RETURN_TEXT_P(result_text);
1777 else if ((start_posn == 0) && (end_posn != 0))
1779 /* first field requested */
1780 result_text = LEFT(inputstring, fldsep);
1781 PG_RETURN_TEXT_P(result_text);
1785 /* prior to last field requested */
1786 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
1787 PG_RETURN_TEXT_P(result_text);
1793 * Convert a int32 to a string containing a base 16 (hex) representation of
1797 to_hex32(PG_FUNCTION_ARGS)
1799 static char digits[] = "0123456789abcdef";
1800 char buf[32]; /* bigger than needed, but reasonable */
1803 int32 value = PG_GETARG_INT32(0);
1805 ptr = buf + sizeof(buf) - 1;
1810 *--ptr = digits[value % HEXBASE];
1812 } while (ptr > buf && value);
1814 result_text = PG_STR_GET_TEXT(ptr);
1815 PG_RETURN_TEXT_P(result_text);
1819 * Convert a int64 to a string containing a base 16 (hex) representation of
1823 to_hex64(PG_FUNCTION_ARGS)
1825 static char digits[] = "0123456789abcdef";
1826 char buf[32]; /* bigger than needed, but reasonable */
1829 int64 value = PG_GETARG_INT64(0);
1831 ptr = buf + sizeof(buf) - 1;
1836 *--ptr = digits[value % HEXBASE];
1838 } while (ptr > buf && value);
1840 result_text = PG_STR_GET_TEXT(ptr);
1841 PG_RETURN_TEXT_P(result_text);
1845 * Create an md5 hash of a text string and return it as hex
1847 * md5 produces a 16 byte (128 bit) hash; double it for hex
1849 #define MD5_HASH_LEN 32
1852 md5_text(PG_FUNCTION_ARGS)
1854 char *buff = PG_TEXT_GET_STR(PG_GETARG_TEXT_P(0));
1855 size_t len = strlen(buff);
1859 /* leave room for the terminating '\0' */
1860 hexsum = (char *) palloc(MD5_HASH_LEN + 1);
1862 /* get the hash result */
1863 md5_hash((void *) buff, len, hexsum);
1865 /* convert to text and return it */
1866 result_text = PG_STR_GET_TEXT(hexsum);
1867 PG_RETURN_TEXT_P(result_text);