1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.99 2003/06/24 23:14:46 momjian Exp $
13 *-------------------------------------------------------------------------
19 #include "mb/pg_wchar.h"
20 #include "miscadmin.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_type.h"
23 #include "lib/stringinfo.h"
24 #include "libpq/crypt.h"
25 #include "libpq/pqformat.h"
26 #include "utils/array.h"
27 #include "utils/builtins.h"
28 #include "utils/pg_locale.h"
29 #include "utils/lsyscache.h"
32 typedef struct varlena unknown;
34 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
35 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
36 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
37 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
38 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
40 #define PG_TEXTARG_GET_STR(arg_) \
41 DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
42 #define PG_TEXT_GET_STR(textp_) \
43 DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
44 #define PG_STR_GET_TEXT(str_) \
45 DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
46 #define TEXTLEN(textp) \
47 text_length(PointerGetDatum(textp))
48 #define TEXTPOS(buf_text, from_sub_text) \
49 text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
50 #define TEXTDUP(textp) \
51 DatumGetTextPCopy(PointerGetDatum(textp))
52 #define LEFT(buf_text, from_sub_text) \
53 text_substring(PointerGetDatum(buf_text), \
55 TEXTPOS(buf_text, from_sub_text) - 1, false)
56 #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
57 text_substring(PointerGetDatum(buf_text), \
58 TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
61 static int text_cmp(text *arg1, text *arg2);
62 static int32 text_length(Datum str);
63 static int32 text_position(Datum str, Datum search_str, int matchnum);
64 static text *text_substring(Datum str,
67 bool length_not_specified);
70 /*****************************************************************************
72 *****************************************************************************/
75 #define VAL(CH) ((CH) - '0')
76 #define DIG(VAL) ((VAL) + '0')
79 * byteain - converts from printable representation of byte array
81 * Non-printable characters must be passed as '\nnn' (octal) and are
82 * converted to internal form. '\' must be passed as '\\'.
83 * elog(ERROR, ...) if bad form.
86 * The input is scanned twice.
87 * The error checking of input is minimal.
90 byteain(PG_FUNCTION_ARGS)
92 char *inputText = PG_GETARG_CSTRING(0);
98 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
102 else if ((tp[0] == '\\') &&
103 (tp[1] >= '0' && tp[1] <= '3') &&
104 (tp[2] >= '0' && tp[2] <= '7') &&
105 (tp[3] >= '0' && tp[3] <= '7'))
107 else if ((tp[0] == '\\') &&
113 * one backslash, not followed by 0 or ### valid octal
115 elog(ERROR, "Bad input string for type bytea");
120 result = (bytea *) palloc(byte);
121 VARATT_SIZEP(result) = byte; /* set varlena length */
124 rp = VARDATA(result);
129 else if ((tp[0] == '\\') &&
130 (tp[1] >= '0' && tp[1] <= '3') &&
131 (tp[2] >= '0' && tp[2] <= '7') &&
132 (tp[3] >= '0' && tp[3] <= '7'))
138 *rp++ = byte + VAL(tp[3]);
141 else if ((tp[0] == '\\') &&
150 * We should never get here. The first pass should not allow
153 elog(ERROR, "Bad input string for type bytea");
157 PG_RETURN_BYTEA_P(result);
161 * byteaout - converts to printable representation of byte array
163 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
166 * NULL vlena should be an error--returning string with NULL for now.
169 byteaout(PG_FUNCTION_ARGS)
171 bytea *vlena = PG_GETARG_BYTEA_P(0);
175 int val; /* holds unprintable chars */
179 len = 1; /* empty string has 1 char */
181 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
185 else if (isprint((unsigned char) *vp))
190 rp = result = (char *) palloc(len);
192 for (i = VARSIZE(vlena) - VARHDRSZ; i != 0; i--, vp++)
199 else if (isprint((unsigned char) *vp))
205 rp[3] = DIG(val & 07);
207 rp[2] = DIG(val & 07);
209 rp[1] = DIG(val & 03);
214 PG_RETURN_CSTRING(result);
218 * bytearecv - converts external binary format to bytea
221 bytearecv(PG_FUNCTION_ARGS)
223 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
227 nbytes = buf->len - buf->cursor;
228 result = (bytea *) palloc(nbytes + VARHDRSZ);
229 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
230 pq_copymsgbytes(buf, VARDATA(result), nbytes);
231 PG_RETURN_BYTEA_P(result);
235 * byteasend - converts bytea to binary format
237 * This is a special case: just copy the input...
240 byteasend(PG_FUNCTION_ARGS)
242 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
244 PG_RETURN_BYTEA_P(vlena);
249 * textin - converts "..." to internal representation
252 textin(PG_FUNCTION_ARGS)
254 char *inputText = PG_GETARG_CSTRING(0);
260 len = strlen(inputText) + VARHDRSZ;
262 if ((ermsg = pg_verifymbstr(inputText, len - VARHDRSZ)))
263 elog(ERROR, "%s", ermsg);
265 result = (text *) palloc(len);
266 VARATT_SIZEP(result) = len;
268 memcpy(VARDATA(result), inputText, len - VARHDRSZ);
271 convertstr(VARDATA(result), len - VARHDRSZ, 0);
274 PG_RETURN_TEXT_P(result);
278 * textout - converts internal representation to "..."
281 textout(PG_FUNCTION_ARGS)
283 text *t = PG_GETARG_TEXT_P(0);
287 len = VARSIZE(t) - VARHDRSZ;
288 result = (char *) palloc(len + 1);
289 memcpy(result, VARDATA(t), len);
293 convertstr(result, len, 1);
296 PG_RETURN_CSTRING(result);
300 * textrecv - converts external binary format to text
303 textrecv(PG_FUNCTION_ARGS)
305 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
310 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
311 result = (text *) palloc(nbytes + VARHDRSZ);
312 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
313 memcpy(VARDATA(result), str, nbytes);
315 PG_RETURN_TEXT_P(result);
319 * textsend - converts text to binary format
322 textsend(PG_FUNCTION_ARGS)
324 text *t = PG_GETARG_TEXT_P(0);
327 pq_begintypsend(&buf);
328 pq_sendtext(&buf, VARDATA(t), VARSIZE(t) - VARHDRSZ);
329 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
334 * unknownin - converts "..." to internal representation
337 unknownin(PG_FUNCTION_ARGS)
339 char *inputStr = PG_GETARG_CSTRING(0);
343 len = strlen(inputStr) + VARHDRSZ;
345 result = (unknown *) palloc(len);
346 VARATT_SIZEP(result) = len;
348 memcpy(VARDATA(result), inputStr, len - VARHDRSZ);
350 PG_RETURN_UNKNOWN_P(result);
354 * unknownout - converts internal representation to "..."
357 unknownout(PG_FUNCTION_ARGS)
359 unknown *t = PG_GETARG_UNKNOWN_P(0);
363 len = VARSIZE(t) - VARHDRSZ;
364 result = (char *) palloc(len + 1);
365 memcpy(result, VARDATA(t), len);
368 PG_RETURN_CSTRING(result);
372 * unknownrecv - converts external binary format to unknown
375 unknownrecv(PG_FUNCTION_ARGS)
377 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
381 nbytes = buf->len - buf->cursor;
382 result = (unknown *) palloc(nbytes + VARHDRSZ);
383 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
384 pq_copymsgbytes(buf, VARDATA(result), nbytes);
385 PG_RETURN_UNKNOWN_P(result);
389 * unknownsend - converts unknown to binary format
391 * This is a special case: just copy the input, since it's
392 * effectively the same format as bytea
395 unknownsend(PG_FUNCTION_ARGS)
397 unknown *vlena = PG_GETARG_UNKNOWN_P_COPY(0);
399 PG_RETURN_UNKNOWN_P(vlena);
403 /* ========== PUBLIC ROUTINES ========== */
407 * returns the logical length of a text*
408 * (which is less than the VARSIZE of the text*)
411 textlen(PG_FUNCTION_ARGS)
413 PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
418 * Does the real work for textlen()
419 * This is broken out so it can be called directly by other string processing
423 text_length(Datum str)
425 /* fastpath when max encoding length is one */
426 if (pg_database_encoding_max_length() == 1)
427 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
429 if (pg_database_encoding_max_length() > 1)
431 text *t = DatumGetTextP(str);
433 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
434 VARSIZE(t) - VARHDRSZ));
437 /* should never get here */
438 elog(ERROR, "Invalid backend encoding; encoding max length "
439 "is less than one.");
441 /* not reached: suppress compiler warning */
447 * returns the physical length of a text*
448 * (which is less than the VARSIZE of the text*)
451 textoctetlen(PG_FUNCTION_ARGS)
453 PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
458 * takes two text* and returns a text* that is the concatenation of
461 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
462 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
463 * Allocate space for output in all cases.
464 * XXX - thomas 1997-07-10
467 textcat(PG_FUNCTION_ARGS)
469 text *t1 = PG_GETARG_TEXT_P(0);
470 text *t2 = PG_GETARG_TEXT_P(1);
477 len1 = (VARSIZE(t1) - VARHDRSZ);
481 len2 = (VARSIZE(t2) - VARHDRSZ);
485 len = len1 + len2 + VARHDRSZ;
486 result = (text *) palloc(len);
488 /* Set size of result string... */
489 VARATT_SIZEP(result) = len;
491 /* Fill data field of result string... */
492 ptr = VARDATA(result);
494 memcpy(ptr, VARDATA(t1), len1);
496 memcpy(ptr + len1, VARDATA(t2), len2);
498 PG_RETURN_TEXT_P(result);
503 * Return a substring starting at the specified position.
504 * - thomas 1997-12-31
508 * - starting position (is one-based)
511 * If the starting position is zero or less, then return from the start of the string
512 * adjusting the length to be consistent with the "negative start" per SQL92.
513 * If the length is less than zero, an ERROR is thrown.
515 * Note that the arguments operate on octet length,
516 * so not aware of multibyte character sets.
518 * Added multibyte support.
519 * - Tatsuo Ishii 1998-4-21
520 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
521 * Formerly returned the entire string; now returns a portion.
522 * - Thomas Lockhart 1998-12-10
523 * Now uses faster TOAST-slicing interface
524 * - John Gray 2002-02-22
525 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
526 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
527 * error; if E < 1, return '', not entire string). Fixed MB related bug when
528 * S > LC and < LC + 4 sometimes garbage characters are returned.
529 * - Joe Conway 2002-08-10
532 text_substr(PG_FUNCTION_ARGS)
534 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
541 * text_substr_no_len -
542 * Wrapper to avoid opr_sanity failure due to
543 * one function accepting a different number of args.
546 text_substr_no_len(PG_FUNCTION_ARGS)
548 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
555 * Does the real work for text_substr() and text_substr_no_len()
556 * This is broken out so it can be called directly by other string processing
560 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
562 int32 eml = pg_database_encoding_max_length();
563 int32 S = start; /* start position */
564 int32 S1; /* adjusted start position */
565 int32 L1; /* adjusted substring length */
567 /* life is easy if the encoding max length is 1 */
572 if (length_not_specified) /* special case - get length to
581 * A negative value for L is the only way for the end position
582 * to be before the start. SQL99 says to throw an error.
585 elog(ERROR, "negative substring length not allowed");
588 * A zero or negative value for the end position can happen if
589 * the start was negative or one. SQL99 says to return a
590 * zero-length string.
593 return PG_STR_GET_TEXT("");
599 * If the start position is past the end of the string, SQL99 says
600 * to return a zero-length string -- DatumGetTextPSlice() will
601 * do that for us. Convert to zero-based starting position
603 return DatumGetTextPSlice(str, S1 - 1, L1);
608 * When encoding max length is > 1, we can't get LC without
609 * detoasting, so we'll grab a conservatively large slice now and
610 * go back later to do the right thing
623 * if S is past the end of the string, the tuple toaster will
624 * return a zero-length string to us
629 * We need to start at position zero because there is no way to
630 * know in advance which byte offset corresponds to the supplied
635 if (length_not_specified) /* special case - get length to
637 slice_size = L1 = -1;
643 * A negative value for L is the only way for the end position
644 * to be before the start. SQL99 says to throw an error.
647 elog(ERROR, "negative substring length not allowed");
650 * A zero or negative value for the end position can happen if
651 * the start was negative or one. SQL99 says to return a
652 * zero-length string.
655 return PG_STR_GET_TEXT("");
658 * if E is past the end of the string, the tuple toaster will
659 * truncate the length for us
664 * Total slice size in bytes can't be any longer than the
665 * start position plus substring length times the encoding max
668 slice_size = (S1 + L1) * eml;
670 slice = DatumGetTextPSlice(str, slice_start, slice_size);
672 /* see if we got back an empty string */
673 if ((VARSIZE(slice) - VARHDRSZ) == 0)
674 return PG_STR_GET_TEXT("");
676 /* Now we can get the actual length of the slice in MB characters */
677 slice_strlen = pg_mbstrlen_with_len(VARDATA(slice), VARSIZE(slice) - VARHDRSZ);
680 * Check that the start position wasn't > slice_strlen. If so,
681 * SQL99 says to return a zero-length string.
683 if (S1 > slice_strlen)
684 return PG_STR_GET_TEXT("");
687 * Adjust L1 and E1 now that we know the slice string length.
688 * Again remember that S1 is one based, and slice_start is zero
692 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
694 E1 = slice_start + 1 + slice_strlen;
697 * Find the start position in the slice; remember S1 is not zero
701 for (i = 0; i < S1 - 1; i++)
704 /* hang onto a pointer to our start position */
708 * Count the actual bytes used by the substring of the requested
711 for (i = S1; i < E1; i++)
714 ret = (text *) palloc(VARHDRSZ + (p - s));
715 VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
716 memcpy(VARDATA(ret), s, (p - s));
721 elog(ERROR, "Invalid backend encoding; encoding max length "
722 "is less than one.");
724 /* not reached: suppress compiler warning */
725 return PG_STR_GET_TEXT("");
730 * Return the position of the specified substring.
731 * Implements the SQL92 POSITION() function.
732 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
733 * - thomas 1997-07-27
736 textpos(PG_FUNCTION_ARGS)
738 PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
743 * Does the real work for textpos()
744 * This is broken out so it can be called directly by other string processing
748 text_position(Datum str, Datum search_str, int matchnum)
750 int eml = pg_database_encoding_max_length();
751 text *t1 = DatumGetTextP(str);
752 text *t2 = DatumGetTextP(search_str);
761 return 0; /* result for 0th match */
763 if (VARSIZE(t2) <= VARHDRSZ)
764 PG_RETURN_INT32(1); /* result for empty pattern */
766 len1 = (VARSIZE(t1) - VARHDRSZ);
767 len2 = (VARSIZE(t2) - VARHDRSZ);
769 /* no use in searching str past point where search_str will fit */
772 if (eml == 1) /* simple case - single byte encoding */
780 for (p = 0; p <= px; p++)
782 if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
784 if (++match == matchnum)
793 else if (eml > 1) /* not as simple - multibyte encoding */
800 ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
801 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
802 len1 = pg_wchar_strlen(p1);
803 ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
804 (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
805 len2 = pg_wchar_strlen(p2);
807 for (p = 0; p <= px; p++)
809 if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
811 if (++match == matchnum)
824 elog(ERROR, "Invalid backend encoding; encoding max length "
825 "is less than one.");
827 PG_RETURN_INT32(pos);
831 * Comparison function for text strings with given lengths.
832 * Includes locale support, but must copy strings to temporary memory
833 * to allow null-termination for inputs to strcoll().
837 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
842 * Unfortunately, there is no strncoll(), so in the non-C locale case
843 * we have to do some memory copying. This turns out to be
844 * significantly slower, so we optimize the case where LC_COLLATE is
845 * C. We also try to optimize relatively-short strings by avoiding
846 * palloc/pfree overhead.
848 #define STACKBUFLEN 1024
850 if (!lc_collate_is_c())
852 char a1buf[STACKBUFLEN];
853 char a2buf[STACKBUFLEN];
857 if (len1 >= STACKBUFLEN)
858 a1p = (char *) palloc(len1 + 1);
861 if (len2 >= STACKBUFLEN)
862 a2p = (char *) palloc(len2 + 1);
866 memcpy(a1p, arg1, len1);
868 memcpy(a2p, arg2, len2);
871 result = strcoll(a1p, a2p);
873 if (len1 >= STACKBUFLEN)
875 if (len2 >= STACKBUFLEN)
880 result = strncmp(arg1, arg2, Min(len1, len2));
881 if ((result == 0) && (len1 != len2))
882 result = (len1 < len2) ? -1 : 1;
890 * Internal comparison function for text strings.
894 text_cmp(text *arg1, text *arg2)
904 len1 = VARSIZE(arg1) - VARHDRSZ;
905 len2 = VARSIZE(arg2) - VARHDRSZ;
907 return varstr_cmp(a1p, len1, a2p, len2);
911 * Comparison functions for text strings.
913 * Note: btree indexes need these routines not to leak memory; therefore,
914 * be careful to free working copies of toasted datums. Most places don't
915 * need to be so careful.
919 texteq(PG_FUNCTION_ARGS)
921 text *arg1 = PG_GETARG_TEXT_P(0);
922 text *arg2 = PG_GETARG_TEXT_P(1);
925 /* fast path for different-length inputs */
926 if (VARSIZE(arg1) != VARSIZE(arg2))
929 result = (text_cmp(arg1, arg2) == 0);
931 PG_FREE_IF_COPY(arg1, 0);
932 PG_FREE_IF_COPY(arg2, 1);
934 PG_RETURN_BOOL(result);
938 textne(PG_FUNCTION_ARGS)
940 text *arg1 = PG_GETARG_TEXT_P(0);
941 text *arg2 = PG_GETARG_TEXT_P(1);
944 /* fast path for different-length inputs */
945 if (VARSIZE(arg1) != VARSIZE(arg2))
948 result = (text_cmp(arg1, arg2) != 0);
950 PG_FREE_IF_COPY(arg1, 0);
951 PG_FREE_IF_COPY(arg2, 1);
953 PG_RETURN_BOOL(result);
957 text_lt(PG_FUNCTION_ARGS)
959 text *arg1 = PG_GETARG_TEXT_P(0);
960 text *arg2 = PG_GETARG_TEXT_P(1);
963 result = (text_cmp(arg1, arg2) < 0);
965 PG_FREE_IF_COPY(arg1, 0);
966 PG_FREE_IF_COPY(arg2, 1);
968 PG_RETURN_BOOL(result);
972 text_le(PG_FUNCTION_ARGS)
974 text *arg1 = PG_GETARG_TEXT_P(0);
975 text *arg2 = PG_GETARG_TEXT_P(1);
978 result = (text_cmp(arg1, arg2) <= 0);
980 PG_FREE_IF_COPY(arg1, 0);
981 PG_FREE_IF_COPY(arg2, 1);
983 PG_RETURN_BOOL(result);
987 text_gt(PG_FUNCTION_ARGS)
989 text *arg1 = PG_GETARG_TEXT_P(0);
990 text *arg2 = PG_GETARG_TEXT_P(1);
993 result = (text_cmp(arg1, arg2) > 0);
995 PG_FREE_IF_COPY(arg1, 0);
996 PG_FREE_IF_COPY(arg2, 1);
998 PG_RETURN_BOOL(result);
1002 text_ge(PG_FUNCTION_ARGS)
1004 text *arg1 = PG_GETARG_TEXT_P(0);
1005 text *arg2 = PG_GETARG_TEXT_P(1);
1008 result = (text_cmp(arg1, arg2) >= 0);
1010 PG_FREE_IF_COPY(arg1, 0);
1011 PG_FREE_IF_COPY(arg2, 1);
1013 PG_RETURN_BOOL(result);
1017 bttextcmp(PG_FUNCTION_ARGS)
1019 text *arg1 = PG_GETARG_TEXT_P(0);
1020 text *arg2 = PG_GETARG_TEXT_P(1);
1023 result = text_cmp(arg1, arg2);
1025 PG_FREE_IF_COPY(arg1, 0);
1026 PG_FREE_IF_COPY(arg2, 1);
1028 PG_RETURN_INT32(result);
1033 text_larger(PG_FUNCTION_ARGS)
1035 text *arg1 = PG_GETARG_TEXT_P(0);
1036 text *arg2 = PG_GETARG_TEXT_P(1);
1039 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1041 PG_RETURN_TEXT_P(result);
1045 text_smaller(PG_FUNCTION_ARGS)
1047 text *arg1 = PG_GETARG_TEXT_P(0);
1048 text *arg2 = PG_GETARG_TEXT_P(1);
1051 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1053 PG_RETURN_TEXT_P(result);
1058 * The following operators support character-by-character comparison
1059 * of text data types, to allow building indexes suitable for LIKE
1064 internal_text_pattern_compare(text *arg1, text *arg2)
1068 result = memcmp(VARDATA(arg1), VARDATA(arg2),
1069 Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
1072 else if (VARSIZE(arg1) < VARSIZE(arg2))
1074 else if (VARSIZE(arg1) > VARSIZE(arg2))
1082 text_pattern_lt(PG_FUNCTION_ARGS)
1084 text *arg1 = PG_GETARG_TEXT_P(0);
1085 text *arg2 = PG_GETARG_TEXT_P(1);
1088 result = internal_text_pattern_compare(arg1, arg2);
1090 PG_FREE_IF_COPY(arg1, 0);
1091 PG_FREE_IF_COPY(arg2, 1);
1093 PG_RETURN_BOOL(result < 0);
1098 text_pattern_le(PG_FUNCTION_ARGS)
1100 text *arg1 = PG_GETARG_TEXT_P(0);
1101 text *arg2 = PG_GETARG_TEXT_P(1);
1104 result = internal_text_pattern_compare(arg1, arg2);
1106 PG_FREE_IF_COPY(arg1, 0);
1107 PG_FREE_IF_COPY(arg2, 1);
1109 PG_RETURN_BOOL(result <= 0);
1114 text_pattern_eq(PG_FUNCTION_ARGS)
1116 text *arg1 = PG_GETARG_TEXT_P(0);
1117 text *arg2 = PG_GETARG_TEXT_P(1);
1120 if (VARSIZE(arg1) != VARSIZE(arg2))
1123 result = internal_text_pattern_compare(arg1, arg2);
1125 PG_FREE_IF_COPY(arg1, 0);
1126 PG_FREE_IF_COPY(arg2, 1);
1128 PG_RETURN_BOOL(result == 0);
1133 text_pattern_ge(PG_FUNCTION_ARGS)
1135 text *arg1 = PG_GETARG_TEXT_P(0);
1136 text *arg2 = PG_GETARG_TEXT_P(1);
1139 result = internal_text_pattern_compare(arg1, arg2);
1141 PG_FREE_IF_COPY(arg1, 0);
1142 PG_FREE_IF_COPY(arg2, 1);
1144 PG_RETURN_BOOL(result >= 0);
1149 text_pattern_gt(PG_FUNCTION_ARGS)
1151 text *arg1 = PG_GETARG_TEXT_P(0);
1152 text *arg2 = PG_GETARG_TEXT_P(1);
1155 result = internal_text_pattern_compare(arg1, arg2);
1157 PG_FREE_IF_COPY(arg1, 0);
1158 PG_FREE_IF_COPY(arg2, 1);
1160 PG_RETURN_BOOL(result > 0);
1165 text_pattern_ne(PG_FUNCTION_ARGS)
1167 text *arg1 = PG_GETARG_TEXT_P(0);
1168 text *arg2 = PG_GETARG_TEXT_P(1);
1171 if (VARSIZE(arg1) != VARSIZE(arg2))
1174 result = internal_text_pattern_compare(arg1, arg2);
1176 PG_FREE_IF_COPY(arg1, 0);
1177 PG_FREE_IF_COPY(arg2, 1);
1179 PG_RETURN_BOOL(result != 0);
1184 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1186 text *arg1 = PG_GETARG_TEXT_P(0);
1187 text *arg2 = PG_GETARG_TEXT_P(1);
1190 result = internal_text_pattern_compare(arg1, arg2);
1192 PG_FREE_IF_COPY(arg1, 0);
1193 PG_FREE_IF_COPY(arg2, 1);
1195 PG_RETURN_INT32(result);
1199 /*-------------------------------------------------------------
1202 * get the number of bytes contained in an instance of type 'bytea'
1203 *-------------------------------------------------------------
1206 byteaoctetlen(PG_FUNCTION_ARGS)
1208 PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
1213 * takes two bytea* and returns a bytea* that is the concatenation of
1216 * Cloned from textcat and modified as required.
1219 byteacat(PG_FUNCTION_ARGS)
1221 bytea *t1 = PG_GETARG_BYTEA_P(0);
1222 bytea *t2 = PG_GETARG_BYTEA_P(1);
1229 len1 = (VARSIZE(t1) - VARHDRSZ);
1233 len2 = (VARSIZE(t2) - VARHDRSZ);
1237 len = len1 + len2 + VARHDRSZ;
1238 result = (bytea *) palloc(len);
1240 /* Set size of result string... */
1241 VARATT_SIZEP(result) = len;
1243 /* Fill data field of result string... */
1244 ptr = VARDATA(result);
1246 memcpy(ptr, VARDATA(t1), len1);
1248 memcpy(ptr + len1, VARDATA(t2), len2);
1250 PG_RETURN_BYTEA_P(result);
1253 #define PG_STR_GET_BYTEA(str_) \
1254 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1257 * Return a substring starting at the specified position.
1258 * Cloned from text_substr and modified as required.
1262 * - starting position (is one-based)
1263 * - string length (optional)
1265 * If the starting position is zero or less, then return from the start of the string
1266 * adjusting the length to be consistent with the "negative start" per SQL92.
1267 * If the length is less than zero, an ERROR is thrown. If no third argument
1268 * (length) is provided, the length to the end of the string is assumed.
1271 bytea_substr(PG_FUNCTION_ARGS)
1273 int S = PG_GETARG_INT32(1); /* start position */
1274 int S1; /* adjusted start position */
1275 int L1; /* adjusted substring length */
1279 if (fcinfo->nargs == 2)
1282 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs
1283 * everything to the end of the string if we pass it a negative
1291 int E = S + PG_GETARG_INT32(2);
1294 * A negative value for L is the only way for the end position to
1295 * be before the start. SQL99 says to throw an error.
1298 elog(ERROR, "negative substring length not allowed");
1301 * A zero or negative value for the end position can happen if the
1302 * start was negative or one. SQL99 says to return a zero-length
1306 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1312 * If the start position is past the end of the string, SQL99 says to
1313 * return a zero-length string -- PG_GETARG_BYTEA_P_SLICE() will do
1314 * that for us. Convert to zero-based starting position
1316 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1320 * bytea_substr_no_len -
1321 * Wrapper to avoid opr_sanity failure due to
1322 * one function accepting a different number of args.
1325 bytea_substr_no_len(PG_FUNCTION_ARGS)
1327 return bytea_substr(fcinfo);
1332 * Return the position of the specified substring.
1333 * Implements the SQL92 POSITION() function.
1334 * Cloned from textpos and modified as required.
1337 byteapos(PG_FUNCTION_ARGS)
1339 bytea *t1 = PG_GETARG_BYTEA_P(0);
1340 bytea *t2 = PG_GETARG_BYTEA_P(1);
1349 if (VARSIZE(t2) <= VARHDRSZ)
1350 PG_RETURN_INT32(1); /* result for empty pattern */
1352 len1 = (VARSIZE(t1) - VARHDRSZ);
1353 len2 = (VARSIZE(t2) - VARHDRSZ);
1360 for (p = 0; p <= px; p++)
1362 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1370 PG_RETURN_INT32(pos);
1373 /*-------------------------------------------------------------
1376 * this routine treats "bytea" as an array of bytes.
1377 * It returns the Nth byte (a number between 0 and 255).
1378 *-------------------------------------------------------------
1381 byteaGetByte(PG_FUNCTION_ARGS)
1383 bytea *v = PG_GETARG_BYTEA_P(0);
1384 int32 n = PG_GETARG_INT32(1);
1388 len = VARSIZE(v) - VARHDRSZ;
1390 if (n < 0 || n >= len)
1391 elog(ERROR, "byteaGetByte: index %d out of range [0..%d]",
1394 byte = ((unsigned char *) VARDATA(v))[n];
1396 PG_RETURN_INT32(byte);
1399 /*-------------------------------------------------------------
1402 * This routine treats a "bytea" type like an array of bits.
1403 * It returns the value of the Nth bit (0 or 1).
1405 *-------------------------------------------------------------
1408 byteaGetBit(PG_FUNCTION_ARGS)
1410 bytea *v = PG_GETARG_BYTEA_P(0);
1411 int32 n = PG_GETARG_INT32(1);
1417 len = VARSIZE(v) - VARHDRSZ;
1419 if (n < 0 || n >= len * 8)
1420 elog(ERROR, "byteaGetBit: index %d out of range [0..%d]",
1426 byte = ((unsigned char *) VARDATA(v))[byteNo];
1428 if (byte & (1 << bitNo))
1434 /*-------------------------------------------------------------
1437 * Given an instance of type 'bytea' creates a new one with
1438 * the Nth byte set to the given value.
1440 *-------------------------------------------------------------
1443 byteaSetByte(PG_FUNCTION_ARGS)
1445 bytea *v = PG_GETARG_BYTEA_P(0);
1446 int32 n = PG_GETARG_INT32(1);
1447 int32 newByte = PG_GETARG_INT32(2);
1451 len = VARSIZE(v) - VARHDRSZ;
1453 if (n < 0 || n >= len)
1454 elog(ERROR, "byteaSetByte: index %d out of range [0..%d]",
1458 * Make a copy of the original varlena.
1460 res = (bytea *) palloc(VARSIZE(v));
1461 memcpy((char *) res, (char *) v, VARSIZE(v));
1466 ((unsigned char *) VARDATA(res))[n] = newByte;
1468 PG_RETURN_BYTEA_P(res);
1471 /*-------------------------------------------------------------
1474 * Given an instance of type 'bytea' creates a new one with
1475 * the Nth bit set to the given value.
1477 *-------------------------------------------------------------
1480 byteaSetBit(PG_FUNCTION_ARGS)
1482 bytea *v = PG_GETARG_BYTEA_P(0);
1483 int32 n = PG_GETARG_INT32(1);
1484 int32 newBit = PG_GETARG_INT32(2);
1492 len = VARSIZE(v) - VARHDRSZ;
1494 if (n < 0 || n >= len * 8)
1495 elog(ERROR, "byteaSetBit: index %d out of range [0..%d]",
1504 if (newBit != 0 && newBit != 1)
1505 elog(ERROR, "byteaSetBit: new bit must be 0 or 1");
1508 * Make a copy of the original varlena.
1510 res = (bytea *) palloc(VARSIZE(v));
1511 memcpy((char *) res, (char *) v, VARSIZE(v));
1516 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1519 newByte = oldByte & (~(1 << bitNo));
1521 newByte = oldByte | (1 << bitNo);
1523 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1525 PG_RETURN_BYTEA_P(res);
1530 * Converts a text type to a Name type.
1533 text_name(PG_FUNCTION_ARGS)
1535 text *s = PG_GETARG_TEXT_P(0);
1539 len = VARSIZE(s) - VARHDRSZ;
1541 /* Truncate oversize input */
1542 if (len >= NAMEDATALEN)
1543 len = NAMEDATALEN - 1;
1546 printf("text- convert string length %d (%d) ->%d\n",
1547 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1550 result = (Name) palloc(NAMEDATALEN);
1551 memcpy(NameStr(*result), VARDATA(s), len);
1553 /* now null pad to full length... */
1554 while (len < NAMEDATALEN)
1556 *(NameStr(*result) + len) = '\0';
1560 PG_RETURN_NAME(result);
1564 * Converts a Name type to a text type.
1567 name_text(PG_FUNCTION_ARGS)
1569 Name s = PG_GETARG_NAME(0);
1573 len = strlen(NameStr(*s));
1576 printf("text- convert string length %d (%d) ->%d\n",
1577 VARSIZE(s) - VARHDRSZ, VARSIZE(s), len);
1580 result = palloc(VARHDRSZ + len);
1581 VARATT_SIZEP(result) = VARHDRSZ + len;
1582 memcpy(VARDATA(result), NameStr(*s), len);
1584 PG_RETURN_TEXT_P(result);
1589 * textToQualifiedNameList - convert a text object to list of names
1591 * This implements the input parsing needed by nextval() and other
1592 * functions that take a text parameter representing a qualified name.
1593 * We split the name at dots, downcase if not double-quoted, and
1594 * truncate names if they're too long.
1597 textToQualifiedNameList(text *textval, const char *caller)
1604 /* Convert to C string (handles possible detoasting). */
1605 /* Note we rely on being able to modify rawname below. */
1606 rawname = DatumGetCString(DirectFunctionCall1(textout,
1607 PointerGetDatum(textval)));
1609 if (!SplitIdentifierString(rawname, '.', &namelist))
1610 elog(ERROR, "%s: invalid name syntax", caller);
1612 if (namelist == NIL)
1613 elog(ERROR, "%s: invalid name syntax", caller);
1615 foreach(l, namelist)
1617 char *curname = (char *) lfirst(l);
1619 result = lappend(result, makeString(pstrdup(curname)));
1629 * SplitIdentifierString --- parse a string containing identifiers
1631 * This is the guts of textToQualifiedNameList, and is exported for use in
1632 * other situations such as parsing GUC variables. In the GUC case, it's
1633 * important to avoid memory leaks, so the API is designed to minimize the
1634 * amount of stuff that needs to be allocated and freed.
1637 * rawstring: the input string; must be overwritable! On return, it's
1638 * been modified to contain the separated identifiers.
1639 * separator: the separator punctuation expected between identifiers
1640 * (typically '.' or ','). Whitespace may also appear around
1643 * namelist: filled with a palloc'd list of pointers to identifiers within
1644 * rawstring. Caller should freeList() this even on error return.
1646 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
1648 * Note that an empty string is considered okay here, though not in
1649 * textToQualifiedNameList.
1652 SplitIdentifierString(char *rawstring, char separator,
1655 char *nextp = rawstring;
1660 while (isspace((unsigned char) *nextp))
1661 nextp++; /* skip leading whitespace */
1664 return true; /* allow empty string */
1666 /* At the top of the loop, we are at start of a new identifier. */
1675 /* Quoted name --- collapse quote-quote pairs, no downcasing */
1676 curname = nextp + 1;
1679 endp = strchr(nextp + 1, '\"');
1681 return false; /* mismatched quotes */
1682 if (endp[1] != '\"')
1683 break; /* found end of quoted name */
1684 /* Collapse adjacent quotes into one quote, and look again */
1685 memmove(endp, endp + 1, strlen(endp));
1688 /* endp now points at the terminating quote */
1693 /* Unquoted name --- extends to separator or whitespace */
1695 while (*nextp && *nextp != separator &&
1696 !isspace((unsigned char) *nextp))
1699 * It's important that this match the identifier
1700 * downcasing code used by backend/parser/scan.l.
1702 if (isupper((unsigned char) *nextp))
1703 *nextp = tolower((unsigned char) *nextp);
1707 if (curname == nextp)
1708 return false; /* empty unquoted name not allowed */
1711 while (isspace((unsigned char) *nextp))
1712 nextp++; /* skip trailing whitespace */
1714 if (*nextp == separator)
1717 while (isspace((unsigned char) *nextp))
1718 nextp++; /* skip leading whitespace for next */
1719 /* we expect another name, so done remains false */
1721 else if (*nextp == '\0')
1724 return false; /* invalid syntax */
1726 /* Now safe to overwrite separator with a null */
1729 /* Truncate name if it's overlength; again, should match scan.l */
1730 curlen = strlen(curname);
1731 if (curlen >= NAMEDATALEN)
1733 curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
1734 curname[curlen] = '\0';
1738 * Finished isolating current name --- add it to list
1740 *namelist = lappend(*namelist, curname);
1742 /* Loop back if we didn't reach end of string */
/*****************************************************************************
 *	Comparison Functions used for bytea
 *
 * Note: btree indexes need these routines not to leak memory; therefore,
 * be careful to free working copies of toasted datums.  Most places don't
 * need to be so careful.
 *****************************************************************************/
1758 byteaeq(PG_FUNCTION_ARGS)
1760 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1761 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1766 len1 = VARSIZE(arg1) - VARHDRSZ;
1767 len2 = VARSIZE(arg2) - VARHDRSZ;
1769 /* fast path for different-length inputs */
1773 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
1775 PG_FREE_IF_COPY(arg1, 0);
1776 PG_FREE_IF_COPY(arg2, 1);
1778 PG_RETURN_BOOL(result);
1782 byteane(PG_FUNCTION_ARGS)
1784 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1785 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1790 len1 = VARSIZE(arg1) - VARHDRSZ;
1791 len2 = VARSIZE(arg2) - VARHDRSZ;
1793 /* fast path for different-length inputs */
1797 result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
1799 PG_FREE_IF_COPY(arg1, 0);
1800 PG_FREE_IF_COPY(arg2, 1);
1802 PG_RETURN_BOOL(result);
1806 bytealt(PG_FUNCTION_ARGS)
1808 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1809 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1814 len1 = VARSIZE(arg1) - VARHDRSZ;
1815 len2 = VARSIZE(arg2) - VARHDRSZ;
1817 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1819 PG_FREE_IF_COPY(arg1, 0);
1820 PG_FREE_IF_COPY(arg2, 1);
1822 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
1826 byteale(PG_FUNCTION_ARGS)
1828 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1829 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1834 len1 = VARSIZE(arg1) - VARHDRSZ;
1835 len2 = VARSIZE(arg2) - VARHDRSZ;
1837 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1839 PG_FREE_IF_COPY(arg1, 0);
1840 PG_FREE_IF_COPY(arg2, 1);
1842 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
1846 byteagt(PG_FUNCTION_ARGS)
1848 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1849 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1854 len1 = VARSIZE(arg1) - VARHDRSZ;
1855 len2 = VARSIZE(arg2) - VARHDRSZ;
1857 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1859 PG_FREE_IF_COPY(arg1, 0);
1860 PG_FREE_IF_COPY(arg2, 1);
1862 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
1866 byteage(PG_FUNCTION_ARGS)
1868 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1869 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1874 len1 = VARSIZE(arg1) - VARHDRSZ;
1875 len2 = VARSIZE(arg2) - VARHDRSZ;
1877 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1879 PG_FREE_IF_COPY(arg1, 0);
1880 PG_FREE_IF_COPY(arg2, 1);
1882 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1886 byteacmp(PG_FUNCTION_ARGS)
1888 bytea *arg1 = PG_GETARG_BYTEA_P(0);
1889 bytea *arg2 = PG_GETARG_BYTEA_P(1);
1894 len1 = VARSIZE(arg1) - VARHDRSZ;
1895 len2 = VARSIZE(arg2) - VARHDRSZ;
1897 cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1898 if ((cmp == 0) && (len1 != len2))
1899 cmp = (len1 < len2) ? -1 : 1;
1901 PG_FREE_IF_COPY(arg1, 0);
1902 PG_FREE_IF_COPY(arg2, 1);
1904 PG_RETURN_INT32(cmp);
1909 * replace all occurrences of 'old_sub_str' in 'orig_str'
1910 * with 'new_sub_str' to form 'new_str'
1912 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
1913 * otherwise returns 'new_str'
1916 replace_text(PG_FUNCTION_ARGS)
1923 text *src_text = PG_GETARG_TEXT_P(0);
1924 int src_text_len = TEXTLEN(src_text);
1925 text *from_sub_text = PG_GETARG_TEXT_P(1);
1926 int from_sub_text_len = TEXTLEN(from_sub_text);
1927 text *to_sub_text = PG_GETARG_TEXT_P(2);
1928 char *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
1929 StringInfo str = makeStringInfo();
1931 if (src_text_len == 0 || from_sub_text_len == 0)
1932 PG_RETURN_TEXT_P(src_text);
1934 buf_text = TEXTDUP(src_text);
1935 curr_posn = TEXTPOS(buf_text, from_sub_text);
1937 while (curr_posn > 0)
1939 left_text = LEFT(buf_text, from_sub_text);
1940 right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
1942 appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
1943 appendStringInfoString(str, to_sub_str);
1947 buf_text = right_text;
1948 curr_posn = TEXTPOS(buf_text, from_sub_text);
1951 appendStringInfoString(str, PG_TEXT_GET_STR(buf_text));
1954 ret_text = PG_STR_GET_TEXT(str->data);
1958 PG_RETURN_TEXT_P(ret_text);
1963 * parse input string
1964 * return ord item (1 based)
1965 * based on provided field separator
1968 split_text(PG_FUNCTION_ARGS)
1970 text *inputstring = PG_GETARG_TEXT_P(0);
1971 int inputstring_len = TEXTLEN(inputstring);
1972 text *fldsep = PG_GETARG_TEXT_P(1);
1973 int fldsep_len = TEXTLEN(fldsep);
1974 int fldnum = PG_GETARG_INT32(2);
1979 /* return empty string for empty input string */
1980 if (inputstring_len < 1)
1981 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1983 /* empty field separator */
1986 if (fldnum == 1) /* first field - just return the input
1988 PG_RETURN_TEXT_P(inputstring);
1989 else /* otherwise return an empty string */
1990 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
1993 /* field number is 1 based */
1995 elog(ERROR, "field position must be > 0");
1997 start_posn = text_position(PointerGetDatum(inputstring),
1998 PointerGetDatum(fldsep),
2000 end_posn = text_position(PointerGetDatum(inputstring),
2001 PointerGetDatum(fldsep),
2004 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2006 if (fldnum == 1) /* first field - just return the input
2008 PG_RETURN_TEXT_P(inputstring);
2009 else /* otherwise return an empty string */
2010 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2012 else if ((start_posn != 0) && (end_posn == 0))
2014 /* last field requested */
2015 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
2016 PG_RETURN_TEXT_P(result_text);
2018 else if ((start_posn == 0) && (end_posn != 0))
2020 /* first field requested */
2021 result_text = LEFT(inputstring, fldsep);
2022 PG_RETURN_TEXT_P(result_text);
2026 /* prior to last field requested */
2027 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
2028 PG_RETURN_TEXT_P(result_text);
2034 * parse input string
2035 * return text array of elements
2036 * based on provided field separator
2039 text_to_array(PG_FUNCTION_ARGS)
2041 text *inputstring = PG_GETARG_TEXT_P(0);
2042 int inputstring_len = TEXTLEN(inputstring);
2043 text *fldsep = PG_GETARG_TEXT_P(1);
2044 int fldsep_len = TEXTLEN(fldsep);
2048 text *result_text = NULL;
2049 ArrayBuildState *astate = NULL;
2050 MemoryContext oldcontext = CurrentMemoryContext;
2052 /* return NULL for empty input string */
2053 if (inputstring_len < 1)
2056 /* empty field separator
2057 * return one element, 1D, array using the input string */
2059 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2060 CStringGetDatum(inputstring), 1));
2062 /* start with end position holding the initial start position */
2064 for (fldnum=1;;fldnum++) /* field number is 1 based */
2067 bool disnull = false;
2069 start_posn = end_posn;
2070 end_posn = text_position(PointerGetDatum(inputstring),
2071 PointerGetDatum(fldsep),
2074 if ((start_posn == 0) && (end_posn == 0)) /* fldsep not found */
2079 * return one element, 1D, array using the input string */
2080 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2081 CStringGetDatum(inputstring), 1));
2085 /* otherwise create array and exit */
2086 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, oldcontext));
2089 else if ((start_posn != 0) && (end_posn == 0))
2091 /* last field requested */
2092 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
2094 else if ((start_posn == 0) && (end_posn != 0))
2096 /* first field requested */
2097 result_text = LEFT(inputstring, fldsep);
2101 /* prior to last field requested */
2102 result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn - fldsep_len, false);
2105 /* stash away current value */
2106 dvalue = PointerGetDatum(result_text);
2107 astate = accumArrayResult(astate, dvalue,
2108 disnull, TEXTOID, oldcontext);
2112 /* never reached -- keep compiler quiet */
2118 * concatenate Cstring representation of input array elements
2119 * using provided field separator
2122 array_to_text(PG_FUNCTION_ARGS)
2124 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
2125 char *fldsep = PG_TEXTARG_GET_STR(1);
2126 int nitems, *dims, ndims;
2134 FmgrInfo outputproc;
2136 StringInfo result_str = makeStringInfo();
2138 ArrayMetaState *my_extra;
2140 p = ARR_DATA_PTR(v);
2141 ndims = ARR_NDIM(v);
2143 nitems = ArrayGetNItems(ndims, dims);
2145 /* if there are no elements, return an empty string */
2147 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
2149 element_type = ARR_ELEMTYPE(v);
2152 * We arrange to look up info about element type, including its output
2153 * conversion proc only once per series of calls, assuming the element
2154 * type doesn't change underneath us.
2156 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2157 if (my_extra == NULL)
2159 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2160 sizeof(ArrayMetaState));
2161 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2162 my_extra->element_type = InvalidOid;
2165 if (my_extra->element_type != element_type)
2167 /* Get info about element type, including its output conversion proc */
2168 get_type_metadata(element_type, IOFunc_output,
2169 &typlen, &typbyval, &typdelim,
2170 &typelem, &typoutput, &typalign);
2171 fmgr_info(typoutput, &outputproc);
2173 my_extra->element_type = element_type;
2174 my_extra->typlen = typlen;
2175 my_extra->typbyval = typbyval;
2176 my_extra->typdelim = typdelim;
2177 my_extra->typelem = typelem;
2178 my_extra->typiofunc = typoutput;
2179 my_extra->typalign = typalign;
2180 my_extra->proc = outputproc;
2184 typlen = my_extra->typlen;
2185 typbyval = my_extra->typbyval;
2186 typdelim = my_extra->typdelim;
2187 typelem = my_extra->typelem;
2188 typoutput = my_extra->typiofunc;
2189 typalign = my_extra->typalign;
2190 outputproc = my_extra->proc;
2193 for (i = 0; i < nitems; i++)
2198 itemvalue = fetch_att(p, typbyval, typlen);
2200 value = DatumGetCString(FunctionCall3(&outputproc,
2202 ObjectIdGetDatum(typelem),
2203 Int32GetDatum(-1)));
2206 appendStringInfo(result_str, "%s%s", fldsep, value);
2208 appendStringInfo(result_str, "%s", value);
2210 p = att_addlength(p, typlen, PointerGetDatum(p));
2211 p = (char *) att_align(p, typalign);
2214 PG_RETURN_TEXT_P(PG_STR_GET_TEXT(result_str->data));
2219 * Convert a int32 to a string containing a base 16 (hex) representation of
2223 to_hex32(PG_FUNCTION_ARGS)
2225 static char digits[] = "0123456789abcdef";
2226 char buf[32]; /* bigger than needed, but reasonable */
2229 int32 value = PG_GETARG_INT32(0);
2231 ptr = buf + sizeof(buf) - 1;
2236 *--ptr = digits[value % HEXBASE];
2238 } while (ptr > buf && value);
2240 result_text = PG_STR_GET_TEXT(ptr);
2241 PG_RETURN_TEXT_P(result_text);
2245 * Convert a int64 to a string containing a base 16 (hex) representation of
2249 to_hex64(PG_FUNCTION_ARGS)
2251 static char digits[] = "0123456789abcdef";
2252 char buf[32]; /* bigger than needed, but reasonable */
2255 int64 value = PG_GETARG_INT64(0);
2257 ptr = buf + sizeof(buf) - 1;
2262 *--ptr = digits[value % HEXBASE];
2264 } while (ptr > buf && value);
2266 result_text = PG_STR_GET_TEXT(ptr);
2267 PG_RETURN_TEXT_P(result_text);
2271 * Create an md5 hash of a text string and return it as hex
2273 * md5 produces a 16 byte (128 bit) hash; double it for hex
2275 #define MD5_HASH_LEN 32
2278 md5_text(PG_FUNCTION_ARGS)
2280 char *buff = PG_TEXT_GET_STR(PG_GETARG_TEXT_P(0));
2281 size_t len = strlen(buff);
2285 /* leave room for the terminating '\0' */
2286 hexsum = (char *) palloc(MD5_HASH_LEN + 1);
2288 /* get the hash result */
2289 md5_hash((void *) buff, len, hexsum);
2291 /* convert to text and return it */
2292 result_text = PG_STR_GET_TEXT(hexsum);
2293 PG_RETURN_TEXT_P(result_text);