1 /*-------------------------------------------------------------------------
3 * Oracle compatible functions.
5 * Copyright (c) 1996-2007, PostgreSQL Global Development Group
7 * Author: Edmund Mergl <E.Mergl@bawue.de>
8 * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
12 * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.72 2007/09/21 22:52:52 tgl Exp $
14 *-------------------------------------------------------------------------
21 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
22 * declare them in <wchar.h>.
31 #include "utils/builtins.h"
32 #include "utils/pg_locale.h"
33 #include "mb/pg_wchar.h"
37 * If the system provides the needed functions for wide-character manipulation
38 * (which are all standardized by C99), then we implement upper/lower/initcap
39 * using wide-character functions. Otherwise we use the traditional <ctype.h>
40 * functions, which of course will not work as desired in multibyte character
41 * sets. Note that in either case we are effectively assuming that the
42 * database character encoding matches the encoding implied by LC_CTYPE.
44 * We assume if we have these two functions, we have their friends too, and
45 * can use the wide-character method.
47 #if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
48 #define USE_WIDE_UPPER_LOWER
/* Case-folding helpers that take and return NUL-terminated C strings. */
49 char *wstring_lower (char *str);
50 char *wstring_upper(char *str);
/*
 * Shared worker behind btrim, ltrim and rtrim.  The doltrim and dortrim
 * flags presumably select which end(s) of the string get trimmed.
 */
53 static text *dotrim(const char *string, int stringlen,
54 const char *set, int setlen,
55 bool doltrim, bool dortrim);
58 #ifdef USE_WIDE_UPPER_LOWER
/*
 * texttowcs: copy the payload of a text value into a NUL-terminated scratch
 * buffer and convert it with mbstowcs() into a freshly palloc'd wchar_t
 * array of nbytes + 1 elements.
 *
 * Reports "out of memory" when nbytes is too large for the wchar_t array
 * size to be computed safely, and an ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
 * error (after giving pg_verifymbstr() the first chance to complain) when
 * mbstowcs() returns (size_t) -1.
 */
61 * Convert a TEXT value into a palloc'd wchar string.
64 texttowcs(const text *txt)
66 int nbytes = VARSIZE_ANY_EXHDR(txt);
71 /* Overflow paranoia */
73 nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
75 (errcode(ERRCODE_OUT_OF_MEMORY),
76 errmsg("out of memory")));
78 /* Need a null-terminated version of the input */
79 workstr = (char *) palloc(nbytes + 1);
80 memcpy(workstr, VARDATA_ANY(txt), nbytes);
81 workstr[nbytes] = '\0';
83 /* Output workspace cannot have more codes than input bytes */
84 result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
86 /* Do the conversion */
87 ncodes = mbstowcs(result, workstr, nbytes + 1);
89 if (ncodes == (size_t) -1)
92 * Invalid multibyte character encountered. We try to give a useful
93 * error message by letting pg_verifymbstr check the string. But it's
94 * possible that the string is OK to us, and not OK to mbstowcs ---
95 * this suggests that the LC_CTYPE locale is different from the
96 * database encoding. Give a generic error message if verifymbstr
97 * can't find anything wrong.
99 pg_verifymbstr(workstr, nbytes, false);
101 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
102 errmsg("invalid multibyte character for locale"),
103 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
106 Assert(ncodes <= (size_t) nbytes);
/*
 * wcstotext: convert ncodes wide characters into a palloc'd text value with
 * wcstombs().  The output buffer is sized for (ncodes + 1) * MB_CUR_MAX
 * bytes plus the varlena header, and the final varlena size is set from the
 * byte count that wcstombs() reports.
 *
 * Reports "out of memory" when ncodes is too large for that size to be
 * computed safely, and ERRCODE_CHARACTER_NOT_IN_REPERTOIRE when wcstombs()
 * returns (size_t) -1.
 */
113 * Convert a wchar string into a palloc'd TEXT value. The wchar string
114 * must be zero-terminated, but we also require the caller to pass the string
115 * length, since it will know it anyway in current uses.
118 wcstotext(const wchar_t *str, int ncodes)
123 /* Overflow paranoia */
125 ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
127 (errcode(ERRCODE_OUT_OF_MEMORY),
128 errmsg("out of memory")));
130 /* Make workspace certainly large enough for result */
131 result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
133 /* Do the conversion */
134 nbytes = wcstombs((char *) VARDATA(result), str,
135 (ncodes + 1) * MB_CUR_MAX);
137 if (nbytes == (size_t) -1)
139 /* Invalid multibyte character encountered ... shouldn't happen */
141 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
142 errmsg("invalid multibyte character for locale")));
145 Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
147 SET_VARSIZE(result, nbytes + VARHDRSZ);
151 #endif /* USE_WIDE_UPPER_LOWER */
155 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
156 * To make use of the upper/lower functionality, we need to map UTF8 to
157 * UTF16, which for some reason mbstowcs and wcstombs won't do for us.
158 * This conversion layer takes care of it.
163 /* texttowcs for the case of UTF8 to UTF16 */
/*
 * Converts the payload of txt with MultiByteToWideChar(CP_UTF8, ...) into a
 * palloc'd wchar_t array of nbytes + 1 elements.  A zero return from the
 * conversion is reported as an invalid-multibyte-character error, after
 * pg_verifymbstr() has had the first chance to complain about the input.
 */
165 win32_utf8_texttowcs(const text *txt)
167 int nbytes = VARSIZE_ANY_EXHDR(txt);
171 /* Overflow paranoia */
173 nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
175 (errcode(ERRCODE_OUT_OF_MEMORY),
176 errmsg("out of memory")));
178 /* Output workspace cannot have more codes than input bytes */
179 result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
181 /* the Windows conversion API does not work for zero-length input */
186 /* Do the conversion */
187 r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes,
190 if (!r) /* assume it's NO_UNICODE_TRANSLATION */
192 /* see notes above about error reporting */
193 pg_verifymbstr(VARDATA_ANY(txt), nbytes, false);
195 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
196 errmsg("invalid multibyte character for locale"),
197 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
207 /* wcstotext for the case of UTF16 to UTF8 */
/*
 * Calls WideCharToMultiByte() twice: first with a NULL output buffer to
 * learn the required byte count, then again to fill a palloc'd text value.
 * The reported count is treated as including a terminating null, so one
 * byte is subtracted when the varlena size is set.  A zero return from
 * either call is reported as a translation failure.
 */
209 win32_utf8_wcstotext(const wchar_t *str)
215 nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
216 if (nbytes == 0) /* shouldn't happen */
218 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
219 errmsg("UTF-16 to UTF-8 translation failed: %lu",
222 result = palloc(nbytes + VARHDRSZ);
224 r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
226 if (r == 0) /* shouldn't happen */
228 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
229 errmsg("UTF-16 to UTF-8 translation failed: %lu",
232 SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */
237 /* interface layer to check which encoding is in use */
/*
 * Dispatcher: a UTF8 database goes through win32_utf8_texttowcs(); any other
 * encoding falls back to the generic texttowcs().
 */
240 win32_texttowcs(const text *txt)
242 if (GetDatabaseEncoding() == PG_UTF8)
243 return win32_utf8_texttowcs(txt);
245 return texttowcs(txt);
/*
 * Dispatcher: a UTF8 database goes through win32_utf8_wcstotext(), which
 * takes only the string and does not use ncodes; any other encoding falls
 * back to the generic wcstotext().
 */
249 win32_wcstotext(const wchar_t *str, int ncodes)
251 if (GetDatabaseEncoding() == PG_UTF8)
252 return win32_utf8_wcstotext(str);
254 return wcstotext(str, ncodes);
257 /* use macros to cause routines below to call interface layer */
/* After these defines, the plain names expand to the win32_ wrappers. */
259 #define texttowcs win32_texttowcs
260 #define wcstotext win32_wcstotext
263 #ifdef USE_WIDE_UPPER_LOWER
265 * wstring_upper and wstring_lower are used for correct multibyte upper/lower
266 * transformations of localized strings. Returns pointers to transformed
/*
 * wstring_upper: wrap the NUL-terminated input in a temporary text value,
 * convert it to wide characters, apply towupper() to every element up to
 * the terminating zero, convert back to text, and copy the bytes into a new
 * NUL-terminated palloc'd buffer.
 */
270 wstring_upper(char *str)
276 int nbytes = strlen(str);
279 in_text = palloc(nbytes + VARHDRSZ);
280 memcpy(VARDATA(in_text), str, nbytes);
281 SET_VARSIZE(in_text, nbytes + VARHDRSZ);
283 workspace = texttowcs(in_text);
285 for (i = 0; workspace[i] != 0; i++)
286 workspace[i] = towupper(workspace[i]);
288 out_text = wcstotext(workspace, i);
290 nbytes = VARSIZE(out_text) - VARHDRSZ;
291 result = palloc(nbytes + 1);
292 memcpy(result, VARDATA(out_text), nbytes);
294 result[nbytes] = '\0';
/*
 * wstring_lower: wrap the NUL-terminated input in a temporary text value,
 * convert it to wide characters, apply towlower() to every element up to
 * the terminating zero, convert back to text, and copy the bytes into a new
 * NUL-terminated palloc'd buffer.
 */
304 wstring_lower(char *str)
310 int nbytes = strlen(str);
313 in_text = palloc(nbytes + VARHDRSZ);
314 memcpy(VARDATA(in_text), str, nbytes);
315 SET_VARSIZE(in_text, nbytes + VARHDRSZ);
317 workspace = texttowcs(in_text);
319 for (i = 0; workspace[i] != 0; i++)
320 workspace[i] = towlower(workspace[i]);
322 out_text = wcstotext(workspace, i);
324 nbytes = VARSIZE(out_text) - VARHDRSZ;
325 result = palloc(nbytes + 1);
326 memcpy(result, VARDATA(out_text), nbytes);
328 result[nbytes] = '\0';
336 #endif /* USE_WIDE_UPPER_LOWER */
338 /********************************************************************
344 * text lower(text string)
348 * Returns string, with all letters forced to lowercase.
 *
 * Two code paths: when wide-character support is compiled in, the database
 * encoding is multibyte and LC_CTYPE is not C, the value is converted to
 * wchar_t, folded with towlower() and converted back.  Otherwise a private
 * copy of the argument is folded in place with tolower().
350 ********************************************************************/
353 lower(PG_FUNCTION_ARGS)
355 #ifdef USE_WIDE_UPPER_LOWER
358 * Use wide char code only when max encoding length > 1 and ctype != C.
359 * Some operating systems fail with multi-byte encodings and a C locale.
360 * Also, for a C locale there is no need to process as multibyte.
362 if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
364 text *string = PG_GETARG_TEXT_PP(0);
369 workspace = texttowcs(string);
371 for (i = 0; workspace[i] != 0; i++)
372 workspace[i] = towlower(workspace[i]);
374 result = wcstotext(workspace, i);
378 PG_RETURN_TEXT_P(result);
381 #endif /* USE_WIDE_UPPER_LOWER */
383 text *string = PG_GETARG_TEXT_P_COPY(0);
388 * Since we copied the string, we can scribble directly on the value
390 ptr = VARDATA(string);
391 m = VARSIZE(string) - VARHDRSZ;
395 *ptr = tolower((unsigned char) *ptr);
399 PG_RETURN_TEXT_P(string);
404 /********************************************************************
410 * text upper(text string)
414 * Returns string, with all letters forced to uppercase.
 *
 * Two code paths: when wide-character support is compiled in, the database
 * encoding is multibyte and LC_CTYPE is not C, the value is converted to
 * wchar_t, folded with towupper() and converted back.  Otherwise a private
 * copy of the argument is folded in place with toupper().
416 ********************************************************************/
419 upper(PG_FUNCTION_ARGS)
421 #ifdef USE_WIDE_UPPER_LOWER
424 * Use wide char code only when max encoding length > 1 and ctype != C.
425 * Some operating systems fail with multi-byte encodings and a C locale.
426 * Also, for a C locale there is no need to process as multibyte.
428 if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
430 text *string = PG_GETARG_TEXT_PP(0);
435 workspace = texttowcs(string);
437 for (i = 0; workspace[i] != 0; i++)
438 workspace[i] = towupper(workspace[i]);
440 result = wcstotext(workspace, i);
444 PG_RETURN_TEXT_P(result);
447 #endif /* USE_WIDE_UPPER_LOWER */
449 text *string = PG_GETARG_TEXT_P_COPY(0);
454 * Since we copied the string, we can scribble directly on the value
456 ptr = VARDATA(string);
457 m = VARSIZE(string) - VARHDRSZ;
461 *ptr = toupper((unsigned char) *ptr);
465 PG_RETURN_TEXT_P(string);
470 /********************************************************************
476 * text initcap(text string)
480 * Returns string, with first letter of each word in uppercase, all
481 * other letters in lowercase. A word is defined as a sequence of
482 * alphanumeric characters, delimited by non-alphanumeric
 *
 * Both code paths (wide-character and plain byte) keep a wasalnum flag that
 * records whether the character just processed was alphanumeric; it is
 * refreshed after every character and presumably decides whether the next
 * one is lowercased (inside a word) or uppercased (start of a word).
485 ********************************************************************/
488 initcap(PG_FUNCTION_ARGS)
490 #ifdef USE_WIDE_UPPER_LOWER
493 * Use wide char code only when max encoding length > 1 and ctype != C.
494 * Some operating systems fail with multi-byte encodings and a C locale.
495 * Also, for a C locale there is no need to process as multibyte.
497 if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
499 text *string = PG_GETARG_TEXT_PP(0);
505 workspace = texttowcs(string);
507 for (i = 0; workspace[i] != 0; i++)
510 workspace[i] = towlower(workspace[i]);
512 workspace[i] = towupper(workspace[i]);
513 wasalnum = iswalnum(workspace[i]);
516 result = wcstotext(workspace, i);
520 PG_RETURN_TEXT_P(result);
523 #endif /* USE_WIDE_UPPER_LOWER */
525 text *string = PG_GETARG_TEXT_P_COPY(0);
531 * Since we copied the string, we can scribble directly on the value
533 ptr = VARDATA(string);
534 m = VARSIZE(string) - VARHDRSZ;
539 *ptr = tolower((unsigned char) *ptr);
541 *ptr = toupper((unsigned char) *ptr);
542 wasalnum = isalnum((unsigned char) *ptr);
546 PG_RETURN_TEXT_P(string);
551 /********************************************************************
557 * text lpad(text string1, int4 len, text string2)
561 * Returns string1, left-padded to length len with the sequence of
562 * characters in string2. If len is less than the length of string1,
563 * instead truncate (on the right) to len.
 *
 * Lengths are counted in characters (pg_mbstrlen_with_len), not bytes.  The
 * output buffer is sized as len * pg_database_encoding_max_length() bytes,
 * with an overflow check on that product, and the padding characters are
 * copied first, cycling through string2, followed by string1.  The final
 * varlena size is taken from how far the write pointer actually advanced.
565 ********************************************************************/
568 lpad(PG_FUNCTION_ARGS)
570 text *string1 = PG_GETARG_TEXT_PP(0);
571 int32 len = PG_GETARG_INT32(1);
572 text *string2 = PG_GETARG_TEXT_PP(2);
585 /* Negative len is silently taken as zero */
589 s1len = VARSIZE_ANY_EXHDR(string1);
591 s1len = 0; /* shouldn't happen */
593 s2len = VARSIZE_ANY_EXHDR(string2);
595 s2len = 0; /* shouldn't happen */
597 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
600 s1len = len; /* truncate string1 to len chars */
603 len = s1len; /* nothing to pad with, so don't pad */
605 bytelen = pg_database_encoding_max_length() * len;
607 /* check for integer overflow */
608 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
610 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
611 errmsg("requested length too large")));
613 ret = (text *) palloc(VARHDRSZ + bytelen);
617 ptr2 = ptr2start = VARDATA_ANY(string2);
618 ptr2end = ptr2 + s2len;
619 ptr_ret = VARDATA(ret);
623 int mlen = pg_mblen(ptr2);
625 memcpy(ptr_ret, ptr2, mlen);
628 if (ptr2 == ptr2end) /* wrap around at end of s2 */
632 ptr1 = VARDATA_ANY(string1);
636 int mlen = pg_mblen(ptr1);
638 memcpy(ptr_ret, ptr1, mlen);
643 SET_VARSIZE(ret, ptr_ret - (char *) ret);
645 PG_RETURN_TEXT_P(ret);
649 /********************************************************************
655 * text rpad(text string1, int4 len, text string2)
659 * Returns string1, right-padded to length len with the sequence of
660 * characters in string2. If len is less than the length of string1,
661 * instead truncate (on the right) to len.
 *
 * Lengths are counted in characters (pg_mbstrlen_with_len), not bytes.  The
 * output buffer is sized as len * pg_database_encoding_max_length() bytes,
 * with an overflow check on that product, and string1 is copied first,
 * followed by the padding characters, cycling through string2.  The final
 * varlena size is taken from how far the write pointer actually advanced.
663 ********************************************************************/
666 rpad(PG_FUNCTION_ARGS)
668 text *string1 = PG_GETARG_TEXT_PP(0);
669 int32 len = PG_GETARG_INT32(1);
670 text *string2 = PG_GETARG_TEXT_PP(2);
683 /* Negative len is silently taken as zero */
687 s1len = VARSIZE_ANY_EXHDR(string1);
689 s1len = 0; /* shouldn't happen */
691 s2len = VARSIZE_ANY_EXHDR(string2);
693 s2len = 0; /* shouldn't happen */
695 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
698 s1len = len; /* truncate string1 to len chars */
701 len = s1len; /* nothing to pad with, so don't pad */
703 bytelen = pg_database_encoding_max_length() * len;
705 /* Check for integer overflow */
706 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
708 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
709 errmsg("requested length too large")));
711 ret = (text *) palloc(VARHDRSZ + bytelen);
714 ptr1 = VARDATA_ANY(string1);
715 ptr_ret = VARDATA(ret);
719 int mlen = pg_mblen(ptr1);
721 memcpy(ptr_ret, ptr1, mlen);
726 ptr2 = ptr2start = VARDATA_ANY(string2);
727 ptr2end = ptr2 + s2len;
731 int mlen = pg_mblen(ptr2);
733 memcpy(ptr_ret, ptr2, mlen);
736 if (ptr2 == ptr2end) /* wrap around at end of s2 */
740 SET_VARSIZE(ret, ptr_ret - (char *) ret);
742 PG_RETURN_TEXT_P(ret);
746 /********************************************************************
752 * text btrim(text string, text set)
756 * Returns string with characters removed from the front and back
757 * up to the first character not in set.
 *
 * Thin wrapper: passes the data pointer and payload length of both
 * arguments to dotrim() and returns its result.
759 ********************************************************************/
762 btrim(PG_FUNCTION_ARGS)
764 text *string = PG_GETARG_TEXT_PP(0);
765 text *set = PG_GETARG_TEXT_PP(1);
768 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
769 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
772 PG_RETURN_TEXT_P(ret);
775 /********************************************************************
777 * btrim1 --- btrim with set fixed as ' '
 *
 * Single-argument variant: only the string is taken from the caller, and
 * the work is delegated to dotrim().
779 ********************************************************************/
782 btrim1(PG_FUNCTION_ARGS)
784 text *string = PG_GETARG_TEXT_PP(0);
787 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
791 PG_RETURN_TEXT_P(ret);
795 * Common implementation for btrim, ltrim, rtrim
/*
 * dotrim: string and set are raw byte ranges with explicit lengths.  When
 * both are non-empty, trimming works in one of two ways:
 *   - multibyte encodings: arrays of character start pointers and byte
 *     lengths are built for both inputs, and whole characters are compared
 *     by length plus memcmp();
 *   - single-byte encodings: bytes are compared directly.
 * The function finishes by copying the remaining stringlen bytes into a
 * newly palloc'd text value.
 */
798 dotrim(const char *string, int stringlen,
799 const char *set, int setlen,
800 bool doltrim, bool dortrim)
805 /* Nothing to do if either string or set is empty */
806 if (stringlen > 0 && setlen > 0)
808 if (pg_database_encoding_max_length() > 1)
811 * In the multibyte-encoding case, build arrays of pointers to
812 * character starts, so that we can avoid inefficient checks in
815 const char **stringchars;
816 const char **setchars;
829 stringchars = (const char **) palloc(stringlen * sizeof(char *));
830 stringmblen = (int *) palloc(stringlen * sizeof(int));
836 stringchars[stringnchars] = p;
837 stringmblen[stringnchars] = mblen = pg_mblen(p);
843 setchars = (const char **) palloc(setlen * sizeof(char *));
844 setmblen = (int *) palloc(setlen * sizeof(int));
850 setchars[setnchars] = p;
851 setmblen[setnchars] = mblen = pg_mblen(p);
857 resultndx = 0; /* index in stringchars[] */
858 resultnchars = stringnchars;
862 while (resultnchars > 0)
864 str_pos = stringchars[resultndx];
865 str_len = stringmblen[resultndx];
866 for (i = 0; i < setnchars; i++)
868 if (str_len == setmblen[i] &&
869 memcmp(str_pos, setchars[i], str_len) == 0)
873 break; /* no match here */
875 stringlen -= str_len;
883 while (resultnchars > 0)
885 str_pos = stringchars[resultndx + resultnchars - 1];
886 str_len = stringmblen[resultndx + resultnchars - 1];
887 for (i = 0; i < setnchars; i++)
889 if (str_len == setmblen[i] &&
890 memcmp(str_pos, setchars[i], str_len) == 0)
894 break; /* no match here */
895 stringlen -= str_len;
908 * In the single-byte-encoding case, we don't need such overhead.
912 while (stringlen > 0)
914 char str_ch = *string;
916 for (i = 0; i < setlen; i++)
918 if (str_ch == set[i])
922 break; /* no match here */
930 while (stringlen > 0)
932 char str_ch = string[stringlen - 1];
934 for (i = 0; i < setlen; i++)
936 if (str_ch == set[i])
940 break; /* no match here */
947 /* Return selected portion of string */
948 result = (text *) palloc(VARHDRSZ + stringlen);
949 SET_VARSIZE(result, VARHDRSZ + stringlen);
950 memcpy(VARDATA(result), string, stringlen);
955 /********************************************************************
961 * bytea byteatrim(bytea string, bytea set)
965 * Returns string with characters removed from the front and back
966 * up to the first character not in set.
968 * Cloned from btrim and modified as required.
 *
 * When either argument is empty the input value itself is returned.
 * Otherwise a new bytea of m bytes is allocated and filled starting from
 * the (possibly advanced) ptr.
969 ********************************************************************/
972 byteatrim(PG_FUNCTION_ARGS)
974 bytea *string = PG_GETARG_BYTEA_PP(0);
975 bytea *set = PG_GETARG_BYTEA_PP(1);
986 stringlen = VARSIZE_ANY_EXHDR(string);
987 setlen = VARSIZE_ANY_EXHDR(set);
989 if (stringlen <= 0 || setlen <= 0)
990 PG_RETURN_BYTEA_P(string);
993 ptr = VARDATA_ANY(string);
994 end = ptr + stringlen - 1;
995 ptr2start = VARDATA_ANY(set);
996 end2 = ptr2start + setlen - 1;
1001 while (ptr2 <= end2)
1016 while (ptr2 <= end2)
1028 ret = (bytea *) palloc(VARHDRSZ + m);
1029 SET_VARSIZE(ret, VARHDRSZ + m);
1030 memcpy(VARDATA(ret), ptr, m);
1032 PG_RETURN_BYTEA_P(ret);
1035 /********************************************************************
1041 * text ltrim(text string, text set)
1045 * Returns string with initial characters removed up to the first
1046 * character not in set.
 *
 * Thin wrapper: passes the data pointer and payload length of both
 * arguments to dotrim() and returns its result.
1048 ********************************************************************/
1051 ltrim(PG_FUNCTION_ARGS)
1053 text *string = PG_GETARG_TEXT_PP(0);
1054 text *set = PG_GETARG_TEXT_PP(1);
1057 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
1058 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
1061 PG_RETURN_TEXT_P(ret);
1064 /********************************************************************
1066 * ltrim1 --- ltrim with set fixed as ' '
 *
 * Single-argument variant: only the string is taken from the caller, and
 * the work is delegated to dotrim().
1068 ********************************************************************/
1071 ltrim1(PG_FUNCTION_ARGS)
1073 text *string = PG_GETARG_TEXT_PP(0);
1076 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
1080 PG_RETURN_TEXT_P(ret);
1083 /********************************************************************
1089 * text rtrim(text string, text set)
1093 * Returns string with final characters removed after the last
1094 * character not in set.
 *
 * Thin wrapper: passes the data pointer and payload length of both
 * arguments to dotrim() and returns its result.
1096 ********************************************************************/
1099 rtrim(PG_FUNCTION_ARGS)
1101 text *string = PG_GETARG_TEXT_PP(0);
1102 text *set = PG_GETARG_TEXT_PP(1);
1105 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
1106 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
1109 PG_RETURN_TEXT_P(ret);
1112 /********************************************************************
1114 * rtrim1 --- rtrim with set fixed as ' '
 *
 * Single-argument variant: only the string is taken from the caller, and
 * the work is delegated to dotrim().
1116 ********************************************************************/
1119 rtrim1(PG_FUNCTION_ARGS)
1121 text *string = PG_GETARG_TEXT_PP(0);
1124 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
1128 PG_RETURN_TEXT_P(ret);
1132 /********************************************************************
1138 * text translate(text string, text from, text to)
1142 * Returns string after replacing all occurrences of characters in from
1143 * with the corresponding character in to. If from is longer than to,
1144 * occurrences of the extra characters in from are deleted.
1145 * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
 *
 * Characters are matched as whole multibyte units: each source character
 * (length from pg_mblen) is compared against each character of from by
 * length plus memcmp(), and unmatched characters are copied through
 * unchanged.
 *
 * NOTE(review): the result buffer is sized from the byte-length ratio
 * tolen / fromlen (plus 0.5) times the input byte length, with the input
 * length as a floor.  No guard against fromlen == 0 is visible before that
 * division, and a short character in from that maps to a longer character
 * in to could presumably produce more output than the estimate allows for.
 * Verify the bound, or size the buffer for the worst case.
1147 ********************************************************************/
1150 translate(PG_FUNCTION_ARGS)
1152 text *string = PG_GETARG_TEXT_PP(0);
1153 text *from = PG_GETARG_TEXT_PP(1);
1154 text *to = PG_GETARG_TEXT_PP(2);
1172 m = VARSIZE_ANY_EXHDR(string);
1175 PG_RETURN_TEXT_P(string);
1177 fromlen = VARSIZE_ANY_EXHDR(from);
1178 from_ptr = VARDATA_ANY(from);
1179 tolen = VARSIZE_ANY_EXHDR(to);
1180 to_ptr = VARDATA_ANY(to);
1182 str_len = VARSIZE_ANY_EXHDR(string);
1183 source = VARDATA_ANY(string);
1185 estimate_len = (tolen * 1.0 / fromlen + 0.5) * str_len;
1186 estimate_len = estimate_len > str_len ? estimate_len : str_len;
1188 result = (text *) palloc(estimate_len + VARHDRSZ);
1189 target = VARDATA(result);
1194 source_len = pg_mblen(source);
1197 for (i = 0; i < fromlen; i += len)
1199 len = pg_mblen(&from_ptr[i]);
1200 if (len == source_len &&
1201 memcmp(source, &from_ptr[i], len) == 0)
1211 for (i = 0; i < from_index; i++)
1214 if (p >= (to_ptr + tolen))
1217 if (p < (to_ptr + tolen))
1220 memcpy(target, p, len);
1228 /* no match, so copy */
1229 memcpy(target, source, source_len);
1230 target += source_len;
1231 retlen += source_len;
1234 source += source_len;
1238 SET_VARSIZE(result, retlen + VARHDRSZ);
1241 * There may be some wasted space in the result if deletions occurred, but
1242 * it's not worth reallocating it; the function result probably won't live
1246 PG_RETURN_TEXT_P(result);
1249 /********************************************************************
1255 * int ascii(text string)
1259 * Returns the decimal representation of the first character from
1261 * If the string is empty we return 0.
1262 * If the database encoding is UTF8, we return the Unicode codepoint.
1263 * If the database encoding is any other multi-byte encoding, we
1264 * return the value of the first byte if it is an ASCII character
1265 * (range 1 .. 127), or raise an error.
1266 * For all other encodings we return the value of the first byte,
 *
 * In the UTF8 branch the code point is rebuilt from the payload bits of the
 * lead byte (masks 0x07, 0x0F or 0x1f depending on its value) followed by
 * six bits from each of tbytes continuation bytes.  The shape of the lead
 * and continuation bytes is checked only by Assert.
1269 ********************************************************************/
1272 ascii(PG_FUNCTION_ARGS)
1274 text *string = PG_GETARG_TEXT_PP(0);
1275 int encoding = GetDatabaseEncoding();
1276 unsigned char *data;
1278 if (VARSIZE_ANY_EXHDR(string) <= 0)
1281 data = (unsigned char *) VARDATA_ANY(string);
1283 if (encoding == PG_UTF8 && *data > 127)
1285 /* return the code point for Unicode */
1287 int result = 0, tbytes = 0, i;
1291 result = *data & 0x07;
1294 else if (*data >= 0xE0)
1296 result = *data & 0x0F;
1301 Assert (*data > 0xC0);
1302 result = *data & 0x1f;
1306 Assert (tbytes > 0);
1308 for (i = 1; i <= tbytes; i++)
1310 Assert ((data[i] & 0xC0) == 0x80);
1311 result = (result << 6) + (data[i] & 0x3f);
1314 PG_RETURN_INT32(result);
1318 if (pg_encoding_max_length(encoding) > 1 && *data > 127)
1320 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1321 errmsg("requested character too large")));
1324 PG_RETURN_INT32((int32) *data);
1328 /********************************************************************
1338 * Returns the character having the binary equivalent to val.
1340 * For UTF8 we treat the argument as a Unicode code point.
1341 * For other multi-byte encodings we raise an error for arguments
1342 * outside the strict ASCII range (1..127).
1344 * It's important that we don't ever return a value that is not valid
1345 * in the database encoding, so that this doesn't become a way for
1346 * invalid data to enter the database.
 *
 * In the UTF8 branch the value is encoded by hand into 2, 3 or 4 bytes,
 * chosen by comparing cvalue against 0x07ff and 0xffff.  In every other
 * case the result is a one-byte text value holding (char) cvalue.
 *
 * NOTE(review): the non-UTF8 range check in the body rejects cvalue > 255
 * when the encoding is multibyte and cvalue > 127 when it is single-byte.
 * That is the reverse of the rule stated above (multibyte encodings limited
 * to ASCII), so the two limits look swapped; as written, a multibyte
 * encoding would accept a lone byte in 128..255.  Confirm and fix.
 *
 * NOTE(review): the UTF8 branch accepts values up to 0x001fffff, which is
 * the range a 4-byte sequence can hold rather than the Unicode maximum of
 * 0x10ffff; check whether that upper bound is intended.
1348 ********************************************************************/
1351 chr(PG_FUNCTION_ARGS)
1353 uint32 cvalue = PG_GETARG_UINT32(0);
1355 int encoding = GetDatabaseEncoding();
1357 if (encoding == PG_UTF8 && cvalue > 127)
1359 /* for Unicode we treat the argument as a code point */
1363 /* We only allow valid Unicode code points */
1364 if (cvalue > 0x001fffff)
1366 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1367 errmsg("requested character too large for encoding: %d",
1370 if (cvalue > 0xffff)
1372 else if (cvalue > 0x07ff)
1377 result = (text *) palloc(VARHDRSZ + bytes);
1378 SET_VARSIZE(result, VARHDRSZ + bytes);
1379 wch = VARDATA(result);
1383 wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
1384 wch[1] = 0x80 | (cvalue & 0x3F);;
1386 else if (bytes == 3)
1388 wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
1389 wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
1390 wch[2] = 0x80 | (cvalue & 0x3F);
1394 wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
1395 wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
1396 wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
1397 wch[3] = 0x80 | (cvalue & 0x3F);
1406 /* Error out on arguments that make no sense or that we
1407 * can't validly represent in the encoding.
1412 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1413 errmsg("null character not permitted")));
1415 is_mb = pg_encoding_max_length(encoding) > 1;
1417 if ((is_mb && (cvalue > 255)) || (! is_mb && (cvalue > 127)))
1419 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1420 errmsg("requested character too large for encoding: %d",
1424 result = (text *) palloc(VARHDRSZ + 1);
1425 SET_VARSIZE(result, VARHDRSZ + 1);
1426 *VARDATA(result) = (char) cvalue;
1429 PG_RETURN_TEXT_P(result);
1432 /********************************************************************
1438 * text repeat(text string, int val)
1442 * Repeat string by val.
 *
 * The result is a new text value of VARHDRSZ + count * slen bytes, filled
 * by copying the payload of string count times.  A requested size that
 * overflows an int is reported as "requested length too large".
 *
 * NOTE(review): tlen is computed from count * slen before the overflow
 * check runs, and the check then recomputes the same signed product.  The
 * multiplication may therefore already have overflowed by the time it is
 * tested; consider validating first and computing tlen afterwards.
1444 ********************************************************************/
1447 repeat(PG_FUNCTION_ARGS)
1449 text *string = PG_GETARG_TEXT_PP(0);
1450 int32 count = PG_GETARG_INT32(1);
1461 slen = VARSIZE_ANY_EXHDR(string);
1462 tlen = VARHDRSZ + (count * slen);
1464 /* Check for integer overflow */
1465 if (slen != 0 && count != 0)
1467 int check = count * slen;
1468 int check2 = check + VARHDRSZ;
1470 if ((check / slen) != count || check2 <= check)
1472 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1473 errmsg("requested length too large")));
1476 result = (text *) palloc(tlen);
1478 SET_VARSIZE(result, tlen);
1479 cp = VARDATA(result);
1480 sp = VARDATA_ANY(string);
1481 for (i = 0; i < count; i++)
1483 memcpy(cp, sp, slen);
1487 PG_RETURN_TEXT_P(result);