1 /*-------------------------------------------------------------------------
3 * Oracle compatible functions.
5 * Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Author: Edmund Mergl <E.Mergl@bawue.de>
8 * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
12 * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.79 2008/05/19 18:08:16 tgl Exp $
14 *-------------------------------------------------------------------------
21 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
22 * declare them in <wchar.h>.
31 #include "utils/builtins.h"
32 #include "utils/pg_locale.h"
33 #include "mb/pg_wchar.h"
37 * If the system provides the needed functions for wide-character manipulation
38 * (which are all standardized by C99), then we implement upper/lower/initcap
39 * using wide-character functions. Otherwise we use the traditional <ctype.h>
40 * functions, which of course will not work as desired in multibyte character
41 * sets. Note that in either case we are effectively assuming that the
42 * database character encoding matches the encoding implied by LC_CTYPE.
44 * We assume if we have these two functions, we have their friends too, and
45 * can use the wide-character method.
47 #if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
48 #define USE_WIDE_UPPER_LOWER
49 char *wstring_lower(char *str);
50 char *wstring_upper(char *str);
51 wchar_t *texttowcs(const text *txt);
52 text *wcstotext(const wchar_t *str, int ncodes);
55 static text *dotrim(const char *string, int stringlen,
56 const char *set, int setlen,
57 bool doltrim, bool dortrim);
60 #ifdef USE_WIDE_UPPER_LOWER
63 * Convert a TEXT value into a palloc'd wchar string.
66 texttowcs(const text *txt)
68 int nbytes = VARSIZE_ANY_EXHDR(txt);
73 /* Overflow paranoia */
75 nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
77 (errcode(ERRCODE_OUT_OF_MEMORY),
78 errmsg("out of memory")));
80 /* Need a null-terminated version of the input */
81 workstr = text_to_cstring(txt);
83 /* Output workspace cannot have more codes than input bytes */
84 result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
86 /* Do the conversion */
87 ncodes = mbstowcs(result, workstr, nbytes + 1);
89 if (ncodes == (size_t) -1)
92 * Invalid multibyte character encountered. We try to give a useful
93 * error message by letting pg_verifymbstr check the string. But it's
94 * possible that the string is OK to us, and not OK to mbstowcs ---
95 * this suggests that the LC_CTYPE locale is different from the
96 * database encoding. Give a generic error message if verifymbstr
97 * can't find anything wrong.
99 pg_verifymbstr(workstr, nbytes, false);
101 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
102 errmsg("invalid multibyte character for locale"),
103 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
106 Assert(ncodes <= (size_t) nbytes);
113 * Convert a wchar string into a palloc'd TEXT value. The wchar string
114 * must be zero-terminated, but we also require the caller to pass the string
115 * length, since it will know it anyway in current uses.
118 wcstotext(const wchar_t *str, int ncodes)
123 /* Overflow paranoia */
125 ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
127 (errcode(ERRCODE_OUT_OF_MEMORY),
128 errmsg("out of memory")));
130 /* Make workspace certainly large enough for result */
131 result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
133 /* Do the conversion */
134 nbytes = wcstombs((char *) VARDATA(result), str,
135 (ncodes + 1) * MB_CUR_MAX);
137 if (nbytes == (size_t) -1)
139 /* Invalid multibyte character encountered ... shouldn't happen */
141 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
142 errmsg("invalid multibyte character for locale")));
145 Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
147 SET_VARSIZE(result, nbytes + VARHDRSZ);
151 #endif /* USE_WIDE_UPPER_LOWER */
155 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
156 * To make use of the upper/lower functionality, we need to map UTF8 to
157 * UTF16, which for some reason mbstowcs and wcstombs won't do for us.
158 * This conversion layer takes care of it.
163 /* texttowcs for the case of UTF8 to UTF16 */
165 win32_utf8_texttowcs(const text *txt)
167 int nbytes = VARSIZE_ANY_EXHDR(txt);
171 /* Overflow paranoia */
173 nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
175 (errcode(ERRCODE_OUT_OF_MEMORY),
176 errmsg("out of memory")));
178 /* Output workspace cannot have more codes than input bytes */
179 result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
181 /* MultiByteToWideChar does not work for zero-length input */
186 /* Do the conversion */
187 r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes,
190 if (r <= 0) /* assume it's NO_UNICODE_TRANSLATION */
192 /* see notes above about error reporting */
193 pg_verifymbstr(VARDATA_ANY(txt), nbytes, false);
195 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
196 errmsg("invalid multibyte character for locale"),
197 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
201 /* Append trailing null wchar (MultiByteToWideChar won't have) */
208 /* wcstotext for the case of UTF16 to UTF8 */
210 win32_utf8_wcstotext(const wchar_t *str)
216 /* Compute size of output string (this *will* include trailing null) */
217 nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
218 if (nbytes <= 0) /* shouldn't happen */
220 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
221 errmsg("UTF-16 to UTF-8 translation failed: %lu",
224 result = palloc(nbytes + VARHDRSZ);
226 r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
228 if (r != nbytes) /* shouldn't happen */
230 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
231 errmsg("UTF-16 to UTF-8 translation failed: %lu",
234 SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */
239 /* interface layer to check which encoding is in use */
242 win32_texttowcs(const text *txt)
244 if (GetDatabaseEncoding() == PG_UTF8)
245 return win32_utf8_texttowcs(txt);
247 return texttowcs(txt);
251 win32_wcstotext(const wchar_t *str, int ncodes)
253 if (GetDatabaseEncoding() == PG_UTF8)
254 return win32_utf8_wcstotext(str);
256 return wcstotext(str, ncodes);
259 /* use macros to cause routines below to call interface layer */
261 #define texttowcs win32_texttowcs
262 #define wcstotext win32_wcstotext
265 #ifdef USE_WIDE_UPPER_LOWER
267 * wstring_upper and wstring_lower are used for correct multibyte upper/lower
268 * transformations of localized strings. Returns pointers to transformed
272 wstring_upper(char *str)
280 in_text = cstring_to_text(str);
281 workspace = texttowcs(in_text);
283 for (i = 0; workspace[i] != 0; i++)
284 workspace[i] = towupper(workspace[i]);
286 out_text = wcstotext(workspace, i);
287 result = text_to_cstring(out_text);
297 wstring_lower(char *str)
305 in_text = cstring_to_text(str);
306 workspace = texttowcs(in_text);
308 for (i = 0; workspace[i] != 0; i++)
309 workspace[i] = towlower(workspace[i]);
311 out_text = wcstotext(workspace, i);
312 result = text_to_cstring(out_text);
320 #endif /* USE_WIDE_UPPER_LOWER */
322 /********************************************************************
328 * text lower(text string)
332 * Returns string, with all letters forced to lowercase.
334 ********************************************************************/
337 lower(PG_FUNCTION_ARGS)
339 #ifdef USE_WIDE_UPPER_LOWER
342 * Use wide char code only when max encoding length > 1 and ctype != C.
343 * Some operating systems fail with multi-byte encodings and a C locale.
344 * Also, for a C locale there is no need to process as multibyte.
346 if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
348 text *string = PG_GETARG_TEXT_PP(0);
353 workspace = texttowcs(string);
355 for (i = 0; workspace[i] != 0; i++)
356 workspace[i] = towlower(workspace[i]);
358 result = wcstotext(workspace, i);
362 PG_RETURN_TEXT_P(result);
365 #endif /* USE_WIDE_UPPER_LOWER */
367 text *string = PG_GETARG_TEXT_P_COPY(0);
372 * Since we copied the string, we can scribble directly on the value
374 ptr = VARDATA(string);
375 m = VARSIZE(string) - VARHDRSZ;
379 *ptr = tolower((unsigned char) *ptr);
383 PG_RETURN_TEXT_P(string);
388 /********************************************************************
394 * text upper(text string)
398 * Returns string, with all letters forced to uppercase.
400 ********************************************************************/
403 upper(PG_FUNCTION_ARGS)
405 #ifdef USE_WIDE_UPPER_LOWER
408 * Use wide char code only when max encoding length > 1 and ctype != C.
409 * Some operating systems fail with multi-byte encodings and a C locale.
410 * Also, for a C locale there is no need to process as multibyte.
412 if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
414 text *string = PG_GETARG_TEXT_PP(0);
419 workspace = texttowcs(string);
421 for (i = 0; workspace[i] != 0; i++)
422 workspace[i] = towupper(workspace[i]);
424 result = wcstotext(workspace, i);
428 PG_RETURN_TEXT_P(result);
431 #endif /* USE_WIDE_UPPER_LOWER */
433 text *string = PG_GETARG_TEXT_P_COPY(0);
438 * Since we copied the string, we can scribble directly on the value
440 ptr = VARDATA(string);
441 m = VARSIZE(string) - VARHDRSZ;
445 *ptr = toupper((unsigned char) *ptr);
449 PG_RETURN_TEXT_P(string);
454 /********************************************************************
460 * text initcap(text string)
464 * Returns string, with first letter of each word in uppercase, all
465 * other letters in lowercase. A word is defined as a sequence of
466 * alphanumeric characters, delimited by non-alphanumeric
469 ********************************************************************/
472 initcap(PG_FUNCTION_ARGS)
474 #ifdef USE_WIDE_UPPER_LOWER
477 * Use wide char code only when max encoding length > 1 and ctype != C.
478 * Some operating systems fail with multi-byte encodings and a C locale.
479 * Also, for a C locale there is no need to process as multibyte.
481 if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
483 text *string = PG_GETARG_TEXT_PP(0);
489 workspace = texttowcs(string);
491 for (i = 0; workspace[i] != 0; i++)
494 workspace[i] = towlower(workspace[i]);
496 workspace[i] = towupper(workspace[i]);
497 wasalnum = iswalnum(workspace[i]);
500 result = wcstotext(workspace, i);
504 PG_RETURN_TEXT_P(result);
507 #endif /* USE_WIDE_UPPER_LOWER */
509 text *string = PG_GETARG_TEXT_P_COPY(0);
515 * Since we copied the string, we can scribble directly on the value
517 ptr = VARDATA(string);
518 m = VARSIZE(string) - VARHDRSZ;
523 *ptr = tolower((unsigned char) *ptr);
525 *ptr = toupper((unsigned char) *ptr);
526 wasalnum = isalnum((unsigned char) *ptr);
530 PG_RETURN_TEXT_P(string);
535 /********************************************************************
541 * text lpad(text string1, int4 len, text string2)
545 * Returns string1, left-padded to length len with the sequence of
546 * characters in string2. If len is less than the length of string1,
547 * instead truncate (on the right) to len.
549 ********************************************************************/
552 lpad(PG_FUNCTION_ARGS)
554 text *string1 = PG_GETARG_TEXT_PP(0);
555 int32 len = PG_GETARG_INT32(1);
556 text *string2 = PG_GETARG_TEXT_PP(2);
569 /* Negative len is silently taken as zero */
573 s1len = VARSIZE_ANY_EXHDR(string1);
575 s1len = 0; /* shouldn't happen */
577 s2len = VARSIZE_ANY_EXHDR(string2);
579 s2len = 0; /* shouldn't happen */
581 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
584 s1len = len; /* truncate string1 to len chars */
587 len = s1len; /* nothing to pad with, so don't pad */
589 bytelen = pg_database_encoding_max_length() * len;
591 /* check for integer overflow */
592 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
594 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
595 errmsg("requested length too large")));
597 ret = (text *) palloc(VARHDRSZ + bytelen);
601 ptr2 = ptr2start = VARDATA_ANY(string2);
602 ptr2end = ptr2 + s2len;
603 ptr_ret = VARDATA(ret);
607 int mlen = pg_mblen(ptr2);
609 memcpy(ptr_ret, ptr2, mlen);
612 if (ptr2 == ptr2end) /* wrap around at end of s2 */
616 ptr1 = VARDATA_ANY(string1);
620 int mlen = pg_mblen(ptr1);
622 memcpy(ptr_ret, ptr1, mlen);
627 SET_VARSIZE(ret, ptr_ret - (char *) ret);
629 PG_RETURN_TEXT_P(ret);
633 /********************************************************************
639 * text rpad(text string1, int4 len, text string2)
643 * Returns string1, right-padded to length len with the sequence of
644 * characters in string2. If len is less than the length of string1,
645 * instead truncate (on the right) to len.
647 ********************************************************************/
650 rpad(PG_FUNCTION_ARGS)
652 text *string1 = PG_GETARG_TEXT_PP(0);
653 int32 len = PG_GETARG_INT32(1);
654 text *string2 = PG_GETARG_TEXT_PP(2);
667 /* Negative len is silently taken as zero */
671 s1len = VARSIZE_ANY_EXHDR(string1);
673 s1len = 0; /* shouldn't happen */
675 s2len = VARSIZE_ANY_EXHDR(string2);
677 s2len = 0; /* shouldn't happen */
679 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
682 s1len = len; /* truncate string1 to len chars */
685 len = s1len; /* nothing to pad with, so don't pad */
687 bytelen = pg_database_encoding_max_length() * len;
689 /* Check for integer overflow */
690 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
692 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
693 errmsg("requested length too large")));
695 ret = (text *) palloc(VARHDRSZ + bytelen);
698 ptr1 = VARDATA_ANY(string1);
699 ptr_ret = VARDATA(ret);
703 int mlen = pg_mblen(ptr1);
705 memcpy(ptr_ret, ptr1, mlen);
710 ptr2 = ptr2start = VARDATA_ANY(string2);
711 ptr2end = ptr2 + s2len;
715 int mlen = pg_mblen(ptr2);
717 memcpy(ptr_ret, ptr2, mlen);
720 if (ptr2 == ptr2end) /* wrap around at end of s2 */
724 SET_VARSIZE(ret, ptr_ret - (char *) ret);
726 PG_RETURN_TEXT_P(ret);
730 /********************************************************************
736 * text btrim(text string, text set)
740 * Returns string with characters removed from the front and back
741 * up to the first character not in set.
743 ********************************************************************/
746 btrim(PG_FUNCTION_ARGS)
748 text *string = PG_GETARG_TEXT_PP(0);
749 text *set = PG_GETARG_TEXT_PP(1);
752 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
753 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
756 PG_RETURN_TEXT_P(ret);
759 /********************************************************************
761 * btrim1 --- btrim with set fixed as ' '
763 ********************************************************************/
766 btrim1(PG_FUNCTION_ARGS)
768 text *string = PG_GETARG_TEXT_PP(0);
771 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
775 PG_RETURN_TEXT_P(ret);
779 * Common implementation for btrim, ltrim, rtrim
782 dotrim(const char *string, int stringlen,
783 const char *set, int setlen,
784 bool doltrim, bool dortrim)
788 /* Nothing to do if either string or set is empty */
789 if (stringlen > 0 && setlen > 0)
791 if (pg_database_encoding_max_length() > 1)
794 * In the multibyte-encoding case, build arrays of pointers to
795 * character starts, so that we can avoid inefficient checks in
798 const char **stringchars;
799 const char **setchars;
812 stringchars = (const char **) palloc(stringlen * sizeof(char *));
813 stringmblen = (int *) palloc(stringlen * sizeof(int));
819 stringchars[stringnchars] = p;
820 stringmblen[stringnchars] = mblen = pg_mblen(p);
826 setchars = (const char **) palloc(setlen * sizeof(char *));
827 setmblen = (int *) palloc(setlen * sizeof(int));
833 setchars[setnchars] = p;
834 setmblen[setnchars] = mblen = pg_mblen(p);
840 resultndx = 0; /* index in stringchars[] */
841 resultnchars = stringnchars;
845 while (resultnchars > 0)
847 str_pos = stringchars[resultndx];
848 str_len = stringmblen[resultndx];
849 for (i = 0; i < setnchars; i++)
851 if (str_len == setmblen[i] &&
852 memcmp(str_pos, setchars[i], str_len) == 0)
856 break; /* no match here */
858 stringlen -= str_len;
866 while (resultnchars > 0)
868 str_pos = stringchars[resultndx + resultnchars - 1];
869 str_len = stringmblen[resultndx + resultnchars - 1];
870 for (i = 0; i < setnchars; i++)
872 if (str_len == setmblen[i] &&
873 memcmp(str_pos, setchars[i], str_len) == 0)
877 break; /* no match here */
878 stringlen -= str_len;
891 * In the single-byte-encoding case, we don't need such overhead.
895 while (stringlen > 0)
897 char str_ch = *string;
899 for (i = 0; i < setlen; i++)
901 if (str_ch == set[i])
905 break; /* no match here */
913 while (stringlen > 0)
915 char str_ch = string[stringlen - 1];
917 for (i = 0; i < setlen; i++)
919 if (str_ch == set[i])
923 break; /* no match here */
930 /* Return selected portion of string */
931 return cstring_to_text_with_len(string, stringlen);
934 /********************************************************************
940 * bytea byteatrim(bytea string, bytea set)
944 * Returns string with characters removed from the front and back
945 * up to the first character not in set.
947 * Cloned from btrim and modified as required.
948 ********************************************************************/
951 byteatrim(PG_FUNCTION_ARGS)
953 bytea *string = PG_GETARG_BYTEA_PP(0);
954 bytea *set = PG_GETARG_BYTEA_PP(1);
965 stringlen = VARSIZE_ANY_EXHDR(string);
966 setlen = VARSIZE_ANY_EXHDR(set);
968 if (stringlen <= 0 || setlen <= 0)
969 PG_RETURN_BYTEA_P(string);
972 ptr = VARDATA_ANY(string);
973 end = ptr + stringlen - 1;
974 ptr2start = VARDATA_ANY(set);
975 end2 = ptr2start + setlen - 1;
1007 ret = (bytea *) palloc(VARHDRSZ + m);
1008 SET_VARSIZE(ret, VARHDRSZ + m);
1009 memcpy(VARDATA(ret), ptr, m);
1011 PG_RETURN_BYTEA_P(ret);
1014 /********************************************************************
1020 * text ltrim(text string, text set)
1024 * Returns string with initial characters removed up to the first
1025 * character not in set.
1027 ********************************************************************/
1030 ltrim(PG_FUNCTION_ARGS)
1032 text *string = PG_GETARG_TEXT_PP(0);
1033 text *set = PG_GETARG_TEXT_PP(1);
1036 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
1037 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
1040 PG_RETURN_TEXT_P(ret);
1043 /********************************************************************
1045 * ltrim1 --- ltrim with set fixed as ' '
1047 ********************************************************************/
1050 ltrim1(PG_FUNCTION_ARGS)
1052 text *string = PG_GETARG_TEXT_PP(0);
1055 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
1059 PG_RETURN_TEXT_P(ret);
1062 /********************************************************************
1068 * text rtrim(text string, text set)
1072 * Returns string with final characters removed after the last
1073 * character not in set.
1075 ********************************************************************/
1078 rtrim(PG_FUNCTION_ARGS)
1080 text *string = PG_GETARG_TEXT_PP(0);
1081 text *set = PG_GETARG_TEXT_PP(1);
1084 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
1085 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
1088 PG_RETURN_TEXT_P(ret);
1091 /********************************************************************
1093 * rtrim1 --- rtrim with set fixed as ' '
1095 ********************************************************************/
1098 rtrim1(PG_FUNCTION_ARGS)
1100 text *string = PG_GETARG_TEXT_PP(0);
1103 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
1107 PG_RETURN_TEXT_P(ret);
1111 /********************************************************************
1117 * text translate(text string, text from, text to)
1121 * Returns string after replacing all occurrences of characters in from
1122 * with the corresponding character in to. If from is longer than to,
1123 * occurrences of the extra characters in from are deleted.
1124 * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
1126 ********************************************************************/
1129 translate(PG_FUNCTION_ARGS)
1131 text *string = PG_GETARG_TEXT_PP(0);
1132 text *from = PG_GETARG_TEXT_PP(1);
1133 text *to = PG_GETARG_TEXT_PP(2);
1149 m = VARSIZE_ANY_EXHDR(string);
1151 PG_RETURN_TEXT_P(string);
1152 source = VARDATA_ANY(string);
1154 fromlen = VARSIZE_ANY_EXHDR(from);
1155 from_ptr = VARDATA_ANY(from);
1156 tolen = VARSIZE_ANY_EXHDR(to);
1157 to_ptr = VARDATA_ANY(to);
1160 * The worst-case expansion is to substitute a max-length character for a
1161 * single-byte character at each position of the string.
1163 worst_len = pg_database_encoding_max_length() * m;
1165 /* check for integer overflow */
1166 if (worst_len / pg_database_encoding_max_length() != m)
1168 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1169 errmsg("requested length too large")));
1171 result = (text *) palloc(worst_len + VARHDRSZ);
1172 target = VARDATA(result);
1177 source_len = pg_mblen(source);
1180 for (i = 0; i < fromlen; i += len)
1182 len = pg_mblen(&from_ptr[i]);
1183 if (len == source_len &&
1184 memcmp(source, &from_ptr[i], len) == 0)
1194 for (i = 0; i < from_index; i++)
1197 if (p >= (to_ptr + tolen))
1200 if (p < (to_ptr + tolen))
1203 memcpy(target, p, len);
1211 /* no match, so copy */
1212 memcpy(target, source, source_len);
1213 target += source_len;
1214 retlen += source_len;
1217 source += source_len;
1221 SET_VARSIZE(result, retlen + VARHDRSZ);
1224 * The function result is probably much bigger than needed, if we're using
1225 * a multibyte encoding, but it's not worth reallocating it; the result
1226 * probably won't live long anyway.
1229 PG_RETURN_TEXT_P(result);
1232 /********************************************************************
1238 * int ascii(text string)
1242 * Returns the decimal representation of the first character from
1244 * If the string is empty we return 0.
1245 * If the database encoding is UTF8, we return the Unicode codepoint.
1246 * If the database encoding is any other multi-byte encoding, we
1247 * return the value of the first byte if it is an ASCII character
1248 * (range 1 .. 127), or raise an error.
1249 * For all other encodings we return the value of the first byte,
1252 ********************************************************************/
1255 ascii(PG_FUNCTION_ARGS)
1257 text *string = PG_GETARG_TEXT_PP(0);
1258 int encoding = GetDatabaseEncoding();
1259 unsigned char *data;
1261 if (VARSIZE_ANY_EXHDR(string) <= 0)
1264 data = (unsigned char *) VARDATA_ANY(string);
1266 if (encoding == PG_UTF8 && *data > 127)
1268 /* return the code point for Unicode */
1276 result = *data & 0x07;
1279 else if (*data >= 0xE0)
1281 result = *data & 0x0F;
1286 Assert(*data > 0xC0);
1287 result = *data & 0x1f;
1293 for (i = 1; i <= tbytes; i++)
1295 Assert((data[i] & 0xC0) == 0x80);
1296 result = (result << 6) + (data[i] & 0x3f);
1299 PG_RETURN_INT32(result);
1303 if (pg_encoding_max_length(encoding) > 1 && *data > 127)
1305 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1306 errmsg("requested character too large")));
1309 PG_RETURN_INT32((int32) *data);
1313 /********************************************************************
1323 * Returns the character having the binary equivalent to val.
1325 * For UTF8 we treat the argument as a Unicode code point.
1326 * For other multi-byte encodings we raise an error for arguments
1327 * outside the strict ASCII range (1..127).
1329 * It's important that we don't ever return a value that is not valid
1330 * in the database encoding, so that this doesn't become a way for
1331 * invalid data to enter the database.
1333 ********************************************************************/
1336 chr (PG_FUNCTION_ARGS)
1338 uint32 cvalue = PG_GETARG_UINT32(0);
1340 int encoding = GetDatabaseEncoding();
1342 if (encoding == PG_UTF8 && cvalue > 127)
1344 /* for Unicode we treat the argument as a code point */
1348 /* We only allow valid Unicode code points */
1349 if (cvalue > 0x001fffff)
1351 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1352 errmsg("requested character too large for encoding: %d",
1355 if (cvalue > 0xffff)
1357 else if (cvalue > 0x07ff)
1362 result = (text *) palloc(VARHDRSZ + bytes);
1363 SET_VARSIZE(result, VARHDRSZ + bytes);
1364 wch = VARDATA(result);
1368 wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
1369 wch[1] = 0x80 | (cvalue & 0x3F);;
1371 else if (bytes == 3)
1373 wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
1374 wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
1375 wch[2] = 0x80 | (cvalue & 0x3F);
1379 wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
1380 wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
1381 wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
1382 wch[3] = 0x80 | (cvalue & 0x3F);
1392 * Error out on arguments that make no sense or that we can't validly
1393 * represent in the encoding.
1398 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1399 errmsg("null character not permitted")));
1401 is_mb = pg_encoding_max_length(encoding) > 1;
1403 if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1405 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1406 errmsg("requested character too large for encoding: %d",
1410 result = (text *) palloc(VARHDRSZ + 1);
1411 SET_VARSIZE(result, VARHDRSZ + 1);
1412 *VARDATA(result) = (char) cvalue;
1415 PG_RETURN_TEXT_P(result);
1418 /********************************************************************
1424 * text repeat(text string, int val)
1428 * Returns string repeated val times.
1430 ********************************************************************/
1433 repeat(PG_FUNCTION_ARGS)
1435 text *string = PG_GETARG_TEXT_PP(0);
1436 int32 count = PG_GETARG_INT32(1);
1447 slen = VARSIZE_ANY_EXHDR(string);
1448 tlen = VARHDRSZ + (count * slen);
1450 /* Check for integer overflow */
1451 if (slen != 0 && count != 0)
1453 int check = count * slen;
1454 int check2 = check + VARHDRSZ;
1456 if ((check / slen) != count || check2 <= check)
1458 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1459 errmsg("requested length too large")));
1462 result = (text *) palloc(tlen);
1464 SET_VARSIZE(result, tlen);
1465 cp = VARDATA(result);
1466 sp = VARDATA_ANY(string);
1467 for (i = 0; i < count; i++)
1469 memcpy(cp, sp, slen);
1473 PG_RETURN_TEXT_P(result);