1 /*-------------------------------------------------------------------------
4 * locale compatibility layer for tsearch
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
10 * $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.10 2008/06/18 20:55:42 tgl Exp $
12 *-------------------------------------------------------------------------
16 #include "storage/fd.h"
17 #include "tsearch/ts_locale.h"
18 #include "tsearch/ts_public.h"
/* Forward declaration: tsearch_readline_begin() installs this function as the callback of its error-context entry. */
20 static void tsearch_readline_callback(void *arg);
23 #ifdef USE_WIDE_UPPER_LOWER
/*
 * t_isdigit --- digit test for the character that starts at ptr.
 *
 * NOTE(review): this listing omits some lines of the function (for
 * example the declaration of the character array), so only the visible
 * logic is described here.
 */
26 t_isdigit(const char *ptr)
/* clen: length reported by pg_mblen() for the character at ptr (presumably in bytes) */
28 int clen = pg_mblen(ptr);
/* Single-unit character, or lc_ctype_is_c() is true: use the plain ctype test on the first byte */
31 if (clen == 1 || lc_ctype_is_c())
32 return isdigit(TOUCHAR(ptr));
/*
 * Otherwise hand the clen bytes to char2wchar() and apply the wide-character
 * test to the first resulting element.
 * NOTE(review): the literal 2 is presumably the capacity of the character
 * array. Where wchar_t is 16 bits, a character outside the BMP may need two
 * units plus a terminator -- confirm that 2 is large enough.
 */
34 char2wchar(character, 2, ptr, clen);
36 return iswdigit((wint_t) character[0]);
/*
 * t_isspace --- whitespace test for the character that starts at ptr.
 *
 * NOTE(review): this listing omits some lines of the function (for
 * example the declaration of the character array), so only the visible
 * logic is described here.
 */
40 t_isspace(const char *ptr)
/* clen: length reported by pg_mblen() for the character at ptr (presumably in bytes) */
42 int clen = pg_mblen(ptr);
/* Single-unit character, or lc_ctype_is_c() is true: use the plain ctype test on the first byte */
45 if (clen == 1 || lc_ctype_is_c())
46 return isspace(TOUCHAR(ptr));
/*
 * Otherwise hand the clen bytes to char2wchar() and apply the wide-character
 * test to the first resulting element.
 * NOTE(review): the literal 2 is presumably the capacity of the character
 * array. Where wchar_t is 16 bits, a character outside the BMP may need two
 * units plus a terminator -- confirm that 2 is large enough.
 */
48 char2wchar(character, 2, ptr, clen);
50 return iswspace((wint_t) character[0]);
/*
 * t_isalpha --- alphabetic test for the character that starts at ptr.
 *
 * NOTE(review): this listing omits some lines of the function (for
 * example the declaration of the character array), so only the visible
 * logic is described here.
 */
54 t_isalpha(const char *ptr)
/* clen: length reported by pg_mblen() for the character at ptr (presumably in bytes) */
56 int clen = pg_mblen(ptr);
/* Single-unit character, or lc_ctype_is_c() is true: use the plain ctype test on the first byte */
59 if (clen == 1 || lc_ctype_is_c())
60 return isalpha(TOUCHAR(ptr));
/*
 * Otherwise hand the clen bytes to char2wchar() and apply the wide-character
 * test to the first resulting element.
 * NOTE(review): the literal 2 is presumably the capacity of the character
 * array. Where wchar_t is 16 bits, a character outside the BMP may need two
 * units plus a terminator -- confirm that 2 is large enough.
 */
62 char2wchar(character, 2, ptr, clen);
64 return iswalpha((wint_t) character[0]);
/*
 * t_isprint --- printable test for the character that starts at ptr.
 *
 * NOTE(review): this listing omits some lines of the function (for
 * example the declaration of the character array), so only the visible
 * logic is described here.
 */
68 t_isprint(const char *ptr)
/* clen: length reported by pg_mblen() for the character at ptr (presumably in bytes) */
70 int clen = pg_mblen(ptr);
/* Single-unit character, or lc_ctype_is_c() is true: use the plain ctype test on the first byte */
73 if (clen == 1 || lc_ctype_is_c())
74 return isprint(TOUCHAR(ptr));
/*
 * Otherwise hand the clen bytes to char2wchar() and apply the wide-character
 * test to the first resulting element.
 * NOTE(review): the literal 2 is presumably the capacity of the character
 * array. Where wchar_t is 16 bits, a character outside the BMP may need two
 * units plus a terminator -- confirm that 2 is large enough.
 */
76 char2wchar(character, 2, ptr, clen);
78 return iswprint((wint_t) character[0]);
80 #endif /* USE_WIDE_UPPER_LOWER */
84 * Set up to read a file using tsearch_readline(). This facility is
85 * better than just reading the file directly because it provides error
86 * context pointing to the specific line where a problem is detected.
90 * tsearch_readline_state trst;
92 * if (!tsearch_readline_begin(&trst, filename))
94 * (errcode(ERRCODE_CONFIG_FILE_ERROR),
95 * errmsg("could not open stop-word file \"%s\": %m",
97 * while ((line = tsearch_readline(&trst)) != NULL)
99 * tsearch_readline_end(&trst);
101 * Note that the caller supplies the ereport() for file open failure;
102 * this is so that a custom message can be provided. The filename string
103 * passed to tsearch_readline_begin() must remain valid through
104 * tsearch_readline_end().
107 tsearch_readline_begin(tsearch_readline_state *stp,
108 const char *filename)
/*
 * Open the file in read mode and keep the handle in stp->fp.  The action
 * taken when the open fails (NULL result) is on a line not present in this
 * listing.
 */
110 if ((stp->fp = AllocateFile(filename, "r")) == NULL)
/* Only the pointer is stored, not a copy; this is why the filename string must stay valid until tsearch_readline_end() */
112 stp->filename = filename;
115 /* Set up error traceback support for ereport() */
/*
 * Push stp->cb onto error_context_stack: the callback receives stp as its
 * argument, and the previous stack top is saved in cb.previous so that
 * tsearch_readline_end() can restore it.
 */
116 stp->cb.callback = tsearch_readline_callback;
117 stp->cb.arg = (void *) stp;
118 stp->cb.previous = error_context_stack;
119 error_context_stack = &stp->cb;
124 * Read the next line from a tsearch data file (expected to be in UTF-8), and
125 * convert it to database encoding if needed. The returned string is palloc'd.
126 * NULL return means EOF.
129 tsearch_readline(tsearch_readline_state *stp)
/*
 * Fetch the next line through t_readline() and record it in stp->curline.
 * NOTE(review): curline is presumably what tsearch_readline_callback()
 * quotes in its context message; other state updates (such as a line
 * counter) and the return statement are not visible in this listing.
 */
135 result = t_readline(stp->fp);
136 stp->curline = result;
141 * Close down after reading a file with tsearch_readline()
144 tsearch_readline_end(tsearch_readline_state *stp)
/*
 * NOTE(review): the header comment says this closes down the reader, but no
 * statement releasing stp->fp is visible in this listing -- confirm the file
 * handle is freed on a line omitted here.
 */
147 /* Pop the error context stack */
/* Restores the stack top that tsearch_readline_begin() saved in cb.previous */
148 error_context_stack = stp->cb.previous;
152 * Error context callback for errors occurring while reading a tsearch
153 * configuration file.
156 tsearch_readline_callback(void *arg)
/* arg is the tsearch_readline_state that tsearch_readline_begin() stored in cb.arg */
158 tsearch_readline_state *stp = (tsearch_readline_state *) arg;
/*
 * Two forms of context message are emitted below: one that also quotes the
 * text of the line, and one with only the line number and file name.  The
 * condition selecting between them, and the arguments passed to errcontext(),
 * are on lines not present in this listing.
 */
161 * We can't include the text of the config line for errors that occur
162 * during t_readline() itself. This is only partly a consequence of
163 * our arms-length use of that routine: the major cause of such
164 * errors is encoding violations, and we daren't try to print error
165 * messages containing badly-encoded data.
168 errcontext("line %d of configuration file \"%s\": \"%s\"",
173 errcontext("line %d of configuration file \"%s\"",
180 * Read the next line from a tsearch data file (expected to be in UTF-8), and
181 * convert it to database encoding if needed. The returned string is palloc'd.
182 * NULL return means EOF.
184 * Note: direct use of this function is now deprecated. Go through
185 * tsearch_readline() to provide better error reporting.
/*
 * NOTE(review): the signature line of this function is not present in this
 * listing.  The preceding header comment and the t_readline(stp->fp) call in
 * tsearch_readline() suggest it is t_readline() taking a FILE pointer named
 * fp -- confirm against the full file.
 */
192 char buf[4096]; /* lines must not be longer than this */
/*
 * fgets() yields NULL at end of file or on a read error; the handling
 * statement is not shown here.
 * NOTE(review): fgets() stops after sizeof(buf) - 1 bytes, so a longer input
 * line would come back in pieces over successive calls -- confirm that
 * callers tolerate this or that such files are rejected elsewhere.
 */
194 if (fgets(buf, sizeof(buf), fp) == NULL)
199 /* Make sure the input is valid UTF-8 */
/*
 * The result is deliberately discarded.  NOTE(review): the final argument
 * false presumably asks for an error to be raised on invalid input rather
 * than a status return -- confirm.  The computation of len is not visible.
 */
200 (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
/* Convert from UTF-8 to the database encoding (middle argument lines are not visible in this listing) */
203 recoded = (char *) pg_do_encoding_conversion((unsigned char *) buf,
206 GetDatabaseEncoding());
208 if (recoded == NULL) /* should not happen */
209 elog(ERROR, "encoding conversion failed");
/*
 * The copy below is made so that the returned string is palloc-allocated
 * rather than pointing into the local buf; the condition guarding it is not
 * visible in this listing.
 */
214 * conversion didn't pstrdup, so we must. We can use the length of the
215 * original string, because no conversion was done.
217 recoded = pnstrdup(recoded, len);
224 * lowerstr --- fold null-terminated string to lower case
226 * Returned string is palloc'd
229 lowerstr(const char *str)
/*
 * Convenience wrapper: the length is measured with strlen(), so str must be
 * null-terminated, and all the work is delegated to lowerstr_with_len().
 * NOTE(review): strlen() yields size_t while lowerstr_with_len() takes an
 * int length, so the value is narrowed implicitly -- fine for ordinary
 * strings, but worth confirming for very long inputs.
 */
231 return lowerstr_with_len(str, strlen(str));
235 * lowerstr_with_len --- fold string to lower case
237 * Input string need not be null-terminated.
239 * Returned string is palloc'd
242 lowerstr_with_len(const char *str, int len)
249 #ifdef USE_WIDE_UPPER_LOWER
252 * Use wide char code only when max encoding length > 1 and ctype != C.
253 * Some operating systems fail with multi-byte encodings and a C locale.
254 * Also, for a C locale there is no need to process as multibyte. From
255 * backend/utils/adt/oracle_compat.c Teodor
257 if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
264 * alloc number of wchar_t for worst case, len contains number of
265 * bytes >= number of characters and alloc 1 wchar_t for 0, because
266 * wchar2char wants zero-terminated string
268 wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
270 wlen = char2wchar(wstr, len + 1, str, len);
275 *wptr = towlower((wint_t) *wptr);
280 * Alloc result string for worst case + '\0'
282 len = pg_database_encoding_max_length() * wlen + 1;
283 out = (char *) palloc(len);
285 wlen = wchar2char(out, wstr, len);
291 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
292 errmsg("conversion from wchar_t to server encoding failed: %m")));
296 #endif /* USE_WIDE_UPPER_LOWER */
298 const char *ptr = str;
301 outptr = out = (char *) palloc(sizeof(char) * (len + 1));
302 while ((ptr - str) < len && *ptr)
304 *outptr++ = tolower(TOUCHAR(ptr));