1 /*-------------------------------------------------------------------------
4 * like expression handling code.
7 * A big hack of the regexp.c code!! Contributed by
8 * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
10 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
14 * src/backend/utils/adt/like.c
16 *-------------------------------------------------------------------------
22 #include "catalog/pg_collation.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "utils/builtins.h"
26 #include "utils/pg_locale.h"
/*
 * Result code and forward declarations for the pattern matchers.
 *
 * LIKE_ABORT is one of the matcher result codes; LIKE_TRUE, which the
 * interface routines compare against, is defined outside this view.
 *
 * SB_, MB_ and UTF8_ name the single-byte, multibyte and UTF8 builds of
 * like_match.c; SB_IMatchText is the single-byte case-insensitive build.
 * All matchers share one signature: text pointer and length, pattern
 * pointer and length, then a locale and a locale_is_c flag.
 */
31 #define LIKE_ABORT (-1)
34 static int SB_MatchText(char *t, int tlen, char *p, int plen,
35 pg_locale_t locale, bool locale_is_c);
36 static text *SB_do_like_escape(text *, text *);
38 static int MB_MatchText(char *t, int tlen, char *p, int plen,
39 pg_locale_t locale, bool locale_is_c);
40 static text *MB_do_like_escape(text *, text *);
42 static int UTF8_MatchText(char *t, int tlen, char *p, int plen,
43 pg_locale_t locale, bool locale_is_c);
45 static int SB_IMatchText(char *t, int tlen, char *p, int plen,
46 pg_locale_t locale, bool locale_is_c);
48 static int GenericMatchText(char *s, int slen, char *p, int plen);
49 static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
51 /*--------------------
52 * Support routine for MatchText. Compares given multibyte streams
53 * as wide characters. If they match, returns 1; otherwise returns 0.
/*
 * wchareq: compare the multibyte characters that start at p1 and p2.
 *
 * The visible steps take the byte length of each character with
 * pg_mblen() and compare the two lengths before the characters are
 * known to be "the same length".
 * NOTE(review): the first-byte shortcut, the branch taken when the
 * lengths differ, and the final comparison are not visible in this
 * view; confirm them against the full file.
 */
57 wchareq(char *p1, char *p2)
61 /* Optimization: quickly compare the first byte. */
65 p1_len = pg_mblen(p1);
66 if (pg_mblen(p2) != p1_len)
69 /* They are the same length */
79 * Formerly we had a routine iwchareq() here that tried to do case-insensitive
80 * comparison of multibyte characters. It did not work at all, however,
81 * because it relied on tolower() which has a single-byte API ... and
82 * towlower() wouldn't be much better since we have no suitably cheap way
83 * of getting a single character transformed to the system's wchar_t format.
84 * So now, we just downcase the strings using lower() and apply regular LIKE
85 * comparison. This should be revisited when we install better locale support.
89 * We do handle case-insensitive matching for single-byte encodings using
90 * fold-on-the-fly processing, however.
/*
 * SB_lower_char: lower-case one single-byte character.
 *
 * Three return paths are visible: pg_ascii_tolower(c),
 * tolower_l(c, locale) and pg_tolower(c).
 * NOTE(review): the conditions that select among them are not visible
 * here; presumably locale_is_c picks the ASCII path and a supplied
 * locale picks tolower_l(). Confirm against the full file.
 */
93 SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
96 return pg_ascii_tolower(c);
99 return tolower_l(c, locale);
102 return pg_tolower(c);
/*
 * Build the matcher variants by including like_match.c several times,
 * each time with a different set of helper macros in effect:
 *
 *   MB_MatchText / MB_do_like_escape  - CHAREQ is wchareq(), NextChar and
 *                                       CopyAdvChar step by pg_mblen()
 *   SB_MatchText / SB_do_like_escape  - CHAREQ compares single bytes,
 *                                       NextChar is NextByte
 *   SB_IMatchText                     - as single-byte, with MATCH_LOWER
 *                                       folding through SB_lower_char()
 *   UTF8_MatchText                    - NextChar advances one byte, then
 *                                       keeps advancing while bytes remain
 *                                       and the next byte has its top two
 *                                       bits equal to 10
 *
 * NOTE(review): any undefinition of these macros between the includes is
 * not visible in this view.
 */
106 #define NextByte(p, plen) ((p)++, (plen)--)
108 /* Set up to compile like_match.c for multibyte characters */
109 #define CHAREQ(p1, p2) wchareq((p1), (p2))
110 #define NextChar(p, plen) \
111 do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
112 #define CopyAdvChar(dst, src, srclen) \
113 do { int __l = pg_mblen(src); \
116 *(dst)++ = *(src)++; \
119 #define MatchText MB_MatchText
120 #define do_like_escape MB_do_like_escape
122 #include "like_match.c"
124 /* Set up to compile like_match.c for single-byte characters */
125 #define CHAREQ(p1, p2) (*(p1) == *(p2))
126 #define NextChar(p, plen) NextByte((p), (plen))
127 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
129 #define MatchText SB_MatchText
130 #define do_like_escape SB_do_like_escape
132 #include "like_match.c"
134 /* Set up to compile like_match.c for single-byte case-insensitive matches */
135 #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
136 #define NextChar(p, plen) NextByte((p), (plen))
137 #define MatchText SB_IMatchText
139 #include "like_match.c"
141 /* Set up to compile like_match.c for UTF8 encoding, using fast NextChar */
143 #define NextChar(p, plen) \
144 do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
145 #define MatchText UTF8_MatchText
147 #include "like_match.c"
149 /* Generic for all cases not requiring inline case-folding */
/*
 * GenericMatchText: match string s (slen bytes) against pattern p
 * (plen bytes) with the matcher suited to the database encoding.
 *
 * Dispatch: SB_MatchText when pg_database_encoding_max_length() is 1,
 * UTF8_MatchText when the database encoding is PG_UTF8, otherwise
 * MB_MatchText. Every call passes locale 0 and locale_is_c true.
 * Returns the chosen matcher's result code unchanged.
 */
151 GenericMatchText(char *s, int slen, char *p, int plen)
153 if (pg_database_encoding_max_length() == 1)
154 return SB_MatchText(s, slen, p, plen, 0, true);
155 else if (GetDatabaseEncoding() == PG_UTF8)
156 return UTF8_MatchText(s, slen, p, plen, 0, true);
158 return MB_MatchText(s, slen, p, plen, 0, true);
/*
 * Generic_Text_IC_like: case-insensitive LIKE of str against pat under
 * the given collation.
 *
 * When pg_database_encoding_max_length() > 1, both values are passed
 * through lower() with the collation and then matched by UTF8_MatchText
 * (database encoding PG_UTF8) or MB_MatchText.
 *
 * Otherwise the inputs are used as they are and SB_IMatchText is called
 * with a locale and a locale_is_c flag. lc_ctype_is_c() is consulted
 * first; for a collation other than DEFAULT_COLLATION_OID the locale
 * comes from pg_newlocale_from_collation(), and an invalid collation OID
 * is reported with ERRCODE_INDETERMINATE_COLLATION.
 * NOTE(review): the statement guarded by the lc_ctype_is_c() test is not
 * visible in this view.
 *
 * Returns the matcher's result code; the callers compare it to LIKE_TRUE.
 */
162 Generic_Text_IC_like(text *str, text *pat, Oid collation)
170 * For efficiency reasons, in the single byte case we don't call lower()
171 * on the pattern and text, but instead call SB_lower_char on each
172 * character. In the multi-byte case we don't have much choice :-(
175 if (pg_database_encoding_max_length() > 1)
177 /* Fold pattern and string with lower(); the results are read through the _ANY accessor macros */
178 pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
179 PointerGetDatum(pat)));
180 p = VARDATA_ANY(pat);
181 plen = VARSIZE_ANY_EXHDR(pat);
182 str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
183 PointerGetDatum(str)));
184 s = VARDATA_ANY(str);
185 slen = VARSIZE_ANY_EXHDR(str);
186 if (GetDatabaseEncoding() == PG_UTF8)
187 return UTF8_MatchText(s, slen, p, plen, 0, true);
189 return MB_MatchText(s, slen, p, plen, 0, true);
194 * Here we need to prepare locale information for SB_lower_char. This
195 * should match the methods used in str_tolower().
197 pg_locale_t locale = 0;
198 bool locale_is_c = false;
200 if (lc_ctype_is_c(collation))
202 else if (collation != DEFAULT_COLLATION_OID)
204 if (!OidIsValid(collation))
207 * This typically means that the parser could not resolve a
208 * conflict of implicit collations, so report it that way.
211 (errcode(ERRCODE_INDETERMINATE_COLLATION),
212 errmsg("could not determine which collation to use for ILIKE"),
213 errhint("Use the COLLATE clause to set the collation explicitly.")));
215 locale = pg_newlocale_from_collation(collation);
218 p = VARDATA_ANY(pat);
219 plen = VARSIZE_ANY_EXHDR(pat);
220 s = VARDATA_ANY(str);
221 slen = VARSIZE_ANY_EXHDR(str);
222 return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
227 * interface routines called by the function manager
/*
 * namelike: fmgr-callable LIKE of a name value (argument 0) against a
 * text pattern (argument 1).
 *
 * Returns true exactly when GenericMatchText() yields LIKE_TRUE.
 * NOTE(review): the statements that derive s and slen from str are not
 * visible in this view.
 */
231 namelike(PG_FUNCTION_ARGS)
233 Name str = PG_GETARG_NAME(0);
234 text *pat = PG_GETARG_TEXT_PP(1);
243 p = VARDATA_ANY(pat);
244 plen = VARSIZE_ANY_EXHDR(pat);
246 result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
248 PG_RETURN_BOOL(result);
/*
 * namenlike: fmgr-callable NOT LIKE of a name value (argument 0) against
 * a text pattern (argument 1).
 *
 * Returns true whenever GenericMatchText() yields anything other than
 * LIKE_TRUE, so every non-matching result code counts as "not like".
 * NOTE(review): the statements that derive s and slen from str are not
 * visible in this view.
 */
252 namenlike(PG_FUNCTION_ARGS)
254 Name str = PG_GETARG_NAME(0);
255 text *pat = PG_GETARG_TEXT_PP(1);
264 p = VARDATA_ANY(pat);
265 plen = VARSIZE_ANY_EXHDR(pat);
267 result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
269 PG_RETURN_BOOL(result);
/*
 * textlike: fmgr-callable LIKE of a text value (argument 0) against a
 * text pattern (argument 1).
 *
 * Both arguments are fetched with PG_GETARG_TEXT_PP and read through the
 * _ANY accessor macros. Returns true exactly when GenericMatchText()
 * yields LIKE_TRUE.
 */
273 textlike(PG_FUNCTION_ARGS)
275 text *str = PG_GETARG_TEXT_PP(0);
276 text *pat = PG_GETARG_TEXT_PP(1);
283 s = VARDATA_ANY(str);
284 slen = VARSIZE_ANY_EXHDR(str);
285 p = VARDATA_ANY(pat);
286 plen = VARSIZE_ANY_EXHDR(pat);
288 result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
290 PG_RETURN_BOOL(result);
/*
 * textnlike: fmgr-callable NOT LIKE of a text value (argument 0) against
 * a text pattern (argument 1).
 *
 * Returns true whenever GenericMatchText() yields anything other than
 * LIKE_TRUE.
 */
294 textnlike(PG_FUNCTION_ARGS)
296 text *str = PG_GETARG_TEXT_PP(0);
297 text *pat = PG_GETARG_TEXT_PP(1);
304 s = VARDATA_ANY(str);
305 slen = VARSIZE_ANY_EXHDR(str);
306 p = VARDATA_ANY(pat);
307 plen = VARSIZE_ANY_EXHDR(pat);
309 result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
311 PG_RETURN_BOOL(result);
/*
 * bytealike: fmgr-callable LIKE of a bytea value (argument 0) against a
 * bytea pattern (argument 1).
 *
 * Calls SB_MatchText directly, with locale 0 and locale_is_c true,
 * instead of dispatching on the database encoding. Returns true exactly
 * when the matcher yields LIKE_TRUE.
 */
315 bytealike(PG_FUNCTION_ARGS)
317 bytea *str = PG_GETARG_BYTEA_PP(0);
318 bytea *pat = PG_GETARG_BYTEA_PP(1);
325 s = VARDATA_ANY(str);
326 slen = VARSIZE_ANY_EXHDR(str);
327 p = VARDATA_ANY(pat);
328 plen = VARSIZE_ANY_EXHDR(pat);
330 result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
332 PG_RETURN_BOOL(result);
/*
 * byteanlike: fmgr-callable NOT LIKE of a bytea value (argument 0)
 * against a bytea pattern (argument 1).
 *
 * Calls SB_MatchText directly, with locale 0 and locale_is_c true.
 * Returns true whenever the matcher yields anything other than LIKE_TRUE.
 */
336 byteanlike(PG_FUNCTION_ARGS)
338 bytea *str = PG_GETARG_BYTEA_PP(0);
339 bytea *pat = PG_GETARG_BYTEA_PP(1);
346 s = VARDATA_ANY(str);
347 slen = VARSIZE_ANY_EXHDR(str);
348 p = VARDATA_ANY(pat);
349 plen = VARSIZE_ANY_EXHDR(pat);
351 result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
353 PG_RETURN_BOOL(result);
357 * Case-insensitive versions
/*
 * nameiclike: fmgr-callable case-insensitive LIKE of a name value
 * (argument 0) against a text pattern (argument 1).
 *
 * The name is turned into a text datum through name_text, then compared
 * by Generic_Text_IC_like() under the collation of the call
 * (PG_GET_COLLATION). Returns true exactly when the result is LIKE_TRUE.
 * NOTE(review): the argument line of the DirectFunctionCall1 call is not
 * visible in this view.
 */
361 nameiclike(PG_FUNCTION_ARGS)
363 Name str = PG_GETARG_NAME(0);
364 text *pat = PG_GETARG_TEXT_PP(1);
368 strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
370 result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
372 PG_RETURN_BOOL(result);
/*
 * nameicnlike: fmgr-callable case-insensitive NOT LIKE of a name value
 * (argument 0) against a text pattern (argument 1).
 *
 * The name is turned into a text datum through name_text, then compared
 * by Generic_Text_IC_like() under the collation of the call. Returns
 * true whenever the result is anything other than LIKE_TRUE.
 * NOTE(review): the argument line of the DirectFunctionCall1 call is not
 * visible in this view.
 */
376 nameicnlike(PG_FUNCTION_ARGS)
378 Name str = PG_GETARG_NAME(0);
379 text *pat = PG_GETARG_TEXT_PP(1);
383 strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
385 result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
387 PG_RETURN_BOOL(result);
/*
 * texticlike: fmgr-callable case-insensitive LIKE of a text value
 * (argument 0) against a text pattern (argument 1).
 *
 * Delegates to Generic_Text_IC_like() with the collation of the call and
 * returns true exactly when the result is LIKE_TRUE.
 */
391 texticlike(PG_FUNCTION_ARGS)
393 text *str = PG_GETARG_TEXT_PP(0);
394 text *pat = PG_GETARG_TEXT_PP(1);
397 result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
399 PG_RETURN_BOOL(result);
/*
 * texticnlike: fmgr-callable case-insensitive NOT LIKE of a text value
 * (argument 0) against a text pattern (argument 1).
 *
 * Delegates to Generic_Text_IC_like() with the collation of the call and
 * returns true whenever the result is anything other than LIKE_TRUE.
 */
403 texticnlike(PG_FUNCTION_ARGS)
405 text *str = PG_GETARG_TEXT_PP(0);
406 text *pat = PG_GETARG_TEXT_PP(1);
409 result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
411 PG_RETURN_BOOL(result);
415 * like_escape() --- given a pattern and an ESCAPE string,
416 * convert the pattern to use Postgres' standard backslash escape convention.
/*
 * like_escape: fmgr-callable conversion of a text pattern (argument 0)
 * that uses the ESCAPE string in argument 1.
 *
 * Uses SB_do_like_escape when pg_database_encoding_max_length() is 1 and
 * MB_do_like_escape otherwise, and returns the resulting text.
 */
419 like_escape(PG_FUNCTION_ARGS)
421 text *pat = PG_GETARG_TEXT_PP(0);
422 text *esc = PG_GETARG_TEXT_PP(1);
425 if (pg_database_encoding_max_length() == 1)
426 result = SB_do_like_escape(pat, esc);
428 result = MB_do_like_escape(pat, esc);
430 PG_RETURN_TEXT_P(result);
434 * like_escape_bytea() --- given a pattern and an ESCAPE string,
435 * convert the pattern to use Postgres' standard backslash escape convention.
/*
 * like_escape_bytea: bytea counterpart of like_escape.
 *
 * Always uses SB_do_like_escape, regardless of the database encoding.
 * The bytea arguments are cast to text pointers for that call and the
 * result is handed back as bytea.
 */
438 like_escape_bytea(PG_FUNCTION_ARGS)
440 bytea *pat = PG_GETARG_BYTEA_PP(0);
441 bytea *esc = PG_GETARG_BYTEA_PP(1);
442 bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
444 PG_RETURN_BYTEA_P((bytea *) result);