1 /*-------------------------------------------------------------------------
4 * like expression handling code.
7 * A big hack of the regexp.c code!! Contributed by
8 * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
10 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
14 * $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.66 2006/10/04 00:29:59 momjian Exp $
16 *-------------------------------------------------------------------------
22 #include "mb/pg_wchar.h"
23 #include "utils/builtins.h"
28 #define LIKE_ABORT (-1)
/* Matchers and escape helper used on the pg_database_encoding_max_length() == 1 path, plus the bytea matcher defined in this file. */
31 static int MatchText(char *t, int tlen, char *p, int plen);
32 static int MatchTextIC(char *t, int tlen, char *p, int plen);
33 static int MatchBytea(char *t, int tlen, char *p, int plen);
34 static text *do_like_escape(text *, text *);
/* Multibyte counterparts; they come from compiling like_match.c with the generic names remapped to these MB names. */
36 static int MBMatchText(char *t, int tlen, char *p, int plen);
37 static int MBMatchTextIC(char *t, int tlen, char *p, int plen);
38 static text *MB_do_like_escape(text *, text *);
40 /*--------------------
41 * Support routine for MatchText. Compares given multibyte streams
42 * as wide characters. If they match, returns 1 otherwise returns 0.
 *
 * The byte length of the character at each pointer is obtained with
 * pg_mblen(), and the two lengths are compared before anything else
 * beyond the first byte.
 * NOTE(review): the byte-by-byte comparison that presumably follows the
 * length check is not visible in this listing -- confirm.
46 wchareq(char *p1, char *p2)
50 /* Optimization: quickly compare the first byte. */
/* Then compare the encoded lengths reported by pg_mblen() for the two characters. */
54 p1_len = pg_mblen(p1);
55 if (pg_mblen(p2) != p1_len)
58 /* They are the same length */
68 * Formerly we had a routine iwchareq() here that tried to do case-insensitive
69 * comparison of multibyte characters. It did not work at all, however,
70 * because it relied on tolower() which has a single-byte API ... and
71 * towlower() wouldn't be much better since we have no suitably cheap way
72 * of getting a single character transformed to the system's wchar_t format.
73 * So now, we just downcase the strings using lower() and apply regular LIKE
74 * comparison. This should be revisited when we install better locale support.
76 * Note that MBMatchText and MBMatchTextIC do exactly the same thing now.
77 * Is it worth refactoring to avoid duplicated code? They might become
78 * different again in the future.
81 /* Set up to compile like_match.c for multibyte characters */
/* Both equality macros map to wchareq(): the multibyte case-insensitive paths in this file lower-case their inputs before matching. */
82 #define CHAREQ(p1, p2) wchareq(p1, p2)
83 #define ICHAREQ(p1, p2) wchareq(p1, p2)
/* Step p forward by pg_mblen(p) bytes and reduce plen by the same amount. */
84 #define NextChar(p, plen) \
85 do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
/* Copy the character at src to dst; its byte length is taken from pg_mblen(src). */
86 #define CopyAdvChar(dst, src, srclen) \
87 do { int __l = pg_mblen(src); \
90 *(dst)++ = *(src)++; \
93 #define MatchText MBMatchText
94 #define MatchTextIC MBMatchTextIC
95 #define do_like_escape MB_do_like_escape
/* With the names remapped above, this inclusion yields the MB-prefixed functions. */
97 #include "like_match.c"
105 #undef do_like_escape
107 /* Set up to compile like_match.c for single-byte characters */
/* Here the macros work on one byte at a time: plain dereference, increment and decrement. */
108 #define CHAREQ(p1, p2) (*(p1) == *(p2))
/* Each byte is cast to unsigned char before being handed to tolower(). */
109 #define ICHAREQ(p1, p2) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2)))
110 #define NextChar(p, plen) ((p)++, (plen)--)
111 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
/* Second inclusion of like_match.c, now with the single-byte macros in effect. */
113 #include "like_match.c"
115 /* And some support for BYTEA */
/* Same expansions as the single-byte CHAREQ, NextChar and CopyAdvChar, under BYTEA_ names used by the bytea routines in this file. */
116 #define BYTEA_CHAREQ(p1, p2) (*(p1) == *(p2))
117 #define BYTEA_NextChar(p, plen) ((p)++, (plen)--)
118 #define BYTEA_CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
122 * interface routines called by the function manager
/*
 * namelike: match a name value (argument 0) against a text pattern
 * (argument 1).  The result is true only when the matcher returns LIKE_TRUE.
 */
126 namelike(PG_FUNCTION_ARGS)
128 Name str = PG_GETARG_NAME(0);
129 text *pat = PG_GETARG_TEXT_P(1);
/* Pattern length is the varlena size minus its header. */
139 plen = (VARSIZE(pat) - VARHDRSZ);
/* A maximum encoding length of 1 selects the single-byte matcher; MBMatchText is the multibyte alternative. */
141 if (pg_database_encoding_max_length() == 1)
142 result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
144 result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
146 PG_RETURN_BOOL(result);
/*
 * namenlike: negated form of the name-versus-text-pattern match.
 * The result is true for any matcher outcome other than LIKE_TRUE.
 */
150 namenlike(PG_FUNCTION_ARGS)
152 Name str = PG_GETARG_NAME(0);
153 text *pat = PG_GETARG_TEXT_P(1);
/* Pattern length is the varlena size minus its header. */
163 plen = (VARSIZE(pat) - VARHDRSZ);
/* A maximum encoding length of 1 selects the single-byte matcher; MBMatchText is the multibyte alternative. */
165 if (pg_database_encoding_max_length() == 1)
166 result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
168 result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
170 PG_RETURN_BOOL(result);
/*
 * textlike: match a text value (argument 0) against a text pattern
 * (argument 1).  The result is true only when the matcher returns LIKE_TRUE.
 */
174 textlike(PG_FUNCTION_ARGS)
176 text *str = PG_GETARG_TEXT_P(0);
177 text *pat = PG_GETARG_TEXT_P(1);
/* Both lengths are payload sizes: varlena size minus the header. */
185 slen = (VARSIZE(str) - VARHDRSZ);
187 plen = (VARSIZE(pat) - VARHDRSZ);
/* A maximum encoding length of 1 selects the single-byte matcher; MBMatchText is the multibyte alternative. */
189 if (pg_database_encoding_max_length() == 1)
190 result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
192 result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
194 PG_RETURN_BOOL(result);
/*
 * textnlike: negated form of the text-versus-text-pattern match.
 * The result is true for any matcher outcome other than LIKE_TRUE.
 */
198 textnlike(PG_FUNCTION_ARGS)
200 text *str = PG_GETARG_TEXT_P(0);
201 text *pat = PG_GETARG_TEXT_P(1);
/* Both lengths are payload sizes: varlena size minus the header. */
209 slen = (VARSIZE(str) - VARHDRSZ);
211 plen = (VARSIZE(pat) - VARHDRSZ);
/* A maximum encoding length of 1 selects the single-byte matcher; MBMatchText is the multibyte alternative. */
213 if (pg_database_encoding_max_length() == 1)
214 result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
216 result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
218 PG_RETURN_BOOL(result);
/*
 * bytealike: match a bytea value (argument 0) against a bytea pattern
 * (argument 1).  There is no encoding check here; MatchBytea is always used.
 * The result is true only when it returns LIKE_TRUE.
 */
222 bytealike(PG_FUNCTION_ARGS)
224 bytea *str = PG_GETARG_BYTEA_P(0);
225 bytea *pat = PG_GETARG_BYTEA_P(1);
/* Both lengths are payload sizes: varlena size minus the header. */
233 slen = (VARSIZE(str) - VARHDRSZ);
235 plen = (VARSIZE(pat) - VARHDRSZ);
237 result = (MatchBytea(s, slen, p, plen) == LIKE_TRUE);
239 PG_RETURN_BOOL(result);
/*
 * byteanlike: negated form of the bytea match.  MatchBytea is always used,
 * and the result is true for any outcome other than LIKE_TRUE.
 */
243 byteanlike(PG_FUNCTION_ARGS)
245 bytea *str = PG_GETARG_BYTEA_P(0);
246 bytea *pat = PG_GETARG_BYTEA_P(1);
/* Both lengths are payload sizes: varlena size minus the header. */
254 slen = (VARSIZE(str) - VARHDRSZ);
256 plen = (VARSIZE(pat) - VARHDRSZ);
258 result = (MatchBytea(s, slen, p, plen) != LIKE_TRUE);
260 PG_RETURN_BOOL(result);
264 * Case-insensitive versions
/*
 * nameiclike: case-insensitive match of a name value (argument 0) against
 * a text pattern (argument 1).  True only when the matcher returns LIKE_TRUE.
 */
268 nameiclike(PG_FUNCTION_ARGS)
270 Name str = PG_GETARG_NAME(0);
271 text *pat = PG_GETARG_TEXT_P(1);
/* With a maximum encoding length of 1, MatchTextIC is applied to the inputs as given. */
278 if (pg_database_encoding_max_length() == 1)
283 plen = (VARSIZE(pat) - VARHDRSZ);
284 result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
288 /* Force inputs to lower case to achieve case insensitivity */
/* The name is converted through name_text, then both the converted value and the pattern go through lower(). */
291 strtext = DatumGetTextP(DirectFunctionCall1(name_text,
293 strtext = DatumGetTextP(DirectFunctionCall1(lower,
294 PointerGetDatum(strtext)));
295 pat = DatumGetTextP(DirectFunctionCall1(lower,
296 PointerGetDatum(pat)));
/* From here on the lower-cased copies are what gets matched. */
298 s = VARDATA(strtext);
299 slen = (VARSIZE(strtext) - VARHDRSZ);
301 plen = (VARSIZE(pat) - VARHDRSZ);
302 result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
305 PG_RETURN_BOOL(result);
/*
 * nameicnlike: negated case-insensitive match of a name value against a
 * text pattern.  True for any matcher outcome other than LIKE_TRUE.
 */
309 nameicnlike(PG_FUNCTION_ARGS)
311 Name str = PG_GETARG_NAME(0);
312 text *pat = PG_GETARG_TEXT_P(1);
/* With a maximum encoding length of 1, MatchTextIC is applied to the inputs as given. */
319 if (pg_database_encoding_max_length() == 1)
324 plen = (VARSIZE(pat) - VARHDRSZ);
325 result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
329 /* Force inputs to lower case to achieve case insensitivity */
/* The name is converted through name_text, then both the converted value and the pattern go through lower(). */
332 strtext = DatumGetTextP(DirectFunctionCall1(name_text,
334 strtext = DatumGetTextP(DirectFunctionCall1(lower,
335 PointerGetDatum(strtext)));
336 pat = DatumGetTextP(DirectFunctionCall1(lower,
337 PointerGetDatum(pat)));
/* From here on the lower-cased copies are what gets matched. */
339 s = VARDATA(strtext);
340 slen = (VARSIZE(strtext) - VARHDRSZ);
342 plen = (VARSIZE(pat) - VARHDRSZ);
343 result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
346 PG_RETURN_BOOL(result);
/*
 * texticlike: case-insensitive match of a text value (argument 0) against
 * a text pattern (argument 1).  True only when the matcher returns LIKE_TRUE.
 */
350 texticlike(PG_FUNCTION_ARGS)
352 text *str = PG_GETARG_TEXT_P(0);
353 text *pat = PG_GETARG_TEXT_P(1);
/* With a maximum encoding length of 1, MatchTextIC is applied to the inputs as given. */
360 if (pg_database_encoding_max_length() == 1)
363 slen = (VARSIZE(str) - VARHDRSZ);
365 plen = (VARSIZE(pat) - VARHDRSZ);
366 result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
370 /* Force inputs to lower case to achieve case insensitivity */
/* str and pat are replaced by the results of lower(), so the lengths below refer to the lower-cased values. */
371 str = DatumGetTextP(DirectFunctionCall1(lower,
372 PointerGetDatum(str)));
373 pat = DatumGetTextP(DirectFunctionCall1(lower,
374 PointerGetDatum(pat)));
376 slen = (VARSIZE(str) - VARHDRSZ);
378 plen = (VARSIZE(pat) - VARHDRSZ);
379 result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
382 PG_RETURN_BOOL(result);
/*
 * texticnlike: negated case-insensitive match of a text value against a
 * text pattern.  True for any matcher outcome other than LIKE_TRUE.
 */
386 texticnlike(PG_FUNCTION_ARGS)
388 text *str = PG_GETARG_TEXT_P(0);
389 text *pat = PG_GETARG_TEXT_P(1);
/* With a maximum encoding length of 1, MatchTextIC is applied to the inputs as given. */
396 if (pg_database_encoding_max_length() == 1)
399 slen = (VARSIZE(str) - VARHDRSZ);
401 plen = (VARSIZE(pat) - VARHDRSZ);
402 result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
406 /* Force inputs to lower case to achieve case insensitivity */
/* str and pat are replaced by the results of lower(), so the lengths below refer to the lower-cased values. */
407 str = DatumGetTextP(DirectFunctionCall1(lower,
408 PointerGetDatum(str)));
409 pat = DatumGetTextP(DirectFunctionCall1(lower,
410 PointerGetDatum(pat)));
412 slen = (VARSIZE(str) - VARHDRSZ);
414 plen = (VARSIZE(pat) - VARHDRSZ);
415 result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
418 PG_RETURN_BOOL(result);
422 * like_escape() --- given a pattern and an ESCAPE string,
423 * convert the pattern to use Postgres' standard backslash escape convention.
/* Takes the pattern as argument 0 and the ESCAPE string as argument 1; returns the converted pattern as text. */
426 like_escape(PG_FUNCTION_ARGS)
428 text *pat = PG_GETARG_TEXT_P(0);
429 text *esc = PG_GETARG_TEXT_P(1);
/* The conversion itself is done by do_like_escape or its multibyte twin, chosen by the maximum encoding length. */
432 if (pg_database_encoding_max_length() == 1)
433 result = do_like_escape(pat, esc);
435 result = MB_do_like_escape(pat, esc);
437 PG_RETURN_TEXT_P(result);
441 * like_escape_bytea() --- given a pattern and an ESCAPE string,
442 * convert the pattern to use Postgres' standard backslash escape convention.
/*
 * bytea variant: takes the pattern as argument 0 and the escape string as
 * argument 1, works one byte at a time through the BYTEA_ macros, and
 * returns the converted pattern in a freshly palloc-ed buffer.
 * Reports ERRCODE_INVALID_ESCAPE_SEQUENCE for an escape string that is
 * neither empty nor a single character (per the hint text below).
 */
445 like_escape_bytea(PG_FUNCTION_ARGS)
447 bytea *pat = PG_GETARG_BYTEA_P(0);
448 bytea *esc = PG_GETARG_BYTEA_P(1);
/* Payload lengths of pattern and escape string: varlena size minus the header. */
458 plen = (VARSIZE(pat) - VARHDRSZ);
460 elen = (VARSIZE(esc) - VARHDRSZ);
463 * Worst-case pattern growth is 2x --- unlikely, but it's hardly worth
464 * trying to calculate the size more accurately than that.
466 result = (text *) palloc(plen * 2 + VARHDRSZ);
/* NOTE(review): the cast names text * while the value is returned with PG_RETURN_BYTEA_P; the declaration of result is not visible here -- confirm the two types are interchangeable. */
472 * No escape character is wanted. Double any backslashes in the
473 * pattern to make them act like ordinary characters.
479 BYTEA_CopyAdvChar(r, p, plen);
485 * The specified escape must be only a single character.
487 BYTEA_NextChar(e, elen);
490 (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
491 errmsg("invalid escape string"),
492 errhint("Escape string must be empty or one character.")));
497 * If specified escape is '\', just copy the pattern as-is.
/* VARSIZE(pat) covers header and payload, so this copy carries the size field of pat along with the data. */
501 memcpy(result, pat, VARSIZE(pat));
502 PG_RETURN_BYTEA_P(result);
506 * Otherwise, convert occurrences of the specified escape character to
507 * '\', and double occurrences of '\' --- unless they immediately
508 * follow an escape character!
513 if (BYTEA_CHAREQ(p, e) && !afterescape)
516 BYTEA_NextChar(p, plen);
524 BYTEA_NextChar(p, plen);
529 BYTEA_CopyAdvChar(r, p, plen);
/* The stored size is the distance from the start of result to the write cursor r. */
535 VARATT_SIZEP(result) = r - ((char *) result);
537 PG_RETURN_BYTEA_P(result);
541 * Same as above, but specifically for bytea (binary) datatype
/*
 * MatchBytea: match tlen bytes at t against the plen-byte pattern at p,
 * one byte at a time via the BYTEA_ macros.
 *
 * Pattern bytes with special meaning, as handled below: a backslash makes
 * the following pattern byte match literally, a percent sign matches any
 * run of bytes (repeated percent signs collapse into one), and an
 * underscore is the single-character wildcard.
 *
 * Callers in this file compare the result against LIKE_TRUE.  The function
 * recurses on itself after a percent sign and passes any result other than
 * LIKE_FALSE straight back up.
 */
544 MatchBytea(char *t, int tlen, char *p, int plen)
546 /* Fast path for match-everything pattern */
547 if ((plen == 1) && (*p == '%'))
550 while ((tlen > 0) && (plen > 0))
554 /* Next pattern char must match literally, whatever it is */
555 BYTEA_NextChar(p, plen);
556 if ((plen <= 0) || !BYTEA_CHAREQ(t, p))
561 /* %% is the same as % according to the SQL standard */
562 /* Advance past all %'s */
563 while ((plen > 0) && (*p == '%'))
564 BYTEA_NextChar(p, plen);
565 /* Trailing percent matches everything. */
570 * Otherwise, scan for a text position at which we can match the
571 * rest of the pattern.
576 * Optimization to prevent most recursion: don't recurse
577 * unless first pattern char might match this text char.
579 if (BYTEA_CHAREQ(t, p) || (*p == '\\') || (*p == '_'))
581 int matched = MatchBytea(t, tlen, p, plen);
583 if (matched != LIKE_FALSE)
584 return matched; /* TRUE or ABORT */
587 BYTEA_NextChar(t, tlen);
591 * End of text with no match, so no point in trying later places
592 * to start matching this pattern.
596 else if ((*p != '_') && !BYTEA_CHAREQ(t, p))
599 * Not the single-character wildcard and no explicit match? Then
605 BYTEA_NextChar(t, tlen);
606 BYTEA_NextChar(p, plen);
610 return LIKE_FALSE; /* end of pattern, but not of text */
612 /* End of input string. Do we have matching pattern remaining? */
613 while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of
615 BYTEA_NextChar(p, plen);
620 * End of text with no match, so no point in trying later places to start
621 * matching this pattern.