* A big hack of the regexp.c code!! Contributed by
* Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
*
- * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.47 2001/10/04 02:15:47 ishii Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.66 2006/10/04 00:29:59 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include <ctype.h>
-#ifdef MULTIBYTE
#include "mb/pg_wchar.h"
-#endif
#include "utils/builtins.h"
#define LIKE_ABORT (-1)
-static int MatchText(unsigned char *t, int tlen,
- unsigned char *p, int plen);
-static int MatchTextIC(unsigned char *t, int tlen,
- unsigned char *p, int plen);
-static int MatchBytea(unsigned char *t, int tlen,
- unsigned char *p, int plen);
+static int MatchText(char *t, int tlen, char *p, int plen);
+static int MatchTextIC(char *t, int tlen, char *p, int plen);
+static int MatchBytea(char *t, int tlen, char *p, int plen);
static text *do_like_escape(text *, text *);
-#ifdef MULTIBYTE
-static int MBMatchText(unsigned char *t, int tlen,
- unsigned char *p, int plen);
-static int MBMatchTextIC(unsigned char *t, int tlen,
- unsigned char *p, int plen);
+static int MBMatchText(char *t, int tlen, char *p, int plen);
+static int MBMatchTextIC(char *t, int tlen, char *p, int plen);
static text *MB_do_like_escape(text *, text *);
/*--------------------
*--------------------
*/
/* wchareq: returns 1 when the characters at p1 and p2 have the same pg_mblen() length and identical bytes, otherwise 0. */
static int
-wchareq(unsigned char *p1, unsigned char *p2)
+wchareq(char *p1, char *p2)
{
- int l;
+ int p1_len;
- l = pg_mblen(p1);
- if (pg_mblen(p2) != l)
- return (0);
- while (l--)
+ /* Optimization: quickly compare the first byte; when the first bytes differ we return 0 without calling pg_mblen() at all. */
+ if (*p1 != *p2)
+ return 0;
+
+ p1_len = pg_mblen(p1);
+ if (pg_mblen(p2) != p1_len)
+ return 0;
+
+ /* They are the same length (p1_len bytes each), so compare byte by byte; the first mismatch returns 0. */
+ while (p1_len--)
{
if (*p1++ != *p2++)
- return (0);
+ return 0;
}
- return (1);
+ return 1;
}
-/*--------------------
- * Support routine for MatchTextIC. Compares given multibyte streams
- * as wide characters ignoring case.
- * If they match, returns 1 otherwise returns 0.
- *--------------------
+/*
+ * Formerly we had a routine iwchareq() here that tried to do case-insensitive
+ * comparison of multibyte characters. It did not work at all, however,
+ * because it relied on tolower() which has a single-byte API ... and
+ * towlower() wouldn't be much better since we have no suitably cheap way
+ * of getting a single character transformed to the system's wchar_t format.
+ * So now, we just downcase the strings using lower() and apply regular LIKE
+ * comparison. This should be revisited when we install better locale support.
+ *
+ * Note that MBMatchText and MBMatchTextIC do exactly the same thing now.
+ * Is it worth refactoring to avoid duplicated code? They might become
+ * different again in the future.
*/
-#define CHARMAX 0x80
-
-static int
-iwchareq(unsigned char *p1, unsigned char *p2)
-{
- int c1[2],
- c2[2];
- int l;
-
- /*
- * short cut. if *p1 and *p2 is lower than CHARMAX, then we could
- * assume they are ASCII
- */
- if (*p1 < CHARMAX && *p2 < CHARMAX)
- return (tolower(*p1) == tolower(*p2));
-
- /*
- * if one of them is an ASCII while the other is not, then they must
- * be different characters
- */
- else if (*p1 < CHARMAX || *p2 < CHARMAX)
- return (0);
-
- /*
- * ok, p1 and p2 are both > CHARMAX, then they must be multi-byte
- * characters
- */
- l = pg_mblen(p1);
- (void) pg_mb2wchar_with_len(p1, (pg_wchar *) c1, l);
- c1[0] = tolower(c1[0]);
- l = pg_mblen(p2);
- (void) pg_mb2wchar_with_len(p2, (pg_wchar *) c2, l);
- c2[0] = tolower(c2[0]);
- return (c1[0] == c2[0]);
-}
-
-#endif
-#ifdef MULTIBYTE
+/* Set up to compile like_match.c for multibyte characters */
#define CHAREQ(p1, p2) wchareq(p1, p2)
-#define ICHAREQ(p1, p2) iwchareq(p1, p2)
+#define ICHAREQ(p1, p2) wchareq(p1, p2)
#define NextChar(p, plen) \
do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
#define CopyAdvChar(dst, src, srclen) \
} while (0)
#define MatchText MBMatchText
-#define MatchTextIC MBMatchTextIC
+#define MatchTextIC MBMatchTextIC
#define do_like_escape MB_do_like_escape
+
#include "like_match.c"
+
#undef CHAREQ
#undef ICHAREQ
#undef NextChar
#undef MatchText
#undef MatchTextIC
#undef do_like_escape
-#endif
+/* Set up to compile like_match.c for single-byte characters */
#define CHAREQ(p1, p2) (*(p1) == *(p2))
-#define ICHAREQ(p1, p2) (tolower(*(p1)) == tolower(*(p2)))
+#define ICHAREQ(p1, p2) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2)))
#define NextChar(p, plen) ((p)++, (plen)--)
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
+#include "like_match.c"
+
+/* And some support for BYTEA */
#define BYTEA_CHAREQ(p1, p2) (*(p1) == *(p2))
#define BYTEA_NextChar(p, plen) ((p)++, (plen)--)
#define BYTEA_CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
-#include "like_match.c"
+
/*
* interface routines called by the function manager
Name str = PG_GETARG_NAME(0);
text *pat = PG_GETARG_TEXT_P(1);
bool result;
- unsigned char *s,
+ char *s,
*p;
int slen,
plen;
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
-#ifdef MULTIBYTE
if (pg_database_encoding_max_length() == 1)
- result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
+ result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
else
- result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
-#else
- result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
-#endif
+ result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
PG_RETURN_BOOL(result);
}
Name str = PG_GETARG_NAME(0);
text *pat = PG_GETARG_TEXT_P(1);
bool result;
- unsigned char *s,
+ char *s,
*p;
int slen,
plen;
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
-#ifdef MULTIBYTE
if (pg_database_encoding_max_length() == 1)
- result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
+ result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
else
- result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
-#else
- result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
-#endif
+ result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
PG_RETURN_BOOL(result);
}
text *str = PG_GETARG_TEXT_P(0);
text *pat = PG_GETARG_TEXT_P(1);
bool result;
- unsigned char *s,
+ char *s,
*p;
int slen,
plen;
+
s = VARDATA(str);
slen = (VARSIZE(str) - VARHDRSZ);
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
-#ifdef MULTIBYTE
if (pg_database_encoding_max_length() == 1)
- result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
+ result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
else
- result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
-#else
- result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
-#endif
+ result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
PG_RETURN_BOOL(result);
}
text *str = PG_GETARG_TEXT_P(0);
text *pat = PG_GETARG_TEXT_P(1);
bool result;
- unsigned char *s,
+ char *s,
*p;
int slen,
plen;
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
-#ifdef MULTIBYTE
if (pg_database_encoding_max_length() == 1)
- result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
+ result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
else
- result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
-#else
- result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
-#endif
+ result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
PG_RETURN_BOOL(result);
}
bytea *str = PG_GETARG_BYTEA_P(0);
bytea *pat = PG_GETARG_BYTEA_P(1);
bool result;
- unsigned char *s,
+ char *s,
*p;
int slen,
plen;
bytea *str = PG_GETARG_BYTEA_P(0);
bytea *pat = PG_GETARG_BYTEA_P(1);
bool result;
- unsigned char *s,
+ char *s,
*p;
int slen,
plen;
Name str = PG_GETARG_NAME(0);
text *pat = PG_GETARG_TEXT_P(1);
bool result;
- unsigned char *s,
+ char *s,
*p;
int slen,
plen;
- s = NameStr(*str);
- slen = strlen(s);
- p = VARDATA(pat);
- plen = (VARSIZE(pat) - VARHDRSZ);
-
-#ifdef MULTIBYTE
if (pg_database_encoding_max_length() == 1)
- result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+ {
+ s = NameStr(*str);
+ slen = strlen(s);
+ p = VARDATA(pat);
+ plen = (VARSIZE(pat) - VARHDRSZ);
+ result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+ }
else
- result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
-#else
- result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
-#endif
+ {
+ /* Force inputs to lower case to achieve case insensitivity */
+ text *strtext;
+
+ strtext = DatumGetTextP(DirectFunctionCall1(name_text,
+ NameGetDatum(str)));
+ strtext = DatumGetTextP(DirectFunctionCall1(lower,
+ PointerGetDatum(strtext)));
+ pat = DatumGetTextP(DirectFunctionCall1(lower,
+ PointerGetDatum(pat)));
+
+ s = VARDATA(strtext);
+ slen = (VARSIZE(strtext) - VARHDRSZ);
+ p = VARDATA(pat);
+ plen = (VARSIZE(pat) - VARHDRSZ);
+ result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+ }
PG_RETURN_BOOL(result);
}
Name str = PG_GETARG_NAME(0);
text *pat = PG_GETARG_TEXT_P(1);
bool result;
- unsigned char *s,
+ char *s,
*p;
int slen,
plen;
- s = NameStr(*str);
- slen = strlen(s);
- p = VARDATA(pat);
- plen = (VARSIZE(pat) - VARHDRSZ);
-
-#ifdef MULTIBYTE
if (pg_database_encoding_max_length() == 1)
- result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+ {
+ s = NameStr(*str);
+ slen = strlen(s);
+ p = VARDATA(pat);
+ plen = (VARSIZE(pat) - VARHDRSZ);
+ result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+ }
else
- result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
-#else
- result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
-#endif
+ {
+ /* Force inputs to lower case to achieve case insensitivity */
+ text *strtext;
+
+ strtext = DatumGetTextP(DirectFunctionCall1(name_text,
+ NameGetDatum(str)));
+ strtext = DatumGetTextP(DirectFunctionCall1(lower,
+ PointerGetDatum(strtext)));
+ pat = DatumGetTextP(DirectFunctionCall1(lower,
+ PointerGetDatum(pat)));
+
+ s = VARDATA(strtext);
+ slen = (VARSIZE(strtext) - VARHDRSZ);
+ p = VARDATA(pat);
+ plen = (VARSIZE(pat) - VARHDRSZ);
+ result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+ }
PG_RETURN_BOOL(result);
}
text *str = PG_GETARG_TEXT_P(0);
text *pat = PG_GETARG_TEXT_P(1);
bool result;
- unsigned char *s,
+ char *s,
*p;
int slen,
plen;
- s = VARDATA(str);
- slen = (VARSIZE(str) - VARHDRSZ);
- p = VARDATA(pat);
- plen = (VARSIZE(pat) - VARHDRSZ);
-
-#ifdef MULTIBYTE
if (pg_database_encoding_max_length() == 1)
- result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+ {
+ s = VARDATA(str);
+ slen = (VARSIZE(str) - VARHDRSZ);
+ p = VARDATA(pat);
+ plen = (VARSIZE(pat) - VARHDRSZ);
+ result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+ }
else
- result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
-#else
- result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
-#endif
+ {
+ /* Force inputs to lower case to achieve case insensitivity */
+ str = DatumGetTextP(DirectFunctionCall1(lower,
+ PointerGetDatum(str)));
+ pat = DatumGetTextP(DirectFunctionCall1(lower,
+ PointerGetDatum(pat)));
+ s = VARDATA(str);
+ slen = (VARSIZE(str) - VARHDRSZ);
+ p = VARDATA(pat);
+ plen = (VARSIZE(pat) - VARHDRSZ);
+ result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+ }
PG_RETURN_BOOL(result);
}
text *str = PG_GETARG_TEXT_P(0);
text *pat = PG_GETARG_TEXT_P(1);
bool result;
- unsigned char *s,
+ char *s,
*p;
int slen,
plen;
- s = VARDATA(str);
- slen = (VARSIZE(str) - VARHDRSZ);
- p = VARDATA(pat);
- plen = (VARSIZE(pat) - VARHDRSZ);
-
-#ifdef MULTIBYTE
if (pg_database_encoding_max_length() == 1)
- result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+ {
+ s = VARDATA(str);
+ slen = (VARSIZE(str) - VARHDRSZ);
+ p = VARDATA(pat);
+ plen = (VARSIZE(pat) - VARHDRSZ);
+ result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+ }
else
- result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
-#else
- result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
-#endif
+ {
+ /* Force inputs to lower case to achieve case insensitivity */
+ str = DatumGetTextP(DirectFunctionCall1(lower,
+ PointerGetDatum(str)));
+ pat = DatumGetTextP(DirectFunctionCall1(lower,
+ PointerGetDatum(pat)));
+ s = VARDATA(str);
+ slen = (VARSIZE(str) - VARHDRSZ);
+ p = VARDATA(pat);
+ plen = (VARSIZE(pat) - VARHDRSZ);
+ result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+ }
PG_RETURN_BOOL(result);
}
text *esc = PG_GETARG_TEXT_P(1);
text *result;
-#ifdef MULTIBYTE
if (pg_database_encoding_max_length() == 1)
- result = do_like_escape(pat, esc);
+ result = do_like_escape(pat, esc);
else
- result = MB_do_like_escape(pat, esc);
-#else
- result = do_like_escape(pat, esc);
-#endif
+ result = MB_do_like_escape(pat, esc);
PG_RETURN_TEXT_P(result);
}
bytea *pat = PG_GETARG_BYTEA_P(0);
bytea *esc = PG_GETARG_BYTEA_P(1);
bytea *result;
- unsigned char *p,
+ char *p,
*e,
*r;
int plen,
if (elen == 0)
{
-
/*
* No escape character is wanted. Double any backslashes in the
* pattern to make them act like ordinary characters.
}
else
{
-
/*
* The specified escape must be only a single character.
*/
BYTEA_NextChar(e, elen);
if (elen != 0)
- elog(ERROR, "ESCAPE string must be empty or one character");
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+ errmsg("invalid escape string"),
+ errhint("Escape string must be empty or one character.")));
+
e = VARDATA(esc);
/*
}
/*
- * Otherwise, convert occurrences of the specified escape
- * character to '\', and double occurrences of '\' --- unless they
- * immediately follow an escape character!
+ * Otherwise, convert occurrences of the specified escape character to
+ * '\', and double occurrences of '\' --- unless they immediately
+ * follow an escape character!
*/
afterescape = false;
while (plen > 0)
}
}
- VARATT_SIZEP(result) = r - ((unsigned char *) result);
+ VARATT_SIZEP(result) = r - ((char *) result);
PG_RETURN_BYTEA_P(result);
}
* Same as above, but specifically for bytea (binary) datatype
*/
static int
-MatchBytea(unsigned char *t, int tlen, unsigned char *p, int plen)
+MatchBytea(char *t, int tlen, char *p, int plen)
{
/* Fast path for match-everything pattern */
if ((plen == 1) && (*p == '%'))
return LIKE_TRUE;
/*
- * Otherwise, scan for a text position at which we can match
- * the rest of the pattern.
+ * Otherwise, scan for a text position at which we can match the
+ * rest of the pattern.
*/
while (tlen > 0)
{
-
/*
* Optimization to prevent most recursion: don't recurse
* unless first pattern char might match this text char.
}
/*
- * End of text with no match, so no point in trying later
- * places to start matching this pattern.
+ * End of text with no match, so no point in trying later places
+ * to start matching this pattern.
*/
return LIKE_ABORT;
}
else if ((*p != '_') && !BYTEA_CHAREQ(t, p))
{
-
/*
- * Not the single-character wildcard and no explicit match?
- * Then time to quit...
+ * Not the single-character wildcard and no explicit match? Then
+ * time to quit...
*/
return LIKE_FALSE;
}
return LIKE_TRUE;
/*
- * End of text with no match, so no point in trying later places to
- * start matching this pattern.
+ * End of text with no match, so no point in trying later places to start
+ * matching this pattern.
*/
return LIKE_ABORT;
} /* MatchBytea() */