]> granicus.if.org Git - postgresql/commitdiff
Improve efficiency of LIKE/ILIKE code, especially for multi-byte charsets,
authorAndrew Dunstan <andrew@dunslane.net>
Sat, 2 Jun 2007 02:03:42 +0000 (02:03 +0000)
committerAndrew Dunstan <andrew@dunslane.net>
Sat, 2 Jun 2007 02:03:42 +0000 (02:03 +0000)
and most especially for UTF8. Remove unnecessary special cases for bytea
processing and single-byte charset ILIKE.  a ILIKE b is now processed as
lower(a) LIKE lower(b) in all cases. The code is now considerably simpler. All
comparisons are now performed byte-wise, and the text and pattern are also
advanced byte-wise where it is safe to do so - essentially where a wildcard is
not being matched.
Andrew Dunstan, from an original patch by ITAGAKI Takahiro, with ideas from
Tom Lane and Mark Mielke.

src/backend/utils/adt/like.c
src/backend/utils/adt/like_match.c

index 46f223b38ffd1519b941ddd4d1254d5a6532eab4..de5d7e7c8599344588a15ab3d7425745892d4b31 100644 (file)
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *     $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.68 2007/02/27 23:48:08 tgl Exp $
+ *     $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.69 2007/06/02 02:03:42 adunstan Exp $
  *
  *-------------------------------------------------------------------------
  */
 #define LIKE_ABORT                                             (-1)
 
 
-static int     MatchText(char *t, int tlen, char *p, int plen);
-static int     MatchTextIC(char *t, int tlen, char *p, int plen);
-static int     MatchBytea(char *t, int tlen, char *p, int plen);
-static text *do_like_escape(text *, text *);
+static int     SB_MatchText(char *t, int tlen, char *p, int plen);
+static text *SB_do_like_escape(text *, text *);
 
-static int     MBMatchText(char *t, int tlen, char *p, int plen);
-static int     MBMatchTextIC(char *t, int tlen, char *p, int plen);
+static int     MB_MatchText(char *t, int tlen, char *p, int plen);
 static text *MB_do_like_escape(text *, text *);
 
+static int     UTF8_MatchText(char *t, int tlen, char *p, int plen);
+
+static int     GenericMatchText(char *s, int slen, char* p, int plen);
+static int     Generic_Text_IC_like(text *str, text *pat);
+
 /*--------------------
  * Support routine for MatchText. Compares given multibyte streams
  * as wide characters. If they match, returns 1 otherwise returns 0.
  *--------------------
  */
-static int
+static inline int
 wchareq(char *p1, char *p2)
 {
        int                     p1_len;
@@ -72,15 +74,12 @@ wchareq(char *p1, char *p2)
  * of getting a single character transformed to the system's wchar_t format.
  * So now, we just downcase the strings using lower() and apply regular LIKE
  * comparison. This should be revisited when we install better locale support.
- *
- * Note that MBMatchText and MBMatchTextIC do exactly the same thing now.
- * Is it worth refactoring to avoid duplicated code?  They might become
- * different again in the future.
  */
 
+#define NextByte(p, plen)      ((p)++, (plen)--)
+
 /* Set up to compile like_match.c for multibyte characters */
-#define CHAREQ(p1, p2) wchareq(p1, p2)
-#define ICHAREQ(p1, p2) wchareq(p1, p2)
+#define CHAREQ(p1, p2) wchareq((p1), (p2))
 #define NextChar(p, plen) \
        do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
 #define CopyAdvChar(dst, src, srclen) \
@@ -90,33 +89,59 @@ wchareq(char *p1, char *p2)
                         *(dst)++ = *(src)++; \
           } while (0)
 
-#define MatchText      MBMatchText
-#define MatchTextIC MBMatchTextIC
+#define MatchText      MB_MatchText
 #define do_like_escape MB_do_like_escape
 
 #include "like_match.c"
 
-#undef CHAREQ
-#undef ICHAREQ
-#undef NextChar
-#undef CopyAdvChar
-#undef MatchText
-#undef MatchTextIC
-#undef do_like_escape
-
 /* Set up to compile like_match.c for single-byte characters */
 #define CHAREQ(p1, p2) (*(p1) == *(p2))
-#define ICHAREQ(p1, p2) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2)))
-#define NextChar(p, plen) ((p)++, (plen)--)
+#define NextChar(p, plen) NextByte((p), (plen))
 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
 
+#define MatchText      SB_MatchText
+#define do_like_escape SB_do_like_escape
+
+#include "like_match.c"
+
+
+/* Set up to compile like_match.c for UTF8 encoding, using fast NextChar */
+
+#define NextChar(p, plen) \
+       do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 ) 
+#define MatchText      UTF8_MatchText
+
 #include "like_match.c"
 
-/* And some support for BYTEA */
-#define BYTEA_CHAREQ(p1, p2) (*(p1) == *(p2))
-#define BYTEA_NextChar(p, plen) ((p)++, (plen)--)
-#define BYTEA_CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
+static inline int
+GenericMatchText(char *s, int slen, char* p, int plen)
+{
+       if (pg_database_encoding_max_length() == 1)
+               return SB_MatchText(s, slen, p, plen);
+       else if (GetDatabaseEncoding() == PG_UTF8)
+               return UTF8_MatchText(s, slen, p, plen);
+       else
+               return MB_MatchText(s, slen, p, plen);
+}
+
+static inline int
+Generic_Text_IC_like(text *str, text *pat)
+{
+       char       *s,
+                          *p;
+       int                     slen,
+                               plen;
+
+       /* Force inputs to lower case to achieve case insensitivity */
+       str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str)));
+       pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat)));
+       s = VARDATA(str);
+       slen = (VARSIZE(str) - VARHDRSZ);
+       p = VARDATA(pat);
+       plen = (VARSIZE(pat) - VARHDRSZ);
 
+       return GenericMatchText(s, slen, p, plen);
+}
 
 /*
  *     interface routines called by the function manager
@@ -138,10 +163,7 @@ namelike(PG_FUNCTION_ARGS)
        p = VARDATA(pat);
        plen = (VARSIZE(pat) - VARHDRSZ);
 
-       if (pg_database_encoding_max_length() == 1)
-               result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
-       else
-               result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
+       result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -162,10 +184,7 @@ namenlike(PG_FUNCTION_ARGS)
        p = VARDATA(pat);
        plen = (VARSIZE(pat) - VARHDRSZ);
 
-       if (pg_database_encoding_max_length() == 1)
-               result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
-       else
-               result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
+       result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -186,10 +205,7 @@ textlike(PG_FUNCTION_ARGS)
        p = VARDATA(pat);
        plen = (VARSIZE(pat) - VARHDRSZ);
 
-       if (pg_database_encoding_max_length() == 1)
-               result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
-       else
-               result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
+       result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -210,10 +226,7 @@ textnlike(PG_FUNCTION_ARGS)
        p = VARDATA(pat);
        plen = (VARSIZE(pat) - VARHDRSZ);
 
-       if (pg_database_encoding_max_length() == 1)
-               result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
-       else
-               result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
+       result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -234,7 +247,7 @@ bytealike(PG_FUNCTION_ARGS)
        p = VARDATA(pat);
        plen = (VARSIZE(pat) - VARHDRSZ);
 
-       result = (MatchBytea(s, slen, p, plen) == LIKE_TRUE);
+       result = (SB_MatchText(s, slen, p, plen) == LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -255,7 +268,7 @@ byteanlike(PG_FUNCTION_ARGS)
        p = VARDATA(pat);
        plen = (VARSIZE(pat) - VARHDRSZ);
 
-       result = (MatchBytea(s, slen, p, plen) != LIKE_TRUE);
+       result = (SB_MatchText(s, slen, p, plen) != LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -270,37 +283,11 @@ nameiclike(PG_FUNCTION_ARGS)
        Name            str = PG_GETARG_NAME(0);
        text       *pat = PG_GETARG_TEXT_P(1);
        bool            result;
-       char       *s,
-                          *p;
-       int                     slen,
-                               plen;
-
-       if (pg_database_encoding_max_length() == 1)
-       {
-               s = NameStr(*str);
-               slen = strlen(s);
-               p = VARDATA(pat);
-               plen = (VARSIZE(pat) - VARHDRSZ);
-               result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
-       }
-       else
-       {
-               /* Force inputs to lower case to achieve case insensitivity */
-               text       *strtext;
+       text       *strtext;
 
-               strtext = DatumGetTextP(DirectFunctionCall1(name_text,
+       strtext = DatumGetTextP(DirectFunctionCall1(name_text,
                                                                                                        NameGetDatum(str)));
-               strtext = DatumGetTextP(DirectFunctionCall1(lower,
-                                                                                                 PointerGetDatum(strtext)));
-               pat = DatumGetTextP(DirectFunctionCall1(lower,
-                                                                                               PointerGetDatum(pat)));
-
-               s = VARDATA(strtext);
-               slen = (VARSIZE(strtext) - VARHDRSZ);
-               p = VARDATA(pat);
-               plen = (VARSIZE(pat) - VARHDRSZ);
-               result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
-       }
+       result = (Generic_Text_IC_like(strtext, pat) == LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -311,37 +298,11 @@ nameicnlike(PG_FUNCTION_ARGS)
        Name            str = PG_GETARG_NAME(0);
        text       *pat = PG_GETARG_TEXT_P(1);
        bool            result;
-       char       *s,
-                          *p;
-       int                     slen,
-                               plen;
-
-       if (pg_database_encoding_max_length() == 1)
-       {
-               s = NameStr(*str);
-               slen = strlen(s);
-               p = VARDATA(pat);
-               plen = (VARSIZE(pat) - VARHDRSZ);
-               result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
-       }
-       else
-       {
-               /* Force inputs to lower case to achieve case insensitivity */
-               text       *strtext;
+       text       *strtext;
 
-               strtext = DatumGetTextP(DirectFunctionCall1(name_text,
+       strtext = DatumGetTextP(DirectFunctionCall1(name_text,
                                                                                                        NameGetDatum(str)));
-               strtext = DatumGetTextP(DirectFunctionCall1(lower,
-                                                                                                 PointerGetDatum(strtext)));
-               pat = DatumGetTextP(DirectFunctionCall1(lower,
-                                                                                               PointerGetDatum(pat)));
-
-               s = VARDATA(strtext);
-               slen = (VARSIZE(strtext) - VARHDRSZ);
-               p = VARDATA(pat);
-               plen = (VARSIZE(pat) - VARHDRSZ);
-               result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
-       }
+       result = (Generic_Text_IC_like(strtext, pat) != LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -352,32 +313,8 @@ texticlike(PG_FUNCTION_ARGS)
        text       *str = PG_GETARG_TEXT_P(0);
        text       *pat = PG_GETARG_TEXT_P(1);
        bool            result;
-       char       *s,
-                          *p;
-       int                     slen,
-                               plen;
 
-       if (pg_database_encoding_max_length() == 1)
-       {
-               s = VARDATA(str);
-               slen = (VARSIZE(str) - VARHDRSZ);
-               p = VARDATA(pat);
-               plen = (VARSIZE(pat) - VARHDRSZ);
-               result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
-       }
-       else
-       {
-               /* Force inputs to lower case to achieve case insensitivity */
-               str = DatumGetTextP(DirectFunctionCall1(lower,
-                                                                                               PointerGetDatum(str)));
-               pat = DatumGetTextP(DirectFunctionCall1(lower,
-                                                                                               PointerGetDatum(pat)));
-               s = VARDATA(str);
-               slen = (VARSIZE(str) - VARHDRSZ);
-               p = VARDATA(pat);
-               plen = (VARSIZE(pat) - VARHDRSZ);
-               result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
-       }
+       result = (Generic_Text_IC_like(str, pat) == LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -388,32 +325,8 @@ texticnlike(PG_FUNCTION_ARGS)
        text       *str = PG_GETARG_TEXT_P(0);
        text       *pat = PG_GETARG_TEXT_P(1);
        bool            result;
-       char       *s,
-                          *p;
-       int                     slen,
-                               plen;
 
-       if (pg_database_encoding_max_length() == 1)
-       {
-               s = VARDATA(str);
-               slen = (VARSIZE(str) - VARHDRSZ);
-               p = VARDATA(pat);
-               plen = (VARSIZE(pat) - VARHDRSZ);
-               result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
-       }
-       else
-       {
-               /* Force inputs to lower case to achieve case insensitivity */
-               str = DatumGetTextP(DirectFunctionCall1(lower,
-                                                                                               PointerGetDatum(str)));
-               pat = DatumGetTextP(DirectFunctionCall1(lower,
-                                                                                               PointerGetDatum(pat)));
-               s = VARDATA(str);
-               slen = (VARSIZE(str) - VARHDRSZ);
-               p = VARDATA(pat);
-               plen = (VARSIZE(pat) - VARHDRSZ);
-               result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
-       }
+       result = (Generic_Text_IC_like(str, pat) != LIKE_TRUE);
 
        PG_RETURN_BOOL(result);
 }
@@ -430,7 +343,7 @@ like_escape(PG_FUNCTION_ARGS)
        text       *result;
 
        if (pg_database_encoding_max_length() == 1)
-               result = do_like_escape(pat, esc);
+               result = SB_do_like_escape(pat, esc);
        else
                result = MB_do_like_escape(pat, esc);
 
@@ -446,179 +359,8 @@ like_escape_bytea(PG_FUNCTION_ARGS)
 {
        bytea      *pat = PG_GETARG_BYTEA_P(0);
        bytea      *esc = PG_GETARG_BYTEA_P(1);
-       bytea      *result;
-       char       *p,
-                          *e,
-                          *r;
-       int                     plen,
-                               elen;
-       bool            afterescape;
-
-       p = VARDATA(pat);
-       plen = (VARSIZE(pat) - VARHDRSZ);
-       e = VARDATA(esc);
-       elen = (VARSIZE(esc) - VARHDRSZ);
-
-       /*
-        * Worst-case pattern growth is 2x --- unlikely, but it's hardly worth
-        * trying to calculate the size more accurately than that.
-        */
-       result = (text *) palloc(plen * 2 + VARHDRSZ);
-       r = VARDATA(result);
-
-       if (elen == 0)
-       {
-               /*
-                * No escape character is wanted.  Double any backslashes in the
-                * pattern to make them act like ordinary characters.
-                */
-               while (plen > 0)
-               {
-                       if (*p == '\\')
-                               *r++ = '\\';
-                       BYTEA_CopyAdvChar(r, p, plen);
-               }
-       }
-       else
-       {
-               /*
-                * The specified escape must be only a single character.
-                */
-               BYTEA_NextChar(e, elen);
-               if (elen != 0)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
-                                        errmsg("invalid escape string"),
-                                 errhint("Escape string must be empty or one character.")));
-
-               e = VARDATA(esc);
-
-               /*
-                * If specified escape is '\', just copy the pattern as-is.
-                */
-               if (*e == '\\')
-               {
-                       memcpy(result, pat, VARSIZE(pat));
-                       PG_RETURN_BYTEA_P(result);
-               }
-
-               /*
-                * Otherwise, convert occurrences of the specified escape character to
-                * '\', and double occurrences of '\' --- unless they immediately
-                * follow an escape character!
-                */
-               afterescape = false;
-               while (plen > 0)
-               {
-                       if (BYTEA_CHAREQ(p, e) && !afterescape)
-                       {
-                               *r++ = '\\';
-                               BYTEA_NextChar(p, plen);
-                               afterescape = true;
-                       }
-                       else if (*p == '\\')
-                       {
-                               *r++ = '\\';
-                               if (!afterescape)
-                                       *r++ = '\\';
-                               BYTEA_NextChar(p, plen);
-                               afterescape = false;
-                       }
-                       else
-                       {
-                               BYTEA_CopyAdvChar(r, p, plen);
-                               afterescape = false;
-                       }
-               }
-       }
-
-       SET_VARSIZE(result, r - ((char *) result));
+       bytea      *result = SB_do_like_escape((text *)pat, (text *)esc);
 
-       PG_RETURN_BYTEA_P(result);
+       PG_RETURN_BYTEA_P((bytea *)result);
 }
 
-/*
- * Same as above, but specifically for bytea (binary) datatype
- */
-static int
-MatchBytea(char *t, int tlen, char *p, int plen)
-{
-       /* Fast path for match-everything pattern */
-       if ((plen == 1) && (*p == '%'))
-               return LIKE_TRUE;
-
-       while ((tlen > 0) && (plen > 0))
-       {
-               if (*p == '\\')
-               {
-                       /* Next pattern char must match literally, whatever it is */
-                       BYTEA_NextChar(p, plen);
-                       if ((plen <= 0) || !BYTEA_CHAREQ(t, p))
-                               return LIKE_FALSE;
-               }
-               else if (*p == '%')
-               {
-                       /* %% is the same as % according to the SQL standard */
-                       /* Advance past all %'s */
-                       while ((plen > 0) && (*p == '%'))
-                               BYTEA_NextChar(p, plen);
-                       /* Trailing percent matches everything. */
-                       if (plen <= 0)
-                               return LIKE_TRUE;
-
-                       /*
-                        * Otherwise, scan for a text position at which we can match the
-                        * rest of the pattern.
-                        */
-                       while (tlen > 0)
-                       {
-                               /*
-                                * Optimization to prevent most recursion: don't recurse
-                                * unless first pattern char might match this text char.
-                                */
-                               if (BYTEA_CHAREQ(t, p) || (*p == '\\') || (*p == '_'))
-                               {
-                                       int                     matched = MatchBytea(t, tlen, p, plen);
-
-                                       if (matched != LIKE_FALSE)
-                                               return matched; /* TRUE or ABORT */
-                               }
-
-                               BYTEA_NextChar(t, tlen);
-                       }
-
-                       /*
-                        * End of text with no match, so no point in trying later places
-                        * to start matching this pattern.
-                        */
-                       return LIKE_ABORT;
-               }
-               else if ((*p != '_') && !BYTEA_CHAREQ(t, p))
-               {
-                       /*
-                        * Not the single-character wildcard and no explicit match? Then
-                        * time to quit...
-                        */
-                       return LIKE_FALSE;
-               }
-
-               BYTEA_NextChar(t, tlen);
-               BYTEA_NextChar(p, plen);
-       }
-
-       if (tlen > 0)
-               return LIKE_FALSE;              /* end of pattern, but not of text */
-
-       /* End of input string.  Do we have matching pattern remaining? */
-       while ((plen > 0) && (*p == '%'))       /* allow multiple %'s at end of
-                                                                                * pattern */
-               BYTEA_NextChar(p, plen);
-       if (plen <= 0)
-               return LIKE_TRUE;
-
-       /*
-        * End of text with no match, so no point in trying later places to start
-        * matching this pattern.
-        */
-       return LIKE_ABORT;
-}      /* MatchBytea() */
index 22e2705fb368f7f51cb0b81806d5d53f67c9488b..62f8bc40a15e815af80bb668dca629b5ff0f61ea 100644 (file)
@@ -3,23 +3,21 @@
  * like_match.c
  *       like expression handling internal code.
  *
- * This file is included by like.c *twice*, to provide an optimization
- * for single-byte encodings.
+ * This file is included by like.c three times, to provide matching code for
+ * single-byte encodings, UTF8, and for other multi-byte encodings.
+ * UTF8 is a special case because we can use a much more efficient version
+ * of NextChar than can be used for other multi-byte encodings.
  *
  * Before the inclusion, we need to define following macros:
  *
- * CHAREQ
- * ICHAREQ
- * NextChar
- * CopyAdvChar
- * MatchText (MBMatchText)
- * MatchTextIC (MBMatchTextIC)
- * do_like_escape (MB_do_like_escape)
+ * NextChar 
+ * MatchText - set to the name of the function wanted
+ * do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
  *
  * Copyright (c) 1996-2007, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *     $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.15 2007/02/27 23:48:08 tgl Exp $
+ *     $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.16 2007/06/02 02:03:42 adunstan Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -77,21 +75,36 @@ MatchText(char *t, int tlen, char *p, int plen)
        if ((plen == 1) && (*p == '%'))
                return LIKE_TRUE;
 
+       /*
+        * In this loop, we advance by char when matching wildcards (and thus
+        * on recursive entry to this function we are properly char-synced). On
+        * other occasions it is safe to advance by byte, as the text and pattern
+        * will be in lockstep. This allows us to perform all comparisons  between
+        * the text and pattern on a byte by byte basis, even for multi-byte
+        * encodings.
+        */
+
        while ((tlen > 0) && (plen > 0))
        {
                if (*p == '\\')
                {
-                       /* Next pattern char must match literally, whatever it is */
-                       NextChar(p, plen);
-                       if ((plen <= 0) || !CHAREQ(t, p))
+                       /* Next byte must match literally, whatever it is */
+                       NextByte(p, plen);
+                       if ((plen <= 0) || *p != *t )
                                return LIKE_FALSE;
                }
                else if (*p == '%')
                {
+                       /*
+                        * % processing is essentially a search for a match for what 
+                        * follows the %, plus a recursive match of the remainder.
+                        * We succeed if and only if both conditions are met.
+                        */
+
                        /* %% is the same as % according to the SQL standard */
                        /* Advance past all %'s */
                        while ((plen > 0) && (*p == '%'))
-                               NextChar(p, plen);
+                               NextByte(p, plen);
                        /* Trailing percent matches everything. */
                        if (plen <= 0)
                                return LIKE_TRUE;
@@ -100,107 +113,62 @@ MatchText(char *t, int tlen, char *p, int plen)
                         * Otherwise, scan for a text position at which we can match the
                         * rest of the pattern.
                         */
-                       while (tlen > 0)
-                       {
-                               /*
-                                * Optimization to prevent most recursion: don't recurse
-                                * unless first pattern char might match this text char.
-                                */
-                               if (CHAREQ(t, p) || (*p == '\\') || (*p == '_'))
-                               {
-                                       int                     matched = MatchText(t, tlen, p, plen);
+                       if (*p == '_')
 
-                                       if (matched != LIKE_FALSE)
-                                               return matched; /* TRUE or ABORT */
-                               }
+                       {
+                               /* %_ is the same as _% - avoid matching _ repeatedly */
 
                                NextChar(t, tlen);
-                       }
+                               NextByte(p, plen);
 
-                       /*
-                        * End of text with no match, so no point in trying later places
-                        * to start matching this pattern.
-                        */
-                       return LIKE_ABORT;
-               }
-               else if ((*p != '_') && !CHAREQ(t, p))
-               {
-                       /*
-                        * Not the single-character wildcard and no explicit match? Then
-                        * time to quit...
-                        */
-                       return LIKE_FALSE;
-               }
-
-               NextChar(t, tlen);
-               NextChar(p, plen);
-       }
-
-       if (tlen > 0)
-               return LIKE_FALSE;              /* end of pattern, but not of text */
+                               if (tlen <= 0)
+                               {
+                                       return (plen <= 0) ? LIKE_TRUE : LIKE_ABORT;
+                               }
+                               else if (plen <= 0)
+                               {
+                                       return LIKE_FALSE;
+                               }
 
-       /* End of input string.  Do we have matching pattern remaining? */
-       while ((plen > 0) && (*p == '%'))       /* allow multiple %'s at end of
-                                                                                * pattern */
-               NextChar(p, plen);
-       if (plen <= 0)
-               return LIKE_TRUE;
+                               while (tlen > 0)
+                               {
+                                       int                     matched = MatchText(t, tlen, p, plen);
+                                               
+                                       if (matched != LIKE_FALSE)
+                                                       return matched; /* TRUE or ABORT */
 
-       /*
-        * End of text with no match, so no point in trying later places to start
-        * matching this pattern.
-        */
-       return LIKE_ABORT;
-}      /* MatchText() */
+                                       NextChar(t, tlen);
+                               }
+                       }
+                       else
+                       {
 
-/*
- * Same as above, but ignore case
- */
-static int
-MatchTextIC(char *t, int tlen, char *p, int plen)
-{
-       /* Fast path for match-everything pattern */
-       if ((plen == 1) && (*p == '%'))
-               return LIKE_TRUE;
+                               char firstpat = *p ;
 
-       while ((tlen > 0) && (plen > 0))
-       {
-               if (*p == '\\')
-               {
-                       /* Next pattern char must match literally, whatever it is */
-                       NextChar(p, plen);
-                       if ((plen <= 0) || !ICHAREQ(t, p))
-                               return LIKE_FALSE;
-               }
-               else if (*p == '%')
-               {
-                       /* %% is the same as % according to the SQL standard */
-                       /* Advance past all %'s */
-                       while ((plen > 0) && (*p == '%'))
-                               NextChar(p, plen);
-                       /* Trailing percent matches everything. */
-                       if (plen <= 0)
-                               return LIKE_TRUE;
+                               if (*p == '\\')
+                               {
+                                       if (plen < 2)
+                                               return LIKE_FALSE;
+                                       firstpat = p[1];
+                               }
 
-                       /*
-                        * Otherwise, scan for a text position at which we can match the
-                        * rest of the pattern.
-                        */
-                       while (tlen > 0)
-                       {
-                               /*
-                                * Optimization to prevent most recursion: don't recurse
-                                * unless first pattern char might match this text char.
-                                */
-                               if (ICHAREQ(t, p) || (*p == '\\') || (*p == '_'))
+                               while (tlen > 0)
                                {
-                                       int                     matched = MatchTextIC(t, tlen, p, plen);
+                                       /*
+                                        * Optimization to prevent most recursion: don't recurse
+                                        * unless first pattern byte matches first text byte.
+                                        */
+                                       if (*t == firstpat)
+                                       {
+                                               int                     matched = MatchText(t, tlen, p, plen);
+                                               
+                                               if (matched != LIKE_FALSE)
+                                                       return matched; /* TRUE or ABORT */
+                                       }
+
+                                       NextChar(t, tlen);
 
-                                       if (matched != LIKE_FALSE)
-                                               return matched; /* TRUE or ABORT */
                                }
-
-                               NextChar(t, tlen);
                        }
 
                        /*
@@ -209,7 +177,13 @@ MatchTextIC(char *t, int tlen, char *p, int plen)
                         */
                        return LIKE_ABORT;
                }
-               else if ((*p != '_') && !ICHAREQ(t, p))
+               else if (*p == '_')
+               {
+                       NextChar(t, tlen);
+                       NextByte(p, plen);
+                       continue;
+               }
+               else if (*t != *p)
                {
                        /*
                         * Not the single-character wildcard and no explicit match? Then
@@ -217,9 +191,20 @@ MatchTextIC(char *t, int tlen, char *p, int plen)
                         */
                        return LIKE_FALSE;
                }
-
-               NextChar(t, tlen);
-               NextChar(p, plen);
+               /*
+                * It is safe to use NextByte instead of NextChar here, even for
+                * multi-byte character sets, because we are not following 
+                * immediately after a wildcard character.
+                * If we are in the middle of a multibyte character, we must 
+                * already have matched at least one byte of the character from 
+                * both text and pattern; so we cannot get out-of-sync
+                * on character boundaries.  And we know that no backend-legal 
+                * encoding allows ASCII characters such as '%' to appear as 
+                * non-first bytes of characters, so we won't mistakenly detect 
+                * a new wildcard.
+                */
+               NextByte(t, tlen);
+               NextByte(p, plen);
        }
 
        if (tlen > 0)
@@ -228,7 +213,8 @@ MatchTextIC(char *t, int tlen, char *p, int plen)
        /* End of input string.  Do we have matching pattern remaining? */
        while ((plen > 0) && (*p == '%'))       /* allow multiple %'s at end of
                                                                                 * pattern */
-               NextChar(p, plen);
+               NextByte(p, plen);
+
        if (plen <= 0)
                return LIKE_TRUE;
 
@@ -237,12 +223,14 @@ MatchTextIC(char *t, int tlen, char *p, int plen)
         * matching this pattern.
         */
        return LIKE_ABORT;
-}      /* MatchTextIC() */
+}      /* MatchText() */
 
 /*
  * like_escape() --- given a pattern and an ESCAPE string,
  * convert the pattern to use Postgres' standard backslash escape convention.
  */
+#ifdef do_like_escape
+
 static text *
 do_like_escape(text *pat, text *esc)
 {
@@ -336,3 +324,17 @@ do_like_escape(text *pat, text *esc)
 
        return result;
 }
+#endif /* do_like_escape */
+
+#ifdef CHAREQ
+#undef CHAREQ
+#endif
+
+#undef NextChar
+#undef CopyAdvChar
+#undef MatchText
+
+#ifdef do_like_escape
+#undef do_like_escape
+#endif
+