pgindent run for 8.2.

[postgresql] / src / backend / utils / adt / like.c
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c

index f3f8b9854e11dcd51d866717e02c3c215b1475ce..4223bffb1883af2531f18fcc5fe667be3cc8c1ac 100644 (file)
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -7,11 +7,11 @@
   *             A big hack of the regexp.c code!! Contributed by
   *             Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
   *
- * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *     $Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.47 2001/10/04 02:15:47 ishii Exp $
+ *     $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.66 2006/10/04 00:29:59 momjian Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -19,9 +19,7 @@
  
  #include <ctype.h>
  
-#ifdef MULTIBYTE
  #include "mb/pg_wchar.h"
-#endif
  #include "utils/builtins.h"
  
  
@@ -30,19 +28,13 @@
  #define LIKE_ABORT                                             (-1)
  
  
-static int MatchText(unsigned char *t, int tlen,
-                 unsigned char *p, int plen);
-static int MatchTextIC(unsigned char *t, int tlen,
-                       unsigned char *p, int plen);
-static int MatchBytea(unsigned char *t, int tlen,
-                 unsigned char *p, int plen);
+static int     MatchText(char *t, int tlen, char *p, int plen);
+static int     MatchTextIC(char *t, int tlen, char *p, int plen);
+static int     MatchBytea(char *t, int tlen, char *p, int plen);
  static text *do_like_escape(text *, text *);
  
-#ifdef MULTIBYTE
-static int MBMatchText(unsigned char *t, int tlen,
-                 unsigned char *p, int plen);
-static int MBMatchTextIC(unsigned char *t, int tlen,
-                       unsigned char *p, int plen);
+static int     MBMatchText(char *t, int tlen, char *p, int plen);
+static int     MBMatchTextIC(char *t, int tlen, char *p, int plen);
  static text *MB_do_like_escape(text *, text *);
  
  /*--------------------
@@ -51,68 +43,44 @@ static text *MB_do_like_escape(text *, text *);
   *--------------------
   */
  static int
-wchareq(unsigned char *p1, unsigned char *p2)
+wchareq(char *p1, char *p2)
  {
-       int                     l;
+       int                     p1_len;
  
-       l = pg_mblen(p1);
-       if (pg_mblen(p2) != l)
-               return (0);
-       while (l--)
+       /* Optimization:  quickly compare the first byte. */
+       if (*p1 != *p2)
+               return 0;
+
+       p1_len = pg_mblen(p1);
+       if (pg_mblen(p2) != p1_len)
+               return 0;
+
+       /* They are the same length */
+       while (p1_len--)
         {
                 if (*p1++ != *p2++)
-                       return (0);
+                       return 0;
         }
-       return (1);
+       return 1;
  }
  
-/*--------------------
- * Support routine for MatchTextIC. Compares given multibyte streams
- * as wide characters ignoring case.
- * If they match, returns 1 otherwise returns 0.
- *--------------------
+/*
+ * Formerly we had a routine iwchareq() here that tried to do case-insensitive
+ * comparison of multibyte characters. It did not work at all, however,
+ * because it relied on tolower() which has a single-byte API ... and
+ * towlower() wouldn't be much better since we have no suitably cheap way
+ * of getting a single character transformed to the system's wchar_t format.
+ * So now, we just downcase the strings using lower() and apply regular LIKE
+ * comparison. This should be revisited when we install better locale support.
+ *
+ * Note that MBMatchText and MBMatchTextIC do exactly the same thing now.
+ * Is it worth refactoring to avoid duplicated code?  They might become
+ * different again in the future.
   */
-#define CHARMAX 0x80
-
-static int
-iwchareq(unsigned char *p1, unsigned char *p2)
-{
-       int                     c1[2],
-                               c2[2];
-       int                     l;
-
-       /*
-        * short cut. if *p1 and *p2 is lower than CHARMAX, then we could
-        * assume they are ASCII
-        */
-       if (*p1 < CHARMAX && *p2 < CHARMAX)
-               return (tolower(*p1) == tolower(*p2));
-
-       /*
-        * if one of them is an ASCII while the other is not, then they must
-        * be different characters
-        */
-       else if (*p1 < CHARMAX || *p2 < CHARMAX)
-               return (0);
-
-       /*
-        * ok, p1 and p2 are both > CHARMAX, then they must be multi-byte
-        * characters
-        */
-       l = pg_mblen(p1);
-       (void) pg_mb2wchar_with_len(p1, (pg_wchar *) c1, l);
-       c1[0] = tolower(c1[0]);
-       l = pg_mblen(p2);
-       (void) pg_mb2wchar_with_len(p2, (pg_wchar *) c2, l);
-       c2[0] = tolower(c2[0]);
-       return (c1[0] == c2[0]);
-}
-
-#endif
  
-#ifdef MULTIBYTE
+/* Set up to compile like_match.c for multibyte characters */
  #define CHAREQ(p1, p2) wchareq(p1, p2)
-#define ICHAREQ(p1, p2) iwchareq(p1, p2)
+#define ICHAREQ(p1, p2) wchareq(p1, p2)
  #define NextChar(p, plen) \
         do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
  #define CopyAdvChar(dst, src, srclen) \
@@ -123,9 +91,11 @@ iwchareq(unsigned char *p1, unsigned char *p2)
            } while (0)
  
  #define MatchText      MBMatchText
-#define MatchTextIC    MBMatchTextIC
+#define MatchTextIC MBMatchTextIC
  #define do_like_escape MB_do_like_escape
+
  #include "like_match.c"
+
  #undef CHAREQ
  #undef ICHAREQ
  #undef NextChar
@@ -133,17 +103,20 @@ iwchareq(unsigned char *p1, unsigned char *p2)
  #undef MatchText
  #undef MatchTextIC
  #undef do_like_escape
-#endif
  
+/* Set up to compile like_match.c for single-byte characters */
  #define CHAREQ(p1, p2) (*(p1) == *(p2))
-#define ICHAREQ(p1, p2) (tolower(*(p1)) == tolower(*(p2)))
+#define ICHAREQ(p1, p2) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2)))
  #define NextChar(p, plen) ((p)++, (plen)--)
  #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
  
+#include "like_match.c"
+
+/* And some support for BYTEA */
  #define BYTEA_CHAREQ(p1, p2) (*(p1) == *(p2))
  #define BYTEA_NextChar(p, plen) ((p)++, (plen)--)
  #define BYTEA_CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
-#include "like_match.c"
+
  
  /*
   *     interface routines called by the function manager
@@ -155,7 +128,7 @@ namelike(PG_FUNCTION_ARGS)
         Name            str = PG_GETARG_NAME(0);
         text       *pat = PG_GETARG_TEXT_P(1);
         bool            result;
-       unsigned char *s,
+       char       *s,
                            *p;
         int                     slen,
                                 plen;
@@ -165,14 +138,10 @@ namelike(PG_FUNCTION_ARGS)
         p = VARDATA(pat);
         plen = (VARSIZE(pat) - VARHDRSZ);
  
-#ifdef MULTIBYTE
         if (pg_database_encoding_max_length() == 1)
-           result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
+               result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
         else
-           result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);      
-#else
-       result = (MatchText(s, slen, p, plen) == LIKE_TRUE);    
-#endif
+               result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
  
         PG_RETURN_BOOL(result);
  }
@@ -183,7 +152,7 @@ namenlike(PG_FUNCTION_ARGS)
         Name            str = PG_GETARG_NAME(0);
         text       *pat = PG_GETARG_TEXT_P(1);
         bool            result;
-       unsigned char *s,
+       char       *s,
                            *p;
         int                     slen,
                                 plen;
@@ -193,14 +162,10 @@ namenlike(PG_FUNCTION_ARGS)
         p = VARDATA(pat);
         plen = (VARSIZE(pat) - VARHDRSZ);
  
-#ifdef MULTIBYTE
         if (pg_database_encoding_max_length() == 1)
-           result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
+               result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
         else
-           result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);      
-#else
-       result = (MatchText(s, slen, p, plen) != LIKE_TRUE);    
-#endif
+               result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
  
         PG_RETURN_BOOL(result);
  }
@@ -211,23 +176,20 @@ textlike(PG_FUNCTION_ARGS)
         text       *str = PG_GETARG_TEXT_P(0);
         text       *pat = PG_GETARG_TEXT_P(1);
         bool            result;
-       unsigned char *s,
+       char       *s,
                            *p;
         int                     slen,
                                 plen;
+
         s = VARDATA(str);
         slen = (VARSIZE(str) - VARHDRSZ);
         p = VARDATA(pat);
         plen = (VARSIZE(pat) - VARHDRSZ);
  
-#ifdef MULTIBYTE
         if (pg_database_encoding_max_length() == 1)
-           result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
+               result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
         else
-           result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);      
-#else
-       result = (MatchText(s, slen, p, plen) == LIKE_TRUE);    
-#endif
+               result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
  
         PG_RETURN_BOOL(result);
  }
@@ -238,7 +200,7 @@ textnlike(PG_FUNCTION_ARGS)
         text       *str = PG_GETARG_TEXT_P(0);
         text       *pat = PG_GETARG_TEXT_P(1);
         bool            result;
-       unsigned char *s,
+       char       *s,
                            *p;
         int                     slen,
                                 plen;
@@ -248,14 +210,10 @@ textnlike(PG_FUNCTION_ARGS)
         p = VARDATA(pat);
         plen = (VARSIZE(pat) - VARHDRSZ);
  
-#ifdef MULTIBYTE
         if (pg_database_encoding_max_length() == 1)
-           result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
+               result = (MatchText(s, slen, p, plen) != LIKE_TRUE);
         else
-           result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);      
-#else
-       result = (MatchText(s, slen, p, plen) != LIKE_TRUE);    
-#endif
+               result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
  
         PG_RETURN_BOOL(result);
  }
@@ -266,7 +224,7 @@ bytealike(PG_FUNCTION_ARGS)
         bytea      *str = PG_GETARG_BYTEA_P(0);
         bytea      *pat = PG_GETARG_BYTEA_P(1);
         bool            result;
-       unsigned char *s,
+       char       *s,
                            *p;
         int                     slen,
                                 plen;
@@ -287,7 +245,7 @@ byteanlike(PG_FUNCTION_ARGS)
         bytea      *str = PG_GETARG_BYTEA_P(0);
         bytea      *pat = PG_GETARG_BYTEA_P(1);
         bool            result;
-       unsigned char *s,
+       char       *s,
                            *p;
         int                     slen,
                                 plen;
@@ -312,24 +270,37 @@ nameiclike(PG_FUNCTION_ARGS)
         Name            str = PG_GETARG_NAME(0);
         text       *pat = PG_GETARG_TEXT_P(1);
         bool            result;
-       unsigned char *s,
+       char       *s,
                            *p;
         int                     slen,
                                 plen;
  
-       s = NameStr(*str);
-       slen = strlen(s);
-       p = VARDATA(pat);
-       plen = (VARSIZE(pat) - VARHDRSZ);
-
-#ifdef MULTIBYTE
         if (pg_database_encoding_max_length() == 1)
-           result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+       {
+               s = NameStr(*str);
+               slen = strlen(s);
+               p = VARDATA(pat);
+               plen = (VARSIZE(pat) - VARHDRSZ);
+               result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+       }
         else
-           result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);    
-#else
-       result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);  
-#endif
+       {
+               /* Force inputs to lower case to achieve case insensitivity */
+               text       *strtext;
+
+               strtext = DatumGetTextP(DirectFunctionCall1(name_text,
+                                                                                                       NameGetDatum(str)));
+               strtext = DatumGetTextP(DirectFunctionCall1(lower,
+                                                                                                 PointerGetDatum(strtext)));
+               pat = DatumGetTextP(DirectFunctionCall1(lower,
+                                                                                               PointerGetDatum(pat)));
+
+               s = VARDATA(strtext);
+               slen = (VARSIZE(strtext) - VARHDRSZ);
+               p = VARDATA(pat);
+               plen = (VARSIZE(pat) - VARHDRSZ);
+               result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+       }
  
         PG_RETURN_BOOL(result);
  }
@@ -340,24 +311,37 @@ nameicnlike(PG_FUNCTION_ARGS)
         Name            str = PG_GETARG_NAME(0);
         text       *pat = PG_GETARG_TEXT_P(1);
         bool            result;
-       unsigned char *s,
+       char       *s,
                            *p;
         int                     slen,
                                 plen;
  
-       s = NameStr(*str);
-       slen = strlen(s);
-       p = VARDATA(pat);
-       plen = (VARSIZE(pat) - VARHDRSZ);
-
-#ifdef MULTIBYTE
         if (pg_database_encoding_max_length() == 1)
-           result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+       {
+               s = NameStr(*str);
+               slen = strlen(s);
+               p = VARDATA(pat);
+               plen = (VARSIZE(pat) - VARHDRSZ);
+               result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+       }
         else
-           result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);    
-#else
-       result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);  
-#endif
+       {
+               /* Force inputs to lower case to achieve case insensitivity */
+               text       *strtext;
+
+               strtext = DatumGetTextP(DirectFunctionCall1(name_text,
+                                                                                                       NameGetDatum(str)));
+               strtext = DatumGetTextP(DirectFunctionCall1(lower,
+                                                                                                 PointerGetDatum(strtext)));
+               pat = DatumGetTextP(DirectFunctionCall1(lower,
+                                                                                               PointerGetDatum(pat)));
+
+               s = VARDATA(strtext);
+               slen = (VARSIZE(strtext) - VARHDRSZ);
+               p = VARDATA(pat);
+               plen = (VARSIZE(pat) - VARHDRSZ);
+               result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+       }
  
         PG_RETURN_BOOL(result);
  }
@@ -368,24 +352,32 @@ texticlike(PG_FUNCTION_ARGS)
         text       *str = PG_GETARG_TEXT_P(0);
         text       *pat = PG_GETARG_TEXT_P(1);
         bool            result;
-       unsigned char *s,
+       char       *s,
                            *p;
         int                     slen,
                                 plen;
  
-       s = VARDATA(str);
-       slen = (VARSIZE(str) - VARHDRSZ);
-       p = VARDATA(pat);
-       plen = (VARSIZE(pat) - VARHDRSZ);
-
-#ifdef MULTIBYTE
         if (pg_database_encoding_max_length() == 1)
-           result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+       {
+               s = VARDATA(str);
+               slen = (VARSIZE(str) - VARHDRSZ);
+               p = VARDATA(pat);
+               plen = (VARSIZE(pat) - VARHDRSZ);
+               result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+       }
         else
-           result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);    
-#else
-       result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);  
-#endif
+       {
+               /* Force inputs to lower case to achieve case insensitivity */
+               str = DatumGetTextP(DirectFunctionCall1(lower,
+                                                                                               PointerGetDatum(str)));
+               pat = DatumGetTextP(DirectFunctionCall1(lower,
+                                                                                               PointerGetDatum(pat)));
+               s = VARDATA(str);
+               slen = (VARSIZE(str) - VARHDRSZ);
+               p = VARDATA(pat);
+               plen = (VARSIZE(pat) - VARHDRSZ);
+               result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
+       }
  
         PG_RETURN_BOOL(result);
  }
@@ -396,24 +388,32 @@ texticnlike(PG_FUNCTION_ARGS)
         text       *str = PG_GETARG_TEXT_P(0);
         text       *pat = PG_GETARG_TEXT_P(1);
         bool            result;
-       unsigned char *s,
+       char       *s,
                            *p;
         int                     slen,
                                 plen;
  
-       s = VARDATA(str);
-       slen = (VARSIZE(str) - VARHDRSZ);
-       p = VARDATA(pat);
-       plen = (VARSIZE(pat) - VARHDRSZ);
-
-#ifdef MULTIBYTE
         if (pg_database_encoding_max_length() == 1)
-           result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+       {
+               s = VARDATA(str);
+               slen = (VARSIZE(str) - VARHDRSZ);
+               p = VARDATA(pat);
+               plen = (VARSIZE(pat) - VARHDRSZ);
+               result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+       }
         else
-           result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);    
-#else
-       result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);  
-#endif
+       {
+               /* Force inputs to lower case to achieve case insensitivity */
+               str = DatumGetTextP(DirectFunctionCall1(lower,
+                                                                                               PointerGetDatum(str)));
+               pat = DatumGetTextP(DirectFunctionCall1(lower,
+                                                                                               PointerGetDatum(pat)));
+               s = VARDATA(str);
+               slen = (VARSIZE(str) - VARHDRSZ);
+               p = VARDATA(pat);
+               plen = (VARSIZE(pat) - VARHDRSZ);
+               result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
+       }
  
         PG_RETURN_BOOL(result);
  }
@@ -429,14 +429,10 @@ like_escape(PG_FUNCTION_ARGS)
         text       *esc = PG_GETARG_TEXT_P(1);
         text       *result;
  
-#ifdef MULTIBYTE
         if (pg_database_encoding_max_length() == 1)
-           result = do_like_escape(pat, esc);
+               result = do_like_escape(pat, esc);
         else
-           result = MB_do_like_escape(pat, esc);
-#else
-       result = do_like_escape(pat, esc);
-#endif
+               result = MB_do_like_escape(pat, esc);
  
         PG_RETURN_TEXT_P(result);
  }
@@ -451,7 +447,7 @@ like_escape_bytea(PG_FUNCTION_ARGS)
         bytea      *pat = PG_GETARG_BYTEA_P(0);
         bytea      *esc = PG_GETARG_BYTEA_P(1);
         bytea      *result;
-       unsigned char *p,
+       char       *p,
                            *e,
                            *r;
         int                     plen,
@@ -472,7 +468,6 @@ like_escape_bytea(PG_FUNCTION_ARGS)
  
         if (elen == 0)
         {
-
                 /*
                  * No escape character is wanted.  Double any backslashes in the
                  * pattern to make them act like ordinary characters.
@@ -486,13 +481,16 @@ like_escape_bytea(PG_FUNCTION_ARGS)
         }
         else
         {
-
                 /*
                  * The specified escape must be only a single character.
                  */
                 BYTEA_NextChar(e, elen);
                 if (elen != 0)
-                       elog(ERROR, "ESCAPE string must be empty or one character");
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+                                        errmsg("invalid escape string"),
+                                 errhint("Escape string must be empty or one character.")));
+
                 e = VARDATA(esc);
  
                 /*
@@ -505,9 +503,9 @@ like_escape_bytea(PG_FUNCTION_ARGS)
                 }
  
                 /*
-                * Otherwise, convert occurrences of the specified escape
-                * character to '\', and double occurrences of '\' --- unless they
-                * immediately follow an escape character!
+                * Otherwise, convert occurrences of the specified escape character to
+                * '\', and double occurrences of '\' --- unless they immediately
+                * follow an escape character!
                  */
                 afterescape = false;
                 while (plen > 0)
@@ -534,7 +532,7 @@ like_escape_bytea(PG_FUNCTION_ARGS)
                 }
         }
  
-       VARATT_SIZEP(result) = r - ((unsigned char *) result);
+       VARATT_SIZEP(result) = r - ((char *) result);
  
         PG_RETURN_BYTEA_P(result);
  }
@@ -543,7 +541,7 @@ like_escape_bytea(PG_FUNCTION_ARGS)
   * Same as above, but specifically for bytea (binary) datatype
   */
  static int
-MatchBytea(unsigned char *t, int tlen, unsigned char *p, int plen)
+MatchBytea(char *t, int tlen, char *p, int plen)
  {
         /* Fast path for match-everything pattern */
         if ((plen == 1) && (*p == '%'))
@@ -569,12 +567,11 @@ MatchBytea(unsigned char *t, int tlen, unsigned char *p, int plen)
                                 return LIKE_TRUE;
  
                         /*
-                        * Otherwise, scan for a text position at which we can match
-                        * the rest of the pattern.
+                        * Otherwise, scan for a text position at which we can match the
+                        * rest of the pattern.
                          */
                         while (tlen > 0)
                         {
-
                                 /*
                                  * Optimization to prevent most recursion: don't recurse
                                  * unless first pattern char might match this text char.
@@ -591,17 +588,16 @@ MatchBytea(unsigned char *t, int tlen, unsigned char *p, int plen)
                         }
  
                         /*
-                        * End of text with no match, so no point in trying later
-                        * places to start matching this pattern.
+                        * End of text with no match, so no point in trying later places
+                        * to start matching this pattern.
                          */
                         return LIKE_ABORT;
                 }
                 else if ((*p != '_') && !BYTEA_CHAREQ(t, p))
                 {
-
                         /*
-                        * Not the single-character wildcard and no explicit match?
-                        * Then time to quit...
+                        * Not the single-character wildcard and no explicit match? Then
+                        * time to quit...
                          */
                         return LIKE_FALSE;
                 }
@@ -621,8 +617,8 @@ MatchBytea(unsigned char *t, int tlen, unsigned char *p, int plen)
                 return LIKE_TRUE;
  
         /*
-        * End of text with no match, so no point in trying later places to
-        * start matching this pattern.
+        * End of text with no match, so no point in trying later places to start
+        * matching this pattern.
          */
         return LIKE_ABORT;
  }      /* MatchBytea() */