]> granicus.if.org Git - postgresql/blobdiff - src/backend/utils/adt/like.c
Error message editing in utils/adt. Again thanks to Joe Conway for doing
[postgresql] / src / backend / utils / adt / like.c
index bae7ae062e140ae9c4ddcd418cee35aa8a57fc46..0f832aa6e13b0f917a08327a2bc1890dac0995fc 100644 (file)
 /*-------------------------------------------------------------------------
  *
- * like.c--
+ * like.c
  *       like expression handling code.
  *
- * Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- *       /usr/local/devel/pglite/cvs/src/backend/utils/adt/like.c,v 1.1 1995/07/30 23:55:36 emkxp01 Exp
- *
- *
  *      NOTES
  *             A big hack of the regexp.c code!! Contributed by
  *             Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
  *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *     $Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.54 2003/07/27 04:53:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include <string.h>
-#include "postgres.h"                  /* postgres system include file */
-#include "utils/palloc.h"
-#include "utils/builtins.h"            /* where the function declarations go */
+#include "postgres.h"
+
+#include <ctype.h>
+
 #include "mb/pg_wchar.h"
+#include "utils/builtins.h"
 
-static int     like(pg_wchar *text, pg_wchar *p);
 
-/*
- *     interface routines called by the function manager
+#define LIKE_TRUE                                              1	/* pattern matched the text */
+#define LIKE_FALSE                                             0	/* no match at this text position */
+#define LIKE_ABORT                                             (-1)	/* no match, and no later text position can match either */
+
+
+static int MatchText(unsigned char *t, int tlen,
+                 unsigned char *p, int plen);
+static int MatchTextIC(unsigned char *t, int tlen,
+                       unsigned char *p, int plen);
+static int MatchBytea(unsigned char *t, int tlen,
+                  unsigned char *p, int plen);	/* defined at the end of this file */
+static text *do_like_escape(text *, text *);
+
+static int MBMatchText(unsigned char *t, int tlen,
+                       unsigned char *p, int plen);	/* MB* names: see the #define renames placed before the first like_match.c include */
+static int MBMatchTextIC(unsigned char *t, int tlen,
+                         unsigned char *p, int plen);
+static text *MB_do_like_escape(text *, text *);
+
+/*--------------------
+ * Support routine for MatchText. Compares given multibyte streams
+ * as wide characters. If they match, returns 1 otherwise returns 0.
+ *
+ * The byte length of the character at each pointer is taken from
+ * pg_mblen().  Characters of different byte length never match;
+ * otherwise the two characters are compared byte by byte.  Only one
+ * character from each stream is examined.
+ *--------------------
  */
+static int
+wchareq(unsigned char *p1, unsigned char *p2)
+{
+       int                     l;
 
-/*
-   fixedlen_like:
-
-   a generic fixed length like routine
-                s              - the string to match against  (not necessarily null-terminated)
-                p                 - the pattern
-                charlen   - the length of the string
-*/
-static bool
-fixedlen_like(char *s, struct varlena * p, int charlen)
+       l = pg_mblen(p1);
+       if (pg_mblen(p2) != l)	/* different byte lengths: cannot be equal */
+               return (0);
+       while (l--)
+       {
+               if (*p1++ != *p2++)
+                       return (0);
+       }
+       return (1);
+}
+
+/*--------------------
+ * Support routine for MatchTextIC. Compares given multibyte streams
+ * as wide characters ignoring case.
+ * If they match, returns 1 otherwise returns 0.
+ *
+ * Only one character from each stream is examined.  Two bytes below
+ * CHARMAX are compared after tolower(); a byte below CHARMAX never
+ * matches one at or above it; otherwise each character is converted with
+ * pg_mb2wchar_with_len() and the first converted value of each side is
+ * lower-cased and compared.
+ *
+ * NOTE(review): tolower() is applied to the converted wide-character
+ * value.  The C library only defines tolower() for values representable
+ * as unsigned char (or EOF), so the result for larger values depends on
+ * the platform --- confirm this is acceptable for the encodings in use.
+ *--------------------
+ */
+#define CHARMAX 0x80	/* lead bytes below this value are treated as ASCII */
+
+static int
+iwchareq(unsigned char *p1, unsigned char *p2)
 {
-       pg_wchar           *sterm,
-                          *pterm;
-       int                     result;
-       int     len;
-
-       if (!s || !p)
-               return FALSE;
-
-       /* be sure sterm is null-terminated */
-#ifdef MULTIBYTE
-       sterm = (pg_wchar *) palloc((charlen + 1)*sizeof(pg_wchar));
-       (void)pg_mb2wchar_with_len((unsigned char *)s,sterm,charlen);
-#else
-       sterm = (char *) palloc(charlen + 1);
-       StrNCpy(sterm, s, charlen + 1);
-#endif
+       int                     c1[2],	/* presumably one wide char plus a terminator --- TODO confirm against pg_mb2wchar_with_len */
+                               c2[2];
+       int                     l;
+
+       /*
+        * Short cut: if *p1 and *p2 are both lower than CHARMAX, then we can
+        * assume they are ASCII
+        */
+       if (*p1 < CHARMAX && *p2 < CHARMAX)
+               return (tolower(*p1) == tolower(*p2));
 
        /*
-        * p is a text = varlena, not a string so we have to make a string
-        * from the vl_data field of the struct.
+        * if one of them is ASCII while the other is not, then they must
+        * be different characters
         */
+       else if (*p1 < CHARMAX || *p2 < CHARMAX)
+               return (0);
+
+       /*
+        * ok, *p1 and *p2 are both >= CHARMAX, so they are treated as
+        * multibyte characters
+        */
+       l = pg_mblen(p1);
+       (void) pg_mb2wchar_with_len(p1, (pg_wchar *) c1, l);
+       c1[0] = tolower(c1[0]);
+       l = pg_mblen(p2);
+       (void) pg_mb2wchar_with_len(p2, (pg_wchar *) c2, l);
+       c2[0] = tolower(c2[0]);
+       return (c1[0] == c2[0]);
+}
 
-       /* palloc the length of the text + the null character */
-       len = VARSIZE(p) - VARHDRSZ;
-#ifdef MULTIBYTE
-       pterm = (pg_wchar *) palloc((len + 1)*sizeof(pg_wchar));
-       (void)pg_mb2wchar_with_len((unsigned char *)VARDATA(p),pterm,len);
-#else
-       pterm = (char *) palloc(len + 1);
-       memmove(pterm, VARDATA(p), len);
-       *(pterm + len) = (char) NULL;
-#endif
+#define CHAREQ(p1, p2) wchareq(p1, p2)	/* multibyte setup: compare one whole character */
+#define ICHAREQ(p1, p2) iwchareq(p1, p2)	/* same, ignoring case */
+#define NextChar(p, plen) \
+       do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)	/* step p over one character of pg_mblen() bytes */
+#define CopyAdvChar(dst, src, srclen) \
+       do { int __l = pg_mblen(src); \
+                (srclen) -= __l; \
+                while (__l-- > 0) \
+                        *(dst)++ = *(src)++; \
+          } while (0)	/* copy one whole character, advancing both pointers */
 
-       /* do the regexp matching */
-       result = like(sterm, pterm);
+#define MatchText      MBMatchText	/* rename what the include defines, so the first pass yields the MB* names */
+#define MatchTextIC MBMatchTextIC
+#define do_like_escape MB_do_like_escape
+#include "like_match.c"	/* first pass: built with the multibyte-aware macros above */
+#undef CHAREQ
+#undef ICHAREQ
+#undef NextChar
+#undef CopyAdvChar
+#undef MatchText
+#undef MatchTextIC
+#undef do_like_escape
 
-       pfree(sterm);
-       pfree(pterm);
+#define CHAREQ(p1, p2) (*(p1) == *(p2))	/* single-byte setup: one byte is one character */
+#define ICHAREQ(p1, p2) (tolower(*(p1)) == tolower(*(p2)))
+#define NextChar(p, plen) ((p)++, (plen)--)
+#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
 
-       return (bool) result;
+#define BYTEA_CHAREQ(p1, p2) (*(p1) == *(p2))	/* byte-wise helpers used by the bytea routines later in this file */
+#define BYTEA_NextChar(p, plen) ((p)++, (plen)--)
+#define BYTEA_CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
+#include "like_match.c"	/* second pass: no renames in effect, presumably yielding the plain (single-byte) names */
+
+/*
+ *     interface routines called by the function manager
+ *
+ *     The name and text wrappers fetch a pointer to, and the byte length
+ *     of, the subject string and the pattern, then pick a matcher according
+ *     to pg_database_encoding_max_length(): the plain Match* routine when
+ *     that is 1, the MB* variant otherwise.
+ *
+ *     namelike: LIKE of a Name subject (argument 0) against a text pattern
+ *     (argument 1).  Returns true only when the matcher reports LIKE_TRUE.
+ */
+
+Datum
+namelike(PG_FUNCTION_ARGS)
+{
+       Name            str = PG_GETARG_NAME(0);
+       text       *pat = PG_GETARG_TEXT_P(1);
+       bool            result;
+       unsigned char *s,
+                          *p;
+       int                     slen,
+                               plen;
+
+       s = NameStr(*str);
+       slen = strlen(s);	/* subject length runs up to the terminating NUL */
+       p = VARDATA(pat);
+       plen = (VARSIZE(pat) - VARHDRSZ);	/* pattern length excludes the varlena header */
+
+       if (pg_database_encoding_max_length() == 1)
+               result = (MatchText(s, slen, p, plen) == LIKE_TRUE);
+       else
+               result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
+
+       PG_RETURN_BOOL(result);
 }
 
-bool
-namelike(NameData *n, struct varlena * p)
+Datum	/* negated LIKE: Name subject (arg 0) vs. text pattern (arg 1) */
+namenlike(PG_FUNCTION_ARGS)
 {
-       if (!n)
-               return FALSE;
-       return fixedlen_like(n->data, p, NAMEDATALEN);
+       Name            str = PG_GETARG_NAME(0);
+       text       *pat = PG_GETARG_TEXT_P(1);
+       bool            result;
+       unsigned char *s,
+                          *p;
+       int                     slen,
+                               plen;
+
+       s = NameStr(*str);
+       slen = strlen(s);	/* subject length runs up to the terminating NUL */
+       p = VARDATA(pat);
+       plen = (VARSIZE(pat) - VARHDRSZ);	/* pattern length excludes the varlena header */
+
+       if (pg_database_encoding_max_length() == 1)	/* 1 => single-byte matcher, else MB variant */
+               result = (MatchText(s, slen, p, plen) != LIKE_TRUE);	/* LIKE_FALSE and LIKE_ABORT both count as "not like" */
+       else
+               result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
+
+       PG_RETURN_BOOL(result);
 }
 
-bool
-namenlike(NameData *s, struct varlena * p)
+Datum	/* LIKE: text subject (arg 0) vs. text pattern (arg 1) */
+textlike(PG_FUNCTION_ARGS)
 {
-       return !namelike(s, p);
+       text       *str = PG_GETARG_TEXT_P(0);
+       text       *pat = PG_GETARG_TEXT_P(1);
+       bool            result;
+       unsigned char *s,
+                          *p;
+       int                     slen,
+                               plen;
+
+       s = VARDATA(str);
+       slen = (VARSIZE(str) - VARHDRSZ);	/* data length excludes the varlena header */
+       p = VARDATA(pat);
+       plen = (VARSIZE(pat) - VARHDRSZ);
+
+       if (pg_database_encoding_max_length() == 1)	/* 1 => single-byte matcher, else MB variant */
+               result = (MatchText(s, slen, p, plen) == LIKE_TRUE);	/* only LIKE_TRUE counts as a match */
+       else
+               result = (MBMatchText(s, slen, p, plen) == LIKE_TRUE);
+
+       PG_RETURN_BOOL(result);
 }
 
-bool
-textlike(struct varlena * s, struct varlena * p)
+Datum	/* negated LIKE: text subject (arg 0) vs. text pattern (arg 1) */
+textnlike(PG_FUNCTION_ARGS)
 {
-       if (!s)
-               return FALSE;
-       return fixedlen_like(VARDATA(s), p, VARSIZE(s) - VARHDRSZ);
+       text       *str = PG_GETARG_TEXT_P(0);
+       text       *pat = PG_GETARG_TEXT_P(1);
+       bool            result;
+       unsigned char *s,
+                          *p;
+       int                     slen,
+                               plen;
+
+       s = VARDATA(str);
+       slen = (VARSIZE(str) - VARHDRSZ);	/* data length excludes the varlena header */
+       p = VARDATA(pat);
+       plen = (VARSIZE(pat) - VARHDRSZ);
+
+       if (pg_database_encoding_max_length() == 1)	/* 1 => single-byte matcher, else MB variant */
+               result = (MatchText(s, slen, p, plen) != LIKE_TRUE);	/* LIKE_FALSE and LIKE_ABORT both count as "not like" */
+       else
+               result = (MBMatchText(s, slen, p, plen) != LIKE_TRUE);
+
+       PG_RETURN_BOOL(result);
 }
 
-bool
-textnlike(struct varlena * s, struct varlena * p)
+Datum	/* LIKE: bytea subject (arg 0) vs. bytea pattern (arg 1) */
+bytealike(PG_FUNCTION_ARGS)
 {
-       return !textlike(s, p);
+       bytea      *str = PG_GETARG_BYTEA_P(0);
+       bytea      *pat = PG_GETARG_BYTEA_P(1);
+       bool            result;
+       unsigned char *s,
+                          *p;
+       int                     slen,
+                               plen;
+
+       s = VARDATA(str);
+       slen = (VARSIZE(str) - VARHDRSZ);	/* data length excludes the varlena header */
+       p = VARDATA(pat);
+       plen = (VARSIZE(pat) - VARHDRSZ);
+
+       result = (MatchBytea(s, slen, p, plen) == LIKE_TRUE);	/* no encoding dispatch: always the byte-wise matcher */
+
+       PG_RETURN_BOOL(result);
 }
 
+Datum	/* negated LIKE: bytea subject (arg 0) vs. bytea pattern (arg 1) */
+byteanlike(PG_FUNCTION_ARGS)
+{
+       bytea      *str = PG_GETARG_BYTEA_P(0);
+       bytea      *pat = PG_GETARG_BYTEA_P(1);
+       bool            result;
+       unsigned char *s,
+                          *p;
+       int                     slen,
+                               plen;
 
-/*     $Revision: 1.19 $
-**     "like.c" A first attempt at a LIKE operator for Postgres95.
-**
-**     Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
-**     Rich $alz is now <rsalz@bbn.com>.
-**     Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
-**
-**     This code was shamelessly stolen from the "pql" code by myself and
-**     slightly modified :)
-**
-**     All references to the word "star" were replaced by "percent"
-**     All references to the word "wild" were replaced by "like"
-**
-**     All the nice shell RE matching stuff was replaced by just "_" and "%"
-**
-**     As I don't have a copy of the SQL standard handy I wasn't sure whether
-**     to leave in the '\' escape character handling. (I suspect the standard
-**     handles "%%" as a single literal percent)
-**
-**     Keith Parks. <keith@mtcc.demon.co.uk>
-**
-**     [SQL92 lets you specify the escape character by saying
-**      LIKE <pattern> ESCAPE <escape character>. We are a small operation
-**      so we force you to use '\'. - ay 7/95]
-**
-*/
+       s = VARDATA(str);
+       slen = (VARSIZE(str) - VARHDRSZ);	/* data length excludes the varlena header */
+       p = VARDATA(pat);
+       plen = (VARSIZE(pat) - VARHDRSZ);
 
-#define LIKE_TRUE                                              1
-#define LIKE_FALSE                                             0
-#define LIKE_ABORT                                             -1
+       result = (MatchBytea(s, slen, p, plen) != LIKE_TRUE);	/* LIKE_FALSE and LIKE_ABORT both count as "not like" */
+
+       PG_RETURN_BOOL(result);
+}
 
 /*
-**     Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
-*/
-static int
-DoMatch(pg_wchar *text, pg_wchar *p)
+ * Case-insensitive versions
+ */
+
+/*
+ * nameiclike --- case-insensitive LIKE of a Name value against a text
+ * pattern.
+ *
+ * Argument 0 is the Name to test, argument 1 the pattern.  The result is
+ * true only when the matcher reports LIKE_TRUE.
+ */
+Datum
+nameiclike(PG_FUNCTION_ARGS)
+{
+       Name            subject = PG_GETARG_NAME(0);
+       text       *pattern = PG_GETARG_TEXT_P(1);
+       unsigned char *subj_data = NameStr(*subject);
+       unsigned char *pat_data = VARDATA(pattern);
+       int                     subj_len = strlen(subj_data);
+       int                     pat_len = VARSIZE(pattern) - VARHDRSZ;
+       int                     outcome;
+
+       /* Anything but a one-byte-per-character encoding takes the MB matcher */
+       if (pg_database_encoding_max_length() != 1)
+               outcome = MBMatchTextIC(subj_data, subj_len, pat_data, pat_len);
+       else
+               outcome = MatchTextIC(subj_data, subj_len, pat_data, pat_len);
+
+       PG_RETURN_BOOL(outcome == LIKE_TRUE);
+}
+
+/*
+ * nameicnlike --- negated case-insensitive LIKE of a Name value against a
+ * text pattern.
+ *
+ * Argument 0 is the Name to test, argument 1 the pattern.  The result is
+ * true for every matcher outcome other than LIKE_TRUE (so LIKE_FALSE and
+ * LIKE_ABORT alike).
+ */
+Datum
+nameicnlike(PG_FUNCTION_ARGS)
+{
+       Name            subject = PG_GETARG_NAME(0);
+       text       *pattern = PG_GETARG_TEXT_P(1);
+       unsigned char *subj_data = NameStr(*subject);
+       unsigned char *pat_data = VARDATA(pattern);
+       int                     subj_len = strlen(subj_data);
+       int                     pat_len = VARSIZE(pattern) - VARHDRSZ;
+       int                     outcome;
+
+       /* Anything but a one-byte-per-character encoding takes the MB matcher */
+       if (pg_database_encoding_max_length() != 1)
+               outcome = MBMatchTextIC(subj_data, subj_len, pat_data, pat_len);
+       else
+               outcome = MatchTextIC(subj_data, subj_len, pat_data, pat_len);
+
+       PG_RETURN_BOOL(outcome != LIKE_TRUE);
+}
+
+/*
+ * texticlike --- case-insensitive LIKE of a text value against a text
+ * pattern.
+ *
+ * Argument 0 is the text to test, argument 1 the pattern.  The result is
+ * true only when the matcher reports LIKE_TRUE.
+ */
+Datum
+texticlike(PG_FUNCTION_ARGS)
+{
+       text       *subject = PG_GETARG_TEXT_P(0);
+       text       *pattern = PG_GETARG_TEXT_P(1);
+       unsigned char *subj_data = VARDATA(subject);
+       unsigned char *pat_data = VARDATA(pattern);
+       int                     subj_len = VARSIZE(subject) - VARHDRSZ;
+       int                     pat_len = VARSIZE(pattern) - VARHDRSZ;
+       int                     outcome;
+
+       /* Anything but a one-byte-per-character encoding takes the MB matcher */
+       if (pg_database_encoding_max_length() != 1)
+               outcome = MBMatchTextIC(subj_data, subj_len, pat_data, pat_len);
+       else
+               outcome = MatchTextIC(subj_data, subj_len, pat_data, pat_len);
+
+       PG_RETURN_BOOL(outcome == LIKE_TRUE);
+}
+
+/*
+ * texticnlike --- negated case-insensitive LIKE of a text value against a
+ * text pattern.
+ *
+ * Argument 0 is the text to test, argument 1 the pattern.  The result is
+ * true for every matcher outcome other than LIKE_TRUE (so LIKE_FALSE and
+ * LIKE_ABORT alike).
+ */
+Datum
+texticnlike(PG_FUNCTION_ARGS)
+{
+       text       *subject = PG_GETARG_TEXT_P(0);
+       text       *pattern = PG_GETARG_TEXT_P(1);
+       unsigned char *subj_data = VARDATA(subject);
+       unsigned char *pat_data = VARDATA(pattern);
+       int                     subj_len = VARSIZE(subject) - VARHDRSZ;
+       int                     pat_len = VARSIZE(pattern) - VARHDRSZ;
+       int                     outcome;
+
+       /* Anything but a one-byte-per-character encoding takes the MB matcher */
+       if (pg_database_encoding_max_length() != 1)
+               outcome = MBMatchTextIC(subj_data, subj_len, pat_data, pat_len);
+       else
+               outcome = MatchTextIC(subj_data, subj_len, pat_data, pat_len);
+
+       PG_RETURN_BOOL(outcome != LIKE_TRUE);
+}
+
+/*
+ * like_escape() --- given a pattern and an ESCAPE string,
+ * convert the pattern to use Postgres' standard backslash escape convention.
+ *
+ * Argument 0 is the pattern, argument 1 the ESCAPE string (both text).
+ * The conversion itself is delegated: do_like_escape() is used when
+ * pg_database_encoding_max_length() is 1, MB_do_like_escape() otherwise.
+ * The converted pattern is returned as text.
+ */
+Datum
+like_escape(PG_FUNCTION_ARGS)
 {
-       int                     matched;
+       text       *pat = PG_GETARG_TEXT_P(0);
+       text       *esc = PG_GETARG_TEXT_P(1);
+       text       *result;
 
-       for (; *p; text ++, p++)
+       if (pg_database_encoding_max_length() == 1)	/* 1 => single-byte converter, else MB variant */
+               result = do_like_escape(pat, esc);
+       else
+               result = MB_do_like_escape(pat, esc);
+
+       PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * like_escape_bytea() --- given a pattern and an ESCAPE string,
+ * convert the pattern to use Postgres' standard backslash escape convention.
+ *
+ * Argument 0 is the bytea pattern, argument 1 the bytea ESCAPE string.
+ *
+ *	- Empty escape: every backslash in the pattern is doubled, so that it
+ *	  acts as an ordinary byte.
+ *	- Escape of exactly one byte: if it is a backslash the pattern is
+ *	  returned as a plain copy; otherwise each unescaped occurrence of the
+ *	  escape byte becomes a backslash, and backslashes are doubled unless
+ *	  they immediately follow an escape byte.
+ *	- Escape longer than one byte: raises ERROR with
+ *	  ERRCODE_INVALID_ESCAPE_SEQUENCE.
+ *
+ * Returns a freshly palloc'd bytea holding the converted pattern.
+ */
+Datum
+like_escape_bytea(PG_FUNCTION_ARGS)
+{
+       bytea      *pat = PG_GETARG_BYTEA_P(0);
+       bytea      *esc = PG_GETARG_BYTEA_P(1);
+       bytea      *result;
+       unsigned char *p,
+                          *e,
+                          *r;
+       int                     plen,
+                               elen;
+       bool            afterescape;
+
+       p = VARDATA(pat);
+       plen = (VARSIZE(pat) - VARHDRSZ);
+       e = VARDATA(esc);
+       elen = (VARSIZE(esc) - VARHDRSZ);
+
+       /*
+        * Worst-case pattern growth is 2x --- unlikely, but it's hardly worth
+        * trying to calculate the size more accurately than that.
+        *
+        * The result is a bytea, so allocate it as one (the original cast
+        * this buffer to text *).
+        */
+       result = (bytea *) palloc(plen * 2 + VARHDRSZ);
+       r = VARDATA(result);
+
+       if (elen == 0)
        {
-               if (*text == '\0' && *p != '%')
-                       return LIKE_ABORT;
-               switch (*p)
+               /*
+                * No escape character is wanted.  Double any backslashes in the
+                * pattern to make them act like ordinary characters.
+                */
+               while (plen > 0)
                {
-                       case '\\':
-                               /* Literal match with following character. */
-                               p++;
-                               /* FALLTHROUGH */
-                       default:
-                               if (*text !=*p)
-                                       return LIKE_FALSE;
-                               continue;
-                       case '_':
-                               /* Match anything. */
-                               continue;
-                       case '%':
-                               while (*++p == '%')
-                                       /* Consecutive percents act just like one. */
-                                       continue;
-                               if (*p == '\0')
-                                       /* Trailing percent matches everything. */
-                                       return LIKE_TRUE;
-                               while (*text)
-                                       if ((matched = DoMatch(text ++, p)) != LIKE_FALSE)
-                                               return matched;
-                               return LIKE_ABORT;
+                       if (*p == '\\')
+                               *r++ = '\\';
+                       BYTEA_CopyAdvChar(r, p, plen);
                }
        }
+       else
+       {
+               /*
+                * The specified escape must be only a single character.
+                * Stepping over one byte must leave nothing behind.
+                */
+               BYTEA_NextChar(e, elen);
+               if (elen != 0)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+                                        errmsg("invalid escape string"),
+                                        errhint("Escape string must be empty or one character.")));
 
-       return *text == '\0';
-}
+               /* rewind to the escape byte after the length check above */
+               e = VARDATA(esc);
+
+               /*
+                * If specified escape is '\', just copy the pattern as-is.
+                */
+               if (*e == '\\')
+               {
+                       memcpy(result, pat, VARSIZE(pat));
+                       PG_RETURN_BYTEA_P(result);
+               }
+
+               /*
+                * Otherwise, convert occurrences of the specified escape
+                * character to '\', and double occurrences of '\' --- unless they
+                * immediately follow an escape character!
+                */
+               afterescape = false;
+               while (plen > 0)
+               {
+                       if (BYTEA_CHAREQ(p, e) && !afterescape)
+                       {
+                               *r++ = '\\';
+                               BYTEA_NextChar(p, plen);
+                               afterescape = true;
+                       }
+                       else if (*p == '\\')
+                       {
+                               *r++ = '\\';
+                               if (!afterescape)
+                                       *r++ = '\\';
+                               BYTEA_NextChar(p, plen);
+                               afterescape = false;
+                       }
+                       else
+                       {
+                               BYTEA_CopyAdvChar(r, p, plen);
+                               afterescape = false;
+                       }
+               }
+       }
 
+       /* total size = header plus the bytes written through r */
+       VARATT_SIZEP(result) = r - ((unsigned char *) result);
+
+       PG_RETURN_BYTEA_P(result);
+}
 
 /*
-**     User-level routine.  Returns TRUE or FALSE.
-*/
+ * MatchBytea --- LIKE pattern matcher specifically for the bytea (binary)
+ * datatype; presumably the byte-wise counterpart of the text matchers
+ * pulled in from like_match.c.
+ *
+ * t/tlen give the subject bytes and their count, p/plen the pattern.
+ * In the pattern, '\' makes the following byte match literally, '%'
+ * matches any run of bytes (including none) and '_' matches any single
+ * byte.  A pattern that ends right after a '\' yields LIKE_FALSE.
+ *
+ * Returns LIKE_TRUE on a match, LIKE_FALSE when this attempt fails, and
+ * LIKE_ABORT when the subject ran out with pattern left over, so that no
+ * later starting position could succeed either.
+ */
 static int
-like(pg_wchar *text, pg_wchar *p)
+MatchBytea(unsigned char *t, int tlen, unsigned char *p, int plen)
 {
-       if (p[0] == '%' && p[1] == '\0')
-               return TRUE;
-       return DoMatch(text, p) == LIKE_TRUE;
-}
+       /* Fast path for match-everything pattern */
+       if ((plen == 1) && (*p == '%'))
+               return LIKE_TRUE;
+
+       while ((tlen > 0) && (plen > 0))
+       {
+               if (*p == '\\')
+               {
+                       /* Next pattern char must match literally, whatever it is */
+                       BYTEA_NextChar(p, plen);
+                       if ((plen <= 0) || !BYTEA_CHAREQ(t, p))
+                               return LIKE_FALSE;
+               }
+               else if (*p == '%')
+               {
+                       /* %% is the same as % according to the SQL standard */
+                       /* Advance past all %'s */
+                       while ((plen > 0) && (*p == '%'))
+                               BYTEA_NextChar(p, plen);
+                       /* Trailing percent matches everything. */
+                       if (plen <= 0)
+                               return LIKE_TRUE;
+
+                       /*
+                        * Otherwise, scan for a text position at which we can match
+                        * the rest of the pattern.
+                        */
+                       while (tlen > 0)
+                       {
+                               /*
+                                * Optimization to prevent most recursion: don't recurse
+                                * unless first pattern char might match this text char.
+                                */
+                               if (BYTEA_CHAREQ(t, p) || (*p == '\\') || (*p == '_'))
+                               {
+                                       int                     matched = MatchBytea(t, tlen, p, plen);
+
+                                       if (matched != LIKE_FALSE)
+                                               return matched; /* TRUE or ABORT */
+                               }
+
+                               BYTEA_NextChar(t, tlen);	/* LIKE_FALSE here: retry one byte further into the text */
+                       }
+
+                       /*
+                        * End of text with no match, so no point in trying later
+                        * places to start matching this pattern.
+                        */
+                       return LIKE_ABORT;
+               }
+               else if ((*p != '_') && !BYTEA_CHAREQ(t, p))
+               {
+                       /*
+                        * Not the single-character wildcard and no explicit match?
+                        * Then time to quit...
+                        */
+                       return LIKE_FALSE;
+               }
+
+               BYTEA_NextChar(t, tlen);	/* this byte matched: step both text and pattern */
+               BYTEA_NextChar(p, plen);
+       }
+
+       if (tlen > 0)
+               return LIKE_FALSE;              /* end of pattern, but not of text */
+
+       /* End of input string.  Do we have matching pattern remaining? */
+       while ((plen > 0) && (*p == '%'))       /* allow multiple %'s at end of
+                                                                                * pattern */
+               BYTEA_NextChar(p, plen);
+       if (plen <= 0)
+               return LIKE_TRUE;
+
+       /*
+        * End of text with no match, so no point in trying later places to
+        * start matching this pattern.
+        */
+       return LIKE_ABORT;
+}      /* MatchBytea() */