granicus.if.org Git - postgresql/blob - src/port/pgstrcasecmp.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * pgstrcasecmp.c
   4  *         Portable SQL-like case-independent comparisons and conversions.
   5  *
   6  * SQL99 specifies Unicode-aware case normalization, which we don't yet
   7  * have the infrastructure for.  Instead we use tolower() to provide a
   8  * locale-aware translation.  However, there are some locales where this
   9  * is not right either (eg, Turkish may do strange things with 'i' and
  10  * 'I').  Our current compromise is to use tolower() for characters with
  11  * the high bit set, and use an ASCII-only downcasing for 7-bit
  12  * characters.
  13  *
  14  * NB: this code should match downcase_truncate_identifier() in scansup.c.
  15  *
  16  *
  17  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  18  *
  19  * $PostgreSQL: pgsql/src/port/pgstrcasecmp.c,v 1.12 2010/01/02 16:58:13 momjian Exp $
  20  *
  21  *-------------------------------------------------------------------------
  22  */
  23 #include "c.h"
  24
  25 #include <ctype.h>
  26
  27
  28 /*
  29  * Case-independent comparison of two null-terminated strings.
  30  */
  31 int
  32 pg_strcasecmp(const char *s1, const char *s2)
  33 {
  34         for (;;)
  35         {
  36                 unsigned char ch1 = (unsigned char) *s1++;
  37                 unsigned char ch2 = (unsigned char) *s2++;
  38
  39                 if (ch1 != ch2)
  40                 {
  41                         if (ch1 >= 'A' && ch1 <= 'Z')
  42                                 ch1 += 'a' - 'A';
  43                         else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
  44                                 ch1 = tolower(ch1);
  45
  46                         if (ch2 >= 'A' && ch2 <= 'Z')
  47                                 ch2 += 'a' - 'A';
  48                         else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
  49                                 ch2 = tolower(ch2);
  50
  51                         if (ch1 != ch2)
  52                                 return (int) ch1 - (int) ch2;
  53                 }
  54                 if (ch1 == 0)
  55                         break;
  56         }
  57         return 0;
  58 }
  59
  60 /*
  61  * Case-independent comparison of two not-necessarily-null-terminated strings.
  62  * At most n bytes will be examined from each string.
  63  */
  64 int
  65 pg_strncasecmp(const char *s1, const char *s2, size_t n)
  66 {
  67         while (n-- > 0)
  68         {
  69                 unsigned char ch1 = (unsigned char) *s1++;
  70                 unsigned char ch2 = (unsigned char) *s2++;
  71
  72                 if (ch1 != ch2)
  73                 {
  74                         if (ch1 >= 'A' && ch1 <= 'Z')
  75                                 ch1 += 'a' - 'A';
  76                         else if (IS_HIGHBIT_SET(ch1) && isupper(ch1))
  77                                 ch1 = tolower(ch1);
  78
  79                         if (ch2 >= 'A' && ch2 <= 'Z')
  80                                 ch2 += 'a' - 'A';
  81                         else if (IS_HIGHBIT_SET(ch2) && isupper(ch2))
  82                                 ch2 = tolower(ch2);
  83
  84                         if (ch1 != ch2)
  85                                 return (int) ch1 - (int) ch2;
  86                 }
  87                 if (ch1 == 0)
  88                         break;
  89         }
  90         return 0;
  91 }
  92
  93 /*
  94  * Fold a character to upper case.
  95  *
  96  * Unlike some versions of toupper(), this is safe to apply to characters
  97  * that aren't lower case letters.  Note however that the whole thing is
  98  * a bit bogus for multibyte character sets.
  99  */
 100 unsigned char
 101 pg_toupper(unsigned char ch)
 102 {
 103         if (ch >= 'a' && ch <= 'z')
 104                 ch += 'A' - 'a';
 105         else if (IS_HIGHBIT_SET(ch) && islower(ch))
 106                 ch = toupper(ch);
 107         return ch;
 108 }
 109
 110 /*
 111  * Fold a character to lower case.
 112  *
 113  * Unlike some versions of tolower(), this is safe to apply to characters
 114  * that aren't upper case letters.  Note however that the whole thing is
 115  * a bit bogus for multibyte character sets.
 116  */
 117 unsigned char
 118 pg_tolower(unsigned char ch)
 119 {
 120         if (ch >= 'A' && ch <= 'Z')
 121                 ch += 'a' - 'A';
 122         else if (IS_HIGHBIT_SET(ch) && isupper(ch))
 123                 ch = tolower(ch);
 124         return ch;
 125 }