granicus.if.org Git - postgresql/commitdiff
Mop-up for commit 85feb77aa09cda9ff3e12cf95c757c499dc25343.
author: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 22 Sep 2017 15:35:12 +0000 (11:35 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 22 Sep 2017 15:35:12 +0000 (11:35 -0400)
Adjust commentary in regc_pg_locale.c to remove mention of the possibility
of not having <wctype.h> functions, since we no longer consider that.

Eliminate duplicate code in wparser_def.c by generalizing the p_iswhat
macro to take a parameter saying what to return for non-ASCII chars
in C locale.  (That's not really a consequence of the
USE_WIDE_UPPER_LOWER-ectomy, but I noticed it while doing that.)

src/backend/regex/regc_pg_locale.c
src/backend/tsearch/wparser_def.c

index b2122e9e8fabc437bac439e2f1fa706890a0b8de..e39ee7ae09fa148de60a278a4e316d9c1dae2672 100644 (file)
  *
  * 2. In the "default" collation (which is supposed to obey LC_CTYPE):
  *
- * 2a. When working in UTF8 encoding, we use the <wctype.h> functions if
- * available.  This assumes that every platform uses Unicode codepoints
- * directly as the wchar_t representation of Unicode.  On some platforms
+ * 2a. When working in UTF8 encoding, we use the <wctype.h> functions.
+ * This assumes that every platform uses Unicode codepoints directly
+ * as the wchar_t representation of Unicode.  On some platforms
  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
  *
- * 2b. In all other encodings, or on machines that lack <wctype.h>, we use
- * the <ctype.h> functions for pg_wchar values up to 255, and punt for values
- * above that.  This is only 100% correct in single-byte encodings such as
- * LATINn.  However, non-Unicode multibyte encodings are mostly Far Eastern
- * character sets for which the properties being tested here aren't very
- * relevant for higher code values anyway.  The difficulty with using the
- * <wctype.h> functions with non-Unicode multibyte encodings is that we can
- * have no certainty that the platform's wchar_t representation matches
- * what we do in pg_wchar conversions.
+ * 2b. In all other encodings, we use the <ctype.h> functions for pg_wchar
+ * values up to 255, and punt for values above that.  This is 100% correct
+ * only in single-byte encodings such as LATINn.  However, non-Unicode
+ * multibyte encodings are mostly Far Eastern character sets for which the
+ * properties being tested here aren't very relevant for higher code values
+ * anyway.  The difficulty with using the <wctype.h> functions with
+ * non-Unicode multibyte encodings is that we can have no certainty that
+ * the platform's wchar_t representation matches what we do in pg_wchar
+ * conversions.
  *
  * 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
  * Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
index c118357336d5023948f87fa094e764c06a10d970..8450e1c08e07791d3a54341b99e39a5a3ec8ba03 100644 (file)
@@ -427,94 +427,45 @@ TParserCopyClose(TParser *prs)
  *     - if locale is C then we use pgwstr instead of wstr.
  */
 
-#define p_iswhat(type)                                                                                                         \
+#define p_iswhat(type, nonascii)                                                                                       \
+                                                                                                                                                       \
 static int                                                                                                                                     \
-p_is##type(TParser *prs) {                                                                                                     \
-       Assert( prs->state );                                                                                                   \
-       if ( prs->usewide )                                                                                                             \
+p_is##type(TParser *prs)                                                                                                       \
+{                                                                                                                                                      \
+       Assert(prs->state);                                                                                                             \
+       if (prs->usewide)                                                                                                               \
        {                                                                                                                                               \
-               if ( prs->pgwstr )                                                                                                      \
+               if (prs->pgwstr)                                                                                                        \
                {                                                                                                                                       \
                        unsigned int c = *(prs->pgwstr + prs->state->poschar);                  \
-                       if ( c > 0x7f )                                                                                                 \
-                               return 0;                                                                                                       \
-                       return is##type( c );                                                                                   \
+                       if (c > 0x7f)                                                                                                   \
+                               return nonascii;                                                                                        \
+                       return is##type(c);                                                                                             \
                }                                                                                                                                       \
-               return isw##type( *( prs->wstr + prs->state->poschar ) );                       \
+               return isw##type(*(prs->wstr + prs->state->poschar));                           \
        }                                                                                                                                               \
-                                                                                                                                                       \
-       return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
-}      \
+       return is##type(*(unsigned char *) (prs->str + prs->state->posbyte));   \
+}                                                                                                                                                      \
                                                                                                                                                        \
 static int                                                                                                                                     \
-p_isnot##type(TParser *prs) {                                                                                          \
+p_isnot##type(TParser *prs)                                                                                                    \
+{                                                                                                                                                      \
        return !p_is##type(prs);                                                                                                \
 }
 
-static int
-p_isalnum(TParser *prs)
-{
-       Assert(prs->state);
-
-       if (prs->usewide)
-       {
-               if (prs->pgwstr)
-               {
-                       unsigned int c = *(prs->pgwstr + prs->state->poschar);
-
-                       /*
-                        * any non-ascii symbol with multibyte encoding with C-locale is
-                        * an alpha character
-                        */
-                       if (c > 0x7f)
-                               return 1;
-
-                       return isalnum(c);
-               }
-
-               return iswalnum(*(prs->wstr + prs->state->poschar));
-       }
-
-       return isalnum(*(unsigned char *) (prs->str + prs->state->posbyte));
-}
-static int
-p_isnotalnum(TParser *prs)
-{
-       return !p_isalnum(prs);
-}
-
-static int
-p_isalpha(TParser *prs)
-{
-       Assert(prs->state);
-
-       if (prs->usewide)
-       {
-               if (prs->pgwstr)
-               {
-                       unsigned int c = *(prs->pgwstr + prs->state->poschar);
-
-                       /*
-                        * any non-ascii symbol with multibyte encoding with C-locale is
-                        * an alpha character
-                        */
-                       if (c > 0x7f)
-                               return 1;
-
-                       return isalpha(c);
-               }
-
-               return iswalpha(*(prs->wstr + prs->state->poschar));
-       }
-
-       return isalpha(*(unsigned char *) (prs->str + prs->state->posbyte));
-}
-
-static int
-p_isnotalpha(TParser *prs)
-{
-       return !p_isalpha(prs);
-}
+/*
+ * In C locale with a multibyte encoding, any non-ASCII symbol is considered
+ * an alpha character, but not a member of other char classes.
+ */
+p_iswhat(alnum, 1)
+p_iswhat(alpha, 1)
+p_iswhat(digit, 0)
+p_iswhat(lower, 0)
+p_iswhat(print, 0)
+p_iswhat(punct, 0)
+p_iswhat(space, 0)
+p_iswhat(upper, 0)
+p_iswhat(xdigit, 0)
 
 /* p_iseq should be used only for ascii symbols */
 
@@ -525,14 +476,6 @@ p_iseq(TParser *prs, char c)
        return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0;
 }
 
-p_iswhat(digit)
-p_iswhat(lower)
-p_iswhat(print)
-p_iswhat(punct)
-p_iswhat(space)
-p_iswhat(upper)
-p_iswhat(xdigit)
-
 static int
 p_isEOF(TParser *prs)
 {