From e9e50b5aa3f95e60b64db4389e10d427a336c546 Mon Sep 17 00:00:00 2001
From: Andy Heninger <andy.heninger@gmail.com>
Date: Tue, 28 Aug 2018 11:15:51 -0700
Subject: [PATCH] =?utf8?q?ICU-13844=20Regex=20unicode=20properties,=20add?=
 =?utf8?q?=20IsXXX=20forms=20for=20Java=20compatib=E2=80=A6=20(#61)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

* ICU-13844 Regex unicode properties, add IsXXX forms for Java compatibility.

* ICU-13844 Regex Property Review fixes from Markus.
---
 icu4c/source/i18n/regexcmp.cpp          | 361 ++++++++++++------------
 icu4c/source/test/testdata/regextst.txt |  56 ++++
 2 files changed, 237 insertions(+), 180 deletions(-)
diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp
index ca1008c0d06..a674d22c929 100644
--- a/icu4c/source/i18n/regexcmp.cpp
+++ b/icu4c/source/i18n/regexcmp.cpp
@@ -28,6 +28,7 @@
 #include "patternprops.h"
 #include "putilimp.h"
 #include "cmemory.h"
+#include "cstr.h"
 #include "cstring.h"
 #include "uvectr32.h"
 #include "uvectr64.h"
@@ -3892,7 +3893,7 @@ void RegexCompile::stripNOPs() {
 //
 //------------------------------------------------------------------------------
 void RegexCompile::error(UErrorCode e) {
-    if (U_SUCCESS(*fStatus)) {
+    if (U_SUCCESS(*fStatus) || e == U_MEMORY_ALLOCATION_ERROR) {
         *fStatus = e;
         // Hmm. fParseErr (UParseError) line & offset fields are int32_t in public
         // API (see common/unicode/parseerr.h), while fLineNum and fCharNum are
@@ -4370,209 +4371,209 @@ static inline void addIdentifierIgnorable(UnicodeSet *set, UErrorCode& ec) {
 //     Includes trying the Java "properties" that aren't supported as
 //     normal ICU UnicodeSet properties
 //
-static const UChar posSetPrefix[] = {0x5b, 0x5c, 0x70, 0x7b, 0}; // "[\p{"
-static const UChar negSetPrefix[] = {0x5b, 0x5c, 0x50, 0x7b, 0}; // "[\P{"
 UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UBool negated) {
-    UnicodeString   setExpr;
-    UnicodeSet      *set;
-    uint32_t        usetFlags = 0;
 
     if (U_FAILURE(*fStatus)) {
-        return NULL;
+        return nullptr;
     }
+    LocalPointer<UnicodeSet> set;
+    UErrorCode status = U_ZERO_ERROR;
 
-    //
-    //  First try the property as we received it
-    //
-    if (negated) {
-        setExpr.append(negSetPrefix, -1);
-    } else {
-        setExpr.append(posSetPrefix, -1);
-    }
-    setExpr.append(propName);
-    setExpr.append(chRBrace);
-    setExpr.append(chRBracket);
-    if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
-        usetFlags |= USET_CASE_INSENSITIVE;
-    }
-    set = new UnicodeSet(setExpr, usetFlags, NULL, *fStatus);
-    if (U_SUCCESS(*fStatus)) {
-       return set;
-    }
-    delete set;
-    set = NULL;
-
-    //
-    //  The property as it was didn't work.
-
-    //  Do [:word:]. It is not recognized as a property by UnicodeSet.  "word" not standard POSIX
-    //     or standard Java, but many other regular expression packages do recognize it.
-
-    if (propName.caseCompare(UNICODE_STRING_SIMPLE("word"), 0) == 0) {
-        *fStatus = U_ZERO_ERROR;
-        set = new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET]));
-        if (set == NULL) {
-            *fStatus = U_MEMORY_ALLOCATION_ERROR;
-            return set;
+    do {      // non-loop, exists to allow breaks from the block.
+        //
+        //  First try the property as we received it
+        //
+        UnicodeString   setExpr;
+        uint32_t        usetFlags = 0;
+        setExpr.append(u"[\\p{", -1);
+        setExpr.append(propName);
+        setExpr.append(u"}]", -1);
+        if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
+            usetFlags |= USET_CASE_INSENSITIVE;
         }
-        if (negated) {
-            set->complement();
+        set.adoptInsteadAndCheckErrorCode(new UnicodeSet(setExpr, usetFlags, NULL, status), status);
+        if (U_SUCCESS(status) || status == U_MEMORY_ALLOCATION_ERROR) {
+            break;
         }
-        return set;
-    }
 
+        //
+        //  The incoming property wasn't directly recognized by ICU.
 
-    //    Do Java fixes -
-    //       InGreek -> InGreek or Coptic, that being the official Unicode name for that block.
-    //       InCombiningMarksforSymbols -> InCombiningDiacriticalMarksforSymbols.
-    //
-    //       Note on Spaces:  either "InCombiningMarksForSymbols" or "InCombining Marks for Symbols"
-    //                        is accepted by Java.  The property part of the name is compared
-    //                        case-insenstively.  The spaces must be exactly as shown, either
-    //                        all there, or all omitted, with exactly one at each position
-    //                        if they are present.  From checking against JDK 1.6
-    //
-    //       This code should be removed when ICU properties support the Java  compatibility names
-    //          (ICU 4.0?)
-    //
-    UnicodeString mPropName = propName;
-    if (mPropName.caseCompare(UNICODE_STRING_SIMPLE("InGreek"), 0) == 0) {
-        mPropName = UNICODE_STRING_SIMPLE("InGreek and Coptic");
-    }
-    if (mPropName.caseCompare(UNICODE_STRING_SIMPLE("InCombining Marks for Symbols"), 0) == 0 ||
-        mPropName.caseCompare(UNICODE_STRING_SIMPLE("InCombiningMarksforSymbols"), 0) == 0) {
-        mPropName = UNICODE_STRING_SIMPLE("InCombining Diacritical Marks for Symbols");
-    }
-    else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) {
-        mPropName = UNICODE_STRING_SIMPLE("javaValidCodePoint");
-    }
+        //  Check [:word:] and [:all:]. These are not recognized as a properties by ICU UnicodeSet.
+        //     Java accepts 'word' with mixed case.
+        //     Java accepts 'all' only in all lower case.
 
-    //    See if the property looks like a Java "InBlockName", which
-    //    we will recast as "Block=BlockName"
-    //
-    if (mPropName.startsWith(u"In", 2) && propName.length()>=3) {
-        setExpr.truncate(4);   // Leaves "[\p{", or "[\P{"
-        setExpr.append(u"Block=", -1);
-        setExpr.append(UnicodeString(mPropName, 2));  // Property with the leading "In" removed.
-        setExpr.append(chRBrace);
-        setExpr.append(chRBracket);
-        *fStatus = U_ZERO_ERROR;
-        set = new UnicodeSet(setExpr, usetFlags, NULL, *fStatus);
-        if (U_SUCCESS(*fStatus)) {
-            return set;
+        status = U_ZERO_ERROR;
+        if (propName.caseCompare(u"word", -1, 0) == 0) {
+            set.adoptInsteadAndCheckErrorCode(new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET])), status);
+            break;
+        }
+        if (propName.compare(u"all", -1) == 0) {
+            set.adoptInsteadAndCheckErrorCode(new UnicodeSet(0, 0x10ffff), status);
+            break;
         }
-        delete set;
-        set = NULL;
-    }
 
-    if (propName.startsWith(UNICODE_STRING_SIMPLE("java")) ||
-        propName.compare(UNICODE_STRING_SIMPLE("all")) == 0)
-    {
-        UErrorCode localStatus = U_ZERO_ERROR;
-        //setExpr.remove();
-        set = new UnicodeSet();
-        //
-        //  Try the various Java specific properties.
-        //   These all begin with "java"
+
+        //    Do Java InBlock expressions
         //
-        if (mPropName.compare(UNICODE_STRING_SIMPLE("javaDefined")) == 0) {
-            addCategory(set, U_GC_CN_MASK, localStatus);
-            set->complement();
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaDigit")) == 0) {
-            addCategory(set, U_GC_ND_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaIdentifierIgnorable")) == 0) {
-            addIdentifierIgnorable(set, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaISOControl")) == 0) {
-            set->add(0, 0x1F).add(0x7F, 0x9F);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaJavaIdentifierPart")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-            addCategory(set, U_GC_SC_MASK, localStatus);
-            addCategory(set, U_GC_PC_MASK, localStatus);
-            addCategory(set, U_GC_ND_MASK, localStatus);
-            addCategory(set, U_GC_NL_MASK, localStatus);
-            addCategory(set, U_GC_MC_MASK, localStatus);
-            addCategory(set, U_GC_MN_MASK, localStatus);
-            addIdentifierIgnorable(set, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaJavaIdentifierStart")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-            addCategory(set, U_GC_NL_MASK, localStatus);
-            addCategory(set, U_GC_SC_MASK, localStatus);
-            addCategory(set, U_GC_PC_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLetter")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLetterOrDigit")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-            addCategory(set, U_GC_ND_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLowerCase")) == 0) {
-            addCategory(set, U_GC_LL_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaMirrored")) == 0) {
-            set->applyIntPropertyValue(UCHAR_BIDI_MIRRORED, 1, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaSpaceChar")) == 0) {
-            addCategory(set, U_GC_Z_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaSupplementaryCodePoint")) == 0) {
-            set->add(0x10000, UnicodeSet::MAX_VALUE);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaTitleCase")) == 0) {
-            addCategory(set, U_GC_LT_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUnicodeIdentifierStart")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-            addCategory(set, U_GC_NL_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUnicodeIdentifierPart")) == 0) {
-            addCategory(set, U_GC_L_MASK, localStatus);
-            addCategory(set, U_GC_PC_MASK, localStatus);
-            addCategory(set, U_GC_ND_MASK, localStatus);
-            addCategory(set, U_GC_NL_MASK, localStatus);
-            addCategory(set, U_GC_MC_MASK, localStatus);
-            addCategory(set, U_GC_MN_MASK, localStatus);
-            addIdentifierIgnorable(set, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUpperCase")) == 0) {
-            addCategory(set, U_GC_LU_MASK, localStatus);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaValidCodePoint")) == 0) {
-            set->add(0, UnicodeSet::MAX_VALUE);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaWhitespace")) == 0) {
-            addCategory(set, U_GC_Z_MASK, localStatus);
-            set->removeAll(UnicodeSet().add(0xa0).add(0x2007).add(0x202f));
-            set->add(9, 0x0d).add(0x1c, 0x1f);
-        }
-        else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) {
-            set->add(0, UnicodeSet::MAX_VALUE);
+        UnicodeString mPropName = propName;
+        if (mPropName.startsWith(u"In", 2) && mPropName.length() >= 3) {
+            status = U_ZERO_ERROR;
+            set.adoptInsteadAndCheckErrorCode(new UnicodeSet(), status);
+            if (U_FAILURE(status)) {
+                break;
+            }
+            UnicodeString blockName(mPropName, 2);  // Property with the leading "In" removed.
+            set->applyPropertyAlias(UnicodeString(u"Block"), blockName, status);
+            break;
         }
 
-        if (U_SUCCESS(localStatus) && !set->isEmpty()) {
-            *fStatus = U_ZERO_ERROR;
-            if (usetFlags & USET_CASE_INSENSITIVE) {
+        //  Check for the Java form "IsBooleanPropertyValue", which we will recast
+        //  as "BooleanPropertyValue". The property value can be either a
+        //  a General Category or a Script Name.
+
+        if (propName.startsWith(u"Is", 2) && propName.length()>=3) {
+            mPropName.remove(0, 2);      // Strip the "Is"
+            if (mPropName.indexOf(u'=') >= 0) {
+                // Reject any "Is..." property expression containing an '=', that is,
+                // any non-binary property expression.
+                status = U_REGEX_PROPERTY_SYNTAX;
+                break;
+            }
+
+            if (mPropName.caseCompare(u"assigned", -1, 0) == 0) {
+                mPropName.setTo(u"unassigned", -1);
+                negated = !negated;
+            } else if (mPropName.caseCompare(u"TitleCase", -1, 0) == 0) {
+                mPropName.setTo(u"Titlecase_Letter", -1);
+            }
+
+            mPropName.insert(0, u"[\\p{", -1);
+            mPropName.append(u"}]", -1);
+            set.adoptInsteadAndCheckErrorCode(new UnicodeSet(mPropName, *fStatus), status);
+
+            if (U_SUCCESS(status) && !set->isEmpty() && (usetFlags & USET_CASE_INSENSITIVE)) {
                 set->closeOver(USET_CASE_INSENSITIVE);
             }
-            if (negated) {
+            break;
+
+        }
+
+        if (propName.startsWith(u"java", -1)) {
+            status = U_ZERO_ERROR;
+            set.adoptInsteadAndCheckErrorCode(new UnicodeSet(), status);
+            if (U_FAILURE(status)) {
+                break;
+            }
+            //
+            //  Try the various Java specific properties.
+            //   These all begin with "java"
+            //
+            if (propName.compare(u"javaDefined", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_CN_MASK, status);
                 set->complement();
             }
-            return set;
+            else if (propName.compare(u"javaDigit", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_ND_MASK, status);
+            }
+            else if (propName.compare(u"javaIdentifierIgnorable", -1) == 0) {
+                addIdentifierIgnorable(set.getAlias(), status);
+            }
+            else if (propName.compare(u"javaISOControl", -1) == 0) {
+                set->add(0, 0x1F).add(0x7F, 0x9F);
+            }
+            else if (propName.compare(u"javaJavaIdentifierPart", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+                addCategory(set.getAlias(), U_GC_SC_MASK, status);
+                addCategory(set.getAlias(), U_GC_PC_MASK, status);
+                addCategory(set.getAlias(), U_GC_ND_MASK, status);
+                addCategory(set.getAlias(), U_GC_NL_MASK, status);
+                addCategory(set.getAlias(), U_GC_MC_MASK, status);
+                addCategory(set.getAlias(), U_GC_MN_MASK, status);
+                addIdentifierIgnorable(set.getAlias(), status);
+            }
+            else if (propName.compare(u"javaJavaIdentifierStart", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+                addCategory(set.getAlias(), U_GC_NL_MASK, status);
+                addCategory(set.getAlias(), U_GC_SC_MASK, status);
+                addCategory(set.getAlias(), U_GC_PC_MASK, status);
+            }
+            else if (propName.compare(u"javaLetter", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+            }
+            else if (propName.compare(u"javaLetterOrDigit", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+                addCategory(set.getAlias(), U_GC_ND_MASK, status);
+            }
+            else if (propName.compare(u"javaLowerCase", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_LL_MASK, status);
+            }
+            else if (propName.compare(u"javaMirrored", -1) == 0) {
+                set->applyIntPropertyValue(UCHAR_BIDI_MIRRORED, 1, status);
+            }
+            else if (propName.compare(u"javaSpaceChar", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_Z_MASK, status);
+            }
+            else if (propName.compare(u"javaSupplementaryCodePoint", -1) == 0) {
+                set->add(0x10000, UnicodeSet::MAX_VALUE);
+            }
+            else if (propName.compare(u"javaTitleCase", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_LT_MASK, status);
+            }
+            else if (propName.compare(u"javaUnicodeIdentifierStart", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+                addCategory(set.getAlias(), U_GC_NL_MASK, status);
+            }
+            else if (propName.compare(u"javaUnicodeIdentifierPart", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_L_MASK, status);
+                addCategory(set.getAlias(), U_GC_PC_MASK, status);
+                addCategory(set.getAlias(), U_GC_ND_MASK, status);
+                addCategory(set.getAlias(), U_GC_NL_MASK, status);
+                addCategory(set.getAlias(), U_GC_MC_MASK, status);
+                addCategory(set.getAlias(), U_GC_MN_MASK, status);
+                addIdentifierIgnorable(set.getAlias(), status);
+            }
+            else if (propName.compare(u"javaUpperCase", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_LU_MASK, status);
+            }
+            else if (propName.compare(u"javaValidCodePoint", -1) == 0) {
+                set->add(0, UnicodeSet::MAX_VALUE);
+            }
+            else if (propName.compare(u"javaWhitespace", -1) == 0) {
+                addCategory(set.getAlias(), U_GC_Z_MASK, status);
+                set->removeAll(UnicodeSet().add(0xa0).add(0x2007).add(0x202f));
+                set->add(9, 0x0d).add(0x1c, 0x1f);
+            } else {
+                status = U_REGEX_PROPERTY_SYNTAX;
+            }
+
+            if (U_SUCCESS(status) && !set->isEmpty() && (usetFlags & USET_CASE_INSENSITIVE)) {
+                set->closeOver(USET_CASE_INSENSITIVE);
+            }
+            break;
+        }
+
+        // Unrecognized property. ICU didn't like it as it was, and none of the Java compatibility
+        // extensions matched it.
+        status = U_REGEX_PROPERTY_SYNTAX;
+    } while (false);   // End of do loop block. Code above breaks out of the block on success or hard failure.
+
+    if (U_SUCCESS(status)) {
+        U_ASSERT(set.isValid());
+        if (negated) {
+            set->complement();
         }
-        delete set;
-        set = NULL;
+        return set.orphan();
+    } else {
+        if (status == U_ILLEGAL_ARGUMENT_ERROR) {
+            status = U_REGEX_PROPERTY_SYNTAX;
+        }
+        error(status);
+        return nullptr;
     }
-    error(*fStatus);
-    return NULL;
 }
 
 
-
 //
 //  SetEval   Part of the evaluation of [set expressions].
 //            Perform any pending (stacked) operations with precedence
diff --git a/icu4c/source/test/testdata/regextst.txt b/icu4c/source/test/testdata/regextst.txt
index 977a7b34590..ee7bdf2227e 100644
--- a/icu4c/source/test/testdata/regextst.txt
+++ b/icu4c/source/test/testdata/regextst.txt
@@ -1363,6 +1363,62 @@
 "abc"                  Z3       "abc abc <0>abc</0> xyz"
 "abc"                  z4       "abc abc abc xyz"
 
+# Bug #13844  Verify that non-standard Java property names are recognized.
+"[\p{IsAlphabetic}]"            " <0>A</0>"
+"[\P{IsAlphabetic}]"            "A<0> </0>"
+"[\p{IsIdeographic}]"           "A<0>ã</0>"
+"[\P{IsIdeographic}]"           "ã<0>A</0>"
+"[\p{IsLetter}]"                " <0>A</0>"
+"[\P{IsLetter}]"                "A<0> </0>"
+"[\p{Letter}]"                  " <0>A</0>"
+"[\p{IsLowercase}]"             "A<0>a</0>"
+"[\P{IsLowercase}]"             "a<0>A</0>"
+"[\p{IsUppercase}]"             "a<0>A</0>"
+"[\P{IsUppercase}]"             "A<0>a</0>"
+"[\p{IsTitlecase}]"             "D<0>Ç²</0>"
+"[\P{IsTitlecase}]"             "Ç²<0>D</0>"
+"[\p{IsPunctuation}]"           " <0>&</0>"
+"[\P{IsPunctuation}]"           "&<0> </0>"
+"[\p{IsControl}]"               " <0>\x{82}</0>"
+"[\P{IsControl}]"               "\x{82}<0> </0>"
+"[\p{IsWhite_Space}]"           "x<0> </0>"
+"[\P{IsWhite_Space}]"           " <0>x</0>"
+"[\p{IsDigit}]"                 " <0>4</0>"
+"[\P{IsDigit}]"                 "4<0> </0>"
+"[\p{IsHex_Digit}]"             " <0>F</0>"
+"[\P{IsHex_Digit}]"             "F<0> </0>"
+"[\p{IsJoin_Control}]"          " <0>\x{200d}</0>"
+"[\P{IsJoin_Control}]"          "\x{200d}<0> </0>"
+"[\p{IsNoncharacter_Code_Point}]"     "A<0>\x{5fffe}</0>"
+"[\p{IsAssigned}]"              "\x{10ffff}<0>a</0>"
+"[\P{IsAssigned}]"              "a<0>\x{10ffff}</0>"
+
+"[\p{InBasic Latin}]"           "ã<0>A</0>"
+"[\p{InBasicLatin}]"            "ã<0>A</0>"
+"[\p{InBasic-Latin}]"           "ã<0>A</0>"    # ICU accepts '-'; Java does not.
+"[\p{InBasic_Latin}]"           "ã<0>A</0>"
+"[\p{Inbasiclatin}]"            "ã<0>A</0>"
+"[\p{inbasiclatin}]"       E    "ã<0>A</0>"    # "In" must be cased as shown. Property name part is case insensitive.
+"[\p{InCombining_Marks_for_Symbols}]"    "a<0>\x{20DD}</0>"    # COMBINING ENCLOSING CIRCLE
+
+"[\p{all}]*"                    "<0>\x{00}abc\x{10ffff}</0>"
+"[\p{javaBadProperty}]"    E    "whatever"
+"[\p{IsBadProperty}]"      E    "whatever"
+"[\p{InBadBlock}]"         E    "whatever"
+"[\p{In}]"                 E    "whatever"
+"[\p{Is}]"                 E    "whatever"
+"[\p{java}]"                    "x<0>ê¦</0>"      # Note: "java" is a valid script code.
+
+"[\p{javaLowerCase}]+"             "A<0>a</0>"
+"[\p{javaLowerCase}]+"     i       "<0>Aa</0>"
+"[\P{javaLowerCase}]+"             "<0>A</0>a"
+"[\P{javaLowerCase}]+"     i       "Aa"          # No Match because case fold of the set happens first, then negation.
+                                                 #  JDK is not case insensitive w named properties, even though
+                                                 #  the insensitive match flag is set. A JDK bug?
+
+"[a-z]+"                   i       "<0>Aa</0>"   # Matches JDK behavior.
+"[^a-z]+"                  i       "Aa"          # (no match) which is JDK behavior. Case fold first, then negation.
+
 #  Random debugging, Temporary
 #
 
-- 
2.50.1