From 524748c6bfee282fc150b3614523ec1a7191e608 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Thu, 12 Mar 2020 19:21:24 -0700
Subject: [PATCH] ICU-20984 StringPiece & ByteSink overloads for char8_t*

---
 icu4c/source/common/ucasemap.cpp              |  6 +-
 icu4c/source/common/unicode/bytestream.h      | 34 +++++++
 icu4c/source/common/unicode/stringpiece.h     | 88 +++++++++++++++++--
 icu4c/source/test/intltest/collationtest.cpp  | 17 ++--
 .../intltest/compactdecimalformattest.cpp     | 61 ++++++-------
 icu4c/source/test/intltest/regextst.cpp       |  5 +-
 icu4c/source/test/intltest/strcase.cpp        | 25 +++---
 icu4c/source/test/intltest/strtest.cpp        | 66 +++++++++++++-
 icu4c/source/test/intltest/strtest.h          |  2 +
 icu4c/source/test/intltest/tstnorm.cpp        | 29 +++---
 icu4c/source/test/intltest/uts46test.cpp      |  2 +-
 11 files changed, 258 insertions(+), 77 deletions(-)
diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp
index cc998c993d7..ed72bda828f 100644
--- a/icu4c/source/common/ucasemap.cpp
+++ b/icu4c/source/common/ucasemap.cpp
@@ -687,13 +687,13 @@ void toUpper(uint32_t options,
             if (change) {
                 ByteSinkUtil::appendTwoBytes(upper, sink);
                 if ((data & HAS_EITHER_DIALYTIKA) != 0) {
-                    sink.Append(reinterpret_cast<const char*>(u8"\u0308"), 2);  // restore or add a dialytika
+                    sink.AppendU8(u8"\u0308", 2);  // restore or add a dialytika
                 }
                 if (addTonos) {
-                    sink.Append(reinterpret_cast<const char*>(u8"\u0301"), 2);
+                    sink.AppendU8(u8"\u0301", 2);
                 }
                 while (numYpogegrammeni > 0) {
-                    sink.Append(reinterpret_cast<const char*>(u8"\u0399"), 2);
+                    sink.AppendU8(u8"\u0399", 2);
                     --numYpogegrammeni;
                 }
             }
diff --git a/icu4c/source/common/unicode/bytestream.h b/icu4c/source/common/unicode/bytestream.h
index 0d60492fe23..7fe24062228 100644
--- a/icu4c/source/common/unicode/bytestream.h
+++ b/icu4c/source/common/unicode/bytestream.h
@@ -71,6 +71,40 @@ public:
    */
   virtual void Append(const char* bytes, int32_t n) = 0;
 
+#ifndef U_HIDE_DRAFT_API
+  /**
+   * Appends n bytes to this. Same as Append().
+   * Call AppendU8() with u8"string literals" which are const char * in C++11
+   * but const char8_t * in C++20.
+   * If the compiler does support char8_t as a distinct type,
+   * then an AppendU8() overload for that is defined and will be chosen.
+   *
+   * @param bytes the pointer to the bytes
+   * @param n the number of bytes; must be non-negative
+   * @draft ICU 67
+   */
+  inline void AppendU8(const char* bytes, int32_t n) {
+    Append(bytes, n);
+  }
+
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+  /**
+   * Appends n bytes to this. Same as Append() but for a const char8_t * pointer.
+   * Call AppendU8() with u8"string literals" which are const char * in C++11
+   * but const char8_t * in C++20.
+   * If the compiler does support char8_t as a distinct type,
+   * then this AppendU8() overload for that is defined and will be chosen.
+   *
+   * @param bytes the pointer to the bytes
+   * @param n the number of bytes; must be non-negative
+   * @draft ICU 67
+   */
+  inline void AppendU8(const char8_t* bytes, int32_t n) {
+    Append(reinterpret_cast<const char*>(bytes), n);
+  }
+#endif
+#endif  // U_HIDE_DRAFT_API
+
   /**
    * Returns a writable buffer for appending and writes the buffer's capacity to
    * *result_capacity. Guarantees *result_capacity>=min_capacity.
diff --git a/icu4c/source/common/unicode/stringpiece.h b/icu4c/source/common/unicode/stringpiece.h
index ba2240e6ac0..52c1e9ebd24 100644
--- a/icu4c/source/common/unicode/stringpiece.h
+++ b/icu4c/source/common/unicode/stringpiece.h
@@ -67,19 +67,50 @@ class U_COMMON_API StringPiece : public UMemory {
    * Default constructor, creates an empty StringPiece.
    * @stable ICU 4.2
    */
-  StringPiece() : ptr_(NULL), length_(0) { }
+  StringPiece() : ptr_(nullptr), length_(0) { }
+
   /**
    * Constructs from a NUL-terminated const char * pointer.
    * @param str a NUL-terminated const char * pointer
    * @stable ICU 4.2
    */
   StringPiece(const char* str);
+#ifndef U_HIDE_DRAFT_API
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+  /**
+   * Constructs from a NUL-terminated const char8_t * pointer.
+   * @param str a NUL-terminated const char8_t * pointer
+   * @draft ICU 67
+   */
+  StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
+#endif
+  /**
+   * Constructs an empty StringPiece.
+   * Needed for type disambiguation from multiple other overloads.
+   * @param p nullptr
+   * @draft ICU 67
+   */
+  StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
+#endif  // U_HIDE_DRAFT_API
+
   /**
    * Constructs from a std::string.
    * @stable ICU 4.2
    */
   StringPiece(const std::string& str)
     : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
+#ifndef U_HIDE_DRAFT_API
+#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
+  /**
+   * Constructs from a std::u8string.
+   * @draft ICU 67
+   */
+  StringPiece(const std::u8string& str)
+    : ptr_(reinterpret_cast<const char*>(str.data())),
+      length_(static_cast<int32_t>(str.size())) { }
+#endif
+#endif  // U_HIDE_DRAFT_API
+
 #ifndef U_HIDE_DRAFT_API
   /**
    * Constructs from some other implementation of a string piece class, from any
@@ -88,7 +119,7 @@ class U_COMMON_API StringPiece : public UMemory {
    * \code{.cpp}
    *
    *   struct OtherStringPieceClass {
-   *     const char* data();
+   *     const char* data();  // or const char8_t*
    *     size_t size();
    *   };
    *
@@ -97,16 +128,25 @@ class U_COMMON_API StringPiece : public UMemory {
    * The other string piece class will typically be std::string_view from C++17
    * or absl::string_view from Abseil.
    *
+   * Starting with C++20, data() may also return a const char8_t* pointer,
+   * as from std::u8string_view.
+   *
    * @param str the other string piece
    * @draft ICU 65
    */
   template <typename T,
             typename = typename std::enable_if<
-                std::is_same<decltype(T().data()), const char*>::value &&
+                (std::is_same<decltype(T().data()), const char*>::value
+#if defined(__cpp_char8_t)
+                    || std::is_same<decltype(T().data()), const char8_t*>::value
+#endif
+                ) &&
                 std::is_same<decltype(T().size()), size_t>::value>::type>
   StringPiece(T str)
-      : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) {}
+      : ptr_(reinterpret_cast<const char*>(str.data())),
+        length_(static_cast<int32_t>(str.size())) {}
 #endif  // U_HIDE_DRAFT_API
+
   /**
    * Constructs from a const char * pointer and a specified length.
    * @param offset a const char * pointer (need not be terminated)
@@ -114,6 +154,19 @@ class U_COMMON_API StringPiece : public UMemory {
    * @stable ICU 4.2
    */
   StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
+#ifndef U_HIDE_DRAFT_API
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+  /**
+   * Constructs from a const char8_t * pointer and a specified length.
+   * @param str a const char8_t * pointer (need not be terminated)
+   * @param len the length of the string; must be non-negative
+   * @draft ICU 67
+   */
+  StringPiece(const char8_t* str, int32_t len) :
+      StringPiece(reinterpret_cast<const char*>(str), len) {}
+#endif
+#endif  // U_HIDE_DRAFT_API
+
   /**
    * Substring of another StringPiece.
    * @param x the other StringPiece
@@ -132,7 +185,7 @@ class U_COMMON_API StringPiece : public UMemory {
   StringPiece(const StringPiece& x, int32_t pos, int32_t len);
 
   /**
-   * Returns the string pointer. May be NULL if it is empty.
+   * Returns the string pointer. May be nullptr if it is empty.
    *
    * data() may return a pointer to a buffer with embedded NULs, and the
    * returned buffer may or may not be null terminated.  Therefore it is
@@ -165,7 +218,7 @@ class U_COMMON_API StringPiece : public UMemory {
    * Sets to an empty string.
    * @stable ICU 4.2
    */
-  void clear() { ptr_ = NULL; length_ = 0; }
+  void clear() { ptr_ = nullptr; length_ = 0; }
 
   /**
    * Reset the stringpiece to refer to new data.
@@ -182,6 +235,29 @@ class U_COMMON_API StringPiece : public UMemory {
    */
   void set(const char* str);
 
+#ifndef U_HIDE_DRAFT_API
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+  /**
+   * Resets the stringpiece to refer to new data.
+   * @param xdata pointer to the new string data. Need not be NUL-terminated.
+   * @param len the length of the new data
+   * @draft ICU 67
+   */
+  inline void set(const char8_t* xdata, int32_t len) {
+      set(reinterpret_cast<const char*>(xdata), len);
+  }
+
+  /**
+   * Resets the stringpiece to refer to new data.
+   * @param str a pointer to a NUL-terminated string.
+   * @draft ICU 67
+   */
+  inline void set(const char8_t* str) {
+      set(reinterpret_cast<const char*>(str));
+  }
+#endif
+#endif  // U_HIDE_DRAFT_API
+
   /**
    * Removes the first n string units.
    * @param n prefix length, must be non-negative and <=length()
diff --git a/icu4c/source/test/intltest/collationtest.cpp b/icu4c/source/test/intltest/collationtest.cpp
index 9562e4d4aeb..de51eece5c4 100644
--- a/icu4c/source/test/intltest/collationtest.cpp
+++ b/icu4c/source/test/intltest/collationtest.cpp
@@ -22,6 +22,7 @@
 #include "unicode/sortkey.h"
 #include "unicode/std_string.h"
 #include "unicode/strenum.h"
+#include "unicode/stringpiece.h"
 #include "unicode/tblcoll.h"
 #include "unicode/uiter.h"
 #include "unicode/uniset.h"
@@ -293,15 +294,15 @@ void CollationTest::TestIllegalUTF8() {
     }
     coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, errorCode);
 
-    static const char *strings[] = {
+    static const StringPiece strings[] = {
         // string with U+FFFD == illegal byte sequence
-        reinterpret_cast<const char*>(u8"a\uFFFDz"),                    reinterpret_cast<const char*>("a\x80z"),  // trail byte
-        reinterpret_cast<const char*>(u8"a\uFFFD\uFFFDz"),              reinterpret_cast<const char*>("a\xc1\x81z"),  // non-shortest form
-        reinterpret_cast<const char*>(u8"a\uFFFD\uFFFD\uFFFDz"),        reinterpret_cast<const char*>("a\xe0\x82\x83z"),  // non-shortest form
-        reinterpret_cast<const char*>(u8"a\uFFFD\uFFFD\uFFFDz"),        reinterpret_cast<const char*>("a\xed\xa0\x80z"),  // lead surrogate: would be U+D800
-        reinterpret_cast<const char*>(u8"a\uFFFD\uFFFD\uFFFDz"),        reinterpret_cast<const char*>("a\xed\xbf\xbfz"),  // trail surrogate: would be U+DFFF
-        reinterpret_cast<const char*>(u8"a\uFFFD\uFFFD\uFFFD\uFFFDz"),  reinterpret_cast<const char*>("a\xf0\x8f\xbf\xbfz"),  // non-shortest form
-        reinterpret_cast<const char*>(u8"a\uFFFD\uFFFD\uFFFD\uFFFDz"),  reinterpret_cast<const char*>("a\xf4\x90\x80\x80z")  // out of range: would be U+110000
+        u8"a\uFFFDz",                   "a\x80z",  // trail byte
+        u8"a\uFFFD\uFFFDz",             "a\xc1\x81z",  // non-shortest form
+        u8"a\uFFFD\uFFFD\uFFFDz",       "a\xe0\x82\x83z",  // non-shortest form
+        u8"a\uFFFD\uFFFD\uFFFDz",       "a\xed\xa0\x80z",  // lead surrogate: would be U+D800
+        u8"a\uFFFD\uFFFD\uFFFDz",       "a\xed\xbf\xbfz",  // trail surrogate: would be U+DFFF
+        u8"a\uFFFD\uFFFD\uFFFD\uFFFDz", "a\xf0\x8f\xbf\xbfz",  // non-shortest form
+        u8"a\uFFFD\uFFFD\uFFFD\uFFFDz", "a\xf4\x90\x80\x80z"  // out of range: would be U+110000
     };
 
     for(int32_t i = 0; i < UPRV_LENGTHOF(strings); i += 2) {
diff --git a/icu4c/source/test/intltest/compactdecimalformattest.cpp b/icu4c/source/test/intltest/compactdecimalformattest.cpp
index 90afecdb293..1f51f70d010 100644
--- a/icu4c/source/test/intltest/compactdecimalformattest.cpp
+++ b/icu4c/source/test/intltest/compactdecimalformattest.cpp
@@ -23,6 +23,7 @@
 
 typedef struct ExpectedResult {
   double value;
+  // Invariant characters, will be converted to UTF-16 and then unescaped.
   const char *expected;
 } ExpectedResult;
 
@@ -185,38 +186,38 @@ static ExpectedResult kChineseCurrencyTestData[] = {
         {123456789012345.0, "\\u00A5120\\u4E07\\u4EBF"},
 };
 static ExpectedResult kGermanCurrencyTestData[] = {
-        {1.0, reinterpret_cast<const char*>(u8"1\\u00A0\\u20AC")},
-        {12.0, reinterpret_cast<const char*>(u8"12\\u00A0\\u20AC")},
-        {123.0, reinterpret_cast<const char*>(u8"120\\u00A0\\u20AC")},
-        {1234.0, reinterpret_cast<const char*>(u8"1200\\u00A0\\u20AC")},
-        {12345.0, reinterpret_cast<const char*>(u8"12.000\\u00A0\\u20AC")},
-        {123456.0, reinterpret_cast<const char*>(u8"120.000\\u00A0\\u20AC")},
-        {1234567.0, reinterpret_cast<const char*>(u8"1,2\\u00A0Mio.\\u00A0\\u20AC")},
-        {12345678.0, reinterpret_cast<const char*>(u8"12\\u00A0Mio.\\u00A0\\u20AC")},
-        {123456789.0, reinterpret_cast<const char*>(u8"120\\u00A0Mio.\\u00A0\\u20AC")},
-        {1234567890.0, reinterpret_cast<const char*>(u8"1,2\\u00A0Mrd.\\u00A0\\u20AC")},
-        {12345678901.0, reinterpret_cast<const char*>(u8"12\\u00A0Mrd.\\u00A0\\u20AC")},
-        {123456789012.0, reinterpret_cast<const char*>(u8"120\\u00A0Mrd.\\u00A0\\u20AC")},
-        {1234567890123.0, reinterpret_cast<const char*>(u8"1,2\\u00A0Bio.\\u00A0\\u20AC")},
-        {12345678901234.0, reinterpret_cast<const char*>(u8"12\\u00A0Bio.\\u00A0\\u20AC")},
-        {123456789012345.0, reinterpret_cast<const char*>(u8"120\\u00A0Bio.\\u00A0\\u20AC")},
+        {1.0, "1\\u00A0\\u20AC"},
+        {12.0, "12\\u00A0\\u20AC"},
+        {123.0, "120\\u00A0\\u20AC"},
+        {1234.0, "1200\\u00A0\\u20AC"},
+        {12345.0, "12.000\\u00A0\\u20AC"},
+        {123456.0, "120.000\\u00A0\\u20AC"},
+        {1234567.0, "1,2\\u00A0Mio.\\u00A0\\u20AC"},
+        {12345678.0, "12\\u00A0Mio.\\u00A0\\u20AC"},
+        {123456789.0, "120\\u00A0Mio.\\u00A0\\u20AC"},
+        {1234567890.0, "1,2\\u00A0Mrd.\\u00A0\\u20AC"},
+        {12345678901.0, "12\\u00A0Mrd.\\u00A0\\u20AC"},
+        {123456789012.0, "120\\u00A0Mrd.\\u00A0\\u20AC"},
+        {1234567890123.0, "1,2\\u00A0Bio.\\u00A0\\u20AC"},
+        {12345678901234.0, "12\\u00A0Bio.\\u00A0\\u20AC"},
+        {123456789012345.0, "120\\u00A0Bio.\\u00A0\\u20AC"},
 };
 static ExpectedResult kEnglishCurrencyTestData[] = {
-        {1.0, reinterpret_cast<const char*>(u8"$1")},
-        {12.0, reinterpret_cast<const char*>(u8"$12")},
-        {123.0, reinterpret_cast<const char*>(u8"$120")},
-        {1234.0, reinterpret_cast<const char*>(u8"$1.2K")},
-        {12345.0, reinterpret_cast<const char*>(u8"$12K")},
-        {123456.0, reinterpret_cast<const char*>(u8"$120K")},
-        {1234567.0, reinterpret_cast<const char*>(u8"$1.2M")},
-        {12345678.0, reinterpret_cast<const char*>(u8"$12M")},
-        {123456789.0, reinterpret_cast<const char*>(u8"$120M")},
-        {1234567890.0, reinterpret_cast<const char*>(u8"$1.2B")},
-        {12345678901.0, reinterpret_cast<const char*>(u8"$12B")},
-        {123456789012.0, reinterpret_cast<const char*>(u8"$120B")},
-        {1234567890123.0, reinterpret_cast<const char*>(u8"$1.2T")},
-        {12345678901234.0, reinterpret_cast<const char*>(u8"$12T")},
-        {123456789012345.0, reinterpret_cast<const char*>(u8"$120T")},
+        {1.0, "$1"},
+        {12.0, "$12"},
+        {123.0, "$120"},
+        {1234.0, "$1.2K"},
+        {12345.0, "$12K"},
+        {123456.0, "$120K"},
+        {1234567.0, "$1.2M"},
+        {12345678.0, "$12M"},
+        {123456789.0, "$120M"},
+        {1234567890.0, "$1.2B"},
+        {12345678901.0, "$12B"},
+        {123456789012.0, "$120B"},
+        {1234567890123.0, "$1.2T"},
+        {12345678901234.0, "$12T"},
+        {123456789012345.0, "$120T"},
 };
 
 
diff --git a/icu4c/source/test/intltest/regextst.cpp b/icu4c/source/test/intltest/regextst.cpp
index 311c7bc94b9..5f7e36b3ae1 100644
--- a/icu4c/source/test/intltest/regextst.cpp
+++ b/icu4c/source/test/intltest/regextst.cpp
@@ -31,6 +31,7 @@
 
 #include "unicode/localpointer.h"
 #include "unicode/regex.h"
+#include "unicode/stringpiece.h"
 #include "unicode/uchar.h"
 #include "unicode/ucnv.h"
 #include "unicode/uniset.h"
@@ -5838,11 +5839,11 @@ void RegexTest::TestBug12884() {
     REGEX_ASSERT(status == U_REGEX_TIME_OUT);
 
     // UText, wrapping non-UTF-16 text, also takes a different execution path.
-    const char *text8 = reinterpret_cast<const char*>(u8"Â¿QuÃ© es Unicode?  Unicode proporciona un nÃºmero Ãºnico para cada"
+    StringPiece text8(u8"Â¿QuÃ© es Unicode?  Unicode proporciona un nÃºmero Ãºnico para cada"
                           "carÃ¡cter, sin importar la plataforma, sin importar el programa,"
                           "sin importar el idioma.");
     status = U_ZERO_ERROR;
-    LocalUTextPointer ut(utext_openUTF8(NULL, text8, -1, &status));
+    LocalUTextPointer ut(utext_openUTF8(NULL, text8.data(), text8.length(), &status));
     REGEX_CHECK_STATUS;
     m.reset(ut.getAlias());
     m.find(status);
diff --git a/icu4c/source/test/intltest/strcase.cpp b/icu4c/source/test/intltest/strcase.cpp
index dc81fb45132..4093c519262 100644
--- a/icu4c/source/test/intltest/strcase.cpp
+++ b/icu4c/source/test/intltest/strcase.cpp
@@ -1314,7 +1314,8 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
     Edits edits;
 
     int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
-                                          reinterpret_cast<const char*>(u8"IstanBul"), 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+                                          reinterpret_cast<const char*>(u8"IstanBul"), 8,
+                                          dest, UPRV_LENGTHOF(dest), &edits, errorCode);
     assertEquals(u"toLower(IstanBul)", UnicodeString(u"Ä±b"),
                  UnicodeString::fromUTF8(StringPiece(dest, length)));
     static const EditChange lowerExpectedChanges[] = {
@@ -1330,7 +1331,8 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
 
     edits.reset();
     length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
-                                  reinterpret_cast<const char*>(u8"Î Î±ÏÎ¬ÏÎ±"), 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+                                  reinterpret_cast<const char*>(u8"Î Î±ÏÎ¬ÏÎ±"), 6 * 2,
+                                  dest, UPRV_LENGTHOF(dest), &edits, errorCode);
     assertEquals(u"toUpper(Î Î±ÏÎ¬ÏÎ±)", UnicodeString(u"ÎÎ¤ÎÎ¤Î"),
                  UnicodeString::fromUTF8(StringPiece(dest, length)));
     static const EditChange upperExpectedChanges[] = {
@@ -1370,7 +1372,8 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() {
     // No explicit nor automatic edits.reset(). Edits should be appended.
     length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET |
                                    U_FOLD_CASE_EXCLUDE_SPECIAL_I,
-                               reinterpret_cast<const char*>(u8"IÃtanBul"), 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+                               reinterpret_cast<const char*>(u8"IÃtanBul"), 1 + 2 + 6,
+                               dest, UPRV_LENGTHOF(dest), &edits, errorCode);
     assertEquals(u"foldCase(IÃtanBul)", UnicodeString(u"Ä±ssb"),
                  UnicodeString::fromUTF8(StringPiece(dest, length)));
     static const EditChange foldExpectedChanges[] = {
@@ -1454,44 +1457,44 @@ void StringCaseTest::TestCaseMapUTF8ToString() {
     StringByteSink<std::string> sink(&dest);
 
     // Omit unchanged text.
-    CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, reinterpret_cast<const char*>(u8"IstanBul"), sink, nullptr, errorCode);
+    CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode);
     assertEquals(u"toLower(IstanBul)", UnicodeString(u"Ä±b"), UnicodeString::fromUTF8(dest));
     dest.clear();
-    CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, reinterpret_cast<const char*>(u8"Î Î±ÏÎ¬ÏÎ±"), sink, nullptr, errorCode);
+    CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"Î Î±ÏÎ¬ÏÎ±", sink, nullptr, errorCode);
     assertEquals(u"toUpper(Î Î±ÏÎ¬ÏÎ±)", UnicodeString(u"ÎÎ¤ÎÎ¤Î"),
                  UnicodeString::fromUTF8(dest));
 #if !UCONFIG_NO_BREAK_ITERATION
     dest.clear();
     CaseMap::utf8ToTitle(
         "nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
-        nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), sink, nullptr, errorCode);
+        nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
                  UnicodeString::fromUTF8(dest));
 #endif
     dest.clear();
     CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
-                      reinterpret_cast<const char*>(u8"IÃtanBul"), sink, nullptr, errorCode);
+                      u8"IÃtanBul", sink, nullptr, errorCode);
     assertEquals(u"foldCase(IÃtanBul)", UnicodeString(u"Ä±ssb"),
                  UnicodeString::fromUTF8(dest));
 
     // Return the whole result string.
     dest.clear();
-    CaseMap::utf8ToLower("tr", 0, reinterpret_cast<const char*>(u8"IstanBul"), sink, nullptr, errorCode);
+    CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode);
     assertEquals(u"toLower(IstanBul)", UnicodeString(u"Ä±stanbul"),
                  UnicodeString::fromUTF8(dest));
     dest.clear();
-    CaseMap::utf8ToUpper("el", 0, reinterpret_cast<const char*>(u8"Î Î±ÏÎ¬ÏÎ±"), sink, nullptr, errorCode);
+    CaseMap::utf8ToUpper("el", 0, u8"Î Î±ÏÎ¬ÏÎ±", sink, nullptr, errorCode);
     assertEquals(u"toUpper(Î Î±ÏÎ¬ÏÎ±)", UnicodeString(u"Î ÎÎ¤ÎÎ¤Î"),
                  UnicodeString::fromUTF8(dest));
 #if !UCONFIG_NO_BREAK_ITERATION
     dest.clear();
     CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
-                         nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), sink, nullptr, errorCode);
+                         nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
                  UnicodeString::fromUTF8(dest));
 #endif
     dest.clear();
-    CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, reinterpret_cast<const char*>(u8"IÃtanBul"), sink, nullptr, errorCode);
+    CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"IÃtanBul", sink, nullptr, errorCode);
     assertEquals(u"foldCase(IÃtanBul)", UnicodeString(u"Ä±sstanbul"),
                  UnicodeString::fromUTF8(dest));
 }
diff --git a/icu4c/source/test/intltest/strtest.cpp b/icu4c/source/test/intltest/strtest.cpp
index 1665a03cdb8..cf00cd4241d 100644
--- a/icu4c/source/test/intltest/strtest.cpp
+++ b/icu4c/source/test/intltest/strtest.cpp
@@ -248,9 +248,11 @@ void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, ch
 #ifdef U_HAVE_STRING_VIEW
     TESTCASE_AUTO(TestStringPieceStringView);
 #endif
+    TESTCASE_AUTO(TestStringPieceU8);
     TESTCASE_AUTO(TestByteSink);
     TESTCASE_AUTO(TestCheckedArrayByteSink);
     TESTCASE_AUTO(TestStringByteSink);
+    TESTCASE_AUTO(TestStringByteSinkAppendU8);
     TESTCASE_AUTO(TestCharString);
     TESTCASE_AUTO(TestCStr);
     TESTCASE_AUTO(Testctou);
@@ -265,7 +267,7 @@ StringTest::TestStringPiece() {
         errln("StringPiece() failed");
     }
     // Construct from NULL const char * pointer.
-    StringPiece null(NULL);
+    StringPiece null((const char *)nullptr);
     if(!null.empty() || null.data()!=NULL || null.length()!=0 || null.size()!=0) {
         errln("StringPiece(NULL) failed");
     }
@@ -395,7 +397,7 @@ StringTest::TestStringPiece() {
 void
 StringTest::TestStringPieceComparisons() {
     StringPiece empty;
-    StringPiece null(NULL);
+    StringPiece null(nullptr);
     StringPiece abc("abc");
     StringPiece abcd("abcdefg", 4);
     StringPiece abx("abx");
@@ -521,6 +523,52 @@ StringTest::TestStringPieceStringView() {
 }
 #endif
 
+void
+StringTest::TestStringPieceU8() {
+    // ICU-20984 "mitigate some C++20 char8_t breakages"
+    // For the following APIs there are overloads for both
+    // const char * and const char8_t *.
+    // A u8"string literal" has one type or the other
+    // depending on C++ version and compiler settings.
+    StringPiece abc(u8"abc");
+    assertEquals("abc.length", 3, abc.length());
+    assertEquals("abc", "\x61\x62\x63", abc.data());
+
+    StringPiece abc3(u8"abcdef", 3);
+    assertEquals("abc3.length", 3, abc3.length());
+    assertEquals("abc3[0]", 0x61, abc3.data()[0]);
+    assertEquals("abc3[1]", 0x62, abc3.data()[1]);
+    assertEquals("abc3[2]", 0x63, abc3.data()[2]);
+
+    StringPiece uvw("q");
+    uvw.set(u8"uvw");
+    assertEquals("uvw.length", 3, uvw.length());
+    assertEquals("uvw", "\x75\x76\x77", uvw.data());
+
+    StringPiece xyz("r");
+    xyz.set(u8"xyzXYZ", 3);
+    assertEquals("xyz.length", 3, xyz.length());
+    assertEquals("xyz[0]", 0x78, xyz.data()[0]);
+    assertEquals("xyz[1]", 0x79, xyz.data()[1]);
+    assertEquals("xyz[2]", 0x7a, xyz.data()[2]);
+
+    StringPiece null(nullptr);
+    assertTrue("null is empty", null.empty());
+    assertTrue("null is null", null.data() == nullptr);
+
+#ifdef __cpp_lib_char8_t
+    std::u8string_view u8sv(u8"sv");  // C++20
+    StringPiece u8svsp(u8sv);
+    assertEquals("u8svsp.length", 2, u8svsp.length());
+    assertEquals("u8svsp", "\x73\x76", u8svsp.data());
+
+    std::u8string u8str(u8"str");  // C++20
+    StringPiece u8strsp(u8str);
+    assertEquals("u8strsp.length", 3, u8strsp.length());
+    assertEquals("u8strsp", "\x73\x74\x72", u8strsp.data());
+#endif  // __cpp_lib_char8_t
+}
+
 // Verify that ByteSink is subclassable and Flush() overridable.
 class SimpleByteSink : public ByteSink {
 public:
@@ -653,6 +701,20 @@ StringTest::TestStringByteSink() {
     }
 }
 
+void
+StringTest::TestStringByteSinkAppendU8() {
+    // ICU-20984 "mitigate some C++20 char8_t breakages"
+    // For the following APIs there are overloads for both
+    // const char * and const char8_t *.
+    // A u8"string literal" has one type or the other
+    // depending on C++ version and compiler settings.
+    std::string result("abc");
+    StringByteSink<std::string> sink(&result);
+    sink.AppendU8("def", 3);
+    sink.AppendU8(u8"ghijkl", 4);
+    assertEquals("abcdefghij", "abcdef\x67\x68\x69\x6a", result.c_str());
+}
+
 #if defined(_MSC_VER)
 #include <vector>
 #endif
diff --git a/icu4c/source/test/intltest/strtest.h b/icu4c/source/test/intltest/strtest.h
index 8359f84823a..2a1b98804f3 100644
--- a/icu4c/source/test/intltest/strtest.h
+++ b/icu4c/source/test/intltest/strtest.h
@@ -49,9 +49,11 @@ private:
 #ifdef U_HAVE_STRING_VIEW
     void TestStringPieceStringView();
 #endif
+    void TestStringPieceU8();
     void TestByteSink();
     void TestCheckedArrayByteSink();
     void TestStringByteSink();
+    void TestStringByteSinkAppendU8();
     void TestSTLCompatibility();
     void TestCharString();
     void TestCStr();
diff --git a/icu4c/source/test/intltest/tstnorm.cpp b/icu4c/source/test/intltest/tstnorm.cpp
index 886df6f15ad..e478872d53e 100644
--- a/icu4c/source/test/intltest/tstnorm.cpp
+++ b/icu4c/source/test/intltest/tstnorm.cpp
@@ -14,6 +14,7 @@
 #include "unicode/errorcode.h"
 #include "unicode/normlzr.h"
 #include "unicode/stringoptions.h"
+#include "unicode/stringpiece.h"
 #include "unicode/uniset.h"
 #include "unicode/usetiter.h"
 #include "unicode/schriter.h"
@@ -1573,15 +1574,15 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
     if(errorCode.errDataIfFailureAndReset("Normalizer2::getNFKCCasefoldInstance() call failed")) {
         return;
     }
-    static const char *const src =
-        reinterpret_cast<const char*>(u8"  AÃA\u0308A\u0308\u00ad\u0323Ã\u0323,\u00ad\u1100\u1161ê°\u11A8ê°\u3133  ");
-    std::string expected = reinterpret_cast<const char*>(u8"  aÃ¤Ã¤áº¡\u0308áº¡\u0308,ê°ê°ê°  ");
+    static const StringPiece src =
+        u8"  AÃA\u0308A\u0308\u00ad\u0323Ã\u0323,\u00ad\u1100\u1161ê°\u11A8ê°\u3133  ";
+    StringPiece expected = u8"  aÃ¤Ã¤áº¡\u0308áº¡\u0308,ê°ê°ê°  ";
     std::string result;
     StringByteSink<std::string> sink(&result, static_cast<int32_t>(expected.length()));
     Edits edits;
     nfkc_cf->normalizeUTF8(0, src, sink, &edits, errorCode);
     assertSuccess("normalizeUTF8 with Edits", errorCode.get());
-    assertEquals("normalizeUTF8 with Edits", expected.c_str(), result.c_str());
+    assertEquals("normalizeUTF8 with Edits", expected.data(), result.c_str());
     static const EditChange expectedChanges[] = {
         { FALSE, 2, 2 },  // 2 spaces
         { TRUE, 1, 1 },  // Aâa
@@ -1607,12 +1608,12 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
     assertTrue("isNormalizedUTF8(normalized)", nfkc_cf->isNormalizedUTF8(result, errorCode));
 
     // Omit unchanged text.
-    expected = reinterpret_cast<const char*>(u8"aÃ¤Ã¤áº¡\u0308áº¡\u0308ê°ê°ê°");
+    expected = u8"aÃ¤Ã¤áº¡\u0308áº¡\u0308ê°ê°ê°";
     result.clear();
     edits.reset();
     nfkc_cf->normalizeUTF8(U_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);
     assertSuccess("normalizeUTF8 omit unchanged", errorCode.get());
-    assertEquals("normalizeUTF8 omit unchanged", expected.c_str(), result.c_str());
+    assertEquals("normalizeUTF8 omit unchanged", expected.data(), result.c_str());
     assertTrue("normalizeUTF8 omit unchanged hasChanges", edits.hasChanges());
     assertEquals("normalizeUTF8 omit unchanged numberOfChanges", 9, edits.numberOfChanges());
     TestUtility::checkEditsIter(*this, u"normalizeUTF8 omit unchanged",
@@ -1623,12 +1624,12 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
     // With filter: The normalization code does not see the "A" substrings.
     UnicodeSet filter(u"[^A]", errorCode);
     FilteredNormalizer2 fn2(*nfkc_cf, filter);
-    expected = reinterpret_cast<const char*>(u8"  AÃ¤A\u0308A\u0323\u0308áº¡\u0308,ê°ê°ê°  ");
+    expected = u8"  AÃ¤A\u0308A\u0323\u0308áº¡\u0308,ê°ê°ê°  ";
     result.clear();
     edits.reset();
     fn2.normalizeUTF8(0, src, sink, &edits, errorCode);
     assertSuccess("filtered normalizeUTF8", errorCode.get());
-    assertEquals("filtered normalizeUTF8", expected.c_str(), result.c_str());
+    assertEquals("filtered normalizeUTF8", expected.data(), result.c_str());
     static const EditChange filteredChanges[] = {
         { FALSE, 3, 3 },  // 2 spaces + A
         { TRUE, 2, 2 },  // ÃâÃ¤
@@ -1655,12 +1656,12 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
     // Omit unchanged text.
     // Note that the result is not normalized because the inner normalizer
     // does not see text across filter spans.
-    expected = reinterpret_cast<const char*>(u8"Ã¤\u0323\u0308áº¡\u0308ê°ê°ê°");
+    expected = u8"Ã¤\u0323\u0308áº¡\u0308ê°ê°ê°";
     result.clear();
     edits.reset();
     fn2.normalizeUTF8(U_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);
     assertSuccess("filtered normalizeUTF8 omit unchanged", errorCode.get());
-    assertEquals("filtered normalizeUTF8 omit unchanged", expected.c_str(), result.c_str());
+    assertEquals("filtered normalizeUTF8 omit unchanged", expected.data(), result.c_str());
     assertTrue("filtered normalizeUTF8 omit unchanged hasChanges", edits.hasChanges());
     assertEquals("filtered normalizeUTF8 omit unchanged numberOfChanges", 7, edits.numberOfChanges());
     TestUtility::checkEditsIter(*this, u"filtered normalizeUTF8 omit unchanged",
@@ -1777,13 +1778,13 @@ BasicNormalizerTest::TestComposeJamoTBase() {
     assertFalse("isNormalized(LV+11A7)", nfkc->isNormalized(s, errorCode));
     assertTrue("isNormalized(normalized)", nfkc->isNormalized(result, errorCode));
 
-    std::string s8(reinterpret_cast<const char*>(u8"\u1100\u1161\u11A7\u1100\u314F\u11A7ê°\u11A7"));
-    std::string expected8(reinterpret_cast<const char*>(u8"ê°\u11A7ê°\u11A7ê°\u11A7"));
+    StringPiece s8(u8"\u1100\u1161\u11A7\u1100\u314F\u11A7ê°\u11A7");
+    StringPiece expected8(u8"ê°\u11A7ê°\u11A7ê°\u11A7");
     std::string result8;
-    StringByteSink<std::string> sink(&result8, static_cast<int32_t>(expected8.length()));
+    StringByteSink<std::string> sink(&result8, expected8.length());
     nfkc->normalizeUTF8(0, s8, sink, nullptr, errorCode);
     assertSuccess("normalizeUTF8(LV+11A7)", errorCode.get());
-    assertEquals("normalizeUTF8(LV+11A7)", expected8.c_str(), result8.c_str());
+    assertEquals("normalizeUTF8(LV+11A7)", expected8.data(), result8.c_str());
     assertFalse("isNormalizedUTF8(LV+11A7)", nfkc->isNormalizedUTF8(s8, errorCode));
     assertTrue("isNormalizedUTF8(normalized)", nfkc->isNormalizedUTF8(result8, errorCode));
 }
diff --git a/icu4c/source/test/intltest/uts46test.cpp b/icu4c/source/test/intltest/uts46test.cpp
index e11fdf2bc7a..b399d2dd724 100644
--- a/icu4c/source/test/intltest/uts46test.cpp
+++ b/icu4c/source/test/intltest/uts46test.cpp
@@ -160,7 +160,7 @@ void UTS46Test::TestAPI() {
     char buffer[100];
     TestCheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
     errorCode=U_ZERO_ERROR;
-    nontrans->labelToUnicodeUTF8(StringPiece(NULL, 5), sink, info, errorCode);
+    nontrans->labelToUnicodeUTF8(StringPiece((const char *)NULL, 5), sink, info, errorCode);
     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || sink.NumberOfBytesWritten()!=0) {
         errln("N.labelToUnicodeUTF8(StringPiece(NULL, 5)) did not set illegal-argument-error ",
               "or did output something - %s",
-- 
2.50.1