*/
+#include "unicode/bytestream.h"
#include "unicode/locid.h"
#include "unicode/strenum.h"
+#include "unicode/stringpiece.h"
#include "unicode/uloc.h"
#include "putilimp.h"
#include "mutex.h"
locale_set_default_internal(localeID, status);
}
+/**
+ * Creates a Locale from a BCP-47 language tag.
+ *
+ * A bogus Locale is returned whenever status already holds, or is set to, a
+ * failure code. U_ILLEGAL_ARGUMENT_ERROR is reported when the parser did not
+ * consume the whole tag, or when initializing the Locale from the converted
+ * locale ID left it bogus.
+ *
+ * @param tag    the BCP-47 language tag; it need not be NUL terminated.
+ * @param status in/out error code; nothing is parsed if it already signals
+ *               a failure.
+ * @return the Locale for the tag, or a bogus Locale on failure.
+ */
+Locale U_EXPORT2
+Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
+{
+    Locale locale(Locale::eBOGUS);
+
+    if (U_FAILURE(status)) {
+        return locale;
+    }
+
+    // uloc_forLanguageTag() is handed a const char* to a NUL terminated
+    // buffer, so the parsing is done on a terminated copy of the tag.
+    // TODO: Remove the need for a const char* to a NUL terminated buffer.
+    const CharString terminatedTag(tag, status);
+    if (U_FAILURE(status)) {
+        return locale;
+    }
+
+    // Why not simply use the ordinary Locale constructor? It only falls back
+    // to uloc_forLanguageTag() when it manages to detect that its input is
+    // BCP-47. That detection works for tags using BCP-47 extensions, but not
+    // for BCP-47 grandfathered tags (eg. "en-GB-oed"), which can also be read
+    // as ICU locale IDs and so never trigger the BCP-47 parsing. Hence
+    // uloc_forLanguageTag() is called explicitly here, followed by
+    // Locale::init().
+
+    // A simple language tag is exactly as long as its ICU locale ID (like
+    // "en-US" for "en_US"), so the tag length is the first capacity to try.
+    CharString localeID;
+    int32_t wantedCapacity = tag.size();
+    int32_t bufferCapacity = 0;
+    int32_t parsedLength = 0;
+    int32_t idLength = 0;
+    char* idBuffer = nullptr;
+
+    for (bool retry = true; retry;) {
+        idBuffer = localeID.getAppendBuffer(
+                /*minCapacity=*/wantedCapacity,
+                /*desiredCapacityHint=*/wantedCapacity,
+                bufferCapacity,
+                status);
+
+        if (U_FAILURE(status)) {
+            return locale;
+        }
+
+        idLength = uloc_forLanguageTag(
+                terminatedTag.data(),
+                idBuffer,
+                bufferCapacity,
+                &parsedLength,
+                &status);
+
+        // Tags that use extensions produce a longer ICU locale ID, but
+        // uloc_forLanguageTag() computes the exact length needed, so at most
+        // one retry with a bigger buffer is expected.
+        retry = (status == U_BUFFER_OVERFLOW_ERROR);
+        if (retry) {
+            wantedCapacity = idLength;
+            status = U_ZERO_ERROR;
+        }
+    }
+
+    if (U_FAILURE(status)) {
+        return locale;
+    }
+
+    // Anything short of consuming the complete tag is treated as an error.
+    if (parsedLength != tag.size()) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return locale;
+    }
+
+    // Account for the bytes uloc_forLanguageTag() wrote via the append buffer.
+    localeID.append(idBuffer, idLength, status);
+    if (status == U_STRING_NOT_TERMINATED_WARNING) {
+        // CharString provides the terminator itself, so the warning is moot.
+        status = U_ZERO_ERROR;
+    }
+
+    if (U_FAILURE(status)) {
+        return locale;
+    }
+
+    locale.init(localeID.data(), /*canonicalize=*/FALSE);
+    if (locale.isBogus()) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+    return locale;
+}
+
+/**
+ * Writes the BCP-47 language tag for this Locale to the given sink.
+ *
+ * Nothing is appended to the sink unless the conversion succeeded. A bogus
+ * Locale yields U_ILLEGAL_ARGUMENT_ERROR, and a failed scratch allocation
+ * yields U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param sink   receives the bytes of the language tag.
+ * @param status in/out error code; nothing is done if it already signals a
+ *               failure.
+ */
+void
+Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    if (fIsBogus) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // A simple ICU locale ID is exactly as long as its BCP-47 language tag
+    // (like "en_US" for "en-US"), so its length is the first capacity to try.
+    int32_t wantedCapacity = static_cast<int32_t>(uprv_strlen(fullName));
+    if (wantedCapacity == 0) {
+        wantedCapacity = 3;  // "und"
+    }
+
+    LocalMemory<char> scratch;
+    char* tagBuffer = nullptr;
+    int32_t bufferCapacity = 0;
+    int32_t tagLength = 0;
+
+    for (bool retry = true; retry;) {
+        if (scratch.allocateInsteadAndReset(wantedCapacity) == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        tagBuffer = sink.GetAppendBuffer(
+                /*min_capacity=*/wantedCapacity,
+                /*desired_capacity_hint=*/wantedCapacity,
+                scratch.getAlias(),
+                wantedCapacity,
+                &bufferCapacity);
+
+        tagLength = uloc_toLanguageTag(
+                fullName,
+                tagBuffer,
+                bufferCapacity,
+                /*strict=*/FALSE,
+                &status);
+
+        // In a very few edge cases the BCP-47 string is longer than the ICU
+        // locale ID. Most notoriously "C" expands to "en-US-u-va-posix", 16
+        // times longer, and it takes several calls to uloc_toLanguageTag()
+        // to figure that out.
+        // https://unicode-org.atlassian.net/browse/ICU-20132
+        retry = (status == U_BUFFER_OVERFLOW_ERROR);
+        if (retry) {
+            wantedCapacity = tagLength;
+            status = U_ZERO_ERROR;
+        }
+    }
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    sink.Append(tagBuffer, tagLength);
+    if (status == U_STRING_NOT_TERMINATED_WARNING) {
+        // The sink takes an explicit length, so terminators are not used.
+        status = U_ZERO_ERROR;
+    }
+}
+
Locale U_EXPORT2
Locale::createFromName (const char *name)
{
#ifndef LOCID_H
#define LOCID_H
+#include "unicode/bytestream.h"
+#include "unicode/stringpiece.h"
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/putil.h"
UErrorCode& success);
#endif /* U_HIDE_SYSTEM_API */
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Returns a Locale for the specified BCP47 language tag string.
+ * If the specified language tag contains any ill-formed subtags,
+ * the first such subtag and all following subtags are ignored.
+ * NOTE(review): the implementation reports U_ILLEGAL_ARGUMENT_ERROR and
+ * returns a bogus Locale when the parser did not consume the entire tag,
+ * so ignored trailing subtags appear to surface as an error here; confirm
+ * which behavior is intended.
+ * <p>
+ * This implements the 'Language-Tag' production of BCP47, and so
+ * supports grandfathered (regular and irregular) as well as private
+ * use language tags. Private use tags are represented as 'x-whatever',
+ * and grandfathered tags are converted to their canonical replacements
+ * where they exist. Note that a few grandfathered tags have no modern
+ * replacement; these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
+ * @param tag the input BCP47 language tag; it need not be NUL terminated.
+ * @param status error information if creating the Locale failed.
+ * @return the Locale for the specified BCP47 language tag, or a bogus
+ * Locale if creating the Locale failed.
+ * @draft ICU 63
+ */
+ static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status);
+
+ /**
+ * Returns a well-formed language tag for this Locale.
+ * <p>
+ * <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
+ * requirement will be silently omitted from the result.
+ *
+ * If this function fails, partial output may have been written to the sink.
+ *
+ * @param sink the output sink receiving the BCP47 language
+ * tag for this Locale.
+ * @param status error information if creating the language tag failed;
+ * set to U_ILLEGAL_ARGUMENT_ERROR if this Locale is bogus.
+ * @draft ICU 63
+ */
+ void toLanguageTag(ByteSink& sink, UErrorCode& status) const;
+
+ /**
+ * Returns a well-formed language tag for this Locale.
+ * <p>
+ * <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
+ * requirement will be silently omitted from the result.
+ *
+ * @param status error information if creating the language tag failed;
+ * set to U_ILLEGAL_ARGUMENT_ERROR if this Locale is bogus.
+ * @return the BCP47 language tag for this Locale.
+ * @draft ICU 63
+ */
+ template<typename StringClass>
+ inline StringClass toLanguageTag(UErrorCode& status) const;
+#endif // U_HIDE_DRAFT_API
+
/**
* Creates a locale which has had minimal canonicalization
* as per uloc_getName().
return !operator==(other);
}
+#ifndef U_HIDE_DRAFT_API
+// Convenience overload: collects the language tag in a default-constructed
+// StringClass by routing the ByteSink overload through a StringByteSink,
+// then returns that string by value.
+template<typename StringClass>
+inline StringClass Locale::toLanguageTag(UErrorCode& status) const
+{
+    StringClass tag;
+    StringByteSink<StringClass> tagSink(&tag);
+    toLanguageTag(tagSink, status);
+    return tag;
+}
+#endif // U_HIDE_DRAFT_API
+
inline const char *
Locale::getCountry() const
{
#include "unicode/brkiter.h"
#include "unicode/coll.h"
#include "unicode/ustring.h"
+#include "unicode/std_string.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
TESTCASE_AUTO(TestIsRightToLeft);
TESTCASE_AUTO(TestBug13277);
TESTCASE_AUTO(TestBug13554);
+ TESTCASE_AUTO(TestForLanguageTag);
+ TESTCASE_AUTO(TestToLanguageTag);
TESTCASE_AUTO_END;
}
}
}
+// Exercises Locale::forLanguageTag() with a simple tag, a grandfathered tag,
+// a tag with extensions, an ill-formed tag, a null tag and two inputs that
+// are not NUL terminated.
+void LocaleTest::TestForLanguageTag() {
+    IcuTestErrorCode errorCode(*this, "TestForLanguageTag()");
+
+    // Input language tags.
+    static const char kTagEn[] = "en-US";
+    static const char kTagOed[] = "en-GB-oed";
+    static const char kTagAf[] = "af-t-ar-i0-handwrit-u-ca-coptic-x-foo";
+    static const char kTagIll[] = "!";
+    static const char kTagNoNul[] = { 'e', 'n', '-', 'G', 'B' };
+
+    // Locales the tags above are expected to map to.
+    static const Locale kLocEn("en_US");
+    static const Locale kLocOed("en_GB@x=oed");
+    static const Locale kLocAf("af@calendar=coptic;t=ar-i0-handwrit;x=foo");
+    static const Locale kLocNull("");
+    static const Locale kLocGb("en_GB");
+
+    {   // Simple tag.
+        const Locale parsed = Locale::forLanguageTag(kTagEn, errorCode);
+        errorCode.errIfFailureAndReset("\"%s\"", kTagEn);
+        assertEquals(kTagEn, kLocEn.getName(), parsed.getName());
+    }
+
+    {   // Grandfathered tag.
+        const Locale parsed = Locale::forLanguageTag(kTagOed, errorCode);
+        errorCode.errIfFailureAndReset("\"%s\"", kTagOed);
+        assertEquals(kTagOed, kLocOed.getName(), parsed.getName());
+    }
+
+    {   // Tag with extensions.
+        const Locale parsed = Locale::forLanguageTag(kTagAf, errorCode);
+        errorCode.errIfFailureAndReset("\"%s\"", kTagAf);
+        assertEquals(kTagAf, kLocAf.getName(), parsed.getName());
+    }
+
+    {   // Ill-formed tag: must fail and leave a bogus Locale.
+        const Locale parsed = Locale::forLanguageTag(kTagIll, errorCode);
+        assertEquals(kTagIll, U_ILLEGAL_ARGUMENT_ERROR, errorCode.reset());
+        assertTrue(parsed.getName(), parsed.isBogus());
+    }
+
+    {   // Null tag.
+        const Locale parsed = Locale::forLanguageTag(nullptr, errorCode);
+        errorCode.errIfFailureAndReset("nullptr");
+        assertEquals("nullptr", kLocNull.getName(), parsed.getName());
+    }
+
+    {   // Prefix of a longer string: "en-GB", no NUL.
+        const StringPiece prefix(kTagOed, 5);
+        const Locale parsed = Locale::forLanguageTag(prefix, errorCode);
+        errorCode.errIfFailureAndReset("\"%.*s\"", prefix.size(), prefix.data());
+        assertEquals(CharString(prefix, errorCode).data(),
+                     kLocGb.getName(), parsed.getName());
+    }
+
+    {   // Array without any terminator: "en-GB", no NUL.
+        const StringPiece unterminated(kTagNoNul, sizeof(kTagNoNul));
+        const Locale parsed = Locale::forLanguageTag(unterminated, errorCode);
+        errorCode.errIfFailureAndReset(
+                "\"%.*s\"", unterminated.size(), unterminated.data());
+        assertEquals(CharString(unterminated, errorCode).data(),
+                     kLocGb.getName(), parsed.getName());
+    }
+}
+// Exercises both Locale::toLanguageTag() overloads: the ByteSink one and the
+// templated one returning a string, including empty, ill-formed and bogus
+// locales.
+void LocaleTest::TestToLanguageTag() {
+    IcuTestErrorCode errorCode(*this, "TestToLanguageTag()");
+
+    // Locales to convert.
+    static const Locale kLocC("C");
+    static const Locale kLocEn("en_US");
+    static const Locale kLocAf("af@calendar=coptic;t=ar-i0-handwrit;x=foo");
+    static const Locale kLocEmpty("");
+    static const Locale kLocIll("!");
+
+    // Language tags the locales above are expected to produce.
+    static const char kTagC[] = "en-US-u-va-posix";
+    static const char kTagEn[] = "en-US";
+    static const char kTagAf[] = "af-t-ar-i0-handwrit-u-ca-coptic-x-foo";
+    static const char kTagUnd[] = "und";
+
+    {   // ByteSink overload.
+        std::string tag;
+        StringByteSink<std::string> tagSink(&tag);
+        kLocC.toLanguageTag(tagSink, errorCode);
+        errorCode.errIfFailureAndReset("\"%s\"", kLocC.getName());
+        assertEquals(kLocC.getName(), kTagC, tag.c_str());
+    }
+
+    {   // Templated overload, same locale.
+        const std::string tag = kLocC.toLanguageTag<std::string>(errorCode);
+        errorCode.errIfFailureAndReset("\"%s\"", kLocC.getName());
+        assertEquals(kLocC.getName(), kTagC, tag.c_str());
+    }
+
+    {
+        const std::string tag = kLocEn.toLanguageTag<std::string>(errorCode);
+        errorCode.errIfFailureAndReset("\"%s\"", kLocEn.getName());
+        assertEquals(kLocEn.getName(), kTagEn, tag.c_str());
+    }
+
+    {
+        const std::string tag = kLocAf.toLanguageTag<std::string>(errorCode);
+        errorCode.errIfFailureAndReset("\"%s\"", kLocAf.getName());
+        assertEquals(kLocAf.getName(), kTagAf, tag.c_str());
+    }
+
+    {   // The empty locale is expected to come out as "und".
+        const std::string tag = kLocEmpty.toLanguageTag<std::string>(errorCode);
+        errorCode.errIfFailureAndReset("\"%s\"", kLocEmpty.getName());
+        assertEquals(kLocEmpty.getName(), kTagUnd, tag.c_str());
+    }
+
+    {   // So is a locale built from an ill-formed ID.
+        const std::string tag = kLocIll.toLanguageTag<std::string>(errorCode);
+        errorCode.errIfFailureAndReset("\"%s\"", kLocIll.getName());
+        assertEquals(kLocIll.getName(), kTagUnd, tag.c_str());
+    }
+
+    {   // A bogus locale must fail and produce no output.
+        Locale bogus;
+        bogus.setToBogus();
+        const std::string tag = bogus.toLanguageTag<std::string>(errorCode);
+        assertEquals("bogus", U_ILLEGAL_ARGUMENT_ERROR, errorCode.reset());
+        assertTrue(tag.c_str(), tag.empty());
+    }
+}
void TestBug13277();
void TestBug13554();
+ void TestForLanguageTag();
+ void TestToLanguageTag();
+
private:
void _checklocs(const char* label,
const char* req,