From f81a01e01bc9c4430814730aaf98227e25aedfef Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 29 Aug 2014 23:07:42 +0000 Subject: [PATCH] ICU-10883 UTS 46 toUnicode set error for empty label including just xn-- X-SVN-Rev: 36297 --- icu4c/source/common/uts46.cpp | 34 +++++++++--------------- icu4c/source/test/intltest/uts46test.cpp | 8 +++--- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/icu4c/source/common/uts46.cpp b/icu4c/source/common/uts46.cpp index 11d5dc5d2bf..80900a019b4 100644 --- a/icu4c/source/common/uts46.cpp +++ b/icu4c/source/common/uts46.cpp @@ -319,9 +319,7 @@ UTS46::process(const UnicodeString &src, info.reset(); int32_t srcLength=src.length(); if(srcLength==0) { - if(toASCII) { - info.errors|=UIDNA_ERROR_EMPTY_LABEL; - } + info.errors|=UIDNA_ERROR_EMPTY_LABEL; return dest; } UChar *destArray=dest.getBuffer(srcLength); @@ -379,12 +377,11 @@ UTS46::process(const UnicodeString &src, ++i; // '.' was copied to dest already break; } - if(toASCII) { - if(i==labelStart) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } else if((i-labelStart)>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } + if(i==labelStart) { + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + } + if(toASCII && (i-labelStart)>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; } info.errors|=info.labelErrors; info.labelErrors=0; @@ -420,9 +417,7 @@ UTS46::processUTF8(const StringPiece &src, // Arguments are fine, reset output values. info.reset(); if(srcLength==0) { - if(toASCII) { - info.errors|=UIDNA_ERROR_EMPTY_LABEL; - } + info.errors|=UIDNA_ERROR_EMPTY_LABEL; dest.Flush(); return; } @@ -480,12 +475,11 @@ UTS46::processUTF8(const StringPiece &src, if(isLabel) { break; // Replacing with U+FFFD can be complicated for toASCII. } - if(toASCII) { - if(i==labelStart) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } else if((i-labelStart)>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } + if(i==labelStart) { + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + } + if(toASCII && (i-labelStart)>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; } info.errors|=info.labelErrors; info.labelErrors=0; @@ -744,9 +738,7 @@ UTS46::processLabel(UnicodeString &dest, } // Validity check if(labelLength==0) { - if(toASCII) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); } // labelLength>0 diff --git a/icu4c/source/test/intltest/uts46test.cpp b/icu4c/source/test/intltest/uts46test.cpp index a1a9cbdba85..5bdf305a666 100644 --- a/icu4c/source/test/intltest/uts46test.cpp +++ b/icu4c/source/test/intltest/uts46test.cpp @@ -465,6 +465,7 @@ static const TestCase testCases[]={ "1234567890123456789012345678901234567890123456789012345678901", UIDNA_ERROR_LABEL_TOO_LONG|UIDNA_ERROR_DOMAIN_NAME_TOO_LONG }, // hyphen errors and empty-label errors + // Ticket #10883: ToUnicode also checks for empty labels. { ".", "B", ".", UIDNA_ERROR_EMPTY_LABEL }, { "\\uFF0E", "B", ".", UIDNA_ERROR_EMPTY_LABEL }, // "xn---q----jra"=="-q--a-umlaut-" @@ -478,11 +479,13 @@ static const TestCase testCases[]={ UIDNA_ERROR_EMPTY_LABEL|UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN| UIDNA_ERROR_HYPHEN_3_4 }, { "a..c", "B", "a..c", UIDNA_ERROR_EMPTY_LABEL }, + { "a.xn--.c", "B", "a..c", UIDNA_ERROR_EMPTY_LABEL }, { "a.-b.", "B", "a.-b.", UIDNA_ERROR_LEADING_HYPHEN }, { "a.b-.c", "B", "a.b-.c", UIDNA_ERROR_TRAILING_HYPHEN }, { "a.-.c", "B", "a.-.c", UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN }, { "a.bc--de.f", "B", "a.bc--de.f", UIDNA_ERROR_HYPHEN_3_4 }, { "\\u00E4.\\u00AD.c", "B", "\\u00E4..c", UIDNA_ERROR_EMPTY_LABEL }, + { "\\u00E4.xn--.c", "B", "\\u00E4..c", UIDNA_ERROR_EMPTY_LABEL }, { "\\u00E4.-b.", "B", "\\u00E4.-b.", UIDNA_ERROR_LEADING_HYPHEN }, { "\\u00E4.b-.c", "B", "\\u00E4.b-.c", UIDNA_ERROR_TRAILING_HYPHEN }, { "\\u00E4.-.c", "B", "\\u00E4.-.c", UIDNA_ERROR_LEADING_HYPHEN|UIDNA_ERROR_TRAILING_HYPHEN }, @@ -598,10 +601,9 @@ void UTS46Test::TestSomeCases() { ) { continue; } - // ToUnicode does not set length errors. + // ToUnicode does not set length-overflow errors. uint32_t uniErrors=testCase.errors&~ - (UIDNA_ERROR_EMPTY_LABEL| - UIDNA_ERROR_LABEL_TOO_LONG| + (UIDNA_ERROR_LABEL_TOO_LONG| UIDNA_ERROR_DOMAIN_NAME_TOO_LONG); char mode=testCase.o[0]; if(mode=='B' || mode=='N') { -- 2.40.0