From: ellson Date: Tue, 15 Aug 2006 01:39:30 +0000 (+0000) Subject: - fix stupid mistake with UTF8 character copying X-Git-Tag: LAST_LIBGRAPH~32^2~5980 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f8fbb21208cc9893e8cf791a017da500536a6ed7;p=graphviz - fix stupid mistake with UTF8 character copying - add russian.dot utf8 rtest graph --- diff --git a/lib/common/utils.c b/lib/common/utils.c index 75b269efa..07d9066fb 100644 --- a/lib/common/utils.c +++ b/lib/common/utils.c @@ -1391,42 +1391,29 @@ char* htmlEntityUTF8 (char* s) c = (v & 0x3F) | 0x80; } } - else { - c = '&'; - } } } else if (c < 0xE0) { /* copy 2 byte UTF8 characters */ - if ((s[1] & 0xC0) == 0x80) { + if ((*s & 0xC0) == 0x80) { rc = agxbputc(&xb, c); c = *(unsigned char*)s++; } else { - agerr(AGERR, "Invalid UTF8 found in input. Perhaps \"-Gcharset=latin1\" is needed?\n"); + agerr(AGERR, "Invalid 2-byte UTF8 found in input. Perhaps \"-Gcharset=latin1\" is needed?\n"); exit(EXIT_FAILURE); } - /* - * (if we didn't just exit) - * A two-byte-character lead-byte not followed by trail-byte - * represents itself. - */ } else if (c < 0xF0) { /* copy 3 byte UTF8 characters */ - if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)) { + if (((*s & 0xC0) == 0x80) && ((s[1] & 0xC0) == 0x80)) { rc = agxbputc(&xb, c); c = *(unsigned char*)s++; rc = agxbputc(&xb, c); c = *(unsigned char*)s++; } else { - agerr(AGERR, "Invalid UTF8 found in input. Perhaps \"-Gcharset=latin1\" is needed?\n"); + agerr(AGERR, "Invalid 3-byte UTF8 found in input. Perhaps \"-Gcharset=latin1\" is needed?\n"); exit(EXIT_FAILURE); } - /* - * (if we didn't just exit) - * A three-byte-character lead-byte not followed by - * two trail-bytes represents itself. - */ } else { agerr(AGERR, "UTF8 codes > 3 bytes are not currently supported\n");