From: erg
Date: Fri, 22 Sep 2006 22:16:33 +0000 (+0000)
Subject: Fix bug 1025 - if charset != latin1, have postscript check for latin1
X-Git-Tag: LAST_LIBGRAPH~32^2~5870
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b785f8840ab1a8a4f93f9fdeebca66bd20a2142b;p=graphviz

Fix bug 1025 - if charset != latin1, have postscript check for latin1
values in the utf-8 and convert them to latin1 for the postscript output.
---

diff --git a/lib/common/psusershape.c b/lib/common/psusershape.c
index f9cfe3498..c7cfd4a1e 100644
--- a/lib/common/psusershape.c
+++ b/lib/common/psusershape.c
@@ -194,17 +194,55 @@ void epsf_define(FILE * of)
     }
 }
 
+enum {ASCII, LATIN1, NONLATIN};
+
+/* charsetOf:
+ * Assuming legal utf-8 input, determine if
+ * the character value range is ascii, latin-1 or otherwise.
+ */
+static int
+charsetOf (char* s)
+{
+    int r = ASCII;
+    unsigned char c;
+
+    while ((c = *(unsigned char*)s++)) {
+        if (c < 0x7F)
+            continue;
+        else if ((c & 0xFC) == 0xC0) {
+            r = LATIN1;
+            s++; /* eat second byte */
+        }
+        else return NONLATIN;
+    }
+    return r;
+}
+
 char *ps_string(char *ins, int latin)
 {
     char *s;
     char *base;
     static agxbuf xb;
+    static int warned;
     int rc;
 
     if (latin)
         base = utf8ToLatin1 (ins);
-    else
+    else switch (charsetOf (ins)) {
+    case ASCII :
         base = ins;
+        break;
+    case LATIN1 :
+        base = utf8ToLatin1 (ins);
+        break;
+    case NONLATIN :
+        if (!warned) {
+            agerr (AGWARN, "UTF-8 input uses non-Latin1 characters which cannot be handled in PostScript output");
+            warned = 1;
+        }
+        base = ins;
+        break;
+    }
 
     if (xb.buf == NULL)
         agxbinit (&xb, 0, NULL);
diff --git a/plugin/core/gvrender_core_ps.c b/plugin/core/gvrender_core_ps.c
index 61f486a11..ed854b67b 100644
--- a/plugin/core/gvrender_core_ps.c
+++ b/plugin/core/gvrender_core_ps.c
@@ -103,7 +103,13 @@ static void psgen_begin_graph(GVJ_t * job)
             epsf_define(job->output_file);
         }
         isLatin1 = (GD_charset(obj->u.g) == CHAR_LATIN1);
-        if (isLatin1 && !setupLatin1) {
+        /* We always setup Latin1. The charset info is always output,
+         * and installing it is cheap. With it installed, we can then
+         * rely on ps_string to convert UTF-8 characters whose encoding
+         * is in the range of Latin-1 into the Latin-1 equivalent and
+         * get the expected PostScript output.
+         */
+        if (!setupLatin1) {
             core_fputs(job, "setupLatin1\n"); /* as defined in ps header */
             setupLatin1 = TRUE;
         }