From: erg <devnull@localhost>
Date: Fri, 22 Sep 2006 22:16:33 +0000 (+0000)
Subject: Fix bug 1025 - if charset != latin1, have postscript check for latin1
X-Git-Tag: LAST_LIBGRAPH~32^2~5870
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b785f8840ab1a8a4f93f9fdeebca66bd20a2142b;p=graphviz

Fix bug 1025 - if charset != latin1, have the PostScript output code check whether
the UTF-8 input uses only Latin-1 values and, if so, convert it to Latin-1 for output.
---

diff --git a/lib/common/psusershape.c b/lib/common/psusershape.c
index f9cfe3498..c7cfd4a1e 100644
--- a/lib/common/psusershape.c
+++ b/lib/common/psusershape.c
@@ -194,17 +194,55 @@ void epsf_define(FILE * of)
     }
 }
 
+enum {ASCII, LATIN1, NONLATIN};
+
+/* charsetOf:
+ * Classify a NUL-terminated UTF-8 string: ASCII if every byte is <= 0x7F,
+ * LATIN1 if it also has 2-byte sequences led by 0xC0-0xC3, else NONLATIN.
+ */
+static int
+charsetOf (char* s)
+{
+    int r = ASCII;
+    unsigned char c;
+
+    while ((c = *(unsigned char*)s++)) {
+	if (c < 0x80)	/* 0x7F (DEL) is still 7-bit ASCII */
+	    continue;
+	else if ((c & 0xFC) == 0xC0) {
+	    r = LATIN1;
+	    if (*s) s++; /* eat second byte, but never step past the NUL */
+	}
+	else return NONLATIN;
+    }
+    return r;
+}
+
 char *ps_string(char *ins, int latin)
 {
     char *s;
     char *base;
     static agxbuf  xb;
+    static int warned;
     int rc;
 
     if (latin)
         base = utf8ToLatin1 (ins);
-    else
+    else switch (charsetOf (ins)) {
+    case ASCII :
         base = ins;
+	break;
+    case LATIN1 :
+        base = utf8ToLatin1 (ins);
+	break;
+    case NONLATIN :
+        if (!warned) {
+	    agerr (AGWARN, "UTF-8 input uses non-Latin1 characters which cannot be handled in PostScript output");
+	    warned = 1;
+	}
+        base = ins;
+	break;
+    }
 
     if (xb.buf == NULL)
         agxbinit (&xb, 0, NULL);
diff --git a/plugin/core/gvrender_core_ps.c b/plugin/core/gvrender_core_ps.c
index 61f486a11..ed854b67b 100644
--- a/plugin/core/gvrender_core_ps.c
+++ b/plugin/core/gvrender_core_ps.c
@@ -103,7 +103,13 @@ static void psgen_begin_graph(GVJ_t * job)
         epsf_define(job->output_file);
     }
     isLatin1 = (GD_charset(obj->u.g) == CHAR_LATIN1);
-    if (isLatin1 && !setupLatin1) {
+    /* We always set up Latin1. The charset info is always output,
+     * and installing it is cheap. With it installed, we can then
+     * rely on ps_string to convert UTF-8 characters whose encoding
+     * is in the range of Latin-1 into the Latin-1 equivalent and
+     * get the expected PostScript output.
+     */
+    if (!setupLatin1) {
 	core_fputs(job, "setupLatin1\n");	/* as defined in ps header */
 	setupLatin1 = TRUE;
     }