granicus.if.org Git - graphviz/commitdiff
For charset=utf8, if illegal utf8 found, assume input is latin-1, while
author: Emden Gansner <erg@research.att.com>
Wed, 4 Apr 2012 15:39:41 +0000 (11:39 -0400)
committer: Emden Gansner <erg@research.att.com>
Wed, 4 Apr 2012 15:39:41 +0000 (11:39 -0400)
warning the user.

lib/common/htmltable.c
lib/common/labels.c
lib/common/utils.c
lib/common/utils.h

index 571ee4d5941a60c492546c0e5a6d42d6f9f0a516..261c3c84afcbc41ea97c820db1221f0ef0a91278 100644 (file)
@@ -1985,7 +1985,7 @@ int make_html_label(void *obj, textlabel_t * lp)
            s = latin1ToUTF8(lp->text);
            break;
        default: /* UTF8 */
-           s = htmlEntityUTF8(lp->text);
+           s = htmlEntityUTF8(lp->text, env.g);
            break;
        }
        free(lp->text);
index c657fe425a7e34a51c2cc0f51988cfd234488723..941edd1a211b0f77e95e25c19c66576ec2e84668 100644 (file)
@@ -176,7 +176,7 @@ textlabel_t *make_label(void *obj, char *str, int kind, double fontsize, char *f
            s = latin1ToUTF8(rv->text);
            break;
        default: /* UTF8 */
-           s = htmlEntityUTF8(rv->text);
+           s = htmlEntityUTF8(rv->text, g);
            break;
        }
         free(rv->text);
index fad56347279fd34a00ed23a1ec51437f367aef1e..b51d6444b9999a2a9903749d4f635a212a53aed6 100644 (file)
@@ -1538,9 +1538,35 @@ htmlEntity (char** s)
     return n;
 }
 
-/* substitute html entities like: &#123; and: &amp; with the UTF8 equivalents */
-char* htmlEntityUTF8 (char* s)
+static unsigned char /* cvtAndAppend: treat c as a single Latin-1 byte and convert it to UTF-8 */
+cvtAndAppend (unsigned char c, agxbuf* xb) /* appends all converted bytes except the last to xb; returns that last byte */
 {
+    char buf[2]; /* holds c as a NUL-terminated one-character string */
+    char* s; /* start of the converted string, kept so it can be freed */
+    char* p; /* cursor walking the converted string */
+    int len;
+
+    buf[0] = c;
+    buf[1] = '\0';
+
+    p = s = latin1ToUTF8 (buf);
+    len = strlen(s); /* NOTE(review): no NULL check on latin1ToUTF8's result - confirm it cannot return NULL */
+    while (len-- > 1) /* emit every converted byte except the final one */
+       agxbputc(xb, *p++);
+    c = *p; /* final byte is handed back; the htmlEntityUTF8 call sites append it with their own agxbputc */
+    free (s); /* the converted string is released here once its bytes have been copied out */
+    return c;
+}
+
+/* htmlEntityUTF8:
+ * substitute html entities like: &#123; and: &amp; with the UTF8 equivalents
+ * check for invalid utf8. If found, treat a single byte as Latin-1, convert it to
+ * utf8 and warn the user.
+ */
+char* htmlEntityUTF8 (char* s, graph_t* g)
+{
+    static graph_t* lastg;
+    static boolean warned;
     char*  ns;
     agxbuf xb;
     unsigned char buf[BUFSIZ];
@@ -1548,6 +1574,11 @@ char* htmlEntityUTF8 (char* s)
     unsigned int v;
     int rc;
 
+    if (lastg != g) {
+       lastg = g;
+       warned = 0;
+    }
+
     agxbinit(&xb, BUFSIZ, buf);
 
     while ((c = *(unsigned char*)s++)) {
@@ -1582,9 +1613,12 @@ char* htmlEntityUTF8 (char* s)
                rc = agxbputc(&xb, c);
                c = *(unsigned char*)s++;
            }
-           else {
-               agerr(AGERR, "Invalid 2-byte UTF8 found in input. Perhaps \"-Gcharset=latin1\" is needed?\n");
-               return "";
+           else { 
+               if (!warned) {
+                   agerr(AGWARN, "Invalid 2-byte UTF8 found in input of graph %s - treated as Latin-1. Perhaps \"-Gcharset=latin1\" is needed?\n", agnameof(g));
+                   warned = 1;
+               }
+               c = cvtAndAppend (c, &xb);
            }
        }
        else if (c < 0xF0) { /* copy 3 byte UTF8 characters */
@@ -1595,13 +1629,19 @@ char* htmlEntityUTF8 (char* s)
                c = *(unsigned char*)s++;
            }
            else {
-               agerr(AGERR, "Invalid 3-byte UTF8 found in input. Perhaps \"-Gcharset=latin1\" is needed?\n");
-               return "";
+               if (!warned) {
+                   agerr(AGWARN, "Invalid 3-byte UTF8 found in input of graph %s - treated as Latin-1. Perhaps \"-Gcharset=latin1\" is needed?\n", agnameof(g));
+                   warned = 1;
+               }
+               c = cvtAndAppend (c, &xb);
            }
        }
        else  {
-           agerr(AGERR, "UTF8 codes > 3 bytes are not currently supported. Or perhaps \"-Gcharset=latin1\" is needed?\n");
-           return "";
+           if (!warned) {
+               agerr(AGWARN, "UTF8 codes > 3 bytes are not currently supported (graph %s) - treated as Latin-1. Perhaps \"-Gcharset=latin1\" is needed?\n", agnameof(g));
+               warned = 1;
+           }
+           c = cvtAndAppend (c, &xb);
         }
        rc = agxbputc(&xb, c);
     }
index 285b545009e9463da13a8c84039e8be9e2453e16..5440125076ae18fd5494bb1979afe10628ddbf8a 100644 (file)
@@ -101,7 +101,7 @@ extern "C" {
     extern int processClusterEdges(graph_t * g);
 
     extern char *latin1ToUTF8(char *);
-    extern char *htmlEntityUTF8(char *);
+    extern char *htmlEntityUTF8(char *, graph_t* g);
     extern char* utf8ToLatin1 (char* ins);
     extern char* scanEntity (char* t, agxbuf* xb);