/*
 * Standalone test stub for xml_escape().
 *
 * XML escaping was previously only tested indirectly, through other graph
 * processing that happens to use it. This stub allows the function to be unit
 * tested directly, giving an extra safeguard and an easier way to diagnose
 * problems with this functionality. Related to #1868.
 */
#ifdef TEST_XML
// compile the below test stub with:
//
//   ${CC} -std=c99 -DTEST_XML -Ilib -Ilib/gvc -Ilib/pathplan -Ilib/cgraph
//     -Ilib/cdt lib/common/xml.c

#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// output callback handed to xml_escape(): forwards each fragment to the
// FILE* that main() passes as the final xml_escape() argument
static int put(void *stream, const char *s) { return fputs(s, stream); }

// stub for testing above functionality
//
// usage: xml [--dash|-d] [--nbsp|-n] [--raw|-r] [--utf8|-u] [text...]
//
// Each option sets the same-named member of xml_flags_t. Every remaining
// argument is escaped and written to stdout, with no separator between
// arguments. Exits with EXIT_FAILURE on an unrecognised option or when
// xml_escape() reports a negative result.
int main(int argc, char **argv) {

  xml_flags_t flags = {0};

  while (true) {
    static const struct option opts[] = {
        {"dash", no_argument, 0, 'd'},
        {"nbsp", no_argument, 0, 'n'},
        {"raw", no_argument, 0, 'r'},
        {"utf8", no_argument, 0, 'u'},
        {0, 0, 0, 0},
    };

    // named option_index rather than index to avoid shadowing index(3)
    int option_index;
    int c = getopt_long(argc, argv, "dnru", opts, &option_index);

    // -1 signals that all options have been consumed
    if (c == -1)
      break;

    switch (c) {

    case 'd':
      flags.dash = 1;
      break;

    case 'n':
      flags.nbsp = 1;
      break;

    case 'r':
      flags.raw = 1;
      break;

    case 'u':
      flags.utf8 = 1;
      break;

    default:
      // getopt_long() returned '?' (or something else we do not handle)
      fprintf(stderr, "unexpected error\n");
      return EXIT_FAILURE;
    }
  }

  // escape all input we received
  for (int i = optind; i < argc; ++i) {
    int r = xml_escape(argv[i], flags, put, stdout);
    if (r < 0)
      return EXIT_FAILURE;
  }

  return EXIT_SUCCESS;
}
#endif
import itertools
import json
import os
from pathlib import Path
import platform
import subprocess
import sys
import tempfile

import pytest

sys.path.append(os.path.dirname(__file__))
from gvtest import compile_c, ROOT #pylint: disable=C0413


@pytest.mark.skipif(platform.system() != "Linux",
                    reason="TODO: make this test case portable")
def test_xml_escape():
    """
    Check the functionality of ../lib/common/xml.c:xml_escape.

    The C file is compiled with -DTEST_XML into a small command line program
    whose --dash, --nbsp, --raw and --utf8 options toggle the corresponding
    escaping flags. Every combination of the four flags is then exercised
    against a fixed set of inputs.

    NOTE: the entity and character references in the expected strings below
    (e.g. "&lt;", "&#45;") are literal text the program is expected to emit.
    They must be kept verbatim and never HTML-decoded.
    """

    # locate our test program
    xml_c = Path(__file__).parent / "../lib/common/xml.c"
    assert xml_c.exists(), "missing xml.c"

    with tempfile.TemporaryDirectory() as tmp:

        # write a dummy config.h to allow standalone compilation
        (Path(tmp) / "config.h").touch()

        # compile the stub to something we can run
        xml_exe = Path(tmp) / "xml.exe"
        cflags = ["-std=c99", "-DTEST_XML", "-I", tmp, "-I", ROOT / "lib",
                  "-I", ROOT / "lib/gvc", "-I", ROOT / "lib/pathplan", "-I",
                  ROOT / "lib/cgraph", "-I", ROOT / "lib/cdt", "-Wall",
                  "-Wextra"]
        compile_c(xml_c, cflags, dst=xml_exe)

        def escape(dash: bool, nbsp: bool, raw: bool, utf8: bool,
                   s: str) -> str:
            """Run the compiled stub on `s` and return its decoded stdout."""
            args = [xml_exe]
            if dash:
                args += ["--dash"]
            if nbsp:
                args += ["--nbsp"]
            if raw:
                args += ["--raw"]
            if utf8:
                args += ["--utf8"]
            args += [s]

            # We would like to pass `encoding="utf-8"`, or even better
            # `universal_newlines=True`. However, neither of these seem to
            # work as described in Python == 3.6. Observable using Ubuntu
            # 18.04 in CI. Instead, we encode and decode manually.
            args = [str(a).encode("utf-8") for a in args]
            out = subprocess.check_output(args)
            return out.decode("utf-8")

        for dash, nbsp, raw, utf8 in itertools.product((False, True),
                                                       repeat=4):

            # something basic with nothing escapable
            plain = "the quick brown fox"
            plain_escaped = escape(dash, nbsp, raw, utf8, plain)
            assert plain == plain_escaped, "text incorrectly modified"

            # basic tag escaping
            tag = "template <typename T> void foo(T t);"
            tag_escaped = escape(dash, nbsp, raw, utf8, tag)
            assert tag_escaped == \
                "template &lt;typename T&gt; void foo(T t);", \
                "incorrect < or > escaping"

            # something with an embedded escape: an existing entity is only
            # re-escaped in raw mode
            embedded = "salt &amp; pepper"
            embedded_escaped = escape(dash, nbsp, raw, utf8, embedded)
            if raw:
                assert embedded_escaped == "salt &amp;amp; pepper", \
                    "missing & escape"
            else:
                assert embedded_escaped == embedded, \
                    "text incorrectly modified"

            # hyphen escaping
            hyphen = "UTF-8"
            hyphen_escaped = escape(dash, nbsp, raw, utf8, hyphen)
            if dash:
                assert hyphen_escaped == "UTF&#45;8", "incorrect dash escape"
            else:
                assert hyphen_escaped == hyphen, "text incorrectly modified"

            # line endings
            nl = "the quick\nbrown\rfox"
            nl_escaped = escape(dash, nbsp, raw, utf8, nl)
            if raw:
                assert nl_escaped == "the quick&#10;brown&#13;fox", \
                    "incorrect new line escape"
            else:
                # allow benign modification of the \r
                assert nl_escaped in (nl, "the quick\nbrown\nfox"), \
                    "text incorrectly modified"

            # non-breaking space escaping; note the two consecutive spaces,
            # of which only the second is expected to be replaced
            two = "the quick  brown fox"
            two_escaped = escape(dash, nbsp, raw, utf8, two)
            if nbsp:
                assert two_escaped == "the quick &#160;brown fox", \
                    "incorrect nbsp escape"
            else:
                assert two_escaped == two, "text incorrectly modified"

            # cases from table in https://en.wikipedia.org/wiki/UTF-8
            for c, expected in (("$", "$"), ("¢", "&#xa2;"),
                                ("ह", "&#x939;"), ("€", "&#x20ac;"),
                                ("한", "&#xd55c;"), ("𐍈", "&#x10348;")):
                unescaped = f"character |{c}|"
                escaped = escape(dash, nbsp, raw, utf8, unescaped)
                if utf8:
                    assert escaped == f"character |{expected}|", \
                        "bad UTF-8 escaping"
                else:
                    assert escaped == unescaped, "bad UTF-8 passthrough"