From: Matthew Fernandez Date: Sat, 13 Aug 2022 01:28:09 +0000 (-0700) Subject: cgraph: rewrite scanner to use an agxbuf X-Git-Tag: 5.0.1~6^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2d7852a049b729fb0db71c5a8ef2366215ae2035;p=graphviz cgraph: rewrite scanner to use an agxbuf In a sort of Chesterton’s fence¹ sense, I do not understand the existing code. While I understand the first order motivation (dynamic string buffering inline), I do not follow the implementation. It manages to hit the trifecta: 1. NULL pointer dereference. Failures of allocation functions were not checked. 2. Memory leaks. Calls to `agstrdup` and `agstrdup_html` allocate an owned string internally, yet this code was acting as if they stole the `Sbuf` backing memory. 3. Not-obviously-correct and repeated computation of the end of the buffer to append to. `addstr` seemed overly complex and inefficient. Static initialization of an agxbuf was a bit complicated until 04a248348798a4a32fc9ac7e5d860a84ab5ee49f, though it was still possible. Following that commit, it becomes a no-brainer. The code after this change still leaks memory. Perhaps in future we should reset (`agxbfree(&Sbuf); memset(&Sbuf, 0, sizeof(Sbuf));`) the buffer after `agstrdup` or `agstrdup_html`, though there would be a performance cost to this. ¹ https://en.wikipedia.org/wiki/G._K._Chesterton#Chesterton's_fence --- diff --git a/lib/cgraph/scan.l b/lib/cgraph/scan.l index a668cf75a..db6b6be88 100644 --- a/lib/cgraph/scan.l +++ b/lib/cgraph/scan.l @@ -23,9 +23,11 @@ %option noinput %{ +#include #include #include #include +#include #include #include #include @@ -68,40 +70,25 @@ int gv_isatty_suppression; #endif /* buffer for arbitrary length strings (longer than BUFSIZ) */ -static char *Sbuf,*Sptr,*Send; +static agxbuf Sbuf; + static void beginstr(void) { - if (Sbuf == NULL) { - Sbuf = malloc(BUFSIZ); - Send = Sbuf + BUFSIZ; - } - Sptr = Sbuf; - *Sptr = 0; + // nothing required, but we should not have pending string data + assert(agxblen(&Sbuf) == 0 && + "pending string data that was not consumed (missing " + "endstr()/endhtmlstr()?)"); } static void addstr(char *src) { - char c; - if (Sptr > Sbuf) Sptr--; - do { - do {c = *Sptr++ = *src++;} while (c && Sptr < Send); - if (c) { - long sz = Send - Sbuf; - long off = Sptr - Sbuf; - sz *= 2; - Sbuf = realloc(Sbuf,sz); - Send = Sbuf + sz; - Sptr = Sbuf + off; - } - } while (c); + agxbput(&Sbuf, src); } static void endstr(void) { - aaglval.str = agstrdup(Ag_G_global,Sbuf); - *Sbuf = 0; + aaglval.str = agstrdup(Ag_G_global, agxbuse(&Sbuf)); } static void endstr_html(void) { - aaglval.str = agstrdup_html(Ag_G_global,Sbuf); - *Sbuf = 0; + aaglval.str = agstrdup_html(Ag_G_global, agxbuse(&Sbuf)); } static void @@ -247,20 +234,20 @@ void aagerror(const char *str) else switch (YYSTATE) { case qstring : agxbprint(&xb, " scanning a quoted string (missing endquote? longer than %d?)", YY_BUF_SIZE); - if (*Sbuf) { - size_t len = strlen(Sbuf); + if (agxblen(&Sbuf) > 0) { + size_t len = agxblen(&Sbuf); if (len > 80) - Sbuf[80] = '\0'; - agxbprint (&xb, "\nString starting:\"%s", Sbuf); + len = 80; + agxbprint(&xb, "\nString starting:\"%.*s", (int)len, Sbuf.buf); } break; case hstring : agxbprint(&xb, " scanning a HTML string (missing '>'? bad nesting? longer than %d?)", YY_BUF_SIZE); - if (*Sbuf) { - size_t len = strlen(Sbuf); + if (agxblen(&Sbuf)) { + size_t len = agxblen(&Sbuf); if (len > 80) - Sbuf[80] = '\0'; - agxbprint (&xb, "\nString starting:<%s", Sbuf); + len = 80; + agxbprint(&xb, "\nString starting:<%.*s", (int)len, Sbuf.buf); } break; case comment :