]> granicus.if.org Git - re2c/commitdiff
Use explicit stack instead of implicit recursion.
authorUlya Trofimovich <skvadrik@gmail.com>
Mon, 31 Jul 2017 13:22:19 +0000 (14:22 +0100)
committerUlya Trofimovich <skvadrik@gmail.com>
Mon, 31 Jul 2017 13:22:19 +0000 (14:22 +0100)
With CXXFLAGS='-fsanitize=addres' GCC complains about stack overflow.

re2c/src/re/split_charset.cc

index 4c1a4b0ee4c74ea27201b34190cb449b2ead0829..21c282dfe6bdf0e691d530cb813c6c8d0e53b4ad 100644 (file)
@@ -1,35 +1,11 @@
 #include "src/util/c99_stdint.h"
 #include <set>
+#include <stack>
 
 #include "src/re/re.h"
 
 namespace re2c {
 
-static void split(const RE* re, std::set<uint32_t> &cs)
-{
-       switch (re->type) {
-               case RE::NIL: break;
-               case RE::TAG: break;
-               case RE::SYM:
-                       for (const Range *r = re->sym; r; r = r->next()) {
-                               cs.insert(r->lower());
-                               cs.insert(r->upper());
-                       }
-                       break;
-               case RE::ALT:
-                       split(re->alt.re1, cs);
-                       split(re->alt.re2, cs);
-                       break;
-               case RE::CAT:
-                       split(re->cat.re1, cs);
-                       split(re->cat.re2, cs);
-                       break;
-               case RE::ITER:
-                       split(re->iter.re, cs);
-                       break;
-       }
-}
-
 /* The original set of code units (charset) might be very large.
  * A common trick it is to split charset into disjoint character ranges
  * and choose a representative of each range (we choose lower bound).
@@ -40,12 +16,36 @@ static void split(const RE* re, std::set<uint32_t> &cs)
 void split_charset(RESpec &spec)
 {
        std::set<uint32_t> cs;
+       std::stack<const RE*> todo;
 
        std::vector<RE*>::const_iterator
                i = spec.res.begin(),
                e = spec.res.end();
-       for (; i != e; ++i) {
-               split(*i, cs);
+       for (; i != e; ++i) todo.push(*i);
+       while (!todo.empty()) {
+               const RE *re = todo.top();
+               todo.pop();
+               switch (re->type) {
+                       case RE::NIL: break;
+                       case RE::TAG: break;
+                       case RE::SYM:
+                               for (const Range *r = re->sym; r; r = r->next()) {
+                                       cs.insert(r->lower());
+                                       cs.insert(r->upper());
+                               }
+                               break;
+                       case RE::ALT:
+                               todo.push(re->alt.re2);
+                               todo.push(re->alt.re1);
+                               break;
+                       case RE::CAT:
+                               todo.push(re->cat.re2);
+                               todo.push(re->cat.re1);
+                               break;
+                       case RE::ITER:
+                               todo.push(re->iter.re);
+                               break;
+               }
        }
        cs.insert(0);
        cs.insert(spec.opts->encoding.nCodeUnits());