]> granicus.if.org Git - re2c/commitdiff
Stop loosing arcs in DFA loops when building path cover with --skeleton.
authorUlya Trofimovich <skvadrik@gmail.com>
Mon, 10 Aug 2015 11:03:07 +0000 (12:03 +0100)
committerUlya Trofimovich <skvadrik@gmail.com>
Mon, 10 Aug 2015 11:03:07 +0000 (12:03 +0100)
The problem with the algorithm was that when one and the same skeleton
node is visited for the 3rd time, the algorithm would abandon this
branch and drop all the path prefixes that lead to it. It's ok when
bruteforcing all paths (the arcs in the dropped prefixes will occur in
other paths that lead to final states), but it's not ok for path cover:
some arcs may have occured only in these dropped prefixes).

The fix is easy: just check if the branch was cancelled and in case it
was try the same prefixes with another branch.

This small example demonstrates the problem:

    /*!re2c
        [^\x00]* [\x00] {}
    */

DFA skeleton looks like this:

    state 0:
        arcs to state 0: [0x01, 0xFF]
        arcs to state 1: [0x00]
    state 1: final

Before the fix, re2c would generate the following paths (provided
path cover construction is forced instead of generating all paths):

    - 0x01,0x00
    - 0x00

Clearly the [0xFF] arc looping from state 0 to state 0 is missing
(uncovered). After the fix:

    - 0x01,0x00
    - 0xFF,0x01,0x00
    - 0x01,0xFF,0x00
    - 0x00

Not a minimal path cover, but we don't need it. We just need a small
enough one.

re2c/src/codegen/skeleton/skeleton.cc

index bd3f19eebafe164af6fe1de4eaee6c4554097ebd..a974c978b8e6f38163dc6c8aaeb38acf39470209 100644 (file)
@@ -129,12 +129,15 @@ arccount_t Node::estimate_size_cover (arccount_t wid, arccount_t len)
                        {
                                return arccount_t::limit ();
                        }
-                       if (!path_len_init && i->first->path_len_init)
+                       if (i->first->path_len_init)
                        {
-                               path_len_init = true;
-                               path_len = i->first->path_len + arccount_t (1u);
+                               w = w + new_wid;
+                               if (!path_len_init)
+                               {
+                                       path_len_init = true;
+                                       path_len = i->first->path_len + arccount_t (1u);
+                               }
                        }
-                       w = w + new_wid;
                }
                return size;
        }
@@ -194,16 +197,20 @@ void Node::generate_paths_cover (const std::vector<Path> & prefixes, std::vector
                {
                        std::vector<Path> zs;
                        const size_t new_wid = i->second.size ();
-                       for (size_t j = 0; j < new_wid; ++j, ++w)
+                       for (size_t j = 0; j < new_wid; ++j)
                        {
-                               zs.push_back (prefixes[w % wid]);
+                               zs.push_back (prefixes[(w + j) % wid]);
                                zs[j].extend (rule, i->second[j]);
                        }
                        i->first->generate_paths_cover (zs, results);
-                       if (path == NULL && i->first->path != NULL)
+                       if (i->first->path != NULL)
                        {
-                               path = new Path (Path::chars_t (1, i->second[0]), 0, rule);
-                               path->append (i->first->path);
+                               w += new_wid;
+                               if (path == NULL)
+                               {
+                                       path = new Path (Path::chars_t (1, i->second[0]), 0, rule);
+                                       path->append (i->first->path);
+                               }
                        }
                }
        }