granicus.if.org Git - re2c/commitdiff
Skeleton: simplified path structure.
author: Ulya Trofimovich <skvadrik@gmail.com>
Mon, 14 Mar 2016 22:23:03 +0000 (22:23 +0000)
committer: Ulya Trofimovich <skvadrik@gmail.com>
Mon, 14 Mar 2016 22:23:03 +0000 (22:23 +0000)
Just store pointers to skeleton nodes (instead of bookkeeping arcs,
contexts and rules in each path). All the necessary information can
be easily retrieved from nodes when the path is being dumped to file.

Three tests in '--skeleton' mode have been broken by this commit.
Actually, these are not breakages: these cases reveal incorrect
re2c-generated code. The change is due to the fact that skeleton
now doesn't simulate contexts that go *after* the matched rule:

    ------o------o------> ... (fallback to rule)
        rule   context

re2c/src/ir/skeleton/generate_data.cc
re2c/src/ir/skeleton/path.h
re2c/src/ir/skeleton/skeleton.cc
re2c/src/ir/skeleton/skeleton.h

index 60af8376eb1809e913e744c9f5ec78e2f91d8ac5..ca51171ce89fedfbe8cad4849312792c0329bcec 100644 (file)
@@ -57,7 +57,7 @@ template <typename cunit_t, typename key_t>
 {
        if (end () && suffix == NULL)
        {
-               suffix = new path_t (rule, ctx);
+               suffix = new path_t(this);
        }
        if (suffix != NULL)
        {
@@ -71,12 +71,12 @@ template <typename cunit_t, typename key_t>
                        i != arcs.end () && !size.overflow(); ++i)
                {
                        path_t new_prefix = prefix;
-                       new_prefix.extend (i->first->rule, i->first->ctx, &i->second);
+                       new_prefix.extend (i->first);
                        i->first->cover<cunit_t, key_t> (new_prefix, input, keys, size);
                        if (i->first->suffix != NULL && suffix == NULL)
                        {
-                               suffix = new path_t (rule, ctx);
-                               suffix->extend (i->first->rule, i->first->ctx, &i->second);
+                               suffix = new path_t(this);
+                               suffix->extend (i->first);
                                suffix->append (i->first->suffix);
                        }
                }
@@ -86,7 +86,7 @@ template <typename cunit_t, typename key_t>
 template <typename cunit_t, typename key_t>
        void Skeleton::generate_paths_cunit_key (FILE * input, FILE * keys)
 {
-       path_t prefix (nodes->rule, nodes->ctx);
+       path_t prefix (nodes);
        Node::covers_t size = Node::covers_t::from32(0u);
 
        nodes->cover<cunit_t, key_t> (prefix, input, keys, size);
@@ -183,7 +183,7 @@ template <typename cunit_t, typename key_t>
        size_t count = 0;
        for (size_t i = 0; i < len; ++i)
        {
-               count = std::max (count, path[i]->size ());
+               count = std::max (count, path[i].size ());
        }
 
        const Node::covers_t size = Node::covers_t::from64(len) * Node::covers_t::from64(count);
@@ -194,7 +194,7 @@ template <typename cunit_t, typename key_t>
                cunit_t * buffer = new cunit_t [buffer_size];
                for (size_t i = 0; i < len; ++i)
                {
-                       const std::vector<uint32_t> & arc = *path[i];
+                       const std::vector<uint32_t> & arc = path[i];
                        const size_t width = arc.size ();
                        for (size_t j = 0; j < count; ++j)
                        {
index a3452fdf9bdba31c26ebc79b1749137f0995ab63..2150a9eb24de0ad059512dba40b0b1954dcbc6c5 100644 (file)
@@ -4,95 +4,84 @@
 #include "src/util/c99_stdint.h"
 #include <vector>
 
-#include "src/parse/rules.h"
+#include "src/ir/skeleton/skeleton.h"
 
 namespace re2c
 {
 
 class path_t
 {
-public:
-       typedef std::vector<uint32_t> arc_t;
-
-private:
-       std::vector<const arc_t *> arcs;
-
-       const RuleInfo *rule;
-       size_t rule_pos;
-
-       bool ctx;
-       size_t ctx_pos;
+       std::vector<Node*> arcs;
 
 public:
-       explicit path_t (const RuleInfo *r, bool c)
-               : arcs ()
-               , rule (r)
-               , rule_pos (0)
-               , ctx (c)
-               , ctx_pos (0)
-       {}
-       path_t(const path_t &p)
-               : arcs(p.arcs)
-               , rule(p.rule)
-               , rule_pos(p.rule_pos)
-               , ctx(p.ctx)
-               , ctx_pos(p.ctx_pos)
-       {}
+       explicit path_t(Node *n) : arcs()
+       {
+               arcs.push_back(n);
+       }
+       path_t(const path_t &p) : arcs(p.arcs) {}
        path_t &operator=(const path_t &p)
        {
                new (this) path_t(p);
                return *this;
        }
-       size_t len () const
+       size_t len() const
        {
-               return arcs.size ();
+               return arcs.size() - 1;
        }
        size_t len_matching () const
        {
-               if (rule) {
+               std::vector<Node*>::const_reverse_iterator
+                       tail = arcs.rbegin(),
+                       head = arcs.rend();
+               for (; tail != head; ++tail) {
+                       RuleInfo *rule = (*tail)->rule;
+                       if (rule == NULL) {
+                               continue;
+                       }
+                       const size_t len = static_cast<size_t>(head - tail) - 1;
                        switch (rule->ctx_len) {
-                               case 0:   return rule_pos;
-                               case ~0u: return ctx_pos;
-                               default:  return rule_pos - rule->ctx_len;
+                               case 0:
+                                       return len;
+                               case ~0u:
+                                       for (; tail != head; ++tail) {
+                                               if ((*tail)->ctx) {
+                                                       return static_cast<size_t>(head - tail) - 1;
+                                               }
+                                       }
+                                       assert(false);
+                               default:
+                                       return len - rule->ctx_len;
                        }
                }
                return 0;
        }
-       rule_rank_t match () const
+       rule_rank_t match() const
        {
-               return rule ? rule->rank : rule_rank_t::none();
+               std::vector<Node*>::const_reverse_iterator
+                       tail = arcs.rbegin(),
+                       head = arcs.rend();
+               for (; tail != head; ++tail) {
+                       RuleInfo *rule = (*tail)->rule;
+                       if (rule != NULL) {
+                               return rule->rank;
+                       }
+               }
+               return rule_rank_t::none();
        }
-       const arc_t * operator [] (size_t i) const
+       const std::vector<uint32_t>& operator[](size_t i) const
        {
-               return arcs[i];
+               Node *n1 = arcs[i];
+               Node *n2 = arcs[i + 1];
+               return n1->arcs[n2];
        }
-       void extend (const RuleInfo *r, bool c, const arc_t * a)
+       void extend(Node *n)
        {
-               arcs.push_back (a);
-               if (r)
-               {
-                       rule = r;
-                       rule_pos = arcs.size ();
-               }
-               if (c)
-               {
-                       ctx = true;
-                       ctx_pos = arcs.size ();
-               }
+               arcs.push_back(n);
        }
-       void append (const path_t * p)
+       void append(const path_t *p)
        {
-               if (p->rule)
-               {
-                       rule = p->rule;
-                       rule_pos = arcs.size () + p->rule_pos;
-               }
-               if (p->ctx)
-               {
-                       ctx = true;
-                       ctx_pos = arcs.size () + p->ctx_pos;
-               }
-               arcs.insert (arcs.end (), p->arcs.begin (), p->arcs.end ());
+               assert(arcs.back() == p->arcs.front());
+               arcs.insert(arcs.end(), p->arcs.begin() + 1, p->arcs.end());
        }
 };
 
index cbc3c7f0371cf66f5a09ababfe134883edc1c48e..91ca12880e3566f7a909f28910ed43e7c29ebab4 100644 (file)
@@ -6,6 +6,7 @@
 #include "src/conf/msg.h"
 #include "src/ir/dfa/dfa.h"
 #include "src/ir/regexp/regexp.h"
+#include "src/ir/skeleton/path.h"
 #include "src/ir/skeleton/skeleton.h"
 
 namespace re2c
@@ -24,11 +25,7 @@ Node::Node ()
 
 void Node::init(bool c, RuleInfo *r, const std::vector<std::pair<Node*, uint32_t> > &a)
 {
-       if (r)
-       {
-               rule = r;
-       }
-
+       rule = r;
        ctx = c;
 
        uint32_t lb = 0;
index f89135dce787fc90f746c66e4cb677e079cc320a..dddb2f097cb9334f9deac6c7331454bb803383e0 100644 (file)
@@ -13,7 +13,6 @@
 
 #include "src/ir/regexp/regexp.h"
 #include "src/ir/rule_rank.h"
-#include "src/ir/skeleton/path.h"
 #include "src/ir/skeleton/way.h"
 #include "src/parse/rules.h"
 #include "src/util/local_increment.h"
@@ -26,6 +25,7 @@ namespace re2c
 struct dfa_t;
 struct OutputFile;
 class RuleInfo;
+struct path_t;
 
 struct Node
 {
@@ -62,7 +62,7 @@ struct Node
        // We don't need all paths anyway, just some examples.
        typedef u32lim_t<1024> nakeds_t; // ~1Kb
 
-       typedef std::map<Node *, path_t::arc_t> arcs_t;
+       typedef std::map<Node *, std::vector<uint32_t> > arcs_t;
        typedef std::map<Node *, way_arc_t> arcsets_t;
        typedef local_increment_t<uint8_t> local_inc;