From 60a8dfff965e8953aa2073c714e22e5f461ac2ec Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 5 Oct 2015 15:44:50 +0100 Subject: [PATCH] Support trailing context with '--skeleton'. Trailing contexts are currently broken (overlapping trailing contexts cannot be tracked with a single 'YYCTXMARKER'). For now, re2c with '--skeleton' mimics this incorrect behaviour: information about context is lost by the time DFA is constructed, so skeleton has no way to figure out the right order of things. --- re2c/src/codegen/skeleton/generate_data.cc | 10 ++--- re2c/src/codegen/skeleton/path.h | 48 ++++++++++++++++++---- re2c/src/codegen/skeleton/skeleton.cc | 5 +++ re2c/src/codegen/skeleton/skeleton.h | 5 +++ 4 files changed, 56 insertions(+), 12 deletions(-) diff --git a/re2c/src/codegen/skeleton/generate_data.cc b/re2c/src/codegen/skeleton/generate_data.cc index 6e039f92..0985514a 100644 --- a/re2c/src/codegen/skeleton/generate_data.cc +++ b/re2c/src/codegen/skeleton/generate_data.cc @@ -111,7 +111,7 @@ template for (arcs_t::iterator i = arcs.begin (); i != arcs.end (); ++i) { multipath_t new_prefix = prefix; - new_prefix.extend (i->first->rule, &i->second); + new_prefix.extend (i->first->rule, i->first->restorectx, i->first->ctx, &i->second); i->first->permutate (new_prefix, input, keys); } } @@ -152,7 +152,7 @@ template } else if (end ()) { - suffix = new path_t (rule); + suffix = new path_t (rule, restorectx, ctx); } else if (loop < 2) { @@ -160,7 +160,7 @@ template for (arcs_t::iterator i = arcs.begin (); i != arcs.end (); ++i) { multipath_t new_prefix = prefix; - new_prefix.extend (i->first->rule, &i->second); + new_prefix.extend (i->first->rule, i->first->restorectx, i->first->ctx, &i->second); size = size + i->first->cover (new_prefix, input, keys); if (size.overflow ()) { @@ -168,7 +168,7 @@ template } if (i->first->suffix != NULL && suffix == NULL) { - suffix = new path_t (rule); + suffix = new path_t (rule, restorectx, ctx); suffix->append (i->second[0], 
i->first->suffix); } } @@ -179,7 +179,7 @@ template template void Skeleton::generate_paths_cunit_key (FILE * input, FILE * keys) { - multipath_t prefix (nodes->rule); + multipath_t prefix (nodes->rule, nodes->restorectx, nodes->ctx); if (nodes->sizeof_permutate (Node::permuts_t (1u), Node::permuts_t (0u)).overflow ()) { if (nodes->cover (prefix, input, keys).overflow ()) diff --git a/re2c/src/codegen/skeleton/path.h b/re2c/src/codegen/skeleton/path.h index c30471d4..dc54fd9e 100644 --- a/re2c/src/codegen/skeleton/path.h +++ b/re2c/src/codegen/skeleton/path.h @@ -13,14 +13,22 @@ template class generic_path_t { std::vector arcs; + rule_rank_t rule; + bool restorectx; size_t rule_pos; + bool ctx; + size_t ctx_pos; + public: - explicit generic_path_t (rule_rank_t r) + explicit generic_path_t (rule_rank_t r, bool rc, bool c) : arcs () , rule (r) + , restorectx (rc) , rule_pos (0) + , ctx (c) + , ctx_pos (0) {} size_t len () const { @@ -28,7 +36,9 @@ public: } size_t len_matching () const { - return rule_pos; + return restorectx + ? ctx_pos + : rule_pos; } rule_rank_t match () const { @@ -38,14 +48,20 @@ public: { return arcs[i]; } - void extend (rule_rank_t r, const arc_t & a) + void extend (rule_rank_t r, bool rc, bool c, const arc_t & a) { arcs.push_back (a); if (!r.is_none ()) { rule = r; + restorectx = rc; rule_pos = arcs.size (); } + if (c) + { + ctx = true; + ctx_pos = arcs.size (); + } } void append (const arc_t & a, const generic_path_t * p) { @@ -53,8 +69,14 @@ public: if (!p->rule.is_none ()) { rule = p->rule; + restorectx = p->restorectx; rule_pos = arcs.size () + p->rule_pos; } + if (p->ctx) + { + ctx = true; + ctx_pos = arcs.size () + p->ctx_pos; + } arcs.insert (arcs.end (), p->arcs.begin (), p->arcs.end ()); } @@ -67,17 +89,29 @@ public: template size_t len_matching (const generic_path_t & prefix, const generic_path_t & suffix) { - return suffix.rule.is_none () + const bool none = suffix.rule.is_none (); + bool restorectx = none + ? 
prefix.restorectx + : suffix.restorectx; + const size_t rule_pos = none ? prefix.rule_pos : prefix.arcs.size () + suffix.rule_pos; + + const size_t ctx_pos = suffix.ctx + ? prefix.arcs.size () + suffix.ctx_pos + : prefix.ctx_pos; + + return restorectx + ? ctx_pos + : rule_pos; } template rule_rank_t match (const generic_path_t & prefix, const generic_path_t & suffix) { - return suffix.match ().is_none () - ? prefix.match () - : suffix.match (); + return suffix.rule.is_none () + ? prefix.rule + : suffix.rule; } typedef generic_path_t path_t; diff --git a/re2c/src/codegen/skeleton/skeleton.cc b/re2c/src/codegen/skeleton/skeleton.cc index e87e572a..e4bedcad 100644 --- a/re2c/src/codegen/skeleton/skeleton.cc +++ b/re2c/src/codegen/skeleton/skeleton.cc @@ -12,6 +12,8 @@ Node::Node () , arcsets () , loop (0) , rule (rule_rank_t::none ()) + , restorectx (false) + , ctx (false) , dist (DIST_ERROR) , suffix (NULL) {} @@ -22,8 +24,11 @@ void Node::init (const State * s, const s2n_map & s2n) if (is_accepting) { rule = s->rule->rank; + restorectx = s->rule->ctx->fixedLength () != 0; } + ctx = s && s->isPreCtxt; + const bool is_final = !s || (s->go.nSpans == 1 && !s->go.span[0].to); if (!is_final) { diff --git a/re2c/src/codegen/skeleton/skeleton.h b/re2c/src/codegen/skeleton/skeleton.h index 4439a27d..50ddd227 100644 --- a/re2c/src/codegen/skeleton/skeleton.h +++ b/re2c/src/codegen/skeleton/skeleton.h @@ -47,6 +47,11 @@ struct Node // rule number for corresponding DFA state (if any) rule_rank_t rule; + // whether this rule must roll back input position to the beginning of trailing context + bool restorectx; + + // start of trailing context + bool ctx; // maximal distance to end node (assuming one iteration per loop) static const uint32_t DIST_ERROR; -- 2.40.0