Moved all notes (lengthy comments with names) to the beginning of file.

author Ulya Trofimovich <skvadrik@gmail.com>

Sat, 11 Aug 2018 08:01:57 +0000 (09:01 +0100)

committer Ulya Trofimovich <skvadrik@gmail.com>

Sat, 11 Aug 2018 08:01:57 +0000 (09:01 +0100)
author Ulya Trofimovich <skvadrik@gmail.com>
Sat, 11 Aug 2018 08:01:57 +0000 (09:01 +0100)
committer Ulya Trofimovich <skvadrik@gmail.com>
Sat, 11 Aug 2018 08:01:57 +0000 (09:01 +0100)
diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc

index f408ba337c478fe423d1feb1ec49611e3904550f..13db4da8e595aa19e83ebf9fed69c786336cb31e 100644 (file)
--- a/re2c/src/dfa/closure.cc
+++ b/re2c/src/dfa/closure.cc
@@ -19,6 +19,63 @@
  namespace re2c
  {
  
+/* note [epsilon-closures in tagged NFA]
+ *
+ * The closure includes all NFA states that are reachable by epsilon-paths
+ * from the given set of states and either are final or have non-epsilon
+ * transitions. Note that by construction NFA states cannot have both
+ * epsilon and non-epsilon transitions.
+ *
+ * Each closure state might be reachable by multiple epsilon-paths with
+ * different tags: this means that the regular expression is ambiguous
+ * and can be parsed in different ways. Which parse to choose depends on the
+ * disambiguation policy. RE2C supports two policies: leftmost greedy and
+ * POSIX.
+ *
+ * We use Goldber-Radzik algorithm to find the "shortest path".
+ * Both disambiguation policies forbid epsilon-cycles with negative weight.
+ */
+
+
+/* note [at most one final item per closure]
+ *
+ * By construction NFA has exactly one final state per rule. Thus closure
+ * has at most one final item per rule (in other words, all final items
+ * in closure belong to different rules). The rule with the highest priority
+ * shadowes all other rules. Final items that correspond to shadowed rules
+ * are useless and should be removed as early as possible.
+ *
+ * If we let such items remain in closure, they may prevent the new DFA
+ * state from being merged with other states. This won't affect the final
+ * program: meaningless finalizing tags will be removed by dead code
+ * elimination and subsequent minimization will merge equivalent final
+ * states. However, it's better not to add useless final items at all.
+ *
+ * Note that the first final item reached by the epsilon-closure it the one
+ * with the highest priority (see note [closure items are sorted by rule]).
+ */
+
+
+/* note [the difference between TDFA(0) and TDFA(1)]
+ *
+ * TDFA(0) performs epsilon-closure after transition on symbol,
+ * while TDFA(1) performs it before the transition and uses the lookahead
+ * symbol to filter the closure.
+ *
+ * TDFA(0) is one step ahead of TDFA(1): it consumes a symol, then builds
+ * epsilon-closure, eagerly applies all tags reachable by it and goes to
+ * the next state.
+ *
+ * TDFA(1) is more lazy: it builds epsilon-closure, then filters it with
+ * respect to the current symbol (uses only those states which have outgoing
+ * transitions on this symbol), then applies corresponding tags (probably
+ * not all tags applied by TDFA(0)) and then consumes the symbol and goes
+ * to the next state.
+ *
+ * Thus in general TDFA(1) raises less conflicts than TDFA(0).
+ */
+
+
  static void closure_posix(const closure_t &init, closure_t &done, closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags, const prectable_t *prectbl, size_t noldclos);
  static void closure_leftmost(const closure_t &init, closure_t &done, closure_t *shadow, Tagpool &tagpool);
  static void prune(closure_t &clos, std::valarray<Rule> &rules);
@@ -28,6 +85,7 @@ static void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &ta
         const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos);
  static bool cmpby_rule_state(const clos_t &x, const clos_t &y);
  
+
  tcmd_t *closure(dfa_t &dfa, closure_t &clos1, closure_t &clos2,
         Tagpool &tagpool, newvers_t &newvers, closure_t *shadow,
         const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos)
@@ -56,6 +114,7 @@ tcmd_t *closure(dfa_t &dfa, closure_t &clos1, closure_t &clos2,
         return cmd;
  }
  
+
  bool cmpby_rule_state(const clos_t &x, const clos_t &y)
  {
         const nfa_state_t *sx = x.state, *sy = y.state;
@@ -69,23 +128,6 @@ bool cmpby_rule_state(const clos_t &x, const clos_t &y)
  }
  
  
-/* note [epsilon-closures in tagged NFA]
- *
- * The closure includes all NFA states that are reachable by epsilon-paths
- * from the given set of states and either are final or have non-epsilon
- * transitions. Note that by construction NFA states cannot have both
- * epsilon and non-epsilon transitions.
- *
- * Each closure state might be reachable by multiple epsilon-paths with
- * different tags: this means that the regular expression is ambiguous
- * and can be parsed in different ways. Which parse to choose depends on the
- * disambiguation policy. RE2C supports two policies: leftmost greedy and
- * POSIX.
- *
- * We use Goldber-Radzik algorithm to find the "shortest path".
- * Both disambiguation policies forbid epsilon-cycles with negative weight.
- */
-
  static nfa_state_t *relax(clos_t x, closure_t &done,
         closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags,
         const prectable_t *prectbl, size_t noldclos)
@@ -120,6 +162,7 @@ static nfa_state_t *relax(clos_t x, closure_t &done,
         return q;
  }
  
+
  static nfa_state_t *explore(nfa_state_t *q, closure_t &done,
         closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags,
         const prectable_t *prectbl, size_t noldclos)
@@ -163,6 +206,7 @@ static nfa_state_t *explore(nfa_state_t *q, closure_t &done,
         return p;
  }
  
+
  void closure_posix(const closure_t &init, closure_t &done,
         closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags,
         const prectable_t *prectbl, size_t noldclos)
@@ -255,23 +299,6 @@ void closure_posix(const closure_t &init, closure_t &done,
         }
  }
  
-/* note [at most one final item per closure]
- *
- * By construction NFA has exactly one final state per rule. Thus closure
- * has at most one final item per rule (in other words, all final items
- * in closure belong to different rules). The rule with the highest priority
- * shadowes all other rules. Final items that correspond to shadowed rules
- * are useless and should be removed as early as possible.
- *
- * If we let such items remain in closure, they may prevent the new DFA
- * state from being merged with other states. This won't affect the final
- * program: meaningless finalizing tags will be removed by dead code
- * elimination and subsequent minimization will merge equivalent final
- * states. However, it's better not to add useless final items at all.
- *
- * Note that the first final item reached by the epsilon-closure it the one
- * with the highest priority (see note [closure items are sorted by rule]).
- */
  
  void closure_leftmost(const closure_t &init, closure_t &done,
         closure_t *shadow, Tagpool &tagpool)
@@ -328,6 +355,7 @@ void closure_leftmost(const closure_t &init, closure_t &done,
         }
  }
  
+
  void prune(closure_t &clos, std::valarray<Rule> &rules)
  {
         clositer_t b = clos.begin(), e = clos.end(), i, j;
@@ -351,24 +379,6 @@ void prune(closure_t &clos, std::valarray<Rule> &rules)
         clos.resize(n);
  }
  
-/* note [the difference between TDFA(0) and TDFA(1)]
- *
- * TDFA(0) performs epsilon-closure after transition on symbol,
- * while TDFA(1) performs it before the transition and uses the lookahead
- * symbol to filter the closure.
- *
- * TDFA(0) is one step ahead of TDFA(1): it consumes a symol, then builds
- * epsilon-closure, eagerly applies all tags reachable by it and goes to
- * the next state.
- *
- * TDFA(1) is more lazy: it builds epsilon-closure, then filters it with
- * respect to the current symbol (uses only those states which have outgoing
- * transitions on this symbol), then applies corresponding tags (probably
- * not all tags applied by TDFA(0)) and then consumes the symbol and goes
- * to the next state.
- *
- * Thus in general TDFA(1) raises less conflicts than TDFA(0).
- */
  
  void lower_lookahead_to_transition(closure_t &clos)
  {
@@ -378,6 +388,7 @@ void lower_lookahead_to_transition(closure_t &clos)
         }
  }
  
+
  tcmd_t *generate_versions(dfa_t &dfa, closure_t &clos, Tagpool &tagpool, newvers_t &newvers)
  {
         tcmd_t *cmd = NULL;
@@ -460,12 +471,14 @@ tcmd_t *generate_versions(dfa_t &dfa, closure_t &clos, Tagpool &tagpool, newvers
         return cmd;
  }
  
+
  static inline int32_t pack(int32_t longest, int32_t leftmost)
  {
         // leftmost: higher 2 bits, longest: lower 30 bits
         return longest | (leftmost << 30);
  }
  
+
  void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags,
         const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos)
  {
diff --git a/re2c/src/dfa/dead_rules.cc b/re2c/src/dfa/dead_rules.cc

index 296b1f6b15fee769367a665524f37e9a67a553cd..d09601490e4eab3943f81e9cd0d23eafb7c13db6 100644 (file)
--- a/re2c/src/dfa/dead_rules.cc
+++ b/re2c/src/dfa/dead_rules.cc
@@ -45,6 +45,24 @@ struct tcmd_t;
   * which is further propagated back to the start state of DFA.
   */
  
+
+/* note [fallback states]
+ *
+ * Find states that are accepting, but may be shadowed
+ * by other accepting states: when the short rule matches,
+ * lexer must try to match longer rules; if this attempt is
+ * unsuccessful it must fallback to the short match.
+ *
+ * In order to find fallback states we need to know if
+ * "none-rule" is reachable from the given state, the information
+ * we have after rule liveness analyses. Fallback states are
+ * needed at different points in time (both before and after
+ * certain transformations on DFA). Fortunately, fallback states
+ * are not affected by these transformations, so we can calculate
+ * them here and save for future use.
+ */
+
+
  // reversed DFA
  struct rdfa_t
  {
@@ -106,6 +124,7 @@ struct rdfa_t
         FORBID_COPY(rdfa_t);
  };
  
+
  static void backprop(const rdfa_t &rdfa, bool *live,
         size_t rule, size_t state)
  {
@@ -128,6 +147,7 @@ static void backprop(const rdfa_t &rdfa, bool *live,
         }
  }
  
+
  static void liveness_analyses(const rdfa_t &rdfa, bool *live)
  {
         for (size_t i = 0; i < rdfa.nstates; ++i) {
@@ -138,6 +158,7 @@ static void liveness_analyses(const rdfa_t &rdfa, bool *live)
         }
  }
  
+
  static void warn_dead_rules(const dfa_t &dfa, size_t defrule,
         const std::string &cond, const bool *live, Warn &warn)
  {
@@ -164,6 +185,7 @@ static void warn_dead_rules(const dfa_t &dfa, size_t defrule,
         }
  }
  
+
  static void remove_dead_final_states(dfa_t &dfa, const bool *fallthru)
  {
         const size_t
@@ -192,21 +214,7 @@ static void remove_dead_final_states(dfa_t &dfa, const bool *fallthru)
         }
  }
  
-/* note [fallback states]
- *
- * Find states that are accepting, but may be shadowed
- * by other accepting states: when the short rule matches,
- * lexer must try to match longer rules; if this attempt is
- * unsuccessful it must fallback to the short match.
- *
- * In order to find fallback states we need to know if
- * "none-rule" is reachable from the given state, the information
- * we have after rule liveness analyses. Fallback states are
- * needed at different points in time (both before and after
- * certain transformations on DFA). Fortunately, fallback states
- * are not affected by these transformations, so we can calculate
- * them here and save for future use.
- */
+
  static void find_fallback_states(dfa_t &dfa, const bool *fallthru)
  {
         const size_t
@@ -230,6 +238,7 @@ static void find_fallback_states(dfa_t &dfa, const bool *fallthru)
         }
  }
  
+
  void cutoff_dead_rules(dfa_t &dfa, size_t defrule, const std::string &cond, Warn &warn)
  {
         const rdfa_t rdfa(dfa);
diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc

index 93263d82b09882d72509766a71f5f2ee25a2d323..3334e9a0b57f6fbba9d4efbd66ad42d2e9144698 100644 (file)
--- a/re2c/src/dfa/determinization.cc
+++ b/re2c/src/dfa/determinization.cc
@@ -31,8 +31,10 @@ static void warn_nondeterministic_tags(const kernels_t &kernels,
         const Tagpool &tagpool, const std::vector<Tag> &tags,
         const std::valarray<Rule> &rules, const std::string &cond, Warn &warn);
  
+
  const size_t dfa_t::NIL = std::numeric_limits<size_t>::max();
  
+
  nfa_state_t *transition(nfa_state_t *state, uint32_t symbol)
  {
         if (state->type != nfa_state_t::RAN) {
@@ -46,6 +48,7 @@ nfa_state_t *transition(nfa_state_t *state, uint32_t symbol)
         return NULL;
  }
  
+
  void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol)
  {
         clos.clear();
@@ -58,6 +61,7 @@ void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol)
         }
  }
  
+
  dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
         const std::string &cond, Warn &warn)
         : states()
@@ -128,13 +132,11 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
         }
  }
  
-/*
- * For each tag, find maximal number of parallel versions of this tag
- * used in each kernel (degree of non-determinism) and warn about tags with
- * maximum degree two or more.
- *
- * WARNING: this function assumes that kernel items are grouped by rule
- */
+
+// For each tag, find maximal number of parallel versions of this tag
+// used in each kernel (degree of non-determinism) and warn about tags with
+// maximum degree two or more.
+// WARNING: this function assumes that kernel items are grouped by rule
  void warn_nondeterministic_tags(const kernels_t &kernels,
         const Tagpool &tagpool, const std::vector<Tag> &tags,
         const std::valarray<Rule> &rules, const std::string &cond, Warn &warn)
@@ -179,6 +181,7 @@ void warn_nondeterministic_tags(const kernels_t &kernels,
         }
  }
  
+
  dfa_t::~dfa_t()
  {
         std::vector<dfa_state_t*>::iterator
diff --git a/re2c/src/dfa/dump.cc b/re2c/src/dfa/dump.cc

index 646097b66410d98122d374e0a06d745fea393bcc..cf74a9277425c959d70acd1fdcb540903d1fe893 100644 (file)
--- a/re2c/src/dfa/dump.cc
+++ b/re2c/src/dfa/dump.cc
@@ -22,6 +22,7 @@ static void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid, size_t sy
  static const char *tagname(const Tag &t);
  static void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers);
  
+
  dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n)
         : debug(pool.opts->dump_dfa_raw)
         , dfa(d)
@@ -39,6 +40,7 @@ dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n)
                 "  edge[arrowhead=vee fontname=fixed]\n\n");
  }
  
+
  dump_dfa_t::~dump_dfa_t()
  {
         if (!debug) return;
@@ -47,11 +49,13 @@ dump_dfa_t::~dump_dfa_t()
         fprintf(stderr, "}\n");
  }
  
+
  uint32_t dump_dfa_t::index(const nfa_state_t *s) const
  {
         return static_cast<uint32_t>(s - base);
  }
  
+
  static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i)
  {
         if (i == HROOT) {
@@ -72,6 +76,7 @@ static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i)
         fprintf(stderr, " ");
  }
  
+
  void dump_dfa_t::closure_tags(cclositer_t c)
  {
         if (!debug) return;
@@ -90,6 +95,7 @@ void dump_dfa_t::closure_tags(cclositer_t c)
         }
  }
  
+
  void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew)
  {
         if (!debug) return;
@@ -126,6 +132,7 @@ void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew)
         fprintf(stderr, "</TABLE>>]\n");
  }
  
+
  void dump_dfa_t::state0(const closure_t &clos)
  {
         if (!debug) return;
@@ -148,6 +155,7 @@ void dump_dfa_t::state0(const closure_t &clos)
         }
  }
  
+
  void dump_dfa_t::state(const closure_t &clos, size_t state, size_t symbol, bool isnew)
  {
         if (!debug) return;
@@ -189,6 +197,7 @@ void dump_dfa_t::state(const closure_t &clos, size_t state, size_t symbol, bool
         }
  }
  
+
  void dump_dfa_t::final(size_t state, const nfa_state_t *port)
  {
         if (!debug) return;
@@ -210,6 +219,7 @@ void dump_dfa_t::final(size_t state, const nfa_state_t *port)
         fprintf(stderr, "\"]\n");
  }
  
+
  void dump_dfa(const dfa_t &dfa)
  {
         const size_t
@@ -273,6 +283,7 @@ void dump_dfa(const dfa_t &dfa)
         fprintf(stderr, "}\n");
  }
  
+
  void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid,
         size_t sym, const tcpool_t &tcpool)
  {
@@ -280,6 +291,7 @@ void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid,
         dump_tcmd(cmd);
  }
  
+
  void dump_tcmd(const tcmd_t *p)
  {
         if (!p) return;
@@ -302,11 +314,13 @@ void dump_tcmd(const tcmd_t *p)
         }
  }
  
+
  const char *tagname(const Tag &t)
  {
         return t.name ? t.name->c_str() : "";
  }
  
+
  void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers)
  {
         if (ttran == HROOT) return;
diff --git a/re2c/src/dfa/fallback_tags.cc b/re2c/src/dfa/fallback_tags.cc

index 5eee256329d90436c87f7d5302fd253a74966472..06353320d685d1601f086f9dfb2361adf1e8e746 100644 (file)
--- a/re2c/src/dfa/fallback_tags.cc
+++ b/re2c/src/dfa/fallback_tags.cc
@@ -9,8 +9,6 @@
  namespace re2c
  {
  
-static void find_overwritten_tags(const dfa_t &dfa, size_t state, bool *been, bool *owrt);
-
  /* note [fallback tags]
   *
   * We need to backup tags in fallback states, because they may be
@@ -36,6 +34,10 @@ static void find_overwritten_tags(const dfa_t &dfa, size_t state, bool *been, bo
   * only create backup if the origin is overwritten on some path.
   */
  
+
+static void find_overwritten_tags(const dfa_t &dfa, size_t state, bool *been, bool *owrt);
+
+
  void find_overwritten_tags(const dfa_t &dfa, size_t state,
         bool *been, bool *owrt)
  {
@@ -55,6 +57,7 @@ void find_overwritten_tags(const dfa_t &dfa, size_t state,
         }
  }
  
+
  // overwritten tags need 'copy' on all outgoing non-accepting paths
  // ('copy' commands must go first, before potential overwrites)
  static void backup(dfa_t &dfa, dfa_state_t *s, tagver_t l, tagver_t r)
@@ -68,6 +71,7 @@ static void backup(dfa_t &dfa, dfa_state_t *s, tagver_t l, tagver_t r)
         }
  }
  
+
  // WARNING: this function assumes that falthrough and fallback
  // attributes of DFA states have already been calculated, see
  // note [fallback states]
diff --git a/re2c/src/dfa/fillpoints.cc b/re2c/src/dfa/fillpoints.cc

index 1e2c844b5f7ee3716776781a702a100b22a80f9b..c43af1a95644b5c8a015e3df5aca3c7e7ac4930a 100644 (file)
--- a/re2c/src/dfa/fillpoints.cc
+++ b/re2c/src/dfa/fillpoints.cc
@@ -8,21 +8,6 @@
  namespace re2c
  {
  
-static const size_t SCC_INF = std::numeric_limits<size_t>::max();
-static const size_t SCC_UND = SCC_INF - 1;
-
-static bool loopback(size_t node, size_t narcs, const size_t *arcs)
-{
-       for (size_t i = 0; i < narcs; ++i)
-       {
-               if (arcs[i] == node)
-               {
-                       return true;
-               }
-       }
-       return false;
-}
-
  /*
   * node [finding strongly connected components of DFA]
   *
@@ -47,6 +32,25 @@ static bool loopback(size_t node, size_t narcs, const size_t *arcs)
   * stack (SCC_INF).
   *
   */
+
+
+static const size_t SCC_INF = std::numeric_limits<size_t>::max();
+static const size_t SCC_UND = SCC_INF - 1;
+
+
+static bool loopback(size_t node, size_t narcs, const size_t *arcs)
+{
+       for (size_t i = 0; i < narcs; ++i)
+       {
+               if (arcs[i] == node)
+               {
+                       return true;
+               }
+       }
+       return false;
+}
+
+
  static void scc(
         const dfa_t &dfa,
         std::stack<size_t> &stack,
@@ -94,6 +98,7 @@ static void scc(
         }
  }
  
+
  static void calc_fill(
         const dfa_t &dfa,
         const std::vector<bool> &trivial,
@@ -124,6 +129,7 @@ static void calc_fill(
         }
  }
  
+
  void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
  {
         const size_t size = dfa.states.size();
diff --git a/re2c/src/dfa/find_state.cc b/re2c/src/dfa/find_state.cc

index ac6ea0d90e516122969dc58b93c01839a83e4f58..e288a47b8484479b8974a3478bb07d8baebb2114 100644 (file)
--- a/re2c/src/dfa/find_state.cc
+++ b/re2c/src/dfa/find_state.cc
@@ -16,6 +16,67 @@
  namespace re2c
  {
  
+/* note [mapping ignores items with lookahead tags]
+ *
+ * Consider two items X and Y being mapped.
+ *
+ * If tag T belongs to lookahead tags of item X, then all
+ * outgoing transitions from item X update T. Which means
+ * that it doesn't matter what particular version T has in X:
+ * whatever version it has, it will be overwritten by any
+ * outgoing transition.
+ *
+ * Note that lookahead tags are identical for both items
+ * X and Y, because we only try to map DFA states with
+ * identical lookahead tags.
+ */
+
+
+/* note [save(X), copy(Y,X) optimization]
+ *
+ * save(X) command followed by a copy(Y,X) command can be optimized to
+ * save(Y). This helps reduce the number commands and versions (new version
+ * X is gone), but what is more important, it allows to put copy commands
+ * in front of save commands. This order is necessary when it comes to
+ * fallback commands.
+ *
+ * Note that in case of injective mapping there may be more than one copy
+ * command matching the same save command: save(X), copy(Y,X), copy(Z,X).
+ * In this case save command must be replicated for each copy command:
+ * save(Y), save(Z).
+ *
+ * For each save(X) command there must be at least one copy(Y,X) command
+ * (exactly one case of bijective mapping). This is because X version in
+ * save(X) command must be a new version which cannot occur in the older
+ * DFA state. Thus all save commands are transformed (maybe replicated) by
+ * copy commands, and some copy commands are erased by save commands.
+ *
+ * This optimization is applied after checking priority violation, so it
+ * cannot affect the check.
+*/
+
+
+/* note [bijective mappings]
+ *
+ * Suppose we have just constructed a new DFA state Y and want to map it
+ * to an existing DFA state X. States must have identical sets of NFA
+ * substates and identical sets of lookahead tags for each substate.
+ * Furtermore, there must be bijective mapping between versions of X and Y
+ * and this mapping must preserve version order (respect priorities).
+ *
+ * Bijective mappings have a nice property: there is only one possible state
+ * X to which Y can be mapped. Indeed, if there was another state Z that
+ * can be bijectively mapped to Y preserving priorities, then Z itself can
+ * be mapped to X: both (1) and (2) are symmetrical in case of bijection
+ * and the relation is transitive. So the existence of Z is a contradiction.
+ *
+ * In principle, non-bijective mappings are also possible if the new state
+ * is less versatile than the old one (surjection from X to Y). However,
+ * non-bijective mappings lack the 'unique counterpart' property and need
+ * more complex analysis (and are not so useful after all), so we drop them.
+ */
+
+
  struct kernel_eq_t
  {
         Tagpool &tagpool;
@@ -199,43 +260,6 @@ bool kernel_eq_t::operator()(const kernel_t *x, const kernel_t *y) const
                 && equal_lookahead_tags(x, y, tagpool, tags);
  }
  
-/* note [mapping ignores items with lookahead tags]
- *
- * Consider two items X and Y being mapped.
- *
- * If tag T belongs to lookahead tags of item X, then all
- * outgoing transitions from item X update T. Which means
- * that it doesn't matter what particular version T has in X:
- * whatever version it has, it will be overwritten by any
- * outgoing transition.
- *
- * Note that lookahead tags are identical for both items
- * X and Y, because we only try to map DFA states with
- * identical lookahead tags.
- */
-
-/* note [save(X), copy(Y,X) optimization]
- *
- * save(X) command followed by a copy(Y,X) command can be optimized to
- * save(Y). This helps reduce the number commands and versions (new version
- * X is gone), but what is more important, it allows to put copy commands
- * in front of save commands. This order is necessary when it comes to
- * fallback commands.
- *
- * Note that in case of injective mapping there may be more than one copy
- * command matching the same save command: save(X), copy(Y,X), copy(Z,X).
- * In this case save command must be replicated for each copy command:
- * save(Y), save(Z).
- *
- * For each save(X) command there must be at least one copy(Y,X) command
- * (exactly one case of bijective mapping). This is because X version in
- * save(X) command must be a new version which cannot occur in the older
- * DFA state. Thus all save commands are transformed (maybe replicated) by
- * copy commands, and some copy commands are erased by save commands.
- *
- * This optimization is applied after checking priority violation, so it
- * cannot affect the check.
-*/
  
  bool kernels_t::operator()(const kernel_t *x, const kernel_t *y)
  {
@@ -326,25 +350,6 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y)
         return !nontrivial_cycles;
  }
  
-/* note [bijective mappings]
- *
- * Suppose we have just constructed a new DFA state Y and want to map it
- * to an existing DFA state X. States must have identical sets of NFA
- * substates and identical sets of lookahead tags for each substate.
- * Furtermore, there must be bijective mapping between versions of X and Y
- * and this mapping must preserve version order (respect priorities).
- *
- * Bijective mappings have a nice property: there is only one possible state
- * X to which Y can be mapped. Indeed, if there was another state Z that
- * can be bijectively mapped to Y preserving priorities, then Z itself can
- * be mapped to X: both (1) and (2) are symmetrical in case of bijection
- * and the relation is transitive. So the existence of Z is a contradiction.
- *
- * In principle, non-bijective mappings are also possible if the new state
- * is less versatile than the old one (surjection from X to Y). However,
- * non-bijective mappings lack the 'unique counterpart' property and need
- * more complex analysis (and are not so useful after all), so we drop them.
- */
  
  size_t kernels_t::insert(const closure_t &closure, tagver_t maxver,
         const prectable_t *prectbl, tcmd_t *&acts, bool &is_new)
@@ -386,6 +391,7 @@ size_t kernels_t::insert(const closure_t &closure, tagver_t maxver,
         return x;
  }
  
+
  static tcmd_t *finalizer(const clos_t &clos, size_t ridx,
         dfa_t &dfa, const Tagpool &tagpool, const std::vector<Tag> &tags)
  {
@@ -418,6 +424,7 @@ static tcmd_t *finalizer(const clos_t &clos, size_t ridx,
         return copy;
  }
  
+
  void find_state(dfa_t &dfa, size_t origin, size_t symbol, kernels_t &kernels,
         const closure_t &closure, tcmd_t *acts, dump_dfa_t &dump, const prectable_t *prectbl)
  {
diff --git a/re2c/src/dfa/minimization.cc b/re2c/src/dfa/minimization.cc

index 6404b45d2093f0c2e3713a171bf3430939fd61c2..000e1a9d35edd6e9a7321f62f0acaef14c387bf6 100644 (file)
--- a/re2c/src/dfa/minimization.cc
+++ b/re2c/src/dfa/minimization.cc
@@ -23,6 +23,34 @@ namespace re2c
   * the same symbol that go to distinguishable states. The algorithm
   * loops until the matrix stops changing.
   */
+
+
+/*
+ * note [DFA minimization: Moore algorithm]
+ *
+ * The algorithm maintains partition of DFA states.
+ * Initial partition is coarse: states are distinguished according
+ * to their rule and tag set. Partition is gradually refined: each
+ * set of states is split into minimal number of subsets such that
+ * for all states in a subset transitions on the same symbol go to
+ * the same set of states.
+ * The algorithm loops until partition stops changing.
+ */
+
+
+/* note [distinguish states by tags]
+ *
+ * Final states may have 'rule' tags: tags that must be set when lexer
+ * takes epsilon-transition to the binded action. Final states with
+ * the same rule but different sets on 'rule' tags cannot be merged.
+ *
+ * Compare the following two cases:
+ *     "ac" | "bc"
+ *     "ac" @p | "bc"
+ * Tail "c" can be deduplicated in the 1st case, but not in the 2nd.
+ */
+
+
  static void minimization_table(
         size_t *part,
         const std::vector<dfa_state_t*> &states,
@@ -112,17 +140,7 @@ static void minimization_table(
         delete[] tbl;
  }
  
-/*
- * note [DFA minimization: Moore algorithm]
- *
- * The algorithm maintains partition of DFA states.
- * Initial partition is coarse: states are distinguished according
- * to their rule and tag set. Partition is gradually refined: each
- * set of states is split into minimal number of subsets such that
- * for all states in a subset transitions on the same symbol go to
- * the same set of states.
- * The algorithm loops until partition stops changing.
- */
+
  static void minimization_moore(
         size_t *part,
         const std::vector<dfa_state_t*> &states,
@@ -213,17 +231,6 @@ static void minimization_moore(
         delete[] next;
  }
  
-/* note [distinguish states by tags]
- *
- * Final states may have 'rule' tags: tags that must be set when lexer
- * takes epsilon-transition to the binded action. Final states with
- * the same rule but different sets on 'rule' tags cannot be merged.
- *
- * Compare the following two cases:
- *     "ac" | "bc"
- *     "ac" @p | "bc"
- * Tail "c" can be deduplicated in the 1st case, but not in the 2nd.
- */
  
  void minimization(dfa_t &dfa, dfa_minimization_t type)
  {
diff --git a/re2c/src/dfa/tagpool.cc b/re2c/src/dfa/tagpool.cc

index 0cc0feac6a88a4a0e7eea4d77ef99c67b11736dd..330fd7b39b365034528eb34454cf75093029d2b5 100644 (file)
--- a/re2c/src/dfa/tagpool.cc
+++ b/re2c/src/dfa/tagpool.cc
@@ -20,6 +20,7 @@ struct eqtag_t
         }
  };
  
+
  Tagpool::Tagpool(const opt_t *o, size_t n)
         : lookup()
         , opts(o)
@@ -32,6 +33,7 @@ Tagpool::Tagpool(const opt_t *o, size_t n)
         , cstack()
  {}
  
+
  Tagpool::~Tagpool()
  {
         delete[] buffer;
@@ -41,12 +43,14 @@ Tagpool::~Tagpool()
         }
  }
  
+
  size_t Tagpool::insert_const(tagver_t ver)
  {
         std::fill(buffer, buffer + ntags, ver);
         return insert(buffer);
  }
  
+
  size_t Tagpool::insert_succ(tagver_t fst)
  {
         for (size_t i = 0; i < ntags; ++i) {
@@ -55,6 +59,7 @@ size_t Tagpool::insert_succ(tagver_t fst)
         return insert(buffer);
  }
  
+
  size_t Tagpool::insert(const tagver_t *tags)
  {
         const size_t size = ntags * sizeof(tagver_t);
@@ -71,6 +76,7 @@ size_t Tagpool::insert(const tagver_t *tags)
         return lookup.push(hash, copy);
  }
  
+
  const tagver_t *Tagpool::operator[](size_t idx) const
  {
         return lookup[idx];
diff --git a/re2c/src/dfa/tagtree.cc b/re2c/src/dfa/tagtree.cc

index 64c76f820a604238d3aa71e52226f289c66842c6..bf056f98165b36206159e56c8bf43d3370d58234 100644 (file)
--- a/re2c/src/dfa/tagtree.cc
+++ b/re2c/src/dfa/tagtree.cc
@@ -9,16 +9,22 @@ namespace re2c
  
  static const tagver_t DELIM = TAGVER_CURSOR - 1;
  
+
  tagtree_t::tagtree_t(): nodes(), path1(), path2() {}
  
+
  hidx_t tagtree_t::pred(hidx_t i) const { return nodes[i].pred; }
  
+
  tag_info_t tagtree_t::info(hidx_t i) const { return nodes[i].info; }
  
+
  tagver_t tagtree_t::elem(hidx_t i) const { return nodes[i].info.neg ? TAGVER_BOTTOM : TAGVER_CURSOR; }
  
+
  size_t tagtree_t::tag(hidx_t i) const { return nodes[i].info.idx; }
  
+
  hidx_t tagtree_t::push(hidx_t idx, tag_info_t info)
  {
         node_t x = {idx, info};
@@ -26,6 +32,7 @@ hidx_t tagtree_t::push(hidx_t idx, tag_info_t info)
         return static_cast<hidx_t>(nodes.size() - 1);
  }
  
+
  tagver_t tagtree_t::last(hidx_t i, size_t t) const
  {
         for (; i != HROOT; i = pred(i)) {
@@ -34,6 +41,7 @@ tagver_t tagtree_t::last(hidx_t i, size_t t) const
         return TAGVER_ZERO;
  }
  
+
  int32_t tagtree_t::compare_reversed(hidx_t x, hidx_t y, size_t t) const
  {
         // compare in reverse, from tail to head: direction makes
@@ -51,6 +59,7 @@ int32_t tagtree_t::compare_reversed(hidx_t x, hidx_t y, size_t t) const
         }
  }
  
+
  static void reconstruct_history(const tagtree_t &history,
         std::vector<tag_info_t> &path, hidx_t idx)
  {
@@ -60,18 +69,21 @@ static void reconstruct_history(const tagtree_t &history,
         }
  }
  
+
  static inline int32_t unpack_longest(int32_t value)
  {
         // lower 30 bits
         return value & 0x3fffFFFF;
  }
  
+
  static inline int32_t unpack_leftmost(int32_t value)
  {
         // higher 2 bits
         return value >> 30u;
  }
  
+
  int32_t tagtree_t::precedence(const clos_t &x, const clos_t &y,
         int32_t &rhox, int32_t &rhoy, const prectable_t *prectbl,
         const std::vector<Tag> &tags, size_t nclos)
diff --git a/re2c/src/dfa/tcmd.cc b/re2c/src/dfa/tcmd.cc

index a7ad22e963836b23e62e9f0e974d270b2521067a..4fffd2d1190887b7d25388405d0474e23d9682a1 100644 (file)
--- a/re2c/src/dfa/tcmd.cc
+++ b/re2c/src/dfa/tcmd.cc
@@ -7,24 +7,6 @@
  namespace re2c
  {
  
-static uint32_t hash_tcmd(const tcmd_t *tcmd);
-
-bool tcmd_t::equal(const tcmd_t &x, const tcmd_t &y)
-{
-       return x.lhs == y.lhs
-               && x.rhs == y.rhs
-               && equal_history(x.history, y.history);
-}
-
-bool tcmd_t::equal_history(const tagver_t *h, const tagver_t *g)
-{
-       for (;;) {
-               if (*h != *g) return false;
-               if (*h == TAGVER_ZERO) return true;
-               ++h; ++g;
-       }
-}
-
  /* note [topological ordering of copy commands]
   *
   * The order in which copy commands are executed is important:
@@ -52,11 +34,34 @@ bool tcmd_t::equal_history(const tagver_t *h, const tagver_t *g)
   * The algorithm starts and ends with all-zero in-degree buffer.
   */
  
+
+static uint32_t hash_tcmd(const tcmd_t *tcmd);
+
+
+bool tcmd_t::equal(const tcmd_t &x, const tcmd_t &y)
+{
+       return x.lhs == y.lhs
+               && x.rhs == y.rhs
+               && equal_history(x.history, y.history);
+}
+
+
+bool tcmd_t::equal_history(const tagver_t *h, const tagver_t *g)
+{
+       for (;;) {
+               if (*h != *g) return false;
+               if (*h == TAGVER_ZERO) return true;
+               ++h; ++g;
+       }
+}
+
+
  bool tcmd_t::iscopy(const tcmd_t *x)
  {
         return x->rhs != TAGVER_ZERO && x->history[0] == TAGVER_ZERO;
  }
  
+
  bool tcmd_t::isset(const tcmd_t *x)
  {
         if (x->rhs == TAGVER_ZERO) {
@@ -66,11 +71,13 @@ bool tcmd_t::isset(const tcmd_t *x)
         return false;
  }
  
+
  bool tcmd_t::isadd(const tcmd_t *x)
  {
         return x->rhs != TAGVER_ZERO && x->history[0] != TAGVER_ZERO;
  }
  
+
  bool tcmd_t::topsort(tcmd_t **phead, uint32_t *indeg)
  {
         tcmd_t *x0 = *phead, *x, *y0 = NULL, **py;
@@ -115,6 +122,7 @@ bool tcmd_t::topsort(tcmd_t **phead, uint32_t *indeg)
         return nontrivial_cycles;
  }
  
+
  tcpool_t::tcpool_t()
         : alc()
         , index()
@@ -123,6 +131,7 @@ tcpool_t::tcpool_t()
         assert(TCID0 == insert(NULL));
  }
  
+
  tcmd_t *tcpool_t::make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs)
  {
         tcmd_t *p = alc.alloct<tcmd_t>(1);
@@ -133,6 +142,7 @@ tcmd_t *tcpool_t::make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs)
         return p;
  }
  
+
  tcmd_t *tcpool_t::make_set(tcmd_t *next, tagver_t lhs, tagver_t set)
  {
         const size_t size = sizeof(tcmd_t) + sizeof(tagver_t);
@@ -145,6 +155,7 @@ tcmd_t *tcpool_t::make_set(tcmd_t *next, tagver_t lhs, tagver_t set)
         return p;
  }
  
+
  tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs,
         const tagtree_t &history, hidx_t hidx, size_t tag)
  {
@@ -168,6 +179,7 @@ tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs,
         return p;
  }
  
+
  tcmd_t *tcpool_t::copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs,
         const tagver_t *history)
  {
@@ -183,6 +195,7 @@ tcmd_t *tcpool_t::copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs,
         return p;
  }
  
+
  uint32_t hash_tcmd(const tcmd_t *tcmd)
  {
         uint32_t h = 0;
@@ -194,6 +207,7 @@ uint32_t hash_tcmd(const tcmd_t *tcmd)
         return h;
  }
  
+
  struct tcmd_eq_t
  {
         bool operator()(const tcmd_t *x, const tcmd_t *y) const
@@ -208,6 +222,7 @@ struct tcmd_eq_t
         }
  };
  
+
  tcid_t tcpool_t::insert(const tcmd_t *tcmd)
  {
         const uint32_t h = hash_tcmd(tcmd);
@@ -221,6 +236,7 @@ tcid_t tcpool_t::insert(const tcmd_t *tcmd)
         return static_cast<tcid_t>(id);
  }
  
+
  const tcmd_t *tcpool_t::operator[](tcid_t id) const
  {
         return index[id];
author	Ulya Trofimovich <skvadrik@gmail.com>
	Sat, 11 Aug 2018 08:01:57 +0000 (09:01 +0100)
committer	Ulya Trofimovich <skvadrik@gmail.com>
	Sat, 11 Aug 2018 08:01:57 +0000 (09:01 +0100)
re2c/src/dfa/closure.cc		patch \| blob \| history
re2c/src/dfa/dead_rules.cc		patch \| blob \| history
re2c/src/dfa/determinization.cc		patch \| blob \| history
re2c/src/dfa/dump.cc		patch \| blob \| history
re2c/src/dfa/fallback_tags.cc		patch \| blob \| history
re2c/src/dfa/fillpoints.cc		patch \| blob \| history
re2c/src/dfa/find_state.cc		patch \| blob \| history
re2c/src/dfa/minimization.cc		patch \| blob \| history
re2c/src/dfa/tagpool.cc		patch \| blob \| history
re2c/src/dfa/tagtree.cc		patch \| blob \| history
re2c/src/dfa/tcmd.cc		patch \| blob \| history