From: Ulya Trofimovich Date: Fri, 28 Jul 2017 10:17:10 +0000 (+0100) Subject: POSIX disambiguation: use the same comparison algorithm for orbit and non-orbit tags. X-Git-Tag: 1.0~39^2~18 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ca9f601f1655e65b593c485b00c67f9b58bba577;p=re2c POSIX disambiguation: use the same comparison algorithm for orbit and non-orbit tags. Previously we needed a different algorithm for non-orbit tags, because disambiguation was based on both start and end tags. Non-orbit start tags cannot be compared incrementally, like orbit tags, because default value may be discovered on a later step than non-default value. Non-orbit end tags do not have this problem: since negative tags are inserted at the end of alternatives, default value is always discovered on the same step as non-default value (provided that all higher-priority tags agree and comparison reaches this tag at all). Now that start tags are ignored, we can use incremental comparison for both orbit and non-orbit subhistories, which simplifies the code. --- diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc index 23e61b48..a5a4b358 100644 --- a/re2c/src/dfa/closure.cc +++ b/re2c/src/dfa/closure.cc @@ -62,7 +62,12 @@ bool cmpby_rule_state(const clos_t &x, const clos_t &y) // Skip non-orbit start tags: their position is fixed on some higher-priority // tag (except the very first tag, but in RE2C match is always anchored). // We cannot skip orbit start tag because the corresponding orbit end tag is -// hoisted out of loop (by construction) and is, in fact, non-orbit. +// hoisted out of loop (by construction) and is, in fact, non-orbit; but we can +// skip orbit end tag instead. +// Skipping non-orbit start tags allows us to compare all subhistories in the +// same way (incrementally). Subhistories of non-orbit start tags cannot be +// compared incrementally, because default value may be added on a later step +// than non-default value. 
static bool redundant(size_t t, const std::vector &tags) { return (t % 2 == 0) != orbit(tags[t]); } @@ -219,7 +224,7 @@ int32_t compare_posix(const clos_t &c1, const clos_t &c2, const tagver_t o1 = tagpool[c1.order][t], o2 = tagpool[c2.order][t]; - const int32_t cmp = h.compare_histories(i1, i2, o1, o2, t, orbit(tags[t])); + const int32_t cmp = h.compare_histories(i1, i2, o1, o2, t); if (cmp != 0) return cmp; } return 0; @@ -436,7 +441,6 @@ struct cmp_posix_t { Tagpool &tagpool; size_t tag; - bool orbit; bool operator()(cclositer_t x, cclositer_t y) { const hidx_t i1 = x->tlook, i2 = y->tlook; @@ -444,7 +448,7 @@ struct cmp_posix_t o1 = tagpool[x->order][tag], o2 = tagpool[y->order][tag]; // comparison result is inverted, because orders are used as offsets - return tagpool.history.compare_last_subhistories(i1, i2, o1, o2, tag, orbit) > 0; + return tagpool.history.compare_last_subhistories(i1, i2, o1, o2, tag) > 0; } }; @@ -477,7 +481,7 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector &tags) for (size_t t = 0; t < ntag; ++t) { if (redundant(t, tags)) continue; - cmp_posix_t cmp = {tagpool, t, orbit(tags[t])}; + cmp_posix_t cmp = {tagpool, t}; std::sort(ps, pe, cmp); tagver_t m = 0; o = os0; diff --git a/re2c/src/dfa/tagtree.cc b/re2c/src/dfa/tagtree.cc index 0011c205..5e9afcc0 100644 --- a/re2c/src/dfa/tagtree.cc +++ b/re2c/src/dfa/tagtree.cc @@ -100,23 +100,17 @@ static int32_t subhistory_list(const tagtree_t &history, // a single bottom value, or one or more cursor values (exactly one for // non-orbit subhistories). Because of the shortest-path algorithm earlier // subhistories do not necessarily coincide, so comparing only the last -// pair of subhistories is not enough. -// see note [POSIX orbit tags] +// pair of subhistories is not enough. See note [POSIX orbit tags]. 
int32_t tagtree_t::compare_histories(hidx_t x, hidx_t y, - tagver_t ox, tagver_t oy, size_t t, bool orbit) + tagver_t ox, tagver_t oy, size_t t) { const int32_t n1 = subhistory_list(*this, path1, x, t), n2 = subhistory_list(*this, path2, y, t); assert(n1 == n2); - if (orbit) { - path1.push_back(ox); - path2.push_back(oy); - } else { - if (path1.back() == DELIM) path1.push_back(ox); - if (path2.back() == DELIM) path2.push_back(oy); - } + path1.push_back(ox); + path2.push_back(oy); std::vector::const_reverse_iterator i1 = path1.rbegin(), e1 = path1.rend(), @@ -134,7 +128,7 @@ int32_t tagtree_t::compare_histories(hidx_t x, hidx_t y, } static void last_subhistory(const tagtree_t &history, std::vector &path, - hidx_t idx, tagver_t order, size_t tag, bool orbit) + hidx_t idx, tagver_t order, size_t tag) { path.clear(); hidx_t i = idx; @@ -144,16 +138,14 @@ static void last_subhistory(const tagtree_t &history, std::vector &pat path.push_back(history.elem(i)); } } - if (i == HROOT && (orbit || path.empty())) { - path.push_back(order); - } + if (i == HROOT) path.push_back(order); } int32_t tagtree_t::compare_last_subhistories(hidx_t x, hidx_t y, - tagver_t ox, tagver_t oy, size_t t, bool orbit) + tagver_t ox, tagver_t oy, size_t t) { - last_subhistory(*this, path1, x, ox, t, orbit); - last_subhistory(*this, path2, y, oy, t, orbit); + last_subhistory(*this, path1, x, ox, t); + last_subhistory(*this, path2, y, oy, t); return compare_reversed(path1, path2); } diff --git a/re2c/src/dfa/tagtree.h b/re2c/src/dfa/tagtree.h index 390b2891..6fc66734 100644 --- a/re2c/src/dfa/tagtree.h +++ b/re2c/src/dfa/tagtree.h @@ -34,8 +34,8 @@ struct tagtree_t size_t tag(hidx_t i) const; hidx_t push(hidx_t i, size_t t, tagver_t v); int32_t compare_plain(hidx_t x, hidx_t y, size_t t); - int32_t compare_histories(hidx_t x, hidx_t y, tagver_t ox, tagver_t oy, size_t t, bool orbit); - int32_t compare_last_subhistories(hidx_t x, hidx_t y, tagver_t ox, tagver_t oy, size_t t, bool orbit); + int32_t 
compare_histories(hidx_t x, hidx_t y, tagver_t ox, tagver_t oy, size_t t); + int32_t compare_last_subhistories(hidx_t x, hidx_t y, tagver_t ox, tagver_t oy, size_t t); tagver_t last(hidx_t i, size_t t) const; FORBID_COPY(tagtree_t); };