};
# endif
-#define YYPACT_NINF -44
+#define YYPACT_NINF -45
#define yypact_value_is_default(Yystate) \
- (!!((Yystate) == (-44)))
+ (!!((Yystate) == (-45)))
#define YYTABLE_NINF -1
STATE-NUM. */
static const yytype_int8 yypact[] =
{
- -44, 1, -44, -44, -7, -44, -44, -44, 8, 15,
- 20, -44, 20, -44, 42, 30, 31, 20, -44, 0,
- -44, -44, -44, -44, 16, 29, 35, 28, -44, 17,
- 25, -44, 20, 20, 20, -44, -44, -44, -44, -44,
- 26, -44, -44, 37, 43, -44, 38, 3, 50, -44,
- -44, -44, -44, -44, 39, 31, 20, -44, 51, 44,
- 54, 16, 16, -44, 57, 56, -44, -44, -44, -44,
- -44
+ -45, 0, -45, -45, 18, -45, -45, -45, 7, 22,
+ 19, -45, 19, -45, 41, 30, 10, -45, 19, -1,
+ -45, -45, -45, -45, 15, 28, 35, 34, -45, 27,
+ 24, -45, 19, 19, 19, -45, -45, -45, -45, -45,
+ 26, -45, -45, 38, 43, -45, 40, 2, 51, -45,
+ -45, -45, -45, -45, 39, 10, -45, -45, 52, 44,
+ 42, 15, 15, -45, 57, 56, -45, -45, -45, -45,
+ -45
};
/* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
/* YYPGOTO[NTERM-NUM]. */
static const yytype_int8 yypgoto[] =
{
- -44, -44, -44, -44, -44, -44, -43, 40, -44, 19,
- -8, 34, 36, -17, -44, 23, -44
+ -45, -45, -45, -45, -45, -45, -44, 45, -45, 16,
+ -9, 31, -14, -45, -45, 25, -45
};
/* YYDEFGOTO[NTERM-NUM]. */
number is the opposite. If YYTABLE_NINF, syntax error. */
static const yytype_uint8 yytable[] =
{
- 35, 2, 29, 36, 30, 20, 3, 4, 5, 28,
- 6, 7, 21, 7, 37, 8, 9, 61, 67, 68,
- 42, 22, 38, 39, 54, 10, 28, 10, 43, 23,
- 7, 24, 25, 50, 44, 22, 51, 33, 52, 35,
- 37, 32, 49, 23, 10, 33, 31, 48, 38, 39,
- 33, 47, 34, 58, 60, 59, 63, 64, 66, 33,
- 65, 69, 70, 57, 0, 46, 62, 55, 0, 0,
- 56
+ 2, 29, 36, 30, 35, 3, 4, 5, 28, 6,
+ 7, 21, 7, 37, 8, 9, 61, 67, 68, 42,
+ 56, 38, 39, 54, 10, 28, 10, 43, 22, 7,
+ 20, 34, 50, 44, 22, 51, 23, 52, 24, 25,
+ 37, 32, 23, 10, 33, 31, 66, 33, 38, 39,
+ 33, 47, 49, 48, 58, 59, 60, 63, 64, 33,
+ 65, 69, 70, 62, 55, 57, 0, 0, 0, 0,
+ 46
};
static const yytype_int8 yycheck[] =
{
- 17, 0, 10, 3, 12, 12, 5, 6, 7, 6,
- 9, 10, 4, 10, 14, 14, 15, 14, 61, 62,
- 4, 6, 22, 23, 32, 24, 6, 24, 12, 14,
- 10, 16, 17, 8, 18, 6, 11, 20, 13, 56,
- 14, 11, 25, 14, 24, 20, 4, 19, 22, 23,
- 20, 16, 21, 16, 16, 12, 6, 6, 4, 20,
- 16, 4, 6, 40, -1, 25, 47, 33, -1, -1,
- 34
+ 0, 10, 3, 12, 18, 5, 6, 7, 6, 9,
+ 10, 4, 10, 14, 14, 15, 14, 61, 62, 4,
+ 34, 22, 23, 32, 24, 6, 24, 12, 6, 10,
+ 12, 21, 8, 18, 6, 11, 14, 13, 16, 17,
+ 14, 11, 14, 24, 20, 4, 4, 20, 22, 23,
+ 20, 16, 25, 19, 16, 12, 16, 6, 6, 20,
+ 16, 4, 6, 47, 33, 40, -1, -1, -1, -1,
+ 25
};
/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
0, 27, 0, 5, 6, 7, 9, 10, 14, 15,
24, 28, 29, 31, 35, 36, 37, 38, 39, 42,
12, 4, 6, 14, 16, 17, 33, 34, 6, 36,
- 36, 4, 11, 20, 21, 39, 3, 14, 22, 23,
+ 36, 4, 11, 20, 21, 38, 3, 14, 22, 23,
40, 41, 4, 12, 18, 32, 33, 16, 19, 25,
8, 11, 13, 30, 36, 37, 38, 41, 16, 12,
16, 14, 35, 6, 6, 16, 4, 32, 32, 4,
&& c1.order == c2.order
&& c1.index == c2.index) return false;
- const hidx_t
- l1 = c1.tlook, l2 = c2.tlook,
- t1 = c1.ttran, t2 = c2.ttran;
- const tagver_t
- *v1 = tagpool[c1.tvers], *v2 = tagpool[c2.tvers],
- *o1 = tagpool[c1.order], *o2 = tagpool[c2.order];
+ const hidx_t l1 = c1.tlook, l2 = c2.tlook;
+ const tagver_t *o1 = tagpool[c1.order], *o2 = tagpool[c2.order];
tagver_t x, y;
tagtree_t &tagtree = tagpool.history;
for (size_t t = 0; t < tagpool.ntags; ++t) {
const Tag &tag = tags[t];
- // orbit capture tag: compare by order and tagged epsilon-paths
+ // orbit capture tag: compare by orders and tag histories
if (orbit(tag)) {
x = o1[t]; y = o2[t];
if (x < y) return false;
if (cmp < 0) return false;
if (cmp > 0) return true;
- assert(v1[t] == v2[t]);
-
- // open/close capture tag: maximize (on lookahead and versions);
- // if one is bottom and the other is not, fallback to leftmost
- // if both are bottoms, relay comparison to less prioritized tags
- // we don't use orders for minimize/maximize, because they are
- // already used for leftmost
+ // open/close capture tag: maximize (first, lookahead, then orders)
} else if (capture(tag)) {
x = tagtree.last(l1, t);
y = tagtree.last(l2, t);
- if (x == TAGVER_BOTTOM && y == TAGVER_BOTTOM) continue;
- if (x == TAGVER_BOTTOM || y == TAGVER_BOTTOM) goto leftmost;
- if (x > y) return false;
- if (x < y) return true;
-
- x = tagtree.last(t1, t);
- y = tagtree.last(t2, t);
- if (x == TAGVER_BOTTOM && y == TAGVER_BOTTOM) continue;
- if (x == TAGVER_BOTTOM || y == TAGVER_BOTTOM) goto leftmost;
- if (x > y) return false;
- if (x < y) return true;
-
- x = v1[t]; y = v2[t];
- if (x < 0 && y < 0) continue;
- if (x < 0 || y < 0) goto leftmost;
+ if (x == TAGVER_ZERO && y == TAGVER_ZERO) {
+ x = o1[t]; y = o2[t];
+ }
if (x > y) return false;
if (x < y) return true;
// simple tag: always prefer leftmost
} else {
- leftmost:
x = o1[t]; y = o2[t];
if (x < y) return false;
if (x > y) return true;
o[t] = static_cast<tagver_t>(d);
}
+ } else if (capture(tags[t])) {
+ std::set<tagver_t> keys;
+ for (c = b; c != e; ++c) {
+ tagver_t u = tagtree.last(c->tlook, t);
+ if (u == TAGVER_ZERO) {
+ u = tagpool[c->order][t];
+ }
+ keys.insert(u);
+ }
+ for (c = b; c != e; ++c, o += ntag) {
+ tagver_t u = tagtree.last(c->tlook, t);
+ if (u == TAGVER_ZERO) {
+ u = tagpool[c->order][t];
+ }
+ const ptrdiff_t d = std::distance(keys.begin(), keys.find(u));
+ o[t] = static_cast<tagver_t>(d);
+ }
+
// for simple tags and non-orbit capture tags item's order
// equals position of this item in leftmost NFA traversal
// (it's the same for all tags)
* (the way invalid code points are treated).
*/
+/* note [POSIX subexpression hierarchy]
+ *
+ * POSIX treats subexpressions with and without captures as equal,
+ * therefore we have to insert missing captures in subexpressions
+ * that influence disambiguation of existing captures. Such cases
+ * are: left alternative in union, if right alternative has captures;
+ * first operand in concatenation, if second operand has captures
+ * (unless all strings accepted by the first operand have the same
+ * length).
+ */
+
+/* Reports whether the subtree rooted at 'ast' contains a TAG or CAP node.
+ * NIL, STR, CLS, DOT, DEFAULT and DIFF nodes yield false without further
+ * descent; ALT and CAT yield true if either operand does; REF and ITER
+ * defer to the node they wrap.
+ */
+static bool has_tags(const AST *ast)
+{
+ switch (ast->type) {
+ // unexpected node type: trips the assertion; if assertions are compiled
+ // out, control falls through to the 'return false' cases below
+ default: assert(false);
+ case AST::NIL:
+ case AST::STR:
+ case AST::CLS:
+ case AST::DOT:
+ case AST::DEFAULT:
+ case AST::DIFF: return false;
+ case AST::TAG:
+ case AST::CAP: return true;
+ // '||' short-circuits: the second operand is inspected only if the
+ // first one has no tags
+ case AST::ALT: return has_tags(ast->alt.ast1) || has_tags(ast->alt.ast2);
+ case AST::CAT: return has_tags(ast->cat.ast1) || has_tags(ast->cat.ast2);
+ case AST::REF: return has_tags(ast->ref.ast);
+ case AST::ITER: return has_tags(ast->iter.ast);
+ }
+}
+
+/* Computes the length of 'ast' if it is the same for every string the
+ * subexpression accepts, or Tag::VARDIST otherwise.
+ *
+ * Length is counted per leaf: NIL and TAG contribute 0; CLS, DOT, DEFAULT
+ * and DIFF contribute 1; STR contributes the size of its character list.
+ * ALT is fixed only if both alternatives have equal length, CAT only if
+ * both operands are fixed (lengths add up), ITER only if its operand is
+ * fixed and the lower and upper repetition bounds coincide. REF and CAP
+ * are transparent.
+ */
+static size_t fixlen(const AST *ast)
+{
+    switch (ast->type) {
+        // unexpected node type: trips the assertion; if assertions are
+        // compiled out, control falls through to the zero-length cases
+        default: assert(false);
+        case AST::NIL:
+        case AST::TAG: return 0;
+        case AST::CLS:
+        case AST::DOT:
+        case AST::DEFAULT:
+        case AST::DIFF: return 1;
+        case AST::STR: return ast->str.chars->size();
+        case AST::ALT: {
+            const size_t
+                l1 = fixlen(ast->alt.ast1),
+                l2 = fixlen(ast->alt.ast2);
+            // two VARDIST operands compare equal and yield VARDIST again
+            return l1 == l2 ? l1 : Tag::VARDIST;
+        }
+        case AST::CAT: {
+            const size_t
+                l1 = fixlen(ast->cat.ast1),
+                l2 = fixlen(ast->cat.ast2);
+            return l1 == Tag::VARDIST || l2 == Tag::VARDIST
+                ? Tag::VARDIST : l1 + l2;
+        }
+        case AST::REF: return fixlen(ast->ref.ast);
+        case AST::ITER: {
+            const size_t l = fixlen(ast->iter.ast);
+            const uint32_t m = ast->iter.min, n = ast->iter.max;
+            // the fixed-length branch is reached only when m == n, i.e. the
+            // operand repeats exactly n times, so the total is l * n
+            // (multiplying by the difference n - m would always give 0)
+            return l == Tag::VARDIST || m != n
+                ? Tag::VARDIST : l * n;
+        }
+        case AST::CAP: return fixlen(ast->cap);
+    }
+}
+
+/* True if 'ast' is a capture node, or an iteration whose immediate
+ * operand is a capture node.
+ */
+static bool is_capture(const AST *ast)
+{
+    if (ast->type == AST::CAP) {
+        return true;
+    }
+    if (ast->type != AST::ITER) {
+        return false;
+    }
+    // iteration: look exactly one level down, no deeper
+    return ast->iter.ast->type == AST::CAP;
+}
+
+/* True if 'ast' either satisfies is_capture() or has a length other than
+ * Tag::VARDIST according to fixlen().
+ */
+static bool is_capture_or_fixlen(const AST *ast)
+{
+    // check the capture property first: fixlen() is not called when it holds
+    if (is_capture(ast)) {
+        return true;
+    }
+    const bool fixed_length = fixlen(ast) != Tag::VARDIST;
+    return fixed_length;
+}
+
static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap)
{
RE::alc_t &alc = spec.alc;
// see note [default regexp]
return re_sym(alc, Range::ran(0, opts->encoding.nCodeUnits()));
case AST::ALT: {
- RE *x = ast_to_re(spec, ast->alt.ast1, ncap);
- RE *y = ast_to_re(spec, ast->alt.ast2, ncap);
+ RE *t1 = NULL, *t2 = NULL, *x, *y;
+ // see note [POSIX subexpression hierarchy]
+ if (opts->posix_captures && has_tags(ast->alt.ast2)
+ && !is_capture(ast->alt.ast1)) {
+ t1 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE1));
+ t2 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE2));
+ }
+ x = ast_to_re(spec, ast->alt.ast1, ncap);
+ x = re_cat(alc, t1, re_cat(alc, x, t2));
+ y = ast_to_re(spec, ast->alt.ast2, ncap);
return re_alt(alc, x, y);
}
case AST::DIFF: {
return re_class(alc, ast->line, ast->column, Range::sub(x->sym, y->sym), opts, warn);
}
case AST::CAT: {
- RE *x = ast_to_re(spec, ast->cat.ast1, ncap);
- RE *y = ast_to_re(spec, ast->cat.ast2, ncap);
+ RE *t1 = NULL, *t2 = NULL, *x, *y;
+ // see note [POSIX subexpression hierarchy]
+ if (opts->posix_captures && has_tags(ast->cat.ast2)
+ && !is_capture_or_fixlen(ast->cat.ast1)) {
+ t1 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE1));
+ t2 = re_tag(alc, tags.size(), false);
+ tags.push_back(Tag(Tag::FICTIVE2));
+ }
+ x = ast_to_re(spec, ast->cat.ast1, ncap);
+ x = re_cat(alc, t1, re_cat(alc, x, t2));
+ y = ast_to_re(spec, ast->cat.ast2, ncap);
return re_cat(alc, x, y);
}
case AST::TAG: {