From f1f579a3b404e67c0c4410676330294bb6c22e7e Mon Sep 17 00:00:00 2001 From: John Millaway Date: Mon, 27 Mar 2006 22:48:37 +0000 Subject: [PATCH] Implemented (?x:) syntax to allow whitespace in patterns. Added test for (?x:) syntax in test suite. --- scan.l | 84 +++++++++++++++++++++++++-------------- tests/test-ccl/scanner.l | 7 ++++ tests/test-ccl/test.input | 2 + 3 files changed, 63 insertions(+), 30 deletions(-) diff --git a/scan.l b/scan.l index 4ae7ea4..36b48a8 100644 --- a/scan.l +++ b/scan.l @@ -212,7 +212,7 @@ M4QEND "]]" } <EXTENDED_COMMENT>{ - ")" BEGIN(SECT2); + ")" yy_pop_state(); [^\n\)]+ ; {NL} ++linenum; } @@ -508,7 +508,20 @@ M4QEND "]]" return '\n'; } } - {WS}"|".*{NL} continued_action = true; ++linenum; return '\n'; + {WS}"|".*{NL} { + if (sf_skip_ws()){ + /* We're in the middle of a (?x: ) pattern. */ + /* Push back everything starting at the "|" */ + size_t amt; + amt = strchr (yytext, '|') - yytext; + yyless(amt); + } + else { + continued_action = true; + ++linenum; + return '\n'; + } + } ^{WS}"/*" { yyless( yyleng - 2 ); /* put back '/', '*' */ @@ -517,37 +530,48 @@ M4QEND "]]" BEGIN(ACTION); } - ^{WS} /* allow indented rules */ + ^{WS} /* allow indented rules */ ; {WS} { - /* This rule is separate from the one below because - * otherwise we get variable trailing context, so - * we can't build the scanner using -{f,F}. - */ - bracelevel = 0; - continued_action = false; - BEGIN(ACTION); - - if ( in_rule ) - { - doing_rule_action = true; - in_rule = false; - return '\n'; - } + if (sf_skip_ws()){ + /* We're in the middle of a (?x: ) pattern. */ + } + else{ + /* This rule is separate from the one below because + * otherwise we get variable trailing context, so + * we can't build the scanner using -{f,F}. 
+ */ + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + + if ( in_rule ) + { + doing_rule_action = true; + in_rule = false; + return '\n'; + } + } } {OPTWS}{NL} { - bracelevel = 0; - continued_action = false; - BEGIN(ACTION); - unput( '\n' ); /* so <ACTION> sees it */ - - if ( in_rule ) - { - doing_rule_action = true; - in_rule = false; - return '\n'; - } + if (sf_skip_ws()){ + /* We're in the middle of a (?x: ) pattern. */ + ++linenum; + } + else{ + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + unput( '\n' ); /* so <ACTION> sees it */ + + if ( in_rule ) + { + doing_rule_action = true; + in_rule = false; + return '\n'; + } + } } ^{OPTWS}"<<EOF>>" | @@ -639,7 +663,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ if ( lex_compat || nmdefptr[0] == '^' || (len > 0 && nmdefptr[len - 1] == '$') - || (end_is_ws && trlcontxt)) + || (end_is_ws && trlcontxt && !sf_skip_ws())) { /* don't use ()'s after all */ PUT_BACK_STRING((char *) nmdefptr, 0); @@ -656,7 +680,7 @@ nmstr[yyleng - 2 - end_is_ws] = '\0'; /* chop trailing brace */ } } - "(?#" BEGIN(EXTENDED_COMMENT); + "(?#" yy_push_state(EXTENDED_COMMENT); "(?" sf_push(); BEGIN(GROUP_WITH_PARAMS); return '('; "(" sf_push(); return '('; ")" sf_pop(); return ')'; diff --git a/tests/test-ccl/scanner.l b/tests/test-ccl/scanner.l index 204ac39..914795c 100644 --- a/tests/test-ccl/scanner.l +++ b/tests/test-ccl/scanner.l @@ -67,6 +67,13 @@ ^"dot-all-1:"(?-s:XXX.*)@dot-all-1@\n err_abort(); ^"dot-all-1:"(?s:XXX.*)@dot-all-1@\n a_ok(); +^"x1:"(?x: a | b )+@x1@\n a_ok(); +^"x2:"(?x: a | + (?# Comment ) + b + )+@x2@\n a_ok(); + + .|\n { err_abort(); } %% diff --git a/tests/test-ccl/test.input b/tests/test-ccl/test.input index e8b82a8..f38b4d6 100644 --- a/tests/test-ccl/test.input +++ b/tests/test-ccl/test.input @@ -21,3 +21,5 @@ dot-all-1:XXX junk junk junk @dot-all-1@ +x1:abaabb@x1@ +x2:abaabb@x2@ -- 2.40.0