Added test in test suite for ccl union operator.
Documented ccl union operator.
Removed crufty ccl cache to prevent parser problems.
return d;
}
+/* ccl_set_union - create a new ccl as the set union of the two given ccls. */
+int
+ccl_set_union (int a, int b)
+{
+ int d, i;
+
+ /* create new class */
+ d = cclinit();
+
+ /* Add all of a */
+ for (i = 0; i < ccllen[a]; ++i)
+ ccladd (d, ccltbl[cclmap[a] + i]);
+
+ /* Add all of b */
+ for (i = 0; i < ccllen[b]; ++i)
+ ccladd (d, ccltbl[cclmap[b] + i]);
+
+ /* debug */
+ if (0){
+ fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, a);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, b);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, d);
+ fprintf(stderr, "\n)\n");
+ }
+ return d;
+}
+
/* cclinit - return an empty ccl */
associative, so @samp{[abc]@{-@}[b]@{-@}[c]} is the same as @samp{[a]}. Be careful
not to accidently create an empty set, which will never match.
+@item
+
+The @samp{@{+@}} operator computes the union of two character classes. For
+example, @samp{[a-z]@{+@}[0-9]} is the same as @samp{[a-z0-9]}. This operator
+is useful when preceded by the result of a difference operation, as in,
+@samp{[[:alpha:]]@{-@}[[:lower:]]@{+@}[q]}, which is equivalent to
+@samp{[A-Zq]} in the "C" locale.
@cindex trailing context, limits of
@cindex ^ as non-special character in patterns
extern int cclinit PROTO ((void)); /* make an empty ccl */
extern void cclnegate PROTO ((int)); /* negate a ccl */
extern int ccl_set_diff (int a, int b); /* set difference of two ccls. */
+extern int ccl_set_union (int a, int b); /* set union of two ccls. */
/* List the members of a set of characters in CCL form. */
extern void list_character_set PROTO ((FILE *, int[]));
%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
-%left CCL_OP_DIFF
+%left CCL_OP_DIFF CCL_OP_UNION
/*
*POSIX and AT&T lex place the
| fullccl
{
- if ( ! cclsorted )
/* Sort characters for fast searching. We
* use a shell sort since this list could
* be large.
}
;
fullccl:
- fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); }
+ fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); }
+ | fullccl CCL_OP_UNION braceccl { $$ = ccl_set_union ($1, $3); }
| braceccl
;
/* Check to see if we've already encountered this
* ccl.
*/
- if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
+ if (0 /* <--- This "0" effectively disables the reuse of a
+ * character class (purely based on its source text).
+ * The reason it was disabled is so yacc/bison can parse
+ * ccl operations, such as ccl difference and union.
+ */
+ && (cclval = ccllookup( (Char *) nmstr )) != 0 )
{
if ( input() != ']' )
synerr( _( "bad character class" ) );
}
}
"{-}" return CCL_OP_DIFF;
+ "{+}" return CCL_OP_UNION;
/* Check for :space: at the end of the rule so we don't
return &empty_entry;
}
-
/* hashfunct - compute the hash value for "str" and hash size "hash_size" */
static int hashfunct (str, hash_size)
^"abcd-bc:"([abcd]{-}[bc])+@abcd-bc@\n printf("OK: %s", yytext); ++yylineno; return 1;
^"abcde-b-c:"([abcde]{-}[b]{-}[c])+@abcde-b-c@\n printf("OK: %s", yytext); ++yylineno; return 1;
^"^XY-^XYZ:"([^XY]{-}[^XYZ])+@^XY-^XYZ@\n printf("OK: %s", yytext); ++yylineno; return 1;
+
+^"a+d:"([[:alpha:]]{+}[[:digit:]])+"@a+d@"\n a_ok();
+^"a-u+Q:"([[:alpha:]]{-}[[:upper:]]{+}[Q])+"@a-u+Q@"\n a_ok();
+
^"ia:"(?i:a)+@ia@\n printf("OK: %s", yytext); ++yylineno; return 1;
^"iabc:"(?i:abc)+@iabc@\n printf("OK: %s", yytext); ++yylineno; return 1;
^"ia-c:"(?i:[a-c]+)@ia-c@\n printf("OK: %s", yytext); ++yylineno; return 1;
abcd-bc:aaaaddddaaaa@abcd-bc@
abcde-b-c:aaaaddddeeee@abcde-b-c@
^XY-^XYZ:ZZZZZZZZZZZ@^XY-^XYZ@
+a+d:abc0123xyz789@a+d@
+a-u+Q:abcQQQQxyz@a-u+Q@
ia:AaAa@ia@
iabc:ABCabcAbCaBc@iabc@
ia-c:ABCabcAbCaBc@ia-c@