From e4f934078d1cf37d890a9f3fbfd43b8f115fe599 Mon Sep 17 00:00:00 2001 From: helly Date: Sun, 25 Feb 2007 14:34:52 +0000 Subject: [PATCH] - Added inplace configuration 're2c:yych:conversion' --- re2c/CHANGELOG | 1 + re2c/code.cc | 43 +++++++--- re2c/globals.h | 1 + re2c/htdocs/manual.html | 12 +++ re2c/main.cc | 1 + re2c/re2c.1.in | 12 +++ re2c/test/config9.b.c | 184 ++++++++++++++++++++++++++++++++++++++++ re2c/test/config9.b.re | 83 ++++++++++++++++++ 8 files changed, 324 insertions(+), 13 deletions(-) create mode 100755 re2c/test/config9.b.c create mode 100755 re2c/test/config9.b.re diff --git a/re2c/CHANGELOG b/re2c/CHANGELOG index 3ea6cfea..8f73b3f9 100644 --- a/re2c/CHANGELOG +++ b/re2c/CHANGELOG @@ -1,5 +1,6 @@ Version 0.11.2 (????-??-??) --------------------------- +- Add inplace configuration 're2c:yych:conversion'. - Fixed -u switch code generation. - Added ability to avoid defines and overwrite variable and label names. diff --git a/re2c/code.cc b/re2c/code.cc index 1d3af097..5640ff05 100644 --- a/re2c/code.cc +++ b/re2c/code.cc @@ -287,7 +287,7 @@ void genGoTo(std::ostream &o, uint ind, const State *from, const State *to, bool { if (readCh && from->label + 1 != to->label) { - o << indent(ind) << mapCodeName["yych"] << " = *" << mapCodeName["YYCURSOR"] << ";\n"; + o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*" << mapCodeName["YYCURSOR"] << ";\n"; readCh = false; } @@ -300,7 +300,7 @@ void genIf(std::ostream &o, uint ind, const char *cmp, uint v, bool &readCh) o << indent(ind) << "if("; if (readCh) { - o << "(" << mapCodeName["yych"] << " = *" << mapCodeName["YYCURSOR"] << ")"; + o << "(" << mapCodeName["yych"] << " = " << yychConversion << "*" << mapCodeName["YYCURSOR"] << ")"; readCh = false; } else @@ -342,11 +342,11 @@ static void need(std::ostream &o, uint ind, uint n, bool & readCh, bool bSetMark if (bSetMarker) { - o << indent(ind) << mapCodeName["yych"] << " = *(" << mapCodeName["YYMARKER"] << " = " << 
mapCodeName["YYCURSOR"] << ");\n"; + o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*(" << mapCodeName["YYMARKER"] << " = " << mapCodeName["YYCURSOR"] << ");\n"; } else { - o << indent(ind) << mapCodeName["yych"] << " = *" << mapCodeName["YYCURSOR"] << ";\n"; + o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*" << mapCodeName["YYCURSOR"] << ";\n"; } readCh = false; } @@ -365,7 +365,7 @@ void Match::emit(std::ostream &o, uint ind, bool &readCh) const } else { - o << indent(ind) << mapCodeName["yych"] << " = *++" << mapCodeName["YYCURSOR"] << ";\n"; + o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*++" << mapCodeName["YYCURSOR"] << ";\n"; readCh = false; } @@ -389,7 +389,7 @@ void Enter::emit(std::ostream &o, uint ind, bool &readCh) const else { /* we shouldn't need 'rule-following' protection here */ - o << indent(ind) << mapCodeName["yych"] << " = *++" << mapCodeName["YYCURSOR"] << ";\n"; + o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*++" << mapCodeName["YYCURSOR"] << ";\n"; if (vUsedLabels.count(label)) { o << labelPrefix << label << ":\n"; @@ -413,7 +413,7 @@ void Initial::emit(std::ostream &o, uint ind, bool &readCh) const } else { - o << indent(ind) << mapCodeName["yych"] << " = *++" << mapCodeName["YYCURSOR"] << ";\n"; + o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*++" << mapCodeName["YYCURSOR"] << ";\n"; } } @@ -464,11 +464,11 @@ void Save::emit(std::ostream &o, uint ind, bool &readCh) const { if (bUsedYYMarker) { - o << indent(ind) << mapCodeName["yych"] << " = *(" << mapCodeName["YYMARKER"] << " = ++" << mapCodeName["YYCURSOR"] << ");\n"; + o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*(" << mapCodeName["YYMARKER"] << " = ++" << mapCodeName["YYCURSOR"] << ");\n"; } else { - o << indent(ind) << mapCodeName["yych"] << " = *++" << mapCodeName["YYCURSOR"] << ";\n"; + o << indent(ind) << mapCodeName["yych"] << " = 
" << yychConversion << "*++" << mapCodeName["YYCURSOR"] << ";\n"; } readCh = false; } @@ -528,7 +528,7 @@ void Accept::emit(std::ostream &o, uint ind, bool &readCh) const if (readCh) // shouldn't be necessary, but might become at some point { - o << indent(ind) << mapCodeName["yych"] << " = *" << mapCodeName["YYCURSOR"] << ";\n"; + o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*" << mapCodeName["YYCURSOR"] << ";\n"; readCh = false; } @@ -741,7 +741,7 @@ void Go::genSwitch(std::ostream &o, uint ind, const State *from, const State *ne if (readCh) { - o << indent(ind) << "switch((" << mapCodeName["yych"] << " = *" << mapCodeName["YYCURSOR"] << ")) {\n"; + o << indent(ind) << "switch((" << mapCodeName["yych"] << " = " << yychConversion << "*" << mapCodeName["YYCURSOR"] << ")) {\n"; readCh = false; } else @@ -894,7 +894,7 @@ void Go::genCpGoto(std::ostream &o, uint ind, const State *from, const State *ne if (readCh) { - sYych = "(" + mapCodeName["yych"] + " = *" + mapCodeName["YYCURSOR"] + ")"; + sYych = "(" + mapCodeName["yych"] + " = " + yychConversion + "*" + mapCodeName["YYCURSOR"] + ")"; } else { @@ -1012,7 +1012,7 @@ void Go::genGoto(std::ostream &o, uint ind, const State *from, const State *next go.unmap(this, to); if (readCh) { - sYych = "(" + mapCodeName["yych"] + " = *" + mapCodeName["YYCURSOR"] + ")"; + sYych = "(" + mapCodeName["yych"] + " = " + yychConversion + "*" + mapCodeName["YYCURSOR"] + ")"; } else { @@ -1691,6 +1691,19 @@ void Scanner::config(const Str& cfg, int num) { cGotoThreshold = num; } + else if (cfg.to_string() == "yych:conversion") + { + if (num) + { + yychConversion = "("; + yychConversion += mapCodeName["YYCTYPE"]; + yychConversion += ")"; + } + else + { + yychConversion = ""; + } + } else { fatal("unrecognized configuration name or illegal integer value"); @@ -1748,6 +1761,10 @@ void Scanner::config(const Str& cfg, const Str& val) { labelPrefix = strVal; } + else if (cfg.to_string() == "yych:conversion") + { + 
yychConversion = mapCodeName["YYCTYPE"]; + } else if (mapVariableKeys.find(cfg.to_string()) != mapVariableKeys.end()) { if (bFirstPass && !mapCodeName.insert( diff --git a/re2c/globals.h b/re2c/globals.h index e4f527c6..f974a936 100644 --- a/re2c/globals.h +++ b/re2c/globals.h @@ -36,6 +36,7 @@ extern bool bUsedYYMarker; extern bool bUseStartLabel; extern std::string startLabelName; extern std::string labelPrefix; +extern std::string yychConversion; extern uint maxFill; extern uint next_label; extern uint cGotoThreshold; diff --git a/re2c/htdocs/manual.html b/re2c/htdocs/manual.html index c6d441cf..97877910 100755 --- a/re2c/htdocs/manual.html +++ b/re2c/htdocs/manual.html @@ -385,6 +385,18 @@ placing a "/*!getstate:re2c */" comment. generation of jump tables rather than using nested if's and decision bitfields. The threshold is compared against a calculated estimation of if-s needed where every used bitmap divides the threshold by 2. +.TP +
re2c:yych:conversion = 0 ;
+
When the input uses signed characters and the -s or -b switches are +in effect, re2c can automatically convert each input character to the unsigned character type +that is then required for its internal single character. When this setting +is zero or an empty string, the conversion is disabled. With a non-zero number, +the conversion is taken from YYCTYPE: if YYCTYPE is given by an inplace +configuration, that value is used; otherwise the conversion will be (YYCTYPE) +and changes to that configuration are no longer possible. When this setting is +a string, the parentheses must be specified explicitly. For example, if your input is a char* +buffer and you are using the above-mentioned switches, you can set YYCTYPE to +unsigned char and this setting to either 1 or "(unsigned char)".
re2c:define:YYCTXMARKER = YYCTXMARKER ;
Allows to overwrite the define YYCTXMARKER and thus avoiding it by setting the value to the actual code needed.
diff --git a/re2c/main.cc b/re2c/main.cc index 29548ea3..3118b923 100644 --- a/re2c/main.cc +++ b/re2c/main.cc @@ -45,6 +45,7 @@ bool bUseYYFill = true; std::string startLabelName; std::string labelPrefix("yy"); +std::string yychConversion(""); uint maxFill = 1; uint next_label = 0; uint cGotoThreshold = 9; diff --git a/re2c/re2c.1.in b/re2c/re2c.1.in index 172a54fe..0b5bf4f7 100644 --- a/re2c/re2c.1.in +++ b/re2c/re2c.1.in @@ -457,6 +457,18 @@ generation of jump tables rather than using nested if's and decision bitfields. The threshold is compared against a calculated estimation of if-s needed where every used bitmap divides the threshold by 2. .TP +\fIre2c:yych:conversion\fP \fB=\fP 0 \fB;\fP +When the input uses signed characters and \fB-s\fP or \fB-b\fP switches are +in effect re2c allows to automatically convert to the unsigned character type +that is then necessary for its internal single character. When this setting +is zero or an empty string the conversion is disabled. Using a non zero number +the conversion is taken from \fBYYCTYPE\fP. If that is given by an inplace +configuration that value is being used. Otherwise it will be \fB(YYCTYPE)\fP +and changes to that configuration are no longer possible. When this setting is +a string the braces must be specified. Now assuming your input is a \fBchar*\fP +buffer and you are using above mentioned switches you can set \fBYYCTYPE\fP to +\fBunsigned char\fP and this setting to either \fB1\fP or \fB"(unsigned char)"\fP. +.TP \fIre2c:define:YYCTXMARKER\fP \fB=\fP YYCTXMARKER \fB;\fP Allows to overwrite the define YYCTXMARKER and thus avoiding it by setting the value to the actual code needed. 
diff --git a/re2c/test/config9.b.c b/re2c/test/config9.b.c new file mode 100755 index 00000000..65d902a7 --- /dev/null +++ b/re2c/test/config9.b.c @@ -0,0 +1,184 @@ +/* Generated by re2c */ +#line 1 "config9.b.re" +#include +#include + +struct Scanner +{ + Scanner(char *txt) + : cur(txt), lim(txt + strlen(txt)) + { + } + + char *cur; + char *lim; + char *ptr; + char *ctx; + char *tok; +}; + +enum What +{ + UNEXPECTED, + KEYWORD, + NUMBER, + EOI +}; + +char * tokens[] = { "UNEXPECTED", "KEYWORD", "NUMBER", "EOI" }; + +void fill(int) +{ +} + +int scan(Scanner &s) +{ + char *cursor = s.cur; + + if(cursor == s.lim) + return EOI; + +std: + s.tok = cursor; + +{ + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + +#line 81 "" + { + unsigned char curr; + + if((s.lim - s.cur) < 3) fill(3); + curr = (unsigned char)*s.cur; + if(curr <= ' ') { + if(curr <= 0x09) { + if(curr <= 0x08) goto xx9; + goto xx6; + } else { + if(curr <= 0x0A) goto xx8; + if(curr <= 0x1F) goto xx9; + goto xx6; + } + } else { + if(curr <= '9') { + if(curr <= '/') goto xx9; + goto xx4; + } else { + if(curr <= '`') goto xx9; + if(curr >= 'c') goto xx9; + } + } + s.ctx = s.cur 
+ 1; + ++s.cur; + if((curr = (unsigned char)*s.cur) <= '/') goto xx3; + if(curr == '1') goto xx15; + if(curr <= '9') goto xx12; +xx3: +#line 68 "config9.b.re" + { + return UNEXPECTED; + } +#line 115 "" +xx4: + ++s.cur; + curr = (unsigned char)*s.cur; + goto xx11; +xx5: +#line 58 "config9.b.re" + { return NUMBER; } +#line 123 "" +xx6: + ++s.cur; +xx7: +#line 61 "config9.b.re" + { + if(s.cur == s.lim) + return EOI; + cursor = s.cur; + goto std; + } +#line 134 "" +xx8: + curr = (unsigned char)*++s.cur; + goto xx7; +xx9: + curr = (unsigned char)*++s.cur; + goto xx3; +xx10: + ++s.cur; + if(s.lim == s.cur) fill(1); + curr = (unsigned char)*s.cur; +xx11: + if(yybm[0+curr] & 128) { + goto xx10; + } + goto xx5; +xx12: + ++s.cur; + if(s.lim == s.cur) fill(1); + curr = (unsigned char)*s.cur; + if(curr <= '/') goto xx14; + if(curr <= '9') goto xx12; +xx14: + s.cur = s.ctx; +#line 57 "config9.b.re" + { return KEYWORD; } +#line 160 "" +xx15: + ++s.cur; + if((curr = (unsigned char)*s.cur) <= '/') goto xx16; + if(curr <= '9') goto xx12; +xx16: + s.cur = s.ctx; +#line 56 "config9.b.re" + { return KEYWORD; } +#line 169 "" + } +} +#line 71 "config9.b.re" + +} + +int main(int,char**) +{ + Scanner s("a77 a1 b8 b1"); + + int t, n = 0; + while ((t = scan(s)) != EOI) + { + std::cout << (++n) << ": " << tokens[t] << " = \""; std::cout.write(s.tok, s.cur-s.tok); std::cout << "\"" << std::endl; + } +} diff --git a/re2c/test/config9.b.re b/re2c/test/config9.b.re new file mode 100755 index 00000000..c8a338a8 --- /dev/null +++ b/re2c/test/config9.b.re @@ -0,0 +1,83 @@ +#include +#include + +struct Scanner +{ + Scanner(char *txt) + : cur(txt), lim(txt + strlen(txt)) + { + } + + char *cur; + char *lim; + char *ptr; + char *ctx; + char *tok; +}; + +enum What +{ + UNEXPECTED, + KEYWORD, + NUMBER, + EOI +}; + +char * tokens[] = { "UNEXPECTED", "KEYWORD", "NUMBER", "EOI" }; + +void fill(int) +{ +} + +int scan(Scanner &s) +{ + char *cursor = s.cur; + + if(cursor == s.lim) + return EOI; + +std: + s.tok 
= cursor; + +/*!re2c + +re2c:define:YYCTYPE = "unsigned char"; +re2c:define:YYCURSOR = s.cur; +re2c:define:YYLIMIT = s.lim; +re2c:define:YYMARKER = s.ptr; +re2c:define:YYCTXMARKER = s.ctx; +re2c:define:YYFILL = fill; + +re2c:variable:yych = curr; +re2c:labelprefix = xx; + +re2c:yych:conversion = 1; + +("a"|"b")/[1] { return KEYWORD; } +("a"|"b")/[0-9]+ { return KEYWORD; } +[0-9]+ { return NUMBER; } + +[ \t\n] + { + if(s.cur == s.lim) + return EOI; + cursor = s.cur; + goto std; + } +. + { + return UNEXPECTED; + } +*/ +} + +int main(int,char**) +{ + Scanner s("a77 a1 b8 b1"); + + int t, n = 0; + while ((t = scan(s)) != EOI) + { + std::cout << (++n) << ": " << tokens[t] << " = \""; std::cout.write(s.tok, s.cur-s.tok); std::cout << "\"" << std::endl; + } +} -- 2.50.1