From: nuffer Date: Mon, 2 Aug 2004 03:42:30 +0000 (+0000) Subject: Ran code through astyle to make it easier to read. X-Git-Tag: 0.13.6~689 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5aebcb24d6f342e1162f7c905cf44e4f5fb77d1b;p=re2c Ran code through astyle to make it easier to read. --- diff --git a/actions.cc b/actions.cc index 1ea4be78..4c1465a3 100644 --- a/actions.cc +++ b/actions.cc @@ -10,559 +10,802 @@ Symbol *Symbol::first = NULL; -Symbol::Symbol(const SubStr &str) : next(first), name(str), re(NULL) { - first = this; -} - -Symbol *Symbol::find(const SubStr &str){ - for(Symbol *sym = first; sym; sym = sym->next) - if(sym->name == str) return sym; - return new Symbol(str); -} - -void showIns(std::ostream &o, const Ins &i, const Ins &base){ - o.width(3); - o << &i - &base << ": "; - switch(i.i.tag){ - case CHAR: { - o << "match "; - for(const Ins *j = &(&i)[1]; j < (Ins*) i.i.link; ++j) - prtCh(o, j->c.value); - break; - } case GOTO: - o << "goto " << ((Ins*) i.i.link - &base); - break; - case FORK: - o << "fork " << ((Ins*) i.i.link - &base); - break; - case CTXT: - o << "term " << ((RuleOp*) i.i.link)->accept; - break; - case TERM: - o << "term " << ((RuleOp*) i.i.link)->accept; - break; - } - o << "\n"; -} - -uint RegExp::fixedLength(){ - return ~0; +Symbol::Symbol(const SubStr &str) : next(first), name(str), re(NULL) +{ + first = this; } -char *NullOp::type = "NullOp"; +Symbol *Symbol::find(const SubStr &str) +{ + for (Symbol *sym = first; sym; sym = sym->next) + if (sym->name == str) + return sym; + + return new Symbol(str); +} + +void showIns(std::ostream &o, const Ins &i, const Ins &base) +{ + o.width(3); + o << &i - &base << ": "; + + switch (i.i.tag) + { + + case CHAR: + { + o << "match "; + + for (const Ins *j = &(&i)[1]; j < (Ins*) i.i.link; ++j) + prtCh(o, j->c.value); + + break; + } + + case GOTO: + o << "goto " << ((Ins*) i.i.link - &base); + break; + + case FORK: + o << "fork " << ((Ins*) i.i.link - &base); + break; -void NullOp::calcSize(Char*){ - size = 0; + case CTXT: + o << "term " << ((RuleOp*) i.i.link)->accept; + break; + + case TERM: + o << "term " << ((RuleOp*) i.i.link)->accept; + break; + } + + o << "\n"; } -uint NullOp::fixedLength(){ - return 0; +uint RegExp::fixedLength() +{ + return ~0; } -void NullOp::compile(Char*, Ins*){ - ; +char *NullOp::type = "NullOp"; + +void NullOp::calcSize(Char*) +{ + size = 0; } -void NullOp::split(CharSet&){ - ; +uint NullOp::fixedLength() +{ + return 0; } -std::ostream& operator<<(std::ostream &o, const Range &r){ - if((r.ub - r.lb) == 1){ - prtCh(o, r.lb); - } else { - prtCh(o, r.lb); o << "-"; prtCh(o, r.ub-1); - } - return o << r.next; +void NullOp::compile(Char*, Ins*) +{ + ; } -Range *doUnion(Range *r1, Range *r2){ - Range *r, **rP = &r; - for(;;){ - Range *s; - if(r1->lb <= r2->lb){ - s = new Range(*r1); - } else { - s = new Range(*r2); +void NullOp::split(CharSet&) +{ + ; +} + +std::ostream& operator<<(std::ostream &o, const Range &r) +{ + if ((r.ub - r.lb) == 1) + { + prtCh(o, r.lb); + } + else + { + prtCh(o, r.lb); + o << "-"; + prtCh(o, r.ub - 1); } - *rP = s; - rP = &s->next; - for(;;){ - if(r1->lb <= r2->lb){ - if(r1->lb > s->ub) - break; - if(r1->ub > s->ub) - s->ub = r1->ub; - if(!(r1 = r1->next)){ - uint ub = 0; - for(; r2 && r2->lb <= s->ub; r2 = r2->next) - ub = r2->ub; - if(ub > s->ub) - s->ub = ub; - *rP = r2; - return r; + + return o << r.next; +} + +Range *doUnion(Range *r1, Range *r2) +{ + Range *r, **rP = &r; + + for (;;) + { + Range *s; + + if (r1->lb <= r2->lb) + { + s = new Range(*r1); } - } else { - if(r2->lb > s->ub) - break; - if(r2->ub > s->ub) - s->ub = r2->ub; - if(!(r2 = r2->next)){ - uint ub = 0; - for(; r1 && r1->lb <= s->ub; r1 = r1->next) - ub = r1->ub; - if(ub > s->ub) - s->ub = ub; - *rP = r1; - return r; + else + { + s = new Range(*r2); + } + + *rP = s; + rP = &s->next; + + for (;;) + { + if (r1->lb <= r2->lb) + { + if (r1->lb > s->ub) + break; + + if (r1->ub > s->ub) + s->ub = r1->ub; + + if (!(r1 = r1->next)) + { + uint ub = 0; + + for (; r2 && r2->lb <= s->ub; r2 = r2->next) + ub = r2->ub; + + if (ub > s->ub) + s->ub = ub; + + *rP = r2; + + return r; + } + } + else + { + if (r2->lb > s->ub) + break; + + if (r2->ub > s->ub) + s->ub = r2->ub; + + if (!(r2 = r2->next)) + { + uint ub = 0; + + for (; r1 && r1->lb <= s->ub; r1 = r1->next) + ub = r1->ub; + + if (ub > s->ub) + s->ub = ub; + + *rP = r1; + + return r; + } + } } - } } - } - *rP = NULL; - return r; -} - -Range *doDiff(Range *r1, Range *r2){ - Range *r, *s, **rP = &r; - for(; r1; r1 = r1->next){ - uint lb = r1->lb; - for(; r2 && r2->ub <= r1->lb; r2 = r2->next); - for(; r2 && r2->lb < r1->ub; r2 = r2->next){ - if(lb < r2->lb){ - *rP = s = new Range(lb, r2->lb); + + *rP = NULL; + return r; +} + +Range *doDiff(Range *r1, Range *r2) +{ + Range *r, *s, **rP = &r; + + for (; r1; r1 = r1->next) + { + uint lb = r1->lb; + + for (; r2 && r2->ub <= r1->lb; r2 = r2->next) + + ; + for (; r2 && r2->lb < r1->ub; r2 = r2->next) + { + if (lb < r2->lb) + { + *rP = s = new Range(lb, r2->lb); + rP = &s->next; + } + + if ((lb = r2->ub) >= r1->ub) + goto noMore; + } + + *rP = s = new Range(lb, r1->ub); rP = &s->next; - } - if((lb = r2->ub) >= r1->ub) - goto noMore; + +noMore: + ; } - *rP = s = new Range(lb, r1->ub); - rP = &s->next; - noMore:; - } - *rP = NULL; - return r; + + *rP = NULL; + return r; } -MatchOp *merge(MatchOp *m1, MatchOp *m2){ - if(!m1) - return m2; - if(!m2) - return m1; - return new MatchOp(doUnion(m1->match, m2->match)); +MatchOp *merge(MatchOp *m1, MatchOp *m2) +{ + if (!m1) + return m2; + + if (!m2) + return m1; + + return new MatchOp(doUnion(m1->match, m2->match)); } char *MatchOp::type = "MatchOp"; -void MatchOp::display(std::ostream &o) const{ - o << match; +void MatchOp::display(std::ostream &o) const +{ + o << match; } -void MatchOp::calcSize(Char *rep){ - size = 1; - for(Range *r = match; r; r = r->next) - for(uint c = r->lb; c < r->ub; ++c) - if(rep[c] == c) - ++size; +void MatchOp::calcSize(Char *rep) +{ + size = 1; + + for (Range *r = match; r; r = r->next) + for (uint c = r->lb; c < r->ub; ++c) + if (rep[c] == c) + ++size; } -uint MatchOp::fixedLength(){ - return 1; +uint MatchOp::fixedLength() +{ + return 1; } -void MatchOp::compile(Char *rep, Ins *i){ - i->i.tag = CHAR; - i->i.link = &i[size]; - Ins *j = &i[1]; - uint bump = size; - for(Range *r = match; r; r = r->next){ - for(uint c = r->lb; c < r->ub; ++c){ - if(rep[c] == c){ - j->c.value = c; - j->c.bump = --bump; - j++; - } +void MatchOp::compile(Char *rep, Ins *i) +{ + i->i.tag = CHAR; + i->i.link = &i[size]; + Ins *j = &i[1]; + uint bump = size; + + for (Range *r = match; r; r = r->next) + { + for (uint c = r->lb; c < r->ub; ++c) + { + if (rep[c] == c) + { + j->c.value = c; + j->c.bump = --bump; + j++; + } + } } - } -} - -void MatchOp::split(CharSet &s){ - for(Range *r = match; r; r = r->next){ - for(uint c = r->lb; c < r->ub; ++c){ - CharPtn *x = s.rep[c], *a = x->nxt; - if(!a){ - if(x->card == 1) - continue; - x->nxt = a = s.freeHead; - if(!(s.freeHead = s.freeHead->nxt)) - s.freeTail = &s.freeHead; - a->nxt = NULL; - x->fix = s.fix; - s.fix = x; - } - if(--(x->card) == 0){ - *s.freeTail = x; - *(s.freeTail = &x->nxt) = NULL; - } - s.rep[c] = a; - ++(a->card); +} + +void MatchOp::split(CharSet &s) +{ + for (Range *r = match; r; r = r->next) + { + for (uint c = r->lb; c < r->ub; ++c) + { + CharPtn *x = s.rep[c], *a = x->nxt; + + if (!a) + { + if (x->card == 1) + continue; + + x->nxt = a = s.freeHead; + + if (!(s.freeHead = s.freeHead->nxt)) + s.freeTail = &s.freeHead; + + a->nxt = NULL; + + x->fix = s.fix; + + s.fix = x; + } + + if (--(x->card) == 0) + { + *s.freeTail = x; + *(s.freeTail = &x->nxt) = NULL; + } + + s.rep[c] = a; + ++(a->card); + } } - } - for(; s.fix; s.fix = s.fix->fix) - if(s.fix->card) - s.fix->nxt = NULL; -} - -RegExp *mkDiff(RegExp *e1, RegExp *e2){ - MatchOp *m1, *m2; - if(!(m1 = (MatchOp*) e1->isA(MatchOp::type))) - return NULL; - if(!(m2 = (MatchOp*) e2->isA(MatchOp::type))) - return NULL; - Range *r = doDiff(m1->match, m2->match); - return r? (RegExp*) new MatchOp(r) : (RegExp*) new NullOp; -} - -RegExp *doAlt(RegExp *e1, RegExp *e2){ - if(!e1) - return e2; - if(!e2) - return e1; - return new AltOp(e1, e2); -} - -RegExp *mkAlt(RegExp *e1, RegExp *e2){ - AltOp *a; - MatchOp *m1, *m2; - if((a = (AltOp*) e1->isA(AltOp::type))){ - if((m1 = (MatchOp*) a->exp1->isA(MatchOp::type))) - e1 = a->exp2; - } else if((m1 = (MatchOp*) e1->isA(MatchOp::type))){ - e1 = NULL; - } - if((a = (AltOp*) e2->isA(AltOp::type))){ - if((m2 = (MatchOp*) a->exp1->isA(MatchOp::type))) - e2 = a->exp2; - } else if((m2 = (MatchOp*) e2->isA(MatchOp::type))){ - e2 = NULL; - } - return doAlt(merge(m1, m2), doAlt(e1, e2)); + + for (; s.fix; s.fix = s.fix->fix) + if (s.fix->card) + s.fix->nxt = NULL; +} + +RegExp *mkDiff(RegExp *e1, RegExp *e2) +{ + MatchOp *m1, *m2; + + if (!(m1 = (MatchOp*) e1->isA(MatchOp::type))) + return NULL; + + if (!(m2 = (MatchOp*) e2->isA(MatchOp::type))) + return NULL; + + Range *r = doDiff(m1->match, m2->match); + + return r ? (RegExp*) new MatchOp(r) : (RegExp*) new NullOp; +} + +RegExp *doAlt(RegExp *e1, RegExp *e2) +{ + if (!e1) + return e2; + + if (!e2) + return e1; + + return new AltOp(e1, e2); +} + +RegExp *mkAlt(RegExp *e1, RegExp *e2) +{ + AltOp *a; + MatchOp *m1, *m2; + + if ((a = (AltOp*) e1->isA(AltOp::type))) + { + if ((m1 = (MatchOp*) a->exp1->isA(MatchOp::type))) + e1 = a->exp2; + } + else if ((m1 = (MatchOp*) e1->isA(MatchOp::type))) + { + e1 = NULL; + } + + if ((a = (AltOp*) e2->isA(AltOp::type))) + { + if ((m2 = (MatchOp*) a->exp1->isA(MatchOp::type))) + e2 = a->exp2; + } + else if ((m2 = (MatchOp*) e2->isA(MatchOp::type))) + { + e2 = NULL; + } + + return doAlt(merge(m1, m2), doAlt(e1, e2)); } char *AltOp::type = "AltOp"; -void AltOp::calcSize(Char *rep){ - exp1->calcSize(rep); - exp2->calcSize(rep); - size = exp1->size + exp2->size + 2; +void AltOp::calcSize(Char *rep) +{ + exp1->calcSize(rep); + exp2->calcSize(rep); + size = exp1->size + exp2->size + 2; } -uint AltOp::fixedLength(){ - uint l1 = exp1->fixedLength(); - uint l2 = exp1->fixedLength(); - if(l1 != l2 || l1 == ~0u) - return ~0; - return l1; +uint AltOp::fixedLength() +{ + uint l1 = exp1->fixedLength(); + uint l2 = exp1->fixedLength(); + + if (l1 != l2 || l1 == ~0u) + return ~0; + + return l1; } -void AltOp::compile(Char *rep, Ins *i){ - i->i.tag = FORK; - Ins *j = &i[exp1->size + 1]; - i->i.link = &j[1]; - exp1->compile(rep, &i[1]); - j->i.tag = GOTO; - j->i.link = &j[exp2->size + 1]; - exp2->compile(rep, &j[1]); +void AltOp::compile(Char *rep, Ins *i) +{ + i->i.tag = FORK; + Ins *j = &i[exp1->size + 1]; + i->i.link = &j[1]; + exp1->compile(rep, &i[1]); + j->i.tag = GOTO; + j->i.link = &j[exp2->size + 1]; + exp2->compile(rep, &j[1]); } -void AltOp::split(CharSet &s){ - exp1->split(s); - exp2->split(s); +void AltOp::split(CharSet &s) +{ + exp1->split(s); + exp2->split(s); } char *CatOp::type = "CatOp"; -void CatOp::calcSize(Char *rep){ - exp1->calcSize(rep); - exp2->calcSize(rep); - size = exp1->size + exp2->size; +void CatOp::calcSize(Char *rep) +{ + exp1->calcSize(rep); + exp2->calcSize(rep); + size = exp1->size + exp2->size; } -uint CatOp::fixedLength(){ - uint l1, l2; - if((l1 = exp1->fixedLength()) != ~0u ) - if((l2 = exp2->fixedLength()) != ~0u) - return l1+l2; - return ~0; +uint CatOp::fixedLength() +{ + uint l1, l2; + + if ((l1 = exp1->fixedLength()) != ~0u ) + if ((l2 = exp2->fixedLength()) != ~0u) + return l1 + l2; + + return ~0; } -void CatOp::compile(Char *rep, Ins *i){ - exp1->compile(rep, &i[0]); - exp2->compile(rep, &i[exp1->size]); +void CatOp::compile(Char *rep, Ins *i) +{ + exp1->compile(rep, &i[0]); + exp2->compile(rep, &i[exp1->size]); } -void CatOp::split(CharSet &s){ - exp1->split(s); - exp2->split(s); +void CatOp::split(CharSet &s) +{ + exp1->split(s); + exp2->split(s); } char *CloseOp::type = "CloseOp"; -void CloseOp::calcSize(Char *rep){ - exp->calcSize(rep); - size = exp->size + 1; +void CloseOp::calcSize(Char *rep) +{ + exp->calcSize(rep); + size = exp->size + 1; } -void CloseOp::compile(Char *rep, Ins *i){ - exp->compile(rep, &i[0]); - i += exp->size; - i->i.tag = FORK; - i->i.link = i - exp->size; +void CloseOp::compile(Char *rep, Ins *i) +{ + exp->compile(rep, &i[0]); + i += exp->size; + i->i.tag = FORK; + i->i.link = i - exp->size; } -void CloseOp::split(CharSet &s){ - exp->split(s); +void CloseOp::split(CharSet &s) +{ + exp->split(s); } char *CloseVOp::type = "CloseVOp"; -void CloseVOp::calcSize(Char *rep){ - exp->calcSize(rep); - if(max >= 0) - size = (exp->size * min) + ((1 + exp->size) * (max - min)); - else - size = (exp->size * min) + 1; -} - -void CloseVOp::compile(Char *rep, Ins *i){ - Ins *jumppoint; - int st = 0; - jumppoint = i + ((1 + exp->size) * (max - min)); - for(st = min; st < max; st++) { - i->i.tag = FORK; - i->i.link = jumppoint; - i+=1; - exp->compile(rep, &i[0]); - i += exp->size; - } - for(st = 0; st < min; st++) { - exp->compile(rep, &i[0]); - i += exp->size; - if(max < 0 && st == 0) { - i->i.tag = FORK; - i->i.link = i - exp->size; - i++; - } - } -} - -void CloseVOp::split(CharSet &s){ - exp->split(s); +void CloseVOp::calcSize(Char *rep) +{ + exp->calcSize(rep); + + if (max >= 0) + size = (exp->size * min) + ((1 + exp->size) * (max - min)); + else + size = (exp->size * min) + 1; +} + +void CloseVOp::compile(Char *rep, Ins *i) +{ + Ins *jumppoint; + int st = 0; + jumppoint = i + ((1 + exp->size) * (max - min)); + + for (st = min; st < max; st++) + { + i->i.tag = FORK; + i->i.link = jumppoint; + i += 1; + exp->compile(rep, &i[0]); + i += exp->size; + } + + for (st = 0; st < min; st++) + { + exp->compile(rep, &i[0]); + i += exp->size; + + if (max < 0 && st == 0) + { + i->i.tag = FORK; + i->i.link = i - exp->size; + i++; + } + } +} + +void CloseVOp::split(CharSet &s) +{ + exp->split(s); } RegExp *expr(Scanner &); -uchar unescape(SubStr &s){ - s.len--; - uchar c; - if((c = *s.str++) != '\\' || s.len == 0) - return xlat[c]; - s.len--; - switch(c = *s.str++){ - case 'n': - return xlat['\n']; - case 't': - return xlat['\t']; - case 'v': - return xlat['\v']; - case 'b': - return xlat['\b']; - case 'r': - return xlat['\r']; - case 'f': - return xlat['\f']; - case 'a': - return xlat['\a']; - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': { - uchar v = c - '0'; - for(; s.len != 0 && '0' <= (c = *s.str) && c <= '7'; s.len--, s.str++) - v = v*8 + (c - '0'); - return v; - } default: - return xlat[c]; - } -} - -Range *getRange(SubStr &s){ - uchar lb = unescape(s), ub; - if(s.len < 2 || *s.str != '-'){ - ub = lb; - } else { - s.len--; s.str++; - ub = unescape(s); - if(ub < lb){ - uchar tmp; - tmp = lb; lb = ub; ub = tmp; +uchar unescape(SubStr &s) +{ + s.len--; + uchar c; + + if ((c = *s.str++) != '\\' || s.len == 0) + return xlat[c]; + + s.len--; + + switch (c = *s.str++) + { + + case 'n': + return xlat['\n']; + + case 't': + return xlat['\t']; + + case 'v': + return xlat['\v']; + + case 'b': + return xlat['\b']; + + case 'r': + return xlat['\r']; + + case 'f': + return xlat['\f']; + + case 'a': + return xlat['\a']; + + case '0': + + case '1': + + case '2': + + case '3': + + case '4': + + case '5': + + case '6': + + case '7': + { + uchar v = c - '0'; + + for (; s.len != 0 && '0' <= (c = *s.str) && c <= '7'; s.len--, s.str++) + v = v * 8 + (c - '0'); + + return v; + } + + default: + return xlat[c]; } - } - return new Range(lb, ub+1); } -RegExp *matchChar(uint c){ - return new MatchOp(new Range(c, c+1)); +Range *getRange(SubStr &s) +{ + uchar lb = unescape(s), ub; + + if (s.len < 2 || *s.str != '-') + { + ub = lb; + } + else + { + s.len--; + s.str++; + ub = unescape(s); + + if (ub < lb) + { + uchar tmp; + tmp = lb; + lb = ub; + ub = tmp; + } + } + + return new Range(lb, ub + 1); } -RegExp *strToRE(SubStr s){ - s.len -= 2; s.str += 1; - if(s.len == 0) - return new NullOp; - RegExp *re = matchChar(unescape(s)); - while(s.len > 0) - re = new CatOp(re, matchChar(unescape(s))); - return re; +RegExp *matchChar(uint c) +{ + return new MatchOp(new Range(c, c + 1)); } -RegExp *strToCaseInsensitiveRE(SubStr s){ - s.len -= 2; s.str += 1; - if(s.len == 0) - return new NullOp; +RegExp *strToRE(SubStr s) +{ + s.len -= 2; + s.str += 1; + + if (s.len == 0) + return new NullOp; + + RegExp *re = matchChar(unescape(s)); + + while (s.len > 0) + re = new CatOp(re, matchChar(unescape(s))); + + return re; +} + +RegExp *strToCaseInsensitiveRE(SubStr s) +{ + s.len -= 2; + s.str += 1; + + if (s.len == 0) + return new NullOp; + uchar c = unescape(s); + RegExp *re, *reL, *reU; - if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { - reL = matchChar(tolower(c)); - reU = matchChar(toupper(c)); - re = mkAlt(reL, reU); - } else { - re = matchChar(c); + + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) + { + reL = matchChar(tolower(c)); + reU = matchChar(toupper(c)); + re = mkAlt(reL, reU); } - while(s.len > 0) { + else + { + re = matchChar(c); + } + + while (s.len > 0) + { uchar c = unescape(s); - if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { - reL = matchChar(tolower(c)); - reU = matchChar(toupper(c)); + + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) + { + reL = matchChar(tolower(c)); + reU = matchChar(toupper(c)); re = new CatOp(re, mkAlt(reL, reU)); - } else { + } + else + { re = new CatOp(re, matchChar(c)); } } - return re; + + return re; } -RegExp *ranToRE(SubStr s){ - s.len -= 2; s.str += 1; - if(s.len == 0) - return new NullOp; - Range *r = getRange(s); - while(s.len > 0) - r = doUnion(r, getRange(s)); - return new MatchOp(r); +RegExp *ranToRE(SubStr s) +{ + s.len -= 2; + s.str += 1; + + if (s.len == 0) + return new NullOp; + + Range *r = getRange(s); + + while (s.len > 0) + r = doUnion(r, getRange(s)); + + return new MatchOp(r); } char *RuleOp::type = "RuleOp"; RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint a) - : exp(e), ctx(c), ins(NULL), accept(a), code(t) { - ; + : exp(e), ctx(c), ins(NULL), accept(a), code(t) +{ + ; } -void RuleOp::calcSize(Char *rep){ - exp->calcSize(rep); - ctx->calcSize(rep); - size = exp->size + ctx->size + 1; +void RuleOp::calcSize(Char *rep) +{ + exp->calcSize(rep); + ctx->calcSize(rep); + size = exp->size + ctx->size + 1; } -void RuleOp::compile(Char *rep, Ins *i){ - ins = i; - exp->compile(rep, &i[0]); - i += exp->size; - ctx->compile(rep, &i[0]); - i += ctx->size; - i->i.tag = TERM; - i->i.link = this; +void RuleOp::compile(Char *rep, Ins *i) +{ + ins = i; + exp->compile(rep, &i[0]); + i += exp->size; + ctx->compile(rep, &i[0]); + i += ctx->size; + i->i.tag = TERM; + i->i.link = this; } -void RuleOp::split(CharSet &s){ - exp->split(s); - ctx->split(s); +void RuleOp::split(CharSet &s) +{ + exp->split(s); + ctx->split(s); } extern void printSpan(std::ostream&, uint, uint); -void optimize(Ins *i){ - while(!isMarked(i)){ - mark(i); - if(i->i.tag == CHAR){ - i = (Ins*) i->i.link; - } else if(i->i.tag == GOTO || i->i.tag == FORK){ - Ins *target = (Ins*) i->i.link; - optimize(target); - if(target->i.tag == GOTO) - i->i.link = target->i.link == target? i : target; - if(i->i.tag == FORK){ - Ins *follow = (Ins*) &i[1]; - optimize(follow); - if(follow->i.tag == GOTO && follow->i.link == follow){ - i->i.tag = GOTO; - } else if(i->i.link == i){ - i->i.tag = GOTO; - i->i.link = follow; +void optimize(Ins *i) +{ + while (!isMarked(i)) + { + mark(i); + + if (i->i.tag == CHAR) + { + i = (Ins*) i->i.link; + } + else if (i->i.tag == GOTO || i->i.tag == FORK) + { + Ins *target = (Ins*) i->i.link; + optimize(target); + + if (target->i.tag == GOTO) + i->i.link = target->i.link == target ? i : target; + + if (i->i.tag == FORK) + { + Ins *follow = (Ins*) & i[1]; + optimize(follow); + + if (follow->i.tag == GOTO && follow->i.link == follow) + { + i->i.tag = GOTO; + } + else if (i->i.link == i) + { + i->i.tag = GOTO; + i->i.link = follow; + } + } + + return ; } + else + { + ++i; + } + } +} + +void genCode(std::ostream& o, RegExp *re) +{ + CharSet cs; + uint j; + memset(&cs, 0, sizeof(cs)); + + for (j = 0; j < nChars; ++j) + { + cs.rep[j] = &cs.ptn[0]; + cs.ptn[j].nxt = &cs.ptn[j + 1]; + } + + cs.freeHead = &cs.ptn[1]; + *(cs.freeTail = &cs.ptn[nChars - 1].nxt) = NULL; + cs.ptn[0].card = nChars; + cs.ptn[0].nxt = NULL; + re->split(cs); + /* + for(uint k = 0; k < nChars;){ + for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];); + printSpan(cerr, j, k); + cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl; } - return; - } else { - ++i; + */ + Char rep[nChars]; + + for (j = 0; j < nChars; ++j) + { + if (!cs.rep[j]->nxt) + cs.rep[j]->nxt = &cs.ptn[j]; + + rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]); } - } -} - -void genCode(std::ostream& o, RegExp *re){ - CharSet cs; - uint j; - memset(&cs, 0, sizeof(cs)); - for(j = 0; j < nChars; ++j){ - cs.rep[j] = &cs.ptn[0]; - cs.ptn[j].nxt = &cs.ptn[j+1]; - } - cs.freeHead = &cs.ptn[1]; - *(cs.freeTail = &cs.ptn[nChars-1].nxt) = NULL; - cs.ptn[0].card = nChars; - cs.ptn[0].nxt = NULL; - re->split(cs); -/* - for(uint k = 0; k < nChars;){ - for(j = k; ++k < nChars && cs.rep[k] == cs.rep[j];); - printSpan(cerr, j, k); - cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl; - } -*/ - Char rep[nChars]; - for(j = 0; j < nChars; ++j){ - if(!cs.rep[j]->nxt) - cs.rep[j]->nxt = &cs.ptn[j]; - rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]); - } - - re->calcSize(rep); - Ins *ins = new Ins[re->size+1]; - memset(ins, 0, (re->size+1)*sizeof(Ins)); - re->compile(rep, ins); - Ins *eoi = &ins[re->size]; - eoi->i.tag = GOTO; - eoi->i.link = eoi; - - optimize(ins); - for(j = 0; j < re->size;){ - unmark(&ins[j]); - if(ins[j].i.tag == CHAR){ - j = (Ins*) ins[j].i.link - ins; - } else { - j++; + + re->calcSize(rep); + Ins *ins = new Ins[re->size + 1]; + memset(ins, 0, (re->size + 1)*sizeof(Ins)); + re->compile(rep, ins); + Ins *eoi = &ins[re->size]; + eoi->i.tag = GOTO; + eoi->i.link = eoi; + + optimize(ins); + + for (j = 0; j < re->size;) + { + unmark(&ins[j]); + + if (ins[j].i.tag == CHAR) + { + j = (Ins*) ins[j].i.link - ins; + } + else + { + j++; + } } - } - DFA *dfa = new DFA(ins, re->size, 0, 256, rep); - dfa->emit(o); - delete dfa; - delete [] ins; + DFA *dfa = new DFA(ins, re->size, 0, 256, rep); + dfa->emit(o); + delete dfa; + delete [] ins; } diff --git a/basics.h b/basics.h index f43c25ac..a1233879 100644 --- a/basics.h +++ b/basics.h @@ -43,19 +43,19 @@ typedef unsigned long dword; #endif #ifndef HAVE_UINT -typedef unsigned int uint; +typedef unsigned int uint; #endif #ifndef HAVE_UCHAR -typedef unsigned char uchar; +typedef unsigned char uchar; #endif #ifndef HAVE_USHORT -typedef unsigned short ushort; +typedef unsigned short ushort; #endif #ifndef HAVE_ULONG -typedef unsigned long ulong; +typedef unsigned long ulong; #endif #endif diff --git a/code.cc b/code.cc index 73972e8f..e708d696 100644 --- a/code.cc +++ b/code.cc @@ -16,14 +16,18 @@ void Go::compact() { // arrange so that adjacent spans have different targets uint i = 0; - for(uint j = 1; j < nSpans; ++j) + + for (uint j = 1; j < nSpans; ++j) { - if(span[j].to != span[i].to) + if (span[j].to != span[i].to) { - ++i; span[i].to = span[j].to; + ++i; + span[i].to = span[j].to; } + span[i].ub = span[j].ub; } + nSpans = i + 1; } @@ -33,27 +37,35 @@ void Go::unmap(Go *base, State *x) uint lb = 0; s->ub = 0; s->to = NULL; - for(; b != e; ++b) + + for (; b != e; ++b) { - if(b->to == x) + if (b->to == x) { - if((s->ub - lb) > 1) + if ((s->ub - lb) > 1) { s->ub = b->ub; } - } else { - if(b->to != s->to) + } + else + { + if (b->to != s->to) { - if(s->ub) + if (s->ub) { - lb = s->ub; ++s; + lb = s->ub; + ++s; } + s->to = b->to; } + s->ub = b->ub; } } - s->ub = e[-1].ub; ++s; + + s->ub = e[ -1].ub; + ++s; nSpans = s - span; } @@ -61,15 +73,17 @@ void doGen(Go *g, State *s, uchar *bm, uchar m) { Span *b = g->span, *e = &b[g->nSpans]; uint lb = 0; - for(; b < e; ++b) + + for (; b < e; ++b) { - if(b->to == s) + if (b->to == s) { - for(; lb < b->ub; ++lb) + for (; lb < b->ub; ++lb) { bm[lb] |= m; } } + lb = b->ub; } } @@ -78,12 +92,14 @@ void prt(std::ostream& o, Go *g, State *s) { Span *b = g->span, *e = &b[g->nSpans]; uint lb = 0; - for(; b < e; ++b) + + for (; b < e; ++b) { - if(b->to == s) + if (b->to == s) { printSpan(o, lb, b->ub); } + lb = b->ub; } } @@ -94,40 +110,51 @@ bool matches(Go *g1, State *s1, Go *g2, State *s2) uint lb1 = 0; Span *b2 = g2->span, *e2 = &b2[g2->nSpans]; uint lb2 = 0; - for(;;) + + for (;;) { - for(; b1 < e1 && b1->to != s1; ++b1) + for (; b1 < e1 && b1->to != s1; ++b1) { lb1 = b1->ub; } - for(; b2 < e2 && b2->to != s2; ++b2) + + for (; b2 < e2 && b2->to != s2; ++b2) { lb2 = b2->ub; } - if(b1 == e1) + + if (b1 == e1) { return b2 == e2; } - if(b2 == e2){ + + if (b2 == e2) + { return false; } - if(lb1 != lb2 || b1->ub != b2->ub) + + if (lb1 != lb2 || b1->ub != b2->ub) { return false; } - ++b1; ++b2; + + ++b1; + ++b2; } } -class BitMap { +class BitMap +{ + public: static BitMap *first; - Go *go; - State *on; - BitMap *next; - uint i; - uchar m; - public: + Go *go; + State *on; + BitMap *next; + uint i; + uchar m; + +public: static BitMap *find(Go*, State*); static BitMap *find(State*); static void gen(std::ostream&, uint, uint); @@ -144,53 +171,62 @@ BitMap::BitMap(Go *g, State *x) : go(g), on(x), next(first) BitMap *BitMap::find(Go *g, State *x) { - for(BitMap *b = first; b; b = b->next) + for (BitMap *b = first; b; b = b->next) { - if(matches(b->go, b->on, g, x)) - { - return b; - } - } - return new BitMap(g, x); + if (matches(b->go, b->on, g, x)) + { + return b; + } + } + + return new BitMap(g, x); } BitMap *BitMap::find(State *x) { - for(BitMap *b = first; b; b = b->next) - { - if(b->on == x) + for (BitMap *b = first; b; b = b->next) + { + if (b->on == x) { - return b; - } - } - return NULL; + return b; + } + } + + return NULL; } void BitMap::gen(std::ostream &o, uint lb, uint ub) { BitMap *b = first; - if(b) + + if (b) { o << "\tstatic unsigned char yybm[] = {"; uint n = ub - lb; uchar *bm = new uchar[n]; memset(bm, 0, n); - for(uint i = 0; b; i += n) + + for (uint i = 0; b; i += n) { - for(uchar m = 0x80; b && m; b = b->next, m >>= 1) + for (uchar m = 0x80; b && m; b = b->next, m >>= 1) { - b->i = i; b->m = m; - doGen(b->go, b->on, bm-lb, m); + b->i = i; + b->m = m; + doGen(b->go, b->on, bm - lb, m); } - for(uint j = 0; j < n; ++j) + + for (uint j = 0; j < n; ++j) { - if(j%8 == 0) + if (j % 8 == 0) { - o << "\n\t"; ++oline; + o << "\n\t"; + ++oline; } + o << std::setw(3) << (uint) bm[j] << ", "; } } + o << "\n\t};\n"; oline += 2; } @@ -199,11 +235,14 @@ void BitMap::gen(std::ostream &o, uint lb, uint ub) void BitMap::stats() { uint n = 0; - for(BitMap *b = first; b; b = b->next) + + for (BitMap *b = first; b; b = b->next) { - prt(std::cerr, b->go, b->on); std::cerr << std::endl; + prt(std::cerr, b->go, b->on); + std::cerr << std::endl; ++n; } + std::cerr << n << " bitmaps\n"; first = NULL; } @@ -215,7 +254,8 @@ void genGoTo(std::ostream &o, State *from, State *to, bool & readCh) o << "\tyych = *YYCURSOR;\n"; readCh = false; } - o << "\tgoto yy" << to->label << ";\n"; + + o << "\tgoto yy" << to->label << ";\n"; ++oline; } @@ -230,6 +270,7 @@ void genIf(std::ostream &o, char *cmp, uint v, bool &readCh) { o << "\tif(yych "; } + o << cmp << " "; prtChOrHex(o, v); o << ")"; @@ -237,7 +278,7 @@ void genIf(std::ostream &o, char *cmp, uint v, bool &readCh) void indent(std::ostream &o, uint i) { - while(i-- > 0) + while (i-- > 0) { o << "\t"; } @@ -245,7 +286,7 @@ void indent(std::ostream &o, uint i) static void need(std::ostream &o, uint n, bool & readCh) { - if(n == 1) + if (n == 1) { o << "\tif(YYLIMIT == YYCURSOR) YYFILL(1);\n"; ++oline; @@ -255,6 +296,7 @@ static void need(std::ostream &o, uint n, bool & readCh) o << "\tif((YYLIMIT - YYCURSOR) < " << n << ") YYFILL(" << n << ");\n"; ++oline; } + o << "\tyych = *YYCURSOR;\n"; readCh = false; ++oline; @@ -277,8 +319,10 @@ void Match::emit(std::ostream &o, bool &readCh) o << "\tyych = *++YYCURSOR;\n"; readCh = false; } + ++oline; - if(state->link) + + if (state->link) { ++oline; need(o, state->depth, readCh); @@ -287,12 +331,15 @@ void Match::emit(std::ostream &o, bool &readCh) void Enter::emit(std::ostream &o, bool &readCh) { - if(state->link){ + if (state->link) + { o << "\t++YYCURSOR;\n"; o << "yy" << label << ":\n"; oline += 2; need(o, state->depth, readCh); - } else { + } + else + { /* we shouldn't need 'rule-following' protection here */ o << "\tyych = *++YYCURSOR;\n"; o << "yy" << label << ":\n"; @@ -305,63 +352,77 @@ void Save::emit(std::ostream &o, bool &readCh) { o << "\tyyaccept = " << selector << ";\n"; ++oline; - if(state->link){ + + if (state->link) + { o << "\tYYMARKER = ++YYCURSOR;\n"; ++oline; need(o, state->depth, readCh); - } else { + } + else + { o << "\tyych = *(YYMARKER = ++YYCURSOR);\n"; ++oline; readCh = false; } } -Move::Move(State *s) : Action(s) { - ; +Move::Move(State *s) : Action(s) +{ + ; } -void Move::emit(std::ostream &o, bool &readCh){ - ; +void Move::emit(std::ostream &o, bool &readCh) +{ + ; } Accept::Accept(State *x, uint n, uint *s, State **r) - : Action(x), nRules(n), saves(s), rules(r){ - ; + : Action(x), nRules(n), saves(s), rules(r) +{ + ; } void Accept::emit(std::ostream &o, bool &readCh) { bool first = true; - for(uint i = 0; i < nRules; ++i) - if(saves[i] != ~0u) - { - if(first) + + for (uint i = 0; i < nRules; ++i) + if (saves[i] != ~0u) { - first = false; - o << "\tYYCURSOR = YYMARKER;\n"; - o << "\tswitch(yyaccept){\n"; - oline += 2; + if (first) + { + first = false; + o << "\tYYCURSOR = YYMARKER;\n"; + o << "\tswitch(yyaccept){\n"; + oline += 2; + } + + o << "\tcase " << saves[i] << ":"; + genGoTo(o, state, rules[i], readCh); } - o << "\tcase " << saves[i] << ":"; - genGoTo(o, state, rules[i], readCh); - } - if(!first) + + if (!first) { o << "\t}\n"; ++oline; } } -Rule::Rule(State *s, RuleOp *r) : Action(s), rule(r) { - ; +Rule::Rule(State *s, RuleOp *r) : Action(s), rule(r) +{ + ; } void Rule::emit(std::ostream &o, bool &readCh) { uint back = rule->ctx->fixedLength(); - if(back != ~0u && back > 0u) { + + if (back != ~0u && back > 0u) + { o << "\tYYCURSOR -= " << back << ";"; } + o << "\n"; ++oline; line_source(rule->code->line, o); @@ -376,43 +437,61 @@ void Rule::emit(std::ostream &o, bool &readCh) void doLinear(std::ostream &o, uint i, Span *s, uint n, State *from, State *next, bool &readCh) { - for(;;) + for (;;) { State *bg = s[0].to; - while(n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1) + + while (n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1) { - if(s[1].to == next && n == 3) + if (s[1].to == next && n == 3) { - indent(o, i); genIf(o, "!=", s[0].ub, readCh); genGoTo(o, from, bg, readCh); - indent(o, i); genGoTo(o, from, next, readCh); - return; + indent(o, i); + genIf(o, "!=", s[0].ub, readCh); + genGoTo(o, from, bg, readCh); + indent(o, i); + genGoTo(o, from, next, readCh); + return ; } else { - indent(o, i); genIf(o, "==", s[0].ub, readCh); genGoTo(o, from, s[1].to, readCh); + indent(o, i); + genIf(o, "==", s[0].ub, readCh); + genGoTo(o, from, s[1].to, readCh); } - n -= 2; s += 2; + + n -= 2; + s += 2; } - if(n == 1) + + if (n == 1) { -// if(bg != next){ - indent(o, i); genGoTo(o, from, s[0].to, readCh); -// } - return; + // if(bg != next){ + indent(o, i); + genGoTo(o, from, s[0].to, readCh); + // } + return ; } - else if(n == 2 && bg == next) + else if (n == 2 && bg == next) { - indent(o, i); genIf(o, ">=", s[0].ub, readCh); genGoTo(o, from, s[1].to, readCh); - indent(o, i); genGoTo(o, from, next, readCh); - return; + indent(o, i); + genIf(o, ">=", s[0].ub, readCh); + genGoTo(o, from, s[1].to, readCh); + indent(o, i); + genGoTo(o, from, next, readCh); + return ; } else { - indent(o, i); genIf(o, "<=", s[0].ub - 1, readCh); genGoTo(o, from, bg, readCh); - n -= 1; s += 1; + indent(o, i); + genIf(o, "<=", s[0].ub - 1, readCh); + genGoTo(o, from, bg, readCh); + n -= 1; + s += 1; } } - indent(o, i); genGoTo(o, from, next, readCh); + + indent(o, i); + genGoTo(o, from, next, readCh); } void Go::genLinear(std::ostream &o, State *from, State *next, bool &readCh) @@ -422,15 +501,19 @@ void Go::genLinear(std::ostream &o, State *from, State *next, bool &readCh) void genCases(std::ostream &o, uint lb, Span *s) { - if(lb < s->ub) + if (lb < s->ub) { - for(;;) + for (;;) { - o << "\tcase "; prtChOrHex(o, lb); o << ":"; - if(++lb == s->ub) + o << "\tcase "; + prtChOrHex(o, lb); + o << ":"; + + if (++lb == s->ub) { break; } + o << "\n"; ++oline; } @@ -439,80 +522,93 @@ void genCases(std::ostream &o, uint lb, Span *s) void Go::genSwitch(std::ostream &o, State *from, State *next, bool &readCh) { - if(nSpans <= 2){ + if (nSpans <= 2) + { genLinear(o, from, next, readCh); } else { - State *def = span[nSpans-1].to; - Span **sP = new Span*[nSpans-1], **r, **s, **t; - + State *def = span[nSpans - 1].to; + Span **sP = new Span * [nSpans - 1], **r, **s, **t; + t = &sP[0]; - for(uint i = 0; i < nSpans; ++i) + + for (uint i = 0; i < nSpans; ++i) { - if(span[i].to != def) + if (span[i].to != def) { *(t++) = &span[i]; } } - + if (readCh) { o << "\tswitch((yych = *YYCURSOR)) {\n"; - readCh =false; + readCh = false; } else { o << "\tswitch(yych){\n"; } + ++oline; - while(t != &sP[0]) + + while (t != &sP[0]) { r = s = &sP[0]; - if(*s == &span[0]) - genCases(o, 0, *s); + + if (*s == &span[0]) + genCases(o, 0, *s); else - genCases(o, (*s)[-1].ub, *s); + genCases(o, (*s)[ -1].ub, *s); + State *to = (*s)->to; - while(++s < t) + + while (++s < t) { - if((*s)->to == to) + if ((*s)->to == to) { - genCases(o, (*s)[-1].ub, *s); + genCases(o, (*s)[ -1].ub, *s); } else { *(r++) = *s; } } + genGoTo(o, from, to, readCh); t = r; } + o << "\tdefault:"; genGoTo(o, from, def, readCh); o << "\t}\n"; ++oline; - + delete [] sP; } } void doBinary(std::ostream &o, uint i, Span *s, uint n, State *from, State *next, bool &readCh) { - if(n <= 4) + if (n <= 4) { doLinear(o, i, s, n, from, next, readCh); } else { - uint h = n/2; - indent(o, i); genIf(o, "<=", s[h-1].ub - 1, readCh); o << "{\n"; + uint h = n / 2; + indent(o, i); + genIf(o, "<=", s[h - 1].ub - 1, readCh); + o << "{\n"; ++oline; - doBinary(o, i+1, &s[0], h, from, next, readCh); - indent(o, i); o << "\t} else {\n"; + doBinary(o, i + 1, &s[0], h, from, next, readCh); + indent(o, i); + o << "\t} else {\n"; ++oline; - doBinary(o, i+1, &s[h], n - h, from, next, readCh); - indent(o, i); o << "\t}\n"; + doBinary(o, i + 1, &s[h], n - h, from, next, readCh); + indent(o, i); + o << "\t}\n"; ++oline; } } @@ -524,41 +620,46 @@ void Go::genBinary(std::ostream &o, State *from, State *next, bool &readCh) void Go::genBase(std::ostream &o, State *from, State *next, bool &readCh) { - if(nSpans == 0) + if (nSpans == 0) { - return; + return ; } - if(!sFlag) + + if (!sFlag) { genSwitch(o, from, next, readCh); - return; + return ; } - if(nSpans > 8) + + if (nSpans > 8) { - Span *bot = &span[0], *top = &span[nSpans-1]; + Span *bot = &span[0], *top = &span[nSpans - 1]; uint util; - if(bot[0].to == top[0].to) + + if (bot[0].to == top[0].to) { - util = (top[-1].ub - bot[0].ub)/(nSpans - 2); + util = (top[ -1].ub - bot[0].ub) / (nSpans - 2); } else { - if(bot[0].ub > (top[0].ub - top[-1].ub)) + if (bot[0].ub > (top[0].ub - top[ -1].ub)) { - util = (top[0].ub - bot[0].ub)/(nSpans - 1); + util = (top[0].ub - bot[0].ub) / (nSpans - 1); } else { - util = top[-1].ub/(nSpans - 1); + util = top[ -1].ub / (nSpans - 1); } } - if(util <= 2) + + if (util <= 2) { genSwitch(o, from, next, readCh); - return; + return ; } } - if(nSpans > 5) + + if (nSpans > 5) { genBinary(o, from, next, readCh); } @@ -570,20 +671,23 @@ void Go::genBase(std::ostream &o, State *from, State *next, bool &readCh) void Go::genGoto(std::ostream &o, State *from, State *next, bool &readCh) { - if(bFlag) + if (bFlag) { - for(uint i = 0; i < nSpans; ++i) + for (uint i = 0; i < nSpans; ++i) { State *to = span[i].to; - if(to && to->isBase) + + if (to && to->isBase) { BitMap *b = BitMap::find(to); - if(b && matches(b->go, b->on, this, to)) + + if (b && matches(b->go, b->on, this, to)) { Go go; go.span = new Span[nSpans]; go.unmap(this, to); o << "\tif(yybm[" << b->i << "+"; + if (readCh) { o << "(yych = *YYCURSOR)"; @@ -592,21 +696,24 @@ void Go::genGoto(std::ostream &o, State *from, State *next, bool &readCh) { o << "yych"; } + o << "] & " << (uint) b->m << ")"; genGoTo(o, from, to, readCh); go.genBase(o, from, next, readCh); delete [] go.span; - return; + return ; } } } } + genBase(o, from, next, readCh); } -void State::emit(std::ostream &o, bool &readCh){ +void State::emit(std::ostream &o, bool &readCh) +{ o << "yy" << label << ":"; -/* o << "\nfprintf(stderr, \"<" << label << ">\");\n";*/ + /* o << "\nfprintf(stderr, \"<" << label << ">\");\n";*/ action->emit(o, readCh); } @@ -616,29 +723,40 @@ uint merge(Span *x0, State *fg, State *bg) uint nf = fg->go.nSpans, nb = bg->go.nSpans; State *prev = NULL, *to; // NB: we assume both spans are for same range - for(;;) + + for (;;) { - if(f->ub == b->ub) + if (f->ub == b->ub) { - to = f->to == b->to? bg : f->to; - if(to == prev){ + to = f->to == b->to ? bg : f->to; + + if (to == prev) + { --x; } else { x->to = prev = to; } + x->ub = f->ub; - ++x; ++f; --nf; ++b; --nb; - if(nf == 0 && nb == 0) + ++x; + ++f; + --nf; + ++b; + --nb; + + if (nf == 0 && nb == 0) { return x - x0; } } - while(f->ub < b->ub) + + while (f->ub < b->ub) { - to = f->to == b->to? bg : f->to; - if(to == prev) + to = f->to == b->to ? bg : f->to; + + if (to == prev) { --x; } @@ -646,13 +764,18 @@ uint merge(Span *x0, State *fg, State *bg) { x->to = prev = to; } + x->ub = f->ub; - ++x; ++f; --nf; + ++x; + ++f; + --nf; } - while(b->ub < f->ub) + + while (b->ub < f->ub) { - to = b->to == f->to? bg : f->to; - if(to == prev) + to = b->to == f->to ? bg : f->to; + + if (to == prev) { --x; } @@ -660,28 +783,36 @@ uint merge(Span *x0, State *fg, State *bg) { x->to = prev = to; } + x->ub = b->ub; - ++x; ++b; --nb; + ++x; + ++b; + --nb; } } } const uint cInfinity = ~0; -class SCC { +class SCC +{ + public: State **top, **stk; + public: SCC(uint); ~SCC(); void traverse(State*); }; -SCC::SCC(uint size){ - top = stk = new State*[size]; +SCC::SCC(uint size) +{ + top = stk = new State * [size]; } -SCC::~SCC(){ +SCC::~SCC() +{ delete [] stk; } @@ -690,90 +821,106 @@ void SCC::traverse(State *x) *top = x; uint k = ++top - stk; x->depth = k; - for(uint i = 0; i < x->go.nSpans; ++i) + + for (uint i = 0; i < x->go.nSpans; ++i) { State *y = x->go.span[i].to; - if(y) + + if (y) { - if(y->depth == 0) + if (y->depth == 0) { traverse(y); } - if(y->depth < x->depth) + + if (y->depth < x->depth) { x->depth = y->depth; } } } - if(x->depth == k) + + if (x->depth == k) { do { (*--top)->depth = cInfinity; (*top)->link = x; - } while(*top != x); + } + while (*top != x); } } uint maxDist(State *s) { uint mm = 0; - for(uint i = 0; i < s->go.nSpans; ++i) + + for (uint i = 0; i < s->go.nSpans; ++i) { State *t = s->go.span[i].to; - if(t) + + if (t) { uint m = 1; - if(!t->link) + + if (!t->link) { m += maxDist(t); } - if(m > mm) + + if (m > mm) { mm = m; } } } + return mm; } void calcDepth(State *head) { State *t; - for(State *s = head; s; s = s->next) + + for (State *s = head; s; s = s->next) { - if(s->link == s){ - for(uint i = 0; i < s->go.nSpans; ++i) + if (s->link == s) + { + for (uint i = 0; i < s->go.nSpans; ++i) { t = s->go.span[i].to; - if(t && t->link == s) + + if (t && t->link == s) { goto inSCC; } } + s->link = NULL; - }else + } + else { - inSCC: + +inSCC: s->depth = maxDist(s); } } } - + void DFA::findSCCs() { SCC scc(nStates); State *s; - for(s = head; s; s = s->next) + for (s = head; s; s = s->next) { s->depth = 0; s->link = NULL; } - for(s = head; s; s = s->next) + for (s = head; s; s = s->next) { - if(!s->depth) + if (!s->depth) { scc.traverse(s); } @@ -808,9 +955,10 @@ void DFA::emit(std::ostream &o) head->depth = maxDist(head); uint nRules = 0; - for(s = head; s; s = s->next) + + for (s = head; s; s = s->next) { - if(s->rule && s->rule->accept >= nRules) + if (s->rule && s->rule->accept >= nRules) { nRules = s->rule->accept + 1; } @@ -821,77 +969,94 @@ void DFA::emit(std::ostream &o) memset(saves, ~0, (nRules)*sizeof(*saves)); // mark backtracking points - for(s = head; s; s = s->next) + + for (s = head; s; s = s->next) { - RuleOp *ignore = NULL; - if(s->rule) + RuleOp * ignore = NULL; + + if (s->rule) { - for(i = 0; i < s->go.nSpans; ++i) + for (i = 0; i < s->go.nSpans; ++i) { - if(s->go.span[i].to && !s->go.span[i].to->rule){ + if (s->go.span[i].to && !s->go.span[i].to->rule) + { delete s->action; - if(saves[s->rule->accept] == ~0u) + + if (saves[s->rule->accept] == ~0u) { saves[s->rule->accept] = nSaves++; } + (void) new Save(s, saves[s->rule->accept]); continue; } } + ignore = s->rule; } } // insert actions - State **rules = new State*[nRules]; + State **rules = new State * [nRules]; + memset(rules, 0, (nRules)*sizeof(*rules)); + State *accept = NULL; - for(s = head; s; s = s->next) + + for (s = head; s; s = s->next) { - State *ow; - if(!s->rule) + State * ow; + + if (!s->rule) { ow = accept; } else { - if(!rules[s->rule->accept]) + if (!rules[s->rule->accept]) { State *n = new State; (void) new Rule(n, s->rule); rules[s->rule->accept] = n; addState(&s->next, n); } + ow = rules[s->rule->accept]; } - for(i = 0; i < s->go.nSpans; ++i) - if(!s->go.span[i].to) - { - if(!ow) + + for (i = 0; i < s->go.nSpans; ++i) + if (!s->go.span[i].to) { - ow = accept = new State; - (void) new Accept(accept, nRules, saves, rules); - addState(&s->next, accept); + if (!ow) + { + ow = accept = new State; + (void) new Accept(accept, nRules, saves, rules); + addState(&s->next, accept); + } + + s->go.span[i].to = ow; } - s->go.span[i].to = ow; - } } // split ``base'' states into two parts - for(s = head; s; s = s->next) + for (s = head; s; s = s->next) { s->isBase = false; - if(s->link) + + if (s->link) { - for(i = 0; i < s->go.nSpans; ++i) + for (i = 0; i < s->go.nSpans; ++i) { - if(s->go.span[i].to == s){ + if (s->go.span[i].to == s) + { s->isBase = true; split(s); - if(bFlag) + + if (bFlag) { BitMap::find(&s->next->go, s); } + s = s->next; break; } @@ -901,29 +1066,34 @@ void DFA::emit(std::ostream &o) // find ``base'' state, if possible Span *span = new Span[ubChar - lbChar]; - for(s = head; s; s = s->next) + + for (s = head; s; s = s->next) { - if(!s->link) + if (!s->link) { - for(i = 0; i < s->go.nSpans; ++i) + for (i = 0; i < s->go.nSpans; ++i) { State *to = s->go.span[i].to; - if(to && to->isBase) + + if (to && to->isBase) { to = to->go.span[0].to; uint nSpans = merge(span, s, to); - if(nSpans < s->go.nSpans) + + if (nSpans < s->go.nSpans) { delete [] s->go.span; s->go.nSpans = nSpans; s->go.span = new Span[nSpans]; memcpy(s->go.span, span, nSpans*sizeof(Span)); } + break; } } } } + delete [] span; delete head->action; @@ -934,7 +1104,7 @@ void DFA::emit(std::ostream &o) oline += 3; - if(bFlag) + if (bFlag) { BitMap::gen(o, lbChar, ubChar); } @@ -943,17 +1113,18 @@ void DFA::emit(std::ostream &o) ++oline; (void) new Enter(head, label++); - for(s = head; s; s = s->next) + for (s = head; s; s = s->next) { s->label = label++; } - for(s = head; s; s = s->next) + for (s = head; s; s = s->next) { bool readCh = false; s->emit(o, readCh); s->go.genGoto(o, s, s->next, readCh); } + o << "}\n"; ++oline; diff --git a/dfa.cc b/dfa.cc index 8bc74707..27cdb05d 100644 --- a/dfa.cc +++ b/dfa.cc @@ -5,247 +5,353 @@ #include "substr.h" #include "dfa.h" -inline char octCh(uint c){ - return '0' + c%8; +inline char octCh(uint c) +{ + return '0' + c % 8; } -inline char hexCh(uint c){ - if (c < 10) - { - return '0' + c%16; - } - else - { - return 'A' + c%16 - 10; - } +inline char hexCh(uint c) +{ + if (c < 10) + { + return '0' + c % 16; + } + else + { + return 'A' + c % 16 - 10; + } } -void prtChOrHex(std::ostream& o, uchar c){ - uchar oc = talx[c]; - if (isprint(oc)) - { - o << '\''; - prtCh(o, c); - o << '\''; - } - else - { - o << "0x" << hexCh(c>>4) << hexCh(c); - } -} +void prtChOrHex(std::ostream& o, uchar c) +{ + uchar oc = talx[c]; -void prtCh(std::ostream &o, uchar c){ - uchar oc = talx[c]; - switch(oc){ - case '\'': o << "\\'"; break; - case '\n': o << "\\n"; break; - case '\t': o << "\\t"; break; - case '\v': o << "\\v"; break; - case '\b': o << "\\b"; break; - case '\r': o << "\\r"; break; - case '\f': o << "\\f"; break; - case '\a': o << "\\a"; break; - case '\\': o << "\\\\"; break; - default: - if(isprint(oc)) - o << (char) oc; + if (isprint(oc)) + { + o << '\''; + prtCh(o, c); + o << '\''; + } else - o << '\\' << octCh(c/64) << octCh(c/8) << octCh(c); - } + { + o << "0x" << hexCh(c >> 4) << hexCh(c); + } } -void printSpan(std::ostream &o, uint lb, uint ub){ - if(lb > ub) - o << "*"; - o << "["; - if((ub - lb) == 1){ - prtCh(o, lb); - } else { - prtCh(o, lb); - o << "-"; - prtCh(o, ub-1); - } - o << "]"; +void prtCh(std::ostream &o, uchar c) +{ + uchar oc = talx[c]; + + switch (oc) + { + + case '\'': + o << "\\'"; + break; + + case '\n': + o << "\\n"; + break; + + case '\t': + o << "\\t"; + break; + + case '\v': + o << "\\v"; + break; + + case '\b': + o << "\\b"; + break; + + case '\r': + o << "\\r"; + break; + + case '\f': + o << "\\f"; + break; + + case '\a': + o << "\\a"; + break; + + case '\\': + o << "\\\\"; + break; + + default: + + if (isprint(oc)) + o << (char) oc; + else + o << '\\' << octCh(c / 64) << octCh(c / 8) << octCh(c); + } } -uint Span::show(std::ostream &o, uint lb){ - if(to){ - printSpan(o, lb, ub); - o << " " << to->label << "; "; - } - return ub; +void printSpan(std::ostream &o, uint lb, uint ub) +{ + if (lb > ub) + o << "*"; + + o << "["; + + if ((ub - lb) == 1) + { + prtCh(o, lb); + } + else + { + prtCh(o, lb); + o << "-"; + prtCh(o, ub - 1); + } + + o << "]"; } -std::ostream& operator<<(std::ostream &o, const State &s){ - o << "state " << s.label; - if(s.rule) - o << " accepts " << s.rule->accept; - o << "\n"; - ++oline; - uint lb = 0; - for(uint i = 0; i < s.go.nSpans; ++i) - lb = s.go.span[i].show(o, lb); - return o; +uint Span::show(std::ostream &o, uint lb) +{ + if (to) + { + printSpan(o, lb, ub); + o << " " << to->label << "; "; + } + + return ub; } -std::ostream& operator<<(std::ostream &o, const DFA &dfa){ - for(State *s = dfa.head; s; s = s->next) - { - o << s << "\n\n"; +std::ostream& operator<<(std::ostream &o, const State &s) +{ + o << "state " << s.label; + + if (s.rule) + o << " accepts " << s.rule->accept; + + o << "\n"; + ++oline; - } - return o; + + uint lb = 0; + + for (uint i = 0; i < s.go.nSpans; ++i) + lb = s.go.span[i].show(o, lb); + + return o; } -State::State() : rule(NULL), link(NULL), kCount(0), kernel(NULL), action(NULL) { - go.nSpans = 0; - go.span = NULL; +std::ostream& operator<<(std::ostream &o, const DFA &dfa) +{ + for (State *s = dfa.head; s; s = s->next) + { + o << s << "\n\n"; + ++oline; + } + + return o; } -State::~State(){ - delete [] kernel; - delete [] go.span; +State::State() : rule(NULL), link(NULL), kCount(0), kernel(NULL), action(NULL) +{ + go.nSpans = 0; + go.span = NULL; } -static Ins **closure(Ins **cP, Ins *i){ - while(!isMarked(i)){ - mark(i); - *(cP++) = i; - if(i->i.tag == FORK){ - cP = closure(cP, i + 1); - i = (Ins*) i->i.link; - } else if(i->i.tag == GOTO){ - i = (Ins*) i->i.link; - } else - break; - } - return cP; +State::~State() +{ + delete [] kernel; + delete [] go.span; } -struct GoTo { - Char ch; - void *to; +static Ins **closure(Ins **cP, Ins *i) +{ + while (!isMarked(i)) + { + mark(i); + *(cP++) = i; + + if (i->i.tag == FORK) + { + cP = closure(cP, i + 1); + i = (Ins*) i->i.link; + } + else if (i->i.tag == GOTO) + { + i = (Ins*) i->i.link; + } + else + break; + } + + return cP; +} + +struct GoTo +{ + Char ch; + void *to; }; DFA::DFA(Ins *ins, uint ni, uint lb, uint ub, Char *rep) - : lbChar(lb), ubChar(ub) { - Ins **work = new Ins*[ni+1]; - uint nc = ub - lb; - GoTo *goTo = new GoTo[nc]; - Span *span = new Span[nc]; - memset((char*) goTo, 0, nc*sizeof(GoTo)); - tail = &head; - head = NULL; - nStates = 0; - toDo = NULL; - findState(work, closure(work, &ins[0]) - work); - while(toDo){ - State *s = toDo; - toDo = s->link; + : lbChar(lb), ubChar(ub) +{ + Ins **work = new Ins * [ni + 1]; + uint nc = ub - lb; + GoTo *goTo = new GoTo[nc]; + Span *span = new Span[nc]; + memset((char*) goTo, 0, nc*sizeof(GoTo)); + tail = &head; + head = NULL; + nStates = 0; + toDo = NULL; + findState(work, closure(work, &ins[0]) - work); - Ins **cP, **iP, *i; - uint nGoTos = 0; - uint j; - - s->rule = NULL; - for(iP = s->kernel; (i = *iP); ++iP){ - if(i->i.tag == CHAR){ - for(Ins *j = i + 1; j < (Ins*) i->i.link; ++j){ - if(!(j->c.link = goTo[j->c.value - lb].to)) - goTo[nGoTos++].ch = j->c.value; - goTo[j->c.value - lb].to = j; + while (toDo) + { + State *s = toDo; + toDo = s->link; + + Ins **cP, **iP, *i; + uint nGoTos = 0; + uint j; + + s->rule = NULL; + + for (iP = s->kernel; (i = *iP); ++iP) + { + if (i->i.tag == CHAR) + { + for (Ins *j = i + 1; j < (Ins*) i->i.link; ++j) + { + if (!(j->c.link = goTo[j->c.value - lb].to)) + goTo[nGoTos++].ch = j->c.value; + + goTo[j->c.value - lb].to = j; + } + } + else if (i->i.tag == TERM) + { + if (!s->rule || ((RuleOp*) i->i.link)->accept < s->rule->accept) + s->rule = (RuleOp*) i->i.link; + } } - } else if(i->i.tag == TERM){ - if(!s->rule || ((RuleOp*) i->i.link)->accept < s->rule->accept) - s->rule = (RuleOp*) i->i.link; - } - } - for(j = 0; j < nGoTos; ++j){ - GoTo *go = &goTo[goTo[j].ch - lb]; - i = (Ins*) go->to; - for(cP = work; i; i = (Ins*) i->c.link) - cP = closure(cP, i + i->c.bump); - go->to = findState(work, cP - work); - } + for (j = 0; j < nGoTos; ++j) + { + GoTo *go = &goTo[goTo[j].ch - lb]; + i = (Ins*) go->to; - s->go.nSpans = 0; - for(j = 0; j < nc;){ - State *to = (State*) goTo[rep[j]].to; - while(++j < nc && goTo[rep[j]].to == to); - span[s->go.nSpans].ub = lb + j; - span[s->go.nSpans].to = to; - s->go.nSpans++; - } + for (cP = work; i; i = (Ins*) i->c.link) + cP = closure(cP, i + i->c.bump); + + go->to = findState(work, cP - work); + } + + s->go.nSpans = 0; + + for (j = 0; j < nc;) + { + State *to = (State*) goTo[rep[j]].to; + + while (++j < nc && goTo[rep[j]].to == to) + + ; + span[s->go.nSpans].ub = lb + j; + + span[s->go.nSpans].to = to; + + s->go.nSpans++; + } + + for (j = nGoTos; j-- > 0;) + goTo[goTo[j].ch - lb].to = NULL; - for(j = nGoTos; j-- > 0;) - goTo[goTo[j].ch - lb].to = NULL; + s->go.span = new Span[s->go.nSpans]; - s->go.span = new Span[s->go.nSpans]; - memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span)); + memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span)); - (void) new Match(s); + (void) new Match(s); - } - delete [] work; - delete [] goTo; - delete [] span; + } + + delete [] work; + delete [] goTo; + delete [] span; } -DFA::~DFA(){ - State *s; - while((s = head)){ - head = s->next; - delete s; - } +DFA::~DFA() +{ + State *s; + + while ((s = head)) + { + head = s->next; + delete s; + } } -void DFA::addState(State **a, State *s){ - s->label = nStates++; - s->next = *a; - *a = s; - if(a == tail) - tail = &s->next; +void DFA::addState(State **a, State *s) +{ + s->label = nStates++; + s->next = *a; + *a = s; + + if (a == tail) + tail = &s->next; } -State *DFA::findState(Ins **kernel, uint kCount){ - Ins **cP, **iP, *i; - State *s; +State *DFA::findState(Ins **kernel, uint kCount) +{ + Ins **cP, **iP, *i; + State *s; - kernel[kCount] = NULL; + kernel[kCount] = NULL; - cP = kernel; - for(iP = kernel; (i = *iP); ++iP){ - if(i->i.tag == CHAR || i->i.tag == TERM){ - *cP++ = i; - } else { - unmark(i); + cP = kernel; + + for (iP = kernel; (i = *iP); ++iP) + { + if (i->i.tag == CHAR || i->i.tag == TERM) + { + *cP++ = i; + } + else + { + unmark(i); + } } - } - kCount = cP - kernel; - kernel[kCount] = NULL; - - for(s = head; s; s = s->next){ - if(s->kCount == kCount){ - for(iP = s->kernel; (i = *iP); ++iP) - if(!isMarked(i)) - goto nextState; - goto unmarkAll; - } - nextState:; - } - - s = new State; - addState(tail, s); - s->kCount = kCount; - s->kernel = new Ins*[kCount+1]; - memcpy(s->kernel, kernel, (kCount+1)*sizeof(Ins*)); - s->link = toDo; - toDo = s; + + kCount = cP - kernel; + kernel[kCount] = NULL; + + for (s = head; s; s = s->next) + { + if (s->kCount == kCount) + { + for (iP = s->kernel; (i = *iP); ++iP) + if (!isMarked(i)) + goto nextState; + + goto unmarkAll; + } + +nextState: + ; + } + + s = new State; + addState(tail, s); + s->kCount = kCount; + s->kernel = new Ins * [kCount + 1]; + memcpy(s->kernel, kernel, (kCount + 1)*sizeof(Ins*)); + s->link = toDo; + toDo = s; unmarkAll: - for(iP = kernel; (i = *iP); ++iP) - unmark(i); - return s; + for (iP = kernel; (i = *iP); ++iP) + unmark(i); + + return s; } diff --git a/dfa.h b/dfa.h index 37f4cc3b..1d3ffc73 100644 --- a/dfa.h +++ b/dfa.h @@ -10,166 +10,217 @@ extern void prtChOrHex(std::ostream&, uchar); extern void printSpan(std::ostream&, uint, uint); class DFA; + class State; -class Action { +class Action +{ + public: - State *state; + State *state; + public: - Action(State*); - virtual void emit(std::ostream&, bool&) = 0; - virtual bool isRule() const; - virtual bool isMatch() const; - virtual bool readAhead() const; + Action(State*); + virtual void emit(std::ostream&, bool&) = 0; + virtual bool isRule() const; + virtual bool isMatch() const; + virtual bool readAhead() const; }; -class Match: public Action { +class Match: public Action +{ + public: - Match(State*); - void emit(std::ostream&, bool&); - bool isMatch() const; + Match(State*); + void emit(std::ostream&, bool&); + bool isMatch() const; }; -class Enter: public Action { +class Enter: public Action +{ + public: - uint label; + uint label; + public: - Enter(State*, uint); - void emit(std::ostream&, bool&); + Enter(State*, uint); + void emit(std::ostream&, bool&); }; -class Save: public Match { +class Save: public Match +{ + public: - uint selector; + uint selector; + public: - Save(State*, uint); - void emit(std::ostream&, bool&); - bool isMatch() const; + Save(State*, uint); + void emit(std::ostream&, bool&); + bool isMatch() const; }; -class Move: public Action { +class Move: public Action +{ + public: - Move(State*); - void emit(std::ostream&, bool&); + Move(State*); + void emit(std::ostream&, bool&); }; -class Accept: public Action { +class Accept: public Action +{ + public: - uint nRules; - uint *saves; - State **rules; + uint nRules; + uint *saves; + State **rules; + public: - Accept(State*, uint, uint*, State**); - void emit(std::ostream&, bool&); + Accept(State*, uint, uint*, State**); + void emit(std::ostream&, bool&); }; -class Rule: public Action { +class Rule: public Action +{ + public: - RuleOp *rule; + RuleOp *rule; + public: - Rule(State*, RuleOp*); - void emit(std::ostream&, bool&); - bool isRule() const; + Rule(State*, RuleOp*); + void emit(std::ostream&, bool&); + bool isRule() const; }; -class Span { +class Span +{ + public: - uint ub; - State *to; + uint ub; + State *to; + public: - uint show(std::ostream&, uint); + uint show(std::ostream&, uint); }; -class Go { +class Go +{ + public: - uint nSpans; - Span *span; + uint nSpans; + Span *span; + public: - void genGoto(std::ostream&, State *from, State*, bool &readCh); - void genBase(std::ostream&, State *from, State*, bool &readCh); - void genLinear(std::ostream&, State *from, State*, bool &readCh); - void genBinary(std::ostream&, State *from, State*, bool &readCh); - void genSwitch(std::ostream&, State *from, State*, bool &readCh); - void compact(); - void unmap(Go*, State*); + void genGoto(std::ostream&, State *from, State*, bool &readCh); + void genBase(std::ostream&, State *from, State*, bool &readCh); + void genLinear(std::ostream&, State *from, State*, bool &readCh); + void genBinary(std::ostream&, State *from, State*, bool &readCh); + void genSwitch(std::ostream&, State *from, State*, bool &readCh); + void compact(); + void unmap(Go*, State*); }; -class State { -public: - uint label; - RuleOp *rule; - State *next; - State *link; - uint depth; // for finding SCCs - uint kCount; - Ins **kernel; - bool isBase:1; - Go go; - Action *action; -public: - State(); - ~State(); - void emit(std::ostream&, bool&); - friend std::ostream& operator<<(std::ostream&, const State&); - friend std::ostream& operator<<(std::ostream&, const State*); +class State +{ + +public: + uint label; + RuleOp *rule; + State *next; + State *link; + uint depth; // for finding SCCs + uint kCount; + Ins **kernel; + +bool isBase: + 1; + Go go; + Action *action; + +public: + State(); + ~State(); + void emit(std::ostream&, bool&); + friend std::ostream& operator<<(std::ostream&, const State&); + friend std::ostream& operator<<(std::ostream&, const State*); }; -class DFA { +class DFA +{ + public: - uint lbChar; - uint ubChar; - uint nStates; - State *head, **tail; - State *toDo; + uint lbChar; + uint ubChar; + uint nStates; + State *head, **tail; + State *toDo; + public: - DFA(Ins*, uint, uint, uint, Char*); - ~DFA(); - void addState(State**, State*); - State *findState(Ins**, uint); - void split(State*); + DFA(Ins*, uint, uint, uint, Char*); + ~DFA(); + void addState(State**, State*); + State *findState(Ins**, uint); + void split(State*); - void findSCCs(); - void emit(std::ostream&); + void findSCCs(); + void emit(std::ostream&); - friend std::ostream& operator<<(std::ostream&, const DFA&); - friend std::ostream& operator<<(std::ostream&, const DFA*); + friend std::ostream& operator<<(std::ostream&, const DFA&); + friend std::ostream& operator<<(std::ostream&, const DFA*); }; -inline Action::Action(State *s) : state(s) { - s->action = this; +inline Action::Action(State *s) : state(s) +{ + s->action = this; } inline bool Action::isRule() const - { return false; } +{ + return false; +} inline bool Action::isMatch() const - { return false; } +{ + return false; +} inline bool Action::readAhead() const - { return !isMatch() || (state && state->next && state->next->action && !state->next->action->isRule()); } +{ + return !isMatch() || (state && state->next && state->next->action && !state->next->action->isRule()); +} inline Match::Match(State *s) : Action(s) - { } +{ } inline bool Match::isMatch() const - { return true; } +{ + return true; +} inline Enter::Enter(State *s, uint l) : Action(s), label(l) - { } +{ } inline Save::Save(State *s, uint i) : Match(s), selector(i) - { } +{ } inline bool Save::isMatch() const - { return false; } +{ + return false; +} inline bool Rule::isRule() const - { return true; } +{ + return true; +} inline std::ostream& operator<<(std::ostream &o, const State *s) - { return o << *s; } +{ + return o << *s; +} inline std::ostream& operator<<(std::ostream &o, const DFA *dfa) - { return o << *dfa; } +{ + return o << *dfa; +} #endif diff --git a/ins.h b/ins.h index 0781ab2f..4fec59b9 100644 --- a/ins.h +++ b/ins.h @@ -14,28 +14,39 @@ const uint TERM = 3; const uint CTXT = 4; union Ins { - struct { - byte tag; - byte marked; - void *link; - } i; - struct { - ushort value; - ushort bump; - void *link; - } c; + + struct + { + byte tag; + byte marked; + void *link; + } + + i; + + struct + { + ushort value; + ushort bump; + void *link; + } + + c; }; -inline bool isMarked(Ins *i){ - return i->i.marked != 0; +inline bool isMarked(Ins *i) +{ + return i->i.marked != 0; } -inline void mark(Ins *i){ - i->i.marked = true; +inline void mark(Ins *i) +{ + i->i.marked = true; } -inline void unmark(Ins *i){ - i->i.marked = false; +inline void unmark(Ins *i) +{ + i->i.marked = false; } #endif diff --git a/main.cc b/main.cc index 39fa3247..2e35d67b 100644 --- a/main.cc +++ b/main.cc @@ -24,36 +24,39 @@ using namespace std; static char *opt_arg = NULL; static int opt_ind = 1; -static const mbo_opt_struct OPTIONS[] = { - {'?', 0, "help"}, - {'b', 0, "bit-vectors"}, - {'e', 0, "ecb"}, - {'h', 0, "help"}, - {'s', 0, "nested-ifs"}, - {'o', 1, "output"}, - {'v', 0, "version"} -}; +static const mbo_opt_struct OPTIONS[] = + { + {'?', 0, "help" + }, + + {'b', 0, "bit-vectors"}, + {'e', 0, "ecb"}, + {'h', 0, "help"}, + {'s', 0, "nested-ifs"}, + {'o', 1, "output"}, + {'v', 0, "version"} + }; static void usage() { cerr << "usage: re2c [-esbvh] file\n" - "\n" - "-? -h --help Display this info.\n" - "\n" - "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n" - " coax better code out of the compiler. Most useful for\n" - " specifications with more than a few keywords (e.g. for\n" - " most programming languages).\n" - "\n" - "-e --ecb Cross-compile from an ASCII platform to\n" - " an EBCDIC one.\n" - "\n" - "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n" - " need this assist to generate better code.\n" - "\n" - "-o --output=output Specify the output file instead of stdout\n" - "\n" - "-v --version Show version information.\n"; + "\n" + "-? -h --help Display this info.\n" + "\n" + "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n" + " coax better code out of the compiler. Most useful for\n" + " specifications with more than a few keywords (e.g. for\n" + " most programming languages).\n" + "\n" + "-e --ecb Cross-compile from an ASCII platform to\n" + " an EBCDIC one.\n" + "\n" + "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n" + " need this assist to generate better code.\n" + "\n" + "-o --output=output Specify the output file instead of stdout\n" + "\n" + "-v --version Show version information.\n"; } int main(int argc, char *argv[]) @@ -61,35 +64,46 @@ int main(int argc, char *argv[]) int c; fileName = NULL; - if (argc == 1) { + if (argc == 1) + { usage(); return 2; } - while ((c = mbo_getopt(argc, argv, OPTIONS, &opt_arg, &opt_ind, 0))!=-1) { - switch (c) { + while ((c = mbo_getopt(argc, argv, OPTIONS, &opt_arg, &opt_ind, 0)) != -1) + { + switch (c) + { + case 'b': - sFlag = true; - bFlag = true; - break; + sFlag = true; + bFlag = true; + break; + case 'e': - xlat = asc2ebc; - talx = ebc2asc; - break; + xlat = asc2ebc; + talx = ebc2asc; + break; + case 's': - sFlag = true; - break; + sFlag = true; + break; + case 'o': - outputFileName = opt_arg; - break; + outputFileName = opt_arg; + break; + case 'v': - cerr << "re2c " << PACKAGE_VERSION << "\n"; - return 2; + cerr << "re2c " << PACKAGE_VERSION << "\n"; + return 2; + case 'h': + case '?': + default: - usage(); - return 2; + usage(); + return 2; } } @@ -105,7 +119,9 @@ int main(int argc, char *argv[]) // set up the input stream istream* input = 0; + ifstream inputFile; + if (fileName[0] == '-' && fileName[1] == '\0') { fileName = ""; @@ -114,17 +130,21 @@ int main(int argc, char *argv[]) else { inputFile.open(fileName); + if (!inputFile) { cerr << "can't open " << fileName << "\n"; return 1; } + input = &inputFile; } // set up the output stream ostream* output = 0; + ofstream outputFile; + if (outputFileName == 0 || (fileName[0] == '-' && fileName[1] == '\0')) { outputFileName = ""; @@ -133,11 +153,13 @@ int main(int argc, char *argv[]) else { outputFile.open(outputFileName); + if (!outputFile) { cerr << "can't open " << outputFileName << "\n"; return 1; } + output = &outputFile; } diff --git a/mbo_getopt.cc b/mbo_getopt.cc index 89f5b291..870b9fff 100755 --- a/mbo_getopt.cc +++ b/mbo_getopt.cc @@ -1,5 +1,5 @@ /* - Author: Marcus Boerger + Author: Marcus Boerger */ /* $Id$ */ @@ -18,24 +18,30 @@ static int mbo_opt_error(int argc, char * const *argv, int oint, int optchr, int { if (show_err) { - fprintf(stderr, "Error in argument %d, char %d: ", oint, optchr+1); - switch(err) + fprintf(stderr, "Error in argument %d, char %d: ", oint, optchr + 1); + + switch (err) { - case OPTERRCOLON: + + case OPTERRCOLON: fprintf(stderr, ": in flags\n"); break; - case OPTERRNF: + + case OPTERRNF: fprintf(stderr, "option not found %c\n", argv[oint][optchr]); break; - case OPTERRARG: + + case OPTERRARG: fprintf(stderr, "no argument for option %c\n", argv[oint][optchr]); break; - default: + + default: fprintf(stderr, "unknown\n"); break; } } - return('?'); + + return ('?'); } int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct opts[], char **optarg, int *optind, int show_err) @@ -46,97 +52,145 @@ int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct opts[], char ** int opts_idx = -1; - if (*optind >= argc) { - return(EOF); + if (*optind >= argc) + { + return (EOF); } - if (!dash) { - if ((argv[*optind][0] != '-')) { - return(EOF); - } else { + + if (!dash) + { + if ((argv[*optind][0] != '-')) + { + return (EOF); + } + else + { if (!argv[*optind][1]) { /* * use to specify stdin. Need to let pgm process this and * the following args - */ - return(EOF); + */ + return (EOF); } } } - if ((argv[*optind][0] == '-') && (argv[*optind][1] == '-')) { + + if ((argv[*optind][0] == '-') && (argv[*optind][1] == '-')) + { /* '--' indicates end of args if not followed by a known long option name */ - while (1) { + + while (1) + { opts_idx++; - if (opts[opts_idx].opt_char == '-') { + + if (opts[opts_idx].opt_char == '-') + { (*optind)++; - return(EOF); - } else if (opts[opts_idx].opt_name && !strcmp(&argv[*optind][2], opts[opts_idx].opt_name)) { + return (EOF); + } + else if (opts[opts_idx].opt_name && !strcmp(&argv[*optind][2], opts[opts_idx].opt_name)) + { break; } } + optchr = 0; dash = 1; arg_start = 2 + strlen(opts[opts_idx].opt_name); } - if (!dash) { + + if (!dash) + { dash = 1; optchr = 1; } /* Check if the guy tries to do a -: kind of flag */ - if (argv[*optind][optchr] == ':') { + if (argv[*optind][optchr] == ':') + { dash = 0; (*optind)++; - return (mbo_opt_error(argc, argv, *optind-1, optchr, OPTERRCOLON, show_err)); + return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRCOLON, show_err)); } - if (opts_idx < 0) { - while (1) { + + if (opts_idx < 0) + { + while (1) + { opts_idx++; - if (opts[opts_idx].opt_char == '-') { + + if (opts[opts_idx].opt_char == '-') + { int errind = *optind; int errchr = optchr; - - if (!argv[*optind][optchr+1]) { + + if (!argv[*optind][optchr + 1]) + { dash = 0; (*optind)++; - } else { + } + else + { optchr++; } - return(mbo_opt_error(argc, argv, errind, errchr, OPTERRNF, show_err)); - } else if (argv[*optind][optchr] == opts[opts_idx].opt_char) { + + return (mbo_opt_error(argc, argv, errind, errchr, OPTERRNF, show_err)); + } + else if (argv[*optind][optchr] == opts[opts_idx].opt_char) + { break; } } } - if (opts[opts_idx].need_param) { + + if (opts[opts_idx].need_param) + { /* Check for cases where the value of the argument is in the form - or in the form - */ dash = 0; - if(!argv[*optind][arg_start]) { + + if (!argv[*optind][arg_start]) + { (*optind)++; - if (*optind == argc) { - return(mbo_opt_error(argc, argv, *optind-1, optchr, OPTERRARG, show_err)); + + if (*optind == argc) + { + return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRARG, show_err)); } + *optarg = argv[(*optind)++]; - } else { + } + else + { *optarg = &argv[*optind][arg_start]; (*optind)++; } + return opts[opts_idx].opt_char; - } else { - if (arg_start == 2) { - if (!argv[*optind][optchr+1]) + } + else + { + if (arg_start == 2) + { + if (!argv[*optind][optchr + 1]) { dash = 0; (*optind)++; - } else { + } + else + { optchr++; } - } else { + } + else + { (*optind)++; } + return opts[opts_idx].opt_char; } + assert(0); - return(0); /* never reached */ + return (0); /* never reached */ } diff --git a/mbo_getopt.h b/mbo_getopt.h index 3a8dc2f4..acbc15ae 100755 --- a/mbo_getopt.h +++ b/mbo_getopt.h @@ -1,5 +1,5 @@ /* - Author: Marcus Boerger + Author: Marcus Boerger */ /* $Id$ */ @@ -7,10 +7,14 @@ /* Define structure for one recognized option (both single char and long name). * If short_open is '-' this is the last option. */ -typedef struct _mbo_opt_struct { + +typedef struct _mbo_opt_struct +{ const char opt_char; - const int need_param; + const int need_param; const char * opt_name; -} mbo_opt_struct; +} + +mbo_opt_struct; int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct opts[], char **optarg, int *optind, int show_err); diff --git a/parser.h b/parser.h index f7b3fe60..82238960 100644 --- a/parser.h +++ b/parser.h @@ -6,15 +6,18 @@ #include "re.h" #include -class Symbol { +class Symbol +{ + public: - static Symbol *first; - Symbol *next; - Str name; - RegExp *re; + static Symbol *first; + Symbol *next; + Str name; + RegExp *re; + public: - Symbol(const SubStr&); - static Symbol *find(const SubStr&); + Symbol(const SubStr&); + static Symbol *find(const SubStr&); }; void line_source(unsigned int, std::ostream&); diff --git a/parser.tab.h b/parser.tab.h index e443f004..f07debe7 100644 --- a/parser.tab.h +++ b/parser.tab.h @@ -3,10 +3,10 @@ #ifndef YYSTYPE typedef union { - Symbol *symbol; - RegExp *regexp; - Token *token; - char op; + Symbol *symbol; + RegExp *regexp; + Token *token; + char op; } yystype; # define YYSTYPE yystype # define YYSTYPE_IS_TRIVIAL 1 diff --git a/re.h b/re.h index 2319bb75..c19942d4 100644 --- a/re.h +++ b/re.h @@ -6,195 +6,285 @@ #include "token.h" #include "ins.h" -typedef struct extop { - char op; - int minsize; - int maxsize; -} ExtOp; - -struct CharPtn { - uint card; - CharPtn *fix; - CharPtn *nxt; +typedef struct extop +{ + char op; + int minsize; + int maxsize; +} + +ExtOp; + +struct CharPtn +{ + uint card; + CharPtn *fix; + CharPtn *nxt; }; -struct CharSet { - CharPtn *fix; - CharPtn *freeHead, **freeTail; - CharPtn *rep[nChars]; - CharPtn ptn[nChars]; +struct CharSet +{ + CharPtn *fix; + CharPtn *freeHead, **freeTail; + CharPtn *rep[nChars]; + CharPtn ptn[nChars]; }; -class Range { +class Range +{ + public: - Range *next; - uint lb, ub; // [lb,ub) + Range *next; + uint lb, ub; // [lb,ub) + public: - Range(uint l, uint u) : next(NULL), lb(l), ub(u) + Range(uint l, uint u) : next(NULL), lb(l), ub(u) { } - Range(Range &r) : next(NULL), lb(r.lb), ub(r.ub) + + Range(Range &r) : next(NULL), lb(r.lb), ub(r.ub) { } - friend std::ostream& operator<<(std::ostream&, const Range&); - friend std::ostream& operator<<(std::ostream&, const Range*); + + friend std::ostream& operator<<(std::ostream&, const Range&); + friend std::ostream& operator<<(std::ostream&, const Range*); }; -inline std::ostream& operator<<(std::ostream &o, const Range *r){ - return r? o << *r : o; +inline std::ostream& operator<<(std::ostream &o, const Range *r) +{ + return r ? o << *r : o; } -class RegExp { +class RegExp +{ + public: - uint size; + uint size; + public: - virtual char *typeOf() = 0; - RegExp *isA(char *t) - { return typeOf() == t? this : NULL; } - virtual void split(CharSet&) = 0; - virtual void calcSize(Char*) = 0; - virtual uint fixedLength(); - virtual void compile(Char*, Ins*) = 0; - virtual void display(std::ostream&) const = 0; - friend std::ostream& operator<<(std::ostream&, const RegExp&); - friend std::ostream& operator<<(std::ostream&, const RegExp*); + virtual char *typeOf() = 0; + RegExp *isA(char *t) + { + return typeOf() == t ? this : NULL; + } + + virtual void split(CharSet&) = 0; + virtual void calcSize(Char*) = 0; + virtual uint fixedLength(); + virtual void compile(Char*, Ins*) = 0; + virtual void display(std::ostream&) const = 0; + friend std::ostream& operator<<(std::ostream&, const RegExp&); + friend std::ostream& operator<<(std::ostream&, const RegExp*); }; -inline std::ostream& operator<<(std::ostream &o, const RegExp &re){ - re.display(o); - return o; +inline std::ostream& operator<<(std::ostream &o, const RegExp &re) +{ + re.display(o); + return o; } -inline std::ostream& operator<<(std::ostream &o, const RegExp *re){ - return o << *re; +inline std::ostream& operator<<(std::ostream &o, const RegExp *re) +{ + return o << *re; } -class NullOp: public RegExp { +class NullOp: public RegExp +{ + public: - static char *type; + static char *type; + public: - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(std::ostream &o) const { - o << "_"; - } + char *typeOf() + { + return type; + } + + void split(CharSet&); + void calcSize(Char*); + uint fixedLength(); + void compile(Char*, Ins*); + void display(std::ostream &o) const + { + o << "_"; + } }; -class MatchOp: public RegExp { +class MatchOp: public RegExp +{ + public: - static char *type; - Range *match; + static char *type; + Range *match; + public: - MatchOp(Range *m) : match(m) + MatchOp(Range *m) : match(m) { } - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(std::ostream&) const; + + char *typeOf() + { + return type; + } + + void split(CharSet&); + void calcSize(Char*); + uint fixedLength(); + void compile(Char*, Ins*); + void display(std::ostream&) const; }; -class RuleOp: public RegExp { +class RuleOp: public RegExp +{ + private: - RegExp *exp; -public: - RegExp *ctx; - static char *type; - Ins *ins; - uint accept; - Token *code; - uint line; -public: - RuleOp(RegExp*, RegExp*, Token*, uint); - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - void compile(Char*, Ins*); - void display(std::ostream &o) const { - o << exp << "/" << ctx << ";"; - } + RegExp *exp; + +public: + RegExp *ctx; + static char *type; + Ins *ins; + uint accept; + Token *code; + uint line; + +public: + RuleOp(RegExp*, RegExp*, Token*, uint); + char *typeOf() + { + return type; + } + + void split(CharSet&); + void calcSize(Char*); + void compile(Char*, Ins*); + void display(std::ostream &o) const + { + o << exp << "/" << ctx << ";"; + } }; -class AltOp: public RegExp { +class AltOp: public RegExp +{ + private: - RegExp *exp1, *exp2; -public: - static char *type; -public: - AltOp(RegExp *e1, RegExp *e2) - { exp1 = e1; exp2 = e2; } - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(std::ostream &o) const { - o << exp1 << "|" << exp2; - } - friend RegExp *mkAlt(RegExp*, RegExp*); + RegExp *exp1, *exp2; + +public: + static char *type; + +public: + AltOp(RegExp *e1, RegExp *e2) + { + exp1 = e1; + exp2 = e2; + } + + char *typeOf() + { + return type; + } + + void split(CharSet&); + void calcSize(Char*); + uint fixedLength(); + void compile(Char*, Ins*); + void display(std::ostream &o) const + { + o << exp1 << "|" << exp2; + } + + friend RegExp *mkAlt(RegExp*, RegExp*); }; -class CatOp: public RegExp { +class CatOp: public RegExp +{ + private: - RegExp *exp1, *exp2; -public: - static char *type; -public: - CatOp(RegExp *e1, RegExp *e2) - { exp1 = e1; exp2 = e2; } - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(std::ostream &o) const { - o << exp1 << exp2; - } + RegExp *exp1, *exp2; + +public: + static char *type; + +public: + CatOp(RegExp *e1, RegExp *e2) + { + exp1 = e1; + exp2 = e2; + } + + char *typeOf() + { + return type; + } + + void split(CharSet&); + void calcSize(Char*); + uint fixedLength(); + void compile(Char*, Ins*); + void display(std::ostream &o) const + { + o << exp1 << exp2; + } }; -class CloseOp: public RegExp { +class CloseOp: public RegExp +{ + private: - RegExp *exp; -public: - static char *type; -public: - CloseOp(RegExp *e) - { exp = e; } - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - void compile(Char*, Ins*); - void display(std::ostream &o) const { - o << exp << "+"; - } + RegExp *exp; + +public: + static char *type; + +public: + CloseOp(RegExp *e) + { + exp = e; + } + + char *typeOf() + { + return type; + } + + void split(CharSet&); + void calcSize(Char*); + void compile(Char*, Ins*); + void display(std::ostream &o) const + { + o << exp << "+"; + } }; -class CloseVOp: public RegExp { +class CloseVOp: public RegExp +{ + private: - RegExp *exp; - int min; - int max; -public: - static char *type; -public: - CloseVOp(RegExp *e, int lb, int ub) - { exp = e; min = lb; max = ub; } - char *typeOf() - { return type; } - void split(CharSet&); - void calcSize(Char*); - void compile(Char*, Ins*); - void display(std::ostream &o) const { - o << exp << "+"; - } + RegExp *exp; + int min; + int max; + +public: + static char *type; + +public: + CloseVOp(RegExp *e, int lb, int ub) + { + exp = e; + min = lb; + max = ub; + } + + char *typeOf() + { + return type; + } + + void split(CharSet&); + void calcSize(Char*); + void compile(Char*, Ins*); + void display(std::ostream &o) const + { + o << exp << "+"; + } }; extern void genCode(std::ostream&, RegExp*); @@ -202,5 +292,5 @@ extern RegExp *mkDiff(RegExp*, RegExp*); extern RegExp *strToRE(SubStr); extern RegExp *ranToRE(SubStr); extern RegExp *strToCaseInsensitiveRE(SubStr s); - + #endif diff --git a/scanner.h b/scanner.h index 3a43a656..55a37a18 100644 --- a/scanner.h +++ b/scanner.h @@ -5,30 +5,36 @@ #include #include "token.h" -class Scanner { - private: - std::istream& in; - char *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; - uint tchar, tline, cline; - private: - char *fill(char*); - Scanner(const Scanner&); //unimplemented - Scanner& operator=(const Scanner&); //unimplemented - public: - Scanner(std::istream&); - int echo(std::ostream&); - int scan(); - void fatal(char*); - SubStr token(); - uint line(); +class Scanner +{ + +private: + std::istream& in; + char *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; + uint tchar, tline, cline; + +private: + char *fill(char*); + Scanner(const Scanner&); //unimplemented + Scanner& operator=(const Scanner&); //unimplemented + +public: + Scanner(std::istream&); + int echo(std::ostream&); + int scan(); + void fatal(char*); + SubStr token(); + uint line(); }; -inline SubStr Scanner::token(){ - return SubStr(tok, cur - tok); +inline SubStr Scanner::token() +{ + return SubStr(tok, cur - tok); } -inline uint Scanner::line(){ - return cline; +inline uint Scanner::line() +{ + return cline; } #endif diff --git a/substr.cc b/substr.cc index 20adfcd0..b7f8f72f 100644 --- a/substr.cc +++ b/substr.cc @@ -3,35 +3,42 @@ #include "substr.h" #include "globals.h" -void SubStr::out(std::ostream& o) const { - o.write(str, len); - for (size_t i = 0; i < (size_t)len; ++i) - { - if (str[i] == '\n') - ++oline; - } +void SubStr::out(std::ostream& o) const +{ + o.write(str, len); + + for (size_t i = 0; i < (size_t)len; ++i) + { + if (str[i] == '\n') + ++oline; + } } -bool operator==(const SubStr &s1, const SubStr &s2){ - return (bool) (s1.len == s2.len && memcmp(s1.str, s2.str, s1.len) == 0); +bool operator==(const SubStr &s1, const SubStr &s2) +{ + return (bool) (s1.len == s2.len && memcmp(s1.str, s2.str, s1.len) == 0); } -Str::Str(const SubStr& s) : SubStr(new char[s.len], s.len) { - memcpy(str, s.str, s.len); +Str::Str(const SubStr& s) : SubStr(new char[s.len], s.len) +{ + memcpy(str, s.str, s.len); } -Str::Str(Str& s) : SubStr(s.str, s.len) { - s.str = NULL; - s.len = 0; +Str::Str(Str& s) : SubStr(s.str, s.len) +{ + s.str = NULL; + s.len = 0; } -Str::Str() : SubStr((char*) NULL, 0) { - ; +Str::Str() : SubStr((char*) NULL, 0) +{ + ; } -Str::~Str() { - delete str; - str = (char*)-1; - len = (uint)-1; +Str::~Str() +{ + delete str; + str = (char*) - 1; + len = (uint) - 1; } diff --git a/substr.h b/substr.h index cd0afb89..476d39a8 100644 --- a/substr.h +++ b/substr.h @@ -5,42 +5,52 @@ #include #include "basics.h" -class SubStr { +class SubStr +{ + public: - char *str; - uint len; + char *str; + uint len; + public: - friend bool operator==(const SubStr &, const SubStr &); - SubStr(uchar*, uint); - SubStr(char*, uint); - SubStr(const SubStr&); - void out(std::ostream&) const; + friend bool operator==(const SubStr &, const SubStr &); + SubStr(uchar*, uint); + SubStr(char*, uint); + SubStr(const SubStr&); + void out(std::ostream&) const; }; -class Str: public SubStr { +class Str: public SubStr +{ + public: - Str(const SubStr&); - Str(Str&); - Str(); - ~Str(); + Str(const SubStr&); + Str(Str&); + Str(); + ~Str(); }; -inline std::ostream& operator<<(std::ostream& o, const SubStr &s){ - s.out(o); - return o; +inline std::ostream& operator<<(std::ostream& o, const SubStr &s) +{ + s.out(o); + return o; } -inline std::ostream& operator<<(std::ostream& o, const SubStr* s){ - return o << *s; +inline std::ostream& operator<<(std::ostream& o, const SubStr* s) +{ + return o << *s; } inline SubStr::SubStr(uchar *s, uint l) - : str((char*) s), len(l) { } + : str((char*) s), len(l) +{ } inline SubStr::SubStr(char *s, uint l) - : str(s), len(l) { } + : str(s), len(l) +{ } inline SubStr::SubStr(const SubStr &s) - : str(s.str), len(s.len) { } + : str(s.str), len(s.len) +{ } #endif diff --git a/token.h b/token.h index 3cbc5956..debb1714 100644 --- a/token.h +++ b/token.h @@ -4,16 +4,20 @@ #include "substr.h" -class Token { - public: - Str text; - uint line; - public: - Token(SubStr, uint); +class Token +{ + +public: + Str text; + uint line; + +public: + Token(SubStr, uint); }; -inline Token::Token(SubStr t, uint l) : text(t), line(l) { - ; +inline Token::Token(SubStr t, uint l) : text(t), line(l) +{ + ; } #endif diff --git a/translate.cc b/translate.cc index 9c0c3aa4..9331a485 100644 --- a/translate.cc +++ b/translate.cc @@ -1,62 +1,65 @@ /* $Id$ */ #include "globals.h" -uchar asc2asc[256] = { -0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, -0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f, -0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f, -0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f, -0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f, -0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f, -0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f, -0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f, -0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f, -0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f, -0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf, -0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf, -0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf, -0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, -0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef, -0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff -}; +uchar asc2asc[256] = + { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff + }; uchar *xlat = asc2asc; uchar *talx = asc2asc; -uchar asc2ebc[256] = { /* Based on ISO 8859/1 and Code Page 37 */ -0x00,0x01,0x02,0x03,0x37,0x2d,0x2e,0x2f,0x16,0x05,0x25,0x0b,0x0c,0x0d,0x0e,0x0f, -0x10,0x11,0x12,0x13,0x3c,0x3d,0x32,0x26,0x18,0x19,0x3f,0x27,0x1c,0x1d,0x1e,0x1f, -0x40,0x5a,0x7f,0x7b,0x5b,0x6c,0x50,0x7d,0x4d,0x5d,0x5c,0x4e,0x6b,0x60,0x4b,0x61, -0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0x7a,0x5e,0x4c,0x7e,0x6e,0x6f, -0x7c,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, -0xd7,0xd8,0xd9,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xba,0xe0,0xbb,0xb0,0x6d, -0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, -0x97,0x98,0x99,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xc0,0x4f,0xd0,0xa1,0x07, -0x20,0x21,0x22,0x23,0x24,0x15,0x06,0x17,0x28,0x29,0x2a,0x2b,0x2c,0x09,0x0a,0x1b, -0x30,0x31,0x1a,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3a,0x3b,0x04,0x14,0x3e,0xff, -0x41,0xaa,0x4a,0xb1,0x9f,0xb2,0x6a,0xb5,0xbd,0xb4,0x9a,0x8a,0x5f,0xca,0xaf,0xbc, -0x90,0x8f,0xea,0xfa,0xbe,0xa0,0xb6,0xb3,0x9d,0xda,0x9b,0x8b,0xb7,0xb8,0xb9,0xab, -0x64,0x65,0x62,0x66,0x63,0x67,0x9e,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77, -0xac,0x69,0xed,0xee,0xeb,0xef,0xec,0xbf,0x80,0xfd,0xfe,0xfb,0xfc,0xad,0x8e,0x59, -0x44,0x45,0x42,0x46,0x43,0x47,0x9c,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57, -0x8c,0x49,0xcd,0xce,0xcb,0xcf,0xcc,0xe1,0x70,0xdd,0xde,0xdb,0xdc,0x8d,0xae,0xdf -}; +uchar asc2ebc[256] = + { /* Based on ISO 8859/1 and Code Page 37 */ + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, + 0x40, 0x5a, 0x7f, 0x7b, 0x5b, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, + 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, + 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xba, 0xe0, 0xbb, 0xb0, 0x6d, + 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0xa1, 0x07, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b, + 0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3a, 0x3b, 0x04, 0x14, 0x3e, 0xff, + 0x41, 0xaa, 0x4a, 0xb1, 0x9f, 0xb2, 0x6a, 0xb5, 0xbd, 0xb4, 0x9a, 0x8a, 0x5f, 0xca, 0xaf, 0xbc, + 0x90, 0x8f, 0xea, 0xfa, 0xbe, 0xa0, 0xb6, 0xb3, 0x9d, 0xda, 0x9b, 0x8b, 0xb7, 0xb8, 0xb9, 0xab, + 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9e, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77, + 0xac, 0x69, 0xed, 0xee, 0xeb, 0xef, 0xec, 0xbf, 0x80, 0xfd, 0xfe, 0xfb, 0xfc, 0xad, 0x8e, 0x59, + 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9c, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57, + 0x8c, 0x49, 0xcd, 0xce, 0xcb, 0xcf, 0xcc, 0xe1, 0x70, 0xdd, 0xde, 0xdb, 0xdc, 0x8d, 0xae, 0xdf + }; -uchar ebc2asc[256] = { /* Based on ISO 8859/1 and Code Page 37 */ -0x00,0x01,0x02,0x03,0x9c,0x09,0x86,0x7f,0x97,0x8d,0x8e,0x0b,0x0c,0x0d,0x0e,0x0f, -0x10,0x11,0x12,0x13,0x9d,0x85,0x08,0x87,0x18,0x19,0x92,0x8f,0x1c,0x1d,0x1e,0x1f, -0x80,0x81,0x82,0x83,0x84,0x0a,0x17,0x1b,0x88,0x89,0x8a,0x8b,0x8c,0x05,0x06,0x07, -0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9a,0x9b,0x14,0x15,0x9e,0x1a, -0x20,0xa0,0xe2,0xe4,0xe0,0xe1,0xe3,0xe5,0xe7,0xf1,0xa2,0x2e,0x3c,0x28,0x2b,0x7c, -0x26,0xe9,0xea,0xeb,0xe8,0xed,0xee,0xef,0xec,0xdf,0x21,0x24,0x2a,0x29,0x3b,0xac, -0x2d,0x2f,0xc2,0xc4,0xc0,0xc1,0xc3,0xc5,0xc7,0xd1,0xa6,0x2c,0x25,0x5f,0x3e,0x3f, -0xf8,0xc9,0xca,0xcb,0xc8,0xcd,0xce,0xcf,0xcc,0x60,0x3a,0x23,0x40,0x27,0x3d,0x22, -0xd8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xab,0xbb,0xf0,0xfd,0xde,0xb1, -0xb0,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0xaa,0xba,0xe6,0xb8,0xc6,0xa4, -0xb5,0x7e,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0xa1,0xbf,0xd0,0xdd,0xfe,0xae, -0x5e,0xa3,0xa5,0xb7,0xa9,0xa7,0xb6,0xbc,0xbd,0xbe,0x5b,0x5d,0xaf,0xa8,0xb4,0xd7, -0x7b,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xad,0xf4,0xf6,0xf2,0xf3,0xf5, -0x7d,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50,0x51,0x52,0xb9,0xfb,0xfc,0xf9,0xfa,0xff, -0x5c,0xf7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0xb2,0xd4,0xd6,0xd2,0xd3,0xd5, -0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xb3,0xdb,0xdc,0xd9,0xda,0x9f -}; +uchar ebc2asc[256] = + { /* Based on ISO 8859/1 and Code Page 37 */ + 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x9d, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x0a, 0x17, 0x1b, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, + 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a, + 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5, 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, + 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0xac, + 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, + 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, 0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, + 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xde, 0xb1, + 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4, + 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xfe, 0xae, + 0x5e, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0x5b, 0x5d, 0xaf, 0xa8, 0xb4, 0xd7, + 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5, + 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, + 0x5c, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f + };