// A common trick it is to split charset into disjoint character ranges
// and choose a representative of each range (we choose lower bound).
// The set of all representatives is the new (compacted) charset.
- // (Don't forget to include zero and exclude upper bound.)
+ // Don't forget to include zero and the upper bound, even if they
+ // do not explicitly appear in the ranges.
+ std::set<uint32_t> bounds;
+ re->split(bounds);
+ bounds.insert(0);
+ bounds.insert(cunits);
charset_t cs;
- re->split(cs);
- cs.insert(0);
- cs.erase(cunits);
+ for (std::set<uint32_t>::const_iterator i = bounds.begin(); i != bounds.end(); ++i)
+ {
+ cs.push_back(*i);
+ }
re->calcSize(cs);
void MatchOp::calcSize (const charset_t & cs)
{
size = 1;
+ uint32_t k = 0;
for (Range * r = match; r; r = r->next ())
{
- size += static_cast<uint32_t> (std::distance(
- cs.find(r->lower()),
- cs.find(r->upper())));
+ for (; cs[k] != r->lower(); ++k);
+ for (; cs[k] != r->upper(); ++k)
+ {
+ ++size;
+ }
}
}
i->i.link = &i[size];
Ins *j = &i[1];
uint32_t bump = size;
+ uint32_t k = 0;
for (Range *r = match; r; r = r->next ())
{
- charset_t::const_iterator l = cs.find(r->lower());
- charset_t::const_iterator u = cs.find(r->upper());
- for (; l != u; ++l)
+ for (; cs[k] != r->lower(); ++k);
+ for (; cs[k] != r->upper(); ++k)
{
- j->c.value = *l;
+ j->c.value = k;
j->c.bump = --bump;
j++;
}
namespace re2c {
-void AltOp::split (charset_t & cs)
+void AltOp::split (std::set<uint32_t> & cs)
{
exp1->split (cs);
exp2->split (cs);
}
-void CatOp::split (charset_t & cs)
+void CatOp::split (std::set<uint32_t> & cs)
{
exp1->split (cs);
exp2->split (cs);
}
-void CloseOp::split (charset_t & cs)
+void CloseOp::split (std::set<uint32_t> & cs)
{
exp->split (cs);
}
-void MatchOp::split (charset_t & cs)
+void MatchOp::split (std::set<uint32_t> & cs)
{
for (Range *r = match; r; r = r->next ())
{
}
}
-void NullOp::split (charset_t &) {}
+void NullOp::split (std::set<uint32_t> &) {}
-void RuleOp::split (charset_t & cs)
+void RuleOp::split (std::set<uint32_t> & cs)
{
exp->split (cs);
ctx->split (cs);
return cP;
}
-struct GoTo
-{
- uint32_t ch;
- void *to;
-};
-
DFA::DFA
( const std::string & c
, uint32_t l
}
Ins **work = new Ins * [ni + 1];
- uint32_t nc = ub - lb;
- GoTo *goTo = new GoTo[nc];
- Span *span = allocate<Span> (nc);
- memset((char*) goTo, 0, nc*sizeof(GoTo));
findState(work, closure(work, &ins[0]));
+ const size_t nc = cs.size() - 1; // (n + 1) bounds for n ranges
+ void **goTo = new void*[nc];
+ Span *span = allocate<Span> (nc);
+
while (toDo)
{
State *s = toDo;
toDo = s->link;
- uint32_t nGoTos = 0;
+ std::vector<uint32_t> preserved_order;
- s->rule = NULL;
+ memset(goTo, 0, nc * sizeof(void*));
+ s->rule = NULL;
for (uint32_t k = 0; k < s->kCount; ++k)
{
Ins * i = s->kernel[k];
{
for (Ins *j = i + 1; j < (Ins*) i->i.link; ++j)
{
- if (!(j->c.link = goTo[j->c.value - lb].to))
- goTo[nGoTos++].ch = j->c.value;
-
- goTo[j->c.value - lb].to = j;
+ if (!(j->c.link = goTo[j->c.value]))
+ {
+ preserved_order.push_back(j->c.value);
+ }
+ goTo[j->c.value] = j;
}
}
else if (i->i.tag == TERM)
}
}
- for (uint32_t j = 0; j < nGoTos; ++j)
+ for (uint32_t j = 0; j < preserved_order.size(); ++j)
{
- GoTo *go = &goTo[goTo[j].ch - lb];
- Ins * i = (Ins*) go->to;
-
- Ins ** cP = work;
- for (; i; i = (Ins*) i->c.link)
+ Ins **cP = work;
+ for (Ins *i = (Ins*)goTo[preserved_order[j]]; i; i = (Ins*) i->c.link)
+ {
cP = closure(cP, i + i->c.bump);
-
- go->to = findState(work, cP);
+ }
+ goTo[preserved_order[j]] = findState(work, cP);
}
s->go.nSpans = 0;
-
- for (charset_t::const_iterator j = cs.begin(); j != cs.end();)
+ for (uint32_t j = 0; j < nc;)
{
- State *to = (State*) goTo[*j].to;
- while (++j != cs.end() && goTo[*j].to == to) ;
- span[s->go.nSpans].ub = lb + (j == cs.end() ? nc : *j);
+ State *to = (State*) goTo[j];
+ while (++j < nc && goTo[j] == to) ;
+ span[s->go.nSpans].ub = cs[j];
span[s->go.nSpans].to = to;
s->go.nSpans++;
}
-
- for (uint32_t j = nGoTos; j-- > 0;)
- goTo[goTo[j].ch - lb].to = NULL;
-
s->go.span = allocate<Span> (s->go.nSpans);
-
memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span));
}
#include "src/util/c99_stdint.h"
#include <iosfwd>
#include <set>
+#include <vector>
#include "src/util/free_list.h"
#include "src/util/forbid_copy.h"
union Ins;
-typedef std::set<uint32_t> charset_t;
+typedef std::vector<uint32_t> charset_t;
class RegExp
{
{
vFreeList.erase (this);
}
- virtual void split (charset_t &) = 0;
+ virtual void split (std::set<uint32_t> &) = 0;
virtual void calcSize (const charset_t &) = 0;
virtual uint32_t fixedLength ();
virtual uint32_t compile (const charset_t &, Ins *) = 0;
: exp1 (e1)
, exp2 (e2)
{}
- void split (charset_t &);
+ void split (std::set<uint32_t> &);
void calcSize (const charset_t &);
uint32_t fixedLength ();
uint32_t compile (const charset_t &, Ins *);
: exp1 (e1)
, exp2 (e2)
{}
- void split (charset_t &);
+ void split (std::set<uint32_t> &);
void calcSize (const charset_t &);
uint32_t fixedLength ();
uint32_t compile (const charset_t &, Ins *);
inline CloseOp (RegExp * e)
: exp (e)
{}
- void split (charset_t &);
+ void split (std::set<uint32_t> &);
void calcSize (const charset_t &);
uint32_t compile (const charset_t &, Ins *);
void decompile ();
inline MatchOp (Range * m)
: match (m)
{}
- void split (charset_t &);
+ void split (std::set<uint32_t> &);
void calcSize (const charset_t &);
uint32_t fixedLength ();
uint32_t compile (const charset_t &, Ins *);
class NullOp: public RegExp
{
public:
- void split (charset_t &);
+ void split (std::set<uint32_t> &);
void calcSize (const charset_t &);
uint32_t fixedLength ();
uint32_t compile (const charset_t &, Ins *);
ins_access = access;
}
void display (std::ostream & o) const;
- void split (charset_t &);
+ void split (std::set<uint32_t> &);
void calcSize (const charset_t &);
uint32_t compile (const charset_t &, Ins *);
void decompile ();