granicus.if.org Git - libexpat/commitdiff
Initial Revision
author James Clark <jjc@jclark.com>
Mon, 10 Nov 1997 06:10:11 +0000 (06:10 +0000)
committer James Clark <jjc@jclark.com>
Mon, 10 Nov 1997 06:10:11 +0000 (06:10 +0000)
expat/xmltok/asciitab.h [new file with mode: 0755]
expat/xmltok/latin1tab.h [new file with mode: 0755]
expat/xmltok/utf8tab.h [new file with mode: 0755]
expat/xmltok/xmltok_impl.c [new file with mode: 0755]
expat/xmltok/xmltok_impl.h [new file with mode: 0755]

diff --git a/expat/xmltok/asciitab.h b/expat/xmltok/asciitab.h
new file mode 100755 (executable)
index 0000000..4938845
--- /dev/null
@@ -0,0 +1,32 @@
+/* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+/* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+/* 0x08 */ BT_NONXML, BT_S, BT_S, BT_NONXML,
+/* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML,
+/* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+/* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+/* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+/* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+/* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM,
+/* 0x24 */ BT_OTHER, BT_OTHER, BT_AMP, BT_APOS,
+/* 0x28 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0x2C */ BT_OTHER, BT_MINUS, BT_NAME, BT_SOL,
+/* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
+/* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT,
+/* 0x38 */ BT_DIGIT, BT_DIGIT, BT_NMSTRT, BT_SEMI,
+/* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST,
+/* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
+/* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
+/* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB,
+/* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT,
+/* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX,
+/* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT,
+/* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
+/* 0x7C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
diff --git a/expat/xmltok/latin1tab.h b/expat/xmltok/latin1tab.h
new file mode 100755 (executable)
index 0000000..e12369e
--- /dev/null
@@ -0,0 +1,32 @@
+/* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER,
+/* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME,
+/* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER,
+/* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER,
+/* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
+/* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER,
+/* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
+/* 0xFC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT,
diff --git a/expat/xmltok/utf8tab.h b/expat/xmltok/utf8tab.h
new file mode 100755 (executable)
index 0000000..80ab567
--- /dev/null
@@ -0,0 +1,33 @@
+
+/* 0x80 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL,
+/* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+/* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+/* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+/* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+/* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+/* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+/* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+/* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2,
+/* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
+/* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
+/* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
+/* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
+/* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4,
+/* 0xF4 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4,
+/* 0xF8 */ BT_LEAD5, BT_LEAD5, BT_LEAD5, BT_LEAD5,
+/* 0xFC */ BT_LEAD6, BT_LEAD6, BT_MALFORM, BT_MALFORM,
diff --git a/expat/xmltok/xmltok_impl.c b/expat/xmltok/xmltok_impl.c
new file mode 100755 (executable)
index 0000000..c736db0
--- /dev/null
@@ -0,0 +1,735 @@
+#define DO_LEAD_CASE(n, ptr, end, ret) \
+    case BT_LEAD ## n: \
+      if (end - ptr < n) \
+       return ret; \
+      ptr += n; \
+      break;
+#define MULTIBYTE_CASES(ptr, end, ret) \
+  DO_LEAD_CASE(2, ptr, end, ret) \
+  DO_LEAD_CASE(3, ptr, end, ret) \
+  DO_LEAD_CASE(4, ptr, end, ret) \
+  DO_LEAD_CASE(5, ptr, end, ret) \
+  DO_LEAD_CASE(6, ptr, end, ret)
+/* MULTIBYTE_CASES: for each BT_LEADn (n = 2..6), return ret if fewer than n bytes remain, else skip n bytes. */
+/* INVALID_CASES: BT_NONXML, BT_MALFORM and BT_TRAIL store ptr in *nextTokPtr and return XML_TOK_INVALID. */
+#define INVALID_CASES(ptr, nextTokPtr) \
+  case BT_NONXML: \
+  case BT_MALFORM: \
+  case BT_TRAIL: \
+    *(nextTokPtr) = (ptr); \
+    return XML_TOK_INVALID;
+/* CHECK_NAME_CASE: BT_LEADn case that accepts an n-byte character only if IS_NAME_CHAR holds for it. */
+#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
+   case BT_LEAD ## n: \
+     if (end - ptr < n) \
+       return XML_TOK_PARTIAL_CHAR; \
+     if (!IS_NAME_CHAR(enc, ptr, n)) { \
+       *nextTokPtr = ptr; \
+       return XML_TOK_INVALID; \
+     } \
+     ptr += n; \
+     break;
+/* CHECK_NAME_CASES: advance past one name character; BT_NONASCII is checked, then falls into the one-unit advance. */
+#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
+  case BT_NONASCII: \
+    if (!IS_NAME_CHAR(enc, ptr, MINBPC)) { \
+      *nextTokPtr = ptr; \
+      return XML_TOK_INVALID; \
+    } \
+  case BT_NMSTRT: \
+  case BT_HEX: \
+  case BT_DIGIT: \
+  case BT_NAME: \
+  case BT_MINUS: \
+    ptr += MINBPC; \
+    break; \
+  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
+  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
+  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) \
+  CHECK_NAME_CASE(5, enc, ptr, end, nextTokPtr) \
+  CHECK_NAME_CASE(6, enc, ptr, end, nextTokPtr)
+/* CHECK_NMSTRT_CASE: same shape as CHECK_NAME_CASE, but the test applied is IS_NMSTRT_CHAR. */
+#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
+   case BT_LEAD ## n: \
+     if (end - ptr < n) \
+       return XML_TOK_PARTIAL_CHAR; \
+     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
+       *nextTokPtr = ptr; \
+       return XML_TOK_INVALID; \
+     } \
+     ptr += n; \
+     break;
+/* CHECK_NMSTRT_CASES: advance past one name-start character (BT_NMSTRT, BT_HEX, or a checked non-ASCII one). */
+#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
+  case BT_NONASCII: \
+    if (!IS_NMSTRT_CHAR(enc, ptr, MINBPC)) { \
+      *nextTokPtr = ptr; \
+      return XML_TOK_INVALID; \
+    } \
+  case BT_NMSTRT: \
+  case BT_HEX: \
+    ptr += MINBPC; \
+    break; \
+  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
+  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
+  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) \
+  CHECK_NMSTRT_CASE(5, enc, ptr, end, nextTokPtr) \
+  CHECK_NMSTRT_CASE(6, enc, ptr, end, nextTokPtr)
+/* PREFIX wraps every function name below; unless it is already defined, it leaves the name unchanged. */
+#ifndef PREFIX
+#define PREFIX(ident) ident
+#endif
+
+/* ptr points to character following "<!-".  Returns XML_TOK_COMMENT with
+   *nextTokPtr just past "-->", XML_TOK_INVALID, or a PARTIAL code at end. */
+static
+int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
+                       const char **nextTokPtr)
+{
+  if (ptr != end) {
+    if (!CHAR_MATCHES(enc, ptr, '-')) { /* second '-' of "<!--" is required */
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+    ptr += MINBPC;
+    while (ptr != end) {
+      switch (BYTE_TYPE(enc, ptr)) {
+      MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
+      INVALID_CASES(ptr, nextTokPtr)
+      case BT_MINUS:
+       if ((ptr += MINBPC) == end)
+         return XML_TOK_PARTIAL;
+       if (CHAR_MATCHES(enc, ptr, '-')) { /* "--" may only be followed by '>' */
+         if ((ptr += MINBPC) == end)
+           return XML_TOK_PARTIAL;
+         if (!CHAR_MATCHES(enc, ptr, '>')) {
+           *nextTokPtr = ptr;
+           return XML_TOK_INVALID;
+         }
+         *nextTokPtr = ptr + MINBPC;
+         return XML_TOK_COMMENT;
+       }
+       break; /* lone '-': classify the following character on the next pass */
+      default:
+       ptr += MINBPC;
+       break;
+      }
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "<!".  A '-' here hands off to
+   scanComment; otherwise scans up to a quote, '<' or end as PROLOG_CHARS. */
+static
+int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
+                    const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  if (BYTE_TYPE(enc, ptr) == BT_MINUS)
+    return PREFIX(scanComment)(enc, ptr + MINBPC, end, nextTokPtr);
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    MULTIBYTE_CASES(ptr, end, (*nextTokPtr = ptr, XML_TOK_PROLOG_CHARS))
+    INVALID_CASES(ptr, nextTokPtr)
+    case BT_APOS: case BT_QUOT: case BT_LT: /* token stops before these */
+      *nextTokPtr = ptr;
+      return XML_TOK_PROLOG_CHARS;
+    default: /* one-unit character; multibyte ones already advanced above */
+      ptr += MINBPC;
+      break;
+    }
+  }
+  *nextTokPtr = ptr;
+  return XML_TOK_PROLOG_CHARS;
+}
+
+/* ptr points to character following "<?".  Scans the target name and the rest
+   of a processing instruction; returns XML_TOK_PI once "?>" has been seen. */
+static
+int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
+                  const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) { /* first character must start a name */
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  while (ptr != end) { /* remaining characters of the target name */
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_S: /* whitespace ends the name; skip everything up to "?>" */
+      ptr += MINBPC;
+      while (ptr != end) {
+        switch (BYTE_TYPE(enc, ptr)) {
+        MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
+        INVALID_CASES(ptr, nextTokPtr)
+       case BT_QUEST:
+         ptr += MINBPC;
+         if (ptr == end)
+           return XML_TOK_PARTIAL;
+         if (CHAR_MATCHES(enc, ptr, '>')) {
+           *nextTokPtr = ptr + MINBPC;
+           return XML_TOK_PI;
+         }
+         break; /* '?' not followed by '>': keep scanning from here */
+       default:
+         ptr += MINBPC;
+         break;
+       }
+      }
+      return XML_TOK_PARTIAL;
+    case BT_QUEST: /* '?' directly after the name must be followed by '>' */
+      ptr += MINBPC;
+      if (ptr == end)
+       return XML_TOK_PARTIAL;
+      if (CHAR_MATCHES(enc, ptr, '>')) {
+       *nextTokPtr = ptr + MINBPC;
+       return XML_TOK_PI;
+      }
+      /* fall through */
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "<![".  Expects "CDATA[" and then scans
+   for the closing "]]>"; returns XML_TOK_CDATA_SECTION, INVALID or PARTIAL. */
+static
+int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
+                            const char **nextTokPtr)
+{
+  int i;
+  /* the shortest possible remainder is "CDATA[]]>", i.e. 9 characters */
+  if (end - ptr < 9 * MINBPC)
+    return XML_TOK_PARTIAL;
+  for (i = 0; i < 6; i++, ptr += MINBPC) {
+    if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) {
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  end -= 2 * MINBPC; /* keeps the two-character lookahead below in bounds */
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
+    INVALID_CASES(ptr, nextTokPtr)
+    case BT_RSQB:
+      if (CHAR_MATCHES(enc, ptr + MINBPC, ']')
+         && CHAR_MATCHES(enc, ptr + 2 * MINBPC, '>')) {
+       *nextTokPtr = ptr + 3 * MINBPC;
+       return XML_TOK_CDATA_SECTION;
+      }
+    /* fall through: a ']' that does not begin "]]>" is ordinary data */
+    default:
+      ptr += MINBPC;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "</".  Scans a name, optional trailing
+   whitespace and '>'; returns XML_TOK_END_TAG with *nextTokPtr past '>'. */
+static
+int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
+                      const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) { /* first character must start a name */
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_S: /* after the name only whitespace and then '>' are accepted */
+      for (ptr += MINBPC; ptr != end; ptr += MINBPC) {
+       switch (BYTE_TYPE(enc, ptr)) {
+       case BT_S:
+         break;
+       case BT_GT:
+         *nextTokPtr = ptr + MINBPC;
+          return XML_TOK_END_TAG;
+       default:
+         *nextTokPtr = ptr;
+         return XML_TOK_INVALID;
+       }
+      }
+      return XML_TOK_PARTIAL;
+    case BT_GT:
+      *nextTokPtr = ptr + MINBPC;
+      return XML_TOK_END_TAG;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "&#x".  Requires one or more BT_DIGIT or
+   BT_HEX characters followed by ';'; returns XML_TOK_CHAR_REF on success. */
+static
+int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
+                          const char **nextTokPtr)
+{
+  if (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) { /* at least one hex digit is mandatory */
+    case BT_DIGIT:
+    case BT_HEX:
+      break;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+    for (ptr += MINBPC; ptr != end; ptr += MINBPC) {
+      switch (BYTE_TYPE(enc, ptr)) {
+      case BT_DIGIT:
+      case BT_HEX:
+       break;
+      case BT_SEMI:
+       *nextTokPtr = ptr + MINBPC;
+       return XML_TOK_CHAR_REF;
+      default:
+       *nextTokPtr = ptr;
+       return XML_TOK_INVALID;
+      }
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "&#".  An 'x' hands off to scanHexCharRef;
+   otherwise requires BT_DIGIT characters and ';' (XML_TOK_CHAR_REF). */
+static
+int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
+                       const char **nextTokPtr)
+{
+  if (ptr != end) {
+    if (CHAR_MATCHES(enc, ptr, 'x'))
+      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC, end, nextTokPtr);
+    switch (BYTE_TYPE(enc, ptr)) { /* at least one digit is mandatory */
+    case BT_DIGIT:
+      break;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+    for (ptr += MINBPC; ptr != end; ptr += MINBPC) {
+      switch (BYTE_TYPE(enc, ptr)) {
+      case BT_DIGIT:
+       break;
+      case BT_SEMI:
+       *nextTokPtr = ptr + MINBPC;
+       return XML_TOK_CHAR_REF;
+      default:
+       *nextTokPtr = ptr;
+       return XML_TOK_INVALID;
+      }
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "&".  A '#' hands off to scanCharRef;
+   otherwise scans a name ended by ';' and returns XML_TOK_ENTITY_REF. */
+static
+int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
+                   const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) {
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) /* first argument is the encoding */
+  case BT_NUM:
+    return PREFIX(scanCharRef)(enc, ptr + MINBPC, end, nextTokPtr);
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  while (ptr != end) { /* remaining name characters, then the closing ';' */
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_SEMI:
+      *nextTokPtr = ptr + MINBPC;
+      return XML_TOK_ENTITY_REF;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following first character of attribute name.
+   Scans name="value" pairs through the end of the tag ('>' or "/>"). */
+static
+int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
+                    const char **nextTokPtr)
+{
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_S: /* whitespace after the name: only more whitespace or '=' may follow */
+      for (;;) {
+       int t;
+
+       ptr += MINBPC;
+       if (ptr == end)
+         return XML_TOK_PARTIAL;
+       t = BYTE_TYPE(enc, ptr);
+       if (t == BT_EQUALS)
+         break;
+       if (t != BT_S) {
+         *nextTokPtr = ptr;
+         return XML_TOK_INVALID;
+       }
+      }
+    /* fall through */
+    case BT_EQUALS:
+      {
+       int open; /* byte type of the opening quote (BT_QUOT or BT_APOS) */
+       for (;;) {
+         /* skip whitespace between '=' and the opening quote */
+         ptr += MINBPC;
+         if (ptr == end)
+           return XML_TOK_PARTIAL;
+         open = BYTE_TYPE(enc, ptr);
+         if (open == BT_QUOT || open == BT_APOS)
+           break;
+         if (open != BT_S) {
+           *nextTokPtr = ptr;
+           return XML_TOK_INVALID;
+         }
+       }
+       ptr += MINBPC;
+       /* in attribute value */
+       for (;;) {
+         int t;
+         if (ptr == end)
+           return XML_TOK_PARTIAL;
+         t = BYTE_TYPE(enc, ptr);
+         if (t == open) /* matching quote ends the value */
+           break;
+         switch (t) {
+         INVALID_CASES(ptr, nextTokPtr)
+          MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
+         case BT_AMP:
+           {
+             int tok = PREFIX(scanRef)(enc, ptr + MINBPC, end, &ptr); /* on success ptr moves past the reference */
+             if (tok <= 0) { /* presumably error/partial codes are <= 0; confirm against XML_TOK_* definitions */
+               if (tok == XML_TOK_INVALID)
+                 *nextTokPtr = ptr;
+               return tok;
+             }
+             break;
+           }
+         case BT_LT: /* '<' is rejected inside an attribute value */
+           *nextTokPtr = ptr;
+           return XML_TOK_INVALID;
+         default:
+           ptr += MINBPC;
+           break;
+         }
+       }
+       /* ptr points to closing quote */
+       for (;;) {
+         ptr += MINBPC;
+         if (ptr == end)
+           return XML_TOK_PARTIAL;
+         switch (BYTE_TYPE(enc, ptr)) {
+         CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+         case BT_S:
+           continue;
+         case BT_GT:
+           *nextTokPtr = ptr + MINBPC;
+           return XML_TOK_START_TAG;
+         case BT_SOL:
+           ptr += MINBPC;
+           if (ptr == end)
+             return XML_TOK_PARTIAL;
+           if (!CHAR_MATCHES(enc, ptr, '>')) {
+             *nextTokPtr = ptr;
+             return XML_TOK_INVALID;
+           }
+           *nextTokPtr = ptr + MINBPC;
+           return XML_TOK_EMPTY_ELEMENT;
+         default:
+           *nextTokPtr = ptr;
+           return XML_TOK_INVALID;
+         }
+         break; /* reached only after a name-start character: next attribute begins */
+       }
+       break; /* back to the outer loop to scan the rest of that attribute name */
+      }
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "<".  Dispatches "<!", "<?" and "</" to
+   their scanners; otherwise scans a start-tag or empty-element tag. */
+static
+int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
+                  const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) {
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+  case BT_EXCL: /* "<!" must continue as a comment or a CDATA section here */
+    if ((ptr += MINBPC) == end)
+      return XML_TOK_PARTIAL;
+    switch (BYTE_TYPE(enc, ptr)) {
+    case BT_MINUS:
+      return PREFIX(scanComment)(enc, ptr + MINBPC, end, nextTokPtr);
+    case BT_LSQB:
+      return PREFIX(scanCdataSection)(enc, ptr + MINBPC, end, nextTokPtr);
+    }
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  case BT_QUEST:
+    return PREFIX(scanPi)(enc, ptr + MINBPC, end, nextTokPtr);
+  case BT_SOL:
+    return PREFIX(scanEndTag)(enc, ptr + MINBPC, end, nextTokPtr);
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  /* we have a start-tag */
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_S: /* element name ended; look for attributes, '>' or "/>" */
+      {
+        ptr += MINBPC;
+       while (ptr != end) {
+         switch (BYTE_TYPE(enc, ptr)) {
+         CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+         case BT_GT:
+           goto gt;
+         case BT_SOL:
+           goto sol;
+         case BT_S:
+           ptr += MINBPC;
+           continue;
+         default:
+           *nextTokPtr = ptr;
+           return XML_TOK_INVALID;
+         }
+         return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); /* reached after a name-start character */
+       }
+       return XML_TOK_PARTIAL;
+      }
+    case BT_GT:
+    gt: /* also the target of "goto gt" from the whitespace loop above */
+      *nextTokPtr = ptr + MINBPC;
+      return XML_TOK_START_TAG;
+    case BT_SOL:
+    sol: /* also the target of "goto sol"; '/' must be followed by '>' */
+      ptr += MINBPC;
+      if (ptr == end)
+       return XML_TOK_PARTIAL;
+      if (!CHAR_MATCHES(enc, ptr, '>')) {
+       *nextTokPtr = ptr;
+       return XML_TOK_INVALID;
+      }
+      *nextTokPtr = ptr + MINBPC;
+      return XML_TOK_EMPTY_ELEMENT;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+/* Tokenizes element content starting at ptr: markup ('<'), a reference ('&'),
+   or a run of character data; "]]>" in character data is XML_TOK_INVALID. */
+static
+int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
+                      const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_NONE;
+#if MINBPC > 1
+  { /* ignore a trailing fragment shorter than one MINBPC unit */
+    size_t n = end - ptr;
+    if (n & (MINBPC - 1)) {
+      n &= ~(MINBPC - 1);
+      if (n == 0)
+       return XML_TOK_PARTIAL;
+      end = ptr + n;
+    }
+  }
+#endif
+  switch (BYTE_TYPE(enc, ptr)) {
+  case BT_LT:
+    return PREFIX(scanLt)(enc, ptr + MINBPC, end, nextTokPtr);
+  case BT_AMP:
+    return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr);
+  case BT_RSQB:
+    ptr += MINBPC;
+    if (ptr == end)
+      return XML_TOK_PARTIAL;
+    if (!CHAR_MATCHES(enc, ptr, ']'))
+      break;
+    if (ptr + MINBPC == end)
+      return XML_TOK_PARTIAL;
+    if (!CHAR_MATCHES(enc, ptr + MINBPC, '>'))
+      break; /* stay on the second ']' so that "]]]>" is still caught below */
+    *nextTokPtr = ptr + MINBPC;
+    return XML_TOK_INVALID;
+  INVALID_CASES(ptr, nextTokPtr)
+  MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
+  default:
+    ptr += MINBPC;
+    break;
+  }
+  while (ptr != end) { /* extend the data token until markup or a bad byte */
+    switch (BYTE_TYPE(enc, ptr)) {
+    MULTIBYTE_CASES(ptr, end, (*nextTokPtr = ptr, XML_TOK_DATA_CHARS))
+    case BT_RSQB:
+      if (ptr + MINBPC != end) {
+        if (!CHAR_MATCHES(enc, ptr + MINBPC, ']')) {
+          ptr += MINBPC;
+          break;
+        }
+        if (ptr + 2*MINBPC != end) {
+          if (!CHAR_MATCHES(enc, ptr + 2*MINBPC, '>')) {
+            ptr += MINBPC;
+            break;
+          }
+          *nextTokPtr = ptr + 2*MINBPC;
+          return XML_TOK_INVALID;
+        }
+      }
+      /* fall through: not enough lookahead, end the token before this ']' */
+    case BT_AMP:
+    case BT_LT:
+    case BT_NONXML:
+    case BT_MALFORM:
+    case BT_TRAIL:
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    default:
+      ptr += MINBPC;
+      break;
+    }
+  }
+  *nextTokPtr = ptr;
+  return XML_TOK_DATA_CHARS;
+}
+/* Tokenizes the prolog starting at ptr: quoted literals, "<!" and "<?" markup,
+   whitespace runs (XML_TOK_PROLOG_S) and other runs (XML_TOK_PROLOG_CHARS). */
+static
+int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
+                     const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_NONE;
+#if MINBPC > 1
+  { /* ignore a trailing fragment shorter than one MINBPC unit */
+    size_t n = end - ptr;
+    if (n & (MINBPC - 1)) {
+      n &= ~(MINBPC - 1);
+      if (n == 0)
+       return XML_TOK_PARTIAL;
+      end = ptr + n;
+    }
+  }
+#endif
+  switch (BYTE_TYPE(enc, ptr)) {
+  INVALID_CASES(ptr, nextTokPtr)
+  MULTIBYTE_CASES(ptr, end, XML_TOK_PARTIAL_CHAR)
+  case BT_QUOT:
+    {
+      for (ptr += MINBPC; ptr != end; ptr += MINBPC) {
+       if (BYTE_TYPE(enc, ptr) == BT_QUOT) {
+         *nextTokPtr = ptr + MINBPC;
+         return XML_TOK_LITERAL;
+       }
+      }
+      return XML_TOK_PARTIAL;
+    }
+  case BT_APOS:
+    {
+      for (ptr += MINBPC; ptr != end; ptr += MINBPC) {
+       if (BYTE_TYPE(enc, ptr) == BT_APOS) {
+         *nextTokPtr = ptr + MINBPC;
+         return XML_TOK_LITERAL;
+       }
+      }
+      return XML_TOK_PARTIAL;
+    }
+  case BT_LT:
+    {
+      ptr += MINBPC;
+      if (ptr == end)
+       return XML_TOK_PARTIAL;
+      switch (BYTE_TYPE(enc, ptr)) {
+      case BT_EXCL:
+       return PREFIX(scanDecl)(enc, ptr + MINBPC, end, nextTokPtr);
+      case BT_QUEST:
+       return PREFIX(scanPi)(enc, ptr + MINBPC, end, nextTokPtr);
+      case BT_NMSTRT:
+      case BT_HEX:
+      case BT_NONASCII:
+      case BT_LEAD2: case BT_LEAD3: case BT_LEAD4:
+      case BT_LEAD5: case BT_LEAD6:
+       /* possible start of a name: rescan from the '<' as content */
+       return PREFIX(contentTok)(enc, ptr - MINBPC, end, nextTokPtr);
+      }
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  case BT_S:
+    do {
+      ptr += MINBPC;
+    } while (ptr != end && BYTE_TYPE(enc, ptr) == BT_S);
+    *nextTokPtr = ptr;
+    return XML_TOK_PROLOG_S;
+  default:
+    ptr += MINBPC;
+    break;
+  }
+  for (; ptr != end;) { /* extend the run until a delimiter or a bad byte */
+    switch (BYTE_TYPE(enc, ptr)) {
+    case BT_LT:
+    case BT_QUOT:
+    case BT_APOS:
+    case BT_NONXML:
+    case BT_MALFORM:
+    case BT_TRAIL:
+    case BT_S:
+      *nextTokPtr = ptr;
+      return XML_TOK_PROLOG_CHARS;
+    MULTIBYTE_CASES(ptr, end, (*nextTokPtr = ptr, XML_TOK_PROLOG_CHARS))
+    default:
+      ptr += MINBPC;
+      break;
+    }
+  }
+  *nextTokPtr = ptr;
+  return XML_TOK_PROLOG_CHARS;
+}
+
+#undef DO_LEAD_CASE
+#undef MULTIBYTE_CASES
+#undef INVALID_CASES
+#undef CHECK_NAME_CASE
+#undef CHECK_NAME_CASES
+#undef CHECK_NMSTRT_CASE
+#undef CHECK_NMSTRT_CASES
diff --git a/expat/xmltok/xmltok_impl.h b/expat/xmltok/xmltok_impl.h
new file mode 100755 (executable)
index 0000000..2aa55c9
--- /dev/null
@@ -0,0 +1,33 @@
+enum { /* byte-type codes stored in the per-encoding lookup tables */
+  BT_NONXML, /* e.g. ASCII control characters other than tab, LF and CR */
+  BT_MALFORM, /* e.g. bytes 0xFE and 0xFF in the UTF-8 table */
+  BT_LT, /* '<' */
+  BT_AMP, /* '&' */
+  BT_LEAD2, /* BT_LEADn: first byte of an n-byte sequence */
+  BT_LEAD3,
+  BT_LEAD4,
+  BT_LEAD5,
+  BT_LEAD6,
+  BT_TRAIL, /* bytes 0x80..0xBF in the UTF-8 table */
+  BT_GT, /* '>' */
+  BT_QUOT, /* '"' */
+  BT_APOS, /* '\'' */
+  BT_EQUALS, /* '=' */
+  BT_QUEST, /* '?' */
+  BT_EXCL, /* '!' */
+  BT_SOL, /* '/' */
+  BT_SEMI, /* ';' */
+  BT_NUM, /* '#' */
+  BT_LSQB, /* '[' */
+  BT_RSQB, /* ']' */
+  BT_S, /* tab, LF, CR and space */
+  BT_NMSTRT, /* name start character, e.g. letters, '_' and ':' */
+  BT_HEX, /* 'A'..'F' and 'a'..'f'; also accepted as name start */
+  BT_DIGIT, /* '0'..'9' */
+  BT_NAME, /* name character that cannot start a name, e.g. '.' */
+  BT_MINUS, /* '-' */
+  BT_OTHER, /* known not to be a name or name start character */
+  BT_NONASCII /* might be a name or name start character */
+};
+
+#include <stddef.h>