#include <windows.h>
static
-int XmlSkipProlog(const char **s, const char *end, const char **nextTokP);
+int XmlSkipProlog(const char **s, const char *end, const char **nextTokP,
+ const ENCODING **enc);
+/* Tokenize the n bytes at s and report whether tokenization succeeded:
+returns 1 once the tokenizer yields XML_TOK_NONE, or 0 after printing a
+diagnostic naming filename when it yields XML_TOK_INVALID,
+XML_TOK_PARTIAL or XML_TOK_PARTIAL_CHAR. */
int XmlParse(const char *s, size_t n, const char *filename)
{
const char *start = s;
const char *end = s + n;
const char *next;
- int tok = XmlSkipProlog(&s, end, &next);
+ const ENCODING *enc; /* written by XmlSkipProlog; passed to every XmlContentTok call below */
+ int tok = XmlSkipProlog(&s, end, &next, &enc);
for (;;) {
switch (tok) {
case XML_TOK_NONE:
return 1;
case XML_TOK_INVALID:
fprintf(stderr, "%s: well-formedness error at byte %lu\n",
- filename, (unsigned long)(s - start));
+ filename, (unsigned long)(next - start)); /* offset of the tokenizer's out-pointer, not of the token start s */
return 0;
case XML_TOK_PARTIAL:
fprintf(stderr, "%s: unclosed token started at byte %lu\n",
filename, (unsigned long)(s - start));
return 0;
+ case XML_TOK_PARTIAL_CHAR: /* xmltok.h: only part of a multibyte sequence */
+ fprintf(stderr, "%s: malformed input\n", filename);
+ return 0;
case XML_TOK_COMMENT:
break;
case XML_TOK_START_TAG:
break;
}
s = next;
- tok = XmlContentTok(s, end, &next);
+ tok = XmlContentTok(enc, s, end, &next);
}
/* not reached */
}
static
-int XmlSkipProlog(const char **startp, const char *end, const char **nextTokP)
+int XmlSkipProlog(const char **startp, const char *end,
+ const char **nextTokP, const ENCODING **enc)
{
const char *s = *startp;
+ INIT_ENCODING initEnc;
+ XmlInitEncoding(&initEnc, enc);
for (;;) {
- int tok = XmlPrologTok(s, end, nextTokP);
+ int tok = XmlPrologTok(*enc, s, end, nextTokP);
switch (tok) {
- case XML_TOK_NONE:
+ case XML_TOK_START_TAG:
case XML_TOK_INVALID:
+ case XML_TOK_NONE:
case XML_TOK_PARTIAL:
- case XML_TOK_START_TAG:
*startp = s;
return tok;
default:
char *ptr;
size_t size;
int fd;
- int doneProlog;
+ int state;
int eof;
unsigned long endOffset;
+ const ENCODING *enc;
+ INIT_ENCODING initEnc;
};
#define XmlTokBufferOffset(tb) ((tb)->endOffset - ((tb)->end - (tb)->ptr))
tb->ptr = tb->buf;
tb->size = READSIZE;
tb->fd = fd;
- tb->doneProlog = 0;
+ tb->state = XML_PROLOG_STATE;
tb->eof = 0;
tb->endOffset = 0;
+ XmlInitEncoding(&(tb->initEnc), &(tb->enc));
}
void XmlTokBufferFree(struct XmlTokBuffer *tb)
for (;;) {
int nBytes;
const char *start = tb->ptr;
- if (!tb->doneProlog) {
- tok = XmlPrologTok(start, tb->end, &tb->ptr);
- if (tok == XML_TOK_START_TAG)
- tb->doneProlog = 1;
- }
- else
- tok = XmlContentTok(start, tb->end, &tb->ptr);
+ tok = XmlTok(tb->enc, tb->state, start, tb->end, &tb->ptr);
if (tok >= 0) {
+ if (tok == XML_TOK_START_TAG)
+ tb->state = XML_CONTENT_STATE;
*tokStart = start;
*tokLength = tb->ptr - start;
break;
fprintf(stderr, "usage: %s filename ...\n", argv[0]);
return 1;
}
+ fprintf(stderr, "version 0.1\n");
for (i = 1; i < argc; i++)
if (!doFile(argv[i]))
ret = 1;
-#ifdef _MSC_VER
-#define XMLTOKAPI __declspec(dllexport)
-#endif
-
-#include "xmltok.h"
-
-#ifdef UNICODE
-typedef wchar_t TCHAR;
-#else
-typedef char TCHAR;
-#endif
-
-#define DIGIT_CASES \
- case '0': case '1': case '2': case '3': case '4': \
- case '5': case '6': case '7': case '8': case '9':
+/* TODO
-#define HEX_DIGIT_CASES DIGIT_CASES \
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': \
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+Provide methods to convert to any of UTF-8, UTF-16, UCS-4.
-#define S_CASES case ' ': case '\t': case '\r': case '\n':
+Better prolog tokenization
-/* ptr points to character following "<!-" */
+<!NAME
+NMTOKEN
+NAME
+PEREF
-static
-int scanComment(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
-{
- if (ptr != end) {
- if (*ptr != '-') {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- for (++ptr; ptr != end; ptr++) {
- if (*ptr == '-') {
- if (++ptr == end)
- return XML_TOK_PARTIAL;
- if (*ptr == '-') {
- if (++ptr == end)
- return XML_TOK_PARTIAL;
- if (*ptr != '>') {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- *nextTokPtr = ptr + 1;
- return XML_TOK_COMMENT;
- }
- }
- }
- }
- return XML_TOK_PARTIAL;
-}
+*/
-/* ptr points to character following "<!" */
-
-static
-int scanDecl(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
-{
- if (ptr != end) {
- if (*ptr == '-')
- return scanComment(ptr + 1, end, nextTokPtr);
- do {
- switch (*ptr) {
- case '\'':
- case '"':
- case '<':
- *nextTokPtr = ptr;
- return XML_TOK_PROLOG_CHARS;
- }
- } while (++ptr != end);
- *nextTokPtr = ptr;
- return XML_TOK_PROLOG_CHARS;
- }
- return XML_TOK_PARTIAL;
-}
-
-/* ptr points to character following "<?" */
-
-static
-int scanPi(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
-{
- for (; ptr != end; ++ptr) {
- switch (*ptr) {
- case '?':
- if (ptr + 1 == end)
- return XML_TOK_PARTIAL;
- if (ptr[1] == '>') {
- *nextTokPtr = ptr + 2;
- return XML_TOK_PI;
- }
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-/* ptr points to character following "<" */
-
-static
-int scanStartTag(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
-{
- for (; ptr != end; ++ptr) {
- switch (*ptr) {
- case '<':
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- case '>':
- *nextTokPtr = ptr + 1;
- return XML_TOK_START_TAG;
- case '"':
- for (++ptr;; ++ptr) {
- if (ptr == end)
- return XML_TOK_PARTIAL;
- if (*ptr == '"')
- break;
- }
- break;
- case '\'':
- for (++ptr;; ++ptr) {
- if (ptr == end)
- return XML_TOK_PARTIAL;
- if (*ptr == '\'')
- break;
- }
- break;
- case '/':
- if (++ptr == end)
- return XML_TOK_PARTIAL;
- if (*ptr != '>') {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- *nextTokPtr = ptr + 1;
- return XML_TOK_EMPTY_ELEMENT;
- }
- }
- return XML_TOK_PARTIAL;
-}
-
-/* ptr points to character following "</" */
+#ifdef _MSC_VER
+#define XMLTOKAPI __declspec(dllexport)
+#endif
-static
-int scanEndTag(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
+#include "xmltok.h"
+#include "nametab.h"
+
+/* Test whether the character with high byte hi and low byte lo is marked
+in namingBitmap.  pages[hi] selects a group of eight bitmap words, lo >> 5
+picks the word within the group and lo & 0x1F the bit within the word.
+The mask is built from 1u because the bit index can be 31, and 1 << 31
+overflows a 32-bit int (undefined behaviour). */
+#define UCS2_GET_NAMING(pages, hi, lo) \
+ (namingBitmap[((pages)[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
+
+/* A 2 byte UTF-8 representation splits the character's 11 bits
+between the bottom 5 and 6 bits of the bytes.
+We need 8 bits to index into pages, 3 bits to add to that index and
+5 bits to generate the mask.
+The mask is built from 1u because the bit index can be 31, and 1 << 31
+overflows a 32-bit int (undefined behaviour). */
+#define UTF8_GET_NAMING2(pages, byte) \
+ (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ + ((((byte)[0]) & 3) << 1) \
+ + ((((byte)[1]) >> 5) & 1)] \
+ & (1u << (((byte)[1]) & 0x1F)))
+
+/* A 3 byte UTF-8 representation splits the character's 16 bits
+between the bottom 4, 6 and 6 bits of the bytes.
+We need 8 bits to index into pages, 3 bits to add to that index and
+5 bits to generate the mask.
+The mask is built from 1u because the bit index can be 31, and 1 << 31
+overflows a 32-bit int (undefined behaviour). */
+#define UTF8_GET_NAMING3(pages, byte) \
+ (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
+ + ((((byte)[1]) >> 2) & 0xF)] \
+ << 3) \
+ + ((((byte)[1]) & 3) << 1) \
+ + ((((byte)[2]) >> 5) & 1)] \
+ & (1u << (((byte)[2]) & 0x1F)))
+
+/* Look up the n-byte UTF-8 sequence at p in the naming bitmap.
+Sequences of 2 and 3 bytes are handed to UTF8_GET_NAMING2 and
+UTF8_GET_NAMING3 (p is viewed as unsigned bytes); any other n
+evaluates to 0. */
+#define UTF8_GET_NAMING(pages, p, n) \
+ ((n) == 2 \
+ ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
+ : ((n) == 3 \
+ ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
+ : 0))
+
+
+#include "xmltok_impl.h"
+
+/* An ENCODING extended with a table that gives a type code for each of
+the 256 possible byte values.  enc must stay the first member: BYTE_TYPE
+casts an ENCODING pointer to a pointer to this struct. */
+struct normal_encoding {
+ ENCODING enc;
+ unsigned char type[256];
+};
+
+/* Instantiate the scanners of xmltok_impl.c under normal_* names for
+input whose smallest unit is one byte, then build utf8_encoding from
+them.  xmltok_impl.c is expected to define PREFIX(prologTok) and
+PREFIX(contentTok), which are referenced in the initializer below. */
+
+/* minimum bytes per character */
+#define MINBPC 1
+/* Type code of the byte at p, read from the table that follows the
+ENCODING inside struct normal_encoding.  The cast keeps the const
+qualifier because the encoding objects are const. */
+#define BYTE_TYPE(enc, p) \
+ (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
+#define IS_NAME_CHAR(enc, p, n) UTF8_GET_NAMING(namePages, p, n)
+#define IS_NMSTRT_CHAR(enc, p, n) UTF8_GET_NAMING(nmstrtPages, p, n)
+
+/* c is an ASCII character; it is parenthesized so that any expression
+may be passed. */
+#define CHAR_MATCHES(enc, p, c) (*(p) == (c))
+
+#define PREFIX(ident) normal_ ## ident
+#include "xmltok_impl.c"
+
+#undef MINBPC
+#undef BYTE_TYPE
+#undef CHAR_MATCHES
+#undef IS_NAME_CHAR
+#undef IS_NMSTRT_CHAR
+
+/* Scanners for the prolog and content states, a minimum of 1 byte per
+character, and a byte-type table taken from asciitab.h followed by
+utf8tab.h. */
+const struct normal_encoding utf8_encoding = {
+ { { PREFIX(prologTok), PREFIX(contentTok) }, 1 },
+#include "asciitab.h"
+#include "utf8tab.h"
+};
+
+#undef PREFIX
+
+/* Byte-type table assembled from asciitab.h followed by latin1tab.h.
+The BYTE_TYPE macros of the two 2-byte encodings index it with the low
+byte of a unit whose high byte is zero. */
+static unsigned char latin1tab[256] = {
+#include "asciitab.h"
+#include "latin1tab.h"
+};
+
+/* Classify a 2-byte unit from its high byte hi and low byte lo.  The
+BYTE_TYPE macros call this only when the high byte is non-zero.
+High bytes 0xD8-0xDB (the UTF-16 high-surrogate range) give BT_LEAD4,
+0xDC-0xDF (the low-surrogate range) give BT_TRAIL, the units 0xFFFE and
+0xFFFF give BT_NONXML, and everything else gives BT_NONASCII.  Both
+bytes are converted to unsigned char before being compared. */
+static int unicode_byte_type(char hi, char lo)
{
- for (; ptr != end; ++ptr) {
- switch (*ptr) {
- case '<':
- case '&':
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- case '>':
- *nextTokPtr = ptr + 1;
- return XML_TOK_END_TAG;
+ switch ((unsigned char)hi) {
+ case 0xD8: case 0xD9: case 0xDA: case 0xDB:
+ return BT_LEAD4;
+ case 0xDC: case 0xDD: case 0xDE: case 0xDF:
+ return BT_TRAIL;
+ case 0xFF:
+ switch ((unsigned char)lo) {
+ case 0xFF:
+ case 0xFE:
+ return BT_NONXML;
}
+ break; /* 0xFF00-0xFFFD: fall out to BT_NONASCII */
}
- return XML_TOK_PARTIAL;
+ return BT_NONASCII;
}
-/* ptr points to character following "&#X" */
+/* Instantiate the scanners of xmltok_impl.c under little2_* names for
+input made of 2-byte units stored low byte first: p[0] is the low byte
+and p[1] the high byte.  Every macro parameter is parenthesized so that
+an argument such as ptr + 2 expands correctly. */
+#define PREFIX(ident) little2_ ## ident
+#define MINBPC 2
+/* A zero high byte means the unit is typed through latin1tab; otherwise
+it is classified by unicode_byte_type(high, low). */
+#define BYTE_TYPE(enc, p) \
+ ((p)[1] == 0 ? latin1tab[(unsigned char)*(p)] : unicode_byte_type((p)[1], (p)[0]))
+#define CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == (c))
+#define IS_NAME_CHAR(enc, p, n) \
+ UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
+#define IS_NMSTRT_CHAR(enc, p, n) \
+ UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
+
+#include "xmltok_impl.c"
+
+#undef MINBPC
+#undef BYTE_TYPE
+#undef CHAR_MATCHES
+#undef IS_NAME_CHAR
+#undef IS_NMSTRT_CHAR
+
+/* Prolog and content scanners generated above; 2 bytes minimum per
+character. */
+const struct encoding little2_encoding = {
+ { PREFIX(prologTok), PREFIX(contentTok) }, 2
+};
+
+#undef PREFIX
+
+/* Instantiate the scanners of xmltok_impl.c under big2_* names for
+input made of 2-byte units stored high byte first: p[0] is the high byte
+and p[1] the low byte.  Every macro parameter is parenthesized so that
+an argument such as ptr + 2 expands correctly. */
+#define PREFIX(ident) big2_ ## ident
+#define MINBPC 2
+/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
+/* A zero high byte means the unit is typed through latin1tab; otherwise
+it is classified by unicode_byte_type(high, low). */
+#define BYTE_TYPE(enc, p) \
+ ((p)[0] == 0 ? latin1tab[(unsigned char)(p)[1]] : unicode_byte_type((p)[0], (p)[1]))
+#define CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == (c))
+#define IS_NAME_CHAR(enc, p, n) \
+ UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
+#define IS_NMSTRT_CHAR(enc, p, n) \
+ UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
+
+#include "xmltok_impl.c"
+
+#undef MINBPC
+#undef BYTE_TYPE
+#undef CHAR_MATCHES
+#undef IS_NAME_CHAR
+#undef IS_NMSTRT_CHAR
+
+/* Prolog and content scanners generated above; 2 bytes minimum per
+character. */
+const struct encoding big2_encoding = {
+ { PREFIX(prologTok), PREFIX(contentTok) }, 2
+};
+
+#undef PREFIX
+/* Scanner used before the encoding is known.  enc must point at the
+initEnc member of an INIT_ENCODING, because it is cast back to that type
+to reach encPtr.  The first one or two bytes of input pick the encoding:
+  00 3C -> big2_encoding,    3C 00 -> little2_encoding,
+  FE FF -> big2_encoding,    FF FE -> little2_encoding (both: XML_TOK_BOM),
+  anything else -> utf8_encoding.
+The choice is stored through encPtr.  Except for the byte-order-mark
+cases, which return XML_TOK_BOM with *nextTokPtr two bytes on, the call
+is then repeated through the chosen encoding's scanner for state.
+Empty input returns XML_TOK_NONE.  A single byte that could begin one of
+the four patterns returns XML_TOK_PARTIAL.  Neither of those two returns
+stores an encoding. */
static
-int scanHexCharRef(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
+int initScan(const ENCODING *enc, int state, const char *ptr, const char *end,
+ const char **nextTokPtr)
{
- if (ptr != end) {
- switch (*ptr) {
- HEX_DIGIT_CASES
- break;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- for (++ptr; ptr != end; ++ptr) {
- switch (*ptr) {
- HEX_DIGIT_CASES
- break;
- case ';':
- *nextTokPtr = ptr + 1;
- return XML_TOK_CHAR_REF;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
+ const ENCODING **encPtr;
+
+ if (ptr == end)
+ return XML_TOK_NONE;
+ encPtr = ((const INIT_ENCODING *)enc)->encPtr; /* where the detected encoding gets stored */
+ if (ptr + 1 == end) { /* exactly one byte available */
+ switch ((unsigned char)*ptr) {
+ case 0xFE:
+ case 0xFF:
+ case 0x00:
+ case 0x3C:
+ return XML_TOK_PARTIAL; /* may be the first half of a 2-byte pattern tested below */
}
}
- return XML_TOK_PARTIAL;
-}
-
-/* ptr points to character following "&#" */
-
-static
-int scanCharRef(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
-{
- if (ptr != end) {
- switch (*ptr) {
- case 'x':
- case 'X':
- return scanHexCharRef(ptr + 1, end, nextTokPtr);
- DIGIT_CASES
- break;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- for (++ptr; ptr != end; ++ptr) {
- switch (*ptr) {
- DIGIT_CASES
- break;
- case ';':
- *nextTokPtr = ptr + 1;
- return XML_TOK_CHAR_REF;
- default:
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
+ else {
+ switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { /* first two bytes as one 16-bit value */
+ case 0x003C: /* '<' (0x3C) with the zero byte first */
+ *encPtr = &big2_encoding;
+ return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
+ case 0xFEFF:
+ *nextTokPtr = ptr + 2;
+ *encPtr = &big2_encoding;
+ return XML_TOK_BOM;
+ case 0x3C00: /* '<' (0x3C) with the zero byte second */
+ *encPtr = &little2_encoding;
+ return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
+ case 0xFFFE:
+ *nextTokPtr = ptr + 2;
+ *encPtr = &little2_encoding;
+ return XML_TOK_BOM;
}
}
- return XML_TOK_PARTIAL;
+ *encPtr = &utf8_encoding.enc; /* no 2-byte pattern matched: use UTF-8 */
+ return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
}
+/* Prolog-state scanner of the bootstrap encoding: forwards to initScan
+with state fixed to XML_PROLOG_STATE. */
static
-int scanEntityRef(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
+int initScanProlog(const ENCODING *enc, const char *ptr, const char *end,
+ const char **nextTokPtr)
{
- for (; ptr != end; ++ptr) {
- switch (*ptr) {
- case '<':
- case '>':
- case '&':
- S_CASES
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- case ';':
- *nextTokPtr = ptr + 1;
- return XML_TOK_ENTITY_REF;
- }
- }
- return XML_TOK_PARTIAL;
+ return initScan(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr);
}
-/* ptr points to character following "<![" */
-
+/* Content-state scanner of the bootstrap encoding: forwards to initScan
+with state fixed to XML_CONTENT_STATE. */
static
-int scanCdataSection(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
+int initScanContent(const ENCODING *enc, const char *ptr, const char *end,
+ const char **nextTokPtr)
{
- int i;
- /* CDATA[]]> */
- if (end - ptr < 9)
- return XML_TOK_PARTIAL;
- for (i = 0; i < 6; i++, ptr++) {
- if (*ptr != "CDATA["[i]) {
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- }
- end -= 2;
- for (; ptr != end; ++ptr) {
- if (*ptr == ']') {
- if (ptr[1] == ']' && ptr[2] == '>') {
- *nextTokPtr = ptr + 3;
- return XML_TOK_CDATA_SECTION;
- }
- }
- }
- return XML_TOK_PARTIAL;
-
+ return initScan(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr);
}
-int XmlContentTok(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
+/* Set up *p as a bootstrap encoding and make *encPtr point at it.
+Both scanner slots are filled with the initScan wrappers and the minimum
+bytes per character is set to 1.  p->encPtr records encPtr so that
+initScan can later replace *encPtr with the encoding it detects.
+*encPtr points into *p after this call, so *p must stay alive for as
+long as *encPtr may still refer to it. */
+void XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr)
{
- if (ptr != end) {
- switch (*ptr) {
- case '<':
- {
- ++ptr;
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (*ptr) {
- case '!':
- if (++ptr == end)
- return XML_TOK_PARTIAL;
- switch (*ptr) {
- case '-':
- return scanComment(ptr + 1, end, nextTokPtr);
- case '[':
- return scanCdataSection(ptr + 1, end, nextTokPtr);
- }
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- case '?':
- return scanPi(ptr + 1, end, nextTokPtr);
- case '/':
- return scanEndTag(ptr + 1, end, nextTokPtr);
- case '>':
- S_CASES
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- default:
- return scanStartTag(ptr, end, nextTokPtr);
- }
- }
- case '&':
- {
- ++ptr;
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (*ptr) {
- case '#':
- return scanCharRef(ptr + 1, end, nextTokPtr);
- S_CASES
- case ';':
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- }
- return scanEntityRef(ptr + 1, end, nextTokPtr);
- }
- default:
- {
- for (++ptr; ptr != end; ++ptr) {
- switch (*ptr) {
- case '&':
- case '<':
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- }
- }
- *nextTokPtr = ptr;
- return XML_TOK_DATA_CHARS;
- }
- }
- }
- return XML_TOK_NONE;
-}
-
-int XmlPrologTok(const TCHAR *ptr, const TCHAR *end, const TCHAR **nextTokPtr)
-{
- if (ptr != end) {
- switch (*ptr) {
- case '"':
- {
- for (++ptr; ptr != end; ++ptr) {
- if (*ptr == '"') {
- *nextTokPtr = ptr + 1;
- return XML_TOK_LITERAL;
- }
- }
- return XML_TOK_PARTIAL;
- }
- case '\'':
- {
- for (++ptr; ptr != end; ++ptr) {
- if (*ptr == '\'') {
- *nextTokPtr = ptr + 1;
- return XML_TOK_LITERAL;
- }
- }
- return XML_TOK_PARTIAL;
- }
- case '<':
- {
- ++ptr;
- if (ptr == end)
- return XML_TOK_PARTIAL;
- switch (*ptr) {
- case '!':
- return scanDecl(ptr + 1, end, nextTokPtr);
- case '?':
- return scanPi(ptr + 1, end, nextTokPtr);
- case '/':
- *nextTokPtr = ptr;
- return XML_TOK_INVALID;
- default:
- return XmlContentTok(ptr - 1, end, nextTokPtr);
- }
- }
- default:
- {
- for (++ptr; ptr != end; ++ptr) {
- switch (*ptr) {
- case '<':
- case '"':
- case '\'':
- *nextTokPtr = ptr;
- return XML_TOK_PROLOG_CHARS;
- }
- }
- *nextTokPtr = ptr;
- return XML_TOK_PROLOG_CHARS;
- }
- }
- }
- return XML_TOK_NONE;
+ p->initEnc.scanners[XML_PROLOG_STATE] = initScanProlog;
+ p->initEnc.scanners[XML_CONTENT_STATE] = initScanContent;
+ p->initEnc.minBytesPerChar = 1;
+ p->encPtr = encPtr;
+ *encPtr = &(p->initEnc); /* the caller's encoding now refers to the bootstrap scanners */
}
#ifndef XmlTok_INCLUDED
#define XmlTok_INCLUDED 1
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#ifndef XMLTOKAPI
#define XMLTOKAPI /* as nothing */
#endif
-#include <stddef.h>
-
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
-#define XML_TOK_NONE -2 /* The string to be scanned is empty */
-#define XML_TOK_PARTIAL -1
+#define XML_TOK_NONE -3 /* The string to be scanned is empty */
+#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */
+#define XML_TOK_PARTIAL -1 /* only part of a token */
#define XML_TOK_INVALID 0
-#define XML_TOK_COMMENT 1
-#define XML_TOK_PI 2 /* processing instruction */
+#define XML_TOK_BOM 1 /* Byte order mark */
+#define XML_TOK_COMMENT 2
+#define XML_TOK_PI 3 /* processing instruction */
/* The following tokens are returned only by XmlPrologTok */
-#define XML_TOK_LITERAL 3
-#define XML_TOK_PROLOG_CHARS 4
+#define XML_TOK_LITERAL 4
+#define XML_TOK_PROLOG_CHARS 5
+#define XML_TOK_PROLOG_S 6
/* The following token is returned by XmlPrologTok when it detects the end
of the prolog and is also returned by XmlContentTok */
-#define XML_TOK_START_TAG 5
+#define XML_TOK_START_TAG 7
/* The following tokens are returned only by XmlContentTok */
-#define XML_TOK_END_TAG 6
-#define XML_TOK_EMPTY_ELEMENT 7 /* empty element tag <e/> */
-#define XML_TOK_DATA_CHARS 8
-#define XML_TOK_CDATA_SECTION 9
-#define XML_TOK_ENTITY_REF 10
-#define XML_TOK_CHAR_REF 11 /* numeric character reference */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
+#define XML_TOK_END_TAG 8
+#define XML_TOK_EMPTY_ELEMENT 9 /* empty element tag <e/> */
+#define XML_TOK_DATA_CHARS 10
+#define XML_TOK_CDATA_SECTION 11
+#define XML_TOK_ENTITY_REF 12
+#define XML_TOK_CHAR_REF 13 /* numeric character reference */
+
+#define XML_NSTATES 2
+#define XML_PROLOG_STATE 0
+#define XML_CONTENT_STATE 1
+
+/* A tokenizer for one character encoding.  scanners holds one function
+per tokenizer state, indexed by XML_PROLOG_STATE or XML_CONTENT_STATE.
+Each scanner takes the encoding itself, the start and end of the input,
+and an out-pointer, in that order (these are the arguments XmlTok passes
+as enc, ptr, end and nextTokPtr).  minBytesPerChar is the minimum number
+of bytes per character. */
+typedef struct encoding {
+ int (*scanners[XML_NSTATES])(const struct encoding *,
+ const char *,
+ const char *,
+ const char **);
+ int minBytesPerChar;
+} ENCODING;
/*
Scan the string starting at ptr until the end of the next complete token,
literals, comments and processing instructions.
*/
-int XMLTOKAPI XmlPrologTokA(const char *ptr,
- const char *eptr,
- const char **nextTokPtr);
-int XMLTOKAPI XmlContentTokA(const char *ptr,
- const char *eptr,
- const char **nextTokPtr);
-int XMLTOKAPI XmlPrologTokW(const wchar_t *ptr,
- const wchar_t *eptr,
- const wchar_t **nextTokPtr);
-int XMLTOKAPI XmlContentTokW(const wchar_t *ptr,
- const wchar_t *eptr,
- const wchar_t **nextTokPtr);
+/* Call the scanner that enc provides for the given state.  enc appears
+twice in the expansion, so the argument should be free of side effects. */
+#define XmlTok(enc, state, ptr, end, nextTokPtr) \
+ (((enc)->scanners[state])(enc, ptr, end, nextTokPtr))
+
+/* XmlTok with the state fixed to XML_PROLOG_STATE. */
+#define XmlPrologTok(enc, ptr, end, nextTokPtr) \
+ XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)
+
+/* XmlTok with the state fixed to XML_CONTENT_STATE. */
+#define XmlContentTok(enc, ptr, end, nextTokPtr) \
+ XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
+
+/* Bootstrap encoding filled in by XmlInitEncoding.  initEnc is the
+ENCODING handed to callers; it is the first member so that a pointer to
+it can be cast back to INIT_ENCODING.  encPtr is the caller's encoding
+pointer, recorded so that it can be redirected later. */
+typedef struct {
+ ENCODING initEnc;
+ const ENCODING **encPtr;
+} INIT_ENCODING;
+
+/* Initialize the INIT_ENCODING given as the first argument and store a
+pointer to its ENCODING through the second argument. */
+void XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **);
#ifdef __cplusplus
}
#endif
-#ifdef UNICODE
-#define XmlPrologTok XmlPrologTokW
-#define XmlContentTok XmlContentTokW
-#else
-#define XmlPrologTok XmlPrologTokA
-#define XmlContentTok XmlContentTokA
-#endif
-
#endif /* not XmlTok_INCLUDED */